home *** CD-ROM | disk | FTP | other *** search
- From: gtoal@tharr.UUCP (Graham Toal)
- Newsgroups: alt.sources
- Subject: Spelling utilities in C (Part 2 of 3)
- Message-ID: <808@tharr.UUCP>
- Date: 28 Jun 90 07:07:54 GMT
-
- #!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
- # shar: Shell Archiver
- # Run the following text with /bin/sh to create:
- # check.c # correct.c # dyntrie.c # init.c # locate.c # print.c # similcmp.c # soundex.c # utils.c
- cat - << \SHAR_EOF > check.c
- /*
-
- File: check.c
- Author: Graham Toal
- Purpose: Check a word using DAWG or TRIE.
- Functions exported: dawg_check, pack_check
- Internal function: dawg_ck, pack_ck
-
- Description:
-
- call as: dawg_check(dawg, "word")
- pack_check(trie, "word")
-
- Simple spelling-check. Unfortunately the two interfaces for
- DAWGs and TRIEs are different, and even DAWG stuff differs from
- program to program. The problem is primarily in how to spot
- the end of a search; at the moment it is done when a particular
- pointer points to the root. Unfortunately we enter the data
- structure either at root+1 or at some arbitrary point, so this
- scheme means passing in two pointers. The next idea is to check
- that the *data* pointed to by the terminating pointer is 0; this
- would be OK but I was hoping to leave the initial word in the
- dawg free for expansion... (dawg contains [0, Node1..NodeN])
-
- *best* idea would be to terminate on *INDEX* being 0. This is
- probably also simplest & I'll code it up properly when I'm awake...
-
- */
-
- static int
- #ifdef PROTOTYPES
- dawg_ck(NODE PCCRAP *dict, NODE PCCRAP *edge, char *word)
- #else
- dawg_ck(dict, edge, word)
- NODE PCCRAP *dict;
- NODE PCCRAP *edge;
- char *word;
- #endif
- {
- if (edge == dict) return(0!=0);
- for (;;) {
- if (*word == (((*edge >> V_LETTER) & M_LETTER))) {
- if (*++word == '\0') {
- return((*edge & M_END_OF_WORD) != 0);
- } else {
- if ((edge = dict+(*edge & M_NODE_POINTER)) == dict) break;
- continue;
- }
- }
- if (((*edge++) & M_END_OF_NODE) != 0) break;
- }
- return(0!=0);
- }
-
- int
- #ifdef PROTOTYPES
- dawg_check(NODE PCCRAP *dict, char *word)
- #else
- dawg_check(dict, word)
- NODE PCCRAP *dict;
- char *word;
- #endif
- {
- return(dawg_ck(dict, dict+ROOT_NODE, word));
- }
-
- static int
- #ifdef PROTOTYPES
- pack_ck(NODE PCCRAP *ptrie, INDEX p, char *s)
- #else
- pack_ck(ptrie, p, s)
- NODE PCCRAP *ptrie;
- INDEX p;
- char *s;
- #endif
- {
- /* Note: node is passed in as a parameter so that it is in a register -
- otherwise it would take an extra instruction every time it was accessed.
- We trust the compiler to allocate register variables most efficiently --
- when people tinker, it might improve one machine but ruin another...
- */
-
- #define next_letter(s) \
- p = ptrie[p + *s]; \
- if (((p >> V_LETTER) & M_LETTER) != *s++) return(0!=0); \
- if (*s == '\0') return((p >> V_END_OF_NODE) != 0); \
- if ((p &= M_NODE_POINTER) == 0) return(0!=0)
-
- /* Depending on whether your machine has a cache or not, and whether
- it optimises pipeline fetches, you should decrease or increase the
- number of macro instantiations in the loop below appropriately */
-
- for (;;) {
- next_letter(s); next_letter(s); next_letter(s); next_letter(s);
- }
- }
-
- int
- #ifdef PROTOTYPES
- pack_check(NODE PCCRAP *ptrie, char *s)
- #else
- pack_check(ptrie, s)
- NODE PCCRAP *ptrie;
- char *s;
- #endif
- {
- return(pack_ck(ptrie, 1L, s));
- }
- SHAR_EOF
- cat - << \SHAR_EOF > correct.c
- /*
-
- File: correct.c
- Author: Graham Toal
- Purpose: Correct a word using Soudex and similcmp on DAWG.
- Functions exported: dawg_correct
- Internal functions: reccmp, walk_rest_dawg, walk_dawg
-
- Description:
- Reduce word to rough phonetic representation.
- Reduce all words in dawg to same form. Compare. This
- results in a large list of plausible (HA!) candidates.
-
- Then apply 'similcmp' which returns a distance metric representing
- closeness of two words. Sort by this metric, and return N best
- matches. If one stands out above others, return it alone.
-
- I'm working on a *much better* algorithm, but this will do
- for prototyping the interface.
-
- Final version of this will have callbacks to handle corrected
- words as they are produced.
-
- */
-
- #include "soundex.c"
- #include "similcmp.c"
-
- typedef struct {
- char *possible;
- int score;
- } rec;
-
-
- /* Numbers of parameters exploded during process of removing global state :( */
-
- static void
- #ifdef PROTOTYPES
- walk_rest_dawg(NODE PCCRAP *dawg, INDEX node, int len,
- char *word, char *original, char *target,
- rec *ans, int *nextfreep)
- #else
- walk_rest_dawg(dawg, node, len, word, original, target, ans, nextfreep)
- NODE PCCRAP *dawg;
- INDEX node;
- int len;
- char *word;
- char *original;
- char *target;
- rec *ans;
- int *nextfreep;
- #endif
- #define nextfree (*nextfreep)
- {
- NODE PCCRAP *edge;
- long c;
-
- for (edge = (NODE PCCRAP *)&dawg[node]; ;edge++) {
- c = *edge; /* Avoid MS C compiler bug :-( */
- c = c >> V_LETTER;
- c = c & M_LETTER;
- word[len] = (char)c;
- if ((*edge & M_END_OF_WORD) != 0) {
- word[len+1] = 0;
- if (strcmp(soundex(word), target) == 0) {
- ans[nextfree].possible = (char *)Malloc(len+1, 1);
- if (ans[nextfree].possible == NULL) {
- fprintf(stderr,"I\'ve no idea - it could be anything...\n");
- exit(0);
- }
- strcpy(ans[nextfree].possible, word);
- ans[nextfree].score = simil(word, original);
- if (nextfree < 511) nextfree++;
- }
- }
- c = *edge & M_NODE_POINTER;
- if (c != 0) {
- walk_rest_dawg(dawg, c, len+1, word, original, target, ans, nextfreep);
- }
- if ((*edge & M_END_OF_NODE) != 0) break; /* End of node */
- }
- #undef nextfree
- }
-
-
- static void
- #ifdef PROTOTYPES
- walk_dawg(
- NODE PCCRAP *dawg, INDEX node, int len,
- char *original, char *target,
- char targets, char *word,
- rec *ans, int *nextfreep)
- #else
- walk_dawg(dawg, node, len, original, target, targets, word, ans, nextfreep)
- NODE PCCRAP *dawg;
- INDEX node;
- int len;
- char *original;
- char *target;
- char targets;
- char *word;
- rec *ans;
- int *nextfreep;
- #endif
- #define nextfree (*nextfreep)
- {
- NODE PCCRAP *edge;
- long c;
-
- edge = (NODE PCCRAP *)&dawg[node];
- /* Only search trie starting with initial letter */
- for (;;) {
- c = *edge; /* Avoid MS C compiler bug :-( */
- c = c >> V_LETTER;
- c = c & M_LETTER;
- word[len] = (char)c;
- if (c == targets) {
- c = *edge & M_NODE_POINTER;
- walk_rest_dawg(dawg, c, len+1, word, original, target, ans, nextfreep);
- return;
- }
- edge++;
- }
- #undef nextfree
- }
-
- static int
- #ifdef PROTOTYPES
- reccmp(const void *a, const void *b)
- #else
- reccmp(a, b)
- void *a;
- void *b;
- #endif
- {
- return(((rec *)b)->score - ((rec *)a)->score);
- }
-
-
- int
- #ifdef PROTOYPES
- dawg_correct(NODE PCCRAP *dawg, char *original)
- #else
- dawg_correct(dawg, original)
- NODE PCCRAP *dawg;
- char *original;
- #endif
- {
- char word[81];
- char target[256];
- static rec ans[256];
- /* static because brain-dead MSC can't hack a big stack :-( */
- int targets;
- int nextfree = 0;
- int i, prev=0;
-
- targets = original[0];
- strcpy(target, soundex(original));
- walk_dawg(
- dawg, (INDEX)ROOT_NODE, 0, original, target, targets, word, ans, &nextfree);
- if (nextfree == 0) return(FALSE);
- qsort(ans, (size_t)nextfree, sizeof(rec), /*&*/reccmp);
- for (i = 0; i < nextfree; i++) {
- if (((prev*100) / (ans[i].score)) > 120) break;
- fprintf(stdout, "%s (%d)\n", ans[i].possible, ans[i].score);
- if (ans[i].score >= 100) break;
- if (i == 5) break;
- prev = ans[i].score;
- }
- return(TRUE);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > dyntrie.c
- /* The #define is to repair mangling by BITNET mailers */
- #define NOT ~
- /* This should be Tilde (twiddle) -- C unary Not */
- /*
-
- File: dyntrie.c
- Author: Graham Toal
- Purpose: Check a word using DAWG or TRIE.
- Functions exported: trie_new, trie_add, trie_dump
- Internal functions: insert_simple recurse_add
-
- Description:
-
- Builds DAWG-compatible trie in store, incrementally; words do not
- have to be presented in order. (Note it is NOT a 'packed-trie';
- in our scheme it is closer to a dawg - but without the tail-compression
- precomputed dawgs have).
-
- Any tries built using this should be used by check_dawg, print_dawg etc.
-
- Has rather nice property of only claiming enough memory for the
- job; dynamically moves the data structure when it fills!
-
- */
-
- /* To avoid global state, I'm putting these useful bits of info
- in the two words before the dawg itself; I *HOPE* that all systems
- allow negative indexing off arrays; I'm not at all sure they do
- though. However since I moved the base of the dict up by adding
- 2 to it, I *should* be able to get the old address by by subtracting
- 2 from it, so I'm using the first pair of macros below, not the
- more intuitive second pair.
- I should really do this for ordinary dawgs & tries too. Indeed
- I should *really* implement the dictlib.h interface, but thats a
- month or two off. Also I'ld like some feedback first.
- */
-
- #define EXTRAS 2
- #define MAX_ENTRIES(dawg) (*(dawg-2)) /* Safe way of aliasing */
- #define NEXT_FREE(dawg) (*(dawg-1))
-
-
- /* #define MAX_ENTRIES(dawg) dawg[-2] */ /* 'Obvious' alias */
- /* #define NEXT_FREE(dawg) dawg[-1] */ /* may be buggy */
-
- #define EMPTY 0
- #define INIT_MAX_ENTRIES 512
-
- /* Slop is so that we don't need to be continually checking nextfree
- as being close to max_entries */
- #define SLOP 12
-
- /* A debugging macro which rangechecks arrays -- happens when
- utils.c is included with RCHECK defined. Otherwise no overhead. */
- #define DICT(idx) dict[RANGECHECK(idx, MAX_ENTRIES(dict))]
-
-
- /*
- This quick hack job has special-case code in each function
- for the root node-set. Its rather tacky; I could clean it
- up and make the root node like all other nodes, but why
- bother; this is only test code anyway...
- */
-
-
-
- static INDEX
- #ifdef PROTOTYPES
- insert_simple(NODE PCCRAP **dictp, char *word)
- #else
- insert_simple(dictp, word)
- NODE PCCRAP **dictp;
- char *word;
- #endif
- #define dict (*dictp)
- {
- NODE bugfix;
- INDEX i; NODE n; int c;
-
- if (NEXT_FREE(dict) >= MAX_ENTRIES(dict)-SLOP) {
- NODE PCCRAP *moved;
- moved = MALLOC((SIZE_T)MAX_ENTRIES(dict)*2+EXTRAS, sizeof(NODE));
- if (moved != NULL) {
- moved += EXTRAS;
- MAX_ENTRIES(moved) = MAX_ENTRIES(dict);
- NEXT_FREE(moved) = NEXT_FREE(dict);
- /* Should use realloc but appears to be buggy :-( */
- for (i = MAX_ENTRIES(dict); i < MAX_ENTRIES(dict)*2; i++) {
- moved[i] = EMPTY;
- }
- for (i = 0; i < MAX_ENTRIES(dict); i++) moved[i] = DICT(i);
- dict -= EXTRAS;
- FREE(dict);
- dict = moved; MAX_ENTRIES(dict) *= 2;
- } else {
- fprintf(stderr, "Can\'t add any more words again\n");
- }
- }
-
- c = (*word++)&255;
- if (c == '\0') return(TERMINAL_NODE);
- i = NEXT_FREE(dict)++;
- bugfix = insert_simple(&dict, word);
- n = ((NODE)c << (NODE)V_LETTER) | M_END_OF_NODE | bugfix;
- DICT(i) = n; if (*word == '\0') DICT(i) |= M_END_OF_WORD;
- return(i);
- #undef dict
- }
-
- static void
- #ifdef PROTOTYPES
- recurse_add(NODE PCCRAP **dictp, INDEX base, char *word)
- #else
- recurse_add(dictp, base, word)
- NODE PCCRAP **dictp;
- INDEX base;
- char *word;
- #endif
- #define dict (*dictp)
- {
- NODE bugfix;
- INDEX i = base, newbase;
- NODE unpicked[MAX_CHARS], n;
- int c, ch;
- int gap, nextslot = 0, j;
- /* First see if first char is already in trie */
- ch = (*word++)&255;
- for (;;) {
- c = (int)(((dict[i] >> V_LETTER) & M_LETTER)&255);
- if (ch == c) {
- newbase = dict[i]&M_NODE_POINTER;
- if (newbase == 0) {
- /* have abc (this is c), adding (abc)defg */
- dict[i] &= (NOT M_NODE_POINTER);
- bugfix = insert_simple(&dict, word);
- dict[i] |= bugfix;
- } else {
- if (*word == '\0') {
- dict[i] |= M_END_OF_WORD;
- } else {
- recurse_add(dictp, newbase, word);
- }
- }
- return;
- }
- if ((dict[i]&M_END_OF_NODE) != 0) break;
- i++;
- }
-
- /* unpick base */
- for (nextslot = 0; nextslot < MAX_CHARS; nextslot++) {
- unpicked[nextslot] = EMPTY;
- }
- nextslot = 0;
-
- i = base; j = 0;
- for (;;) {
- if ((j == 0) && (((dict[i] >> V_LETTER) & M_LETTER) > ch)) {
- j = 1; newbase = nextslot++;
- }
- n = dict[i]; dict[i] = EMPTY; i++;
- unpicked[nextslot++] = n & (NOT M_END_OF_NODE);
- if ((n & M_END_OF_NODE) != 0) break;
- }
- if (j == 0) newbase = nextslot++; /* Was it last alphabetically? */
- /* add this char to the node */
- if (*word == '\0') {
- unpicked[newbase] =
- ((NODE)ch << (NODE)V_LETTER) | TERMINAL_NODE | M_END_OF_WORD;
- } else {
- /* and insert the rest of the
- string with insert_simple */
- bugfix = insert_simple(&dict, word);
- unpicked[newbase] =
- ((NODE)ch << (NODE)V_LETTER) | bugfix;
- /* The insert_simple call has to come after above loop, because
- of freeing slots... */
- }
- unpicked[nextslot-1] |= M_END_OF_NODE;
-
- /* scan for suitably-sized gap */
- /* MAX_CHARS+1 should be first 'real' node base (0, 1..MAX_CHARS reserved) */
-
- /*
- The particular application this is wanted for doesn't have a large
- number of words to be added dynamically, so the BFI code below which
- finds free holes in the trie space works fine. However if some bright
- spark cares to keep a freelist *in the holes* then this program
- effectively implements a linear-time sorting algorithm.
-
- (I know it's not *really*, but think about it before jumping
- down my throat; the N^2 case is very statistically unlikely)
- */
-
- for (i = (INDEX)MAX_CHARS+1; i < MAX_ENTRIES(dict)-nextslot-SLOP; i++) {
- /*
- Even without the freelist, the sneaky hack in pack.c for
- noting earliest bunch of free holes might well make a
- significant difference... (It was a lowest_search_start
- variable and used a bit of hysteresis)
- */
- gap = TRUE;
- for (j = 0; j < nextslot; j++) {
- if (dict[i+j] != EMPTY) {
- gap = FALSE;
- break;
- }
- }
- if (gap) break;
- }
- if (!gap) {
- NODE PCCRAP *moved;
- moved = MALLOC((SIZE_T)MAX_ENTRIES(dict)*2+EXTRAS, sizeof(NODE));
- if (moved != NULL) {
- moved += EXTRAS;
- MAX_ENTRIES(moved) = MAX_ENTRIES(dict);
- NEXT_FREE(moved) = NEXT_FREE(dict);
- /* Should use realloc but appears to be buggy :-( */
- for (i = MAX_ENTRIES(dict);
- i < MAX_ENTRIES(dict)*2; i++) moved[i] = EMPTY;
- for (i = 0; i < MAX_ENTRIES(dict); i++) moved[i] = DICT(i);
- dict -= EXTRAS;
- FREE(dict);
- dict = moved; /* If your compiler has aliasing bugs they may hit here. */
- MAX_ENTRIES(dict) *= 2;
- /* scan for suitably-sized gap */
- for (i = ((MAX_ENTRIES(dict)/2)-(MAX_CHARS+1));
- i < MAX_ENTRIES(dict)-nextslot; i++) {
- gap = TRUE;
- for (j = 0; j < nextslot; j++) {
- if (dict[i+j] != EMPTY) {
- gap = FALSE;
- break;
- }
- }
- if (gap) break;
- }
- }
- if (!gap) {
- fprintf(stderr, "Can\'t add any more words\n");
- return;
- }
- }
- newbase = i;
- /* reinsert */
- for (j = 0; j < nextslot; j++) {
- dict[newbase+j] = unpicked[j];
- }
- if (newbase+nextslot-1 >= NEXT_FREE(dict)) NEXT_FREE(dict) = newbase+nextslot;
- /* update pointer to the base of this node */
- for (i = 0; i < MAX_ENTRIES(dict); i++) {
- if ((dict[i] & M_NODE_POINTER) == base) {
- dict[i] &= (NOT M_NODE_POINTER);
- dict[i] |= newbase;
- break; /* (should only be one since trie, not dawg) */
- }
- }
- #undef dict
- }
-
- void
- #ifdef PROTOTYPES
- trie_add(NODE PCCRAP **dictp, char *word)
- #else
- trie_add(dictp, word)
- NODE PCCRAP **dictp;
- char *word;
- #endif
- #define dict (*dictp)
- {
- INDEX next;
- INDEX bugfix;
- int c;
- /* Root node is pre-reserved MAX_CHARS entries */
- c = (*word++)&255; /* bugfix - remember chars can be signed :-( */
- if (DICT((INDEX)ROOT_NODE+c) == EMPTY) {
- /* I'm allowing the root node to be sparse for speed; it *should*
- be dense like any other node. May change this later */
- if (*word == '\0') {
- DICT((INDEX)ROOT_NODE+c) =
- ((NODE)c << (NODE)V_LETTER) | M_END_OF_WORD;
- } else {
- bugfix = insert_simple(&dict, word);
- DICT((INDEX)ROOT_NODE+c) =
- ((NODE)c << (NODE)V_LETTER) | bugfix;
- }
- } else {
- if (*word == '\0') {
- DICT((INDEX)ROOT_NODE+c) |= M_END_OF_WORD;
- } else {
- next = DICT((INDEX)ROOT_NODE+c) & M_NODE_POINTER;
- if (next == 0) {
- /* have 'a', adding 'abcd' */
- /* (Should really check the letter for safety...) */
- bugfix = insert_simple(&dict, word);
- DICT((INDEX)ROOT_NODE+c) = ((NODE)c << (NODE)V_LETTER)
- | bugfix | M_END_OF_WORD;
- } else {
- recurse_add(dictp, next, word);
- }
- }
- }
- #undef dict
- }
-
- int
- #ifdef PROTOTYPES
- trie_new(NODE PCCRAP **dawgp)
- #else
- trie_new(dawgp)
- NODE PCCRAP **dawgp;
- #endif
- #define dawg (*dawgp)
- {
- INDEX i;
-
- dawg = MALLOC(INIT_MAX_ENTRIES+EXTRAS, sizeof(NODE));
- dawg += EXTRAS;
-
- MAX_ENTRIES(dawg) = INIT_MAX_ENTRIES; NEXT_FREE(dawg) = MAX_CHARS+1;
-
- dawg[0] = 0;
- for (i = 1; i <= MAX_CHARS; i++) dawg[i] = 0;
- dawg[MAX_CHARS] |= M_END_OF_NODE;
- /* 0 base, MAX_CHARS chars */
-
- return(TRUE);
- #undef dawg
- }
-
- int
- #ifdef PROTOTYPES
- trie_dump(NODE PCCRAP *dawg, char *filename)
- #else
- trie_dump(dawg, filename)
- NODE PCCRAP *dawg;
- char *filename;
- #endif
- {
- INDEX cnt, max;
- FILE *dumper;
- max = NEXT_FREE(dawg)*sizeof(NODE);
- /* I *knew* the next_free variable would come in handy :-) */
- dumper = fopen(filename, WRITE_BIN);
- if (dumper == NULL) {
- fprintf(stderr, "Failed to dump trie on file \'%s\'\n", filename);
- return(FALSE);
- }
- putword(max, dumper);
- if ((cnt = putwords(dawg, max, dumper)) != max) {
- fprintf(stderr, "Failed to dump: %ld bytes written instead of %ld\n",
- cnt, max);
- return(FALSE);
- }
- fclose(dumper);
- return(TRUE);
- }
-
- #undef MAX_ENTRIES
- #undef NEXT_FREE
- #undef DICT
- #undef SLOP
- SHAR_EOF
- cat - << \SHAR_EOF > init.c
- /*
-
- File: init.c
- Authors: Richard Hooker, Graham Toal
- Purpose: Loading of dictionary for spelling checker.
- Functions exported: dawg_init, pack_init
- Internal functions: spell_init
-
- Description:
-
- The address of a dictionary (either a PACKed dict or a DAWG) is set up by
- this procedure. This gives us the option of connecting the dict in read-only
- (or copy-on-write) virtual memory. On non-VM machines, we simply allocate a
- large buffer into which the relevant part of the dictionary is loaded.
-
- The magic number is used to check the dict type, *and* the machine byte-sex.
- If the sex is reversed, even a VM system has to load the data into writable
- memory (so that it can reverse it).
-
- */
-
- /*######################### INTERNAL FUNCTIONS #########################*/
-
-
- static int
- #ifdef PROTOTYPES
- spell_init(char *filename, NODE PCCRAP **dictp,
- char *DEFAULT_DICT, long magic_number, INDEX *nedges)
- #else
- spell_init(filename, dictp, DEFAULT_DICT, magic_number, nedges)
- char *filename;
- NODE PCCRAP **dictp;
- char *DEFAULT_DICT;
- long magic_number;
- INDEX *nedges;
- #endif
- #define dict (*dictp)
- {
- FILE *fp; INDEX count;
-
- /* init_dict("") gets default */
- if (*filename == '\0') filename = DEFAULT_DICT;
-
- /* Open the file and find out the size of the dict -- which
- is stored in the first word. Later I'll change the dict format
- to have a header, and the header will have to be skipped by
- this module. */
-
- if ((fp = fopen(filename, "rb")) == NULL) {
- fprintf (stderr, "Can\'t open file \"%s\"\n", filename);
- return(FALSE);
- }
- *nedges = getword(fp);
- #ifdef DEBUG
- fprintf(stderr, "dawg contains %8lx edges\n", *nedges);
- #endif
- /* Allocate precisely enough memory for all edges + 0 at root node. */
- if ((dict = MALLOC((SIZE_T)((*nedges)+1), sizeof(NODE PCCRAP *))) == 0) {
- fprintf (stderr, "Can\'t allocate space for dictionary\n");
- return(FALSE);
- }
-
- dict[0] = 0; /* Root node set to 0; terminal nodes point to 0. */
-
- /* Load in the dictionary. Routine 'getwords' should be efficient */
- count = getwords(&dict[1], (long)(4*(*nedges)), fp);
- if (count != 4*(*nedges)) {
- fprintf(stderr,
- "Failed to read dictionary %s - wanted %ld bytes, got %ld\n",
- filename, 4*(*nedges), count);
- return(FALSE);
- }
- fclose(fp);
-
- return(TRUE);
- #undef dict
- }
-
- /*####################### EXPORTED FUNCTIONS #########################*/
-
- int
- #ifdef PROTOTYPES
- dawg_init(char *filename, NODE PCCRAP **dawgp, INDEX *nedges)
- #else
- dawg_init(filename, dawgp, nedges)
- char *filename;
- NODE PCCRAP **dawgp;
- INDEX *nedges;
- #endif
- {
- return(spell_init(filename, dawgp, DEFAULT_DAWG, DAWG_MAGIC, nedges));
- }
-
- int
- #ifdef PROTOTYPES
- pack_init(char *filename, NODE PCCRAP **packp, INDEX *nedges)
- #else
- pack_init(filename, packp, nedges)
- char *filename;
- NODE PCCRAP **packp;
- INDEX *nedges;
- #endif
- {
- return(spell_init(filename, packp, DEFAULT_PACK, PACK_MAGIC, nedges));
- }
-
- SHAR_EOF
- cat - << \SHAR_EOF > locate.c
- /*
-
- File: locate.c
- Author: Graham Toal
- Purpose: Find all words beginning with particular prefix.
- Functions exported: locate_prefix
-
- Description:
- Searches as in spell-check for prefix in dict, but doesn't
- fail if word doesn't terminate at that point. It returns
- an index into the dict which can be used with print_dawg_prefix
- to display all the words found. However it is more useful
- than that; text-analysis programs can check that a word matches
- "root*", for instance, when doing stylistic analysis etc.
-
- */
-
- INDEX
- #ifdef PROTOTYPES
- dawg_locate_prefix(NODE PCCRAP *dawg, char *word, INDEX edge)
- #else
- dawg_locate_prefix(dawg, word, edge)
- NODE PCCRAP *dawg;
- char *word;
- INDEX edge;
- #endif
- {
- for (;;) {
- if (*word == (((dawg[edge] >> V_LETTER) & M_LETTER))) {
- if (*++word == '\0') {
- return(dawg[edge]&M_NODE_POINTER);
- } else {
- if ((edge = (dawg[edge] & M_NODE_POINTER)) == ROOT_NODE) break;
- continue;
- }
- }
- if (((dawg[edge++]) & M_END_OF_NODE) != 0) break;
- }
- /* What to do when none found? -- fail-safe, or some error code...? */
- return(ROOT_NODE);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > print.c
- /*
-
- File: print.c
- Author: Graham Toal
- Purpose: Print a packed trie to stderr.
- Functions exported: dawg_print, pack_print, dawg_print_prefix
- Internal functions: pack_pr dawg_pr dawg_pr_prefix
-
- Description:
- Pre-order traverse of packed TRIE or DAWG. Will be modified
- soon to take output file as parameter. Then sometime after that
- to callback with each string at it is generated. Meanwhile,
- people requiring custom versions of dawg-walking stuff might
- as well just copy this code and edit it to do what they want.
-
- The special version print_dawg_prefix takes a NODE from somewhere
- in a dict as a parameter, and prints out only those words which
- contain that node. You have to locate the node seperately with
- 'locate_prefix', and pass the prefix string in so it can be printed.
-
- */
-
- static void
- #ifdef PROTOTYPES
- dawg_pr(NODE PCCRAP *dawg, INDEX node, int len)
- #else
- dawg_pr(dawg, node, len)
- NODE PCCRAP *dawg;
- INDEX node;
- int len;
- #endif
- {
- static char word[MAX_WORD_LEN];
- NODE PCCRAP *edge;
-
- for (edge = (NODE PCCRAP *)&dawg[node]; ; edge++) {
- long c;
- c = *edge; /* Don't rewrite this - its avoiding a MSC bug */
- c = c >> V_LETTER;
- c = c & M_LETTER;
- word[len] = (char)c;
- if ((*edge & M_END_OF_WORD) != 0) {
- word[len+1] = '\0';
- fprintf(stdout, "%s\n", word);
- }
- c = *edge & M_NODE_POINTER;
- if ((*edge & M_NODE_POINTER) != 0)
- dawg_pr (dawg, c, len + 1);
- if ((*edge & M_END_OF_NODE) != 0) break; /* End of node */
- }
- }
-
- void
- #ifdef PROTOTYPES
- dawg_print(NODE PCCRAP *dawg, INDEX node)
- #else
- dawg_print(dawg, node)
- NODE PCCRAP *dawg;
- INDEX node;
- #endif
- {
- dawg_pr(dawg, node, 0);
- }
-
- static void
- #ifdef PROTOTYPES
- pack_pr(NODE PCCRAP *ptrie, INDEX i, int pos)
- #else
- pack_pr(ptrie, i, pos)
- NODE PCCRAP *ptrie;
- INDEX i;
- int pos;
- #endif
- {
- static char s[MAX_WORD_LEN+1];
- int b;
- INDEX p;
- for (b = BASE_CHAR; b < BASE_CHAR+MAX_CHARS; b++) {
- if (b != 0) {
- p = ptrie[i+b-BASE_CHAR];
- if (((p>>V_LETTER)&M_LETTER) == b) {
- s[pos] = b; s[pos+1] = '\0';
- if ((p & M_END_OF_WORD) != 0) fprintf(stderr, "%s\n", s);
- if ((p &= M_NODE_POINTER) != 0) {
- pack_pr(ptrie, p, pos+1);
- }
- }
- }
- }
- }
-
-
- void
- #ifdef PROTOTYPES
- pack_print(NODE PCCRAP *ptrie, INDEX node)
- #else
- pack_print(ptrie, node)
- NODE PCCRAP *ptrie;
- INDEX node;
- #endif
- {
- pack_pr(ptrie, node, 0);
- }
-
-
- static void
- #ifdef PROTOTYPES
- dawg_pr_prefix(NODE PCCRAP *dawg, char *prefix, INDEX node, int len)
- #else
- dawg_pr_prefix(dawg, prefix, node, len)
- NODE PCCRAP *dawg;
- char *prefix;
- INDEX node;
- int len;
- #endif
- {
- NODE PCCRAP *edge;
- static char word[MAX_WORD_LEN];
- long c;
-
- for (edge = (NODE PCCRAP *)&dawg[node]; ; edge++) {
- /* Don't 'fix' - compiler bugaround for microsoft :-( */
- c = *edge; c = c >> V_LETTER; c = c & M_LETTER;
- word[len] = (char)c;
- if ((*edge & M_END_OF_WORD) != 0) {
- word[len+1] = 0;
- fprintf(stdout, "%s%s\n", prefix, word);
- }
- c = *edge & M_NODE_POINTER;
- if (c != 0) dawg_pr_prefix(dawg, prefix, c, len + 1);
- /* End of node - check repair later - I accidentally nobbled it */
- if ((*edge & M_END_OF_NODE) != 0) break;
- }
- }
-
- void
- #ifdef PROTOTYPES
- dawg_print_prefix(NODE PCCRAP *dawg, char *prefix, INDEX node)
- #else
- dawg_print_prefix(dawg, prefix, node)
- NODE PCCRAP *dawg;
- char *prefix;
- INDEX node;
- #endif
- {
- dawg_pr_prefix(dawg, prefix, node, 0);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > similcmp.c
- #ifndef similcmp_c
- #define similcmp_c 1
-
- #ifdef LIB_STRINGS
- #include <strings.h>
- #else
- #include <string.h>
- #endif
-
- /*
-
- File: similcmp.c
- Authors: John Ratcliffe, and an anonymous coder.
- Purpose: Better approximate string equality test.
- Functions exported: simil
- Internal functions: GCsubstr
-
- Description:
- See below. I'll replace this one eventually with my own
- algorithm which does sophisticated compound-grapheme analysis
- and returns a degree of phonetic similarity and a probability
- that the transformations made were valid.
-
-
- 'ghoti==fish' (Match = 90%, Prob = 1%) ;-)
- lauGH = f match 100% prob 30%
- wOmen = i match 90% prob 10%
- staTIon = sh match 100% prob 5%
-
- */
-
- /* Ratcliff/Obershelp pattern matching
- *
- * Approximate word matching: takes two words and returns a
- * similarity score based on co-occurrence of subpatterns.
- *
- * This code appeared in a letter to the editor in DDJ, 11/88.
- * The original article on the pattern matching, "Pattern Matching
- * by Gestalt" by John Ratcliff, appeared in the July 1988
- * issue (#181) but the algorithm was presented in assembly.
- *
- * The 11/88 issue also contained another verison in C which was
- * a more faithful translation of the original; it has the
- * advantage of not being recursive.
- *
- * The algorithm seems to work nicely for a variety of test cases.
- * The main drawback of the algorithm is the cost of the pairwise
- * comparisons. It is significantly more expensive than stemming,
- * soundex, and the like. Might consider using this as a second
- * phase...
- */
-
- int
- #ifdef PROTOTYPES
- GCsubstr(char *st1, char *end1, char *st2, char *end2)
- #else
- GCsubstr(st1, end1, st2, end2)
- char *st1;
- char *end1;
- char *st2;
- char *end2;
- #endif
- {
- register char *a1, *a2;
- char *b1, *s1, *b2, *s2;
- short max, i;
-
- if (end1 <= st1 || end2 <= st2) return(0);
- if (end1 == st1 + 1 && end2 == st2 + 1) return(0);
-
- max = 0; b1 = end1; b2 = end2;
-
- for (a1 = st1; a1 < b1; a1++) {
- for (a2 = st2; a2 < b2; a2++) {
- if (*a1 == *a2) {
- /* determine length of common substring */
- for (i = 1; a1[i] && (a1[i] == a2[i]); i++) /* do nothing */;
- if (i > max) {
- max = i; s1 = a1; s2 = a2;
- b1 = end1 - max; b2 = end2 - max;
- }
- }
- }
- }
- if (!max) return(0);
- max += GCsubstr(s1 + max, end1, s2 + max, end2); /* rhs */
- max += GCsubstr(st1, s1, st2, s2); /* lhs */
- return(max);
- }
-
- int
- #ifdef PROTOTYPES
- simil(char *s1, char *s2)
- #else
- simil(s1, s2)
- char *s1;
- char *s2;
- #endif
- {
- int l1 = strlen(s1), l2 = strlen(s2);
- if (strcmp(s1, s2) == 0) return(100); /* exact match end-case */
- return(200 * GCsubstr(s1, s1 + l1, s2, s2 + l2) / (l1 + l2));
- }
- #endif /* similcmp_c */
- SHAR_EOF
- cat - << \SHAR_EOF > soundex.c
- #ifndef soundex_c
- #define soundex_c 1
-
- #ifdef LIB_STRINGS
- #include <strings.h>
- #else
- #include <string.h>
- #endif
-
-
- /* isalpha & toupper; define your own if you don't have ctype.h */
- #include <ctype.h>
-
- /*
-
- File: soundex.c
- Authors: Jonathan Leffler
- Purpose: Approximate string equality test.
- Functions exported: soundex
- Internal functions: nsoundex
-
- Description:
-
- There are umpteen soundexes around. At least this one is commented...
- (Actually I'd prefer one which understood ph/f and Mac/Mc as special
- cases; short of a proper phonetic alg such as I'm currently developing)
- See below for description:
-
- */
-
- /*
- ** SOUNDEX CODING
- **
- ** Rules:
- ** 1. Retain the first letter; ignore non-alphabetic characters.
- ** 2. Replace second and subsequent characters by a group code.
- ** Group Letters
- ** 1 BFPV
- ** 2 CGJKSXZ
- ** 3 DT
- ** 4 L
- ** 5 MN
- ** 6 R
- ** 3. Do not repeat digits
- ** 4. Truncate or ser-pad to 4-character result.
- **
- ** Originally formatted with tabstops set at 4 spaces -- you were warned!
- **
- ** Code by: Jonathan Leffler (john@sphinx.co.uk)
- ** This code is shareware -- I wrote it; you can have it for free
- ** if you supply it to anyone else who wants it for free.
- **
- ** BUGS: Assumes 7-ASCII; fails on ISO Latin1
- */
-
- static char lookup[] = {
- '0', /* A */ '1', /* B */ '2', /* C */
- '3', /* D */ '0', /* E */ '1', /* F */
- '2', /* G */ '0', /* H */ '0', /* I */
- '2', /* J */ '2', /* K */ '4', /* L */
- '5', /* M */ '5', /* N */ '0', /* O */
- '1', /* P */ '0', /* Q */ '6', /* R */
- '2', /* S */ '3', /* T */ '0', /* U */
- '1', /* V */ '0', /* W */ '2', /* X */
- '0', /* Y */ '2' /* Z */
- };
-
- /* Soundex for arbitrary number of characters of information */
- #define MAX_SOUNDEX_LEN 10
-
- static char *
- #ifdef PROTOTYPES
- nsoundex(char *str, int n)
- #else
- nsoundex(str, n)
- char *str; /* In: String to be converted */
- int n; /* In: Number of characters in result string */
- #endif
- {
- static char buff[MAX_SOUNDEX_LEN];
- register char *s;
- register char *t;
- char c;
- char l;
-
- if (n <= 0) n = 4; /* Default */
- if (n > sizeof(buff) - 1) n = sizeof(buff) - 1;
- t = &buff[0];
-
- for (s = str; ((c = *s) != '\0') && t < &buff[n]; s++) {
- if (!isalpha(c)) continue;
- c = toupper(c);
- if (t == &buff[0]) {
- l = *t++ = c;
- continue;
- }
- c = lookup[c-'A']; /* Fails on any alpha not in 'a'..'z' */
- if (c != '0' && c != l) l = *t++ = c;
- }
- while (t < &buff[n]) *t++ = '0'; *t = '\0';
- return(&buff[0]);
- }
-
- /* Normal external interface */
- char *
- #ifdef PROTOTYPES
- soundex(char *str)
- #else
- soundex(str)
- char *str;
- #endif
- {
- return(nsoundex(str, 4)); /* vary this? - try 5 or 6? */
- }
- #endif /* soundex_c */
-
- SHAR_EOF
- cat - << \SHAR_EOF > utils.c
- /*
-
- File: utils.c
- Author: Graham Toal
- Purpose: Portability library
-
- Description:
-
- Most of what differs between operating systems is handled here.
- This module is *incredibly* hacky -- I've just been interested
- in finding the problems so far, not in making the solutions neat.
-
- The final release of this suite will concentrate on cleaning up
- this file!
- */
-
-
-
- /* PENDING: Generic load dict; meanwhile should also put efficient
- msdos file loading into getwords(). See spelldawg for best coding. */
-
- #define SIZE_T size_t
- /* MSDos redefines this for huge allocation */
-
- #ifdef SYS_RISCOS
- #define DEFAULT_DAWG "run:dict-dwg"
- #define DEFAULT_PACK "run:dict-pck"
- #else
- #ifdef SYS_EMAS
- #define DEFAULT_DAWG "dict#dwg"
- #define DEFAULT_PACK "dict#pck"
- #else
- /* Unix, MessDross, etc... */
- #define DEFAULT_DAWG "dict.dwg"
- #define DEFAULT_PACK "dict.pck"
- #endif
- #endif
-
-
- /* Undo any #define's here if they are inappropriate for some system */
-
- #define EXT_CHAR '.'
-
- #define READ_TEXT "r"
- #define WRITE_BIN "wb"
- #define READ_BIN "rb"
-
-
- /* system configuration */
-
- #ifdef KNOWS_VOID
- #define VOID void
- #else
- /* As in... void fred(VOID) */
- #define void int
- #define VOID
- #endif
-
- #ifdef SYS_MSDOS
- #ifdef COMPILER_ZORTECH
- int _stack = 0x3000;
- #define PCCRAP far
- #else
- #ifdef COMPILER_TURBOC
- #define PCCRAP far
- #else
- #define PCCRAP huge
- #endif
- #endif
- #else
- #define PCCRAP
- #endif
-
- /* Hmph... I don't really want to do realloc too. Just as well that
- one implmentation is buggy; saves me having to work out what to do :-) */
-
-
- #ifdef SYS_MSDOS
- /* Normally int... */
- #undef SIZE_T
- #define SIZE_T long
- /* (Still to test AZTEC & LATTICE) */
- /* Mallocs of > 64K (maybe even > 32K) have to come off the far/huge heap */
- #ifdef COMPILER_ZORTECH
- #include <dos.h>
- #define MALLOC(x,s) (NODE PCCRAP *)farmalloc((x)*(s)) /* Zortech */
- #define Malloc(x,s) malloc((x)*(s))
- #define FREE(x) farfree(x)
- #define Free(x) free(x)
- #else /* else not microsoft */
- #ifdef COMPILER_TURBOC
- #include <alloc.h>
- #define MALLOC(x,s) (NODE PCCRAP *)farmalloc((x)*(s)) /* Turbo */
- #define Malloc(x,s) malloc((x)*(s))
- #define FREE(x) farfree(x)
- #define Free(x) free(x)
- #else /* else not turbo */
- #include <malloc.h>
- #ifdef NEVER
- #define MALLOC(x,s) (NODE PCCRAP *)my_halloc((long)(x),(size_t)(s))
- /* Microsoft, Aztec */
- #define Malloc(x,s) my_malloc((x)*(s))
- #define FREE(x) my_hfree((void huge *)(x)) /* For some reason MICROSOFT */
- #define Free(x) my_free((void *)x) /* complains of a type mismatch */
- /* without these casts */
- #endif
- #define MALLOC(x,s) (NODE PCCRAP *)halloc((long)(x),(size_t)(s))
- /* Microsoft, Aztec */
- #define Malloc(x,s) malloc((x)*(s))
- #define FREE(x) hfree((void huge *)(x)) /* For some reason MICROSOFT */
- #define Free(x) free((void *)x) /* complains of a type mismatch */
- /* without these casts */
- #endif
- #endif
- #else /* else not SYS_MSDOS */
- #ifdef STDLIBS
- #include <stdlib.h> /* for malloc, free & exit */
- #define MALLOC(x,s) (NODE PCCRAP *)malloc(((x)*(s)))
- #define Malloc(x,s) malloc((x)*(s))
- #define FREE(x) free(x)
- #define Free(x) free(x)
- #else
- #define MALLOC(x,s) (NODE PCCRAP *)malloc(((x)*(s)))
- #define Malloc(x,s) malloc((x)*(s))
- #define FREE(x) free(x)
- #define Free(x) free(x)
- #ifndef size_t /* Doesn't work if size_t was typedef'd */
- #define size_t int /* And if an int isn't big enough we're in trouble! */
- #endif
- #ifdef PROTOTYPES
- extern void *malloc(size_t bytes);
- extern void exit(int rc);
- #else
- extern void *malloc();
- extern void exit();
- #endif
- #endif /* not stdlibs */
- #endif /* Not msdos */
-
- #ifdef SYS_RISCOS
- #undef EXT_CHAR
- #define EXT_CHAR '-'
- #endif
-
- #ifdef SYS_EMAS
- #undef EXT_CHAR
- #define EXT_CHAR '#'
- #endif
-
- #ifdef SYS_MAC
- #ifdef COMPILER_THINK
- #undef WRITE_BIN
- #undef READ_BIN
- #define WRITE_BIN "w"
- #define READ_BIN "r"
- #endif
- #endif
-
- #ifdef MEMMODELS
- #define SMALL_MEMORY 1
- #endif
-
- /*
- Error handling
-
- At the moment we'll just go for OK / NOT OK. Later we should
- fix all exit's to use a specific code symbol (eg EXIT_MALLOCFAIL)
- but this has to be done system by system. Whenever a new one is
- added, a default should be set up (as 0?)
- */
-
- /*#include <errno.h>*/ /* Only later when we want to be more precise! */
- #define EXIT_OK (0) /* Generic success */
- #define EXIT_ERROR (1) /* Generaic failure */
-
- /* For each system, replace generic errors with system-dependant ones. */
- #ifdef vms
- /*
- * VMS-specific error status codes. Approximate Ultrix equivalents.
- */
- #include <ssdef.h>
- #include <stsdef.h>
- #undef EXIT_OK
- #define EXIT_OK SS$_NORMAL /* Generic success */
- #undef EXIT_ERROR
- #define EXIT_ERROR SS$_NORMAL /* Don't want random error message! */
- #endif
-
- #define DELETE(filename)
-
- #ifdef __STDC__
- #undef DELETE
- #define DELETE(filename) remove(filename)
- #else
- #ifdef unix
- #undef DELETE
- #define DELETE(filename) unlink(filename)
- #endif
- #endif
-
- #ifdef NEVER
-
- /* these macros caught the fact that on MICROSOFT, the parameters
- being passed in were ints -- and I hadn't been given a warning
- because I had explicitly cast the to size-t. Hence why I've
- declared SIZE_T as long. This is all a bit messy. Curse you IBM PCs
- */
-
- void PCCRAP *my_halloc(long n,size_t s) {
- char PCCRAP *p;
- p = halloc(n, s);
- fprintf(stderr, "halloc(%8lx*%d) -> %8lx\n", n, s, (long)p);
- return(p);
- }
-
- void *my_malloc(size_t s) {
- char *p;
- p = malloc(s);
- fprintf(stderr, "malloc(%4x) -> %4x\n", s, (int)p);
- return(p);
- }
-
- void my_hfree(void PCCRAP *p) {
- fprintf(stderr, "hfree(%8lx)\n", (long)p);
- hfree(p);
- }
-
- void my_free(void *p) {
- fprintf(stderr, "free(%4x)\n", (int)p);
- free(p);
- }
- #endif
-
-
- #ifdef RCHECK
- #ifndef PROTOTYPES
- long toosmall(idx, max, line)
- long idx;
- long max;
- int line;
- #else
- long toosmall(long idx, long max, int line)
- #endif
- {
- if (line == 0) {
- fprintf(stderr,
- "RANGE CHECK: %ld should not be less than 0 (max %ld)\n", idx, max);
- } else {
- fprintf(stderr,
- "RANGE CHECK AT LINE %d: %ld should not be less than 0 (max %ld)\n",
- line, idx, max);
- }
- return(0L);
- }
- #ifndef PROTOTYPES
- long toobig(idx, max, line)
- long idx;
- long max;
- int line;
- #else
- long toobig(long idx, long max, int line)
- #endif
- {
- if (line == 0) {
- fprintf(stderr,
- "RANGE CHECK: %ld should not be larger than %ld\n", idx, max);
- } else {
- fprintf(stderr,
- "RANGE CHECK AT LINE %d: %ld should not be larger than %ld\n",
- line, idx, max);
- }
- return(max);
- }
-
- #ifdef KNOWS_LINE
- #define TOOSMALL(idx, max) toosmall((long)idx, (long)max, __LINE__)
- #define TOOBIG(idx, max) toobig((long)idx, (long)max, __LINE__)
- #else
- #define TOOSMALL(idx, max) toosmall((long)idx, (long)max, 0)
- #define TOOBIG(idx, max) toobig((long)idx, (long)max, 0)
- #endif
-
- #define RANGECHECK(idx,max) \
- (idx < 0 ? (TOOSMALL(idx,max)) : (idx >= max ? (TOOBIG(idx,max)) : idx))
- #else
- #define RANGECHECK(idx,max) (idx)
- #endif
-
- #ifdef PROTOTYPES
- long getwords(long PCCRAP *data, long count, FILE *fp)
- #else
- long getwords(data, count, fp)
- long PCCRAP *data;
- long count;
- FILE *fp;
- #endif
- {
- #ifdef SYS_MSDOS
- char PCCRAP *p; int c; long byte_count;
- p = (char PCCRAP *)(&data[0]);
- /* Somewhere I have a version which fread()s into a 16K near
- buffer, then copies it out; use that technique here - *MUCH*
- faster */
- for (byte_count = 0; byte_count < count; byte_count++) {
- c = fgetc(fp);
- if (c == -1 || ferror(fp)) {
- printf ("Dictionary read error - %ld wanted - %ld got\n",
- count, byte_count)/*, exit(0)*/;
- break;
- }
- *p++ = (c & 255);
- }
- return(count);
- #else
- return((long)fread(&data[0], (size_t)1, (size_t)count, fp));
- #endif
- }
-
- #ifdef PROTOTYPES
- long putwords(long PCCRAP *data, long count, FILE *fp)
- #else
- long putwords(data, count, fp)
- long PCCRAP *data;
- long count;
- FILE *fp;
- #endif
- {
- #ifdef SYS_MSDOS
- extern int _NEAR _CDECL errno;
- long i; char PCCRAP *p;
- p = (char PCCRAP *)&data[0];
- if (fp == NULL) {
- fprintf(stderr, "putwords: no file?\n");
- exit(0);
- }
- for (i = 0L; i < count; i++) {
- /* Somewhere I have a version which copies to a 16K near
- buffer, then frwrite()s it out; use that technique here - *MUCH*
- faster */
- int rc = fputc(*p++, fp);
- if (ferror(fp)) {
- fprintf(stderr, "rc = %d\n", rc);
- perror("dawg");
- fprintf (stderr, "Error writing to output file\n");
- exit(0);
- }
- }
- return(count);
- #else
- return(fwrite(&data[0], (size_t)1, (size_t)count, fp));
- #endif
- }
-
- static long PCCRAP *WTMP = NULL;
- /* A bit hacky having this single 4 bytes in heap space, but it makes
- things a little more consistent. it'll all go away eventually
- anyway... */
-
- #ifdef PROTOTYPES
- void putword(long w, FILE *fp)
- #else
- void putword(w, fp)
- long w;
- FILE *fp;
- #endif
- {
- if (WTMP == NULL) {
- WTMP = (long PCCRAP *)MALLOC(1,sizeof(long));
- }
- *WTMP = w;
- if (putwords(WTMP, (long)sizeof(long), fp) != sizeof(long)) {
- /* (using putwords avoids confusion over bytesex) */
- fprintf(stderr, "Data write error - putword\n");
- }
- }
-
- #ifdef PROTYPES
- long getword(FILE *fp)
- #else
- long getword(fp)
- FILE *fp;
- #endif
- {
- if (WTMP == NULL) {
- WTMP = (long PCCRAP *)MALLOC(1,sizeof(long));
- }
- if (getwords(WTMP, (long)sizeof(long), fp) != sizeof(long)) {
- fprintf(stderr, "Data read error - getword\n");
- }
- return(*WTMP);
- }
- SHAR_EOF
-