home *** CD-ROM | disk | FTP | other *** search
- From: gtoal@tharr.UUCP (Graham Toal)
- Newsgroups: alt.sources
- Subject: Spelling utilities in C (Part 3 of 3)
- Message-ID: <809@tharr.UUCP>
- Date: 28 Jun 90 07:09:57 GMT
-
- #!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
- # shar: Shell Archiver
- # Run the following text with /bin/sh to create:
- # dawg.c # dwgcheck.c # pack.c # pckcheck.c # pdawg.c # ppack.c # proot.c # sample.c # tell.c
- cat - << \SHAR_EOF > dawg.c
- /* The first three #define's are to repair mangling by BITNET mailers */
- #define EOR ^
- /* This should be Caret (hat, up arrow) -- C Ex-or */
- #define MOD %
- /* This should be Percent -- C Modulus */
- #define NOT ~
- /* This should be Tilde (twiddle) -- C unary Not */
- #include "copyr.h"
- /*****************************************************************************/
- /* */
- /* FILE : DAWG.C */
- /* */
- /* Convert an alphabetically-ordered dictionary file in text format */
- /* (one word per line) into a Directed Acyclic Word Graph. The command */
- /* syntax is */
- /* */
- /* DAWG <text file (inc .ext)> <output file (no ext)> */
- /* */
- /* The first 4 bytes of the output file form a 24-bit number containing */
- /* the number of edges in the dawg. The rest of the file contains the */
- /* dawg itself (see "The World's Fastest Scrabble Program" by Appel */
- /* and Jacobson for a description of a dawg). */
- /* */
- /*****************************************************************************/
-
- #include "grope.h"
- #ifndef grope_h
- /* If you don't have grope.h, create an empty one.
- These will do for a basic system: */
- #undef KNOWS_VOID
- #undef STDLIBS
- #undef PROTOTYPES
- #define SYS_ANY 1
- #define COMPILER_ANY 1
- #define SMALL_MEMORY 1
- /* To be defined if you have to generate
- the data structure in bits. This will
- certainly be true for any non-trivial
- dictionary on MSDOS, or most home
- micros with 1Mb Ram or under. */
- #endif
- /* Portability notes:
-
- 0) KISS! Keep It Simple, Stupid!
-
- 1) No typedef's
- 2) No bitfields
- 3) No structs
- 4) No #if defined()
- 5) No complex #if's
- 6) No procedure variables
- 7) Don't trust tolower() as some libs don't range check
- 8) Stay character-set independent (EBCDIC should work?)
- 9) Assume sizeof(long) >= 32 bits, but sizeof(int) just >= 16
- 10) Note use of ABS in preference to unsigned longs.
- 11) Assume 8-bit char set
- 12) Don't use k&r reserved words for variables, even if not
- in ANSI. Such as 'entry'.
- 13) Unix people; no sys/ or machine/ files please unless under a
- suitable #ifdef, and generic code supplied for everyone else...
- 14) this is byte-sex independant at the moment. KEEP IT THAT WAY!
- Don't assume dawgs are stored in a fixed form, such as so-called
- 'net-order'.
- 15) Since C doesn't range-check arrays, we'll do it. If possible,
- leave the running systems with range checking on if you can
- afford it!
- 16) No nested include files.
- 17) Don't pull in any include files twice. (*Kills* the Atari!)
- 18) Don't use 'register' -- trust the compiler; the compiler is your friend.
- */
- /*#define RCHECK 1*/ /* We want range-checking of array accesses */
-
- #include <stdio.h>
-
- #ifdef LIB_STRINGS
- #include <strings.h> /* Some unixes, Mac? */
- #else
- #include <string.h>
- #endif
-
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place the files dawg.h grope.h,
- dawg.c and utils.c in a folder. Create a project that contains
- dawg.c and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- /* Assume a 1Mb mac for the moment. Someday I'll add dynamic RAM sizing */
- #define SMALL_MEMORY 1
- #endif
-
- #include "dawg.h" /* main system constants */
-
- #include "utils.c" /* utils.c pulls in header file for malloc/free & exit */
-
- #define ABS(x) ((x) < 0 ? -(x) : (x))
-
- /**
- * The following constant, HASH_TABLE_SIZE, can be changed according to
- * dictionary size. The two values shown will be fine for both large
- * and small systems. It MUST be prime.
- **/
-
- #ifdef SMALL_MEMORY
- #define HASH_TABLE_SIZE 30011
-
- #ifdef SYS_MAC
- /* boy, you guys must be *really* tight on space...
- are you sure you can handle a reasonable size of dictionary with
- such a small table? Bump this back up as soon as you get everything
- else working...
- (I was given this info by a Mac site while they were first porting
- this stuff; maybe now it works on macs we can put the buffer size
- back up to something reasonable)
- */
- #undef HASH_TABLE_SIZE
- #define HASH_TABLE_SIZE 17389
- #endif
-
- #else
- /* pick one about 20% larger than needed */
- #define HASH_TABLE_SIZE 240007
-
- /* Suitable prime numbers if you want to tune it:
- 30011
- 150001 <-- probably a good compromise. OK for dicts to about 1Mb text
- 200003
- 220009
- 240007
-
- If you try any others, for goodness sake CHECK THAT THEY ARE PRIME!!!
-
- */
- #endif
- #define MAX_EDGES (HASH_TABLE_SIZE-1)
-
- static FILE *fpin, *fpout; /* Input/output files */
- static char current_word[(MAX_LINE+1)]; /* The last word read from fpin */
- static int first_diff, save_first_diff;
- /* The position of the first letter at which */
- /* current_word differs from the previous */
- /* word read */
- static NODE PCCRAP *hash_table;
- static int first_time = TRUE;
- static int this_char = '?', lastch;
- static NODE PCCRAP *dawg;
-
- static int FILE_ENDED = FALSE; /* I'm having real problems getting
- merged dawgs to work on the PC; this is yet another filthy hack. Sorry. */
-
- static INDEX nwords, nnodes, total_edges;
-
- #ifdef SMALL_MEMORY
- #define MAX_SUBDAWG 30000 /* Big enough for largest dict,
- even on small systems */
- static NODE PCCRAP *merge;
-
- static INDEX dawgsize[256];
- static INDEX dawgstart[256];
- static INDEX dawgentry[256];
-
- static int nextslot; /* Dawgs are packed in sequentially - not in their
- 'proper' indexed position */
- #endif
- /*
- Shorthand macros for array accesses with checking
- If RCHECK isn't defined, these don't contribute any overhead. I suggest
- leaving RCHECK on except for production code.
- */
-
- #define EDGE_LIST(idx) \
- edge_list[RANGECHECK(idx, MAX_CHARS)]
- #define CURRENT_WORD(idx) \
- current_word[RANGECHECK(idx, MAX_LINE+1)]
- #define DAWG(idx) \
- dawg[RANGECHECK(idx, MAX_EDGES)]
- #define HASH_TABLE(idx) \
- hash_table[RANGECHECK(idx, HASH_TABLE_SIZE)]
-
- /*
- Forward references
- */
-
- #ifdef PROTOTYPES
- static INDEX build_node(int depth);
- static INDEX add_node(NODE *edge_list, INDEX nedges);
- static void read_next_word(VOID);
- static INDEX compute_hashcode(NODE *edge_list, INDEX nedges);
- static void report_size(VOID);
- #else
- static INDEX build_node();
- static INDEX add_node();
- static void read_next_word();
- static INDEX compute_hashcode();
- static void report_size();
- #endif
-
- #ifdef SMALL_MEMORY
-
- #ifdef PROTOTYPES
- static void merge_dawg (char *filename);
- #else
- static void merge_dawg ();
- #endif
-
- #endif
-
- /**
- * main
- *
- * Program entry point
- **/
-
- long words; /* dirty communication variable -- the multi-pass stuff
- was hacked in at the last minute. */
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- INDEX i;
- char fname[128];
-
- #ifdef SYS_MAC
- argc = ccommand(&argv);
- #endif
-
- if (argc != 3) {
- fprintf(stderr,
- "usage: dawg dictfile.ext dawgfile\n");
- exit(EXIT_ERROR);
- }
-
- /**
- * Allocate the memory for the dawg
- **/
-
- if ((dawg = MALLOC(MAX_EDGES, sizeof(NODE PCCRAP *))) == NULL) {
- fprintf(stderr, "Can\'t allocate dictionary memory\n");
- #ifndef SMALL_MEMORY
- fprintf(stderr, "-- try recompiling with -DSMALL_MEMORY\n");
- #endif
- exit(EXIT_ERROR);
- }
- for (i = 0; i < (INDEX)MAX_EDGES; i++) dawg[i] = 0L;
- /**
- * Allocate the hash table.
- * Fill it with zeroes later just before use. Don't trust calloc etc.
- * - not portable enough. Anyway, in the multi-pass version we don't
- * want to continually free/claim...
- **/
-
- if ((hash_table =
- MALLOC((HASH_TABLE_SIZE+1), sizeof(NODE))) == NULL) {
- fprintf(stderr, "Can\'t allocate memory for hash table\n");
- exit(EXIT_ERROR);
- }
- /**
- * Open the input/output files
- **/
-
- fpin = fopen(argv[1], READ_TEXT);
- if (fpin == NULL) {
- fprintf(stderr, "Can\'t open text file \"%s\"\n", argv[1]);
- /* Could print out error string but it's not portable... */
- exit(EXIT_ERROR);
- }
-
- /**
- * Read the first word from the dictionary
- **/
-
- first_time = TRUE;
- nwords = 0;
- current_word[0] = 0;
- read_next_word();
- lastch = *current_word;
- /**
- * Initialise the counters, taking account of the root node (which is
- * a special case)
- **/
-
- nnodes = 1; total_edges = MAX_CHARS;
-
- /**
- * Build the dawg and report the outcome
- **/
-
- /* Now, in the dim & distant past, this code supported the concept
- of a restricted character set - ie a..z & A..Z were packed into 6 bits;
- this caused awful problems in the loop below, where we had to try to
- keep the loop-control variable and the character code in synch; nowadays
- chars are 8 bits or else, so I'm starting to tidy up the places
- where these hacks were necessary... */
-
- #ifdef SMALL_MEMORY
- for (this_char = 0; this_char < MAX_CHARS; this_char++) {
- if (FILE_ENDED) break; /* Don't waste time in this loop... */
- #endif
- /* Explicitly initialise hash table to all zeros */
- {INDEX a; for (a = 0; a <= HASH_TABLE_SIZE; a++) hash_table[a] = 0;}
- words = 0;
- (void) build_node(0);
- #ifdef SMALL_MEMORY
- #ifdef DEBUG
- fprintf(stderr,
- "Char %d done. %ld Words, %ld Nodes\n", *current_word, words, total_edges);
- #endif
- if (total_edges /* words */ == 0) continue;
- #endif
-
- /**
- * Save the dawg to file
- **/
- #ifdef SMALL_MEMORY
- sprintf(fname, "%s%c%d", argv[2], EXT_CHAR, lastch);
- #else
- sprintf(fname, "%s%cdwg", argv[2], EXT_CHAR);
- #endif
- fpout = fopen(fname, WRITE_BIN);
- if (fpout == NULL) {
- fprintf(stderr, "Can\'t open output file \"%s\"\n", fname);
- exit(EXIT_ERROR);
- }
- #ifdef DEBUG
- fprintf(stderr, "Writing to %s\n", fname);
- #endif
-
- *dawg = total_edges;
- total_edges = sizeof(NODE) * (total_edges + 1); /* Convert to byte count */
-
- {
- long cnt;
- if ((cnt = putwords(dawg, (long)total_edges, fpout)) != total_edges) {
- fprintf(stderr, "%ld bytes written instead of %ld\n.", cnt, total_edges);
- exit(EXIT_ERROR);
- }
- }
- fclose(fpout);
-
- /**
- * Read the first word from the dictionary
- **/
-
- first_diff = save_first_diff;
- first_time = FALSE;
- nwords = 0;
- /**
- * Initialise the counters, taking account of the root node (which is
- * a special case)
- **/
-
- nnodes = 1; total_edges = MAX_CHARS;
-
- lastch = *current_word;
- /**
- * Build the dawg and report the outcome
- **/
-
- #ifdef SMALL_MEMORY
- }
- #endif
- fclose(fpin);
- fprintf(stderr, "Dawg generated\n");
- #ifdef SMALL_MEMORY
- merge_dawg(argv[2]);
- #endif
- exit(EXIT_OK);
- }
-
- /**
- * BUILD_NODE
- *
- * Recursively build the next node and all its sub-nodes
- **/
-
- static INDEX
- #ifdef PROTOTYPES
- build_node(int depth)
- #else
- build_node(depth)
- int depth;
- #endif
- {
- INDEX nedges = 0;
- INDEX i;
- NODE *edge_list;
-
- edge_list = NULL;
- if (CURRENT_WORD(depth) == 0) {
- /**
- * End of word reached. If the next word isn't a continuation of the
- * current one, then we've reached the bottom of the recursion tree.
- **/
-
- read_next_word();
- if (first_diff < depth) return(0);
- }
-
- edge_list = (NODE *)malloc(MAX_CHARS*sizeof(NODE));
- /* Note this is a 'near' array */
- if (edge_list == NULL) {
- fprintf(stderr, "Stack full (depth %d)\n", depth);
- exit(EXIT_ERROR);
- }
- for (i = 0; i < MAX_CHARS; i++) EDGE_LIST(i) = 0L;
-
- /**
- * Loop through all the sub-nodes until a word is read which can't
- * be reached via this node
- **/
-
- do
- {
- /* Construct the edge. Letter.... */
- EDGE_LIST(nedges) = (NODE) (((NODE)CURRENT_WORD(depth)))
- << (NODE)V_LETTER;
- /* ....end-of-word flag.... */
- if (CURRENT_WORD(depth+1L) == 0) EDGE_LIST(nedges) |= M_END_OF_WORD;
- /* ....and node pointer. */
- EDGE_LIST(nedges) |= build_node(depth+1); nedges++;
- /* (don't ++ in a macro) */
- } while (first_diff == depth);
-
- if (first_diff > depth) {
- fprintf(stderr, "Internal error -- first_diff = %d, depth = %d\n",
- first_diff, depth);
- exit(EXIT_ERROR);
- }
-
- EDGE_LIST(nedges-1) |= M_END_OF_NODE;
- /* Flag the last edge in the node */
-
- /**
- * Add the node to the dawg
- **/
-
- if (depth) {
- NODE result = add_node(edge_list, nedges);
- free(edge_list);
- return(result);
- }
-
- /**
- * depth is zero, so the root node (as a special case) goes at the start
- **/
-
- edge_list[MAX_CHARS-1] |= M_END_OF_NODE; /* For backward searches */
- for (i = 0; i < MAX_CHARS; i++)
- {
- dawg[i+1] = edge_list[i];
- }
- free(edge_list);
- return(0);
- }
-
- /**
- * ADD_NODE
- *
- * Add a node to the dawg if it isn't already there. A hash table is
- * used to speed up the search for an identical node.
- **/
-
- static INDEX
- #ifdef PROTOTYPES
- add_node(NODE *edge_list, INDEX nedges)
- #else
- add_node(edge_list, nedges)
- NODE *edge_list;
- INDEX nedges;
- #endif
- {
- NODE hash_entry;
- INDEX inc;
- INDEX a, first_a;
- INDEX i;
-
- /**
- * Look for an identical node. A quadratic probing algorithm is used
- * to traverse the hash table.
- **/
-
- first_a = compute_hashcode(edge_list, nedges);
- first_a = ABS(first_a) MOD HASH_TABLE_SIZE;
- a = first_a;
- inc = 9;
-
- for (;;)
- {
- hash_entry = HASH_TABLE(a) & M_NODE_POINTER;
-
- if (hash_entry == 0) break; /* Node not found, so add it to the dawg */
-
- for (i = 0; i < nedges; i++)
- if (DAWG((hash_entry+i) MOD HASH_TABLE_SIZE) != EDGE_LIST(i)) break;
-
- /* On the 1.6M dictionary, this gave a rangecheck with < 0. Now fixed
- I think - it was a problem with MOD giving unexpected results. */
-
- if (i == nedges) {
- return(hash_entry); /* Node found */
- }
- /**
- * Node not found here. Look in the next spot
- **/
-
- a += inc;
- inc += 8;
- if (a >= HASH_TABLE_SIZE) a -= HASH_TABLE_SIZE;
- if (inc >= HASH_TABLE_SIZE) inc -= HASH_TABLE_SIZE;
- if (a == first_a) {
- fprintf(stderr, "Hash table full\n");
- exit(EXIT_ERROR);
- }
- }
-
- /**
- * Add the node to the dawg
- **/
-
- if (total_edges + nedges >= MAX_EDGES) {
- fprintf(stderr,
- "Error -- dictionary full - total edges = %ld\n", total_edges);
- exit(EXIT_ERROR);
- }
-
- HASH_TABLE(a) |= total_edges + 1;
- nnodes++;
- for (i = 0; i < nedges; i++) {
- DAWG((total_edges + 1 + i) MOD HASH_TABLE_SIZE) = EDGE_LIST(i);
- }
- total_edges += nedges;
- return(total_edges - nedges + 1);
- }
-
- /**
- * READ_NEXT_WORD
- *
- * Read the next word from the input file, setting first_diff accordingly
- **/
-
- static void
- #ifdef PROTOTYPES
- read_next_word(VOID)
- #else
- read_next_word()
- #endif
- {
- /* This stuff imposes the limitation of not allowing '\0' in a word;
- not yet a problem but the dawg structure itself could probably cope
- if the feature were wanted. (Maybe with a little teweaking) */
- char linebuff[(MAX_LINE+1)];
- int length;
- for (;;)
- {
- int next = 0, c;
- for (;;) {
- c = fgetc(fpin);
- if (FILE_ENDED || c == EOF || ferror(fpin) || feof(fpin)) {
- /* for some reason, we always get a blank line at the end of files */
- current_word[0] = 0;
- first_diff = -1;
- linebuff[next] = '\0';
- FILE_ENDED = TRUE;
- return;
- }
- c &= 255;
- if (next == 0 && c == '\n') continue; /* skip blank lines... */
- linebuff[next++] = c;
- if (c == '\n') break;
- }
- linebuff[next] = '\0';
-
- words++;
-
- length = strlen(linebuff);
-
- if (linebuff[length-1] == '\n') linebuff[length-1] = '\0';
- if (linebuff[length] == '\n') linebuff[length] = '\0';
-
- if (length < 2 || length > MAX_LINE-1)
- {
- fprintf(stderr, "\n%s - invalid length\n", linebuff);
- continue; /* Previously exit()ed, but now ignore so that
- test sites without my pddict can use /usr/dict/words */
- }
- break;
- }
- for (length = 0; current_word[length] == linebuff[length]; length++) {
- /* Get common part of string to check order */
- }
- if (current_word[length] > linebuff[length]) {
- fprintf(stderr, "Error -- %s (word out of sequence)\n", linebuff);
- exit(EXIT_ERROR);
- }
- first_diff = length;
-
- nwords++;
- strcpy(current_word, linebuff);
-
- if ((nwords > 1) && (!(nwords & 0x3FF))) report_size();
- #ifdef SMALL_MEMORY
- if (current_word[0] != lastch) {
- save_first_diff = first_diff;
- first_diff = -1;
- report_size();
- }
- #else
- this_char = current_word[0]; /* for diagnostics... */
- #endif
- }
- /**
- * COMPUTE_HASHCODE
- *
- * Compute the hash code for a node
- **/
-
- static INDEX
- #ifdef PROTOTYPES
- compute_hashcode(NODE *edge_list, INDEX nedges)
- #else
- compute_hashcode(edge_list, nedges)
- NODE *edge_list;
- INDEX nedges;
- #endif
- {
- INDEX i;
- INDEX res = 0L;
-
- for (i = 0; i < nedges; i++)
- res = ((res << 1) | (res >> 31)) EOR EDGE_LIST(i);
- return(res);
- }
-
- /**
- * REPORT_SIZE
- *
- * Report the current size etc
- **/
-
- static void
- #ifdef PROTOTYPES
- report_size(VOID)
- #else
- report_size()
- #endif
- {
-
- if (first_time)
- {
- fprintf(stderr, " Words Nodes Edges Bytes BPW\n");
- fprintf(stderr, " ----- ----- ----- ----- ---\n");
- first_time = FALSE;
- }
- if (*current_word) fprintf(stderr, "%c:", *current_word);
- else fprintf(stderr, "Total:");
-
- /* THE FOLLOWING IS RATHER GRATUITOUS USE OF FLOATING POINT - REMOVE
- IT AND REPLACE WITH INTEGER ARITHMETIC * 100 */
-
- /* (hey - I already did this in the copy I sent to Richard; so how
- come its missing? Oh no, not again: I've got out of synch and
- used an old copy, haven't I? :-( ) */
-
- fprintf(stderr, " %7ld %7ld %7ld %7ld %5.2f\n",
- nwords, nnodes, total_edges, sizeof(NODE PCCRAP *)*(total_edges+1),
- (float)(sizeof(NODE PCCRAP *)*(total_edges+1))/(float)nwords);
- }
-
- #ifdef SMALL_MEMORY
- static void
- #ifdef PROTOTYPES
- merge_dawg (char *filename)
- #else
- merge_dawg (filename)
- char *filename;
- #endif
- {
- FILE *fp, *outfile;
- NODE data, edge;
- INDEX nedges, nextfree, i, dentry;
- INDEX count, lastnode;
- int dictch, padding;
- char fname[128];
-
- nedges = (INDEX)MAX_SUBDAWG;
- if ((merge = MALLOC((SIZE_T)nedges, sizeof(INDEX))) == 0) {
- fprintf(stderr, "Memory allocation error -- %ld wanted\n",
- nedges*sizeof(INDEX));
- exit(EXIT_ERROR);
- }
-
- nextfree = MAX_CHARS; /* 0 is special, 1..26 are root nodes for a..z */
- nextslot = 0;
- for (dictch = 0; dictch < MAX_CHARS; dictch++) {
- /***
- * Open the file and find out the size of the dawg
- ***/
- sprintf(fname, "%s%c%d", filename, EXT_CHAR, dictch);
- if ((fp = fopen(fname, READ_BIN)) == NULL) {
- continue;
- }
- nedges = getword(fp);
- dawgstart[nextslot] = nextfree;
- dawgsize[nextslot] = nedges-MAX_CHARS;
-
- /* the first entry is (erroneously) the pointer to the chunk */
- dentry = getword(fp);
- dawgentry[nextslot] = dentry - MAX_CHARS + nextfree;
-
- nextfree += nedges - MAX_CHARS;
- nextslot++;
-
- fclose(fp);
- }
-
- /* Now output total edges, and starts[a..z] */
- /* Then set nextfree to start of each block */
- sprintf(fname, "%s%cdwg", filename, EXT_CHAR);
- outfile = fopen(fname, WRITE_BIN);
- if (outfile == NULL) {
- fprintf(stderr, "Cannot open output dawg file %s\n", fname);
- exit(EXIT_ERROR);
- }
- putword(nextfree, outfile);
- nextfree = 1; nextslot = 0; padding = 0;
- lastnode = MAX_CHARS-1;
- for (;;) {
- if (dawgentry[lastnode] != 0) break; /* Leave with 'lastnode' set */
- lastnode -= 1;
- }
- for (dictch = 0; dictch < MAX_CHARS; dictch++) {
- NODE edge = dawgentry[nextslot];
- if (edge == 0) {
- padding++;
- continue;
- }
- if (dictch == lastnode) {
- edge |= M_END_OF_NODE;
- } else {
- edge &= (NOT M_END_OF_NODE);
- }
- putword(edge, outfile);
- nextfree++; nextslot++;
- }
- nextfree += padding;
- while (padding > 0) {
- putword(0L, outfile); padding -= 1;
- }
- /* nextslot = 0; ???? This one not used? */
- for (dictch = 0; dictch < MAX_CHARS; dictch++) {
- sprintf(fname, "%s%c%d", filename, EXT_CHAR, dictch);
- if ((fp = fopen(fname, READ_BIN)) == NULL) {
- continue;
- }
-
- nedges = getword(fp);
-
- for (i = 0; i < MAX_CHARS; i++) {
- (void) getword(fp);
- /* Skip root pointers */
- }
-
- nedges -= MAX_CHARS;
- count = getwords(&merge[1], (long)(nedges*sizeof(NODE)), fp);
- if (count != nedges*sizeof(NODE)) {
- fprintf(stderr, "Dictionary read error - %ld wanted - %ld got\n",
- nedges*sizeof(NODE), count);
- exit(EXIT_ERROR);
- }
- fclose(fp);
-
- DELETE(fname); /* On floppy systems, we can almost get away with
- little more space than the final files would take! */
-
- /* relocate all nodes */
- for (i = 1; i <= nedges; i++) {
- data = merge[i];
- edge = data & M_NODE_POINTER;
- if (edge > MAX_CHARS) {
- data &= (NOT M_NODE_POINTER);
- data |= edge - MAX_CHARS - 1 + nextfree;
- merge[i] = data;
- }
- putword(merge[i], outfile);
- }
- nextfree += nedges;
- /* nextslot++; this one not used??? */
- }
- fclose(outfile);
- FREE(merge);
- }
- #endif
- SHAR_EOF
- cat - << \SHAR_EOF > dwgcheck.c
- /*
-
- File: dwgcheck.c
- Author: Graham Toal
- Purpose: check correct spelling using dict.dwg
- Creation date: 22/06/90
- Lastedit: 23/06/90 01:10:12
-
- Description:
-
- This can be expanded to be like the unix 'spell' command.
- This demo file simply checks words passed on the command line.
- note how it remembers words so that the second time it sees
- them, it will call them correct. This is how you should
- implement the 'ignore' feature of an interactive checker.
-
- This code uses the dawg data structure, which I recommend
- you stick with; however for *extremely* fast checking in
- special circumstances you can use the 'pack' versions of
- check() et al.
-
- */
-
-
- /* Manadatory header files */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
-
- /*#define RCHECK*/ /* Enable for internal range checks... */
-
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
- #include <ctype.h> /* eg, for isalpha() */
-
- /* Spelling library utilities */
- #include "init.c" /* Loading dicts */
- #include "dyntrie.c" /* Creating dicts at run-time */
- #include "print.c" /* Printing dicts */
- #include "check.c" /* Checking words */
- #include "locate.c" /* Finding words by their stems */
-
- #include "soundex.c" /* Soundex algorithm for word-likeness comparison */
- #include "similcmp.c" /* Closeness-metric for correction */
- #include "correct.c" /* Hack code to attempt error correction (uses above) */
-
- /* Let's be friendly to these guys... */
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place all the relevant .h and .c
- files in a folder. Then create a project which contains this main.c
- and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- #endif
-
- /* Your own header files go here, for instance:
- #include "readword.h"
- */
-
-
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *dawg; /* precompiled dictionary (dict.dwg) */
- INDEX edges; /* size of above */
-
- NODE PCCRAP *userdict; /* run-time dictionary, added to dynamically */
-
- char *word;
- int each;
-
- #ifdef SYS_MAC
- argc = ccommand(&argv); /* Mac users have my sympathy */
- #endif
-
- /* Your program goes here... */
- if (argc == 1) {
- fprintf(stderr, "usage: %s possibly mispeled wurdz\n", argv[0]);
- exit(EXIT_ERROR);
- }
- if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);
-
- if (!trie_new(&userdict)) exit(EXIT_ERROR);
-
- for (each = 1; each < argc; each++) {
- word = argv[each];
- fprintf(stderr, "* %s:\n", word);
- if (dawg_check(dawg, word) || dawg_check(userdict, word)) {
- fprintf(stderr, "Correct\n");
- } else {
- fprintf(stderr, "Wrong\n");
- (void)trie_add(&userdict, word);
- }
- if (each+1 != argc) fprintf(stderr, "\n");
- }
-
- exit(EXIT_OK);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > pack.c
- /* The line below is to protect against Bitnet mailer damage */
- #define MOD %
- /* This should be a Percent (for C Modulus) */
-
- /* Blech. This file is a mess. Make it the first rewrite... */
- #include "copyr.h"
- /*****************************************************************************/
- /* */
- /* FILE : PACK.C */
- /* */
- /* Re-pack a dawg/trie data structure using Franklin Liang's */
- /* algorithm for sparse matrix storage. Final structure allows */
- /* direct indexing into trie nodes, so is exceedingly fast at checking. */
- /* Unfortunately the trade off is that any algorithms which walk */
- /* the data structure and generate words will take much longer */
- /* */
- /* PACK <dawg file (inc .ext)> <pack file (inc .ext)> */
- /* */
- /*****************************************************************************/
-
- /* Pending:
-
- see what closest gap between dawgptr + freenode is, and see whether we
- can save space by overlapping input & output arrays with a window between
- them. Should get almost 50% of memory back. Also, because of hacking
- around a bug some time back, I'm using an extra array (new_loc) for
- relocation of pointers, when in fact I could (and have in the past)
- managed to relocate them in situ with not much extra overhead.
- As I said, it needs a rewrite...
-
- */
-
- /* Note: I tried one implementation of this which used bitsets to test
- whether two nodes were compatible. In fact, it wasn't sufficiently
- faster to justify the extra space it used for the arrays of flags.
- Now I check for compatibility on the fly with lots of comparisons.
- I do however have a seperate byte array to flag whether a trie
- is based at any address. There's probably a way of removing this.
- */
-
- #include "grope.h"
- #ifndef grope_h
- /* If you don't have grope.h, create an empty one.
- These will do for a basic system: */
- #undef KNOWS_VOID
- #undef STDLIBS
- #undef PROTOTYPES
- #define SYS_ANY 1
- #define COMPILER_ANY 1
- #define SMALL_MEMORY 1 /* To be defined if you have to generate
- the data structure in bits. This will
- certainly be true for any non-trivial
- dictionary on MSDOS, or most home
- micros with 1Mb Ram or under. */
- #endif
-
- #include <stdio.h>
-
- /*#define RCHECK*/ /* Turn this back on if you have any problems. */
-
- #include "dawg.h"
- #include "utils.c"
- #include "init.c"
- #include "print.c"
-
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place the files dawg.h grope.h,
- pack.c and utils.c in a folder. Create a project that contains
- pack.c and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- /* Assume a 1Mb mac for the moment. Someday I'll add dynamic RAM sizing */
- #define SMALL_MEMORY
- #endif
-
- #define TRUE (0==0)
- #define FALSE (0!=0)
- #define MAX_WORD_LENGTH 32
-
- /* These two magic numbers alter a very hacky heuristic employed in
- the packing algorithm. Tweaking them judiciously ought to speed
- it up significantly (at the expense of a slightly sparser packing */
- #define DENSE_LOWER 100
- #define DENSE_UPPER 200
-
- /*###########################################################################*/
- INDEX ptrie_size = 0;
- static NODE PCCRAP *ptrie;
- #ifdef RCHECK
- /* can't use the standard range_check macro because these are slightly
- non-standard. */
- #define PTRIE(x) ptrie[RANGECHECK((x), ptrie_size)]
- #define DATA(x) (((x) >> 12) == 0xf2f1 ? toosmall((x), 0, __LINE__) : (x))
- #else
- /* so supply an explicit base case */
- #define PTRIE(x) ptrie[x]
- #define DATA(x) (x)
- #endif
- static char PCCRAP *trie_at; /* save some time by caching this info --
- previously it was a function called on each test */
- static INDEX freenode, lowest_base = 1, highest_base = -1;
- static int debug = FALSE;
-
- int
- #ifdef PROTOTYPES
- compatible(NODE *node, INDEX i) /* Can a node be overlaid here? */
- #else
- compatible(node, i)
- NODE *node;
- INDEX i;
- /* Can a node be overlaid here? */
- #endif
- {
- int c;
- for (c = 0; c < MAX_CHARS; c++) {
- if ((node[c]&M_FREE) == 0) { /* Something to slot in... */
- if ((PTRIE(i+c) & M_FREE) == 0) return(FALSE); /* but no slot for it */
- }
- }
- return(TRUE);
- }
-
- INDEX
- #ifdef PROTOTYPES
- find_slot(NODE *node)
- #else
- find_slot(node)
- NODE *node;
- #endif
- { /* Try each position until we can overlay this node */
- INDEX i;
- for (i = lowest_base; i < freenode; i++) {
- if ((!trie_at[i]) && (compatible(node, i))) {
- /* nothing here already */
- /* and this one will fit */
- return(i);
- }
- }
- fprintf(stderr, "Should never fail to find a slot!\n");
- /* because the output array is larger than the input... */
- exit(5);
- /* NOT REACHED */
- return(0);
- }
-
- static int changes;
-
- INDEX
- #ifdef PROTOTYPES
- pack(NODE *node)
- #else
- pack(node)
- NODE *node;
- #endif
- {
- int c;
- INDEX slot;
- NODE value;
-
- slot = find_slot(node); /* slot is also returned as the result,
- to be used in relocation */
- /* Place node at slot */
- changes = 0;
- for (c = 0; c < MAX_CHARS; c++) {
- value = node[c];
- if ((value&M_FREE) == 0) { /* Something to fit? */
- if (((value>>V_LETTER)&M_LETTER) == (INDEX)c+BASE_CHAR) {
- /* Just a consistency check - could safely be removed */
- PTRIE(slot+c) = DATA(value);
- changes++;
- }
- }
- }
- /* The hack heuristics below keep a N^2 operation down to linear time! */
- if ((slot == lowest_base) || (slot >= lowest_base+DENSE_LOWER)) {
- /* Heuristic is: we increase the initial search position if the
- array is packed solid up to this point or we're finding it *very*
- hard to find suitable slots */
- int found = FALSE;
- for (;;) {
- INDEX c;
- while (trie_at[lowest_base]) lowest_base++;
- for (c = lowest_base; c < lowest_base+MAX_CHARS; c++) {
- if ((PTRIE(c)&M_FREE) != 0) {found = TRUE; break;}
- }
- if (found && (slot < lowest_base+DENSE_UPPER)) break;
- /* ^ Skip hard slots to fill */
- lowest_base++; /* although nothing is based here, the next N slots
- are filled with data from the last N tries. */
-
- /* Actually I'm not sure if 256 sequential trees each with the
- same element used would actually block the next 256 slots
- without a trie_at[] flag being set for them. However it
- does no harm to believe this... I must formally check this
- someday once all the other stuff is in order. */
- }
- }
-
- if (slot > highest_base) highest_base = slot;
- /* This is info for when we try to overlap input & output
- arrays, -- with the output sitting some large number of
- bytes lower down than the input. */
- trie_at[slot] = TRUE;
- return(slot);
- }
- /*###########################################################################*/
-
- static NODE PCCRAP *dawg;
- static INDEX PCCRAP *new_loc;
- static INDEX nedges;
-
- NODE this_node[MAX_CHARS];
-
- INDEX
- #ifdef PROTOTYPES
- take_node(INDEX ptr)
- #else
- take_node(ptr)
- INDEX ptr;
- #endif
- {
- NODE data;
- INDEX edge;
- int let;
- int endsword;
- int endsnode;
- char ch;
- int changes = 0;
- for (let = 0; let < MAX_CHARS; let++) this_node[let] = M_FREE;
- for (;;) {
- data = dawg[ptr++];
- if (data == 0) {
- return(-1);
- } else {
- endsword = ((data & M_END_OF_WORD) != 0);
- endsnode = ((data & M_END_OF_NODE) != 0);
- edge = data & M_NODE_POINTER;
- let = (int) ((data >> V_LETTER) & M_LETTER);
- ch = let + BASE_CHAR;
-
- this_node[let] = edge & M_NODE_POINTER;
- if (endsword) this_node[let] |= M_END_OF_WORD;
- this_node[let] |= (NODE)ch<<V_LETTER;
-
- changes++;
- if (endsnode) break;
- }
- }
- if (changes != 0) {
- return(ptr);
- } else {
- fprintf(stderr, "Fatal error\n");
- return(0);
- }
- }
-
- NODE
- #ifdef PROTOTYPES
- redirect_node(NODE ptr)
- #else
- redirect_node(ptr)
- NODE ptr;
- #endif
- {
- NODE data;
- INDEX edge;
- int endsword;
- char ch;
- data = DATA(PTRIE(ptr));
-
- endsword = ((data & M_END_OF_WORD) != 0);
- edge = data & M_NODE_POINTER;
- ch = (int) ((data >> V_LETTER) & M_LETTER);
-
- /*edge = dawg[edge] & M_NODE_POINTER;*/
- edge = new_loc[edge];
-
- ptr = edge;
- ptr |= (NODE)ch<<V_LETTER;
- if (endsword) ptr |= M_END_OF_WORD;
-
- return(ptr);
- }
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- INDEX dawgptr = 1, trieptr, newdawgptr, i, nodes = 0;
- FILE *triefile;
-
- #ifdef SYS_MAC
- argc = ccommand(&argv);
- #endif
-
- if (argc != 3) {
- fprintf(stderr, "pack: wrong parameters - should be infile outfile\n");
- exit(EXIT_ERROR);
- }
-
- if (!dawg_init((argc >= 2 ? argv[1]: ""), &dawg, &nedges)) exit(EXIT_ERROR);
- if (debug) dawg_print(dawg, (INDEX)ROOT_NODE); /* assume small test file! */
-
- freenode = ((nedges*16)/15)+(4*MAX_CHARS);
- /* Minimal slop for pathological packing? */
- ptrie_size = freenode;
- ptrie = MALLOC((SIZE_T)freenode, sizeof(NODE));
- if (ptrie == NULL) {
- fprintf(stderr, "Cannot claim %ld bytes for ptrie[]\n",
- sizeof(NODE)*freenode);
- exit(EXIT_ERROR);
- }
- new_loc = MALLOC((SIZE_T)freenode, sizeof(NODE));
- if (new_loc == NULL) {
- fprintf(stderr, "Cannot claim %ld bytes for new_loc[]\n",
- sizeof(NODE)*freenode);
- exit(EXIT_ERROR);
- }
- trie_at = (char PCCRAP *)MALLOC((SIZE_T)freenode, sizeof(char));
- if (trie_at == NULL) {
- fprintf(stderr, "Cannot claim %ld bytes for trie_at[]\n", freenode);
- exit(EXIT_ERROR);
- }
- for (i = 0; i < freenode; i++) {
- ptrie[i] = M_FREE; new_loc[i] = 0; trie_at[i] = FALSE;
- }
-
- dawg[0] = 0; /* 1st entry is never looked at, and maps to itself */
-
- dawgptr = 1;
-
- /* Relocate initial node at 1 seperately */
-
- newdawgptr = take_node(dawgptr /* 1 */);
- trieptr = pack(this_node);
- /*dawg[dawgptr] = trieptr;*/
- /* What the hell does this do??? I've forgotten!!! - oh yes, this
- was relocating in situ, without new_loc... */
- new_loc[dawgptr] = trieptr;
- dawgptr = MAX_CHARS+1;
-
- {INDEX maxdiff = 0, diff;
- for (;;) {
- if (highest_base > dawgptr) {
- diff = highest_base - dawgptr;
- if (diff > maxdiff) maxdiff = diff;
- }
- newdawgptr = take_node(dawgptr);
- if (newdawgptr == -1) {
- dawgptr++;
- continue;
- }
- trieptr = pack(this_node);
- /*dawg[dawgptr] = trieptr;*/
- new_loc[dawgptr] = trieptr;
- dawgptr = newdawgptr;
- if (dawgptr > nedges) {
- break; /* AHA!!! I was doing this in the
- wrong order & lost last entry! *AND* had '>=' for '>' */
- }
- nodes++;
- if ((nodes MOD 1000) == 0) fprintf(stderr, "%ld nodes\n", nodes);
- }
- if (debug) fprintf(stderr, "wavefront gets up to %ld\n", maxdiff);
- }
- if (debug) fprintf(stderr, "All packed - used %ld nodes\n", highest_base);
- for (trieptr = 1; trieptr < freenode; trieptr++) {
- /*
- extract edge from ptrie[trieptr],
- look it up via dawg[edge], slot it back in
- (while preserving other bits)
- */
- PTRIE(trieptr) = redirect_node(trieptr);
- }
- /* Finally, remember to bump up highest_base in case last node is only
- one edge and 25 others are missing! */
- if (debug) fprintf(stderr, "Redirected...\n");
-
- triefile = fopen((argc >= 3 ? argv[2] : DEFAULT_PACK), WRITE_BIN);
- if (triefile == NULL) {
- fprintf(stderr, "Cannot write to packed trie file\n");
- exit(1);
- }
- if (debug) fprintf(stderr, "File opened...\n");
-
- ptrie[0] = highest_base+MAX_CHARS-1;/* File size in words
- (-1 because doesn't include itself) */
- if (debug) fprintf(stderr, "Dumping... (0..%ld)\n", highest_base+MAX_CHARS-1);
- for (i = 0; i < highest_base+MAX_CHARS; i++) {/* dump packed DAG */
- NODE n;
- n = DATA(PTRIE(i));
- putword(n, triefile);
- if (ferror(triefile)) {
- fprintf(stderr, "*** TROUBLE: Writing DAG -- disk full?\n");
- fclose(triefile);
- exit(1);
- }
- }
- if (debug) fprintf(stderr, "Dumped...\n");
- fclose(triefile);
- if (debug) fprintf(stderr, "Done.\n");
- }
- SHAR_EOF
- cat - << \SHAR_EOF > pckcheck.c
- /*
-
- File: pckcheck.c
- Author: Graham Toal
- Purpose: check correct spelling using dict.pck
- Creation date: 22/06/90
- Lastedit: 23/06/90 01:15:39
-
- Description:
-
- This can be expanded to be like the unix 'spell' command.
- This demo file simply checks words passed on the command line.
- note how it remembers words so that the second time it sees
- them, it will call them correct. This is how you should
- implement the 'ignore' feature of an interactive checker.
-
- This version used the fast 'packed' data structure. This is
- approximately 3 times faster, but not all utilities support
- the packed versions. Also utilities which walk the trie are
- considerably slower (say by a factor of 100) -- so chose when
- to used 'dawg' and when to use 'pack'ed versions carefully!
-
- */
-
-
- /* Manadatory header files */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
-
- /*#define RCHECK*/ /* Enable for internal range checks... */
-
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
- #include <ctype.h> /* eg, for isalpha() */
-
- /* Spelling library utilities */
- #include "init.c" /* Loading dicts */
- #include "dyntrie.c" /* Creating dicts at run-time */
- #include "print.c" /* Printing dicts */
- #include "check.c" /* Checking words */
- #include "locate.c" /* Finding words by their stems */
-
- #include "soundex.c" /* Soundex algorithm for word-likeness comparison */
- #include "similcmp.c" /* Closeness-metric for correction */
- #include "correct.c" /* Hack code to attempt error correction (uses above) */
-
- /* Let's be friendly to these guys... */
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place all the relevant .h and .c
- files in a folder. Then create a project which contains this main.c
- and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- #endif
-
- /* Your own header files go here, for instance:
- #include "readword.h"
- */
-
-
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *ptrie; /* precompiled dictionary (dict.pck) */
- INDEX edges; /* size of above */
-
- NODE PCCRAP *userdict; /* run-time dictionary, added to dynamically */
-
- char *word;
- int each;
-
- #ifdef SYS_MAC
- argc = ccommand(&argv); /* Mac users have my sympathy */
- #endif
-
- /* Your program goes here... */
- if (argc == 1) {
- fprintf(stderr, "usage: %s possibly mispeled wurdz\n", argv[0]);
- exit(EXIT_ERROR);
- }
-
- if (!pack_init("", &ptrie, &edges)) exit(EXIT_ERROR);
-
- if (!trie_new(&userdict)) exit(EXIT_ERROR);
-
- for (each = 1; each < argc; each++) {
- word = argv[each];
- fprintf(stderr, "* %s:\n", word);
- if (pack_check(ptrie, word) || dawg_check(userdict, word)) {
- fprintf(stderr, "Correct\n");
- } else {
- fprintf(stderr, "Wrong\n");
- (void)trie_add(&userdict, word);
- }
- if (each+1 != argc) fprintf(stderr, "\n");
- }
-
- exit(EXIT_OK);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > pdawg.c
- /* Manadatory headers */
- /*#define RCHECK*/
- /*#define DEBUG*/
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
- #include <ctype.h> /* for isalpha() */
-
- /* Spelling library utilities */
- #include "init.c"
- #include "print.c"
- /*
- #include "check.c"
- #include "locate.c"
- #include "similcmp.c"
- #include "soundex.c"
- #include "correct.c"
- */
- /*#include "dyntrie.c"*/
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *dawg;
- INDEX nedges;
- if (!dawg_init((argc == 1 ? "" : argv[1]), &dawg, &nedges)) exit(EXIT_ERROR);
- dawg_print(dawg, (INDEX)ROOT_NODE);
- fprintf(stderr, "Finished printing dawg\n");
- exit(0);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > ppack.c
- /* Manadatory headers */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
- #include <ctype.h> /* for isalpha() */
-
- /* Spelling library utilities */
- #include "init.c"
- #include "print.c"
- /*
- #include "check.c"
- #include "locate.c"
- #include "similcmp.c"
- #include "soundex.c"
- #include "correct.c"
- */
- /*#include "dyntrie.c"*/
-
-
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *ptrie;
- INDEX nedges;
- if (!pack_init((argc == 1 ? "": argv[1]), &ptrie, &nedges)) exit(EXIT_ERROR);
- pack_print(ptrie, (INDEX)ROOT_NODE);
- exit(0);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > proot.c
- /*
-
- File: proot.c
- Author: Graham Toal
- Purpose: find all words starting with 'root'
- Creation date: 22/06/90
- Lastedit: 22:24:32
-
- Description:
- some spelling programs remove characters from the end of
- wrongly spelt words one by one until the resulting root is
- found to be a prefix of other words in the dictionary. This
- works because the assumption is made that the word was in
- fact correct, but was an inflected form of a word in the
- dictionary which had not been stored.
- */
-
-
- /* Manadatory header files */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
- #include <ctype.h> /* eg, for isalpha() */
-
- /* Spelling library utilities */
- #include "init.c" /* Loading dicts */
- /*#include "dyntrie.c"*/ /* Creating dicts at run-time */
- #include "print.c" /* Printing dicts */
- /*#include "check.c"*/ /* Checking words */
- #include "locate.c" /* Finding words by their stems */
-
- /*#include "soundex.c"*/ /* Soundex algorithm for word-likeness comparison */
- /*#include "similcmp.c"*//* Closeness-metric for correction */
- /*#include "correct.c"*/ /* Code to attempt error correction (uses above) */
-
- /* Let's be friendly to these guys... */
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place all the relevant .h and .c
- files in a folder. Then create a project which contains this main.c
- and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- #endif
-
- /* Your own header files go here, for instance:
- #include "readword.h"
- */
-
-
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *dawg;
- INDEX root;
- INDEX edges;
- int each;
-
- #ifdef SYS_MAC
- argc = ccommand(&argv); /* Mac users have my sympathy */
- #endif
-
- /* Your program goes here... */
- if (argc == 1) {
- fprintf(stderr, "usage: %s part\n", argv[0]);
- exit(EXIT_ERROR);
- }
- if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);
- for (each = 1; each < argc; each++) {
- fprintf(stderr, "* %s:\n", argv[each]);
- root = dawg_locate_prefix(dawg, argv[each], ROOT_NODE);
- if (root != ROOT_NODE) {
- dawg_print_prefix(dawg, argv[each], root);
- } else {
- fprintf(stderr, "(none found)\n");
- }
- if (each+1 != argc) fprintf(stderr, "\n");
- }
-
- exit(EXIT_OK);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > sample.c
- /*
- Dummy program for writing word utilities.
- If you stick to the style shown here, your programs will
- remain portable across a vast range of machines.
- Skeleton file by Graham Toal. Remove this header when you've
- added your own...
-
- (If you want to see some worked examples, try
- tell.c dwgcheck.c pckcheck.c)
-
- */
-
- /*
-
- File:
- Author:
- Purpose:
- Creation date:
- Lastedit:
-
- Description:
-
- */
-
-
- /* Manadatory header files */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
- #include <ctype.h> /* eg, for isalpha() */
-
- /* Spelling library utilities */
- #include "init.c" /* Loading dicts */
- #include "dyntrie.c" /* Creating dicts at run-time */
- #include "print.c" /* Printing dicts */
- #include "check.c" /* Checking words */
- #include "locate.c" /* Finding words by their stems */
-
- #include "soundex.c" /* Soundex algorithm for word-likeness comparison */
- #include "similcmp.c" /* Closeness-metric for correction */
- #include "correct.c" /* Hack code to attempt error correction (uses above) */
-
- /* Let's be friendly to these guys... */
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place all the relevant .h and .c
- files in a folder. Then create a project which contains this main.c
- and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- #endif
-
- /* Your own header files go here, for instance:
- #include "readword.h"
- */
-
-
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- #ifdef SYS_MAC
- argc = ccommand(&argv); /* Mac users have my sympathy */
- #endif
-
- /* Your program goes here... */
-
- exit(EXIT_OK);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > tell.c
- /*
-
- File: tell.c
- Author: Graham Toal
- Purpose: offer correct spelling
- Creation date: 22/06/90
- Lastedit: 22:24:32
-
- Description:
-
- Like the unix 'spelltell' command.
-
- */
-
-
- /* Manadatory header files */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
- #include <ctype.h> /* eg, for isalpha() */
-
- /* Spelling library utilities */
- #include "init.c" /* Loading dicts */
- #include "dyntrie.c" /* Creating dicts at run-time */
- #include "print.c" /* Printing dicts */
- #include "check.c" /* Checking words */
- #include "locate.c" /* Finding words by their stems */
-
- #include "soundex.c" /* Soundex algorithm for word-likeness comparison */
- #include "similcmp.c" /* Closeness-metric for correction */
- #include "correct.c" /* Hack code to attempt error correction (uses above) */
-
- /* Let's be friendly to these guys... */
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place all the relevant .h and .c
- files in a folder. Then create a project which contains this main.c
- and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- #endif
-
- /* Your own header files go here, for instance:
- #include "readword.h"
- */
-
-
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *dawg;
- INDEX edges;
- int each;
-
- #ifdef SYS_MAC
- argc = ccommand(&argv); /* Mac users have my sympathy */
- #endif
-
- /* Your program goes here... */
- if (argc == 1) {
- fprintf(stderr, "usage: %s mispeled wurdz\n", argv[0]);
- exit(EXIT_ERROR);
- }
- if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);
- for (each = 1; each < argc; each++) {
- fprintf(stderr, "* %s:\n", argv[each]);
- if (!dawg_correct(dawg, argv[each])) {
- fprintf(stderr, "(none found)\n");
- }
- if (each+1 != argc) fprintf(stderr, "\n");
- }
-
- exit(EXIT_OK);
- }
- SHAR_EOF
-