home *** CD-ROM | disk | FTP | other *** search
- /* unjumble.c - try to make words from a jumbled string of letters.
-
- Ron Charlton
- 9002 Balcor Circle
- Knoxville, TN 37923
-
- Phone: (615)694-0800
-
- PLINK: R*CHARLTON
- BINTNET: charltr@utkvx1
-
- 05-Jul-90
-
- This program is in the public domain. It may be used for any purpose.
-
- Algorithm: Generate all of the permutations of the letters in a string
- and test each one for its "trigram weight". (A trigram is a group of three
- letters, e.g., AAA, EWE, QQQ.) Use a table of trigram weights (frequencies)
- derived from a 38,500 word dictionary. Calculate an n-letter string's
- trigram weight as the sum of the weights of its (n - 2) trigrams. A
- trigram's weight is the log base 2 of its frequency of occurrence in
- the dictionary.
-
- Keep a table of the twenty highest weights while avoiding duplicates due to
- multiple occurrences of a single letter. Print out only the non-zero
- entries in the table. This all works because some trigrams (such as "ING")
- are more likely than others (such as "QQQ").
-
- The trigram data are stored on disk as a nybble_table and a bit_table for
- compactness, then converted to an array (nn[][][]) for execution speed.
- The string to be unjumbled is adjusted by making 'A' 1, 'B' 2, ... 'Z' 26
- so that the characters can be used as indices into nn[][][].
- This allows nn[][][] to be as small as possible (leaving '\0' available as
- the end-of-string indicator).
-
-
- history:
- v1.3 - don't include any 'word' with any individual trigram weight of 0
- v1.4 - general cleanup (& change to strupr)
- v1.5 - added exit() from main() & editorial comments & misc. tweakings
- v1.6 - changed to structure for table of words and weights
- v1.7 - changed do{}while() to for() in print_table(); READ_NYBBLE(); test_bit
- v1.8 - TEST_BIT() as macro; removed unused variables in main()
- v1.9 - rearranged main(), changed function names, modified intro(), strlwr()
- v1.10 removed strlwr() definition
- v1.11 changed temp variable to static in insert()
- v1.12 eliminated second strcpy() in permute() (faster!!)
- v1.13 allow 8-letter words, add 'nothing found'
- v1.14 adjust MAXLEN, restored strlwr() definition, conbined printf's in intro()
- */
-
- char version[] = "v1.14";
-
- #include <stdio.h>
- #include <ctype.h>
-
- #define CSI 0x9b
- #define CLS 0x0c
-
- #define FALSE 0
- #define TRUE (!FALSE)
-
- #define MAPVALUE ('a'-1) /* 'A' for uppercase, 'a' for lowercase */
-
- #define MAXLEN 10 /* max. length of string to unjumble */
- #define TABLESIZE 20 /* size of table of guesses */
-
- /* read a nybble from a table of nybbles (32-bit longs in table) */
- #define READ_NYBBLE( nyb_number ) \
- ( (nybble_table [ nyb_number >> 3 ] >> ((nyb_number & 7L) << 2)) & 0xFL )
-
- /* test bit n in table of 32-bit ints */
- #define TEST_BIT( n ) \
- ( bit_table [ n >> 5 ] & ( 1L << (n & 0x1FL) ) ? 1 : 0 )
-
- /* the follow two lines are associated with tables.c */
- extern int bit_table_size, nybble_table_size;
- extern unsigned long bit_table[], nybble_table[];
-
- /* structure array to hold candidate words and their weights */
- struct {
- char word [ MAXLEN + 1 ];
- int weight;
- } table [ TABLESIZE ];
-
- unsigned char nn[27][27][27]; /* array of probabilities of trigrams */
- int min_weight; /* used in insert() for minimum weight */
- int min_weight_ptr; /* used in insert() to point at minimum */
-
- char *strlwr();
-
-
- /*------------------------------------------------------------------------*/
-
- main()
- {
- char buf [ 255+1 ];
- int length;
-
- intro();
-
- get_weights(); /* read weights into nn[][][] */
-
- for (;;)
- {
- init_table();
- printf ( "\tEnter 3 to 8 letters ( <RETURN> to quit ): ");
-
- if ( ! gets ( buf ) )
- break;
-
- length = strlen ( buf );
-
- if ( length == 0 )
- break;
-
- else if ( !all_letters ( buf ) )
- printf ( "\tYou may only use letters (a-z; A-Z).\n\n" );
-
- else if ( length > 8 )
- printf ( "\tToo many letters.\n\n" );
-
- else if ( length < 3 )
- printf ( "\tToo few letters.\n\n" );
-
- else
- {
- /* do the unjumbling */
- strlwr ( buf );
- map ( buf ); /* convert to array indices */
- permute ( buf, 0, length ); /* do all permutations */
- print_table(); /* display the results */
- }
- }
- exit ( 0 );
- }
-
- /*------------------------------------------------------------------------*/
-
- intro()
- {
- #ifdef MCH_AMIGA
- printf ( "%c", CLS ); /* clear screen */
- printf ( "%c0;33;40m", CSI ); /* color 3,0 */
- #endif
- printf ( "\n\t\t\t Unjumble %s\n\n"
- "\t\t\t by\n\n"
- "\t\t\t Ron Charlton\n"
- "\t\t\t 9002 Balcor Circle\n"
- "\t\t\t Knoxville, TN 37923\n"
- "\t\t\t Plink: R*CHARLTON\n"
- "\t\t\t BITNET: charltr@utkvx1\n"
- "\t\t Internet: charltr@utkvx1.utk.edu\n\n", version );
- #ifdef MCH_AMIGA
- printf ( "%c0;31;40m", CSI ); /* color 1,0 */
- #endif
-
- printf ( "\tUnjumble tries to create words out of a group of letters\n"
- "\tthat you enter. It will print out up to twenty guesses in\n"
- "\torder with each guess' weight before it. Three to 8 letters at\n"
- "\ta time are accepted. Eight letters requires several seconds of \n"
- "\tcalculation. More letters are harder to unjumble.\n\n" );
- }
- /*------------------------------------------------------------------------*/
-
- /* test for all letters in a string */
- all_letters ( str )
- char *str;
- {
- while ( isalpha ( *str++ ) )
- ;
- return ( ! *--str ); /* pointing at '\0' if all alpha */
- }
-
- /*------------------------------------------------------------------------*/
-
- /* convert letters to array indices (A-->1, B-->2 ... Z-->26) */
- map ( str )
- char *str;
- {
- while ( * str )
- * str++ -= MAPVALUE;
- }
-
- /*------------------------------------------------------------------------*/
-
- /* find sum of weights of trigrams in a string. */
- weigh ( str )
- char *str;
- {
- register int weight = 0, len, pos, triweight;
- if ( (len = strlen ( str ) ) >= 3 )
- for ( pos = 0; pos <= len - 3; pos++)
- {
- triweight = nn [ str [pos] ] [ str [pos+1] ] [ str [pos+2] ];
- if ( triweight > 0 )
- weight += triweight;
- else
- {
- /* eliminate those with any trigram of 0 weight */
- weight = 0;
- break;
- }
- }
- return ( weight );
- }
-
- /*------------------------------------------------------------------------*/
-
- /* find all permutations of a string */
- permute ( a, k, n )
- char *a;
- int k, n;
- {
- int i;
- char b [ MAXLEN ];
- static char temp;
- if (k == n)
- insert ( a ); /* insert into table if weighty */
- else
- {
- strcpy(b, a);
- for (i = k; i < n; i++)
- {
- temp = b [ k ];
- b [ k ] = b [ i ];
- b [ i ] = temp;
- permute ( b, k+1, n );
- }
- }
- }
-
- /*------------------------------------------------------------------------*/
-
- /* convert array indices to alphabetic 1-->A, 2-->B ... 26-->Z */
- unmap ( str )
- char *str;
- {
- while ( * str )
- *str++ += MAPVALUE;
- }
-
- /*------------------------------------------------------------------------*/
-
- /* initialize the guess table to empty */
- init_table()
- {
- int i;
- min_weight = 0;
- min_weight_ptr = 0;
- for ( i = 0; i < TABLESIZE; i++)
- {
- table[i].weight = 0;
- table[i].word[0] = '\0';
- }
- }
-
- /*------------------------------------------------------------------------*/
-
- /* if this is a weighty word, insert it in the table */
- insert ( str )
- char *str;
- {
- register int weight, i;
-
- weight = weigh ( str );
- if ( weight > min_weight ) /* does this word weigh more than table min.? */
- {
- for ( i = 0; i < TABLESIZE; i++ ) /* avoid duplicates (BIGgER BIgGER) */
- if ( ! strcmp ( table[i].word, str ) )
- return; /* it's already in the table */
-
- /* put this one in table in place of old minimum */
- table [ min_weight_ptr ].weight = weight;
- strcpy ( table [ min_weight_ptr ].word, str );
-
- /* search for new minimum in the table */
- min_weight_ptr = 0;
- min_weight = table [ 0 ].weight;
- for ( i = 1; i < TABLESIZE; i++ )
- if ( table [ i ].weight < min_weight )
- {
- min_weight_ptr = i;
- min_weight = table [ i ].weight;
- }
- }
- }
-
- /*------------------------------------------------------------------------*/
-
- /* print the non-zero table entries in descending order. */
- /* this is a bogus, but adequate, sort */
- print_table()
- {
- int max_entry, i, first_pass = TRUE;
-
- for(;;)
- {
- max_entry = 0; /* point at maximum weight */
-
- for ( i = 1; i < TABLESIZE; i++ )
- if ( table [ i ].weight > table [ max_entry ].weight )
- max_entry = i;
-
- if ( first_pass && table [ max_entry ].weight == 0 )
- {
- printf ( "\t\tNothing found.\n" );
- break;
- }
-
- if ( table [ max_entry ].weight )
- {
- unmap ( table [ max_entry ].word );
- printf("\t\t%5d %s\n", table [ max_entry ].weight,
- table [ max_entry ].word );
- table [ max_entry ].weight = 0; /* 'delete' from table */
- }
- else
- break;
-
- first_pass = FALSE;
- }
- }
-
- /*------------------------------------------------------------------------*/
-
- /* get trigram weights from nybble_table and bit_table into nn[][][] */
- get_weights()
- {
- register int i, j, k, m = 0, ptr = 0;
- for ( i = 1; i <= 26; i++ )
- for ( j = 1; j <= 26; j++ )
- for ( k = 1; k <= 26; k++ )
- {
- /* beware of ++x side effects */
- if ( TEST_BIT ( m ) )
- {
- nn [ i ] [ j ] [ k ] = READ_NYBBLE ( ptr );
- ++ptr;
- }
- ++m;
- }
- }
-
- /*------------------------------------------------------------------------*/
- /*
- * Convert a string to lower case in place and return a pointer
- * to the resulting string.
- */
-
- char *
- strlwr ( char *str )
- {
- register char *s = str;
- while ( *s )
- {
- if (*s >= 'A' && *s <= 'Z')
- *s = *s - 'A' + 'a';
- ++s;
- }
-
- return ( str );
- }
-
- /*-------------------------- END OF FILE ---------------------------------*/
-