home *** CD-ROM | disk | FTP | other *** search
/ Liren Large Software Subsidy 7 / 07.iso / c / c072 / 1.ddi / PRG4_3.C < prev    next >
Encoding:
C/C++ Source or Header  |  1987-09-19  |  6.1 KB  |  167 lines

  1. /*Program 4_3 - Simple gramatical parser using state tables
  2.    by Stephen Davis, 1987
  3.  
  4.   Impliment a simple sentence parser using a state table approach.
  5.   Program recognizes uppercase, lowercase, comma, space, period
  6.   and newline.
  7. */
  8.  
  9. #include <stdio.h>
  10. #include <ctype.h>
  11. #define maxcases 5
  12. enum chartype {invalid = 1,  period,
  13.                space,     comma,    newline,
  14.                lowercase, uppercase};
  15. #define marker 0x5678
  16.  
  17. /*prototype definitions --*/
  18. enum chartype evaluate (char);
  19. int parse (struct state **, char);
  20. void fixup (void);
  21. int main (void);
  22.  
  23. /*grammatical state table*/
  24. struct state {
  25.                unsigned signature;
  26.                char currvalue;
  27.                struct {
  28.                        enum chartype value;
  29.                        struct state *next;
  30.                       } choice [maxcases];
  31.                struct state *error;
  32.               } *current;
  33.  
  34. struct state start = {marker, 0,         {{uppercase, NULL /*&normal*/},
  35.                                           {0,         NULL},
  36.                                           {0,         NULL},
  37.                                           {0,         NULL},
  38.                                           {0,         NULL}},
  39.                                           NULL /*&normal*/},
  40.              normal= {marker, lowercase, {{lowercase, &normal},
  41.                                           {space,     NULL /*&break1*/},
  42.                                           {comma,     NULL /*&break2*/},
  43.                                           {period,    NULL /*&end1*/},
  44.                                           {newline,   NULL /*&begline*/}},
  45.                                           &normal},
  46.              break1= {marker, space,     {{lowercase, &normal},
  47.                                           {uppercase, &normal},
  48.                                           {0,         NULL},
  49.                                           {0,         NULL},
  50.                                           {0,         NULL}},
  51.                                           &normal},
  52.              break2= {marker, comma,     {{space,     &break1},
  53.                                           {newline,   NULL /*&begline*/},
  54.                                           {0,         NULL},
  55.                                           {0,         NULL},
  56.                                           {0,         NULL}},
  57.                                           &normal},
  58.              end1 =  {marker, period,    {{space,     NULL /*&end2*/},
  59.                                           {newline,   &start},
  60.                                           {0,         NULL},
  61.                                           {0,         NULL},
  62.                                           {0,         NULL}},
  63.                                           &start},
  64.              end2 =  {marker, space,     {{space,     &start},
  65.                                           {0,         NULL},
  66.                                           {0,         NULL},
  67.                                           {0,         NULL},
  68.                                           {0,         NULL}},
  69.                                           &start},
  70.              begline={marker, newline,   {{uppercase, &normal},
  71.                                           {lowercase, &normal},
  72.                                           {0,         NULL},
  73.                                           {0,         NULL},
  74.                                           {0,         NULL}},
  75.                                           &normal};
  76.  
  77.  
  78. /*Evaluate - evaluate the current character. Return corresponding
  79.              chartype*/
  80. enum chartype evaluate (c)
  81.     char c;
  82. {
  83.     switch (c) {
  84.                 case '.':
  85.                 case '?': return period;
  86.                 case ',': return comma;
  87.                 case ' ': return space;
  88.                 case '\n':return newline;
  89.                 default:
  90.                           if (islower (c))
  91.                                return lowercase;
  92.                           else
  93.                                if (isupper (c))
  94.                                     return uppercase;
  95.                                else
  96.                                     return invalid;
  97.                }
  98. }
  99.  
  100. /*Parse - given the current state, compare the current character
  101.           against the legal choices to select the next state.
  102.           Return a 1 on no error and a 0 on error.*/
  103. int parse (stateptr, c)
  104.     struct state **stateptr;
  105.     char c;
  106. {
  107.     unsigned i;
  108.     enum chartype val;
  109.     struct state *localptr;
  110.  
  111.     localptr = *stateptr;
  112.     if (localptr -> signature != marker) {
  113.          printf ("pointer error!\n");
  114.          *stateptr = &start;
  115.     } else {
  116.          val = evaluate (c);
  117.          for (i = 0; i < maxcases; i++)
  118.               if (localptr -> choice [i].value == val) {
  119.                    *stateptr = localptr -> choice [i].next;
  120.                    return 1;
  121.               }
  122.     }
  123.     *stateptr = localptr -> error;
  124.     return 0;
  125. }
  126.  
  127. /*Fixup - forward references are not allowed in data initializations.
  128.       Fixup initializes those values which would generate error
  129.       messages in a data initialization.*/
  130. void fixup ()
  131. {
  132.     start.choice[0].next = &normal;
  133.     start.error = &normal;
  134.  
  135.     normal.choice[1].next = &break1;
  136.     normal.choice[2].next = &break2;
  137.     normal.choice[3].next = &end1;
  138.     normal.choice[4].next = &begline;
  139.  
  140.     break2.choice[1].next = &begline;
  141.  
  142.     end1.choice[0].next = &end2;
  143. }
  144.  
  145. /*Main - read in strings from STDIN.  Parse them against our state
  146.          table grammar rules.  Point out any errors detected.*/
  147. main ()
  148. {
  149.     char buffer [256], *ptr, mark;
  150.  
  151.     fixup ();                         /*complete pointer init*/
  152.  
  153.     current = &start;
  154.     while (gets (buffer)) {
  155.          printf ("%s\n", buffer);
  156.          for (ptr = buffer; *ptr; ptr++) {
  157.               if (parse (¤t, *ptr))
  158.                    mark = ' ';
  159.               else
  160.                    mark = '^';
  161.               printf ("%c", mark);
  162.          }
  163.          parse (¤t, '\n');       /*tack on a carriage return*/
  164.          printf ("\n");
  165.     }
  166. }
  167.