home *** CD-ROM | disk | FTP | other *** search
- _C PROGRAMMING COLUMN_
- by Al Stevens
-
- [LISTING ONE]
-
- /* ----------- textsrch.h ---------- */
-
- #define MXTOKS 25 /* maximum number of tokens */
-
- #define OK 0
- #define ERROR !OK
-
- struct postfix {
- char pfix; /* tokens in postfix notation */
- char *pfixop; /* operand strings */
- };
-
- extern struct postfix pftokens[];
- extern int xp_offset;
-
- /* --------- expression token values ---------- */
- #define TERM 0
- #define OPERAND 'O'
- #define AND '&'
- #define OR '|'
- #define OPEN '('
- #define CLOSE ')'
- #define NOT '!'
- #define QUOTE '"'
-
- /* ---------- textsrch prototypes ---------- */
- struct postfix *lexical_scan(char *expr);
-
-
-
-
- [LISTING TWO]
-
- /* ---------- express.c ----------- */
-
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include "textsrch.h"
-
- /*
- * Parse a search expression into a valid postfix token stream.
- * The input expression has this form:
- * <expr> := <ident>
- * <ident> <op> <expr>
- * NOT <expr>
- * (<expr>)
- * <op> := AND
- * OR
- * <ident> := <character>
- * <character> <ident>
- * "<phrase>"
- * <phrase> := <ident>
- * <ident> <space> <phrase>
- */
-
- #define iswhite(c) (c < 33 && c > 0)
- #define isparen(c) (c == OPEN || c == CLOSE)
- #define isop(c) (c == AND || c == OR)
- #define ischaracter(c) (!isparen(c) && \
- !isop(c) && \
- c != TERM && \
- !iswhite(c) && \
- c != QUOTE)
-
- /* ----------- prototypes --------------- */
- static int getword(char *expr);
- static int gettoken(char *expr);
- static int expression(char *expr);
- static void postfix(void);
- static int isp(int tok);
- static int icp(int tok);
- static void poststack(void);
-
- int xp_offset = 0; /* offset into the expression */
- static char word[50]; /* word from the expression */
-
- static char tokens[MXTOKS+1]; /* tokens in infix notation */
- static char *operands[MXTOKS]; /* operand strings */
- static int token_ptr = 0;
-
- static char stack[MXTOKS]; /* stack for tokens */
- static char *stopr[MXTOKS]; /* operand strings */
- static int top = 0;
-
- struct postfix pftokens[MXTOKS];
- static int pf = 0;
-
- /* ------ analyze the expression for valid syntax ----------*/
- struct postfix *lexical_scan(char *expr)
- {
- token_ptr = xp_offset = 0;
- if (expression(expr) == ERROR)
- return NULL;
- else if (gettoken(expr) != TERM)
- return NULL;
- postfix();
- return pftokens;
- }
-
- /* ---------- analyze an element of the expression ---------- */
- static int expression(char *expr)
- {
- int tok;
-
- tok = gettoken(expr);
- switch (tok) {
- case OPEN:
- if (expression(expr) == ERROR)
- return ERROR;
- if ((tok = gettoken(expr)) != CLOSE)
- return ERROR;
- break;
- case NOT:
- if (expression(expr) == ERROR)
- return ERROR;
- break;
- case OPERAND:
- break;
- default:
- return ERROR;
- }
- tok = gettoken(expr);
- switch (tok) {
- case TERM:
- return OK;
- case AND:
- case OR:
- return expression(expr);
- case CLOSE:
- --token_ptr;
- --xp_offset;
- return OK;
- default:
- break;
- }
- return ERROR;
- }
-
- /* ------- extract the next token from the expression ------- */
- static int gettoken(char *expr)
- {
- char tok;
-
- operands[token_ptr] = 0;
- if ((tok = getword(expr))== OPERAND) {
- operands[token_ptr] = malloc(strlen(word) + 1);
- strcpy(operands[token_ptr], word);
- }
- tokens[token_ptr++] = tok;
- return tok;
- }
-
- /* ------- extract a word, operator, parenthesis,
- or terminator from the expression ------------- */
- static int getword(char *expr)
- {
- int w = 0;
-
- /* ------- bypass white space -------- */
- while (iswhite(expr[xp_offset]))
- xp_offset++;
- switch (expr[xp_offset]) {
- case OPEN:
- case CLOSE:
- return expr[xp_offset++];
- case TERM:
- return TERM;
- case QUOTE:
- while (expr[++xp_offset] != QUOTE) {
- if (w == 50)
- return ERROR;
- word[w++] = tolower(expr[xp_offset]);
- }
- xp_offset++;
- word[w] = '\0';
- return OPERAND;
- default:
- while (ischaracter(expr[xp_offset])) {
- if (w == 50)
- return ERROR;
- word[w++] = tolower(expr[xp_offset]);
- xp_offset++;
- }
- word[w] = '\0';
- if (strcmp(word, "and") == 0)
- return AND;
- else if (strcmp(word, "or") == 0)
- return OR;
- else if (strcmp(word, "not") == 0)
- return NOT;
- return OPERAND;
- }
- }
-
- /* - convert the expression from infix to postfix notation - */
- static void postfix(void)
- {
- char tok = '*';
-
- top = token_ptr = pf = 0;
- stack[top] = '*';
- while (tok != TERM) {
- switch (tok = tokens[token_ptr]) {
- case OPERAND:
- pftokens[pf].pfix = tok;
- pftokens[pf].pfixop = operands[token_ptr];
- pf++;
- break;
- case NOT:
- case OPEN:
- case AND:
- case OR:
- while (isp(stack[top]) >= icp(tok))
- poststack();
- stack[++top] = tok;
- break;
- case CLOSE:
- while (stack[top] != OPEN)
- poststack();
- --top;
- break;
- case TERM:
- while (top)
- poststack();
- pftokens[pf++].pfix = tok;
- break;
- }
- token_ptr++;
- }
- }
-
- static int isp(int tok)
- {
- return ((tok == OPEN) ? 0 :
- (tok == '*') ? -1 :
- (tok == NOT) ? 2 :
- 1 );
- }
-
- static int icp(int tok)
- {
- return ((tok == OPEN) ? 4 :
- (tok == NOT) ? 2 :
- 1 );
- }
-
- /* --- remove a token from the stack, put it into postfix --- */
- static void poststack(void)
- {
- pftokens[pf].pfix = stack[top];
- pftokens[pf].pfixop = stopr[top];
- --top;
- pf++;
- }
-
-
-
-
- [LISTING THREE]
-
- /* ----------- testexpr.c ------------- */
-
- /*
- * A program to test the TEXTSRCH expression analyzer
- */
-
- #include <stdio.h>
- #include <process.h>
- #include <string.h>
- #include "textsrch.h"
-
- static void disp_token(struct postfix *pf);
-
- void main(void)
- {
- char expr[80];
-
- do {
- /* ----- read the expression from the console ------ */
- printf("\nEnter the search expression:\n");
- gets(expr);
- if (*expr) {
- /* --- scan for errors and convert to postfix --- */
- if (lexical_scan(expr) == NULL) {
- while(xp_offset--)
- putchar(' ');
- putchar('^');
- printf("\nSyntax Error");
- exit(1);
- }
-
- /* ------ display the postfix tokens ------ */
- printf("\nToken");
- printf("\n--------------");
- disp_token(pftokens);
- printf("\n--------------");
- }
- } while (*expr);
- }
-
- static void disp_token(struct postfix *pf)
- {
- if (pf->pfix != TERM) {
- disp_token(pf+1);
- printf("\n %s", pf->pfix == AND ? "<and>" :
- pf->pfix == OR ? "<or>" :
- pf->pfix == NOT ? "<not>" :
- pf->pfixop);
- }
- }
-
-
-
-
-
-