home *** CD-ROM | disk | FTP | other *** search
- /* EXPR_LEX.C
- Copyright (C) 1992 Keith L. Robertson All Rights Reserved
-
- Lexical analyzer for the EXPR expression evaluator.
- */
- #include <ctype.h>
- #include "expr_lex.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
-
- /****************************************/
- /* Options */
- /****************************************/
-
- /* Maximum length of identifiers and other lexemes EXCEPT strings. */
- #define MAX_LEX_LENGTH 128
-
- extern SHORT ERROR_THRESHOLD = 5; /* Max # errors before it quits. */
- extern SHORT TAB_STOPS = 8;
-
-
- /****************************************/
- /* Helper Functions */
- /****************************************/
-
/* Convert a constant number to a string literal.
   Two macro levels are required: the '#' operator stringizes its operand
   WITHOUT macro-expanding it, so a single-level macro would turn
   to_string(MAX_LEX_LENGTH) into "MAX_LEX_LENGTH" instead of "128".
   The outer macro expands the argument; the inner one stringizes the result. */
#define to_string_raw(num) #num
#define to_string(num) to_string_raw(num)
-
- /* Convert unsigned short to string. */
- #define MX_NLEN 6
- extern STRING ustoa (USHORT num)
- { static CHAR nstr [MX_NLEN];
- STRING p;
-
- p = nstr+MX_NLEN-1;
- *p = 0;
- do {
- *--p = num % 10 + '0';
- num = num / 10;
- } while (num != 0);
-
- return p;
- }
-
- /* Write a string to Standard Output. */
- extern VOID put_string (CSTRING str)
- { fputs (str, stdout);
- }
-
-
- /****************************************/
- /* Tables & Constant Data */
- /****************************************/
-
/* Character classification codes (the values stored in char_class[]).
   The numeric order is significant: is_idchar depends on ALPHAB and NUMBER
   being the two smallest values, and is_symchar depends on every symbol
   class lying in the range SYM .. BD_SYM, just below EOF_CH. */
enum {
    ALPHAB   = 0,           /* A-Z, a-z, _ */
    NUMBER   = 1,           /* 0-9 */
    WHT_SP   = 2,           /* \t \n \v \f \r space */
    BAD_CH   = 3,           /* Other control characters, DEL, and 128-254 */
    SYM      = 4,           /* SYM+0 .. SYM+11: symbols recognized by the lexer */
    BD_SYM   = SYM + 12,    /* Bad symbol, not otherwise listed */
    EOF_CH,                 /* End-of-file character */
    ENDCLASS                /* One past the last class code */
};
-
- /* Map character to classification: */
- UCHAR char_class[256] = {
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, WHT_SP, WHT_SP, WHT_SP, WHT_SP, WHT_SP, BAD_CH, BAD_CH, /* \btnvfr?? */
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- WHT_SP, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BD_SYM, /* !"#$%&' */
- SYM+ 0, SYM+ 1, SYM+ 2, SYM+ 3, SYM+11, SYM+ 4, SYM+ 5, SYM+ 6, /* ()*+,-./ */
- NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, /* 01234567 */
- NUMBER, NUMBER, BD_SYM, SYM+ 7, BD_SYM, SYM+ 8, BD_SYM, SYM+10, /* 89:;<=>? */
- BD_SYM, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* @ABCDEFG */
- ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* HIJKLMNO */
- ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* PQRSTUVW */
- ALPHAB, ALPHAB, ALPHAB, BD_SYM, BD_SYM, BD_SYM, SYM+ 9, ALPHAB, /* XYZ[\]^_ */
- BD_SYM, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* `abcdefg */
- ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* hijklmno */
- ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* pqrstuvw */
- ALPHAB, ALPHAB, ALPHAB, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BAD_CH, /* xyz{|}~Del*/
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, /* 128+ */
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
- BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, EOF_CH,
- };
-
/* True when 'ch' may appear inside an identifier, i.e. its class is
   ALPHAB or NUMBER (the two lowest class codes). */
#define is_idchar(ch)   (char_class [(ch)] <= NUMBER)

/* True when 'ch' belongs to any symbol class, recognized or not
   (SYM up to and including BD_SYM).  The class is cached in a variable
   named temp_ch, which must be in scope wherever the macro is used. */
#define is_symchar(ch)  (temp_ch = char_class [(ch)], \
                         temp_ch >= SYM && temp_ch < EOF_CH)
-
-
- /****************************************/
- /* Static Data */
- /****************************************/
-
- static UCHAR next_char; /* Next character to process */
-
- /* String corresponding to returned token. */
- static CHAR lex_buffer [MAX_LEX_LENGTH + 1];
- static USHORT lex_len; /* Length of lexeme. */
-
- static USHORT c_line, c_posn;
-
- /* lexeme
- l c
- lexeme.line, lexeme.posn -- start of lexeme
- c_line, c_posn -- start of current character
- */
-
-
- /****************************************/
- /* Initialization Function */
- /****************************************/
-
- extern USHORT error_count = 0; /* Forward decls */
- extern UCHAR _near adv ();
-
- /* Initialize lexical analyzer. Called before any call to get_token. */
- extern VOID init_lexer ()
- { error_count = 0;
- c_posn = 0;
- c_line = 1;
- lex_len = 0;
-
- next_char = adv ();
- }
-
-
- /****************************************/
- /* Error Output Functions */
- /****************************************/
-
- static CSTRING err_type_str [] = {
- "Lexical", "Syntax", "Semantic", "Fatal",
- };
-
- /* Mtype is message type, error or warning. */
- static VOID _near log_output (USHORT etype, CSTRING desc, STRING mtype)
- { put_string ("Posn ");
- put_string (ustoa (etype==LEXICAL ? c_posn : lexeme.posn));
- put_string (": ");
- put_string (err_type_str [etype]);
- put_string (mtype);
- put_string (desc);
- put_string ("\n");
- }
-
- /* Print an error message, desc. */
- /* Locn is type of error, LEXICAL, SYNTAX, SEMANTIC, FATAL */
- extern VOID log_error (USHORT etype, CSTRING desc)
- { log_output (etype, desc, " error: ");
-
- if (etype == FATAL) { exit (1); }
-
- if (ERROR_THRESHOLD != 0) {
- ++error_count;
- if (error_count >= ERROR_THRESHOLD) {
- put_string ("Aborting...too many errors.\n");
- exit (1);
- }
- }
- }
-
- extern VOID log_warning (USHORT etype, CSTRING desc)
- { log_output (etype, desc, " warning: ");
- }
-
-
- /****************************************/
- /* Advance Input Functions */
- /****************************************/
- /* Use static '_near' functions to improve call time and program space. */
-
- static UCHAR _near get_char ()
- { UCHAR ch;
- /* Skip and ignore carriage returns */
- do { ch = getchar (); } while (ch == '\r');
-
- if (ch=='\n') { ++c_line; c_posn = 0; }
- else if (ch=='\t') { c_posn = c_posn - (c_posn % TAB_STOPS) + TAB_STOPS; }
- else { ++c_posn; }
-
- return ch;
- }
-
- /* Advance to next character and return it */
- static UCHAR _near adv ()
- { UCHAR ch;
- lex_buffer [lex_len++] = next_char;
- ch = get_char ();
- return (next_char = ch);
- }
-
-
- /****************************************/
- /* External 'get_' Functions */
- /****************************************/
-
- /* Public */
- LEX_INFO lexeme;
- USHORT last_token;
-
-
- /* Get token from input stream. */
- extern USHORT get_token ()
- { register SHORT token;
- register UCHAR ch = next_char, temp_ch;
-
- lex_len = 0;
- lexeme.name = 0;
- lexeme.line = c_line;
- lexeme.posn = c_posn;
-
- while (1) {
-
- switch (char_class [ch]) {
- /*---------------------------------*/
- case EOF_CH:
- token = EOS; goto RETURN;
-
- /*---------------------------------*/
- case ALPHAB:
- if (ch == 'e') {
- if ((ch=adv())=='x' && (ch=adv())=='i' && (ch=adv())=='t') {
- token = EOS; goto ENDKEY;
- }
- else { goto IDENT; }
- }
- else if (ch == 'h') {
- if ((ch=adv())=='e' && (ch=adv())=='l' && (ch=adv())=='p') {
- token = HELP; goto ENDKEY;
- }
- else { goto IDENT; }
- }
- else if (ch == 'k') {
- if ((ch=adv())=='i' && (ch=adv())=='l' && (ch=adv())=='l') {
- token = KILL; goto ENDKEY;
- }
- else { goto IDENT; }
- }
- else if (ch == 'l') {
- if ((ch=adv()) == 'i' && (ch=adv()) == 's') {
- if ((ch=adv()) == 'p') {
- token = LISP; goto ENDKEY;
- }
- else if (ch == 't') {
- token = LIST; goto ENDKEY;
- }
- else { goto IDENT; }
- }
- else { goto IDENT; }
- }
- else { goto IDENT; }
-
- ENDKEY:
- if (!is_idchar(ch=adv())) { goto RETURN; }
- ch = adv();
- IDENT:
- while (is_idchar(ch)) { ch = adv(); }
- token = IDENTIFIER;
- goto RETURN_LEXEME_STRING;
-
- /*---------------------------------*/
- case NUMBER:
- while ('0' <= ch && ch <= '9') { ch=adv(); }
- case SYM+ 5:
- if (ch == '.') { ch=adv(); }
- while ('0' <= ch && ch <= '9') { ch=adv(); }
-
- /* Convert to floating point value. Don't return string. */
- lex_buffer [lex_len] = '\0';
- lexeme.val.f = strtod (lex_buffer, 0);
- token = REAL;
- goto RETURN;
-
- /*---------------------------------*/
- case WHT_SP:
- do {
- ch = adv ();
- } while (char_class [ch] == WHT_SP);
- goto RESTART;
-
- /*---------------------------------*/
- case BAD_CH:
- log_error (LEXICAL, "unrecognized character(s) in input stream.");
- do {
- ch = adv ();
- } while (char_class [ch] == BAD_CH);
- goto RESTART;
-
- /*---------------------------------*/
- case SYM+ 0: /* ( */
- token = OPEN_P; goto ENDSYM;
-
- case SYM+ 1: /* ) */
- token = CLOSE_P; goto ENDSYM;
-
- case SYM+ 2: /* * */
- token = TIMES; goto ENDSYM;
-
- case SYM+ 3: /* + */
- token = PLUS; goto ENDSYM;
-
- case SYM+ 4: /* - */
- token = MINUS; goto ENDSYM;
-
- case SYM+ 6: /* / */
- token = DIVIDE; goto ENDSYM;
-
- case SYM+ 8: /* = */
- token = EQUAL; goto ENDSYM;
-
- case SYM+ 9: /* ^ */
- token = POWER; goto ENDSYM;
-
- case SYM+10: /* ? */
- token = HELP; goto ENDSYM;
-
- case SYM+11: /* , */
- token = COMMA; goto ENDSYM;
-
- case SYM+ 7: /* ; */
- token = SEMICOLON;
-
- ENDSYM:
- ch = adv();
- goto RETURN;
-
- /*---------------------------------*/
- case BD_SYM:
- log_error (LEXICAL, "unknown symbol.");
- do {
- ch = adv ();
- } while (is_symchar (ch));
- RESTART:
- next_char = ch;
- lex_len = 0;
- lexeme.line = c_line;
- lexeme.posn = c_posn;
- }
- }
-
- RETURN_LEXEME_STRING:
- if (lex_len > MAX_LEX_LENGTH) {
- /* Memory after lex_buffer has been trampled on. */
- log_error (FATAL,
- "only strings can exceed " to_string(MAX_LEX_LENGTH) " characters in length.");
- }
- lex_buffer [lex_len] = '\0';
- lexeme.name = malloc (lex_len + 1);
- strcpy (lexeme.name, lex_buffer);
-
- RETURN:
- return (lexeme.token = last_token = token);
- }
-