home *** CD-ROM | disk | FTP | other *** search
Modula Implementation | 1991-08-10 | 19.9 KB | 689 lines |
- (*======================================================================*)
- (* Modula-2 Lexical Analyzer -- Finite State Machine *)
- (*======================================================================*)
- (* Version: 1.00 Author: Dennis Brueni *)
- (* Date: 04-29-91 Changes: original *)
- (*======================================================================*)
- (* This module contains the definitions and initialization code for *)
- (* the major tables used by the Finite State Machine of the Lexical *)
- (* Analyzer. *)
- (*======================================================================*)
-
- IMPLEMENTATION MODULE FSM;
-
- IMPORT
- SYSTEM,ASCII,FIO;
- IMPORT
- Err,HashTab;
-
-
-
-
-
-
- CONST
- FirstCh = MIN(Alphabet);
- LastCh = MAX(Alphabet);
-
- (************************************************************************)
-
- TYPE
- HashTabEntry = POINTER TO HTEntry;
-
- HTEntry = RECORD
- KW : POINTER TO StringsType;
- KWType : Lexicals;
- END;
-
- (*----------------------------------------------------------------------*)
- (* Global (to module) variables. *)
- (*----------------------------------------------------------------------*)
-
- VAR
- token: Lexicals;
-
- KeyWords: ARRAY [MIN(Lexicals)..M2NOT] OF HTEntry;
- HT: HashTab.HashTable;
-
- VAR
- ch: CHAR; (* Current INPUT CHAR *)
- PBack: BOOLEAN; (* PBack done? *)
- State: States; (* Current State *)
- CommNest: CARDINAL; (* Current Nesting Level*)
-
- (************************************************************************)
- (** ACTIONS *)
- (************************************************************************)
- (*--PUSHBACK--Pushes the most recently read charactor back--------------*)
-
-
-
- PROCEDURE PushBack;
- BEGIN
-
- PBack:=TRUE ;
- END PushBack;
-
- (*--EMIT--Adds the charactor to the output string-----------------------*)
-
- PROCEDURE Emit;
- BEGIN
- WITH Token DO
- IF Length >= TokenSize THEN
- Err.Message(Err.TokTooLong);
- ELSE
- String[Length]:=ch;
- INC(Length);
- END;
- END;
- END Emit;
-
- (*--UNEMIT--Removes a charactor from the output string------------------*)
-
- PROCEDURE UnEmit;
- BEGIN
- WITH Token DO
- DEC(Length);
- String[Length]:=ASCII.NUL;
- END;
- END UnEmit;
-
- (*--ERROR--Writes the appropriate error message-------------------------*)
-
- PROCEDURE Error;
-
- VAR
- mess: Err.Names;
- BEGIN
- CASE State OF
- STStr:
- mess:=Err.StrNotEnded;
- |STCommBeg, STCommSkip, STCommNest, STCommEnd, STCommLine:
- mess:=Err.ComNotEnded;
- ELSE
- mess:=Err.IFail;
- END;
- Err.Message(mess);
- State:=STStart;
- Token.Length:=0;
- CommNest:=0;
- END Error;
-
- (*--NOTHING--ignores the current input charactor.-----------------------*)
-
- PROCEDURE Nothing;
- BEGIN
- END Nothing;
-
- (*--LINEECHO--Echo the current INPUT charactor if a linefeed------------*)
-
- PROCEDURE LineEcho;
- BEGIN
- INC(Err.LineNum);
- IF IncludeLevel # 0 THEN
- ch:=' ';
- DEC(Err.LineNum);
- END;
- FIO.WriteChar(DestFile,ch);
- END LineEcho;
-
- (*--ECHOCOMMENT--Echo a charactor in a comment to the output file------*)
-
- PROCEDURE EchoComment;
- BEGIN
- IF NOT StripFlag THEN
- FIO.WriteChar(DestFile,ch);
- END;
- END EchoComment;
-
- (*--INCOMMENT--Increment the comment nesting level count----------------*)
-
- PROCEDURE InComment;
- BEGIN
- INC(CommNest);
- IF NOT StripFlag THEN
- FIO.WriteChar(DestFile,'(');
- FIO.WriteChar(DestFile,'*');
- EchoComment;
- END;
- END InComment;
-
- (*--OUTCOMMENT--Decrement the comment nesting level count---------------*)
-
- PROCEDURE OutComment;
- BEGIN
- DEC(CommNest);
- IF CommNest=0 THEN
- State:=STSkipSpaces; (* replace comment with single space *)
- END;
- EchoComment;
- END OutComment;
-
- (*--ACCATSIGN--Accept an '@'-------------------------------------------*)
-
-
-
- PROCEDURE AccAtSign;
- BEGIN
- Emit;
- Token.Class:=M2AtSign;
- END AccAtSign;
-
- (*--ACCNOT--Accept a tilde (not)---------------------------------------*)
-
- PROCEDURE AccNot;
- BEGIN
- Emit;
- Token.Class:=M2NOT;
- END AccNot;
-
- (*--ACCCOMMA--Accept a comma-------------------------------------------*)
-
- PROCEDURE AccComma;
- BEGIN
- Emit;
- Token.Class:=M2Comma;
- END AccComma;
-
- (*--ACCLPAREN--Accept a left parenthesis-------------------------------*)
-
- PROCEDURE AccLParen;
- BEGIN
- PBack:=TRUE ;
- Token.Class:=M2LParen;
- END AccLParen;
-
- (*--ACCRPAREN--Accept a right parenthesis------------------------------*)
-
- PROCEDURE AccRParen;
- BEGIN
- Emit;
- Token.Class:=M2RParen;
- END AccRParen;
-
- (*--ACCAND--Accept a & (and)-------------------------------------------*)
-
- PROCEDURE AccAnd;
- BEGIN
- Emit;
- Token.Class:=M2AND;
- END AccAnd;
-
- (*--ACCOR--Accept a vertical bar (or)----------------------------------*)
-
- PROCEDURE AccOr;
- BEGIN
- Emit;
- Token.Class:=M2OR;
- END AccOr;
-
- (*--ACCString--Accept a string (for include filenames)-----------------*)
-
- PROCEDURE AccString;
- BEGIN
- Token.Class:=M2String;
- END AccString;
-
- (*--ACCString--Accept a single quoted string --------------------------*)
-
- PROCEDURE AccStr;
- BEGIN
- Token.Class:=M2Str;
- END AccStr;
-
- (*--ACCCH--Accept any charactor----------------------------------------*)
-
- PROCEDURE Accch;
- BEGIN
- Emit;
- Token.Class:=M2ch;
- END Accch;
-
- (*--ACCEOF--Accept EOF condition---------------------------------------*)
-
- PROCEDURE AccEOF;
- BEGIN
- Token.Class:=M2EOF;
- END AccEOF;
-
- (*--AccDir--Accept a directive start-----------------------------------*)
-
-
-
- PROCEDURE AccDir;
- BEGIN
- IF CommNest=0 THEN
- WITH Token DO
- String:='(*$';
- Class:=M2Block;
- Length:=3;
- END;
- ELSE
- State:=STCommSkip;
- INC(CommNest);
- END;
- END AccDir;
-
- (*--ACCSPACE--Accept a space-------------------------------------------*)
-
- PROCEDURE AccSpace;
- BEGIN
- PBack:=TRUE ;
- WITH Token DO
- String[0]:=' ';
- Length:=1;
- Class:=M2ch;
- END;
- END AccSpace;
-
- (*--ACCID--Accept an identifier----------------------------------------*)
-
- PROCEDURE AccID;
- VAR
- KeyWord: POINTER TO HTEntry;
- BEGIN
- PBack:=TRUE ;
- WITH Token DO
- String[Length]:=ASCII.NUL;
- KeyWord:=HashTab.Search(HT,String);
- IF KeyWord # NIL THEN
- Class:=KeyWord^.KWType;
- ELSE
- Class:=M2ID;
- END;
- END;
- END AccID;
-
- (************************************************************************)
- (*----------------------------------------------------------------------*)
- (* GETBSU Gets the next Basic Syntactic Unit from the source *)
- (* file. This is the driver of the Finite State Machine. *)
- (*----------------------------------------------------------------------*)
-
- PROCEDURE GetBSU;
-
- VAR
- Delta: StateTabEntry;
- BEGIN
- Token.Length:=0;
- State:=STStart;
- WITH Delta DO
- WHILE State # STTerm DO
- IF NOT PBack THEN
- ch:=FIO.ReadChar(SourceFile);
- END;
- PBack:=FALSE;
- Delta:=Transitions[State,CharClass[ch]];
- State:=NextState;
- Action();
- END;
- END;
- WITH Token DO
- String[Length]:=ASCII.NUL;
- END;
- END GetBSU;
-
- (*----------------------------------------------------------------------*)
- (* LAMBDA Fills in lambda transitions for finite state machine *)
- (* *)
- (* PARAMETERS State - the state to initialize *)
- (* next - the next state to visit *)
- (* act - the action to take *)
- (* *)
- (* CALLED BY Main initialization code *)
- (*----------------------------------------------------------------------*)
-
- PROCEDURE Lambda(State,next: States; act: PROC);
-
- VAR
- chclass: Alphabet; (* Counter used for Lambda defs *)
- BEGIN
- FOR chclass := FirstCh TO LastCh DO
- Transitions[State,chclass].NextState:=next;
- Transitions[State,chclass].Action :=act;
- END;
- END Lambda;
-
- (************************************************************************)
- (* Lots and Lots of Initialization code for Lexical Analyzer *)
- (************************************************************************)
-
- BEGIN
- PBack:=FALSE;
- CommNest:=0;
- IncludeLevel:=0;
- StripFlag:=TRUE;
- SourceFile:=FIO.INPUT;
- DestFile:=FIO.OUTPUT;
-
- (*----------------------------------------------------------------------*)
- (* Set up the strings for the lexical units *)
- (*----------------------------------------------------------------------*)
-
- Strings[M2IF] := "IF";
- Strings[M2THEN] := "THEN";
- Strings[M2ELSIF] := "ELSIF";
- Strings[M2ELSE] := "ELSE";
- Strings[M2END] := "END";
- Strings[M2DEFINE] := "DEFINE";
- Strings[M2UNDEF] := "UNDEF";
- Strings[M2INCLUDE] := "INCLUDE";
- Strings[M2MACRO] := "MACRO";
- Strings[M2ENDM] := "ENDM";
- Strings[M2LINE] := "LINE";
- Strings[M2SPACE] := "SPACE";
- Strings[M2STRIP] := "STRIP";
- Strings[M2NOSTRIP] := "NOSTRIP";
- Strings[M2AND] := "AND";
- Strings[M2OR] := "OR";
- Strings[M2NOT] := "NOT";
- Strings[M2ID] := "ID";
- Strings[M2RParen] := "RParen";
- Strings[M2LParen] := "LParen";
- Strings[M2AtSign] := "AtSign";
- Strings[M2Comma] := "Comma";
- Strings[M2ch] := "ch";
- Strings[M2String] := "String";
- Strings[M2Str] := "Str";
- Strings[M2KillArg] := "KillArg";(* not a real token! *)
- Strings[M2EOF] := "EOF";
-
- (*----------------------------------------------------------------------*)
- (* Populate the Hash table. *)
- (*----------------------------------------------------------------------*)
-
- HashTab.Create(HT,31);
- FOR token:=MIN(Lexicals) TO M2NOT DO
- KeyWords[token].KWType:=token;
- KeyWords[token].KW:=SYSTEM.ADR(Strings[token]);
- HashTab.Insert(HT,SYSTEM.ADR(KeyWords[token]));
- END;
-
- (*----------------------------------------------------------------------*)
- (* Each ASCII charactor falls into a distinct classification in order *)
- (* to cut down the size of the State-Change tables for the FSM. *)
- (*----------------------------------------------------------------------*)
-
- FOR ch:=MIN(CHAR) TO MAX(CHAR) DO
- CharClass[ch]:=CHNoClass;
- END;
- CharClass[ASCII.HT]:=CHSpace;
- CharClass[ASCII.LF]:=CHEOLN;
- CharClass[ASCII.VT]:=CHEOLN;
- CharClass[ASCII.FF]:=CHEOLN;
- CharClass[ASCII.CR]:=CHEOLN;
- CharClass[ASCII.FS]:=CHEOF;
- CharClass[' ']:=CHSpace;
- CharClass['"']:=CHDQuote;
- CharClass['$']:=CHDollar;
- CharClass['&']:=CHAnd;
- CharClass[47C]:=CHSQuote;
- CharClass['(']:=CHLParen;
- CharClass[')']:=CHRParen;
- CharClass['*']:=CHStar;
- CharClass[',']:=CHComma;
- FOR ch:='0' TO '9' DO
- CharClass[ch]:=CHDigit;
- END;
- CharClass['@']:=CHAtSign;
- FOR ch:='A' TO 'Z' DO
- CharClass[ch]:=CHLetter;
- END;
- CharClass[134C]:=CHBSlash;
- CharClass['_']:=CHLetter;
- FOR ch:='a' TO 'z' DO
- CharClass[ch]:=CHLetter;
- END;
- CharClass['|']:=CHOr;
- CharClass['~']:=CHNot;
-
- (*----------------------------------------------------------------------*)
- (* Define the transitions for the Finite State Automata. *)
- (*----------------------------------------------------------------------*)
- (* For each states, the transitions and imbedded actions are defined *)
- (* using the following convention. *)
- (* *)
- (* (1) The lambda transition is defined. *)
- (* *)
- (* (2) Meaningful (non-Lambda) transitions are then defined. *)
- (*----------------------------------------------------------------------*)
-
- (************************************************************************)
- (* START STATE *)
- (************************************************************************)
-
- Lambda(STStart,STTerm,Accch);
-
- Transitions[STStart,CHLetter].NextState :=STID;
- Transitions[STStart,CHLetter].Action :=Emit;
-
- Transitions[STStart,CHEOF].NextState :=STTerm;
- Transitions[STStart,CHEOF].Action :=AccEOF;
-
- Transitions[STStart,CHEOLN].NextState :=STStart;
- Transitions[STStart,CHEOLN].Action :=LineEcho;
-
- Transitions[STStart,CHDQuote].NextState :=STString;
- Transitions[STStart,CHDQuote].Action :=Nothing;
-
- Transitions[STStart,CHSQuote].NextState :=STStr;
- Transitions[STStart,CHSQuote].Action :=Nothing;
-
- Transitions[STStart,CHComma].NextState :=STTerm;
- Transitions[STStart,CHComma].Action :=AccComma;
-
- Transitions[STStart,CHLParen].NextState :=STLParen;
- Transitions[STStart,CHLParen].Action :=Emit;
-
- Transitions[STStart,CHRParen].NextState :=STTerm;
- Transitions[STStart,CHRParen].Action :=AccRParen;
-
- Transitions[STStart,CHComma].NextState :=STTerm;
- Transitions[STStart,CHComma].Action :=AccComma;
-
- Transitions[STStart,CHOr].NextState :=STTerm;
- Transitions[STStart,CHOr].Action :=AccOr;
-
- Transitions[STStart,CHAnd].NextState :=STTerm;
- Transitions[STStart,CHAnd].Action :=AccAnd;
-
- Transitions[STStart,CHNot].NextState :=STTerm;
- Transitions[STStart,CHNot].Action :=AccNot;
-
- Transitions[STStart,CHAtSign].NextState :=STTerm;
- Transitions[STStart,CHAtSign].Action :=AccAtSign;
-
- Transitions[STStart,CHSpace].NextState :=STSkipSpaces;
- Transitions[STStart,CHSpace].Action :=Nothing;
-
- (************************************************************************)
- (* IDENTIFIER? *)
- (************************************************************************)
-
- Lambda(STID,STTerm,AccID);
-
- Transitions[STID,CHLetter].NextState :=STID;
- Transitions[STID,CHLetter].Action :=Emit;
-
- Transitions[STID,CHDigit].NextState :=STID;
- Transitions[STID,CHDigit].Action :=Emit;
-
- (************************************************************************)
- (* SKIP WHITESPACE *)
- (************************************************************************)
-
- Lambda(STSkipSpaces,STTerm,AccSpace);
-
- Transitions[STSkipSpaces,CHSpace].NextState:=STSkipSpaces;
- Transitions[STSkipSpaces,CHSpace].Action :=Nothing;
-
- Transitions[STSkipSpaces,CHEOLN].NextState :=STStart;
- Transitions[STSkipSpaces,CHEOLN].Action :=LineEcho;
-
- Transitions[STSkipSpaces,CHEOF].NextState :=STTerm;
- Transitions[STSkipSpaces,CHEOF].Action :=AccEOF;
-
- (************************************************************************)
- (* DOUBLE-QUOTED STRING LITERAL BEGINNING *)
- (************************************************************************)
-
- Lambda(STString,STString,Emit);
-
- Transitions[STString,CHEOLN].NextState :=STString;
- Transitions[STString,CHEOLN].Action :=Error;
-
- Transitions[STString,CHEOF].NextState :=STString;
- Transitions[STString,CHEOF].Action :=Error;
-
- Transitions[STString,CHDQuote].NextState :=STTerm;
- Transitions[STString,CHDQuote].Action :=AccString;
-
- Transitions[STString,CHBSlash].NextState :=STEQuote;
- Transitions[STString,CHBSlash].Action :=Emit;
-
- (************************************************************************)
- (* BACKSTROKE IN DOUBLE-QUOTED STRING *)
- (************************************************************************)
-
- Lambda(STEQuote,STString,Emit);
-
- Transitions[STEQuote,CHEOLN].NextState :=STEQuote;
- Transitions[STEQuote,CHEOLN].Action :=Error;
-
- Transitions[STEQuote,CHEOF].NextState :=STEQuote;
- Transitions[STEQuote,CHEOF].Action :=Error;
-
- (************************************************************************)
- (* SINGLE-QUOTED STRING LITERAL BEGINNING *)
- (************************************************************************)
-
- Lambda(STStr,STStr,Emit);
-
- Transitions[STStr,CHEOLN].NextState :=STStr;
- Transitions[STStr,CHEOLN].Action :=Error;
-
- Transitions[STStr,CHEOF].NextState :=STStr;
- Transitions[STStr,CHEOF].Action :=Error;
-
- Transitions[STStr,CHSQuote].NextState :=STTerm;
- Transitions[STStr,CHSQuote].Action :=AccStr;
-
- Transitions[STStr,CHBSlash].NextState :=STSQuote;
- Transitions[STStr,CHBSlash].Action :=Emit;
-
- (************************************************************************)
- (* BACKSTROKE IN SINGLE-QUOTED STRING *)
- (************************************************************************)
-
- Lambda(STSQuote,STStr,Emit);
-
- Transitions[STSQuote,CHEOLN].NextState :=STSQuote;
- Transitions[STSQuote,CHEOLN].Action :=Error;
-
- Transitions[STSQuote,CHEOF].NextState :=STSQuote;
- Transitions[STSQuote,CHEOF].Action :=Error;
-
- (************************************************************************)
- (* LEFT PARENTHESIS *)
- (************************************************************************)
-
- Lambda(STLParen,STTerm,AccLParen);
-
- Transitions[STLParen,CHStar].NextState :=STCommBeg;
- Transitions[STLParen,CHStar].Action :=UnEmit;
-
- (************************************************************************)
- (* COMMENT BEGINING *)
- (************************************************************************)
-
- Lambda(STCommBeg,STCommSkip,InComment);
-
- Transitions[STCommBeg,CHLParen].NextState :=STCommNest;
- Transitions[STCommBeg,CHLParen].Action :=InComment;
-
- Transitions[STCommBeg,CHDollar].NextState :=STTerm;
- Transitions[STCommBeg,CHDollar].Action :=AccDir;
-
- Transitions[STCommBeg,CHEOLN].NextState :=STCommLine;
- Transitions[STCommBeg,CHEOLN].Action :=LineEcho;
-
- Transitions[STCommBeg,CHStar].NextState :=STCommEnd;
- Transitions[STCommBeg,CHStar].Action :=InComment;
-
- (************************************************************************)
- (* EOLN AT BEGINNING OF COMMENT *)
- (************************************************************************)
-
- Lambda(STCommLine,STCommSkip,InComment);
-
- Transitions[STCommLine,CHLParen].NextState :=STCommNest;
- Transitions[STCommLine,CHLParen].Action :=InComment;
-
- Transitions[STCommLine,CHEOLN].NextState :=STCommLine;
- Transitions[STCommLine,CHEOLN].Action :=LineEcho;
-
- Transitions[STCommLine,CHStar].NextState :=STCommEnd;
- Transitions[STCommLine,CHStar].Action :=InComment;
-
- (************************************************************************)
- (* COMMENT BEING SKIPPED *)
- (************************************************************************)
-
- Lambda(STCommSkip,STCommSkip,EchoComment);
-
- Transitions[STCommSkip,CHEOF].NextState :=STCommSkip;
- Transitions[STCommSkip,CHEOF].Action :=Error;
-
- Transitions[STCommSkip,CHEOLN].NextState :=STCommSkip;
- Transitions[STCommSkip,CHEOLN].Action :=LineEcho;
-
- Transitions[STCommSkip,CHStar].NextState :=STCommEnd;
- Transitions[STCommSkip,CHStar].Action :=EchoComment;
-
- Transitions[STCommSkip,CHLParen].NextState :=STCommNest;
- Transitions[STCommSkip,CHLParen].Action :=EchoComment;
-
- (************************************************************************)
- (* NESTED COMMENT POSSIBLY? *)
- (************************************************************************)
-
- Lambda(STCommNest,STCommSkip,EchoComment);
-
- Transitions[STCommNest,CHEOF].NextState :=STCommNest;
- Transitions[STCommNest,CHEOF].Action :=Error;
-
- Transitions[STCommNest,CHStar].NextState :=STCommBeg;
- Transitions[STCommNest,CHStar].Action :=EchoComment;
-
- Transitions[STCommNest,CHLParen].NextState :=STCommNest;
- Transitions[STCommNest,CHLParen].Action :=EchoComment;
-
- Transitions[STCommNest,CHEOLN].NextState :=STCommSkip;
- Transitions[STCommNest,CHEOLN].Action :=LineEcho;
-
- (************************************************************************)
- (* COMMENT ENDING (just read * ) *)
- (************************************************************************)
-
- Lambda(STCommEnd,STCommSkip,EchoComment);
-
- Transitions[STCommEnd,CHEOF].NextState :=STCommEnd;
- Transitions[STCommEnd,CHEOF].Action :=Error;
-
- Transitions[STCommEnd,CHEOLN].NextState :=STCommSkip;
- Transitions[STCommEnd,CHEOLN].Action :=LineEcho;
-
- Transitions[STCommEnd,CHRParen].NextState :=STCommSkip;
- Transitions[STCommEnd,CHRParen].Action :=OutComment;
-
- Transitions[STCommEnd,CHStar].NextState :=STCommEnd;
- Transitions[STCommEnd,CHStar].Action :=EchoComment;
-
- (************************************************************************)
- (* TERMINATION STATE *)
- (************************************************************************)
-
- Lambda(STTerm,STTerm,Error);
-
- END FSM.
-