home *** CD-ROM | disk | FTP | other *** search
-
-
- { PROGRAM: DADA.PAS
-
- AUTHOR: Brian Hayes
-
- DATE BEGUN: September 17, 1985
-
- FOR COMPILATION BY: Turbo Pascal v 3.0
- }
- { DESCRIPTION:
- A compiler for a toy language called Dada, which adopts much
- of the vocabulary of Pascal but lacks many of its features, most
- notably functions, local variables, defined types and a facility
- for passing parameters to procedures. The "object code" produced
- by the compiler consists of Forth words. For additional detail see
- the comments below and the accompanying file DADA.DOC.
-
- This program is intended for demonstration purposes only. It
- has been compiled and casually tested, but it is certainly not
- guaranteed to be error-free. Testing was done with version 3.0
- of Borland International's Turbo Pascal, but I have tried to avoid
- features peculiar to Borland's implementation. Adaptation to other
- Pascal compilers should be easy.
-
- The compatibility of the object code with various Forth systems
- is more difficult to assess. See the comments on the code gen-
- erator and DADA.DOC Note 7.
- }
-
- { COMPILER DIRECTIVES (Valid for Turbo Pascal only) }
- (*
- {$B+} {B+ assigns StdIn/StdOut to CON, B- to TRM; default +}
- {$C+} {C+ allows ^C and ^S during Read/ReadLn; default +}
- {$I+} {I+ enables automatic I/O error checking; default +}
- {$R-} {R+ enables run-time checking of index bounds; default -}
- {$V-} {V+ requires string parameters to match declared length; default +}
- {$U-} {U+ allows ^C interrupt at any time; default -}
- {$D+} {D+ unbuffers I/O for devices; default +}
- {$F16} {Fn sets maximum number of files open simultaneously; default 16}
- {$K+} {K+ enables checking for stack-heap collision; default +}
- *)
-
-
-
- program Dada;
- USES
- Crt;
- {*****************************************************************************}
- {***************** GLOBAL CONSTANTS *****************}
- {*****************************************************************************}
-
- const
- MaxIdentLen = 31; { only the first 31 chars saved }
- MaxKeyLen = 9; { longest keyword }
- MaxErrorLen = 35; { longest error message }
-
-
- {*****************************************************************************}
- {***************** GLOBAL TYPES *****************}
- {*****************************************************************************}
-
- type
- IdentStr = string[MaxIdentLen];
- KeyStr = string[MaxKeyLen];
- ErrorStr = string[MaxErrorLen];
- ForthStr = string[64]; { for Forth output; see procedure Gen }
-
-
- { ErrCode identifies error messages in the array ErrorList;
- see DADA.DOC Note 1. }
-
- ErrCode = (Disk, QChar, XPgm, XIdent, XVar, XInt, XBool, XColon, XType, XSemi,
- XBegin, XSemEnd, XThen, XDo, XAssgn, XStmt, DupDec, UnDec, Match,
- XFactor, XParen, XDot, UnXEOF);
-
-
- { TokCode lists all symbols that can possibly be returned by the
- scanner. Null is a placeholder that can appear in a few fields
- of symbol-table entries. }
-
- TokCode = (Null, Ident, Number, PgmSym, VarSym, ProcSym, BeginSym, EndSym,
- IfSym, ThenSym, ElseSym, WhileSym, DoSym, IntSym, BoolSym, TrueSym,
- FalseSym, EQ, GT, GE, NE, LE, LT, Plus, Minus, OrSym, Times, Divide,
- AndSym, ModSym, NotSym, AssignOp, Colon, LeftParen, RightParen,
- Semi, Dot, ReadSym, WriteSym);
-
- TokenRec = record { Definition of the mailbox where the scanner }
- Name : IdentStr; { leaves dope on the current token and where }
- Code : TokCode; { the parser picks it up. }
- end;
-
- SymClass = (Variable, Proc); { Every symbol must be one or the other. }
- SymPtr = ^Symbol; { Points to a symbol-table entry. }
-
-
- { Format of a symbol-table entry. See DADA.DOC Note 2 }
-
- Symbol = record
- Name : IdentStr; { UpCase string of name as read }
- Class : SymClass; { either Variable or Proc }
- VarType : TokCode; { either IntSym or BoolSym }
- Scope : integer; { zero for global, then 1,2,3...}
- Next : SymPtr; { pointer to next table entry }
- end;
-
-
- { The output buffer represents a Forth "screen" of 16 lines
- by 64 characters. }
-
- OutBufLines = 1..16;
- OutBufChars = 1..64;
- OutBufArray = array[OutBufLines] of array[OutBufChars] of char;
-
-
- {*****************************************************************************}
- {***************** GLOBAL VARIABLES *****************}
- {*****************************************************************************}
-
- var
- OutLine : OutBufLines; { Declared global becaused called by }
- OutPoint : OutBufChars; { both InitOutBuf and Gen. }
- OutBuf : OutBufArray;
-
- InFile : Text; { source code }
- OutFile : file of OutBufArray; { object code }
-
- TK : TokenRec; { where dope on the current token is stashed }
- CH : char; { current scanner input }
- LineCount : integer; { number of lines in source text }
-
- TypeSet : set of TokCode; { sets defined for convenience }
- TFset : set of TokCode; { in the parsing logic }
- RelOpSet : set of TokCode;
- AddOpSet : set of TokCode;
- MultOpSet : set of TokCode;
-
- FirstSym : SymPtr; { link to the start of the symbol-table chain }
-
- CurrentScope : integer; { nesting depth of procedures }
-
- Keywords : array[TokCode] of KeyStr;
- ErrorList : array[ErrCode] of ErrorStr;
-
-
- {*****************************************************************************}
- {*****************************************************************************}
- {***************** *****************}
- {***************** UTILITY ROUTINES *****************}
- {***************** *****************}
- {*****************************************************************************}
- {*****************************************************************************}
-
- { The Keywords and ErrorList arrays must be initialized when the
- program is started. So must the five small sets used to form
- symbols into groups. }
-
- procedure InitKeywords;
- begin
- Keywords[PgmSym] := 'PROGRAM';
- Keywords[VarSym] := 'VAR';
- Keywords[IntSym] := 'INTEGER';
- Keywords[BoolSym] := 'BOOLEAN';
- Keywords[BeginSym] := 'BEGIN';
- Keywords[EndSym] := 'END';
- Keywords[IfSym] := 'IF';
- Keywords[ThenSym] := 'THEN';
- Keywords[ElseSym] := 'ELSE';
- Keywords[WhileSym] := 'WHILE';
- Keywords[DoSym] := 'DO';
- Keywords[NotSym] := 'NOT';
- Keywords[OrSym] := 'OR';
- Keywords[AndSym] := 'AND';
- Keywords[ModSym] := 'MOD';
- Keywords[ProcSym] := 'PROCEDURE';
- Keywords[TrueSym] := 'TRUE';
- Keywords[FalseSym] := 'FALSE';
- Keywords[ReadSym] := 'READLN';
- Keywords[WriteSym] := 'WRITELN';
- end;
-
- procedure InitErrorList;
- begin
- ErrorList[Disk ] := 'Trouble with file or disk.';
- ErrorList[QChar ] := 'Unrecognized character in input.';
- ErrorList[Xpgm ] := 'No program header.';
- ErrorList[XIdent ] := 'Identifier expected.';
- ErrorList[XVar ] := 'Variable expected.';
- ErrorList[XInt ] := 'Integer value expected.';
- ErrorList[XBool ] := 'Boolean value expected.';
- ErrorList[XColon ] := 'Colon expected.';
- ErrorList[XType ] := 'Type designator expected.';
- ErrorList[XSemi ] := 'Semicolon expected.';
- ErrorList[XBegin ] := '"Begin" expected.';
- ErrorList[XSemEnd] := 'Semicolon or "end" expected.';
- ErrorList[XThen ] := '"Then" expected.';
- ErrorList[XDo ] := '"Do" expected.';
- ErrorList[XAssgn ] := 'Assignment statement expected.';
- ErrorList[XStmt ] := 'Statement expected.';
- ErrorList[DupDec ] := 'Duplicate declaration.';
- ErrorList[UnDec ] := 'Undeclared variable or procedure.';
- ErrorList[Match ] := 'Type mismatch.';
- ErrorList[XFactor] := 'Factor expected.';
- ErrorList[XParen ] := 'Closing parenthesis expected.';
- ErrorList[XDot ] := 'Period expected.';
- ErrorList[UnXEOF ] := 'Unexpected end of file.';
- end;
-
- procedure InitSets;
- begin
- TypeSet := [IntSym, BoolSym];
- TFset := [TrueSym, FalseSym];
- RelOpSet := [EQ..LT];
- AddOpSet := [Plus..OrSym];
- MultOpSet := [Times..ModSym];
- end;
-
-
- { The error-handling given here is minimal. Procedure Error is
- handed a code and prints the corresponding string. The only
- information supplied on what might have caused the error is
- a line number. The program then halts. See DADA.DOC Note 3. }
-
- procedure Error(Problem : ErrCode);
- begin
- WriteLn('ERROR IN LINE ',LineCount,': ',ErrorList[Problem]);
- WriteLn; WriteLn('Compilation aborted.');
- Halt;
- end;
-
- procedure SayHello;
- begin
- ClrScr;
- WriteLn;
- WriteLn;
- WriteLn('DADA: A demonstration compiler');
- WriteLn;
- WriteLn('This program is described in Computer Language, December, 1985');
- WriteLn;
- WriteLn;
- WriteLn;
- end;
-
-
- { The file handling is as rudimentary as the error routine. Further-
- more, the version given here depends on features peculiar to Turbo
- Pascal. See DADA.DOC Note 4. }
-
- procedure OpenFiles;
- var
- FileOK : boolean;
- InFileName : string[14];
- OutFileName : string[14];
- begin
- Write('Enter the name of the file to be compiled: ');
- ReadLn(InFileName);
- Assign(InFile, InFileName);
- {$I-} Reset(InFile); {$I+}
- FileOK := (IoResult = 0); if not FileOK then Error(Disk);
- WriteLn;
- Write('Enter the name of the output file: ');
- ReadLn(OutFileName);
- Assign(OutFile, OutFileName);
- {$I-} ReWrite(OutFile); {$I+};
- FileOK := (IoResult = 0); if not FileOK then Error(Disk);
- WriteLn; WriteLn;
- end;
-
- procedure CloseFiles;
- begin
- Close(InFile);
- Close(OutFile);
- end;
-
-
- {*****************************************************************************}
- {*****************************************************************************}
- {***************** *****************}
- {***************** LEXICAL ANALYZER *****************}
- {***************** *****************}
- {*****************************************************************************}
- {*****************************************************************************}
-
- { Procedure GetTK constitutes the scanner, or lexical analyzer. It
- calls on GetCH to read the next character from the input stream
- and uses Recognize to set up the two global-variable fields
- TK.Code and TK.Name. The main routine first strips out all com-
- ments and whitespace characters and then enters a state deter-
- mined by the first character of the remaining input. Each state
- corresponds to one clause of the case statement. Note that GetCH
- converts all alphabetic characters to upper case, so that the
- compiler in insensitive to case. Two errors can be issued by the
- scanner. Error(QChar) is called if a character outside the recog
- nized set appears in the input (except in comments). Error(UnXEOF)
- is reported if the scanner encounters end-of-file. Since GetTK
- is not called after the final period marking the end of a pro-
- gram, the scanner should never read the end of the file.
-
- NOTE: Each time the scanner is called, it goes to work on the
- character already in variable CH, not on the next character
- from the stream. When GetTK exits, CH holds the first char-
- acter beyond the token returned. In other words, the file
- pointer is pre-incremented.
-
- See also DADA.DOC Note 5. }
-
- procedure GetTK;
- var
- I : TokCode; { used in a FOR loop to check for keywords }
-
- procedure GetCH;
- begin
- if Eof(InFile) then CH := #0 else Read(InFile, CH); { get next if possible}
- CH := Upcase(CH); { make case immaterial}
- if CH = #13 then LineCount := LineCount + 1; { count for Error }
- end;
-
- procedure Recognize(Tok: TokCode); { Called once for each character }
- begin { scanned, adding it to the string }
- TK.Code := Tok; { in TK.Name and recording the }
- TK.Name := Concat(TK.Name,CH); { current analysis in TK.Code. Note}
- GetCH; { that TK.Code is not actually }
- end; { valid until GetTK returns. }
-
-
- { The first section of GetTK strips out comments and the whitespace
- characters #9 (tab), #10 (line feed), #12 (form feed), #13 (carriage
- return) and $32 (space). For comments any characters following a
- left brace are ignored up to the first right brace. Note that this
- means comments cannot be nested: Any number of opening braces will
- be canceled by the first closing brace. The nested while loops are
- needed because comments and whitespace can be interspersed in any
- sequence. }
-
- begin { GetTK }
- while (CH in ['{',#9,#10,#12,#13,#32]) do { loop while comment, space }
- begin
- if CH = '{' then repeat GetCH until CH = '}'; { eat up the comment }
- GetCH; { toss out the right brace }
- while (CH in [#9,#10,#12,#13,#32]) do GetCH; { eat the whitespace }
- end;
- TK.Name := ''; { reset the identifier string to null }
- case CH of { look at the current char from stream}
- 'A'..'Z' : begin { Ident or keyword}
- while (CH in ['A'..'Z','0'..'9']) do { add chars to the}
- Recognize(Ident); { TK.Name string }
- for I := PgmSym to WriteSym do { An Ident unless }
- if Keywords[I] = TK.Name then TK.Code := I; { listed here }
- end;
- '0'..'9' : while (CH in ['0'..'9']) do Recognize(Number); { numeric literal}
- '>' : begin
- Recognize(GT); { With two-symbol oper- }
- if CH = '=' then Recognize(GE); { ators, start by assum-}
- end; { ing the one-symbol }
- '<' : begin { form and then revise }
- Recognize(LT); { the verdict if the }
- if CH = '>' then Recognize(NE) { second character is }
- else if CH = '=' then Recognize(LE) { found. }
- end;
- ':' : begin
- Recognize(Colon);
- if CH = '=' then Recognize(AssignOp);
- end;
- '=' : Recognize(EQ);
- '+' : Recognize(Plus);
- '-' : Recognize(Minus);
- '*' : Recognize(Times);
- '/' : Recognize(Divide);
- '(' : Recognize(LeftParen);
- ')' : Recognize(RightParen);
- ';' : Recognize(Semi);
- '.' : Recognize(Dot);
- #0 : Error(UnXEOF); { Program has ended without a period }
- else Error(QChar); { Queer character; can't digest it }
- end;
- end;
-
-
- {*****************************************************************************}
- {*****************************************************************************}
- {***************** *****************}
- {***************** SYMBOL TABLE *****************}
- {***************** *****************}
- {*****************************************************************************}
- {*****************************************************************************}
-
- { The three routines Find, Declare and Blot manage the symbol
- table. The table is organized as a linked list in which
- FirstSym always points to the most recently added entry. The
- Next field points to the next-youngest entry, so that
- following the chain of Nexts ultimately leads to the first
- entry, which is always the declaration of the program
- header. Because all variables in Dada are global, the
- symbol table has a fixed, predictable structure: the program
- declaration is followed by variable declarations and then by
- procedure declarations. See DADA.DOC Note 6. }
-
- { Find is passed an identifier string and returns either a
- pointer to the corresponding symbol-table entry or nil if
- the identifier does not exist. It traverses the chain of
- entries beginning with FirstSym, and so the first matching
- entry will be found. }
-
- function Find(ID: IdentStr): SymPtr;
- var
- ThisSym : SymPtr;
- begin
- ThisSym := FirstSym; { start with the latest entry }
- while ((ID<>ThisSym^.Name) and { loop if no match and... }
- (ThisSym<>nil)) do { we're not at the end of list }
- ThisSym := ThisSym^.Next; { get next record }
- Find := ThisSym; { a match if there is one, or nil }
- end;
-
-
- { Declare installs both variable names and procedure names in
- the symbol table. ID is the name of the Identifier, as given
- in TK.Name; CL is either "Proc" or "Variable"; Kind is "IntSym"
- or "BoolSym" for variables, "Null" for procedures. }
-
- procedure Declare(ID: IdentStr; CL: SymClass; Kind: TokCode);
- var
- ThisSym : SymPtr;
- begin
- ThisSym := Find(ID); { See if it already exists }
- if ThisSym <> nil then Error(DupDec); { Call error & halt if it does }
- New(ThisSym); { Create a new record }
- ThisSym^.Next := FirstSym; { Swap pointers to put the... }
- FirstSym := ThisSym; { ...new record first in list }
- with FirstSym^ do
- begin
- Name := ID; { Plug in the values passed... }
- Class := CL; { ...as arguments... }
- VarType := Kind;
- Scope := CurrentScope; { ...and a value from a global }
- end;
- end;
-
-
- { Blot is called when the "end" of a block is reached and removes
- from the symbol table all names whose scope is confined to that
- block. The global variable CurrentScope is initialized to zero
- and incremented each time ParseBlock is called. Blot decrements
- CurrentScope and unlinks from the symbol table any entry whose
- Scope field is numerically greater than CurrentScope. }
-
- procedure Blot;
- var
- TrashSym : SymPtr;
- begin
- CurrentScope := CurrentScope - 1; { back to scope of next outer block}
- while FirstSym^.Scope > CurrentScope do { erase entries for closed block }
- begin
- TrashSym := FirstSym; { Give the pointer an alias }
- FirstSym := FirstSym^.Next; { Unlink the record }
- Dispose(TrashSym); { Free the allocated memory }
- end;
- end;
-
-
- {*****************************************************************************}
- {*****************************************************************************}
- {***************** *****************}
- {***************** CODE GENERATOR *****************}
- {***************** *****************}
- {*****************************************************************************}
- {*****************************************************************************}
-
- { The code generator is simple to the point of triviality, largely
- because the Forth virtual machine offers a very powerful assembly
- language. All address calculations, for instance, are done by the
- Forth interpreter. With a Forth system that accepts input as a
- sequence of CR/LF-delimited lines, the code generator could be
- reduced to a one-line procedure: WriteLn(OutFile,Forth). The
- routines given here produce Forth "screens," or blocks of 1,024
- bytes filled out with blanks (ASCII #32). For more on this for-
- matting see DADA.DOC Note 7.
-
- The main procedure of the code generator is Gen, which is called
- by the various parsing routines; the argument is a string to be
- written to the output file. The string is actually appended to
- a buffer that holds 16 lines of 64 characters (the standard .SCR
- format). When a line exceeds 62 characters, a new line is started;
- when line 16 is reached, the continuation symbol "-->" is written
- and the buffer is flushed to the disk and then reset to all blanks.
- To make the generated code more readable, the symbol "|" is defined
- as a control character that forces Gen to start a new line. The
- parsing routines issue Gen('|') after each colon definition.
-
- InitOutBuf sets the 1,024 bytes of the output buffer to the
- ASCII blank character (#32) and resets the two array indices
- OutLine and OutPoint to 1, which corresponds to the upper
- left-hand corner of a Forth screen. The procedure is not made
- local to Gen because it is called from the main initializing
- routine at program startup. }
-
- procedure InitOutBuf;
- begin
- for OutLine := 1 to 16 do
- for OutPoint := 1 to 64 do OutBuf[Outline,OutPoint] := #32;
- OutLine := 1; OutPoint := 1;
- end;
-
- procedure Gen(Forth : ForthStr);
- var
- FileOK : boolean;
- I, TempPoint, TempLine : integer; { two temps for testing length }
-
-
- { WriteBuf, like OpenFiles, is written with a Turbo-specific
- error-checking method. It simply writes the accumulated
- buffer to the output file and, if there is no disk error,
- calls InitOutBuf to reinitialize the array. }
-
- procedure WriteBuf;
- begin
- {$I-} Write(OutFile,OutBuf); {$I+}
- FileOK := (IoResult = 0); if not FileOK then Error(Disk);
- InitOutBuf;
- end;
-
-
- { NewLine resets the character counter and tests the line count;
- if we are on line 15, the recursive call Gen('-->') flushes
- the buffer and starts a new screen. }
-
- procedure NewLine;
- begin
- OutPoint := 1; TempLine := OutLine + 1;
- if TempLine >= 15 then Gen('-->') else OutLine := TempLine;
- end;
-
- begin { Gen }
- if Forth = '|' then begin NewLine; exit; end; { force new line & leave }
- TempPoint := OutPoint + Length(Forth); { Temp avoids out-of-range }
- if TempPoint > 62 then NewLine; { 62 (not 64) to allow blanks}
- for I := 1 to Length(Forth) do
- begin
- OutBuf[OutLine,OutPoint] := Forth[I]; { copy the string into buffer}
- OutPoint := OutPoint + 1;
- end;
- OutPoint := OutPoint + 1; { allow one blank after code }
- if ((Forth = '-->') or (Forth = ';S')) then WriteBuf;
- end;
-
-
- { GenHeader creates a "run-time library" that precedes the object
- code for all Dada programs. Some Forth systems may need additional
- or different definitions here. The READ routine provides keyboard
- input of signed integers. It could readily be improved. }
-
- procedure GenHeader(PgmName : IdentStr);
- begin
- Gen('( Output of Dada compiler )'); Gen('|'); { Screen 0 comments }
- Gen(Concat('( To execute type: 1 LOAD ',PgmName,' )')); Gen(';S');
- Gen('FORTH DEFINITIONS DECIMAL'); Gen('|');
- Gen('1 CONSTANT TRUE '); { }
- Gen('0 CONSTANT FALSE'); { These synonyms will }
- Gen(': NEGATE MINUS ;'); Gen('|'); { not be needed by all }
- Gen(': NOT 0= ;'); { Forth systems; others }
- Gen(': <> = NOT ;'); { may be required. }
- Gen(': >= < NOT ;'); { }
- Gen(': <= > NOT ;'); Gen('|'); { }
- Gen(': READ KEY DUP 45 = IF TRUE SWAP EMIT KEY ELSE FALSE SWAP'); Gen('|');
- Gen(' THEN 0 SWAP BEGIN DUP 13 = NOT WHILE DUP 48 < OVER'); Gen('|');
- Gen(' 57 > OR IF DROP 7 EMIT ELSE DUP EMIT 48 - SWAP 10 * +'); Gen('|');
- Gen(' THEN KEY REPEAT DROP SWAP IF NEGATE THEN SWAP ! ;'); Gen('|');
- Gen(': WRITE @ . CR ;');
- Gen('-->');
- end;
-
-
- {*****************************************************************************}
- {*****************************************************************************}
- {***************** *****************}
- {***************** PARSER *****************}
- {***************** *****************}
- {*****************************************************************************}
- {*****************************************************************************}
-
- { ParseProgram and the routines nested under it constitute the main
- driver of DADA.PAS. The organization is outlined in DADA.DOC Note 8.
- Each routine calls on GetTK (the scanner). Statements haveing to do
- with parsing proper are interleaved with those for type checking
- and code generation. }
-
- procedure ParseProgram;
- var
- HoldID : IdentStr; { hangs onto the program name }
-
-
- { ParseVariables is called once by ParseProgram. If the current
- token is not "var," there are no variables in the program and
- the routine exits. Otherwise each declaration is checked for
- proper form and a statement "0 VARIABLE IDENT" is generated to
- allocate 16 bits of storage and record its address under the
- name IDENT in the Forth dictionary. }
-
- procedure ParseVariables;
- var
- HoldVar : IdentStr;
- begin
- if TK.Code = VarSym then { else no variables in entire program }
- begin
- GetTK; { eat the "var" token }
- repeat { loop for arbitrary number of variables }
- if TK.Code <> Ident then Error(XIdent); { format is "Ident: Type;" }
- HoldVar := TK.Name; GetTK; { hang onto identifier }
- if TK.Code <> Colon then Error(XColon); GetTK;
- if not (TK.Code in TypeSet) then Error(XType); { TypeSet=IntSym,BoolSym }
- Declare(HoldVar,Variable,TK.Code); GetTK; { install in symbol table}
- Gen(Concat('0 VARIABLE ',HoldVar)); Gen('|'); { gen code & new line }
- if TK.Code <> Semi then Error(XSemi); GetTK; { every decl. must have }
- until (TK.Code in [ProcSym,BeginSym]); { no more variables }
- end;
- end;
-
- procedure ParseBlock(Caller: IdentStr); { "Caller" will be the Ident gen-}
- var { erated when "begin" is reached.}
- HoldID : IdentStr; { HoldID passed as Caller to }
- { next nested block. }
-
- procedure ParseStatement;
- var
- IdentPtr : SymPtr; { used to check symbol table }
- HoldID : IdentStr; { hold while class & type checked }
- HoldType : TokCode; { hold while exp. type is checked }
-
-
- { All the routines from ParseExpression on down are defined as
- functions rather than procedures. They return the type (integer
- or boolean) deduced from the operations specified. The "HoldOp"
- variables are needed to delay code generation for postfix notation.
- The "HoldType" variables record the type of the first operand so
- that it can be compared with the type of the second operand. }
-
- function ParseExpression: TokCode;
- var
- HoldRelOp : IdentStr;
- HoldType : TokCode;
-
- function ParseSimpleExpr: TokCode;
- var
- HoldAddOp : IdentStr;
- HoldType : TokCode;
-
- function ParseTerm: TokCode;
- var
- HoldMultOp : IdentStr;
- HoldType : TokCode;
-
- function ParseSignedFactor: TokCode;
- var
- IdentPtr : SymPtr;
- HoldType : TokCode;
-
-
- { ParseFactor is the lowest-level routine in the parser. For a factor
- to be recognized as valid it must be either a boolean literal (TRUE
- of FALSE), a numeric literal, an identifier that designates a var-
- iable or a parenthesized expression. The case statement considers
- each of these possibilities in turn. }
-
- function ParseFactor: TokCode;
- var
- IdentPtr: SymPtr; { needed to consult the symbol table }
- begin
- case TK.Code of
- TrueSym,
- FalseSym : begin
- ParseFactor := BoolSym; { return type boolean }
- Gen(TK.Name); GetTK; { Gen TRUE or FLASE }
- end;
- Number : begin
- ParseFactor := IntSym; { return type integer }
- Gen(TK.Name); GetTK; { Gen numeric literal }
- end;
- Ident : begin
- IdentPtr := Find(TK.Name); { look up the name }
- if IdentPtr = nil then Error(UnDec) { not found? }
- else begin
- if IdentPtr^.Class <> Variable { can't be proc }
- then Error(XVar)
- else begin
- ParseFactor := IdentPtr^.VarType; { rtn Int or Bool }
- Gen(ConCat(TK.Name,' @')); GetTK; { code to fetch }
- end;
- end;
- end;
- LeftParen : begin { call ParseExpression recursively }
- GetTK; { and return the type }
- ParseFactor := ParseExpression; { that it returns }
- if TK.Code <> RightParen then Error(XParen);
- GetTK; { eat the ")" }
- end;
- else Error(XFactor); { if none of above, not a valid factor }
- end;
- end;
-
-
- { ParseSignedFactor is introduced into the chain of expression-
- parsing functions merely to handle a unary plus, minus or logical
- NOT preceding a factor. If none of these is found, the code drops
- through directly to ParseFactor. If one of them is found, the
- appropriate code is generated after ParseFactor returns, thereby
- converting the notation to postfix form. }
-
- begin {ParseSignedFactor}
- case TK.Code of
- Plus : begin
- GetTK; { eat the + sign }
- HoldType := ParseFactor; { parse & get type }
- if HoldType <> IntSym
- then Error(XInt) { +boolean illegal }
- else ParseSignedFactor := IntSym; { HoldType=Int }
- end;
- Minus : begin
- GetTK; { eat the - sign }
- HoldType := ParseFactor; { parse & get type }
- if HoldType <> IntSym
- then Error(XInt) { -boolean illegal }
- else begin
- ParseSignedFactor := IntSym; { HoldType = Int }
- Gen('NEGATE'); { code toggles sign}
- end;
- end;
- NotSym : begin
- GetTK; { eat NOT symbol }
- HoldType := ParseFactor; { parse & get type }
- if HoldType <> BoolSym
- then Error(XBool) { NOT number illegal}
- else begin
- ParseSignedFactor := BoolSym; { HoldType = boolean}
- Gen('NOT'); { code to invert }
- end;
- end;
- else ParseSignedFactor := ParseFactor; { no +, -, NOT found}
- end;
- end;
-
-
- { ParseTerm recognizes either "SignedFactor" or a subexpression of
- the form "SignedFactor MultOp Term". Thus it will always call
- ParseSignedFactor, and if the next token is a MultOp, it will
- also call itself recursively. }
-
- { For a lacuna in type-checking, see DADA.DOC Note 9. }
-
- begin {ParseTerm}
- HoldType := ParseSignedFactor; { parse & get type first operand }
- if (TK.Code in MultOpSet) then { TK = *, /, OR? }
- begin
- HoldMultOp := TK.Name; { save the Op for postfix }
- GetTK; { and eat it }
- if not (HoldType = ParseTerm) { parse & get type 2d operand }
- then Error(Match); { 1st & 2d operands same type? }
- Gen(HoldMultOp); { issue the saved operator }
- end;
- ParseTerm := HoldType; { return the operand type }
- end;
-
-
- { ParseSimpleExpr recognizes either "Term" or a subexpression of
- the form "Term AddOp SimpleExpr". It always calls ParseTerm
- and if the next token is an AddOp, it also calls itself. }
-
- begin {ParseSimpleExpr}
- HoldType := ParseTerm; { parse & get type 1st operand }
- if (TK.Code in AddOpSet) then { TK = +, -, AND? }
- begin
- HoldAddOp := TK.Name; { save the Op for postfix }
- GetTK; { and eat it }
- if not (HoldType = ParseSimpleExpr) { parse & get type 2d operand }
- then Error(Match); { 1st & 2d operands same type? }
- Gen(HoldAddOp); { issue the save operator }
- end;
- ParseSimpleExpr := HoldType; { return the operand type }
- end;
-
-
- { ParseExpression recognizes either "SimpleExpr" or a sub-
- expression of the form "SimpleExpr RelOp SimpleExpr." It always
- calls ParseSimpleExpr once, and if the next token is a RelOp, it
- also makes a second call to ParseSimpleExpr. Note that this scheme
- is slightly different from the recursive pattern in the lower-
- level functions. On that model one would expect "SimpleExpr RelOp
- Expression," so that to parse the second operand the function would
- call itself. Such a construction, however, would allow expressions
- of the form A > B < C = D, and so on. It would be easy enough to
- assign a meaning to these expressions, but the language definition
- does not supply one. }
-
- begin {ParseExpression}
- HoldType := ParseSimpleExpr; { parse & get type 1st operand }
- ParseExpression := HoldType; { type to be returned if no RelOp }
- if (TK.Code in RelOpSet) then { TK is >, <, =, etc. ? }
- begin
- HoldRelOp := TK.Name; { save operator for postfix }
- GetTK; { and eat it }
- if not (HoldType = ParseSimpleExpr) { parse & get type 2d operand }
- then Error(Match); { 1st & 2d operands same type ? }
- ParseExpression := BoolSym; { if Expr has Relop, type = bool }
- Gen(HoldRelOp); { issue the saved operator }
- end;
- end;
-
-
- { ParseStatement is the most elaborate routine in the parser. The
- grammar for Dada specifies five constructs to be recognized as
- valid statements: a compound statement delimited by "begin" and
- "end," an assignment statement, a procedure call, an "if" state-
- ment and a "while" statement. The parser actually includes two
- more possibilities: "Read" and "Write" statements, which can be
- viewed as predefined procedures. With one exception the grammar
- allows these possibilities to be distinguished on the basis of
- the first token presented to ParseStatement. The exception is
- the discrimination between assignment statements and procedure
- calls, which both begin with an identifier. The parser chooses
- its path by checking the identifier's class in the symbol table:
- a value can be assigned only to a variable, and only a procedure
- can be called. }
-
- { See also DADA.DOC Note 10 }
-
- begin {ParseStatement}
- case TK.Code of
- BeginSym : begin { must be compound }
- GetTK; { eat the "BEGIN" }
- while TK.Code <> EndSym do { loop while stmts }
- begin
- ParseStatement; { calls itself }
- if not (TK.Code in [Semi,EndSym]) { delimiter expected }
- then Error(XSemEnd);
- if TK.Code = Semi then GetTK; { go back for another}
- end;
- GetTK; { TK must be "END"; eat it }
- end;
- IfSym : begin { must be If statement}
- GetTK; { eat the "IF" }
- if not (BoolSym = ParseExpression) { parse expr & ck type}
- then Error(XBool); { only boolean allowed}
- Gen('IF'); { Forth IF after expr }
- if TK.Code <> ThenSym { must have then part }
- then Error(XThen); GetTK; { if present, eat it }
- ParseStatement; { calls itself }
- if TK.Code = ElseSym then { else is optional }
- begin { if present, Gen code}
- Gen('ELSE'); GetTK; { and eat the token }
- ParseStatement; { calls itself again }
- end;
- Gen('THEN'); { end of Forth cond. }
- end;
- WhileSym : begin { this is a while loop}
- Gen('BEGIN'); GetTK; { Gen marker; eat tok }
- if not (BoolSym = ParseExpression) { parse and check type}
- then Error(XBool); { must be boolean }
- if TK.Code <> DoSym then Error(XDo); { must have Do part }
- Gen('WHILE'); GetTK; { eat; Gen Forth test }
- ParseStatement; { recursive call }
- Gen('REPEAT'); { end of Forth block }
- end;
- Ident : begin { assignment or call }
- IdentPtr := Find(TK.Name); { look up in table }
- if IdentPtr = nil then Error(UnDec); { can't find it }
- if IdentPtr^.Class = Variable then { must be assignment }
- begin
- HoldType := IdentPtr^.VarType; { save Ident type... }
- HoldID := TK.Name; GetTK; { and name for postfix}
- if TK.Code <> AssignOp { must have := sign }
- then Error(XAssgn); GetTK; { if so, eat it }
- if not (HoldType = ParseExpression) { parse expr & ck type}
- then Error(Match); { report mismatch }
- Gen(Concat(HoldID,' !')); { code to store value }
- end
- else { must be proc call }
- begin { invoke the Forth }
- Gen(TK.Name); GetTK; { word and consume }
- end; { the token }
- end;
- ReadSym : begin { predefined READ proc }
- GetTK; { eat token }
- if TK.Code <> Ident { must name variable... }
- then Error(XIdent); { to hold the value read }
- IdentPtr := Find(TK.Name); { look up in table }
- if IdentPtr^.Class <> Variable { cannot be proc Ident }
- then Error(XVar);
- if IdentPtr^.VarType <> IntSym { only integers can... }
- then Error(XInt); { be read in Dada }
- Gen(Concat(TK.Name,' READ')); { issue the call in Forth}
- GetTK; { eat up the Ident }
- end;
- WriteSym : begin { predefined WRITE proc }
- GetTK; { eat token }
- if TK.Code <> Ident { must name variable... }
- then Error(XIdent); { to be written }
- IdentPtr := Find(TK.Name); { look it up }
- if IdentPtr^.Class <> Variable { cannot be Proc name }
- then Error(XVar);
- if IdentPtr^.VarType <> IntSym { only integers can... }
- then Error(XInt); { be written }
- Gen(Concat(TK.Name,' WRITE')); { issue the call }
- GetTK; { consume the Ident }
- end;
- else Error(XStmt); { if none of the above }
- end;
- end;
-
-
- { ParseBlock has two parts. It first checks for a procedure declar-
- ation; if it finds one, it parses the header and calls itself again.
- Ultimately, the BEGIN symbol that marks the statement part of a block
- must be reached. Each statement is then processed in turn (by Parse-
- Statement) until the matching END is reached. The possible nesting
- of blocks within blocks is accommodated automatically by the re-
- cursive organization of the routines. Recall that ParseBlock is passed
- an identifier as an argument, namely the Ident of the procedure or
- program that issued the call. This Ident is written into the code as
- the designator of a Forth word when "begin" is reached. }
-
- begin { ParseBlock }
- CurrentScope := CurrentScope + 1; { bump up nesting count }
- while TK.Code = ProcSym do { proc declarations }
- begin
- GetTK; { eat "procedure" token }
- if TK.Code <> Ident then Error(XIdent); { proc must have name }
- HoldID := TK.Name; { save to pass to next level }
- Declare(TK.Name,Proc,Null); { put in table as proc name }
- GetTK; { eat the Ident }
- if TK.Code <> Semi then Error(XSemi); { must have a semi }
- GetTK; { throw the semi away }
- ParseBlock(HoldID); { call again, pass proc name }
- if TK.Code <> Semi then Error(XSemi); { proc block must have semi }
- GetTK; { eat it up }
- end;
- if TK.Code <> BeginSym then Error(XBegin); { block begins "BEGIN" }
- Gen(Concat(': ',Caller)); { start colon definition }
- GetTK; { eat the "BEGIN" }
- while TK.Code <> EndSym do { loop for all statements }
- begin
- ParseStatement; { call for each stmt }
- if not (TK.Code in [Semi,EndSym]) { separator or terminator... }
- then Error(XSemEnd); { need after each one }
- if TK.Code = Semi then GetTK; { if semi, eat & go back }
- end; { TK must have been "END" }
- GetTK; { eat the END }
- Gen(';'); Gen('|'); { end Forth def, force CR }
- Blot; { clean up symbol table }
- end;
-
-
- { ParseProgram sets the entire compiler in motion. It first handles
- the program header, saving the program name (which will be the
- last Forth word generated). The program is declared in the symbol
- table as a procedure like any other, except that its scope field
- has a value of zero, which no other procedure can have. ParseProgram
- then calls ParseVariables and ParseBlock, which process the body
- of the program. Finally there is a check for the final dot. }
-
- begin { ParseProgram }
- if TK.Code <> PgmSym then Error(XPgm); { must begin "PROGRAM" }
- GetTK; { dispose of that token }
- if TK.Code <> Ident then Error(XIdent); { program must have a name }
- HoldID := TK.Name; { save, pass to ParseBlock }
- Declare(TK.Name,Proc,Null); { install in table }
- GenHeader(TK.Name); { output the Forth prelude }
- GetTK; { eat the Ident }
- if TK.Code <> Semi then Error(XSemi); { header must end with semi }
- GetTK; { toss out the semi }
- ParseVariables; { do the global declarations }
- ParseBlock(HoldID); { give Block the program name }
- if TK.Code <> Dot then Error(XDot); { not done until "." read }
- Gen(';S'); { tell Forth to stop }
- end;
-
-
- {*****************************************************************************}
- {*****************************************************************************}
- {***************** *****************}
- {***************** MAIN BLOCK *****************}
- {***************** *****************}
- {*****************************************************************************}
- {*****************************************************************************}
-
- { The main driver routine has little to do: initialize some global
- variables, open the files and crank up the parser. }
-
- procedure Initialize;
- begin
- InitErrorList; { fill up one static array... }
- InitKeywords; { and then another }
- InitSets; { define sets of tokens }
- InitOutBuf; { set up a clean slate }
- FirstSym := nil; { make pointer point nowhere }
- CurrentScope := 0; { at start scope is global }
- LineCount := 1; { start on first source line }
- SayHello; { paint the screen }
- end;
-
- begin { main block }
- Initialize;
- OpenFiles;
- Read(InFile,CH); CH := Upcase(CH); { get first char for scanner }
- GetTK; { and first token for parser }
- ParseProgram;
- CloseFiles;
- WriteLn('Compilation complete.');
- end.
-