home *** CD-ROM | disk | FTP | other *** search
- {[B+]}
- {*--------------------------*
- | Lexical Scanner, Utility |
- *--------------------------*}
-
-
- procedure Symbol_Put(This_Char: Char); {ch to symbol}
- begin
- Sym_Len := Sym_Len + 1;
- Symbol[Sym_Len] := This_Char;
- Get_Char;
- end {symbol_put} ;
-
-
- procedure Print_Char; {print ASCII chars not belonging to Pascal}
- begin
- if Write_Col >= Out_Line_Len then Print_Line(Indent + Continue_Spaces);
- if Formatting then Write_A(Ch);
- Get_Char;
- end {print_char} ;
-
-
- procedure Scan_Blanks; {scan off blanks in the input}
- begin
- while (Ch = ' ') and not End_File do Get_Char;
- end;
-
-
- procedure String_Constant;
-
- var
- String_End: Boolean;
-
- begin
- New_Input_Line := false;
- Symbol_Found := true;
- Sym := Str_Const;
-
- repeat
- if Ch = '#' then
- begin
- Symbol_Put(Ch);
-
- if Ch = '$' then
- begin
- Symbol_Put(Ch);
-
- while Ch in ['0'..'9', 'A'..'F', 'a'..'f'] do
- Symbol_Put(UpperCase[Ch]);
- end
- else while Ch in ['0'..'9'] do Symbol_Put(Ch);
- end
- else if Ch = '^' then
- begin
- Symbol_Put(Ch);
- if Ch in ['@'..'_', 'a'..'z'] then Symbol_Put(UpperCase[Ch]);
- end
- else if Ch = '''' then
- begin
- String_End := false;
-
- repeat
- Symbol_Put(Ch);
- if Ch = '''' then
- begin
- Symbol_Put(Ch);
- String_End := Ch <> '''';
- end;
- until New_Input_Line or String_End;
-
- end;
-
- String_End := (Ch <> '#') and (Ch <> '^') and (Ch <> '''');
- until New_Input_Line or String_End;
-
- if not String_End then Abort(Syntax);
- end; {String_Constant}
-
-
- procedure Test_Resv_Wrd;
-
- var
- Id: Word_Type;
- Index: 1..No_Res_Words;
- P: 1..Max_Word_Len;
-
- begin {test for reserved word}
-
- if (Sym_Len >= 2) and (Sym_Len <= Max_Word_Len) then
- begin
-
- for P := 1 to Max_Word_Len do
- if P > Sym_Len then Id[P] := ' '
- else Id[P] := LowerCase[Symbol[P]];
-
- with Res_Len[Sym_Len] do
- begin {length index search}
- Index := Low_Index;
-
- while (Resv_Wrd[Index] <> Id) and (Index < Hi_Index) do
- Index := Index + 1;
-
- end {length index search} ;
-
- if Resv_Wrd[Index] = Id then Sym := Res_Symbol[Index]
- else Sym := Identifier;
- end
- else Sym := Identifier;
- end {test_resv_wrd} ;
-
- {*-----------------------------*
- | Identifier or Reserved Word |
- *-----------------------------*}
-
-
- procedure Adjust_Spelling;
- {
- ! Adjust the spelling of the current identifier to the first spelling
- ! encountered for the same identifier. Identifiers are matched without
- ! regard to case or break-characters. If this is the first appearance of
- ! this identifier, the exact spelling is saved for future use. If it is
- ! not the first appearance, it is replaced with the spelling from the first
- ! appearance.
- }
-
- var
- This_Id: Id_Ptr; {Ref for current id}
- Hash_Base: Hash_Value; {hash value for this ident}
- This_Piece: String_Block_Index; {current piece of string table}
- This_Char: String_Piece_Index; {character in current piece}
- J: Line_Index; {induction var}
-
-
- function Hash_Ident: Hash_Value; {hash the current identifier}
-
- var
- i: Line_Index; {induction var}
- H: Hash_Value; {partial hash value}
-
- begin
- H := 0;
- for i := 1 to Sym_Len do
- if Symbol[i] <> '_' then
- H := (H * 3 + Ord(UpperCase[Symbol[i]])) mod Hash_Max;
- Hash_Ident := H;
- end; {Hash_Ident}
-
-
- function Same_Ident(P: Id_Ptr): Boolean;
- {
- ! Returns true if the identifier pointed to by p is the same as the
- ! current identifier
- }
-
- var
- i: Integer; {induction var on symbol characters}
- J: Integer; {count of characters in id}
- This_Piece: String_Block_Index; {current piece of string table}
- This_Char: String_Piece_Index; {current character within the
- piece}
-
- begin
-
- if P = nil then Same_Ident := true
- else
- begin
- i := 0;
- J := 0;
- This_Piece := (P^.Start - 1) div String_Block_Size;
- This_Char := (P^.Start - 1) mod String_Block_Size;
-
- repeat
- if i < Sym_Len then
- repeat
- i := i + 1;
- until (Symbol[i] <> '_') or (i = Sym_Len);
-
- if J < P^.Len then
- repeat
- J := J + 1;
-
- if This_Char = String_Block_Max then
- begin
- This_Piece := This_Piece + 1;
- This_Char := 0;
- end
- else This_Char := This_Char + 1;
- until (J = P^.Len) or
- (String_Index[This_Piece]^[This_Char] <> '_');
- until ((J = P^.Len) and (i = Sym_Len)) or
- (UpperCase[Symbol[i]] <>
- UpperCase[String_Index[This_Piece]^[This_Char]]);
-
- Same_Ident := (J = P^.Len) and (i = Sym_Len) and
- ((UpperCase[Symbol[i]] =
- UpperCase[String_Index[This_Piece]^[This_Char]]) or
- (Symbol[i] = '_') or
- (String_Index[This_Piece]^[This_Char] = '_'));
- end;
- end; {Same_Id}
-
- begin
- Hash_Base := Hash_Ident; {hash for current identifier}
- This_Id := Hash_Table[Hash_Base];
-
- while not Same_Ident(This_Id) do This_Id := This_Id^.Next;
-
- if This_Id = nil then
- begin {Add this identifier to the table for future reference}
- new(This_Id);
-
- with This_Id^ do
- begin
- Next := Hash_Table[Hash_Base];
- Hash_Table[Hash_Base] := This_Id;
- Len := Sym_Len;
- Start := String_Top + 1;
- end;
-
- if String_Top = 0 then new(String_Index[0]);
- This_Piece := String_Top div String_Block_Size;
- This_Char := String_Top mod String_Block_Size;
- for J := 1 to Sym_Len do
- begin
- if This_Char = String_Block_Max then
- begin
- This_Piece := This_Piece + 1;
- new(String_Index[This_Piece]);
- This_Char := 0;
- end
- else This_Char := This_Char + 1;
- String_Top := String_Top + 1;
- String_Index[This_Piece]^[This_Char] := Symbol[J];
- end;
- end
- else
- with This_Id^ do
- begin
- This_Piece := Start div String_Block_Size;
- This_Char := Start mod String_Block_Size;
- Sym_Len := Len;
- for J := 1 to Len do
- begin
- Symbol[J] := String_Index[This_Piece]^[This_Char];
- if This_Char = String_Block_Max then
- begin
- This_Piece := This_Piece + 1;
- This_Char := 0;
- end
- else This_Char := This_Char + 1;
- end;
- end;
-
- end; {Adjust_Spelling}
-
-
- procedure Set_Symbol_Case(Kind: Symbols);
-
- var
- Last_Underscore: Boolean; {true if last char underscore}
- i, J: Line_Index; {induction vars}
-
- begin {Convert a reserved word or identifier to the proper case}
- if Kind = Identifier then
- begin
- if Portability_Mode then
- begin
- J := 0;
- Last_Underscore := true;
- for i := 1 to Sym_Len Do
- if Symbol[i] = '_' then Last_Underscore := true
- else if Last_Underscore then
- begin
- Last_Underscore := false;
- J := J + 1;
- Symbol[J] := UpperCase[Symbol[i]];
- end
- else
- begin
- J := J + 1;
- Symbol[J] := LowerCase[Symbol[i]];
- end;
-
- for i := J + 1 to Sym_Len do Symbol[i] := ' ';
- Sym_Len := J;
- end
- else if First_Spelling then Adjust_Spelling
- else if Uc_Idents then
- for i := 1 to Sym_Len do Symbol[i] := UpperCase[Symbol[i]]
- else if Lc_Idents then
- for i := 1 to Sym_Len do Symbol[i] := LowerCase[Symbol[i]];
- end
- else
- begin
- if Uc_Res_Words then
- for i := 1 to Sym_Len do Symbol[i] := UpperCase[Symbol[i]]
- else if Lc_Res_Words then
- for i := 1 to Sym_Len do Symbol[i] := LowerCase[Symbol[i]];
- end;
- end; {Set_Symbol_Case}
-
-
- procedure Alpha_Char;
- begin {identifier or reserved word to symbol}
- New_Input_Line := false;
- Symbol_Found := true;
- while Ch in ['A'..'Z', 'a'..'z', '0'..'9', '_'] do Symbol_Put(Ch);
- Test_Resv_Wrd;
- Set_Symbol_Case(Sym);
- end {alpha char} ;
-
-
- procedure Numeric_Char;
- begin
- {unsigned number to symbol}
- New_Input_Line := false;
- Symbol_Found := true;
- Sym := Number;
- if Ch = '$' then
- begin {Hexadecimal number}
- Symbol_Put('$');
-
- while Ch in ['0'..'9', 'A'..'F', 'a'..'f'] do
- Symbol_Put(UpperCase[Ch]);
-
- end
- else
- begin
-
- while (Ch >= '0') and (Ch <= '9') do {integer or fractional portion}
- Symbol_Put(Ch);
-
- if Ch = '.' then
- begin
- Symbol_Put(Ch);
-
- if Ch = '.' then
- begin {actually subrange, must fudge}
- Sym_Len := Sym_Len - 1; {erase period}
- Double_Period := true;
- end
- else
- while (Ch >= '0') and (Ch <= '9') do Symbol_Put(Ch);
-
- end;
-
- if (Ch = 'E') or (Ch = 'e') then
- begin {exponential portion}
- Symbol_Put('E');
- if (Ch = '+') or (Ch = '-') then {sign} Symbol_Put(Ch);
-
- while (Ch >= '0') and (Ch <= '9') do {characteristic}
- Symbol_Put(Ch);
-
- end {exponential}
- end;
- end {numeric char} ;
-
-
- procedure Special_Char;
- begin {operators or delimiters to symbol}
- Symbol_Found := true; {untrue only for comments}
- New_Input_Line := false;
-
- case Ch of {special symbols}
- '+':
- begin {plus}
- Sym := Plus;
- Symbol_Put(Ch);
- end {plus} ;
-
- '-':
- begin {minus}
- Sym := Minus;
- Symbol_Put(Ch);
- end {minus} ;
-
- '*':
- begin {multiply}
- Sym := Mult;
- Symbol_Put(Ch);
- end {multiply} ;
-
- '/':
- begin {divide}
- Sym := Divide;
- Symbol_Put(Ch);
- end; {divide}
-
- '.':
- begin {subrange or period}
- Sym := Period;
- Symbol_Put(Ch);
- if Double_Period then
- begin {fudge a subrange}
- Symbol[2] := '.';
- Sym_Len := 2;
- Sym := Subrange;
- end
- else if Ch = '.' then
- begin {subrange}
- Sym := Subrange;
- Symbol_Put(Ch);
- end;
- Double_Period := false;
- end {subrange or period} ;
-
- ',':
- begin {comma}
- Sym := Comma;
- Symbol_Put(Ch);
- end {comma} ;
-
- ';':
- begin {semicolon}
- Sym := Semicolon;
- Symbol_Put(Ch);
- end {semicolon} ;
-
- ':':
- begin {becomes, or colon}
- Sym := Colon;
- Symbol_Put(Ch);
- if Ch = '=' then
- begin {becomes}
- Sym := Becomes;
- Symbol_Put(Ch);
- end {becomes}
- end {becomes, or colon} ;
-
- '=':
- begin {equals}
- Sym := Equal;
- Symbol_Put(Ch);
- end {equals} ;
-
- '<':
- begin {less than, less equal, not equal}
- Sym := Rel_Op;
- Symbol_Put(Ch);
- if (Ch = '=') or (Ch = '>') then Symbol_Put(Ch);
- end {less than, less equal, not equal} ;
-
- '>':
- begin {greater equal, greater than}
- Sym := Rel_Op;
- Symbol_Put(Ch);
- if Ch = '=' then Symbol_Put(Ch);
- end {great than, or great equals} ;
-
- '^':
- begin
- { Determine whether we have the pointer symbol or the start of
- a string constant (like " = ^M^J; ") }
-
- if In_Type_Or_Var_Dcl or
- (Last_Sym in [Identifier, Close_Brack]) then
- begin {pointer}
- Sym := Pointer;
- Symbol_Put(Ch);
- end
- else String_Constant;
- end;
-
- '''', '#': String_Constant;
-
- ')':
- begin {close parenthesis}
- Sym := Close_Paren;
- Symbol_Put(Ch);
- end {close parenthesis} ;
-
- '[':
- begin {open bracket}
- Sym := Open_Brack;
- Symbol_Put(Ch);
- end {open bracket} ;
-
- ']':
- begin {close bracket}
- Sym := Close_Brack;
- Symbol_Put(Ch);
- end {close bracket} ;
- end; {case}
- end {special_char} ;
-
- {*------------------*
- | Start of Comment |
- *------------------*}
-
-
- procedure Comment_Char;
-
- var
- Init_Char: Char; {starting character}
-
- begin {possible start of comment}
-
- if Ch = '(' then
- begin {see if comment or just open paren}
- Init_Char := Ch;
- Symbol_Put(Ch);
-
- if Ch = '*' then
- begin
- Sym_Len := 0;
- Do_Comment(New_Input_Line, Column - 1, Init_Char);
- end
- else
- begin
- Sym := Open_Paren;
- New_Input_Line := false;
- Symbol_Found := true;
- end;
- end
- else Do_Comment(New_Input_Line, Column, Ch);
- end; {Comment_Char}
-
- {*---------------------------*
- | Get Next Symbol (get_sym) |
- *---------------------------*}
-
-
- procedure Get_Sym;
- begin {extract next basic sym from text}
- Sym_Len := 0;
- Symbol_Found := false;
- Sym_Written := false;
-
- repeat
- if End_File then
- begin
- Sym := Text_End;
- Symbol_Found := true
- end
- else if Ch = ' ' then Scan_Blanks
- else
- begin
-
- case Ch of {lexical analysis}
- '0'..'9', '$': Numeric_Char;
-
- 'A'..'Z', 'a'..'z', '_': Alpha_Char;
-
- ')', '*', '/', '+', ',', '-', '.', ':', ';', '<', '=', '>',
- '[', ']', '^', '''', '#':
- Special_Char;
-
- '(', '{': Comment_Char;
-
- '!', '&', '?', '\', '`', '|', '~', '}', '"', '@': Print_Char;
-
- else
- if Formatting and (Ch = Chr(Ff)) then
- begin
- Print_Line(0);
- Print_Char;
- Space(0);
- Clear_Breaks;
- End_Line := true;
- end
- else Get_Char;
- end
- end;
- until Symbol_Found
- end {get_sym} ;
-
- {*-------------------------*
- | Parser Utility Routines |
- *-------------------------*}
-
-
- procedure Next_Sym;
- begin {output current sym and input next}
-
- if Sym <> Text_End then
- begin {symbol}
- if not Sym_Written then Put_Sym;
- Get_Sym;
- end {symbol}
-
- end {next_sym} ;
-
-
- procedure check(Fsym: Set_Of_Syms);
- begin {check if the next symbol is in fsym}
- if not (Sym in Fsym) then Abort(Syntax);
- end; {check}
-
-
- procedure Check_Sym(Desired: Symbols);
- begin {abort if current symbol not desired, else next_sym}
- if Sym = Desired then Next_Sym
- else Abort(Syntax);
- end; {Check_Sym}
-
-
- procedure Next_On_New_Line(Spacing, Delta: Integer);
- {
- ! Space "spacing" lines, indent, put new symbol, and increment indent
- ! by "delta"
- }
- begin
- if (Blank_Lines > 0) or (Current_Line = 0) then Spacing := Spacing - 1;
-
- repeat
- Format_Line(Indent);
- Spacing := Spacing - 1;
- until Spacing < 0;
-
- Indent_Plus(Delta);
- Stat_Indent := Indent;
- Next_Sym;
- end; {Next_On_New_Line}
-
-
- procedure Log_Symbol_Start(var Log: Col_Log);
- begin {log the starting loc of the next symbol}
-
- with Log do
- begin
- Log_Char := Char_Count + 1;
- Log_Col := Write_Col + 1;
- Log_Line := Current_Line;
- end;
-
- end; {Log_Symbol_Start}
-
- {*--------------------*
- | Statement bunching |
- *--------------------*}
-
-
- procedure Bunch(Start: Col_Log; {start of statement}
- var Success: Boolean);
- {
- ! Move a statement up to the previous line if it will fit
- }
- begin
-
- with Start do
- if Formatting and (Char_Count - Log_Char < Buf_Size) and
- (Char_Count >= Log_Char) and (Log_Line + 1 = Current_Line) and
- (Write_Col - Indent + Log_Col < Out_Line_Len) then
- begin {move it up, adjusting things as we go}
-
- with Unwritten[Log_Char mod Buf_Size] do
- begin
- Action_Is := Spaces;
- Spacing := 1;
- Write_Col := Write_Col - Indent + Log_Col + 1;
- end;
-
- Current_Line := Current_Line - 1;
- Success := true;
- end
- else Success := false;
-
- end; {Bunch}
-
-
- procedure Bunch_Statement(Start: Col_Log);
-
- var
- Tab_Int: Integer; {tab interval}
- Next_Tab: Integer; {next tab location}
-
- begin {see if we can put multiple statements on a line}
- if Formatting then
- with Start do
- begin
- Tab_Int := (Out_Line_Len - Indent) div Stats_Per_Line;
- if Tab_Int = 0 then Tab_Int := 1;
- if Log_Col = Indent + 1 then Log_Col := Indent;
-
- {fudge for start}
-
- Next_Tab := (Log_Col - Indent + Tab_Int - 1) div Tab_Int *
- Tab_Int + Indent;
-
- if (Next_Tab > Indent) and (Log_Line + 1 = Current_Line) and
- (Char_Count - Log_Char < Buf_Size) and
- (Next_Tab + Write_Col - Indent <= Out_Line_Len) then
- begin {move up to prior line and fiddle pointers}
-
- with Unwritten[Log_Char mod Buf_Size] do
- begin
- Action_Is := Spaces;
- Spacing := Next_Tab - Log_Col + 1;
- end;
-
- Write_Col := Next_Tab + Write_Col - Indent;
- Current_Line := Current_Line - 1;
- end;
- end;
-
- end; {Bunch_Statement}
-
-
- procedure Terminal_Semicolon;
- {
- ! Parse a possible terminal semicolon at the end of a statement. This
- ! is done this way to make sure that it gets indented properly
- }
- begin
- if (Sym = Semicolon) and not Sym_Written then Put_Sym;
- end; {Terminal_Semicolon}