home *** CD-ROM | disk | FTP | other *** search
-
- program Lex;
-
- uses LexBase, LexTables, LexPos, LexDFA, LexOpt, LexList, LexRules, LexMsgs;
-
- (* Copyright (c) 1990,91 by Albert Graef, Schillerstr. 18,
- 6509 Schornsheim/Germany
- All rights reserved *)
-
- (* TP Lex - A lexical analyzer generator for MS-DOS/Turbo Pascal
-
- Version 3.0 as of April 91
-
- Author
-
- Albert Graef
- Schillerstr. 18
- 6509 Schornsheim/Germany
-
- Graef@DMZRZU71.bitnet
-
- Synopsis LEX [options] lex-file[.L] [output-file[.PAS]]
-
- Options
-
- /v "Verbose:" Lex generates a readable description of the generated
- lexical analyzer, written to lex-file with new extension .LST.
-
- /o "Optimize:" Lex optimizes DFA tables to produce a minimal DFA
-
- Description
-
- This is a reimplementation of the popular UNIX lexical analyzer generator
- Lex for MS-DOS and Turbo Pascal.
-
- Differences from UNIX Lex:
-
- - Produces output code for Turbo Pascal, rather than for C.
-
- - Character tables (%T) are not supported; neither are any directives
- to determine internal table sizes (%p, %n, etc.).
-
- *)
-
- procedure get_line;
- (* obtain line from source file *)
- begin
- readln(yyin, line);
- inc(lno);
- end(*get_line*);
-
- procedure next_section;
- (* find next section mark (%%) in code template *)
- var line : String;
- begin
- while not eof(yycod) do
- begin
- readln(yycod, line);
- if line='%%' then exit;
- writeln(yyout, line);
- end;
- end(*next_section*);
-
- (* Semantic routines: *)
-
- var n_rules : Integer; (* current number of rules *)
-
- procedure define_start_state ( symbol : String; pos : Integer );
- (* process start state definition *)
- begin
- with sym_table^[key(symbol, max_keys, lookup, entry)] do
- if sym_type=none then
- begin
- inc(n_start_states);
- if n_start_states>max_start_states then
- fatal(state_table_overflow);
- sym_type := start_state_sym;
- start_state := n_start_states;
- writeln(yyout, 'const ', symbol, ' = ', 2*start_state, ';');
- first_pos_table^[2*start_state] := newIntSet;
- first_pos_table^[2*start_state+1] := newIntSet;
- end
- else
- error(symbol_already_defined, pos)
- end(*define_start_state*);
-
- procedure define_macro ( symbol, replacement : String );
- (* process macro definition *)
- begin
- with sym_table^[key('{'+symbol+'}', max_keys, lookup, entry)] do
- if sym_type=none then
- begin
- sym_type := macro_sym;
- subst := newStr(replacement);
- end
- else
- error(symbol_already_defined, 1)
- end(*define_macro*);
-
- procedure add_rule;
- (* process rule *)
- var i : Integer;
- FIRST : IntSet;
- begin
- addExpr(r, FIRST);
- if n_st=0 then
- if cf then
- setunion(first_pos_table^[1]^, FIRST)
- else
- begin
- setunion(first_pos_table^[0]^, FIRST);
- setunion(first_pos_table^[1]^, FIRST);
- end
- else
- if cf then
- for i := 1 to n_st do
- setunion(first_pos_table^[2*st[i]+1]^, FIRST)
- else
- for i := 1 to n_st do
- begin
- setunion(first_pos_table^[2*st[i]]^, FIRST);
- setunion(first_pos_table^[2*st[i]+1]^, FIRST);
- end
- end(*add_rule*);
-
- procedure generate_table;
-
- (* write the DFA table to the output file
-
- Tables are represented as a collection of typed array constants:
-
- type YYTRec = record
- cc : set of Char; { characters }
- s : Integer; { next state }
- end;
-
- const
-
- { table sizes: }
-
- yynmarks = ...;
- yynmatches = ...;
- yyntrans = ...;
- yynstates = ...;
-
- { rules of mark positions for each state: }
-
- yyk : array [1..yynmarks] of Integer = ...;
-
- { rules of matches for each state: }
-
- yym : array [1..yynmatches] of Integer = ...;
-
- { transition table: }
-
- yyt : array [1..yyntrans] of YYTRec = ...;
-
- { offsets into the marks, matches and transition tables: }
-
- yykl, yykh,
- yyml, yymh,
- yytl, yyth : array [0..yynstates-1] of Integer = ...;
-
- *)
-
- var yynmarks, yynmatches, yyntrans, yynstates : Integer;
- yykl, yykh,
- yyml, yymh,
- yytl, yyth : array [0..max_states-1] of Integer;
-
- procedure counters;
- (* compute counters and offsets *)
- var s, i : Integer;
- begin
- yynstates := n_states; yyntrans := n_trans;
- yynmarks := 0; yynmatches := 0;
- for s := 0 to n_states-1 do with state_table^[s] do
- begin
- yytl[s] := trans_lo; yyth[s] := trans_hi;
- yykl[s] := yynmarks+1; yyml[s] := yynmatches+1;
- for i := 1 to size(state_pos^) do
- with pos_table^[state_pos^[i]] do
- if pos_type=mark_pos then
- if pos=0 then
- inc(yynmatches)
- else if pos=1 then
- inc(yynmarks);
- yykh[s] := yynmarks; yymh[s] := yynmatches;
- end;
- end(*counters*);
-
- procedure writecc(var f : Text; cc : CClass);
- (* print the given character class *)
- function charStr(c : Char) : String;
- begin
- case c of
- #0..#31, (* nonprintable characters *)
- #127..#255 : charStr := '#'+intStr(ord(c));
- '''' : charStr := '''''''''';
- else charStr := ''''+c+'''';
- end;
- end(*charStr*);
- var c1, c2 : Char;
- col : Integer;
- tag : String;
- begin
- write(f, '[ ');
- col := 0;
- for c1:=#0 to #255 do
- if c1 in cc then
- begin
- if col>0 then
- begin
- write(f, ',');
- inc(col);
- end;
- if col>40 then
- (* insert line break *)
- begin
- writeln(f);
- write(f, ' ':12);
- col := 0;
- end;
- c2 := c1;
- while (c2<#255) and (succ(c2) in cc) do
- inc(c2);
- if c1=c2 then
- tag := charStr(c1)
- else if c2=succ(c1) then
- tag := charStr(c1)+','+charStr(c2)
- else
- tag := charStr(c1)+'..'+charStr(c2);
- write(f, tag);
- inc(col, length(tag));
- c1 := c2
- end;
- write(f, ' ]');
- end(*writecc*);
-
- procedure tables;
- (* print tables *)
- var s, i, count : Integer;
- begin
- writeln(yyout);
- writeln(yyout, 'type YYTRec = record');
- writeln(yyout, ' cc : set of Char;');
- writeln(yyout, ' s : Integer;');
- writeln(yyout, ' end;');
- writeln(yyout);
- writeln(yyout, 'const');
- (* table sizes: *)
- writeln(yyout);
- writeln(yyout, 'yynmarks = ', yynmarks, ';');
- writeln(yyout, 'yynmatches = ', yynmatches, ';');
- writeln(yyout, 'yyntrans = ', yyntrans, ';');
- writeln(yyout, 'yynstates = ', yynstates, ';');
- (* mark table: *)
- writeln(yyout);
- writeln(yyout, 'yyk : array [1..yynmarks] of Integer = (');
- count := 0;
- for s := 0 to n_states-1 do with state_table^[s] do
- begin
- writeln(yyout, ' { ', s, ': }');
- for i := 1 to size(state_pos^) do
- with pos_table^[state_pos^[i]] do
- if (pos_type=mark_pos) and (pos=1) then
- begin
- write(yyout, ' ', rule); inc(count);
- if count<yynmarks then write(yyout, ',');
- writeln(yyout);
- end;
- end;
- writeln(yyout, ');');
- (* match table: *)
- writeln(yyout);
- writeln(yyout, 'yym : array [1..yynmatches] of Integer = (');
- count := 0;
- for s := 0 to n_states-1 do with state_table^[s] do
- begin
- writeln(yyout, '{ ', s, ': }');
- for i := 1 to size(state_pos^) do
- with pos_table^[state_pos^[i]] do
- if (pos_type=mark_pos) and (pos=0) then
- begin
- write(yyout, ' ', rule); inc(count);
- if count<yynmatches then write(yyout, ',');
- writeln(yyout);
- end;
- end;
- writeln(yyout, ');');
- (* transition table: *)
- writeln(yyout);
- writeln(yyout, 'yyt : array [1..yyntrans] of YYTrec = (');
- count := 0;
- for s := 0 to n_states-1 do with state_table^[s] do
- begin
- writeln(yyout, '{ ', s, ': }');
- for i := trans_lo to trans_hi do
- with trans_table^[i] do
- begin
- write(yyout, ' ( cc: ');
- writecc(yyout, cc^);
- write(yyout, '; s: ');
- write(yyout, next_state, ')');
- inc(count);
- if count<yyntrans then write(yyout, ',');
- writeln(yyout);
- end;
- end;
- writeln(yyout, ');');
- (* offset tables: *)
- writeln(yyout);
- writeln(yyout, 'yykl : array [0..yynstates-1] of Integer = (');
- for s := 0 to n_states-1 do
- begin
- write(yyout, '{ ', s, ': } ', yykl[s]);
- if s<n_states-1 then write(yyout, ',');
- writeln(yyout);
- end;
- writeln(yyout, ');');
- writeln(yyout);
- writeln(yyout, 'yykh : array [0..yynstates-1] of Integer = (');
- for s := 0 to n_states-1 do
- begin
- write(yyout, '{ ', s, ': } ', yykh[s]);
- if s<n_states-1 then write(yyout, ',');
- writeln(yyout);
- end;
- writeln(yyout, ');');
- writeln(yyout);
- writeln(yyout, 'yyml : array [0..yynstates-1] of Integer = (');
- for s := 0 to n_states-1 do
- begin
- write(yyout, '{ ', s, ': } ', yyml[s]);
- if s<n_states-1 then write(yyout, ',');
- writeln(yyout);
- end;
- writeln(yyout, ');');
- writeln(yyout);
- writeln(yyout, 'yymh : array [0..yynstates-1] of Integer = (');
- for s := 0 to n_states-1 do
- begin
- write(yyout, '{ ', s, ': } ', yymh[s]);
- if s<n_states-1 then write(yyout, ',');
- writeln(yyout);
- end;
- writeln(yyout, ');');
- writeln(yyout);
- writeln(yyout, 'yytl : array [0..yynstates-1] of Integer = (');
- for s := 0 to n_states-1 do
- begin
- write(yyout, '{ ', s, ': } ', yytl[s]);
- if s<n_states-1 then write(yyout, ',');
- writeln(yyout);
- end;
- writeln(yyout, ');');
- writeln(yyout);
- writeln(yyout, 'yyth : array [0..yynstates-1] of Integer = (');
- for s := 0 to n_states-1 do
- begin
- write(yyout, '{ ', s, ': } ', yyth[s]);
- if s<n_states-1 then write(yyout, ',');
- writeln(yyout);
- end;
- writeln(yyout, ');');
- writeln(yyout);
- end(*tables*);
-
- begin
- counters; tables;
- end(*generate_table*);
-
- (* Parser: *)
-
- const
-
- max_items = 255;
-
- var
-
- itemstr : String;
- itemc : Integer;
- itempos,
- itemlen : array [1..max_items] of Integer;
-
- procedure split ( str : String; count : Integer );
- (* split str into at most count whitespace-delimited items
- (result in itemstr, itemc, itempos, itemlen) *)
- procedure scan(var act_pos : Integer);
- (* scan one item *)
- var l : Integer;
- begin
- while (act_pos<=length(itemstr)) and
- ((itemstr[act_pos]=' ') or (itemstr[act_pos]=tab)) do
- inc(act_pos);
- l := 0;
- while (act_pos+l<=length(itemstr)) and
- (itemstr[act_pos+l]<>' ') and (itemstr[act_pos+l]<>tab) do
- inc(l);
- inc(itemc);
- itempos[itemc] := act_pos;
- itemlen[itemc] := l;
- inc(act_pos, l+1);
- while (act_pos<=length(itemstr)) and
- ((itemstr[act_pos]=' ') or (itemstr[act_pos]=tab)) do
- inc(act_pos);
- end(*scan*);
- var i, act_pos : Integer;
- begin
- itemstr := str; act_pos := 1;
- itemc := 0;
- while (itemc<count-1) and (act_pos<=length(itemstr)) do scan(act_pos);
- if act_pos<=length(itemstr) then
- begin
- inc(itemc);
- itempos[itemc] := act_pos;
- itemlen[itemc] := length(itemstr)-act_pos+1;
- end;
- end(*split*);
-
- function itemv ( i : Integer ) : String;
- (* return ith item in splitted string (whole string for i=0) *)
- begin
- if i=0 then
- itemv := itemstr
- else if (i<0) or (i>itemc) then
- itemv := ''
- else
- itemv := copy(itemstr, itempos[i], itemlen[i])
- end(*itemv*);
-
- procedure code;
- begin
- while not eof(yyin) do
- begin
- get_line;
- if line='%}' then
- exit
- else
- writeln(yyout, line);
- end;
- error(unmatched_lbrace, length(line)+1);
- end(*code*);
-
- procedure definitions;
- procedure definition;
- function check_id ( symbol : String ) : Boolean;
- var i : Integer;
- begin
- if (symbol='') or not (symbol[1] in letters) then
- check_id := false
- else
- begin
- for i := 2 to length(symbol) do
- if not (symbol[i] in alphanums) then
- begin
- check_id := false;
- exit;
- end;
- check_id := true
- end
- end(*check_id*);
- var i : Integer;
- com : String;
- begin
- split(line, 2);
- com := upper(itemv(1));
- if (com='%S') or (com='%START') then
- begin
- split(line, max_items);
- for i := 2 to itemc do
- if check_id(itemv(i)) then
- define_start_state(itemv(i), itempos[i])
- else
- error(syntax_error, itempos[i]);
- end
- else if check_id(itemv(1)) then
- define_macro(itemv(1), itemv(2))
- else
- error(syntax_error, 1);
- end(*definition*);
- begin
- while not eof(yyin) do
- begin
- get_line;
- if line='' then
- writeln(yyout)
- else if line='%%' then
- exit
- else if line='%{' then
- code
- else if (line[1]='%') or (line[1] in letters) then
- definition
- else
- writeln(yyout, line)
- end;
- end(*definitions*);
-
- procedure rules;
- begin
- next_section;
- if line='%%' then
- while not eof(yyin) do
- begin
- get_line;
- if line='' then
- writeln(yyout)
- else if line='%%' then
- begin
- next_section;
- exit;
- end
- else if line='%{' then
- code
- else if (line[1]<>' ') and (line[1]<>tab) then
- begin
- if n_rules=0 then next_section;
- inc(n_rules);
- parse_rule(n_rules);
- if errors=0 then
- begin
- add_rule;
- write(yyout, ' ', n_rules);
- if strip(stmt)='|' then
- writeln(yyout, ',')
- else
- begin
- writeln(yyout, ':');
- writeln(yyout, blankStr(expr), stmt);
- end;
- end
- end
- else
- writeln(yyout, line)
- end
- else
- error(unexpected_eof, length(line)+1);
- next_section;
- end(*rules*);
-
- procedure auxiliary_procs;
- begin
- if line='%%' then
- begin
- writeln(yyout);
- while not eof(yyin) do
- begin
- get_line;
- writeln(yyout, line);
- end;
- end;
- end(*auxiliary_procs*);
-
- (* Main program: *)
-
- var i : Integer;
-
- begin
-
- (* sign-on: *)
-
- writeln(sign_on);
-
- (* parse command line: *)
-
- if paramCount=0 then
- begin
- writeln(usage);
- writeln(options);
- halt(0);
- end;
-
- lfilename := '';
- pasfilename := '';
-
- for i := 1 to paramCount do
- if copy(paramStr(i), 1, 1)='/' then
- if upper(paramStr(i))='/V' then
- verbose := true
- else if upper(paramStr(i))='/O' then
- optimize := true
- else
- begin
- writeln(invalid_option, paramStr(i));
- halt(1);
- end
- else if lfilename='' then
- lfilename := addExt(upper(paramStr(i)), 'L')
- else if pasfilename='' then
- pasfilename := addExt(upper(paramStr(i)), 'PAS')
- else
- begin
- writeln(illegal_no_args);
- halt(1);
- end;
-
- if lfilename='' then
- begin
- writeln(illegal_no_args);
- halt(1);
- end;
-
- if pasfilename='' then pasfilename := root(lfilename)+'.PAS';
- lstfilename := root(lfilename)+'.LST';
-
- (* open files: *)
-
- assign(yyin, lfilename);
- assign(yyout, pasfilename);
- assign(yylst, lstfilename);
-
- reset(yyin); if ioresult<>0 then fatal(cannot_open_file+lfilename);
- rewrite(yyout); if ioresult<>0 then fatal(cannot_open_file+pasfilename);
- rewrite(yylst); if ioresult<>0 then fatal(cannot_open_file+lstfilename);
-
- (* search code template in current directory, then on path where Lex
- was executed from: *)
- codfilename := 'YYLEX.COD';
- assign(yycod, codfilename);
- reset(yycod);
- if ioresult<>0 then
- begin
- codfilename := upper(path(paramStr(0)))+'YYLEX.COD';
- assign(yycod, codfilename);
- reset(yycod);
- if ioresult<>0 then fatal(cannot_open_file+codfilename);
- end;
-
- (* parse source grammar: *)
-
- write('parse ... ');
- lno := 0; n_rules := 0; next_section;
- first_pos_table^[0] := newIntSet;
- first_pos_table^[1] := newIntSet;
- definitions;
- rules;
- if n_rules=0 then error(empty_grammar, length(line)+1);
- if errors=0 then
- begin
- (* generate DFA table and listings and write output code: *)
- write('DFA construction ... ');
- makeDFATable;
- if optimize then
- begin
- write('DFA optimization ... ');
- optimizeDFATable;
- end;
- write('code generation ... ');
- if verbose then listDFATable;
- generate_table; next_section;
- end;
- auxiliary_procs;
- if errors=0 then writeln('DONE');
-
- (* close files: *)
-
- close(yyin); close(yyout); close(yylst); close(yycod);
-
- (* print statistics: *)
-
- if errors>0 then
- writeln( lno, ' lines, ',
- errors, ' errors found.' )
- else
- writeln( lno, ' lines, ',
- n_rules, ' rules, ',
- n_pos, '/', max_pos, ' p, ',
- n_states, '/', max_states, ' s, ',
- n_trans, '/', max_trans, ' t.');
-
- if warnings>0 then writeln(warnings, ' warnings.');
-
- writeln( n_bytes, '/', max_bytes, ' bytes of memory used.');
-
- (* terminate: *)
-
- if errors>0 then erase(yyout);
- if file_size(lstfilename)=0 then
- erase(yylst)
- else
- writeln('(see ', lstfilename, ' for more information)');
-
- halt(errors);
-
- end(*Lex*).
-