home *** CD-ROM | disk | FTP | other *** search
-
- %{
-
- (* PASLEX.L: lexical analyzer for Pascal, adapted to TP Yacc, 2-28-89 AG *)
-
- %}
-
- %{
- (*
- * lex input file for pascal scanner
- *
- * extensions: two ways to spell "external" ("external" and "extern"), and "->" is accepted for "^".
- *
- *)
- %}
-
- %{
-
- (* Note: Keywords are determined by scanning a keyword table, rather
- than including the keyword patterns in the Lex source which is done
- in the original version of this file. I prefer this method, because
- it makes the grammar itself more readable (handling case-insensitive
- keywords in Lex is quite cumbersome, e.g., you will have to write
- something like [Aa][Nn][Dd] to match the keyword `and'), and also
- produces a more (space-) efficient analyzer (184 states and 375
- transitions for the keyword pattern version, against only 40 states
- and 68 transitions for the keyword table version). *)
-
- procedure commenteof;
-   (* reports on standard output that the input ended while a comment
-      was still open, together with the current line number *)
-   begin
-     write('unexpected EOF inside comment at line ');
-     writeln(yylineno)
-   end(*commenteof*);
-
- function upper(str : String) : String;
-   (* returns str with every character passed through upCase; str is a
-      value parameter, so the caller's string is left unchanged *)
-   var pos : integer;
-   begin
-     (* the characters are independent of each other, so the order in
-        which the positions are visited does not matter *)
-     for pos := length(str) downto 1 do
-       str[pos] := upCase(str[pos]);
-     upper := str
-   end(*upper*);
-
- function is_keyword(id : string; var token : integer) : boolean;
-   (* Checks whether id is a Pascal keyword; the comparison ignores case
-      because id is converted to upper case first.
-      Returns true and stores the corresponding token number in token if
-      id is a keyword; returns false and leaves token untouched
-      otherwise. *)
-   const
-     id_len = 20;
-   type
-     Ident = string[id_len];
-   const
-     (* Table of Pascal keywords.
-        The table is searched with a binary search, so it MUST be kept in
-        ascending order. Note that 'EXTERN' sorts before 'EXTERNAL'; with
-        the two entries the other way round 'EXTERNAL' was never found. *)
-     no_of_keywords = 39;
-     keyword : array [1..no_of_keywords] of Ident = (
-       'AND',       'ARRAY',     'BEGIN',    'CASE',
-       'CONST',     'DIV',       'DO',       'DOWNTO',
-       'ELSE',      'END',       'EXTERN',   'EXTERNAL',
-       'FILE',      'FOR',       'FORWARD',  'FUNCTION',
-       'GOTO',      'IF',        'IN',       'LABEL',
-       'MOD',       'NIL',       'NOT',      'OF',
-       'OR',        'OTHERWISE', 'PACKED',   'PROCEDURE',
-       'PROGRAM',   'RECORD',    'REPEAT',   'SET',
-       'THEN',      'TO',        'TYPE',     'UNTIL',
-       'VAR',       'WHILE',     'WITH');
-     (* Token numbers, parallel to the keyword table: keyword_token[k] is
-        the token returned for keyword[k]. *)
-     keyword_token : array [1..no_of_keywords] of integer = (
-       _AND,        _ARRAY,      _BEGIN,     _CASE,
-       _CONST,      _DIV,        _DO,        _DOWNTO,
-       _ELSE,       _END,        _EXTERNAL,  _EXTERNAL,
-       (* EXTERNAL: 2 spellings (see above)! *)
-       _FILE,       _FOR,        _FORWARD,   _FUNCTION,
-       _GOTO,       _IF,         _IN,        _LABEL,
-       _MOD,        _NIL,        _NOT,       _OF,
-       _OR,         _OTHERWISE,  _PACKED,    _PROCEDURE,
-       _PROGRAM,    _RECORD,     _REPEAT,    _SET,
-       _THEN,       _TO,         _TYPE,      _UNTIL,
-       _VAR,        _WHILE,      _WITH);
-   var m, n, k : integer;
-   begin
-     id := upper(id);
-     (* binary search over keyword[m..n]: *)
-     m := 1; n := no_of_keywords;
-     while m<=n do
-       begin
-         k := m+(n-m) div 2;
-         if id=keyword[k] then
-           begin
-             is_keyword := true;
-             token := keyword_token[k];
-             exit
-           end
-         else if id>keyword[k] then
-           m := k+1
-         else
-           n := k-1
-       end;
-     (* search interval became empty: id is not in the table *)
-     is_keyword := false
-   end(*is_keyword*);
-
- %}
-
- NQUOTE [^']
-
- %%
-
- %{
-
- (* Variables used by the rule actions:
-    c  - holds the characters fetched with get_char while a comment
-         is being skipped;
-    kw - receives the token number that is_keyword stores when the
-         matched identifier is a keyword.
-
-    Overview of the rules that follow: an identifier is first looked up
-    with is_keyword and yields either the keyword token or IDENTIFIER;
-    every other pattern returns one fixed token. The two spellings
-    "->" and "^" share a single action and both return UPARROW.
-
-    NOTE(review): no rule visible here matches a real number with an
-    exponent part (REALNUMBER requires digits, a dot, digits) - confirm
-    that this is intended. *)
-
- var c : char;
- kw : integer;
-
- %}
-
- [a-zA-Z]([a-zA-Z0-9])* if is_keyword(yytext, kw) then
- return(kw)
- else
- return(IDENTIFIER);
-
- ":=" return(ASSIGNMENT);
- '({NQUOTE}|'')+' return(CHARACTER_STRING);
- ":" return(COLON);
- "," return(COMMA);
- [0-9]+ return(DIGSEQ);
- "." return(DOT);
- ".." return(DOTDOT);
- "=" return(EQUAL);
- ">=" return(GE);
- ">" return(GT);
- "[" return(LBRAC);
- "<=" return(LE);
- "(" return(LPAREN);
- "<" return(LT);
- "-" return(MINUS);
- "<>" return(NOTEQUAL);
- "+" return(PLUS);
- "]" return(RBRAC);
- [0-9]+"."[0-9]+ return(REALNUMBER);
- ")" return(RPAREN);
- ";" return(SEMICOLON);
- "/" return(SLASH);
- "*" return(STAR);
- "**" return(STARSTAR);
- "->" |
- "^" return(UPARROW);
-
- "(*" |
- "{" begin
-     (* Skip a comment: read characters until a closing delimiter or
-        the end of input is seen. Either opening delimiter may be
-        closed by either closing delimiter. No token is returned. *)
-     repeat
-       c := get_char;
-       case c of
-         (* a closing brace ends the comment; this branch used to be an
-            empty statement, so brace comments were never terminated *)
-         '}' : exit;
-         '*' : begin
-                 (* a star ends the comment only when a right parenthesis
-                    follows; otherwise the character is pushed back so
-                    that it is examined again by the loop *)
-                 c := get_char;
-                 if c=')' then exit else unget_char(c)
-               end;
-         #0 : begin
-                (* #0 from get_char is treated as end of input inside
-                   the comment: report it and stop skipping *)
-                commenteof;
-                exit;
-              end;
-       end;
-     until false
-   end;
- [ \n\t\f] ;
-
- . return(ILLEGAL);
-