home *** CD-ROM | disk | FTP | other *** search
- UNIT Tokenizer; { Turbo Pascal 4.0-up; QuickPascal 1.0 }
-
- INTERFACE
- USES Dos, FNameHax, Back1Hax;
-
- TYPE Tkind =
- (newline,whitespace,symbol,number,
- qstring,bracom,eolcom,special);
-
- CONST KindName:
- ARRAY[Tkind] OF string[10] =
- ('newline','whitespace','symbol','number',
- 'qstring','bracom','eolcom','special');
-
- VAR KindOfToken: Tkind;
-
- PROCEDURE ReadToken(VAR f:text; VAR t:string);
- { - Reads a token from f into t, consuming
- exactly the characters that make up the token.
- - Sets global variable KindOfToken to the kind of token read.
- - KindName[KindOfToken] is then a printable description
- of the kind of token. }
-
- IMPLEMENTATION
-
- CONST WhiteSpaces: SET OF char = [#0..#9,#11..#12,#14..' '];
- Alphabetics: SET OF char = ['a'..'z','A'..'Z','_'];
- Digits: SET OF char = ['0'..'9'];
-
- PROCEDURE ReadToken(VAR f:text; VAR t:string);
-
- LABEL OneTooMany, AllDone;
-
- VAR c, quote, prev: Char;
-
- BEGIN
- read(f,c); t:=c; { Get first char of token }
-
- IF (c=^M) THEN { NEWLINE - Type 1 }
- BEGIN { ^M^J or just ^M }
- KindOfToken := newline;
- read(f,c); t:=t+c; { Look at the next character }
- IF c=^J THEN { If it's ^J, it belongs to this token }
- GOTO AllDone { so keep it }
- ELSE
- GOTO OneTooMany { Otherwise we've read one too many }
- END
-
- ELSE IF (c=^J) THEN { NEWLINE - Type 2 }
- BEGIN { ^J by itself }
- KindOfToken := newline;
- GOTO AllDone
- END
-
- ELSE IF c IN WhiteSpaces THEN { WHITESPACE }
- BEGIN
- KindOfToken := whitespace;
- REPEAT
- read(f,c); t:=t+c
- UNTIL NOT (c IN WhiteSpaces);
- GOTO OneTooMany
- END
-
- ELSE IF c IN Alphabetics THEN { SYMBOL }
- BEGIN
- KindOfToken := symbol;
- REPEAT
- read(f,c); t:=t+c
- UNTIL (NOT (c IN Alphabetics)) AND (NOT (c in Digits));
- GOTO OneTooMany
- END
-
- ELSE IF (c='''') OR (c='"') THEN { QUOTED STRING }
- BEGIN
- KindOfToken := qstring;
- quote := c;
- REPEAT
- read(f,c); t:=t+c;
- IF (c=quote) THEN
- IF NextChar(f) <> quote THEN { found end of string }
- GOTO AllDone
- ELSE { it's a quote written double; grab it }
- BEGIN
- read(f,c); t:=t+c
- END
- UNTIL eoln(f);
- FileError(f,'Expecting '+quote+', found end of line')
- END
-
- ELSE IF (c IN Digits) OR
- ((c='.') AND (NextChar(f) IN Digits)) THEN { NUMBER }
- BEGIN
- KindOfToken := number;
- IF c<>'.' THEN
- REPEAT
- read(f,c); t:=t+c { Some digits... }
- UNTIL NOT (c IN Digits);
- IF c='.' THEN { Decimal point... }
- REPEAT
- read(f,c); t:=t+c { More digits... }
- UNTIL NOT (c IN Digits);
- IF (c='E') or (c='e') THEN { E for exponent... }
- BEGIN
- read(f,c); t:=t+c;
- IF (c IN Digits) OR
- (c='+') OR (c='-') THEN { +, -, or digit }
- REPEAT
- read(f,c); t:=t+c { Digits of exponent }
- UNTIL NOT (c IN Digits)
- END;
- GOTO OneTooMany
- END
-
- ELSE IF c='{' THEN { BRACKETED COMMENT - Type 1 }
- BEGIN
- KindOfToken := bracom;
- t:=' '; { Return a blank to the calling program }
- REPEAT
- read(f,c);
- IF c='}' THEN GOTO AllDone
- UNTIL eof(f);
- FileError(f,'Expecting ''}'', found end of file')
- END
-
- ELSE IF (c='(') AND
- (NextChar(f)='*') THEN { BRACKETED COMMENT - Type 2 }
- BEGIN
- KindOfToken := bracom;
- t:=' '; { Return a blank to the calling program }
- REPEAT
- prev:=c; read(f,c);
- IF (c=')') AND (prev='*') THEN GOTO AllDone
- UNTIL eof(f);
- FileError(f,'Expecting ''*)'', found end of file')
- END
-
- ELSE IF c='%' THEN { COMMENT TO END OF LINE }
- BEGIN
- KindOfToken := eolcom;
- REPEAT
- read(f,c); t:=t+c
- UNTIL (c=^M) OR (c=^J);
- GOTO OneTooMany
- END
-
- ELSE { THE CHARACTER BY ITSELF IS A TOKEN }
- BEGIN
- KindOfToken := special;
- GOTO AllDone
- END;
-
- OneTooMany: { Jump here if an extra character has been read }
- back1char(f);
- delete(t,length(t),1);
-
- AllDone: { Jump here if nothing else needs to be done }
- END;
-
- END. { Tokenizer }