home *** CD-ROM | disk | FTP | other *** search
-
- (*
- * TPTC - Turbo Pascal to C translator
- *
- * (C) 1988 Samuel H. Smith (rev. 13-Feb-88)
- *
- *)
-
-
- (********************************************************************)
- (*
- * lexical scanner
- *
- *)
-
- function coctal(n: integer): anystring;
- {convert an integer into a c style octal character literal}
- function odigit(n: integer): char;
- (* convert an integer into an octal digit *)
- begin
- odigit := chr( (n and 7) + ord('0') );
- end;
- begin
- coctal := '''\' + odigit(n shr 6) + odigit(n shr 3) + odigit(n) + '''';
- toktype := strng;
- end;
-
-
- (********************************************************************)
- procedure getchar;
- {consume the current char and get the next one}
- begin
-
- if read_include then
- begin
-
- if eof(inclfd) then
- begin
- close(inclfd);
-
- read_include := false;
- writeln(ofd[level]);
-
- if includeinclude then
- writeln(ofd[level],'/* end of ',incl_name,' */')
- else
- begin
- discard_nested;
- write(con,^M^J,LJUST(' ',level*2+15),srcfiles[level],^M);
- end;
- end
- else
- read(inclfd, nextc);
- end;
-
- if not read_include then
- begin
- if eof(infd) then
- endfile;
-
- read(infd, nextc);
- end;
-
- if nextc = ^J then
- begin
- inc(srclines[level]);
-
- if (srclines[level] mod 6) = 0 then
- write(con,srcfiles[level],'(',srclines[level],') '^M);
-
- abortcheck;
- end;
-
- end;
-
-
- (********************************************************************)
- function usec: char;
- {use up the current character(return it) and get
- the next one from the input stream}
- var
- c: char;
- begin
- c := nextc;
- getchar;
- usec := c;
- end;
-
-
- (********************************************************************)
- function newc(n: string40): string40;
- {replace the current character with a different one and get the next
- character from the input stream}
- var
- c: char;
- begin
- c := nextc;
- getchar;
- newc := n;
- end;
-
-
- (********************************************************************)
- procedure scan_ident;
- {scan an identifier; output is ltok; nextc is first character following
- the identifier; toktype = identifier; this is the protocol for all of
- the scan_xxxx procedures in the lexical analyzer}
- begin
-
- toktype := unknown;
- ltok := '';
-
- repeat
- case nextc of
- 'A'..'Z':
- begin
- if map_lower then
- nextc := chr( ord(nextc)+32 );
- ltok := ltok + nextc;
- getchar;
- end;
-
- 'a'..'z', '0'..'9', '_','@':
- ltok := ltok + usec;
-
- else
- toktype := identifier;
- end;
-
- until toktype = identifier;
- end;
-
-
-
- (********************************************************************)
- procedure scan_preproc;
- {scan a tshell preprocessor directive; same syntax as C already}
- begin
- write(ofd[level],'#');
-
- repeat
- write(ofd[level],nextc);
- getchar;
- until nextc = ^M;
-
- getchar;
- writeln(ofd[level]);
- toktype := unknown;
- end;
-
-
- (********************************************************************)
- procedure scan_number;
- {scan a number; this also processes #nnn character literals, which are
- converted into octal character literals. imbedded periods are processed,
- and a special condition is noted for trailing periods. this is needed
- for scanning the ".." keyword when used after numbers. an ungetchar
- facility would be more general, but isn't needed anywhere else.
- in pascal/mt+, #nnn is translated into nnnL }
- var
- hasdot: boolean;
- octal: boolean;
- islong: boolean;
-
- begin
- hasdot := false;
- islong := false;
- octal := false;
- toktype := number;
-
- (* check for preprocessor directives, character literals or long literals *)
- if nextc = '#' then
- begin
- ltok := '';
- if mt_plus then
- islong := true
- else
- octal := true;
- end;
-
- getchar;
-
- (* check for preprocessor directives *)
- if octal and (nextc in ['a'..'z']) then
- scan_preproc
- else
-
- repeat
- case nextc of
- '0'..'9':
- ltok := ltok + usec;
-
- '.':
- if hasdot then
- begin
- if ltok[length(ltok)] = '.' then
- begin
- ltok[0] := pred(ltok[0]); {remove trailing ., part of ..}
- if octal then
- ltok := coctal(atoi(ltok));
- extradot := true;
- end;
- exit;
- end
- else
-
- begin
- hasdot := true;
- ltok := ltok + usec;
- end;
-
- else
- begin
- if octal then
- ltok := coctal(atoi(ltok))
- else
- if islong then
- ltok := ltok + 'L';
- exit;
- end;
- end;
-
- until true=false;
- end;
-
-
- (********************************************************************)
- procedure scan_hat;
- {scan tokens starting with ^ - returns ^X as a character literal
- corresponding to the specified control character. returns ^ident as
- an identifier with the leading ^ intact. also scans ^. and ^[.}
- var
- c: char;
-
- begin
- getchar;
-
- if (nextc = '.') or (nextc = '[') then
- ltok := '^' + usec {^. or ^[}
- else
-
- if nextc in ['A'..'Z','a'..'z','@'..'_'] then
- begin
- ltok := nextc;
- scan_ident;
-
- if length(ltok) = 1 then {^c = control char}
- ltok := coctal( ord(upcase(ltok[1])) - ord('@') )
- else
- ltok := '^' + ltok; {^ident = pointer to ident}
- end;
- end;
-
-
- (********************************************************************)
- procedure scan_dot;
- {scans tokens starting with "."; knows about the 'extra dot' condition
- that comes up in number scanning. returns a token of either '.' or '..'}
- begin
- getchar;
-
- if (nextc = '.') or extradot then
- begin
- ltok := '..';
- extradot := false;
- end;
-
- if nextc = '.' then
- getchar;
- end;
-
-
- (********************************************************************)
- procedure scan_string;
- {scans a literal string. processes imbedded quotes ala pascal. translates
- the string into a C string with the proper escapes on imbedded quotes.
- converts single character strings into character constants. these are
- sometimes converted back to strings when the parser needs to}
- begin
-
- toktype := unknown;
- ltok := '"';
- getchar; {consume the open quote}
-
- repeat
- if nextc in [^J,^M] then
- begin
- syntax('Closing quote expected (scan_string)');
- exit;
- end;
-
- if nextc = '''' then
- begin
- getchar; {consume the quote}
-
- if nextc = '''' then
- ltok := ltok + usec
- {double quotes are coded as a single quote}
- else
-
- begin {end of string}
- ltok := ltok + '"';
- toktype := strng;
- end;
- end
- else
-
- if nextc = '"' then
- ltok := ltok + newc('\"')
- else
-
- if nextc = '\' then
- ltok := ltok + newc('\\')
-
- else
- ltok := ltok + usec;
-
- until toktype = strng;
-
- if length(ltok) = 3 then
- begin
- ltok[1] := '''';
- ltok[3] := '''';
- end;
-
- if ltok = '"\""' then
- ltok := '''"'''
- else
- if (ltok = '"''"') or (ltok = '''''''') then
- ltok := '''\''''';
-
- end;
-
-
- (********************************************************************)
- procedure scan_hex;
- {scans a hex constant and returns it as a C style 0xHHHH literal}
- begin
- getchar; {consume the '$'}
- ltok := '0x';
-
- while nextc in ['0'..'9', 'A'..'F', 'a'..'f'] do
- ltok := ltok + usec;
-
- toktype := number;
- end;
-
-
- (********************************************************************)
- procedure scan_pragma;
- {scans a turbo pascal compiler option and translates it into a general
- "pragma" ','nd. include directive is translated into the #include
- ','nd. returns with the first non-blank after the pragma}
- var
- code: char;
- prag: anystring;
- arg: anystring;
-
- begin
-
- repeat
- if nextc = ',' then
- newline;
-
- getchar; {consume the $ or ,}
-
- code := upcase(usec);
- arg := usec;
-
- if arg = '+' then
- arg := 'ON'
- else
-
- if arg = '-' then
- arg := 'OFF'
- else
-
- begin {decode numeric or string pragma params}
- if arg = ' ' then
- arg := '';
- while not (nextc in [' ','*','}',',']) do
- ltok := ltok + usec;
- arg := arg;
- end;
-
- case code of
-
- 'I': if (arg = 'ON') or (arg = 'OFF') then
- prag := '/* I(' + arg + ')' + ' */'
- else
- begin
- prag := '#include "' + arg + '"' + ^M^J;
- {$I-} assign(inclfd, arg);
- reset(inclfd); {$I+}
- if ioresult = 0 then
- begin
- read_include := true;
- incl_name := arg;
- if not includeinclude then
- begin
- write(ofd[level],prag,' ');
- enter_nested;
- srcfiles[level] := incl_name;
- srclines[level] := 0;
- write(con,^M^J,'':level*2+15,
- srcfiles[level],^M);
- end;
- end;
- end;
-
- else prag := '/* ' + code + '(' + arg + ')' + ' */';
- end;
-
- write(ofd[level],prag,' ');
-
- while nextc = ' ' do
- getchar;
-
- until nextc <> ',';
-
- end;
-
-
- (********************************************************************)
- procedure scan_curlycomment;
- {processes a curly-brace enclosed comment}
- begin
- getchar; {consume the open comment}
-
- if nextc = '$' then
- begin
- scan_pragma;
- if nextc = '}' then
- begin
- getchar;
- exit;
- end;
- end;
-
- write(ofd[level],' /* ');
-
- while nextc <> '}' do
- begin
- write(ofd[level],nextc);
- getchar;
- end;
-
- writeln(ofd[level],' */ ');
- getchar; {consume the close comment}
- end;
-
-
- (********************************************************************)
- procedure scan_parencomment;
- {process a (* enclosed comment}
- begin
- getchar; {consume the *}
-
- if nextc = '$' then
- scan_pragma;
-
- write(ofd[level],'/*');
-
- repeat
- write(ofd[level],nextc);
-
- if nextc = '*' then
- begin
- getchar;
-
- if nextc = ')' then
- begin
- writeln(ofd[level],'/ ');
- getchar;
- exit;
- end;
- end
- else
- getchar;
-
- until true=false;
- end;
-
-
- (********************************************************************)
- procedure scan_blanks;
- {scan white space. this procedure sometimes passes whitespace to the
- output. it keeps track of the indentation of the current line so it
- can be used by newline}
- var
- linestart: boolean;
- indent: anystring;
- valid: boolean;
-
- begin
- linestart := false;
- indent := '';
- valid := false;
-
- repeat
-
- case nextc of
- ^J,^M: begin
- if nospace=false then
- write(ofd[level],nextc);
- indent := '';
- linestart := true;
- getchar;
- end;
-
- ' ',^I,^@,^L:
- indent := indent + usec;
-
- '#': if linestart then
- begin
- write(ofd[level],indent); {pass preprocessor directives}
- indent := ''; {without change (single-line only)}
- repeat
- write(ofd[level],nextc);
- getchar;
- until nextc = ^M;
- getchar;
- writeln(ofd[level]);
- end
- else
- valid := true;
-
- else
- valid := true;
- end;
-
- until valid;
-
- if linestart then
- begin
- spaces := indent;
- if nospace=false then
- write(ofd[level],spaces);
- end;
- end;
-
-
- (********************************************************************)
- procedure scan_tok;
- {scans the next lexical token; returns the token in ltok and toktype}
- begin
- scan_blanks;
-
- toktype := unknown;
- ltok := nextc;
-
- case nextc of
- 'a'..'z',
- '_', 'A'..'Z': scan_ident;
-
- '''': scan_string;
-
- '0'..'9': scan_number;
-
- '#': begin
- scan_number;
- if toktype = unknown then
- scan_tok; {in case of #directive}
- end;
-
- '$': scan_hex;
-
- '<': begin
- getchar;
- if (nextc = '>') or (nextc = '=') then
- ltok := '<' + usec;
- end;
-
- '>': begin
- getchar;
- if nextc = '=' then
- ltok := '>' + usec;
- end;
-
- ':': begin
- getchar;
- if nextc = '=' then
- ltok := ':' + usec;
- end;
-
- '^': scan_hat;
-
- '.': scan_dot;
-
- '{': begin
- scan_curlycomment;
- scan_tok;
- end;
-
- '(': begin
- getchar;
- if nextc = '*' then
- begin
- scan_parencomment;
- scan_tok;
- end;
- end;
-
- else getchar; {consume the unknown char}
- end;
- end;
-
-
- (********************************************************************)
- procedure gettok;
- {get the next input token; this is the top level of the lexical analyzer.
- it returns ltok, tok(ltok in upper case), toktype. it translates BEGIN
- and END into braces; it checks for statement and section keywords}
- var
- i: integer;
-
- begin
-
- scan_tok;
- tok := ltok;
-
- if toktype = identifier then
- begin
- stoupper(tok);
-
- if tok = 'BEGIN' then
- begin
- tok := '{';
- ltok := tok;
- toktype := keyword;
- exit;
- end;
-
- if tok = 'END' then
- begin
- tok := '}';
- ltok := tok;
- toktype := keyword;
- exit;
- end;
-
- (* check for statement keywords *)
- for i := 1 to nkeywords do
- if tok = keywords[i] then
- begin
- toktype := keyword;
- exit;
- end;
- end;
- end;
-
-
- (********************************************************************)
- function usetok: string80;
- {return (use) and consume current token}
- var
- tv: string80;
- begin
- tv := ltok;
- gettok;
- usetok := tv;
- end;
-
-
-