Programmer Power Tools

home *** CD-ROM | disk | FTP | other *** search

/ Programmer Power Tools / Programmer Power Tools.iso / turbopas / tptc17sc.arc / TPCSCAN.INC < prev next >

Wrap

Text File | 1988-03-26 | 19.1 KB | 834 lines

(* * TPTC - Turbo Pascal to C translator * * (C) 1988 Samuel H. Smith (rev. 13-Feb-88) * *) (********************************************************************) (* * lexical scanner * *) function numlit(n: integer): anystring; var lit: string[6]; {convert an integer into a c style numeric character literal} function digit(n: integer): char; (* convert an integer into a hex digit *) begin n := n and 15; if n > 9 then n := n + 7; digit := chr( n + ord('0') ); end; begin lit := '''\?'''; case n of $07: lit[3] := 'a'; $08: lit[3] := 'b'; $09: lit[3] := 't'; $0a: lit[3] := 'n'; $0b: lit[3] := 'v'; $0c: lit[3] := 'f'; $0d: lit[3] := 'r'; 32..126,128..254: lit := ''''+chr(n)+''''; else begin lit := '''\x??'''; lit[4] := digit(n shr 4); lit[5] := digit(n); end; end; numlit := lit; toktype := chars; end; (********************************************************************) procedure getchar; {consume the current char and get the next one} var stack: char; begin if ofs(stack) < minstack then fatal('Out of stack space'); while (srclevel > 0) and eof(srcfd[srclevel]) do begin if not linestart then putline; putln('/* TPTC: end of '+srcfiles[srclevel]+' */'); if debug then writeln; writeln(^M,srcfiles[srclevel],'(',srclines[srclevel],')'); close(srcfd[srclevel]); freemem(inbuf[srclevel],inbufsiz); dec(srclevel); statustime := 0; end; if eof(srcfd[srclevel]) then nextc := '.' else read(srcfd[srclevel], nextc); if nextc = ^J then begin inc(srclines[srclevel]); inc(srctotal); mark_time(curtime); if (curtime >= statustime) or debug then begin if debug then writeln; write(^M,srcfiles[srclevel],'(',srclines[srclevel],')'); statustime := curtime+statrate; abortcheck; end; end; end; (********************************************************************) function usec: char; {use up the current character(return it) and get the next one from the input stream} var c: char; begin c := nextc; getchar; usec := c; end; (********************************************************************) function newc(n: string40): string40; {replace the current character with a different one and get the next character from the input stream} var c: char; begin c := nextc; getchar; newc := n; end; (********************************************************************) procedure concat_tokens; {concatenate the next token and the current token} var cur: string; begin cur := ltok; ltok := nextc; toktype := unknown; scan_tok; ltok := copy(cur,1,length(cur)-1) + copy(ltok,2,255); ltok[1] := '"'; ltok[length(ltok)] := '"'; toktype := strng; end; (********************************************************************) procedure scan_ident; {scan an identifier; output is ltok; nextc is first character following the identifier; toktype = identifier; this is the protocol for all of the scan_xxxx procedures in the lexical analyzer} begin toktype := unknown; ltok := ''; repeat case nextc of 'A'..'Z': begin if map_lower then nextc := chr( ord(nextc)+32 ); ltok := ltok + nextc; getchar; end; 'a'..'z', '0'..'9', '_','@': ltok := ltok + usec; else toktype := identifier; end; until toktype = identifier; end; (********************************************************************) procedure scan_preproc; {scan a tshell preprocessor directive; same syntax as C already} begin puts('#'); repeat puts(nextc); getchar; until nextc = ^M; getchar; putline; toktype := unknown; end; (********************************************************************) procedure scan_number; {scan a number; this also processes #nnn character literals, which are converted into octal character literals. imbedded periods are processed, and a special condition is noted for trailing periods. this is needed for scanning the ".." keyword when used after numbers. an ungetchar facility would be more general, but isn't needed anywhere else. in pascal/mt+, #nnn is translated into nnnL } var hasdot: boolean; charlit: boolean; islong: boolean; begin hasdot := false; islong := false; charlit := false; toktype := number; (* check for preprocessor directives, character literals or long literals *) if nextc = '#' then begin ltok := ''; if mt_plus then islong := true else charlit := true; end; getchar; (* check for preprocessor directives *) if tshell and charlit and (nextc >= 'a') and (nextc <= 'z') then scan_preproc else repeat case nextc of '$','0'..'9','a'..'f','A'..'F': ltok := ltok + usec; '.': if hasdot then begin if ltok[length(ltok)] = '.' then begin ltok[0] := pred(ltok[0]); {remove trailing ., part of ..} if charlit then ltok := numlit(atoi(ltok)); extradot := true; end; exit; end else begin hasdot := true; ltok := ltok + usec; end; else begin if charlit then begin ltok := numlit(atoi(ltok)); if (nextc = '''') or (nextc = '^') or (nextc = '#') then concat_tokens; exit; end; if ltok[1] = '$' then ltok := '0x' + copy(ltok,2,99); if islong then ltok := ltok + 'L'; exit; end; end; until true=false; end; (********************************************************************) procedure scan_hat; {scan tokens starting with ^ - returns ^X as a character literal corresponding to the specified control character. returns ^ident as an identifier with the leading ^ intact. also scans ^. and ^[.} var c: char; begin getchar; if ((nextc = '.') or (nextc = '[')) and ((ptoktype = identifier) or (ptok = ']')) then begin ltok := '^' + usec; {^. or ^[} exit; end; case nextc of '@','['..'`': ltok := usec; 'A'..'Z','a'..'z': begin ltok := nextc; scan_ident; end; else exit; end; if length(ltok) = 1 then {^c = control char} begin ltok := numlit( ord(upcase(ltok[1])) - ord('@') ); if (nextc = '''') or (nextc = '^') or (nextc = '#') then concat_tokens; end else ltok := '^' + ltok; {^ident = pointer to ident} end; (********************************************************************) procedure scan_dot; {scans tokens starting with "."; knows about the 'extra dot' condition that comes up in number scanning. returns a token of either '.' or '..'} begin getchar; if (nextc = '.') or extradot then begin ltok := '..'; extradot := false; end; if nextc = '.' then getchar; end; (********************************************************************) procedure scan_string; {scans a literal string. processes imbedded quotes ala pascal. translates the string into a C string with the proper escapes on imbedded quotes. converts single character strings into character constants. these are sometimes converted back to strings when the parser needs to} begin toktype := unknown; ltok := '"'; getchar; repeat case nextc of ^J,^M: begin error_message('Closing quote expected (scan_string)'); toktype := strng; end; '''': begin getchar; {consume the quote} if nextc = '''' then ltok := ltok + usec {double quotes are coded as a single quote} else begin {end of string} ltok := ltok + '"'; toktype := strng; end; end; '"': ltok := ltok + newc('\"'); '\': ltok := ltok + newc('\\'); else ltok := ltok + usec; end; until toktype = strng; if length(ltok) = 3 then begin ltok[1] := ''''; ltok[3] := ''''; toktype := chars; end; if ltok = '"\""' then begin ltok := '''"'''; toktype := chars; end else if (ltok = '"''"') or (ltok = '''''''') then ltok := '''\''''' else if (ltok = '"\\"') then begin ltok := '''\\'''; toktype := chars; end; if (nextc = '^') or (nextc = '#') then concat_tokens; end; (********************************************************************) procedure scan_pragma(var isinclude: anystring); {scans a turbo pascal compiler option and translates it into a comment. include directive is translated into the #include. returns with the first non-blank after the pragma} var code: anystring; prag: anystring; arg: anystring; procedure scanword(var dest: anystring); begin dest := ' '; {insure dest[2] is initialized} dest := ''; while true do case nextc of ' ', '*', '}', ',': exit; else begin dest := dest + upcase(nextc); getchar; end; end; end; begin isinclude := ''; repeat if nextc = ',' then newline; getchar; {consume the $ or ,} {get the progma code} scanword(code); if nextc = ' ' then begin getchar; scanword(arg); end else arg := ''; if code[2] = '+' then arg := 'ON' else if code[2] = '-' then arg := 'OFF'; prag := '/* '+code[1]+'(' + arg + ')' + ' */'; case code[1] of 'D': if code[2] = 'E' then prag := '#define '+arg; 'E': if code[2] = 'N' then prag := '#endif' else if code[2] = 'L' then prag := '#else'; 'I': if code[2] = ' ' then begin if pos('.',arg) = 0 then arg := arg + '.PAS'; prag := '#include "' + arg + '" '; if includeinclude then begin prag := ''; isinclude := arg; end; end else if code[2] = 'F' then begin if code[3] = 'N' then prag := '#ifndef '+arg else prag := '#ifdef '+arg; end; 'U': if code[2] = 'N' then prag := '#undef '+arg; end; puts(prag); puts(' '); while nextc = ' ' do getchar; until nextc <> ','; end; (********************************************************************) procedure open_include(name: anystring); begin if length(name) = 0 then exit; inc(srctotal); inc(objtotal); inc(srclevel); if srclevel > maxincl then fatal('Includes nested too deeply'); srcfiles[srclevel] := name; srclines[srclevel] := 1; assign(srcfd[srclevel],name); {$I-} reset(srcfd[srclevel]); {$I+} if ioresult <> 0 then begin dec(srclevel); ltok := name; warning('Missing include file'); end else begin if not linestart then putline; putln('/* TPTC: include '+name+' */'); if maxavail-300 <= inbufsiz then begin ltok := name; fatal('Out of memory'); end; getmem(inbuf[srclevel],inbufsiz); SetTextBuf(srcfd[srclevel],inbuf[srclevel]^,inbufsiz); end; if {quietmode and} not debug then write(^M,'':40,^M) else writeln; statustime := 0; end; (********************************************************************) procedure scan_curlycomment; {processes a curly-brace enclosed comment} var isinclude: anystring; begin toktype := comment; getchar; {consume the open comment} isinclude := ''; if nextc = '$' then scan_pragma(isinclude); if nextc = '}' then begin getchar; open_include(isinclude); exit; end; if pass_comments then puts(' /* '); while nextc <> '}' do begin if pass_comments then puts(nextc); getchar; end; if pass_comments then begin puts(' */ '); if nospace then newline; end; getchar; {consume the close comment} open_include(isinclude); end; (********************************************************************) procedure scan_parencomment; {process a (* enclosed comment} var isinclude: anystring; begin toktype := comment; getchar; {consume the *} isinclude := ''; if nextc = '$' then scan_pragma(isinclude); if pass_comments then puts('/*'); repeat if pass_comments then puts(nextc); if nextc = '*' then begin getchar; if nextc = ')' then begin getchar; if pass_comments then begin puts('/ '); if nospace then putline; end; open_include(isinclude); exit; end; end else getchar; until true=false; end; (********************************************************************) procedure scan_blanks; {scan white space. this procedure sometimes passes whitespace to the output. it keeps track of the indentation of the current line so it can be used by newline} var indent: anystring; valid: boolean; begin linestart := false; indent := ''; valid := false; repeat case nextc of ^J,^M: begin if (nospace = false) and (nextc = ^J) then putline; indent := ''; linestart := true; getchar; end; ' ',^I,^@,^L: indent := indent + usec; '#': if linestart and tshell then begin puts(indent); {pass preprocessor directives} indent := ''; {without change (single-line only)} repeat puts(nextc); getchar; until nextc = ^M; getchar; putline; end else valid := true; else valid := true; end; until valid; if linestart then begin spaces := indent; if nospace=false then puts(spaces); linestart := true; end; end; (********************************************************************) procedure scan_tok; {scans the next lexical token; returns the token in ltok and toktype} begin scan_blanks; toktype := unknown; ltok := nextc; case nextc of 'a'..'z', '_', 'A'..'Z': scan_ident; '$': scan_number; '0'..'9': scan_number; '''': scan_string; '^': scan_hat; '#': begin scan_number; if toktype = unknown then scan_tok; {in case of #directive} end; '<': begin getchar; if (nextc = '>') or (nextc = '=') then ltok := '<' + usec; end; '>': begin getchar; if nextc = '=' then ltok := '>' + usec; end; ':': begin getchar; if nextc = '=' then ltok := ':' + usec; end; '.': scan_dot; '{': scan_curlycomment; '(': begin getchar; if nextc = '*' then scan_parencomment; end; else getchar; {consume the unknown char} end; end; (********************************************************************) procedure gettok; {get the next input token; this is the top level of the lexical analyzer. it returns ltok, tok(ltok in upper case), toktype. it translates BEGIN and END into braces; it checks for statement and section keywords} var i: integer; begin ptoktype := toktype; ptok := tok; cursym := nil; repeat scan_tok; until toktype <> comment; tok := ltok; if debug then write(' {',ltok,'}'); if toktype = identifier then begin stoupper(tok); if tok = 'BEGIN' then begin tok := '{'; ltok := tok; toktype := keyword; end else if tok = 'END' then begin tok := '}'; ltok := tok; toktype := keyword; end; (* check for statement keywords *) i := 0; repeat inc(i); if tok[1] = keywords[i][1] then {hack for speed} if length(tok) = length(keywords[i]) then if tok = keywords[i] then toktype := keyword; until (i = nkeywords) or (toktype = keyword); (* get symbol table information for this item *) cursym := locatesym(tok); end; end; (********************************************************************) function usetok: string80; {return (use) and consume current token} var tv: string80; begin tv := ltok; gettok; usetok := tv; end;