home *** CD-ROM | disk | FTP | other *** search
- { *****************************************************
- NewParse Unit
-
- Paul Warren
- HomeGrown Software Development
- (c) 1997 Langley British Columbia.
- (604) 856-6523
- e-mail: hg_soft@uniserve.com
- Home page: http://users.uniserve.com/~hg_soft
- ***************************************************** }
-
- unit Newparse;
- { $DEFINE DEBUG}
-
- interface
-
- uses Classes, Consts, SysUtils, Dialogs;
-
- type
- TParserClass = class of TCustomParser;
-
- TCustomParser = class
- private
- { private declarations }
- FStream: TStream;
- FOrigin: Longint;
- FBuffer: PChar;
- FBufPtr: PChar;
- FBufEnd: PChar;
- FSourcePtr: PChar;
- FSourceEnd: PChar;
- FTokenPtr: PChar;
- FStringPtr: PChar;
- FSourceLine: Integer;
- FSaveChar: Char;
- FToken: Char;
- procedure ReadBuffer;
- procedure SkipBlanks;
- {$IFDEF Win32}
- procedure Error(const Ident: string); virtual;
- {$ELSE}
- procedure Error(MessageID: Word); virtual;
- {$ENDIF}
- procedure ErrorStr(const Message: string);
- public
- { public declarations }
- constructor Create(Stream: TStream); virtual;
- destructor Destroy; override;
- function NextToken: Char; virtual;
- function TokenString: string; virtual;
- function SourcePos: Longint;
- property Token: Char read FToken;
- property SourceLine: integer read FSourceLine;
- end;
-
- TCSVParser = class(TCustomParser)
- private
- { private declarations }
- public
- { public declarations }
- function TokenString: string; override;
- function NextToken: char; override;
- end;
- TTextParser = class(TCustomParser)
- private
- { private declarations }
- public
- { public declarations }
- function NextToken: Char; override; end;
- TPasParser = class(TTextParser)
- private
- { private declarations }
- public
- { public declarations }
- function NextToken: Char; override; end;
- const
- toComment = Char(5);
-
- type
- TEnhPasParser = class(TPasParser)
- private
- { private declarations }
- public
- { public declarations }
- function TokenString: string; override; function NextToken: Char; override; end;
- const
- toOpenTag = Char(6);
- toCloseTag = Char(7);
-
- type
- THtmlParser = class(TTextParser)
- private
- { private declarations }
- public
- { public declarations }
- function TokenString: string; override; function NextToken: Char; override; end;
- var
- Log: TextFile;
-
- implementation
-
- { TCustomParser }
-
- const
- ParseBufSize: integer = 4096;
-
- constructor TCustomParser.Create(Stream: TStream);
- begin
- FStream := Stream;
- GetMem(FBuffer, ParseBufSize);
- FBuffer[0] := #0;
- FBufPtr := FBuffer;
- FBufEnd := FBuffer + ParseBufSize;
- FSourcePtr := FBuffer;
- FSourceEnd := FBuffer;
- FTokenPtr := FBuffer;
- FSourceLine := 1;
- {$IFDEF DEBUG}
- writeln(log,'');
- writeln(log, 'FBuffer FBufPtr FSrcPtr FSrcEnd FBufEnd Pos Occured');
- writeln(log,'');
- writeln(log,LongInt(FBuffer), ' ', LongInt(FBufPtr), ' ', LongInt(FSourcePtr),' ', FSourcePtr^, ' ', LongInt(FSourceEnd), ' ', LongInt(FBufEnd), ' ',FStream.Position,' on create');
- {$ENDIF}
- NextToken;
- end;
-
- destructor TCustomParser.Destroy;
- begin
- if FBuffer <> nil then
- begin
- FStream.Seek(Longint(FTokenPtr) - Longint(FSourceEnd), 1);
- FreeMem(FBuffer, ParseBufSize);
- end;
- end;
-
- procedure TCustomParser.ReadBuffer;
- var
- Count: Integer;
- begin
- try
- Inc(FOrigin, FSourcePtr - FBuffer);
- FSourceEnd[0] := FSaveChar;
- {$IFDEF DEBUG}
- writeln(log,LongInt(FBuffer), ' ', LongInt(FBufPtr), ' ', LongInt(FSourcePtr),' ', FSourcePtr^,' ', LongInt(FSourceEnd),' ',LongInt(FBufEnd), ' ',FStream.Position, ' before read');
- {$ENDIF}
- Count := FBufPtr - FSourcePtr;
- if Count <> 0 then Move(FSourcePtr[0], FBuffer[0], Count);
- FBufPtr := FBuffer + Count;
- Inc(FBufPtr, FStream.Read(FBufPtr[0], FBufEnd - FBufPtr));
- {$IFDEF DEBUG}
- writeln(log,LongInt(FBuffer), ' ', LongInt(FBufPtr), ' ', LongInt(FSourcePtr),' ', FSourcePtr^, ' ', LongInt(FSourceEnd), ' ', LongInt(FBufEnd), ' ',FStream.Position, ' after read');
- {$ENDIF}
- FSourcePtr := FBuffer;
- FSourceEnd := FBufPtr;
- if FSourceEnd = FBufEnd then
- begin
- FSourceEnd := LineStart(FBuffer, FSourceEnd - 1);
- if FSourceEnd = FBuffer then Error(SLineTooLong);
- end;
- FSaveChar := FSourceEnd[0];
- FSourceEnd[0] := #0;
- except
- on EStreamError do
- MessageDlg('FSourcePtr^ = '+FSourcePtr^,mtError,
- [mbOK],0);
- on EAccessViolation do
- MessageDlg('FSourcePtr^ = '+FSourcePtr^,mtError,
- [mbOK],0);
- end;
- end;
-
- function TCustomParser.NextToken: Char;
- begin
- FToken := FSourcePtr^;
- if FToken <> toEOF then Inc(FSourcePtr);
- Result := FToken;
- end;
-
- procedure TCustomParser.SkipBlanks;
- begin
- while True do
- begin
- case FSourcePtr^ of
- #0:
- begin
- ReadBuffer;
- if FSourcePtr^ = #0 then Exit;
- Continue;
- end;
- #10:
- Inc(FSourceLine);
- #33..#255:
- Exit;
- end;
- Inc(FSourcePtr);
- end;
- end;
-
- function TCustomParser.TokenString: string;
- var
- L: Integer;
- begin
- if (FToken = toString) then
- L := FStringPtr - FTokenPtr else
- L := FSourcePtr - FTokenPtr;
- {$IFDEF Win32}
- SetString(Result, FTokenPtr, L);
- {$ELSE}
- if L > 255 then L := 255;
- Result[0] := Char(L);
- {$ENDIF}
- Move(FTokenPtr[0], Result[1], L);
- end;
-
- {$IFDEF Win32}
- procedure TCustomParser.Error(const Ident: string);
- begin
- ErrorStr(Ident);
- end;
-
- procedure TCustomParser.ErrorStr(const Message: string);
- begin
- raise EParserError.CreateFmt(SParseError, [Message, FSourceLine]);
- end;
- {$ELSE}
- procedure TCustomParser.Error(MessageID: Word);
- begin
- ErrorStr(LoadStr(MessageID));
- end;
-
- procedure TCustomParser.ErrorStr(const Message: string);
- begin
- raise EParserError.Create(FmtLoadStr(SParseError, [Message, FSourceLine]));
- end;
- {$ENDIF}
-
- function TCustomParser.SourcePos: Longint;
- begin
- Result := FOrigin + (FTokenPtr - FBuffer);
- end;
-
- { TCSVParser }
- function TCSVParser.TokenString: string;
- var
- L: Integer;
- begin
- if (FToken = toSymbol) then
- L := FStringPtr - FTokenPtr else
- L := FSourcePtr - FTokenPtr;
- {$IFDEF Win32}
- SetString(Result, FTokenPtr, L);
- {$ELSE}
- if L > 255 then L := 255;
- Result[0] := Char(L);
- {$ENDIF}
- Move(FTokenPtr[0], Result[1], L);
- end;
-
- function TCSVParser.NextToken: Char;
- begin
- SkipBlanks;
- FTokenPtr := FSourcePtr;
- case FSourcePtr^ of
- 'A'..'Z', 'a'..'z', '_':
- begin
- Inc(FSourcePtr);
- FStringPtr := FSourcePtr;
- while true do
- begin
- case FSourcePtr^ of
- ',': Break;
- #0: Break;
- end;
- FStringPtr^ := FSourcePtr^;
- Inc(FStringPtr);
- Inc(FSourcePtr);
- end;
- FToken := toSymbol;
- Result := FToken;
- end;
- '-', '0'..'9':
- begin
- Inc(FSourcePtr);
- while FSourcePtr^ in ['0'..'9'] do Inc(FSourcePtr);
- FToken := toInteger;
- Result := FToken;
- while FSourcePtr^ in ['0'..'9', '.', 'e', 'E', '+', '-'] do
- begin
- Inc(FSourcePtr);
- FToken := toFloat;
- Result := FToken;
- end;
- end;
- else Result := inherited NextToken;
- end;
- end;
-
- { TTextParser }
- function TTextParser.NextToken: Char;
- begin
- SkipBlanks;
- FTokenPtr := FSourcePtr;
- case FSourcePtr^ of
- 'A'..'Z', 'a'..'z', '_':
- begin
- Inc(FSourcePtr);
- while True do
- case FSourcePtr^ of
- 'A'..'Z', 'a'..'z', '0'..'9', '_': Inc(FSourcePtr);
- '''': begin { apostrophies }
- if (FSourcePtr+1)^ in ['A'..'Z', 'a'..'z', '0'..'9', '_'] then Inc(FSourcePtr)
- else Break;
- end;
- '-': begin { hyphenated words }
- if (FSourcePtr+1)^ in ['A'..'Z', 'a'..'z', '0'..'9', '_'] then Inc(FSourcePtr)
- else Break;
- end;
- else Break;
- end;
- FToken := toSymbol;
- Result := FToken;
- end;
- '-', '0'..'9':
- begin
- Inc(FSourcePtr);
- while FSourcePtr^ in ['0'..'9'] do Inc(FSourcePtr);
- FToken := toInteger;
- Result := FToken;
- while FSourcePtr^ in ['0'..'9', '.', 'e', 'E', '+', '-'] do
- begin
- Inc(FSourcePtr);
- FToken := toFloat;
- Result := FToken;
- end;
- end;
- else Result := inherited NextToken;
- end;
- end;
-
- { TPasParser }
- function TPasParser.NextToken: Char;
- var
- I: integer;
- begin
- SkipBlanks;
- FTokenPtr := FSourcePtr;
- case FSourcePtr^ of
- 'A'..'Z', 'a'..'z', '_':
- begin
- Inc(FSourcePtr);
- while FSourcePtr^ in ['A'..'Z', 'a'..'z', '0'..'9', '_'] do Inc(FSourcePtr);
- FToken := toSymbol;
- Result := FToken;
- end;
- '#', '''':
- begin
- FStringPtr := FSourcePtr;
- while True do
- case FSourcePtr^ of
- '#':
- begin
- Inc(FSourcePtr);
- I := 0;
- while FSourcePtr^ in ['0'..'9'] do
- begin
- I := I * 10 + (Ord(FSourcePtr^) - Ord('0'));
- Inc(FSourcePtr);
- end;
- FStringPtr^ := Chr(I);
- Inc(FStringPtr);
- end;
- '''':
- begin
- Inc(FSourcePtr);
- while True do
- begin
- case FSourcePtr^ of
- #0, #10, #13:
- Error(SInvalidString);
- '''':
- begin
- Inc(FSourcePtr);
- if FSourcePtr^ <> '''' then Break;
- end;
- end;
- FStringPtr^ := FSourcePtr^;
- Inc(FStringPtr);
- Inc(FSourcePtr);
- end;
- end;
- else
- Break;
- end;
- FToken := toString;
- Result := FToken;
- end;
- '$':
- begin
- FToken := FSourcePtr^; { assume NOT an integer }
- Result := FToken;
- Inc(FSourcePtr);
- while true do
- begin
- case FSourcePtr^ of
- '0'..'9', 'A'..'F', 'a'..'f': Inc(FSourcePtr);
- else Break;
- end;
- FToken := toInteger;
- Result := FToken;
- end;
- end;
- (* '-', '0'..'9':
- begin
- Inc(FSourcePtr);
- while FSourcePtr^ in ['0'..'9'] do Inc(FSourcePtr);
- FToken := toInteger;
- Result := FToken;
- while FSourcePtr^ in ['0'..'9', '.', 'e', 'E', '+', '-'] do
- begin
- Inc(FSourcePtr);
- FToken := toFloat;
- Result := FToken;
- end;
- end; *)
- else Result := inherited NextToken;
- end;
- end;
-
- { TEnhPasParser }
- function TEnhPasParser.TokenString: string;
- var
- L: Integer;
- begin
- if (FToken = toString) or (FToken = toComment) then
- L := FStringPtr - FTokenPtr else
- L := FSourcePtr - FTokenPtr;
- {$IFDEF Win32}
- SetString(Result, FTokenPtr, L);
- {$ELSE}
- if L > 255 then L := 255;
- Result[0] := Char(L);
- {$ENDIF}
- Move(FTokenPtr[0], Result[1], L);
- end;
-
- function TEnhPasParser.NextToken: Char;
- begin
- SkipBlanks;
- FTokenPtr := FSourcePtr;
- case FSourcePtr^ of
- '{':
- begin { comment or compiler directive... }
- FStringPtr := FSourcePtr;
- Inc(FSourcePtr); { check next char... }
- while true do
- begin
- case FSourcePtr^ of
- #0: begin
- ReadBuffer;
- FStringPtr := FSourcePtr;
- if FSourcePtr^ = #0 then Break;
- {$IFDEF DEBUG}
- writeln(Log, 'in comment');
- {$ENDIF}
- end;
- #10: Inc(FSourceLine);
- '}':
- begin
- Inc(FSourcePtr);
- Break; { end comment... }
- end;
- end;
- FStringPtr^ := FSourcePtr^;
- Inc(FStringPtr);
- Inc(FSourcePtr);
- end;
- FToken := toComment;
- Result := FToken;
- end;
- '(', '/': { possible comment or compiler directive... }
- begin
- FToken := FSourcePtr^; { assume NOT a comment }
- Result := FToken;
- FStringPtr := FSourcePtr;
- Inc(FSourcePtr); { check next char... }
- case FSourcePtr^ of
- '*': { is a comment }
- begin
- Inc(FSourcePtr); { check next char... }
- while True do
- begin
- case FSourcePtr^ of
- #0: begin
- ReadBuffer;
- FStringPtr := FSourcePtr;
- if FSourcePtr^ = #0 then Break;
- {$IFDEF DEBUG}
- writeln(Log, 'in comment');
- {$ENDIF}
- end;
- #10: Inc(FSourceLine);
- '*':
- begin
- Inc(FSourcePtr);
- if FSourcePtr^ = ')' then
- begin
- Inc(FSourcePtr);
- Break; { end of comment }
- end;
- end;
- end;
- FStringPtr^ := FSourcePtr^;
- Inc(FStringPtr);
- Inc(FSourcePtr);
- end;
- FToken := toComment;
- Result := FToken;
- end;
- '/': { is a comment }
- begin
- Inc(FSourcePtr);
- while (FSourcePtr^ <> #13) do { end of line, hence comment }
- begin
- FStringPtr^ := FSourcePtr^;
- Inc(FStringPtr);
- Inc(FSourcePtr);
- end;
- FToken := toComment;
- Result := FToken;
- end;
- end;
- end;
- else Result := inherited NextToken;
- end;
- end;
-
- { THtmlParser }
- function THtmlParser.TokenString: string;
- var
- L: Integer;
- begin
- if (FToken = toString) or (FToken = toOpenTag)
- or (FToken = toCloseTag) then
- L := FStringPtr - FTokenPtr else
- L := FSourcePtr - FTokenPtr;
- {$IFDEF Win32}
- SetString(Result, FTokenPtr, L);
- {$ELSE}
- if L > 255 then L := 255;
- Result[0] := Char(L);
- {$ENDIF}
- Move(FTokenPtr[0], Result[1], L);
- end;
-
- function THtmlParser.NextToken: Char;
- begin
- SkipBlanks;
- FTokenPtr := FSourcePtr;
- case FSourcePtr^ of
- '<': { is a tag }
- begin
- FStringPtr := FSourcePtr;
- Inc(FSourcePtr);
- case FSourcePtr^ of
- '/': { is an 'close' tag }
- begin
- Inc(FSourcePtr);
- while true do
- begin
- case FSourcePtr^ of
- #0: begin
- ReadBuffer;
- FStringPtr := FSourcePtr;
- if FSourcePtr^ = #0 then Break;
- end;
- '>': begin
- Inc(FSourcePtr);
- Break; { end of tag }
- end;
- end; {case}
- FStringPtr^ := FSourcePtr^;
- Inc(FStringPtr);
- Inc(FSourcePtr);
- end;
- FToken := toCloseTag;
- Result := FToken;
- end;
- else
- begin
- while true do
- begin
- case FSourcePtr^ of
- #0: begin
- ReadBuffer;
- FStringPtr := FSourcePtr;
- if FSourcePtr^ = #0 then Break;
- end;
- '>': begin
- Inc(FSourcePtr);
- Break; { end of tag }
- end;
- end; {case}
- FStringPtr^ := FSourcePtr^;
- Inc(FStringPtr);
- Inc(FSourcePtr);
- end;
- FToken := toOpenTag;
- Result := FToken;
- end;
- end; {case}
- end;
- else Result := inherited NextToken;
- end;
- end;
-
- {$IFDEF DEBUG}
- initialization
- AssignFile(Log, 'debug.log');
- Rewrite(Log);
- finalization
- CloseFile(Log);
- {$ENDIF}
- end.
-