home *** CD-ROM | disk | FTP | other *** search
- PROGRAM regular;
- {
- Search input lines for regular expressions. Similar to DOS
- "FIND.EXE" and UNIX "GREP". Reads from standard input, writes
- to standard output. Usage: C:>DIR | REGULAR PAS
- }
-
- CONST
- { REGULAR EXPRESSION OPERATORS }
- CLOSURE = '*';
- BOL = '^'; { match starting at beginning of line }
- EOL = '$'; { match at end of line }
- ANY = '.'; { match any single character }
- CCL = '['; { begin character class }
- CCLEND = ']'; { end character class }
- NEGATE = '^'; { signify negative character class }
- NCCL = '!'; { negative character class: internal form }
- LITCHAR = '@'; { next character not an operator }
- ESCAPE = '\'; { treat next operator as literal character }
- DASH = '-'; { consecutive range within class }
-
- EOF_NUM=255; { end of file }
- EOLN1_NUM=13; { return }
- EOLN2_NUM=10; { line feed }
- ENDSTR = ^A; { End String: internal code for end of line }
-
-
-
-
- {$I InOut.pas} { Get line from Standard Input, Put line to STDOUT }
-
- var ARG, { input string: regular expression }
- LIN, { line from standard input }
- PAT: maxstr; { regular expression (internal form)}
-
- {$I Compile.pas} { compile regular expression to internal form }
-
- function locate(c: char; pat: maxstr; offset: integer) : boolean;
- {
- Search for the character C in the character class at pat[offset]
- }
- var i: integer;
- begin
- { size of class is at pat[offset], characters follow }
- locate:=true;
- i:=offset+ord(pat[offset]); {last position in class}
- while i>offset do
- if c=pat[i] then exit else i:=i-1;
- locate:=false;
- end;
-
- function lin_advance(lin: maxstr; l: integer;
- pat: maxstr; p: integer): integer;
- {
- Matches character pattern pat[p] against input line characters
- starting at lin[l]. LIN_ADVANCE=-1 means no match.
- }
-
- begin
- lin_advance:=-1;
- case pat[p] of
- LITCHAR: if lin[l]=pat[p+1] then lin_advance:=1;
- BOL: if l=1 then lin_advance:=0;
- ANY: if l<length(lin) then lin_advance:=1;
- EOL: if l=length(lin) then lin_advance:=0;
- CCL: if locate(lin[l], pat, p+1)
- then lin_advance:=1;
- NCCL: if (l<length(lin)) and
- (not (locate(lin[l], pat, p+1)))
- then lin_advance:=1;
- else error('in lin_advance: can''t happen')
- end; {case}
- end;
-
- function pat_advance(pat: maxstr; p: integer) : integer;
- {
- Returns offset of next pattern within PAT string. Current pattern
- starts at PAT[P]. ex. if pat="@c@a@t" and p=1 then pat_advance=3.
- }
- begin
- case pat[p] of
- LITCHAR: pat_advance:=p+2;
- BOL,EOL,ANY: pat_advance:=p+1;
- CCL,NCCL: pat_advance:=p+ord(pat[p+1])+2;
- CLOSURE: pat_advance:=p+1;
- else error('in pat_advance: can''t happen');
- end; {case}
- end;
-
- function amatch (lin: maxstr; offset: integer;
- pat: maxstr; p: integer): boolean; forward;
-
- function match_closure(lin: maxstr; offset:integer;
- pat:maxstr; p:integer): integer;
- {
- Match as many characters as possible with closure.
- Does rest of pattern match remaining characters on line?
- If not, shorted closure match by one and try again.
- If closure shortened to 0, no match is possible (match_closure=-1)
- }
- var n, backtrack, increment: integer;
- begin
- match_closure:=0;
- n:=offset;
- repeat
- increment:=lin_advance(lin,n,pat,p);
- if increment>=0 then n:=n+increment;
- until ((increment<0) or (n>length(lin)));
- if n=offset then exit; { closure length is zero }
- for backtrack:=n downto offset do
- begin
- if amatch(lin,backtrack,pat,pat_advance(pat,p)) then
- begin
- match_closure:=backtrack;
- exit;
- end;
- end;
- match_closure:=-1;
- end;
-
- function amatch;
- {
- Anchored match. Does pattern PAT match input line starting at
- LIN[offset]? Loop through PAT distinguishing the two cases;
- if PAT[P] is a closure, find appropriate closure size to match.
- Otherwise, just compare characters and update PAT and LIN indexes.
- }
- var l,increment, closure_end: integer;
-
- begin
- amatch:=false;
- l:=offset;
- while (p<=length(pat)) do
- begin
- if l>length(lin) then exit;
- if pat[p]=CLOSURE then
- begin
- closure_end:=match_closure(lin,l,
- pat,pat_advance(pat,p)); { jump over "*" }
- if closure_end<0 then exit;
- l:=closure_end;
- p:=pat_advance(pat,p);
- end
- else
- begin
- increment:=lin_advance(lin,l,pat,p);
- if increment<0 then exit;
- l:=l+increment;
- end;
- p:=pat_advance(pat,p);
- end; {while}
- amatch:=true;
- end;
-
- function match(lin,pat: maxstr): boolean;
- {
- Loop through input line checking for match at each position.
- }
- var i: integer;
- begin
- match:=true;
- for i:=1 to length(lin) do if amatch(lin,i,pat,1) then exit;
- match:=false;
- end;
-
- begin
- if not getarg(arg) then error('no pattern specified');
- pat:=makepat(arg);
- while getline(lin) do
- if match(lin,pat) then putline(lin);
- end.