home *** CD-ROM | disk | FTP | other *** search
-
- procedure error(message: maxstr);
- begin
- writeln('error in regular.com: ',message);
- halt; { stop the program }
- end;
-
- function dodash(var expand: maxstr) : boolean;
- {
- Expand character class like "a-h" to "abcdefgh".
- If syntax is wrong, DODASH returns false and all subsequent DASH
- operators are interpreted as literal characters.
- }
- var st: maxstr; count: integer;
- begin
- dodash:=false;
- st:='';
- if expand[1]>='0' then
- if expand[3]<='z' then
- if expand[1]<expand[3] then
- begin
- for count:=ord(expand[1]) to ord(expand[3]) do st:=st+chr(count);
- expand:=st;
- dodash:=true;
- end;
- end;
-
- function getccl(class: maxstr) : maxstr;
- {
- Convert character class to internal form by removing brackets and
- expanding all DASH operators. The internal form is
- <prefix character> <n> <char 1> <char 2> ... <char n> where prefix is
- CCL for positive character class and NCCL for negative character class.
- }
- var encoded, part1, part2, expand: maxstr; PREFIX: char; dash_spot: integer;
- begin
- encoded:=copy(class,2,length(class)-2); {drop CCL and CCLEND}
- if encoded[1]=NEGATE then
- begin
- PREFIX:=NCCL; delete(encoded,1,1);
- end
- else PREFIX:=CCL;
-
- dash_spot:=pos(DASH,encoded);
- if dash_spot<length(encoded) then
- while dash_spot>1 do
- begin
- part1:=copy(encoded,1,dash_spot-2);
- part2:=copy(encoded,dash_spot+2,length(encoded));
- expand:=copy(encoded,dash_spot-1,dash_spot+1);
- if dodash(expand) then
- begin
- if length(part1)+length(part2)+length(expand)>255
- then error('regular expression too complex');
- encoded:=part1+expand+part2;
- dash_spot:=pos(DASH,encoded);
- end
- else dash_spot:=0; { DASH syntax wrong. Terminate loop }
- end; {while}
- getccl:=PREFIX+chr(length(encoded))+encoded;
- end;
-
- function nextpat(var arg, pattern: maxstr) : boolean;
- (*
- Delete next pattern from input string ARG and return it in PATTERN.
- ' '..'}' is the set of all literal characters.
- *)
- var class_length: integer;
- begin
- nextpat:=false;
- if arg='' then exit;
- case arg[1] of
- ESCAPE: begin
- if length(arg)=1 then arg:=arg+ESCAPE;
- pattern:=copy(arg,1,2);
- delete(arg,1,2);
- end;
- CCL: begin
- pattern:='';
- class_length:=pos(CCLEND,arg);
- if class_length<3 then
- begin
- pattern:=ESCAPE;
- class_length:=1;
- end;
- pattern:=pattern+copy(arg,1,class_length);
- delete(arg,1,class_length);
- end;
- ANY,BOL,EOL, CLOSURE, ' '..'}':
- begin
- pattern:=arg[1];
- delete(arg,1,1);
- end
- else error('nextpat');
- end; {case}
- nextpat:=true;
- end;
-
- procedure literal(var pat: maxstr; ch: char);
- { Internal format for a literal character. ex. "C" --> "@C" }
- begin
- pat:=pat+LITCHAR+ch;
- end;
-
- function makepat(entered_arg: maxstr): maxstr;
- {
- Takes input parameter ENTERED_ARG and returns internal form. To
- encode a closure, the CLOSURE character must be inserted before
- the last pattern in the PAT string. The starting position of the
- last pattern is held in OLD_LENGTH.
- }
- var pat, arg, pattern: maxstr; old_length, new_length: integer;
- begin
- pat:=''; arg:=entered_arg; old_length:=0; new_length:=0;
- while nextpat(arg,pattern) do
- begin
- case pattern[1] of
- ESCAPE: pat:=pat+LITCHAR+pattern[2];
- ANY: pat:=pat+ANY;
- BOL: if pat='' then pat:=BOL else literal(pat,BOL);
- EOL: if arg='' then pat:=pat+EOL else literal(pat,EOL);
- CCL: pat:=pat+getccl(pattern);
- CLOSURE: if new_length=0 then literal(pat,CLOSURE)
- else
- insert(CLOSURE,pat,old_length+1);
- else literal(pat,pattern);
- end; {case}
- old_length:=new_length;
- new_length:=length(pat);
- end; {while}
- makepat:=pat;
- end;