home *** CD-ROM | disk | FTP | other *** search
-
- {
- HDIF : a file difference finder
- }
-
- program diff;
-
- const
- VERSION = '1.0';
- MAXFILE = 1000; { lines in largest old/new file }
- MAXOVER = 1001; { Allow for a sentinal Value }
- MAXCHAR = 127; { Max chars/line }
- MAXSYM = 2053; { Symbol Table Size - Prime > 2*MAXFILE - }
- TOPSYM = 2052; { Max Index of SYM array }
-
- {
- CP/M storage = 6 * MAXOVER + 8 * MAXSYM + (128 * UNIQUE LINES)
- }
-
- type
- symnum = 0..TOPSYM; { Type of symbol table indices }
- linenum = 1..MAXOVER; { indices into OA and NA }
- linecnt = 0..MAXOVER;
-
- { Compiler dependent - }
-
- ltext = string[MAXCHAR];
-
- linerec = record
- matched : boolean; { True when matched with other file }
- index : integer; { index to symtab of other file }
- end;
-
- symrec = record { one symbol table entry }
- hashval : integer; { hash value of this entry }
- lineval : ^ltext; { address of text string }
- Oline : linenum; { index to line in Old File OA }
- Ocount : 0..2; { how many occurances in old file 0,1,many }
- Ncount : 0..2; { " for new file }
- end;
-
- var
- oldmax, newmax : linecnt; { linecount of each file }
- oldfile,newfile,
- diffile : text; { files used }
- OA,NA : array[linenum] of linerec;
- ST : array[symnum ] of symrec;
- kill_line_1 : boolean; { used by levee driver }
-
- {
- Compute a 15 bit signature based on all the characters
- in the line. Pick a symtab entry based on the hash value
- and return the symbol index. If there is a collision
- (2 lines hash to same entry), just overflow by 1 till
- no more collision. The full hash value is saved in the
- entry for quick discovery of unequal lines.
- }
-
- function store(var t : ltext ) : symnum;
- label try_entry, empty_ent, done;
- var s : symnum;
- h : integer;
-
- function hash(var t : ltext) : integer;
- const hashlim = 16256; { Maxint div 4 - 127 }
- var q : integer;
- len : 0..MAXCHAR;
- begin
- q := 0;
- for len := length(t) downto 1 do begin
- q := q + ord(t[len]);
- { shift left to 14 bits then wrap }
- if q < hashlim then
- q := q + q { << 1 bit pos }
- else
- q := 1 + q div 2;
- end;
- hash := q;
- end; { hash }
-
- begin { store }
- h := hash(t);
- s := h mod maxsym; { initial probe of symtab }
- {*
- * find duplicate line or a vacant entry starting at ST[s].
- *}
- try_entry: if (ST[s].hashval < 0) then goto empty_ent;
- {*
- * entry is used - is it a duplicate?
- *}
- if (ST[s].hashval = h) then { fast test }
- if (ST[s].lineval^ = t) then goto done;
- s := (s+1) mod MAXSYM; { try next entry }
- goto try_entry;
- empty_ent: with st[s] do begin
- hashval := h;
- new(lineval);
- lineval^ := t;
- end;
- done: store := s;
- end;
-
- {*
- * PASS 1 read old file,
- * store its lines in the symtab
- * count their occurances in Ocount for each entry
- *}
-
- procedure pass1;
- var o : linecnt;
- s : symnum;
- t : ltext;
- begin
- o := 0;
- repeat
- readln(oldfile,t);
- o := o + 1;
- s := store(t);
- with ST[s] do begin
- Oline := o;
- if Ocount <> 2 then Ocount := succ(Ocount);
- end;
- with OA[o] do begin
- matched := false;
- index := s;
- end;
- until (eof(oldfile) or (o = maxfile));
- with OA[o+1] do begin { create sentinal saying END }
- matched := true;
- index := MAXOVER;
- end;
- oldmax := o;
- end;
-
-
- {*
- * PASS 2 read new file,
- * store its lines in the symtab
- * count their occurances in Ncount for each entry
- *}
-
- procedure pass2;
- var n : linecnt;
- s : symnum;
- t : ltext;
- begin
- n := 0;
- repeat
- readln(newfile,t);
- n := n + 1;
- s := store(t);
- with ST[s] do
- if (Ncount <> 2) then Ncount := succ(Ncount);
- with NA[n] do begin
- matched := false;
- index := s;
- end;
- until (eof(newfile) or (n = MAXFILE) );
- with NA[n+1] do begin { create sentinal saying END }
- matched := true;
- index := MAXOVER;
- end;
- newmax := n;
- end;
-
- {*
- * MATCHUP record the fact that lines match
- * by placing reciprocal indices
- * in their array indices.
- *}
-
- procedure matchup(o,n : linenum);
- begin
- with OA[o] do begin
- matched := true;
- index := n;
- end;
- with NA[n] do begin
- matched := true;
- index := o;
- end;
- end;
-
- {*
- * PASS 3 rule 1 - a line appears 1 time in
- * a file is the same line, but possably
- * moved
- *}
-
- procedure pass3;
- var s : symnum;
- o,n : linenum;
- begin
- for n := 1 to newmax do begin
- s := NA[n].index;
- with ST[s] do
- if (Ocount = 1) and (Ncount = 1) then
- matchup(Oline,n)
- end;
- end;
-
- {*
- * PASS 4a rule 2 - sweeping down to end of file
- * to spread the rule-1 matches towards end of file
- *}
-
- procedure pass4a;
- var o, o1, n, n1 : linenum;
- begin
- for n := 1 to newmax - 1 do
- if (NA[n].matched) then begin
- n1 := n + 1;
- if not NA[n1].matched then begin
- o := NA[n].index;
- if o < oldmax then begin
- o1 := o + 1;
- {*
- * The 'matched' flags are used to ensure that symbol
- * indices are compared.
- *}
- if (OA[o1].matched = NA[n1].matched) AND
- (OA[o1].index = NA[n1].index) then
- matchup(o1,n1);
- end;
- end;
- end;
- end;
-
- {*
- * PASS 4b rule 2 - sweeping up to begining of file
- * to spread the rule-1 matches.
- *}
-
- procedure pass4b;
- var o, o1, n, n1 : linenum;
- begin
- for n := newmax downto 2 do
- if (NA[n].matched) then begin
- n1 := n - 1;
- if not NA[n1].matched then begin
- o := NA[n].index;
- if o > 1 then begin
- o1 := o - 1;
- if (OA[o1].matched = NA[n1].matched) AND
- (OA[o1].index = NA[n1].index) then
- matchup(o1,n1);
- end;
- end;
- end;
- end;
-
-
- {*
- * PASS 5 KLUGE - Change block moves into delete/insert pairs
- *}
-
- procedure pass5;
- var o, n : linenum;
-
- procedure resolve( var o,n : linenum);
- {*
- * a discontinuity starts at OA[o] and NA[n].
- * figure out which block has fewer lines.
- * convert smaller group to inserts/deletes
- *}
- var xo, xn : linenum;
- t, ospan, nspan : integer;
- s : symnum;
- begin
- xo := o; { measure block starting at OA[o] }
- repeat
- t := 1 + OA[xo].index;
- xo := xo + 1;
- until t <> OA[xo].index;
- ospan := xo - o;
-
- xn := n; { measure block starting at NA[n] }
- repeat
- t := 1 + NA[xn].index;
- xn := xn + 1;
- until t <> NA[xn].index;
- nspan := xn - n;
-
- if ospan < nspan then begin { block move DOWN }
- xo := o;
- xn := OA[o].index;
- t := ospan;
- o := o + ospan;
- end
- else begin
- xn := n;
- xo := NA[n].index;
- t := nspan;
- n := n + nspan;
- end;
-
- {*
- * unmatch a block of matched lines
- * we need symbol table entry here - find by
- * scanning table. Could be sped up.
- *}
- while t > 0 do begin
- s := 0;
- while ST[s].Oline <> xo do
- s := s + 1;
- with OA[xo] do begin
- matched := false;
- index := s;
- end;
- with NA[xn] do begin
- matched := false;
- index := s;
- end;
- xo := xo + 1;
- xn := xn + 1;
- t := t - 1;
- end;
- end; { resolve }
-
- begin { pass 5 }
- o := 1;
- n := 1;
- while OA[o].index <> maxover do begin
- while not OA[o].matched do {skip deletes }
- o := o + 1;
- while not NA[n].matched do {skip deletes }
- n := n + 1;
- if OA[o].index = n then begin { matching lines }
- o := o + 1;
- n := n + 1;
- end
- else { discontinuity ? }
- if OA[o].index <> maxover then {yes}
- resolve(o,n)
- else {no - just the sentinal at end.} ;
- end {while}
- end; {pass 5 }
-
- {*
- * PASS 6 write the difference file as an editor script
- *}
-
- {$I DIFF-ENG.DRV } { driver for ENGLISH text }
- { There is a driver for CPM's ED, also }
-
- procedure pass6;
- var o, n, xn : linenum;
- t : integer;
-
- begin { pass 6 }
- putfirst;
- o := 1;
- n := 1;
- repeat
- if not OA[o].matched then begin { deleting }
- t := 0;
- repeat
- t := t + 1;
- o := o + 1;
- until OA[o].matched;
- putdel(t);
- end;
- if not NA[n].matched then begin { inserting }
- if n = 1 then puttop; { Put text above line 1 }
- if o > oldmax then putbot; { insert at bottom }
- t := 0;
- xn := n;
- repeat
- t := t + 1;
- n := n + 1;
- until NA[n].matched;
- putins(xn,t);
- end;
- t := 0;
- while OA[o].matched AND NA[n].matched AND (o <= oldmax) do begin
- { skip unchanged lines }
- t := t + 1;
- o := o + 1;
- n := n + 1;
- end;
- if t > 0 then putmov(t);
- until (OA[o].index = maxover) AND (NA[n].index = maxover);
- putlast;
- end; { pass 6 }
-
- procedure setup;
- var j : symnum;
- fspec : string[14];
-
- procedure quit(i : integer);
- begin
- writeln;
- write('DIFF: Aborted because of ');
- case i of
- 1 : writeln('no OLD filename.');
- 2 : writeln('no NEW filename.');
- 3 : writeln('no Difference output file.');
- end;
- halt;
- end;
-
- begin
- for j := 0 to TOPSYM do
- with ST[j] do begin
- hashval := -1; { entry not used }
- Oline := maxover;
- Ocount := 0;
- Ncount := 0;
- end;
- oldmax := 0;
- newmax := 0;
-
- write(chr(27),'(');
- writeln('DIFF - file difference generator for "',ED_NAME,'".');
- write ('Original (old) file: '); readln(fspec); if fspec = '' then quit(1);
- assign(oldfile, fspec); reset(oldfile);
- write ('Modified (new) file: '); readln(fspec); if fspec = '' then quit(2);
- assign(newfile, fspec); reset(newfile);
- write ('Diff output file: '); readln(fspec); if fspec = '' then quit(3);
- assign(DIFFILE, fspec); rewrite(diffile);
- end;
-
- begin { MAIN program - DIFF }
- setup;
- if eof(oldfile) then
- writeln('Old file is empty - DIFF is all insert')
- else if eof(newfile) then
- writeln('New file is empty - DIFF is all delete')
- else begin
- write('Pass 1',^M);
- pass1;
- write('Pass 2',^M);
- pass2;
- write('Pass 3',^M);
- pass3;
- write('Pass 4a',^M);
- pass4a;
- write('Pass 4b',^M);
- pass4b;
- write('Pass 5 ',^M);
- pass5;
- write('Pass 6',^M);
- pass6;
- close(oldfile);
- close(newfile);
- close(diffile);
- writeln('Done ');
- end;
- end.
-
-