home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2004 March
/
CMCD0304.ISO
/
Software
/
Freeware
/
Programare
/
nullsoft
/
nsis20.exe
/
Contrib
/
VPatch
/
Source
/
GenPat
/
PatchGenerator.pas
< prev
next >
Wrap
Pascal/Delphi Source File
|
2003-12-27
|
19KB
|
616 lines
unit PatchGenerator;
{
VPatch 2 - Patch Generator
==========================
(c) 2002-2003 Van de Sande Productions
This unit contains the 'core' functionality of VPatch. TPatchGenerator can
load/create/save .PAT files and supports CreatePatch(Old, New) to generate
new patches. The only configurable parameter is StartBlockSize.
Though I cleaned up the code a little bit, there is very little documentation.
That's why I will briefly explain the general workings of the current VPatch
algoritm.
There is a source file, which is divided into blocks of BlockSize. Block 1
spans bytes 0-15, block 2 16-31, etc if blocksize = 16. For every block, a
checksum is calculated and then the block is inserted into a binary search
tree, which is sorted on this checksum.
Now, the target file (new version) is traversed linearly. For a block at a
certain position, the checksum is calculated. Then, a lookup is performed in
the binary search tree to find all blocks in the source file which match this
checksum. For every occurence, it is checked how many consecutive bytes match
with this block (note: since the checksum is not unique, this can be 0 as well,
but since all occurences are checked, the largest match is selected). Note also
that this match length is not limited to BlockSize but can be larger as well;
everything beyond the block end is checked as well (the block is merely used
as a starting position for checking the match).
For those biggest block matches between source/target files, a copy instruction
will be generated in the patch file. For 'inbetween' or unmatchable blocks, the
data of the new file is placed in the patch file. This involves some
housekeeping, which is what most of the other code does.
What's new
----------
2.1 20031219 Koen Added error checking to CreatePatch, returns
negative numbers when there are errors.
2.0 20030811 Koen Initial documentation
}
interface
uses
Classes,
Sysutils,
TreeCode,
VDSP_CRC;
type
TStatusNotifyEvent = procedure(S: String; Current, Total, Savings: Integer) of object;
TDebugNotifyEvent = procedure(S: String) of object;
PDataBlock = ^TDataBlock;
TDataBlock = record
SourceOffset: Integer;
TargetOffset: Integer;
Size: Integer;
Next: PDataBlock;
end;
//internal structure for FindBlock
TBlock = record
Offset: Integer;
Size: Integer;
end;
TPatchGenerator = class
private
noPat: Integer;
PRay: Array of TDataBlock;
NRay: Array of TDataBlock;
FPatchData: TMemoryStream;
FStartBlockSize: Integer; //initial block size
FBlockDivider: Integer; //... block size is divided by this
FMinimumBlockSize: Integer;//until this minimum is reached
FStepSize: Integer;
//input: ASubBlock, which is a pointer to the start of the block to look
//for in ABlock. The entire ABlock is searched. The function returns the
//offset of the block, when it is found. The ASize parameter contains the
//size of this block
procedure ShowDebug(S: String);
function FindBlock(ASubBlock, ABlock, ABlockTree: Pointer;
var ASubBlockStart: Integer; ASubBlockSize, ABlockSize,
AMatchSize, ABlockTreeNodeCount: Integer; var ASize: Integer): Integer;
procedure FindBlockSize(ASubBlock, ABlock: Pointer; ASubBlockSize,
ABlockSize: Integer; var ASubStart, AStart, AFoundSize: Integer);
function WritePatchToStream(Target: Pointer; SourceCRC,
TargetCRC: Integer): Integer;
procedure RemoveExistingPatch(ACRC: Integer);
public
constructor Create;
destructor Destroy; override;
procedure Clear;
function CreatePatch(SourceFileName, TargetFileName: String): Integer;
property StartBlockSize: Integer read FStartBlockSize write FStartBlockSize;
property BlockDivider: Integer read FBlockDivider write FBlockDivider;
property MinimumBlockSize: Integer read FMinimumBlockSize write FMinimumBlockSize;
property StepSize: Integer read FStepSize write FStepSize;
function Size: Integer;
procedure WriteToFile(AFileName: String);
procedure WriteToStream(AStream: TStream);
procedure LoadFromFile(AFileName: String);
end;
const
BUF_BLOCK_SIZE = 4096;
INIT_BLOCK_COUNT=10000;
var
DebugEvent: TDebugNotifyEvent = nil;
implementation
{ TPatchGenerator }
procedure TPatchGenerator.Clear;
begin
FPatchData.Clear;
end;
constructor TPatchGenerator.Create;
begin
inherited;
FPatchData:=TMemoryStream.Create;
end;
function TPatchGenerator.CreatePatch(SourceFileName,
TargetFileName: String): Integer;
var
fsSource, fsTarget: TFileStream;
fm: TMemoryStream;
Source, Target: Pointer;
SourceSize, TargetSize: Integer;
SourceCRC, TargetCRC: Integer;
SourceTree: Pointer;
SourceTreeNodeCount: Cardinal;
cBlockSize: Integer;
o,i,lastO: Integer;
Start,Siz,BetweenSiz: Integer;
retTO: Integer;
noN: Integer;
begin
fsSource:=TFileStream.Create(SourceFileName,fmOpenRead+fmShareDenyNone);
fsTarget:=TFileStream.Create(TargetFileName,fmOpenRead+fmShareDenyNone);
fm:=TMemoryStream.Create;
SetLength(PRay,INIT_BLOCK_COUNT);
SetLength(NRay,INIT_BLOCK_COUNT);
//Load those files into memory!
SourceSize:=fsSource.Size;
try
GetMem(Source,SourceSize);
except
on EOutOfMemory do begin
Result:=-2; // not enough memory for source file
Exit;
end;
end;
fm.CopyFrom(fsSource,SourceSize);
Move(fm.Memory^,Source^,SourceSize);
SourceCRC:=FileCRC(fsSource);
fsSource.Free;
fm.Clear;
TargetSize:=fsTarget.Size;
try
GetMem(Target,TargetSize);
except
on EOutOfMemory do begin
FreeMem(Source,SourceSize);
Result:=-3; // not enough memory for target file
Exit;
end;
end;
fm.CopyFrom(fsTarget,TargetSize);
Move(fm.Memory^,Target^,TargetSize);
TargetCRC:=FileCRC(fsTarget);
fsTarget.Free;
fm.Free;
if(SourceCRC = TargetCRC) then begin
FreeMem(Source,SourceSize);
FreeMem(Target,TargetSize);
Result:=-1;
Exit;
end;
PRay[0].TargetOffset:=0;
PRay[0].SourceOffset:=0;
PRay[0].Size:=0;
noPat:=1;
//termination block
PRay[noPat].SourceOffset:=0;
PRay[noPat].TargetOffset:=TargetSize;
PRay[noPat].Size:=0;
//we only have one pass in this mode
// StartBlockSize:=16;
MinimumBlockSize:=StartBlockSize;
StepSize:=1;
BlockDivider:=2;
//because we are dividing first inside.
cBlockSize:=StartBlockSize*BlockDivider;
SourceTree:=nil;
SourceTreeNodeCount:=BuildTree(Source,SourceTree,SourceSize,cBlockSize div BlockDivider);
SortTree(SourceTree,SourceTreeNodeCount);
//now, we must do the above again - with a smaller block size
repeat
if cBlockSize<=MinimumBlockSize then break;
cBlockSize:=cBlockSize div BlockDivider;
noN:=0;
for i:=1 to noPat do begin
//calculate location of the inbetween parts
Start:=PRay[i-1].TargetOffset+PRay[i-1].Size;
BetweenSiz:=PRay[i].TargetOffset-Start;
NRay[noN].SourceOffset:=PRay[i-1].SourceOffset;
NRay[noN].TargetOffset:=PRay[i-1].TargetOffset;
NRay[noN].Size:=PRay[i-1].Size;
Inc(noN);
if BetweenSiz>0 then begin
o:=Start;
repeat
//ShowDebug(PChar('DoFind '+IntToStr(o)));
LastO:=o;
retTO:=FindBlock(Target,Source,SourceTree,o,TargetSize,SourceSize,cBlockSize,SourceTreeNodeCount,Siz);
if not (Siz=0) then
ShowDebug(IntToStr(LastO)+' -> Source='+IntToStr(retTO)+' Target='+IntToStr(o)+' Size='+IntToStr(Siz));
if Siz=0 then begin
o:=LastO+StepSize;
end else begin
//we have found a block, let's add it!
NRay[noN].SourceOffset:=retTO;
NRay[noN].TargetOffset:=o;
NRay[noN].Size:=Siz;
Inc(noN);
if noN>=Length(NRay) then begin
SetLength(NRay,Length(NRay)*2);
SetLength(PRay,Length(PRay)*2);
end;
Inc(o,Siz);
end;
//check to see if we're not inside another one.
Siz:=NRay[noN].TargetOffset-NRay[noN-1].TargetOffset-NRay[noN-1].Size;
If Siz<0 then begin //that's impossible! (overlapping should be eliminated)
NRay[noN].TargetOffset:=NRay[noN].TargetOffset-Siz;
NRay[noN].Size:=NRay[noN].Size+Siz;
NRay[noN].SourceOffset:=NRay[noN].SourceOffset-Siz;
end;
until o>Start+BetweenSiz;
end;
end;
//I think the last termination block isn't copied: do so now.
NRay[noN].SourceOffset:=PRay[noPat].SourceOffset;
NRay[noN].TargetOffset:=PRay[noPat].TargetOffset;
NRay[noN].Size:=PRay[noPat].Size;
//copy back into PRay
for i:=0 to noN do begin
PRay[i].SourceOffset:=NRay[i].SourceOffset;
PRay[i].TargetOffset:=NRay[i].TargetOffset;
PRay[i].Size:=NRay[i].Size;
end;
noPat:=noN;
until false;
//writing is next!
ShowDebug('Writing patch');
Result:=WritePatchToStream(Target, SourceCRC, TargetCRC);
ClearTree(SourceTree,SourceTreeNodeCount);
FreeMem(Source,SourceSize);
FreeMem(Target,TargetSize);
ShowDebug('Done');
end;
destructor TPatchGenerator.Destroy;
begin
FPatchData.Free;
inherited;
end;
function TPatchGenerator.FindBlock(ASubBlock, ABlock, ABlockTree: Pointer; var ASubBlockStart: Integer;
ASubBlockSize, ABlockSize, AMatchSize, ABlockTreeNodeCount: Integer; var ASize: Integer): Integer;
//This procedure locates location of a block in the target file
//Then, it calls FindBlockSize to determine size of this block
var
MatchSize, FoundSize: Integer;
q,r,i: Integer;
FoundCache_SubOffset, FoundCache_Size, FoundCache_Offset: Integer;
Checksum: Cardinal;
PFound: PTreeNode;
FoundCount: Integer;
begin
//if we find nothing...
FoundCache_Size:=0;
FoundCache_Offset:=0;
FoundCache_SubOffset:=ASubBlockStart;
FindBlock:=0;
ASize:=0;
MatchSize:=AMatchSize;
//we can only find MatchSize sized blocks in the tree!
if MatchSize > ASubBlockSize - ASubBlockStart then Exit;
if MatchSize = 0 then Exit;
Checksum:=0;
calculateChecksum(ASubBlock,ASubBlockStart,MatchSize,Checksum);
PFound:=TreeFind(Checksum,ABlockTree,ABlockTreeNodeCount,FoundCount);
for i:=0 to Pred(FoundCount) do begin
FoundSize:=MatchSize;
//q = offset in Block
q:=PFound^.Offset;
//r = offset in SubBlock
r:=ASubBlockStart;
FindBlockSize(ASubBlock, ABlock, ASubBlockSize, ABlockSize, r, q, FoundSize);
if FoundSize>FoundCache_Size then begin
FoundCache_SubOffset:=r;
FoundCache_Offset:=q;
FoundCache_Size:=FoundSize;
end;
ShowDebug(' Block Size Start='+IntToStr(r)+' tarStart='+IntToStr(q)+' Size='+IntToStr(FoundSize));
PFound:=PTreeNode(Integer(PFound)+SizeOf(TTreeNode));
end;
FindBlock:=FoundCache_Offset;
ASize:=FoundCache_Size;
ASubBlockStart:=FoundCache_SubOffset;
end;
procedure TPatchGenerator.FindBlockSize(ASubBlock, ABlock: Pointer; ASubBlockSize, ABlockSize: Integer; var ASubStart,AStart,AFoundSize: Integer);
var
FoundSize: Integer;
a,c,d,i: Integer;
f1p,f2p,f1Size,f2Size: Integer;
beforeSize: Integer;
CurBufSize: Integer;
begin
//OK, now let's go...
//Trace after -> how long does this go on?
f1p:=Integer(ASubBlock)+ASubStart;
f2p:=Integer(ABlock)+AStart;
f1Size:=ASubBlockSize-ASubStart;
f2Size:=ABlockSize-AStart;
FoundSize:=0;
CurBufSize := BUF_BLOCK_SIZE; //size of the block we're checking
while not (CurBufSize = 0) do begin
//we need equal bytes from both... so if one of them EOF, it's the end.
if FoundSize+CurBufSize>f1Size then CurBufSize:=f1Size - FoundSize;
if FoundSize+CurBufSize>f2Size then CurBufSize:=f2Size - FoundSize;
if CompareMem(Pointer(f1p),Pointer(f2p),CurBufSize) then begin
Inc(FoundSize,CurBufSize);
Inc(f1p,CurBufSize);
Inc(f2p,CurBufSize);
end
else begin
CurBufSize:=CurBufSize div 2;
end;
end;
if FoundSize = 0 then begin AFoundSize:=0; Exit; end;
//Trace before -> how much bytes are still the same before the block?
//First, read 1 block from source and 1 block from target, start from back to compare how much they differ
//just take BUF_BLOCK_SIZE as maximum size for the block before - that's surely
//big enough!
beforeSize:=BUF_BLOCK_SIZE;
a:=ASubStart-beforeSize;
if a<0 then begin
a:=0;
beforeSize:=ASubStart;
end;
//b is the current before block size
c:=AStart-beforeSize;
if c<0 then begin
c:=0;
beforeSize:=AStart;
a:=ASubStart-beforeSize;
end;
//a=Offset in source
//b=Size of beforeblock
//c=offset in target
d:=0;
for i:=beforeSize-1 downto 0 do begin
//if not (f1^[a+i]=f2^[c+i]) then begin
if not (PByte(Integer(ASubBlock)+a+i)^=PByte(Integer(ABlock)+c+i)^) then begin
//d=how many bytes before are the same?
Break;
end;
Inc(d);
end;
Inc(FoundSize,d);
Dec(ASubStart,d);
Dec(AStart,d);
AFoundSize:=FoundSize;
end;
function TPatchGenerator.Size: Integer;
begin
Result:=FPatchData.Size;
end;
procedure TPatchGenerator.ShowDebug(S: String);
begin
if Assigned(DebugEvent) then DebugEvent(S);
end;
function TPatchGenerator.WritePatchToStream(Target: Pointer; SourceCRC, TargetCRC: Integer): Integer;
var
HeadID: Array[0..3] of Char;
NoBlocks, NoBlocksOffset, BodySize, BodySizeOffset: Integer;
b: Byte;
w: Word;
i, j: Integer;
l: LongWord;
Start, Siz: Integer;
PTarget: Pointer;
begin
RemoveExistingPatch(SourceCRC);
with FPatchData do begin
Seek(0,soFromEnd);
if Size = 0 then begin
HeadID:='VPAT';
Write(HeadID,SizeOf(HeadID));
l:=0;
Write(l,SizeOf(l)); //NoFiles
end;
l:=0;
NoBlocksOffset:=Position;
Write(l,SizeOf(l)); //should become NoBlocks later
Write(SourceCRC,SizeOf(SourceCRC)); //source CRC
Write(TargetCRC,SizeOf(TargetCRC)); //target CRC
BodySizeOffset:=Position;
Write(l,SizeOf(l)); //should become BodySize (of this patch)
NoBlocks:=0;
BodySize:=0;
//Write the patch...
for i:=0 to noPat - 1 do begin
//write char 1 - integer/copysource
//write char 2 - long/copysource
//write char 5 - integer/insidepatch
//write char 6 - long/insidepatch
Start:=PRay[i].TargetOffset+PRay[i].Size;
Siz:= PRay[i+1].TargetOffset-Start;
If Siz<0 then begin //that's impossible! (overlapping should be eliminated)
PRay[i+1].TargetOffset:=PRay[i+1].TargetOffset-Siz;
PRay[i+1].Size:=PRay[i+1].Size+Siz;
PRay[i+1].SourceOffset:=PRay[i+1].SourceOffset-Siz;
Siz:=0;
end;
if not (PRay[i].Size=0) then begin
if (PRay[i].Size<=255) then begin
b:=1;
Write(b,SizeOf(b));
b:=PRay[i].Size;
Write(b,SizeOf(b));
Inc(BodySize,2);
end else if PRay[i].Size<=65535 then begin
b:=2;
Write(b,SizeOf(b));
w:=PRay[i].Size;
Write(w,SizeOf(w));
Inc(BodySize,3);
end else begin
b:=3;
Write(b,SizeOf(b));
Write(PRay[i].Size,SizeOf(Integer));
Inc(BodySize,5);
end;
Write(PRay[i].SourceOffset,SizeOf(Integer));
Inc(BodySize,SizeOf(Integer));
Inc(NoBlocks);
end;
//Now write the writeblock
If Not (Siz = 0) Then begin
if Siz<=255 then begin
b:=5;
Write(b,SizeOf(b));
b:=Siz;
Write(b,SizeOf(b));
Inc(BodySize,2);
end else if Siz<=65535 then begin
b:=6;
Write(b,1);
w:=Siz;
Write(w,2);
Inc(BodySize,3);
end else begin
b:=7;
Write(b,1);
Write(Siz,4);
Inc(BodySize,5);
end;
PTarget:=Pointer(Integer(Target)+Start);
j:=Start;
repeat
//read
if (j+4096>Start+Siz) then begin
Write(PTarget^,Start+Siz-j);
break;
end;
Write(PTarget^,4096);
Inc(j,4096);
PTarget:=Pointer(Integer(PTarget)+4096);
until false;
Inc(BodySize,Siz);
Inc(NoBlocks);
end;
end;
Seek(NoBlocksOffset,soFromBeginning);
Write(NoBlocks,SizeOf(NoBlocks));
Seek(BodySizeOffset,soFromBeginning);
Write(BodySize,SizeOf(BodySize));
ShowDebug('Patch body size: '+IntToStr(BodySize));
ShowDebug('Total patch size:'+IntToStr(Size));
//now increase file count
Seek(4,soFromBeginning);
Read(i,SizeOf(i));
Inc(i);
Seek(4,soFromBeginning);
Write(i,SizeOf(i));
Seek(0,soFromEnd);
Result:=BodySize;
end;
end;
procedure TPatchGenerator.WriteToFile(AFileName: String);
var
fs: TFileStream;
begin
fs:=TFileStream.Create(AFileName,fmCreate);
FPatchData.Seek(0,soFromBeginning);
fs.CopyFrom(FPatchData,FPatchData.Size);
fs.Free;
end;
procedure TPatchGenerator.LoadFromFile(AFileName: String);
var
fs: TFileStream;
begin
fs:=TFileStream.Create(AFileName,fmOpenRead);
FPatchData.Clear;
FPatchData.CopyFrom(fs,fs.Size);
fs.Free;
end;
procedure TPatchGenerator.RemoveExistingPatch(ACRC: Integer);
var
HeadID: Array[0..3] of Char;
NoFiles, i, j, SourceCRC, MSize: Integer;
StartPos: Integer;
ms: TMemoryStream;
begin
with FPatchData do begin
if Size = 0 then Exit;
Seek(0,soFromBeginning);
Read(HeadID,SizeOf(HeadID));
if HeadID = 'VPAT' then begin
Read(NoFiles,SizeOf(NoFiles));
for i:=0 to Pred(NoFiles) do begin
if Position >= Size then Break;
StartPos:=Position;
Read(j,SizeOf(j)); //NoBlocks
Read(SourceCRC,SizeOf(SourceCRC)); //SourceCRC
Read(j,SizeOf(j)); //TargetCRC
Read(j,SizeOf(j)); //BodySize
Seek(j,soFromCurrent);
if SourceCRC = ACRC then begin
ms:=TMemoryStream.Create;
MSize:=Size-Position;
if MSize > 0 then ms.CopyFrom(FPatchData,MSize);
ms.Seek(0, soFromBeginning);
FPatchData.Seek(StartPos,soFromBeginning);
FPatchData.SetSize(Size - j - SizeOf(Integer) * 4);
FPatchData.CopyFrom(ms,ms.Size);
ms.Free;
Dec(NoFiles);
Seek(4,soFromBeginning);
Write(NoFiles,SizeOf(NoFiles));
Break;
end;
end;
end;
end;
end;
procedure TPatchGenerator.WriteToStream(AStream: TStream);
begin
FPatchData.Seek(0,soFromBeginning);
AStream.CopyFrom(FPatchData,FPatchData.Size);
end;
end.