home *** CD-ROM | disk | FTP | other *** search
- ; Searches a buffer for a specified pattern. In case of a mismatch,
- ; uses the value of the mismatched byte to skip across as many
- ; potential match locations as possible (partial Boyer-Moore).
- ; Returns start offset of first match searching forward, or NULL if
- ; no match is found.
- ; Tested with TASM 2.0.
- ; C near-callable as:
- ; unsigned char * FindString(unsigned char * BufferPtr,
- ; unsigned int BufferLength, unsigned char * PatternPtr,
- ; unsigned int PatternLength);
-
- parms struc
- dw 2 dup(?) ;pushed BP & return address
- BufferPtr dw ? ;pointer to buffer to be searched
- BufferLength dw ? ;# of bytes in buffer to be searched
- PatternPtr dw ? ;pointer to pattern for which to search
- PatternLength dw ? ;length of pattern for which to search
- parms ends
-
- .model small
- .code
- public _FindString
- _FindString proc near
- cld
- push bp ;preserve caller's stack frame
- mov bp,sp ;point to our stack frame
- push si ;preserve caller's register variables
- push di
- sub sp,256*2 ;allocate space for SkipTable
- ; Create the table of distances by which to skip ahead on mismatches
- ; for every possible byte value. First, initialize all skips to the
- ; pattern length; this is the skip distance for bytes that don't
- ; appear in the pattern.
- mov ax,[bp+PatternLength]
- and ax,ax ;return an instant match if the pattern is
- jz InstantMatch ; 0-length
- mov di,ds
- mov es,di ;ES=DS=SS
- mov di,sp ;point to SkipBuffer
- mov cx,256
- rep stosw
- dec ax ;from now on, we only need
- mov [bp+PatternLength],ax ; PatternLength - 1
- ; Point to last (rightmost) byte of first potential pattern match
- ; location in buffer.
- add [bp+BufferPtr],ax
- ; Reject if buffer is too small, and set the count of the number of
- ; potential pattern match locations in the buffer.
- sub [bp+BufferLength],ax
- jbe NoMatch
- ; Set the skip values for the bytes that do appear in the pattern to
- ; the distance from the byte location to the end of the pattern. When
- ; there are multiple instances of the same byte, the rightmost
- ; instance's skip value is used. Note that the rightmost byte of the
- ; pattern isn't entered in the skip table; if we get that value for a
- ; mismatch, we know for sure that the right end of the pattern has
- ; already passed the mismatch location, so this is not a relevant byte
- ; for skipping purposes.
- mov si,[bp+PatternPtr] ;point to start of pattern
- and ax,ax ;are there any skips to set?
- jz SetSkipDone ;no
- mov di,sp ;point to SkipBuffer
- SetSkipLoop:
- sub bx,bx ;prepare for word addressing off byte value
- mov bl,[si] ;get the next pattern byte
- inc si ;advance the pattern pointer
- shl bx,1 ;prepare for word look-up
- mov [di+bx],ax ;set the skip value when this byte value is
- ; the mismatch value in the buffer
- dec ax
- jnz SetSkipLoop
- SetSkipDone:
- mov dl,[si] ;DL=rightmost pattern byte from now on
- dec si ;point to next-to-rightmost byte of pattern
- mov [bp+PatternPtr],si ; from now on
- ; Search the buffer.
- std ;for backward REPZ CMPSB
- mov di,[bp+BufferPtr] ;point to the first search location
- mov cx,[bp+BufferLength] ;# of match locations to check
- SearchLoop:
- mov si,sp ;point SI to SkipTable
- ; Skip through until there's a match for the rightmost pattern byte.
- QuickSearchLoop:
- mov bl,[di] ;rightmost buffer byte at this location
- cmp dl,bl ;does it match the rightmost pattern byte?
- jz FullCompare ;yes, so keep going
- sub bh,bh ;convert to a word
- add bx,bx ;prepare for look-up in SkipTable
- mov ax,[si+bx] ;get skip value from skip table for this
- ; mismatch value
- add di,ax ;BufferPtr += Skip;
- sub cx,ax ;BufferLength -= Skip;
- ja QuickSearchLoop ;continue if any buffer left
- jmp short NoMatch
- ; Return a pointer to the start of the buffer (for 0-length pattern).
- align 2
- InstantMatch:
- mov ax,[bp+BufferPtr]
- jmp short Done
- ; Compare the pattern and the buffer location, searching from high
- ; memory toward low (right to left).
- align 2
- FullCompare:
- mov [bp+BufferPtr],di ;save the current state of
- mov [bp+BufferLength],cx ; the search
- mov cx,[bp+PatternLength] ;# of bytes yet to compare
- jcxz Match ;done if there was only one character
- mov si,[bp+PatternPtr] ;point to next-to-rightmost bytes
- dec di ; of buffer location and pattern
- repz cmpsb ;compare the rest of the pattern
- jz Match ;that's it; we've found a match
- ; It's a mismatch; let's see what we can learn from it.
- inc di ;compensate for 1-byte overrun of REPZ CMPSB;
- ; point to mismatch location in buffer
- ; # of bytes that did match.
- mov si,[bp+BufferPtr]
- sub si,di
- ; If, based on the mismatch character, we can't even skip ahead as far
- ; as where we started this particular comparison, then just advance by
- ; 1 to the next potential match; otherwise, skip ahead from this
- ; comparison location by the skip distance for the mismatch character,
- ; less the distance covered by the partial match.
- sub bx,bx ;prepare for word addressing off byte value
- mov bl,[di] ;get the value of the mismatch byte in buffer
- add bx,bx ;prepare for word look-up
- add bx,sp ;SP points to SkipTable
- mov cx,[bx] ;get the skip value for this mismatch
- mov ax,1 ;assume we'll just advance to the next
- ; potential match location
- sub cx,si ;is the skip far enough to be worth taking?
- jna MoveAhead ;no, go with the default advance of 1
- mov ax,cx ;yes; this is the distance to skip ahead from
- ; the last potential match location checked
- MoveAhead:
- ; Skip ahead and perform the next comparison, if there's any buffer
- ; left to check.
- mov di,[bp+BufferPtr]
- add di,ax ;BufferPtr += Skip;
- mov cx,[bp+BufferLength]
- sub cx,ax ;BufferLength -= Skip;
- ja SearchLoop ;continue if any buffer left
- ; Return a NULL pointer for no match.
- align 2
- NoMatch:
- sub ax,ax
- jmp short Done
- ; Return start of match in buffer (BufferPtr - (PatternLength - 1)).
- align 2
- Match:
- mov ax,[bp+BufferPtr]
- sub ax,[bp+PatternLength]
- Done:
- cld ;restore default direction flag
- add sp,256*2 ;deallocate space for SkipTable
- pop di ;restore caller's register variables
- pop si
- pop bp ;restore caller's stack frame
- ret
- _FindString endp
- end