home *** CD-ROM | disk | FTP | other *** search
- ;Faster string search routine to substitute the POS()
- ;function in Turbo Pascal 4 or 5.
- ;Based on the Boyer-Moore algorithm.
- ;Program Author: Costas Menico (Dr Dobbs, Jul 89)
- ;
- ;Declare as follows:
- ; {$F+}
- ; {$L SEARCH.OBJ}
- ; FUNCTION posBM(pat,str : STRING) : BYTE; EXTERNAL;
- ;Call as follows:
- ; location := posBM(pat, str);
- ;
- ;Courtesy of Toad Hall
- ;v1.1 Toad Hall Tweaks, 11 Jun 89
- ; - Very minor, no functional changes
-
- SKIPARRLENGTH EQU 256 ;length of the skip array
-
- ;function work stack
- dstk STRUC
- patlen dw ? ;pattern length (also BP base work area)
- strlen dw ? ;str length
- skiparr db SKIPARRLENGTH DUP(?) ;skip array
- pattxt dd 0 ;pattern address
- strtxt dd 0 ;string text address
- dstk ENDS
-
- ;Total stack (caller's plus work stack)
- cstk STRUC
- ourdata db SIZE dstk DUP(?) ;work stack size
- bpsave dw 0 ;save BP here
- retaddr dd 0 ;points to return address
- straddr dd 0 ;points to string address
- pataddr dd 0 ;points to pattern address
- cstk ENDS
-
- PARAMSIZE EQU SIZE pataddr + SIZE straddr ;size ofr parameter list
-
- PUBLIC PosBM ;function name declaration
-
- CODE SEGMENT PARA PUBLIC 'CODE'
- ASSUME CS:CODE
-
- ;Entry point to POSBM function
-
- PosBM proc FAR
- push bp
- sub sp,SIZE dstk ;create work area
- mov bp,sp
-
- push DS ;save caller's DS
- xor ah,ah
- cld
-
- ;Get and save pattern length and address
- lds si,[bp.pataddr]
- mov word ptr [bp.pattxt][2],DS
- lodsb ;get pattern length (1 byte)
- or al,al ;if length is null
- jnz NotNullP
- jmp NoMatch ; then exit
-
- NotNullP:
- mov cx,ax ;save length to check if 1 later
-
- mov [bp.patlen],ax ;save pattern length
- mov word ptr [bp.pattxt],si ;save address
-
- ;Get and save string text length and address
- lds si,[bp.straddr]
- mov word ptr [bp.strtxt][2],DS
- lodsb ;get string length
- or al,al ;if string text is null
- jnz NotNullS
- jmp NoMatch ; then exit
-
- NotNullS:
- mov [bp.strlen],ax ;save string length
- mov word ptr [bp.strtxt],si ;save address
-
- cmp cx,1 ;is pattern length 1 char?
- jne Do_Boyer_Moore ;nope
-
- lds si,[bp.pattxt] ;yes, do a straight search
- mov cx,ax ;still has string length v1.1
- lodsb ;get the single char pattern
- les di,[bp.strtxt] ;get string address
- ;v1.1 mov cx,[bp.strlen] ;get string length
- repne scasb ;search
- jz Match1 ;found - adjust last DI psn
- jmp NoMatch ;not found, exit
-
- Match1:
- mov si,di
- sub si,2 ;adjust SI psn
- jmp ExactMatch
-
- Do_Boyer_Moore:
-
- ;Fill the ASCII character skiparray with the pattern length
- lea di,[bp.skiparr] ;get skip array address
- mov dx,SS
- mov ES,dx
- mov al,byte ptr [bp.patlen] ;get pattern size
- mov ah,al ;put in AH also
- mov cx,SKIPARRLENGTH / 2 ;array size
- rep stosw ;fill with pattern length
-
- ;Replace in the ASCII skiparray the corresponding
- ;character offset from the end of the pattern minus 1
-
- lds si,[bp.pattxt] ;get pattern address
-
- ;v1.1 donno why he did this .. he never uses it!
- ;v1.1 lea bx,[bp.skiparr] ;get skip array address
-
- ;v1.1 mov cx,[bp.patlen] ;get pattern length
- xor ah,ah ;clear MSB v1.1
- mov cx,ax ;AX is still pattern length v1.1
- dec cx ; minus 1
- mov bx,bp ;save BP
- lea bp,[bp.skiparr] ;get skip array address
- ;v1.1 xor ah,ah
- Fill_SkipArray:
- lodsb ;get char from pattern
- mov di,ax ;use it as an index
- mov [bp+di],cl ;store the last skip value
- loop Fill_SkipArray
-
- lodsb
- mov di,ax
- mov [bp+di],cl ;store the last skip value
- mov bp,bx ;restore BP
-
- ;Now initialize our pattern and string text pointers
- ;to start searching
- lds si,[bp.strtxt] ;string address
- lea di,[bp.skiparr] ;skip array address
- mov dx,[bp.strlen] ;string length
- dec dx ; minus 1 for each check
- mov ax,[bp.patlen] ;pattern length
- dec ax ; starting skip value
- xor bh,bh ;clear MSB
- std ;reverse compare
-
- ;Get char from text. Use the char as an index
- ;into the skip array, looking for a skip value of 0.
- ;If found, execute a brute force search on the pattern.
- SearchLast:
- sub dx,ax ;check if string exhausted
- jc NoMatch ;yes, no match
-
- add si,ax ;no - slide pattern with skip value
- mov bl,[si] ;get char, use as an index
- mov al,SS:[di+bx] ; and get the new skip value
- or al,al ;if 0, then possible match
- jne SearchLast ; try again by sliding to right
-
- ;We have a possible match,
- ;therefore do the reverse Brute-force compare
-
- mov bx,si ;save string addr
- mov cx,[bp.patlen] ;pattern length
- les di,[bp.pattxt] ;pattern address
- dec di ;adjust
- add di,cx ; and add to point to EOS
- repe cmpsb ;do reverse compare
- je ExactMatch ;if equal, we found a match
-
- mov ax,1 ;else set skip value to 1
- lea di,[bp.skiparr] ;skip array address
- mov si,bx ;get string address
- xor bh,bh ;No - clear MSB
- jmp short SearchLast ;try again
-
- ExactMatch:
- mov ax,si ;save current psn in string
- lds si,[bp.strtxt] ;get start of strtxt
- sub ax,si ;sub and add 2 to get psn
- add ax,2 ; in strtxt where pattern is found
- jmp short EndSearch ;exit function
-
- NoMatch:
- xor ax,ax ;no match, return a 0
-
- EndSearch:
- cld
- pop DS ;recover
-
- mov sp,bp ;recover last stack psn
- add sp,SIZE dstk ;clear up work area
- pop bp ;recover BP
- ret PARAMSIZE ;return with AX the POSBM value
-
- PosBM ENDP
-
- CODE ENDS
- END