home *** CD-ROM | disk | FTP | other *** search
- ;
- ; al = input character
- ; di = nlist tail
- ; si = clist tail
- ; dx = clist head
- ;
- RE_TEXT SEGMENT WORD PUBLIC 'CODE' ; one word-aligned segment holds the code and all of its variables
- assume cs:RE_TEXT ; _re_scan loads ds and es from cs, so all three
- assume ds:RE_TEXT ; segment registers are assumed to address RE_TEXT
- assume es:RE_TEXT ; while the matcher runs
- extrn _reFast1:BYTE ; first fast-scan character; 0 disables the fast scan in myxchg
- extrn _reFast2:BYTE ; second fast-scan character, also looked for by the fast scan
- ;
- ; XTAG:re_scan
- ; re_scan( char * beginBuffer, char * endBuffer, char *segBuffer,
- ; char ** matchBegin, char ** matchEnd,
- ; int * numberOfNewlines );
- ;
- ; returns:
- ; 0 = NOT_FOUND -- RE was not found between beginBuffer
- ; and endBuffer
- ; 1 = FOUND -- RE was found between matchBegin and matchEnd
- ; 2 = PARTIAL_MATCH -- partial match at the end of the buffer
- ; starting at matchBegin
- ;
- public _re_scan
- ;
- ; _re_scan -- far entry point of the matcher; the caller removes the
- ; arguments (plain RET). Stack frame after "push bp / mov bp,sp":
- ; [bp+6] beginBuffer offset of the first byte to scan
- ; [bp+8] endBuffer offset of the last byte to scan
- ; [bp+10] segBuffer segment both offsets are relative to
- ; [bp+12] matchBegin near pointer, receives the start of the match
- ; [bp+14] matchEnd near pointer, receives the end of the match
- ; [bp+16] numberOfNewlines near pointer, receives the newline count
- ; The three output pointers are dereferenced through the caller's ds, which
- ; is saved in dataSegment on entry. The result code is returned in ax;
- ; bp, ds, es, si and di are restored before returning.
- ;
- _re_scan PROC FAR
- push bp ; build the frame used to reach the arguments
- mov bp,sp
- ;
- mov cs:dataSegment,ds ; remember the caller's ds for the output stores
- ;
- push ds ; registers restored at the end of finish
- push es
- push si
- push di
- ;
- push cs ; ds = es = cs: the lists and variables live in
- pop ds ; the code segment
- push cs
- pop es
- ;
- mov ax,[bp+6] ; beginBuffer is both the scan start and the
- mov cs:nextByteOffset,ax ; start of the newline count
- mov cs:firstByteOffset,ax
- mov ax,[bp+8] ; endBuffer
- mov cs:lastByteOffset,ax
- ; the segment is stored right after nextByteOffset so that one LDS/LES
- ; picks up the whole far pointer
- mov ax,[bp+10]
- mov cs:nextByteSegment,ax
- ;
- init:
- cld ; string instructions scan forward
- lea di,list1 ; start with list1 = nlist and list2 = clist;
- mov nlistTop,di ; myxchg swaps them after each character
- lea si,list2
- mov clistTop,si
- mov dx,si ; dx = clist head
- mov al,0AH ; pretend the character before the buffer was a NL
- jmp myxchg
- ;
- ;
- finish:
- ; Common exit, entered with ax = return value. Counts the newlines in
- ; the bytes from firstByteOffset up to (not including) nextByteOffset.
- push ax ; al is needed for the scan
- mov di,cs:firstByteOffset ; es:di = first byte to examine
- mov cx,cs:nextByteOffset
- mov ds,cs:nextByteSegment
- mov es,cs:nextByteSegment
- sub cx,di ; cx = number of bytes to examine
- mov al,0AH ; 0AH = newline, ASCII line feed
- xor dx,dx ; dx = newline counter
- NLLoop:
- jcxz noMoreNLs ; nothing (left) to scan. This test is required
- ; before the first scan too: REPNE SCASB with
- ; cx = 0 leaves the flags untouched, and the
- ; stale ZF would be counted as a newline
- repne scasb ; scan to the next newline
- jne noMoreNLs ; ran off the end without finding one
- inc dx ; found another one, bump the counter
- jmp NLLoop
- noMoreNLs:
- mov ds,cs:dataSegment ; back to the caller's data segment
- mov bx,[bp+16] ; numberOfNewlines
- mov [bx],dx ; store the computed newline count
- ; end of newline counting
- ;
- pop ax ; restore ax -- the return value
- pop di
- pop si
- pop es
- pop ds
- pop bp
- ret
- _re_scan ENDP
- ;
- public _re_cnode
- ;
- ; _re_cnode -- reached by a near CALL. Appends the entry
- ; (return address, thisMatchBegin) at the clist tail (si) and then
- ; continues 3 bytes past the return address.
- ; Changes bx, cx and si.
- ;
- _re_cnode:
- pop bx ; bx = address just after the CALL
- mov cx,thisMatchBegin
- mov [si],bx ; word 0 of the entry: address to resume at
- mov [si+2],cx ; word 1 of the entry: where this match began
- add si,4 ; advance the clist tail
- add bx,3 ; step over the 3 bytes that follow the CALL
- jmp bx ; register jump: goes to bx itself, whereas
- ; JMP 3[bx] would fetch the target from memory
- ;
- public _re_nnode
- extrn _addrJmpFound:WORD
- ;
- ; _re_nnode -- reached by a near CALL. If the return address equals the
- ; word stored at _addrJmpFound the RE has matched; otherwise the entry
- ; (return address, thisMatchBegin) is appended at the nlist tail (di).
- ; Changes bx, cx and di.
- ;
- _re_nnode:
- pop bx ; bx = address just after the CALL
- cmp bx,_addrJmpFound ; is this the "found" call site?
- je _re_found
- mov cx,thisMatchBegin
- mov [di],bx ; word 0 of the entry: address to resume at
- mov [di+2],cx ; word 1 of the entry: where this match began
- add di,4 ; advance the nlist tail
- ; execution falls through into _re_clist to run the next clist entry
- ;
- public _re_clist
- ;
- ; _re_clist -- take the next entry off the clist (dx = head, si = tail)
- ; and resume at the address saved in it. When the list is empty, go to
- ; myxchg to swap the lists and fetch the next input character.
- ;
- _re_clist:
- cmp dx,si
- je myxchg ; head = tail => nothing left to run
- mov bx,dx ; bx = entry being taken
- add dx,4 ; the head now points at the following entry
- mov cx,[bx+2] ; reinstate the match start recorded with it
- mov thisMatchBegin,cx
- jmp [bx] ; resume at the saved code address
- ;
- public _re_found
- ;
- ; _re_found -- the RE matched. Stores the match bounds through the
- ; matchBegin ([bp+12]) and matchEnd ([bp+14]) pointers in the caller's
- ; data segment and leaves through finish with ax = 1 (FOUND).
- ;
- _re_found:
- mov cx,thisMatchBegin
- mov dx,cs:nextByteOffset
- dec cx ; both values were taken from nextByteOffset
- dec dx ; after it was advanced, so each is one too big
- mov ds,cs:dataSegment ; the output pointers are relative to this segment
- mov bx,[bp+12]
- mov [bx],cx ; *matchBegin = start of the found RE
- mov bx,[bp+14]
- mov [bx],dx ; *matchEnd = where the found RE ends
- mov ax,1 ; FOUND flag is returned in ax
- jmp finish
- ;
- ;
- ;
- ; myxchg -- runs once per input character, after the clist is exhausted.
- ; In: al = last character read, di = nlist tail, ds = es = cs.
- ; 1. If the nlist is empty and _reFast1 is non-zero, skip ahead with
- ; SCASB to the first occurrence of _reFast1 or _reFast2.
- ; 2. Swap the roles of the two lists (the old nlist becomes the clist).
- ; 3. Fetch the next character into al, record the characters on either
- ; side of it in _re_sidechars, and jump to _re_code; at the end of
- ; the buffer go to endOfSpan instead.
- ;
- myxchg:
- cmp di,nlistTop
- jne skipFastScan ; matches are pending: no skipping allowed
- ;
- ; if this char is 0 then do not try fast searches, if it is not 0 then the
- ; first character of the RE is a single fixed character
- ;
- cmp _reFast1,0
- je skipFastScan
- ;
- ; If there is no pending list of partial RE matches, then try to speed up
- ; the search by scanning for the first character of the RE. The 8086 string
- ; search instructions are very fast for looking for a single character.
- ;
- ; get the parameters for the string scan
- push ax ; save last char read (in al)
- mov al,_reFast1
- les di,DWORD PTR nextByteOffset ; es:di = next byte to read
- mov cx,lastByteOffset
- cmp cx,di
- jbe noScan ; at most one byte left: nothing to skip
- sub cx,di
- inc cx ; since lastByteOffset points to a valid byte
- mov dx,cx ; save cx for the second scan
- repne scasb
- jne notFound1 ; distinguish: not found - found at last char
- dec di ; adjust since repne scasb goes one too far
- notFound1:
- mov cx,dx ; restore the count we saved
- mov dx,di ; save the results of the first scasb in dx
- mov al,_reFast2 ; get the second fast scan character
- or al,al ; MOV leaves the flags alone, so test al here
- jnz doSecondScan ; only look for chars not equal to '\0'
- mov di,lastByteOffset ; make sure this is not the lowest
- jmp notFound2
- doSecondScan:
- mov di,WORD PTR nextByteOffset ; rescan the same span for _reFast2
- repne scasb
- jne notFound2
- dec di
- notFound2:
- ; use the one that came first (the lower one)
- cmp di,dx
- jbe useSecond
- mov di,dx
- useSecond:
- mov nextByteOffset,di ; resume reading at the earlier hit
- noScan:
- push cs
- pop es ; restore es (NECESSARY since we use DI)
- mov di,nlistTop ; restore di
- pop ax ; restore last char read (back into al)
- ;
- ;
- skipFastScan:
- mov si,di ; set new clist tail
- mov di,clistTop ; set new nlist tail
- mov bx,nlistTop ; set up to exchange clist and nlist
- mov nlistTop,di ; now do the reverse
- mov clistTop,bx ; reversing is faster than moving the lists
- mov dx,bx ; start adding at the top of the clist
- ;
- ; get the next character
- ;
- lds bx,DWORD PTR nextByteOffset ; ds:bx = next byte; ds is no longer cs
- cmp bx,cs:lastByteOffset
- ja endOfSpan ; use unsigned comparison
- mov BYTE PTR cs:_re_sidechars,al ; save last char read
- mov al,[bx+1] ; get char after the next one
- mov BYTE PTR cs:_re_sidechars+1,al ; save next char to read
- mov al,[bx] ; get the next character (finally)
- push cs ; restore DS to equal CS
- pop ds
- inc bx
- cmp bx,lastByteOffset ; are we at the end of the buffer?
- ; NOTE(review): JB also replaces the lookahead with a NL when bx equals
- ; lastByteOffset, although that byte is still read on the next pass --
- ; confirm against the callers whether JBE was intended
- jb notAtEnd
- mov ah,0AH ; if so, simulate a NL as the next char
- mov BYTE PTR _re_sidechars+1,ah
- notAtEnd:
- mov nextByteOffset,bx ; one past the character now in al
- mov thisMatchBegin,bx ; a match starting at this character begins here
- jmp _re_code
- ;
- endOfSpan:
- ;
- ; The input is exhausted. ds still holds the buffer segment loaded by the
- ; LDS in myxchg, so the list entries are read through cs.
- ; In: dx = clist head, si = clist tail.
- ; Out (via finish): ax = 0 when the clist is empty; otherwise ax = 2 and
- ; *matchBegin ([bp+12]) = lowest match start recorded in the clist.
- ;
- mov es,cs:nextByteSegment
- cmp dx,si
- jne partialMatch ; entries are pending => partial match
- xor ax,ax ; empty clist: NOT_FOUND
- jmp finish
- partialMatch:
- mov bx,dx ; bx walks the clist entries
- mov dx,cs:[bx+2] ; dx = lowest match start seen so far
- nextEntry:
- add bx,4
- cmp bx,si
- je lowestFound ; reached the tail
- cmp cs:[bx+2],dx
- jae nextEntry ; this entry does not start earlier
- mov dx,cs:[bx+2] ; new minimum
- jmp nextEntry
- lowestFound:
- mov ds,cs:dataSegment ; restore ds
- dec dx ; taken from nextByteOffset and so 1 too high
- mov bx,[bp+12]
- mov [bx],dx ; *matchBegin = start of the partial match
- mov ax,2 ; PARTIAL_MATCH
- jmp finish
- ;
- even ; start the buffer on a word boundary
- public _re_code ; exported so other modules can reach the buffer
- _re_code: ; myxchg jumps here after fetching each input character
- db 1000 DUP (0) ; 1000 zero bytes; nothing in this file fills them -- presumably the RE compiler stores the matching code here (confirm)
- ;
- even ; word-align the variables
- nlistTop dw 0 ; offset of the first entry of the list currently serving as nlist
- clistTop dw 0 ; offset of the first entry of the list currently serving as clist
- firstByteOffset dw 0 ; beginBuffer; finish counts newlines starting here
- ; These next two words must stay in this order since they are picked up with
- ; an LDS (or LES) instruction that wants them in this order in two consecutive words
- nextByteOffset dw 0 ; offset of the next byte to read
- nextByteSegment dw 0 ; segBuffer, the segment of the scanned buffer
- dataSegment dw 0 ; caller's ds, saved on entry to _re_scan
- ;
- ; Here we will keep the last character read and the next character to be read
- ;
- public _re_sidechars
- _re_sidechars dw 0 ; low byte = last character read, high byte = next character to be read
- ;
- lastByteOffset dw 0 ; endBuffer, the offset of the last valid byte
- thisMatchBegin dw 0 ; nextByteOffset value recorded when the running match started (one past its first character)
- ; 32 bytes = 256 bits. Read as bit (c AND 7) of byte (c SHR 3), the set bits are exactly '0'-'9', 'A'-'Z' and 'a'-'z'; the table is not used in this file
- public _re_wordtable
- _re_wordtable db 0, 0, 0, 0, 0, 0, -1, 3 ; codes 0-63: bits 48-57 set ('0'-'9')
- db -2, -1, -1, 7, -2, -1, -1, 7 ; codes 64-127: bits 65-90 ('A'-'Z') and 97-122 ('a'-'z') set
- db 0, 0, 0, 0, 0, 0, 0, 0 ; codes 128-191: none set
- db 0, 0, 0, 0, 0, 0, 0, 0 ; codes 192-255: none set
- ;
- even ; entries are pairs of words, keep them word-aligned
- list1: ; starts out as the nlist (see init); roles swap in myxchg
- db 400 DUP (0) ; room for 100 four-byte entries; no overflow check is visible in this file
- ;
- even
- list2: ; starts out as the clist (see init)
- db 400 DUP (0) ; room for 100 four-byte entries (code address, match start)
- ;
- RE_TEXT ENDS
- end
-