home *** CD-ROM | disk | FTP | other *** search
- ;History:772,1
- ;Wed Nov 29 23:58:27 1989 Add support for \|
- ;Tue Nov 07 23:45:44 1989 match newlines in character classes.
- ;Mon Nov 06 00:40:16 1989 try to make backwards regexp searches work.
- ;Sat Nov 05 22:05:14 1988 let CR LF match LINENEW.
- ;10-08-88 08:48:54 add \n to regexp search.
- ;09-26-88 21:23:42 add case translation for character classes.
- ;08-19-88 23:36:40 closure didn't work because omatch iterated on matching.
- ;08-13-88 22:12:46 try forwards again.
- ;07-24-88 16:42:24 BOL and EOL match BOB and EOB respectively.
- ;07-21-88 22:49:18 add optimized search backwards.
- ;07-20-88 00:15:38 too late at night to continue...
- ;07-20-88 00:02:35 optimize forward searches.
- ;07-19-88 23:38:07 use the right omatch_chr for both regexps and literals.
- ;07-19-88 00:51:06 initialize the case table.
- ;07-18-88 21:20:18 don't increment di twice in omatch_NCCL
- ;07-18-88 00:04:34 replace bad patterns with "".
- ;07-17-88 23:15:23 Check for topbot right after incrementing di.
- ;07-17-88 22:55:12 search *at* the end_ptr (check for end_ptr after searching).
- ;07-17-88 18:54:53 when searching backwards, don't search past right_ptr.
- ;07-17-88 10:59:27 save di around omatch()
- ;07-17-88 10:42:13 omatch_CHR was incrementing di even if it didn't match.
- ;06-06-88 23:58:09 change the regexp chars to match Gnu's.
- ;07-06-87 06:55:31 Use botbot for eof, not LINENEW
- include memory.def
-
- data segment byte public
-
- b_struc struc ;lets us write [reg].b to force a byte-sized operand.
- b db ?
- b_struc ends
-
- w_struc struc ;lets us write [reg].w to force a word-sized operand.
- w dw ?
- w_struc ends
-
- extrn outpat: byte ;buffer that holds the compiled pattern.
- extrn OUTPATSIZE: abs ;added to offset outpat to find the end of that buffer.
- inpat_ptr dw ? ;beginning of input pattern.
- direction dw ? ;routine to increment di in correct direction.
- scan_char dw ? ;routine to scan for a character.
- end_ptr dw ? ;end of region we're searching.
- right_ptr dw ? ;rightmost end of region we're searching.
- clo_si dw ? ;saved pointer for closure.
- last_ptr dw ? ;pointer to last character matched.
- which_chr dw ? ;which omatch_CHR to use.
- this_pattern dw ? ;->this pattern (for closure).
- last_pattern dw ? ;->previous pattern (for closure).
- last_or dw ? ;->last or pointer.
-
- extrn textseg: word ;segment loaded into es/ds to reach the text buffer.
-
- init_case dw init_case_table ;called by set_pattern; init_case_table repoints it at a bare ret.
- case_ignore_table db 256 dup(?) ;xlat table filled in by init_case_table.
-
- data ends
-
-
- bufseg segment public
-
- extrn toptop: word
- extrn topbot: word
- extrn bottop: word
- extrn botbot: word
-
- bufseg ends
-
-
- code segment byte public
- assume cs:code, ds:data, ss:data
-
- public slowly
-
- extrn get_mark: near, set_mark_si: near
- extrn get_syntax: near
-
- public search
- search:
- ;Look for the compiled pattern (outpat) between two marks.
- ;entry: ch=mark to start at, cl=mark that bounds the search,
- ; dh=mark that receives the start of the match,
- ; dl=mark that receives the end of the match.
- ;exit: cy=0 with marks dh and dl set if found, cy=1 if not found.
- ;The scan runs forwards when the start mark lies below the end mark and
- ; backwards otherwise.
- push dx ;keep the result marks for later.
- push es
- mov es,textseg
- assume es:bufseg
- push ds ;point ds at the text buffer too,
- push es
- pop ds
- assume ds:bufseg ;for get_mark
- push cx
- mov al,cl ;look up the end mark first.
- call get_mark
- mov right_ptr,si ;assume the end is also the rightmost point,
- mov end_ptr,si ; and note where the search stops.
- pop cx
- mov al,ch ;now look up the start mark.
- call get_mark
- pop ds ;back to our own data.
- assume ds:data
- mov direction,offset inc_di ;default to a forward scan.
- cmp si,end_ptr ;does the start lie below the end?
- jb search_go ;yes - forwards it is.
- mov right_ptr,si ;no - the start is the rightmost point,
- mov direction,offset dec_di ; and we walk backwards from it.
- search_go:
- mov di,si ;slowly wants the text pointer in di.
- call slowly
- pop es
- assume es:data
- pop dx
- jc search_miss ;nothing matched.
-
- push ds
- mov ds,textseg ;for set_mark_si
- assume ds:bufseg
- mov si,di ;the start of the match goes in mark dh.
- mov al,dh
- call set_mark_si
-
- mov al,dl ;the end of the match goes in mark dl.
- mov si,last_ptr
- call set_mark_si
-
- pop ds
- assume ds:data
- clc ;report the match.
- ret
- search_miss:
- stc ;report the failure.
- ret
-
-
- assume ds:data, es:bufseg
-
-
- scan_char_literal:
- ;Scan the text at es:di for the byte in al, looking at no more than cx
- ; bytes. The caller's direction flag chooses which way di moves.
- ;exit: zr if the byte was found, nz if not; di and cx as repne scasb
- ; leaves them.
- or sp,sp ;ensure NZ in case cx=0.
- repne scasb ;search for the character.
- ret
-
-
- scan_char_fold:
- ;Case-folding counterpart of scan_char_literal: both the search character
- ; and every text byte are run through the xlat table before comparing.
- ;entry: al=character to find, bx->case translate table, es:di->text,
- ; cx=number of bytes to examine, dx=step added to di after each byte
- ; (the callers pass 1 or -1).
- ;exit: zr if found (di has already been stepped past the match), nz if
- ; not; al=the translated search character.
- xlat
- mov ah,al ;ah = folded character we're looking for.
- or sp,sp ;if cx=0, be sure to return nz.
- jcxz scan_char_fold_2
- shr cx,1 ;we unrolled the loop once.
- jnc scan_char_fold_1 ;if even, start at the top.
- inc cx ;otherwise, add one for the odd
- jmp short scan_char_fold_3 ; iteration, and jump to it.
- scan_char_fold_1:
- mov al,es:[di] ;unroll this puppy once.
- add di,dx
- xlat
- cmp al,ah ;compare them.
- je scan_char_fold_2 ;if equal, we're done.
- scan_char_fold_3:
- mov al,es:[di] ;now do the second set.
- add di,dx
- xlat
- cmp al,ah
- loopne scan_char_fold_1 ;loopne leaves the flags from the cmp alone.
- scan_char_fold_2:
- mov al,ah ;get our character back.
- ret
-
-
- slowly:
- ;Search for the compiled pattern in outpat, trying one text position after
- ; another (stepping with the routine in direction) until end_ptr has been
- ; tried. Patterns that start with a literal character other than CR or LF
- ; are handed to forwards_0/backwards_0, which scan for that character first.
- ;es:di -> first char to look at.
- ;es:right_ptr -> after last char to look at.
- ;return cy if no match,
- ; else nc, di->start of match, last_ptr->after end of match.
- cmp di,topbot ;at topbot already?
- jne slowly_0
- mov di,bottop
- slowly_0:
- mov ax,which_chr ;does the pattern start with a CHR?
- cmp ax,word ptr outpat
- jne slowly_1 ;no.
- cmp outpat+2,CR ;searching for literal CR?
- je slowly_1 ;yes - don't optimize because of CRLFs.
- cmp outpat+2,LF ;searching for literal LF?
- je slowly_1 ;yes - don't optimize because of CRLFs.
-
- mov scan_char,offset scan_char_literal
- cmp ax,offset omatch_CHR ;Are we folding case?
- je quickly_1 ;no.
- mov scan_char,offset scan_char_fold
- quickly_1:
- cmp direction,offset inc_di ;Are we going forwards?
- je forwards_0 ;yes.
- if 0 ;disable optimization for now.
- jmp slowly_1
- endif
- jmp backwards_0 ;no.
-
- slowly_1:
- mov si,offset outpat ;start at beginning of pattern.
- mov bx,offset case_ignore_table
- push di ;remember where we're starting.
- call omatch ;now search.
- pop di
- jnc slowly_succeed ;we found a match
- ;not found, should we give up?
- cmp di,end_ptr ;at the end yet?
- je slowly_fail ;yes - not found.
- ;not found, we have to bump di.
- call direction
- jmp slowly_1
- slowly_fail:
- stc ;not found.
- ret
- slowly_succeed:
- if 1 ;an attempt to make backwards regexp searches work right.
- ;When searching backwards, keep backing up for as long as the pattern still
- ; matches and ends at the same last_ptr, then settle on the last such start.
- cmp direction,offset inc_di ;Are we going forwards?
- je slowly_done ;yes - we're done now.
- slowly_backwards_again:
- call dec_di ;move backwards.
- push last_ptr ;remember the pointer to the end of it.
- mov si,offset outpat ;start at beginning of pattern.
- mov bx,offset case_ignore_table
- push di
- call omatch ;did it match?
- pop di
- pop ax ;ax = end of the match we already have.
- jc slowly_backwards_done ;no - we're done.
- cmp ax,last_ptr ;did last_ptr change?
- je slowly_backwards_again ;no, we can try again.
- slowly_backwards_done:
- mov last_ptr,ax ;put back the end of the match we're keeping.
- call inc_di ;point to the last match again.
- slowly_done:
- endif
- clc
- ret
-
- public forwards_0
- forwards_0:
- ;Forward search for a pattern whose first element is a literal character.
- ;Scans for that character with the routine in scan_char (first in the top
- ; part of the buffer, then in the bottom part), and only then runs omatch
- ; on the remainder of the pattern (outpat+3).
- ;entry: es:di -> where to start, end_ptr -> where to stop.
- ;exit: through slowly_succeed (di -> start of match) or slowly_fail.
- mov bx,offset case_ignore_table
- mov al,outpat+2 ;get the character
- cmp di,bottop ;are we in the bottom?
- jae forwards_2 ;yes - don't search the top.
-
- mov cx,topbot ;should we search to topbot
- cmp cx,end_ptr ; or to end_ptr?
- jbe forwards_3
- mov cx,end_ptr ;just to end_ptr.
- forwards_3:
- sub cx,di ;compute the amount left in the top.
- mov dx,1 ;step forwards (used by scan_char_fold).
- call scan_char ;scan for our character.
- je forwards_1 ;we found it!
-
- cmp di,end_ptr ;are we at the end?
- jae slowly_fail ;yes - no match.
-
- mov di,bottop
- forwards_2:
- mov cx,end_ptr ;we only need search that far.
- sub cx,di
- mov dx,1
- call scan_char ;scan for our character.
- jne slowly_fail ;we didn't find it.
- forwards_1:
- mov si,offset outpat+3 ;skip the first element - the scan matched it.
- push di ;remember where we're starting.
- call omatch ;now search.
- pop di
- jnc forwards_4 ;we matched - return it.
- cmp di,end_ptr ;are we at the end?
- jb forwards_0 ;no - keep matching.
- slowly_fail_j_1:
- jmp slowly_fail ;yes - no match.
- forwards_4:
- dec di ;remember that we actually started
- jmp slowly_succeed ; one character into the pattern.
-
-
- public backwards_0
- backwards_0:
- ;Backward search for a pattern whose first element is a literal character.
- ;Scans towards lower addresses for that character with the routine in
- ; scan_char (first in the bottom part of the buffer, then in the top part),
- ; then runs omatch on the remainder of the pattern (outpat+3).
- ;entry: es:di -> where to start, end_ptr -> where to stop.
- ;exit: through slowly_succeed (di -> start of match) or slowly_fail.
- mov bx,offset case_ignore_table
- mov al,outpat+2 ;get the character
- cmp di,bottop ;are we in the top?
- jb backwards_2 ;yes - don't search the bottom.
- je backwards_5 ;at bottop exactly - nothing of the bottom is left to scan.
-
- mov si,bottop ;should we search to bottop
- cmp si,end_ptr ; or to end_ptr?
- jae backwards_3
- mov si,end_ptr ;just to end_ptr.
- backwards_3:
-
- ;NOTE(review): when we get here from the retry at the end of backwards_1,
- ; di is already one before the failed candidate, so this dec di steps back
- ; a second time, whereas the top-part path (backwards_2) scans from di
- ; itself - confirm the asymmetry is intended.
- dec di
- mov cx,di ;compute the amount left in the bottom.
- sub cx,si
- inc cx ;be sure to look at where di points.
- std
- mov dx,-1 ;step backwards (used by scan_char_fold).
- call scan_char ;scan for our character.
- cld
- je backwards_1 ;we found it!
-
- backwards_5:
- cmp di,end_ptr ;are we at the end?
- jbe slowly_fail_j_1 ;yes - no match.
-
- mov di,topbot
- dec di
- backwards_2:
- mov cx,di ;we only search here if end_ptr is here.
- sub cx,end_ptr
- inc cx ;be sure to compare where di is.
- std
- mov dx,-1
- call scan_char ;scan for our character.
- cld
- jne slowly_fail_j_1 ;we didn't find it.
- backwards_1:
- mov si,offset outpat+3 ;skip the first element - the scan matched it.
- push di ;remember where we're starting.
- add di,2 ;we post-decremented.
- call omatch ;now search.
- pop di
- jnc backwards_4 ;we succeeded.
- inc di
- cmp di,end_ptr ;are we after the end?
- jb slowly_fail_j_1 ;yes - no match.
- dec di
- jmp backwards_0
- backwards_4:
- inc di ;remember that we post-decremented,
- jmp slowly_succeed ; so we're one character too far.
-
- inc_di:
- ;Advance di to the next character of the buffer.
- ;A LINENEW pair counts as a single character, and arriving at topbot
- ; continues at bottop.
- inc di
- cmp di,topbot ;reached the end of the top part?
- je inc_di_hop ;yes - nothing to check, go to the bottom.
- cmp es:[di-1].w,LINENEW ;did we land on the second byte of a newline?
- jne inc_di_exit ;no - done.
- inc di ;yes - step over that byte too.
- cmp di,topbot ;did that take us to the end of the top?
- je inc_di_hop ;yes - go to the bottom.
- inc_di_exit:
- ret
- inc_di_hop:
- mov di,bottop
- ret
-
- dec_di:
- ;Move di back to the previous character of the buffer.
- ;Leaving bottop continues just below topbot, and a LINENEW pair counts as
- ; a single character.
- cmp di,bottop ;sitting at the start of the bottom part?
- je dec_di_hop ;yes - continue from the end of the top.
- dec_di_step:
- dec di ;back up to previous character.
- cmp es:[di-1].w,LINENEW ;are we on the second byte of a newline?
- jne dec_di_exit ;no - done.
- cmp di,bottop ;a newline can't be split across the two parts,
- je dec_di_exit ; so leave di alone at the start of the bottom.
- dec di ;otherwise back up to the start of the newline.
- dec_di_exit:
- ret
- dec_di_hop:
- mov di,topbot
- jmp dec_di_step
-
-
- omatch:
- ;Run the compiled pattern at ds:si against the text at es:di.
- ;Every pattern element begins with the offset of its omatch_XXX routine;
- ; they are fetched and called in turn until one reports failure (cy) or
- ; finishes the match by returning past us (as omatch_EOS does).
- ;return nc if we matched, cy if not.
- omatch_next:
- cmp di,topbot ;at bottom of top?
- je omatch_hop ;yes - the text continues at bottop.
- omatch_call:
- lodsw ;fetch the next matcher
- call ax ; and let it look at the text.
- jnc omatch_next ;it matched - on to the next element.
- ret ;it failed - so do we.
- omatch_hop:
- mov di,bottop
- jmp omatch_call
-
-
- ;each of the omatch_XXX routines operates under the following constraints
- ; on failure, return with cy set.
- ; on completing the whole match (omatch_EOS, and omatch_CLO/omatch_OR once
- ; the rest of the pattern has matched), discard the return address and
- ; return to omatch's caller with cy clear.
- ; on success, bump si as needed so that it points to the next omatch,
- ; bump di as needed (zero, one, or two for a newline), and return with cy clear.
-
- public omatch_EOS
- omatch_EOS:
- ;End of pattern: everything before us matched, so the match is complete.
- ;Records where the match ended in last_ptr and returns directly to
- ; omatch's caller with cy clear.
- mov last_ptr,di ;remember the last thing we matched.
- add sp,2 ;pop our return address.
- clc ;if we get to the end of the
- ret ; pattern, then we matched.
-
- public omatch_CLO
- omatch_CLO:
- ;Closure: match the element that follows as many times as it will go, then
- ; try the rest of the pattern, giving back one text byte at a time until
- ; the rest matches or we are back before where the closure began.
- ;entry: ds:si -> element being closed, es:di -> text.
- ;exit: if the rest matched, returns to omatch's caller with nc;
- ; otherwise returns to omatch with cy.
- push di ;remember where the closure starts in the text.
- mov CLO_si,si ;remember the pattern we're closing.
- ;Note that we don't have to worry about CLO_si being global because the
- ; next pattern can't be another closure.
- ;match as many as fit the next pattern
- mov bx,offset case_ignore_table
- omatch_CLO_1:
- mov si,CLO_si ;get the pattern being closed.
- cmp di,topbot ;at bottom of top?
- jne omatch_CLO_5
- mov di,bottop ;yes, go to top of bottom.
- omatch_CLO_5:
- lodsw
- call ax
- jnc omatch_CLO_1
- ;the failing call is relied on to have left si past the closed element.
- pop bx ;bx = text position where the closure began.
- ;match only as many as fit the pattern after the next pattern.
- omatch_CLO_2:
- push si
- push di
- push bx
- mov bx,offset case_ignore_table
- call omatch ;try to match rest of pattern.
- pop bx
- pop di
- pop si
- jnc omatch_CLO_4 ;go if it matched.
- cmp di,bottop ;backing up past the point?
- jne omatch_CLO_3 ;no - just decrement.
- mov di,topbot ;yes - get the bottom of the top.
- omatch_CLO_3:
- dec di ;point to the previous character.
- cmp di,bx ;zero or more matches still?
- jae omatch_CLO_2 ;yes.
- stc ;no matches--return no match.
- ret
- omatch_CLO_4:
- pop bx ;get rid of our return address.
- ret ;pop leaves the flags alone, so this returns nc.
-
-
- omatch_OR:
- ;Alternation: try the pattern that follows our operand; if that fails, try
- ; the pattern the operand points to, from the same text position.
- ;entry: ds:si -> operand word (offset of the next or-clause), es:di -> text.
- ;exit: if either clause matched, returns to omatch's caller with nc;
- ; otherwise returns to omatch with cy and si, di as they were after the
- ; operand was skipped.
- add si,2 ;skip past our param.
- push si
- push di
- mov bx,offset case_ignore_table
- call omatch ;try to match rest of pattern.
- jnc omatch_OR_1 ;go if it matched.
- pop di
- pop si
- push si
- mov si,[si-2] ;point to the next or-clause.
- push di
- call omatch
- jnc omatch_OR_1 ;go if it matched.
- pop di
- pop si
- ;guaranteed cy.
- ret
- omatch_OR_1:
- add sp,6 ;get rid of si,di, and our return addr.
- ;guaranteed nc.
- ret
-
-
- public omatch_CHR
- omatch_CHR:
- ;Match the literal byte at ds:si against the text byte at es:di, exactly.
- ;A LINENEW in the text only matches a CR element that is immediately
- ; followed by an LF element; both are then consumed together.
- ;exit: nc with si, di stepped past what matched; cy with di unchanged and
- ; si stepped past our pattern character.
- ;The labels below are also the shared exits of other omatch_XXX routines.
- cmp di,right_ptr ;are we at the end?
- je omatch_CHR_skip ;yes - we never match CHR
- cmp es:[di].w,LINENEW
- je omatch_CHR_linenew
- cmpsb
- je omatch_yes ;if they're the same, match again.
- dec di ;don't modify buffer pointer if no match.
- stc
- ret
- omatch_CHR_linenew:
- cmp [si].b,CR ;got a LINENEW, are we looking for one?
- jne omatch_CHR_skip ;no.
- mov ax,which_chr ;is the next one another char?
- cmp [si+1].w,ax
- jne omatch_CHR_skip ;no - no match.
- cmp [si+1+2].b,LF ;Are we really looking for a linenew?
- jne omatch_CHR_skip ;no - no match.
- add si,1+2+1 ;skip past the two of them.
- add di,2 ;skip in the buffer also.
- clc
- ret
- omatch_CHR_skip:
- inc si ;skip the pattern character.
- omatch_CHR_no:
- stc
- ret
- omatch_yes:
- clc
- ret
-
-
- public omatch_NCHR
- omatch_NCHR:
- ;Case-folding version of omatch_CHR: the pattern byte and the text byte
- ; are both translated through the table at bx before being compared.
- ;entry: bx -> case translate table, ds:si -> pattern byte, es:di -> text.
- ;exit: as for omatch_CHR.
- cmp di,right_ptr ;are we at the end?
- je omatch_CHR_skip ;yes - we never match at the end.
- cmp es:[di].w,LINENEW
- je omatch_CHR_linenew ;newlines are handled by omatch_CHR's code.
- lodsb
- xlat
- mov ah,al ;ah = translated pattern character.
- mov al,es:[di]
- inc di
- xlat
- cmp ah,al
- je omatch_yes ;if they're the same, match again.
- dec di ;don't modify buffer pointer if no match.
- stc
- ret
-
-
- omatch_NL:
- ;\n - match a newline (LINENEW) in the text and step over both its bytes.
- cmp di,right_ptr ;out of text?
- je omatch_NL_miss ;yes - we never match newline.
- cmp es:[di].w,LINENEW ;sitting on a newline?
- je omatch_NL_hit ;yes.
- omatch_NL_miss:
- stc
- ret
- omatch_NL_hit:
- add di,2 ;skip it.
- clc
- ret
-
-
- public omatch_BOB
- omatch_BOB:
- ;\` - succeed only at the very beginning of the buffer; consumes nothing.
- cmp di,toptop ;is di the first byte of the buffer?
- jne omatch_CHR_no ;no - fail.
- clc ;yes.
- ret
-
-
- public omatch_BOL
- omatch_BOL:
- ;^ - succeed at the beginning of a line: either the beginning of the
- ; buffer or just after a LINENEW. Consumes nothing.
- push di ;we might have to look at the top.
- cmp di,bottop ;are we at the point?
- jne omatch_BOL_look ;no - look right behind di.
- mov di,topbot ;yes - what precedes us is the end of the top.
- omatch_BOL_look:
- cmp di,toptop ;nothing before us at all?
- je omatch_BOL_hit ;right - the buffer starts a line.
- cmp es:[di-2].w,LINENEW ;does a newline end just before us?
- jne omatch_BOL_miss ;no.
- omatch_BOL_hit:
- pop di
- clc
- ret
- omatch_BOL_miss:
- pop di
- stc
- ret
-
-
- public omatch_ISW
- omatch_ISW:
- ;\w - match one word character.
- cmp di,botbot ;nothing left in the buffer?
- je omatch_ISW_miss
- cmp es:[di].w,LINENEW ;a newline is not matched here.
- je omatch_ISW_miss
- call chars_around_di
- test al,1 ;is the character at di a word character?
- jz omatch_ISW_miss ;nope--no match.
- inc di ;match the character.
- clc
- ret
- omatch_ISW_miss:
- stc
- ret
-
-
- public omatch_NOW
- omatch_NOW:
- ;\W - match one character that is not a word character.
- cmp di,botbot ;nothing left in the buffer?
- je omatch_NOW_miss
- cmp es:[di].w,LINENEW ;a newline is not matched here.
- je omatch_NOW_miss
- call chars_around_di
- test al,1 ;is the character at di a word character?
- jnz omatch_NOW_miss ;yes--no match.
- inc di ;match the character.
- clc
- ret
- omatch_NOW_miss:
- stc
- ret
-
-
- public omatch_BOW
- omatch_BOW:
- ;\< - match the beginning of a word; consumes nothing.
- cmp di,botbot ;no word can start at the end of the buffer.
- je omatch_BOW_miss
- cmp es:[di].w,LINENEW ;nor at a newline.
- je omatch_BOW_miss
- call chars_around_di
- cmp al,1 ;non-word before and word after?
- je omatch_BOW_hit ;yes.
- omatch_BOW_miss:
- stc
- ret
- omatch_BOW_hit:
- clc
- ret
-
-
- public omatch_EOW
- omatch_EOW:
- ;\> - match the end of a word; consumes nothing.
- call chars_around_di
- cmp al,2 ;word before and non-word after?
- je omatch_EOW_hit ;yes.
- stc ;nope--no match.
- ret
- omatch_EOW_hit:
- clc
- ret
-
-
- public omatch_WOR
- omatch_WOR:
- ;\b - match at either edge of a word; consumes nothing.
- call chars_around_di
- cmp al,2 ;word before and non-word after?
- je omatch_WOR_hit ;yes - end of a word.
- cmp al,1 ;non-word before and word after?
- jne omatch_WOR_miss ;no - we're not at an edge.
- omatch_WOR_hit:
- clc
- ret
- omatch_WOR_miss:
- stc
- ret
-
-
- public omatch_NWR
- omatch_NWR:
- ;\B - match anywhere that is not the edge of a word; consumes nothing.
- call chars_around_di
- cmp al,0 ;non-word on both sides?
- je omatch_NWR_yes ;yes - match.
- cmp al,3 ;word on both sides?
- jne omatch_NWR_miss ;no - we're at an edge.
- omatch_NWR_yes:
- clc
- ret
- omatch_NWR_miss:
- stc
- ret
-
-
- public omatch_EOB
- omatch_EOB:
- ;\' - succeed only at the very end of the buffer; consumes nothing.
- cmp di,botbot ;are we at the end of the buffer?
- jne omatch_EOB_miss ;no.
- clc
- ret
- omatch_EOB_miss:
- stc
- ret
-
-
- public omatch_EOL
- omatch_EOL:
- ;$ - succeed at the end of a line: either the end of the buffer or right
- ; in front of a LINENEW. Consumes nothing.
- ;omatch_no is the shared failure exit used by several other matchers.
- cmp di,botbot ;are we at the end?
- je omatch_EOL_hit ;yes.
- cmp es:[di].w,LINENEW ;is a newline next?
- je omatch_EOL_hit ;yes.
- omatch_no:
- stc
- ret
- omatch_EOL_hit:
- clc
- ret
-
-
- public omatch_ANY
- omatch_ANY:
- ;. - match any one character except a newline.
- cmp di,right_ptr ;are we at the end?
- je omatch_ANY_miss ;yes - we never match ANY
- cmp es:[di].w,LINENEW ;we never match EOL.
- je omatch_ANY_miss
- inc di ;take the character.
- clc
- ret
- omatch_ANY_miss:
- stc
- ret
-
-
- public omatch_CCL
- omatch_CCL:
- ;match a character class.
- ;entry: ds:si -> count byte followed by that many class characters,
- ; es:di -> text, bx -> case translate table (used by locate).
- ;exit: nc with di past the matched character and si past the class;
- ; cy with di unchanged and si past the class.
- ;A LINENEW in the text only matches a class whose first two characters
- ; are themselves a LINENEW.
- cmp di,right_ptr ;are we at the end?
- je omatch_ccl_no ;yes - we never match CCL
- cmp es:[di].w,LINENEW ;newlines get special treatment.
- je omatch_ccl_newline
- call locate ;see if it's in our set.
- jnz omatch_no ;nope.
- inc di
- clc
- ret
- omatch_ccl_newline:
- lea ax,[di+1] ;are we near the end?
- cmp ax,right_ptr
- je omatch_ccl_no ;yes - no match.
- cmp ds:[si+1].w,LINENEW ;does the class begin with crlf?
- jne omatch_ccl_no ;no - don't match it.
- lodsb ;skip past this pattern.
- xor ah,ah
- add si,ax
- add di,2 ;skip both bytes of the newline.
- clc
- ret
-
-
- public omatch_NCCL
- omatch_NCCL:
- ;match not in a character class.
- ;entry: ds:si -> count byte followed by that many class characters,
- ; es:di -> text, bx -> case translate table (used by locate).
- ;exit: nc with di past the matched character, cy with di unchanged.
- ;Either way si is left just past the class exactly once; omatch_CLO uses
- ; si after a failure to find the rest of the pattern.
- cmp di,right_ptr ;are we at the end?
- je omatch_ccl_no ;yes - we never match NCCL
- cmp es:[di].w,LINENEW ;a negated class never matches a newline.
- je omatch_ccl_no
- call locate ;see if it's in our set (si ends up past the class).
- jz omatch_NCCL_no ;yes - no match, and si is already past the class.
- inc di
- clc
- ret
- omatch_ccl_no:
- ;fail when the class at si has not been stepped over yet.
- lodsb ;get the count,
- xor ah,ah
- add si,ax ; and skip that many class characters.
- stc
- ret
- omatch_NCCL_no:
- stc
- ret
-
-
- locate:
- ;Look the text character up in a character class, ignoring case as the
- ; translate table dictates.
- ;es:di -> search string, bx -> case translate table.
- ;ds:si -> CCL (count byte, then that many class characters)
- ;exit with zr if found, nz if not found, si -> after the pattern.
- ;An empty class (count of zero) never matches. cx is preserved.
- push cx
- lodsb ;get the count.
- mov cl,al
- xor ch,ch
- jcxz locate_empty ;loopne with cx=0 would go around 65536 times.
- mov al,es:[di] ;get the character we're trying to match.
- xlat ;case translate it.
- mov ah,al ;keep it somewhere safe.
- locate_2:
- lodsb
- xlat
- cmp al,ah ;is this one it?
- loopne locate_2
- lahf ;remember whether or not we found it.
- add si,cx ;skip the part of the class we didn't look at.
- sahf
- pop cx
- ret
- locate_empty:
- or sp,sp ;return nz - there's nothing to find.
- pop cx
- ret
-
-
- chars_around_di:
- ;Classify the characters on either side of es:di as word or non-word.
- ;return al bit 1=syntax of char to left of point.
- ; al bit 0=syntax of char to right of point.
- ;A missing neighbour (beginning or end of the buffer) counts as non-word.
- ;di is preserved; ah is used as scratch.
- ;NOTE(review): this relies on the second get_syntax call preserving ah -
- ; confirm against get_syntax.
- push di ;get the character before point.
- cmp di,bottop ;are we at the point?
- jne chars_around_di_1 ;no.
- mov di,topbot ;yes - the character before us ends the top.
- chars_around_di_1:
- xor al,al ;if no character, it's whitespace.
- cmp di,toptop
- je chars_around_di_2
- mov al,es:[di-1]
- call get_syntax ;get the syntax for the char before point.
- and al,1 ;isolate the 'word' bit.
- chars_around_di_2:
- shl al,1 ;the left neighbour goes in bit 1.
- mov ah,al
- pop di
-
- xor al,al ;if no character, it's whitespace.
- cmp di,botbot ;are we at the end?
- je chars_around_di_3 ;yes - can't match beginning of word.
- mov al,es:[di]
- call get_syntax
- and al,1
- chars_around_di_3:
- or al,ah ;include the syntax of the char to left of point.
- ret
-
-
- assume ds:data
-
- public set_pattern
- set_pattern:
- ;Compile a search pattern into outpat.
- ;enter with si, cx->pattern. dx<>0 if regular expression. di <> 0 if we
- ; want to fold case.
- ;exit with cy=1 if error; outpat then holds an empty pattern.
- ;A literal pattern is stored as one (matcher word, character byte) triple
- ; per character, followed by omatch_EOS. bp is used as scratch.
- ;NOTE(review): stosw/movsb store through es, so the caller presumably
- ; enters with es=data - confirm.
- call init_case ;build the case table (first call only).
- mov ax,offset omatch_CHR
- or di,di
- je set_pattern_0
- mov ax,offset omatch_NCHR
- set_pattern_0:
- mov which_chr,ax ;remember which omatch_CHR to use.
- or dx,dx
- jne regexp_pat
- mov di,offset outpat
- jcxz set_pattern_1
- mov bp,offset outpat-5 ;a character takes 3 bytes and omatch_EOS takes
- add bp,OUTPATSIZE ; 2 more, so bp = the last place one can start.
- set_pattern_2:
- cmp di,bp ;room for this character and the end of string?
- ja set_pattern_3 ;no - quit now.
- stosw ;store the appropriate comparison omatcher.
- movsb
- loop set_pattern_2
- set_pattern_1:
- mov ax,offset omatch_EOS ;store the end of string.
- stosw
- clc
- ret
- set_pattern_3:
- mov word ptr outpat,offset omatch_EOS ;don't leave an unterminated pattern.
- stc
- ret
-
-
- public regexp_pat
- regexp_pat:
- ;Compile a regular expression into outpat.
- ;enter with si, cx->pattern.
- ;exit with cy=1 if error.
- ;A zero byte is stored just past the cx pattern bytes at si.
- ;When makepat returns cy, outpat is replaced with an empty pattern.
- mov bx,cx
- mov [si+bx],byte ptr 0 ;store the terminating null.
- call makepat
- jnc regexp_pat_1
- mov word ptr outpat,offset omatch_EOS ;uh-oh, bad pattern -- null it.
- regexp_pat_1:
- ret ;mov leaves the flags alone, so cy still reports the error.
-
-
- makepat:
- ;Compile a null-terminated regular expression into outpat.
- ;si -> source pat (null terminated)
- ;While compiling: di -> next free byte of outpat, dx -> end of outpat,
- ; this_pattern/last_pattern -> start of the current/previous element
- ; (last_pattern is -1 when there is nothing for '*' to close),
- ; last_or -> where the current alternative starts.
- ;return cy=1 if error (outpat filled up, or a bad character class).
- ;NOTE(review): '*' after a zero-width element other than '^' (such as \b)
- ; is still compiled as a closure - confirm omatch_CLO copes with that.
- mov inpat_ptr,si
- mov di,offset outpat
- mov dx,OUTPATSIZE
- add dx,di
- mov last_pattern,-1 ;there is no previous pattern yet.
- mov last_or,di ;the first alternative starts here.
- makepat_1:
- lodsb ;get the first character.
- or al,al ;end of string?
- je makepat_0 ;yes.
-
- mov this_pattern,di ;remember where this pattern starts.
-
- cmp al,'\' ;are we escaping something?
- jne makepat_a
- cmp byte ptr [si],0 ;is the '\' at the end?
- je makepat_9 ;yes - just use \.
- lodsb ;get the escaped char.
- call escaped_char ;check for the special escapes.
- jmp makepat_2
- makepat_a:
- cmp al,'.'
- jne makepat_3
- mov ax,offset omatch_ANY
- call addset
- jmp makepat_2
- ;this really belongs at the end of makepat, but the short jump can't get there.
- makepat_0:
- mov ax,offset omatch_EOS
- call addset
- cmp di,dx ;did we fill (or overrun) the buffer?
- jb makepat__0_1 ;no - the pattern fits.
- stc
- ret
- makepat__0_1:
- clc
- ret
- makepat_3:
- cmp al,'^'
- jne makepat_7
- lea ax,[si-1] ;get the buffer pointer.
- cmp ax,inpat_ptr ;are we at the beginning?
- jne makepat_6 ;no - this can't be it.
- mov ax,offset omatch_BOL
- call addset
- jmp makepat_2
- makepat_6:
- mov al,'^'
- call addchar
- jmp makepat_2
- makepat_7:
- cmp al,'$'
- jne makepat_8
- cmp word ptr [si],'\' + '|'*256;is the '$' at the end of an alternation?
- je makepat_7a ;yes - it means end of line.
- cmp byte ptr [si],0 ;is the '$' at the end?
- jne makepat_9 ;no - not special.
- makepat_7a:
- mov ax,offset omatch_EOL
- call addset
- jmp makepat_2
- makepat_9:
- call addchar
- jmp makepat_2
- makepat_8:
- cmp al,'['
- jne makepat_10
- call getccl
- jnc makepat_2
- ;bad character class. Nothing of ours is on the stack, so just return
- ; the error and let our caller null the pattern.
- stc
- ret
- makepat_10:
- cmp al,'*'
- jne makepat_11
- cmp last_pattern,-1 ;is there a previous pattern to close?
- je makepat_12 ;no - not closure.
- mov bx,last_pattern
- ;compare in memory so that al still holds the '*' if we end up storing it.
- cmp word ptr [bx],offset omatch_CLO ;trying to close a closure?
- je makepat_12 ;yes - not closure.
- cmp word ptr [bx],offset omatch_BOL ;trying to close a beginning of line?
- je makepat_12 ;yes - not closure.
- call stclos
- mov this_pattern,bx ;remember where this one was.
- jmp makepat_2
- makepat_11:
- ;put more characters here.
- makepat_12:
- call addchar ;anything else is an ordinary character.
- makepat_2:
- mov bx,this_pattern
- mov last_pattern,bx
- jmp makepat_1
-
-
- escaped_char:
- ;Compile the character that followed a '\' in the source pattern.
- ;al = the escaped character, si -> the source after it,
- ; di->dest, dx->end of dest.
- ;The escapes listed below store their matcher, "\|" starts a new
- ; alternative, and anything else is stored as an ordinary character.
- mov cx,offset omatch_NL
- cmp al,"n" ;newline?
- je escaped_1
-
- mov cx,offset omatch_BOB
- cmp al,"`" ;beginning of buffer?
- je escaped_1
-
- mov cx,offset omatch_EOB
- cmp al,"'" ;end of buffer?
- je escaped_1
-
- mov cx,offset omatch_WOR
- cmp al,"b" ;beginning or end of word?
- je escaped_1
-
- mov cx,offset omatch_NWR
- cmp al,"B" ;not beginning nor end of word?
- je escaped_1
-
- mov cx,offset omatch_BOW
- cmp al,"<" ;beginning of word?
- je escaped_1
-
- mov cx,offset omatch_EOW
- cmp al,">" ;end of word?
- je escaped_1
-
- mov cx,offset omatch_ISW
- cmp al,"w" ;word character?
- je escaped_1
-
- mov cx,offset omatch_NOW
- cmp al,"W" ;not word character?
- je escaped_1
-
- cmp al,'|' ;is this an "or" operator?
- jne addchar ;no.
-
- mov inpat_ptr,si ;start a new regexp here...
- call stor ;store a "or" operator.
- ;stor moved the compiled bytes, so this_pattern no longer points at an
- ; element. Leave nothing for a following '*' to close.
- mov this_pattern,-1
- ret
- escaped_1:
- mov ax,cx
- call addset
- ret
-
- addchar:
- ;Append an ordinary-character element to the compiled pattern: the
- ; current character matcher (which_chr) followed by the character itself.
- ;al = CHR to put.
- push ax ;keep the character while we store the matcher.
- mov ax,which_chr ;use the right omatch_chr.
- call addset
- pop ax
- jmp addbyte ;store the character and return from there.
-
-
- addset: ;only command chars call addset.
- ;Append the word in ax to the compiled pattern, low byte first.
- ;ax is preserved.
- call addbyte ;the low byte goes first,
- push ax
- mov al,ah
- call addbyte ; then the high byte.
- pop ax
- ret
-
-
- addbyte:
- ;Append one byte to the compiled pattern.
- ;al = char to put, di->dest, dx->end of dest.
- ;Once the buffer is full the byte is silently dropped; makepat sees that
- ; di reached dx and reports the error.
- cmp di,dx
- jae addbyte_1 ;full, or already past the end - don't store.
- mov [di],al
- inc di
- addbyte_1:
- ret
-
-
- stclos:
- ;Turn the last pattern element into a closure: open a two-byte hole in
- ; front of it and store omatch_CLO there.
- ;di->last set added + 1
- ;bx->start of the element being closed (must be below di)
- ;dx->end of dest
- ;exit: di advanced by two, bx unchanged, ax trashed.
- ; If the result would not fit, nothing is stored and di=dx, which makes
- ; makepat report the overflow.
- lea ax,[di+2] ;where would the pattern end afterwards?
- cmp ax,dx
- ja stclos_full ;beyond the buffer - refuse.
- push di
- stclos_1:
- dec di ;move the element up two bytes,
- mov al,[di] ; last byte first.
- mov [di+2],al
- cmp di,bx
- jne stclos_1
- mov word ptr [bx],offset omatch_CLO
- pop di
- add di,2
- ret
- stclos_full:
- mov di,dx ;mark the pattern as overflowed.
- ret
-
-
- stor:
- ;Compile "\|": wrap the alternative compiled so far in an omatch_OR.
- ;The bytes from last_or up to di are moved up four bytes to make room for
- ; omatch_OR and its operand, an omatch_EOS is appended to end the
- ; alternative, and the operand is set to where the next one will start.
- ;di->last set added + 1, dx->end of dest
- ;exit: di->start of the next alternative, which also becomes last_or so
- ; that a later "\|" only wraps what follows. ax and bx are trashed.
- ; If the result would not fit, nothing is stored and di=dx, which makes
- ; makepat report the overflow.
- mov bx,last_or
- lea ax,[di+6] ;four bytes for the OR plus two for the EOS.
- cmp ax,dx
- ja stor_full ;beyond the buffer - refuse.
- cmp di,bx ;is this alternative empty?
- je stor_2 ;yes - there's nothing to move.
- push di
- stor_1:
- dec di ;move the alternative up four bytes,
- mov al,[di] ; last byte first.
- mov [di+4],al
- cmp di,bx
- jne stor_1
- pop di ;get the new last set.
- stor_2:
- add di,4
- mov ax,offset omatch_EOS ;store the end of string.
- stosw
- mov word ptr [bx],offset omatch_OR
- mov [bx+2],di ;remember where the next starts.
- mov last_or,di ;the next alternative begins here.
- ret
- stor_full:
- mov di,dx ;mark the pattern as overflowed.
- ret
-
-
- getccl:
- ;Compile a character class; the opening '[' has already been read.
- ;Stores omatch_CCL (or omatch_NCCL after a leading '^'), a count byte,
- ; and then the characters of the class.
- ;si -> source (null terminated)
- ;di -> dest, dx -> end of dest
- ;return cy=1 if error: no closing ']', more than 255 characters in the
- ; class, or no room left for the count byte.
- lodsb
- cmp al,'^'
- jne getccl_1
- mov ax,offset omatch_NCCL
- call addset
- jmp getccl_2
- getccl_1:
- dec si ;it wasn't a '^' - put it back.
- mov ax,offset omatch_CCL
- call addset
- getccl_2:
- push bx
- mov bx,di ;bx -> the count byte.
- call addbyte ;leave room for count
- call dodash ;store the characters of the class.
- mov ax,di
- sub ax,bx
- dec ax ;ax = number of characters stored, or -1 if
- or ah,ah ; there wasn't even room for the count.
- jne getccl_4 ;it doesn't fit in a byte - give up.
- mov [bx],al
- pop bx
- lodsb
- cmp al,']' ;now make sure that we end in ']'.
- je getccl_3 ;yup, we do.
- dec si ;make si -> the null.
- stc
- ret
- getccl_3:
- clc
- ret
- getccl_4:
- pop bx
- stc
- ret
-
-
- dodash:
- ;Copy the body of a character class to the compiled pattern, expanding
- ; ranges such as a-z. A '-' is taken literally when it is first or last in
- ; the class, or when its neighbours aren't an increasing alphanumeric pair.
- ;si -> source pattern (null terminated)
- ;di -> destination pattern
- ;dx -> end of destination pattern
- ;exit: si -> the ']' or null that ended the class. bx is preserved.
- push bx
- mov bx,si ;bx -> first character of the class.
- dodash_1:
- lodsb
- or al,al
- je dodash_2
- cmp al,']'
- je dodash_2
- cmp al,'-'
- je dodash_4
- call addbyte
- jmp dodash_1
- dodash_4:
- lea ax,[si-1] ;lodsb moved si on, so this is the '-' itself.
- cmp ax,bx ;'-' at beginning?
- je dodash_5
- cmp [si].b,0 ;or '-' at end of the source?
- je dodash_5
- cmp [si].b,']' ;or '-' at end of the class?
- jne dodash_6
- dodash_5:
- mov al,'-' ;if at beginning or at end, just a '-'
- call addbyte
- jmp dodash_1
- dodash_6:
- mov al,[si-2] ;in increasing alphabetic order?
- cmp al,[si]
- ja dodash_5 ;no - forget it.
- call alphanumeric ;left char alphanumeric?
- jnc dodash_5 ;no - forget it.
- mov al,[si]
- call alphanumeric ;right char alphanumeric?
- jnc dodash_5 ;no - forget it.
- mov al,[si-2]
- dodash_7:
- inc al ;pre-increment -- the first one's there.
- cmp al,[si]
- ja dodash_9
- call addbyte
- jmp dodash_7
- dodash_9:
- inc si ;step over the right end of the range.
- jmp dodash_1
- dodash_2:
- dec si ;leave si on the terminator.
- pop bx
- ret
-
-
- alphanumeric:
- ;Is al one of 0-9, A-Z or a-z?
- ;return cy=1 if al is alphanumeric, cy=0 if not.
- cmp al,'0'
- jb alphanumeric_no ;below the digits.
- cmp al,'9'
- jbe alphanumeric_yes ;a digit.
- cmp al,'A'
- jb alphanumeric_no ;between the digits and the capitals.
- cmp al,'Z'
- jbe alphanumeric_yes ;a capital.
- cmp al,'a'
- jb alphanumeric_no ;between the capitals and the small letters.
- cmp al,'z'
- ja alphanumeric_no ;above the small letters.
- alphanumeric_yes:
- stc
- ret
- alphanumeric_no:
- clc
- ret
-
-
- init_case_table:
- ;Fill in case_ignore_table: every byte translates to itself, except that
- ; 'a' through 'z' translate to 'A' through 'Z'.
- ;Reached through init_case, which is repointed here at a bare ret so that
- ; the table is only built once. bx is preserved; al is trashed.
- push bx
- mov init_case,offset init_case_2 ;later calls do nothing.
- xor bx,bx
- init_case_fill:
- mov case_ignore_table[bx],bl ;start with the identity mapping.
- inc bl
- jnz init_case_fill ;until bl wraps around to zero.
- ;now translate 'a' to 'A'.
- mov bx,'a'
- init_case_upper:
- mov al,bl
- and al,0DFh ;clearing bit 5 turns a small letter into a capital.
- mov case_ignore_table[bx],al
- inc bx
- cmp bx,'z'
- jbe init_case_upper
- pop bx
- init_case_2:
- ret
-
-
- code ends
-
- end
-
-