Computerworld 1996 March

home *** CD-ROM | disk | FTP | other *** search

/ Computerworld 1996 March / Computerworld_1996-03_cd.bin / idg_cd3 / grafika / fraktaly / frasr192 / bignuma.asm < prev next >

Wrap

Assembly Source File | 1995-02-03 | 88KB | 2,476 lines

; bignuma.asm - asm routines for bignumbers ; far pointer version ; Wesley Loewer's Big Numbers. (C) 1994, Wesley B. Loewer ; IMPORTANT!!! The following routines assume that all the bignumber arrays ; were allocated from the same far segment. This allows the routines to ; switch the DS register to that segment and treat everything then as a ; near pointer. If that array is not in this segment, then copy it into ; an array that is. ; See BIGNUMC.C for further documentation. .MODEL medium, c ifdef ??version ; for TASM ideal TYPEDEF bn_t far ptr byte masm masm51 quirks ; invoke w/ one arg invoke macro a,b call a, b endm ; invoke with 2 args invoke2 macro a,b,c call a, b, c endm extern equ extrn else bn_t TYPEDEF far ptr byte ; far pointer to bignumber array endif .DATA extern cpu:word extern bnlength:word, rlength:word; .CODE .8086 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = 0 clear_bn PROC USES di, r:bn_t les di, r ; load pointer in es:di for stos mov cx, bnlength cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible sub ax, ax ; clear ax shr cx, 1 ; 1 byte = 1/2 word rep stosw ; clear r, word at a time jmp bottom use_32_bit: .386 sub eax, eax ; clear eax shr cx, 2 ; 1 byte = 1/4 word rep stosd ; clear r, dword at a time bottom: .8086 mov dx, es ; return r in dx:ax mov ax, word ptr r ret clear_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = max positive value max_bn PROC USES di, r:bn_t les di, r ; load pointer in es:di for stos mov cx, bnlength cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible mov ax, 0FFFFh ; set ax to max value shr cx, 1 ; 1 byte = 1/2 word rep stosw ; max out r, word at a time jmp bottom use_32_bit: .386 mov eax, 0FFFFFFFFh ; set eax to max value shr cx, 2 ; 1 byte = 1/4 word rep stosd ; max out r, dword at a time bottom: .8086 ; when the above stos is finished, di points to the byte past the end dec di ; find sign bit in msb mov byte ptr es:[di], 7Fh ; turn off the sign bit mov dx, es ; return r in dx:ax mov ax, word ptr r ret max_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n copy_bn PROC USES di si, r:bn_t, n:bn_t les di, r ; load pointer in es:di for movs mov ax, ds ; save ds for later mov cx, bnlength cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible lds si, n ; load pointer in ds:si for movs shr cx, 1 ; 1 byte = 1/2 word rep movsw ; copy word at a time jmp bottom use_32_bit: .386 lds si, n ; load pointer in ds:si for movs shr cx, 2 ; 1 byte = 1/4 word rep movsd ; copy dword at a time bottom: .8086 mov ds, ax ; restore ds mov dx, es ; return r in dx:ax mov ax, word ptr r ret copy_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; n1 != n2 ? ; RETURNS: if n1 == n2 returns 0 ; if n1 > n2 returns a positive (steps left to go when mismatch occured) ; if n1 < n2 returns a negative (steps left to go when mismatch occured) cmp_bn PROC USES di, n1:bn_t, n2:bn_t mov cx, bnlength push ds ; save DS mov di, word ptr n2 lds bx, n1 ; load pointers add bx, cx ; point to end of bignumbers add di, cx ; where the msb is cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word mov dx, cx ; keep a copy of cx top_loop_16: sub bx, 2 ; decrement to previous word sub di, 2 mov ax, [bx] ; load n1 cmp ax, [di] ; compare to n2 jne bottom ; don't match loop top_loop_16 jmp match use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword mov dx, cx ; keep a copy of cx top_loop_32: sub bx, 4 ; decrement to previous dword sub di, 4 mov eax, [bx] ; load n1 cmp eax, [di] ; compare to n2 jne bottom ; don't match loop top_loop_32 jmp match bottom: .8086 ; flags are still set from last cmp ; if cx == dx, then most significant part didn't match, use signed comparison ; else the decimals didn't match, use unsigned comparison lahf ; load results of last cmp cmp cx, dx ; did they differ on very first cmp jne not_first_step ; no sahf ; yes jg n1_bigger ; signed comparison jmp n2_bigger not_first_step: sahf ja n1_bigger ; unsigned comparison n2_bigger: neg cx ; make it negative n1_bigger: ; leave it positive match: ; leave it zero mov ax, cx pop ds ; restore DS ret cmp_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r < 0 ? ; returns 1 if negative, 0 if positive or zero is_bn_neg PROC n:bn_t ; for a one-pass routine like this, don't bother with ds les bx, n ; find sign bit add bx, bnlength dec bx ; here it is mov al, es:[bx] ; got it and al, 80h ; check the sign bit rol al, 1 ; rotate sign big to bit 0 sub ah, ah ; clear upper ax ret is_bn_neg ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; n != 0 ? ; RETURNS: if n != 0 returns 1 ; else returns 0 is_bn_not_zero PROC n:bn_t mov cx, bnlength mov ax, ds ; save DS lds bx, n ; load pointers with DS cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word top_loop_16: cmp word ptr [bx], 0 ; compare to n to 0 jnz bottom ; not zero add bx, 2 ; increment to next word loop top_loop_16 jmp bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword top_loop_32: cmp dword ptr [bx], 0 ; compare to n to 0 jnz bottom ; not zero add bx, 4 ; increment to next dword loop top_loop_32 jmp bottom bottom: .8086 mov ds, ax ; restore DS ; if cx is zero, then n was zero mov ax, cx ret is_bn_not_zero ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n1 + n2 add_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t mov cx, bnlength push ds ; save ds mov di, word ptr n1 ; load pointers ds:di mov si, word ptr n2 ; ds:si lds bx, r ; ds:bx cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word clc ; clear carry flag top_loop_16: mov ax, [di] ; n1 adc ax, [si] ; n1+n2 mov [bx], ax ; r = n1+n2 ; inc does not change carry flag inc di ; add di, 2 inc di inc si ; add si, 2 inc si inc bx ; add bx, 2 inc bx loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 double word clc ; clear carry flag top_loop_32: mov eax, [di] ; n1 adc eax, [si] ; n1+n2 mov [bx], eax ; r = n1+n2 lahf ; save carry flag add di, 4 ; increment by double word size add si, 4 add bx, 4 sahf ; restore carry flag loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret add_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r += n add_a_bn PROC USES di, r:bn_t, n:bn_t mov cx, bnlength push ds mov di, word ptr n ; load pointers ds:di lds bx, r ; ds:bx cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word clc ; clear carry flag top_loop_16: mov ax, [di] ; n adc [bx], ax ; r += n ; inc does not change carry flag inc di ; add di, 2 inc di inc bx ; add di, 2 inc bx loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 double word clc ; clear carry flag top_loop_32: mov eax, [di] ; n adc [bx], eax ; r += n lahf ; save carry flag add di, 4 ; increment by double word size add bx, 4 sahf ; restore carry flag loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret add_a_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n1 - n2 sub_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t mov cx, bnlength push ds mov di, word ptr n1 ; load pointers ds:di mov si, word ptr n2 ; ds:si lds bx, r ; ds:bx cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word clc ; clear carry flag top_loop_16: mov ax, [di] ; n1 sbb ax, [si] ; n1-n2 mov [bx], ax ; r = n1-n2 ; inc does not change carry flag inc di ; add di, 2 inc di inc si ; add si, 2 inc si inc bx ; add bx, 2 inc bx loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 double word clc ; clear carry flag top_loop_32: mov eax, [di] ; n1 sbb eax, [si] ; n1-n2 mov [bx], eax ; r = n1-n2 lahf ; save carry flag add di, 4 ; increment by double word size add si, 4 add bx, 4 sahf ; restore carry flag loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret sub_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r -= n sub_a_bn PROC USES di, r:bn_t, n:bn_t mov cx, bnlength push ds mov di, word ptr n ; load pointers ds:di lds bx, r ; ds:bx cmp cpu, 386 ; check cpu je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word clc ; clear carry flag top_loop_16: mov ax, [di] ; n sbb [bx], ax ; r -= n ; inc does not change carry flag inc di ; add di, 2 inc di inc bx ; add di, 2 inc bx loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 double word clc ; clear carry flag top_loop_32: mov eax, [di] ; n sbb [bx], eax ; r -= n lahf ; save carry flag add di, 4 ; increment by double word size add bx, 4 sahf ; restore carry flag loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret sub_a_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = -n neg_bn PROC USES di, r:bn_t, n:bn_t mov cx, bnlength push ds mov di, word ptr n ; load pointers ds:di lds bx, r ; ds:bx cmp cpu, 386 je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word top_loop_16: mov ax, [di] neg ax mov [bx], ax jc short no_more_carry_16 ; notice the "reverse" logic here add di, 2 ; increment by word size add bx, 2 loop top_loop_16 jmp short bottom no_more_carry_16: add di, 2 add bx, 2 loop top_loop_no_more_carry_16 ; jump down jmp short bottom top_loop_no_more_carry_16: mov ax, [di] not ax mov [bx], ax add di, 2 add bx, 2 loop top_loop_no_more_carry_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword top_loop_32: mov eax, [di] neg eax mov [bx], eax jc short no_more_carry_32 ; notice the "reverse" logic here add di, 4 ; increment by double word size add bx, 4 loop top_loop_32 jmp short bottom no_more_carry_32: add di, 4 ; increment by double word size add bx, 4 loop top_loop_no_more_carry_32 ; jump down jmp short bottom top_loop_no_more_carry_32: mov eax, [di] not eax mov [bx], eax add di, 4 ; increment by double word size add bx, 4 loop top_loop_no_more_carry_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret neg_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r *= -1 neg_a_bn PROC r:bn_t mov cx, bnlength push ds lds bx, r ; ds:bx cmp cpu, 386 je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word top_loop_16: neg word ptr [bx] jc short no_more_carry_16 ; notice the "reverse" logic here add bx, 2 loop top_loop_16 jmp short bottom no_more_carry_16: add bx, 2 loop top_loop_no_more_carry_16 ; jump down jmp short bottom top_loop_no_more_carry_16: not word ptr [bx] add bx, 2 loop top_loop_no_more_carry_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword top_loop_32: neg dword ptr [bx] jc short no_more_carry_32 ; notice the "reverse" logic here add bx, 4 loop top_loop_32 jmp short bottom no_more_carry_32: add bx, 4 loop top_loop_no_more_carry_32 ; jump down jmp short bottom top_loop_no_more_carry_32: not dword ptr [bx] add bx, 4 loop top_loop_no_more_carry_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret neg_a_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = 2*n double_bn PROC USES di, r:bn_t, n:bn_t mov cx, bnlength push ds mov di, word ptr n ; load pointers ds:di lds bx, r ; ds:bx cmp cpu, 386 je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word clc top_loop_16: mov ax, [di] rcl ax, 1 ; rotate with carry left mov [bx], ax ; inc does not change carry flag inc di ; add di, 2 inc di inc bx ; add bx, 2 inc bx loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword clc ; clear carry flag top_loop_32: mov eax, [di] rcl eax, 1 ; rotate with carry left mov [bx], eax lahf ; save carry flag add di, 4 ; increment by double word size add bx, 4 sahf ; restore carry flag loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret double_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r *= 2 double_a_bn PROC r:bn_t mov cx, bnlength push ds lds bx, r ; ds:bx cmp cpu, 386 je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word clc top_loop_16: rcl word ptr [bx], 1 ; rotate with carry left ; inc does not change carry flag inc bx ; add bx, 2 inc bx loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword clc ; clear carry flag top_loop_32: rcl dword ptr [bx], 1 ; rotate with carry left lahf ; save carry flag add bx, 4 sahf ; restore carry flag loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret double_a_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n/2 half_bn PROC USES di, r:bn_t, n:bn_t mov cx, bnlength push ds mov di, word ptr n ; load pointers ds:di lds bx, r ; ds:bx add di, cx ; start with msb add bx, cx cmp cpu, 386 je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word ; handle the first step with sar, the rest with rcr sub di, 2 sub bx, 2 mov ax, [di] sar ax, 1 ; shift arithmetic right mov [bx], ax loop top_loop_16 jmp short bottom top_loop_16: ; inc does not change carry flag dec di ; sub di, 2 dec di dec bx ; sub bx, 2 dec bx mov ax, [di] rcr ax, 1 ; rotate with carry right mov [bx], ax loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword sub di, 4 ; decrement by double word size sub bx, 4 mov eax, [di] sar eax, 1 ; shift arithmetic right mov [bx], eax loop top_loop_32 jmp short bottom top_loop_32: lahf ; save carry flag sub di, 4 ; decrement by double word size sub bx, 4 sahf ; restore carry flag mov eax, [di] rcr eax, 1 ; rotate with carry right mov [bx], eax loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret half_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r /= 2 half_a_bn PROC r:bn_t mov cx, bnlength push ds lds bx, r ; ds:bx add bx, cx cmp cpu, 386 je short use_32_bit ; use faster 32 bit code if possible shr cx, 1 ; byte = 1/2 word ; handle the first step with sar, the rest with rcr sub bx, 2 sar word ptr [bx], 1 ; shift arithmetic right loop top_loop_16 jmp short bottom top_loop_16: ; inc does not change carry flag dec bx ; sub bx, 2 dec bx rcr word ptr [bx], 1 ; rotate with carry right loop top_loop_16 jmp short bottom use_32_bit: .386 shr cx, 2 ; byte = 1/4 dword sub bx, 4 ; decrement by double word size sar dword ptr [bx], 1 ; shift arithmetic right loop top_loop_32 jmp short bottom top_loop_32: lahf ; save carry flag sub di, 4 ; decrement by double word size sub bx, 4 sahf ; restore carry flag rcr dword ptr [bx], 1 ; rotate with carry right loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax pop ds ; restore ds mov ax, word ptr r ret half_a_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n1 * n2 ; Note: r will be a double wide result, 2*bnlength ; n1 and n2 can be the same pointer ; SIDE-EFFECTS: n1 and n2 are changed to their absolute values ; unsafe_full_mult_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t LOCAL sign1:byte, sign2:byte, samevar:byte, \ i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \ n1p: ptr byte, n2p: ptr byte ; This routine uses near variables and far pointers together in the ; inner-most loops. Therefore, there is little to gain by setting DS ; to the far segment. Instead, set ES to the far segment and use segment ; overrides where necessary. ; By forcing the bignumber to be positive and keeping track of the sign ; bits separately, quite a few multiplies are saved. ; set the ES register les bx, n1 ; check for sign bits add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit mov sign1, al jz already_pos1 invoke neg_a_bn, n1 already_pos1: ; Test to see if n1 and n2 are the same variable. It would be better to ; use square_bn(), but it could happen. mov samevar, 1 ; assume they are the same mov bx, word ptr n2 cmp bx, word ptr n1 ; are they the same? je already_pos2 ; if so, it has already been negated mov samevar, 0 ; they weren't the same after all add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit mov sign2, al jz already_pos2 invoke neg_a_bn, n2 already_pos2: ; in the following loops, the following pointers are used ; n1p, n2p = points to the part of n1, n2 being used ; di = points to part of doublebignumber used in outer loop ; si = points to part of doublebignumber used in inner loop ; bx = points to part of doublebignumber for carry flag loop cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible mov ax, word ptr n1 ; load pointers mov n1p, ax mov dx, bnlength ; set outer loop counter shr dx, 1 ; byte = 1/2 word mov steps, dx ; save in steps mov i, dx shl dx, 1 ; double steps sub ax, ax ; clear ax mov cx, dx ; size of doublebignumber (r) in words mov di, word ptr r ; es already set rep stosw ; initialize r to 0 sub dx, 2 ; only 2*s-2 steps are really needed mov doublesteps, dx mov carry_steps, dx mov di, word ptr r mov si, di ; both si and di are used here top_outer_loop_16: mov ax, word ptr n2 ; set n2p pointer mov n2p, ax mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_16: mov bx, n1p mov ax, es:[bx] mov bx, n2p mul word ptr es:[bx] mov bx, si add es:[bx], ax ; add low word inc bx ; increase by size of word inc bx adc es:[bx], dx ; add high word jnc no_more_carry_16 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_16 add bx, 2 ; move pointer to next word ; loop until no more carry or until end of double big number top_carry_loop_16: add word ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_16 add bx, 2 ; increase by size of word loop top_carry_loop_16 no_more_carry_16: add n2p, 2 ; increase by word size add si, 2 dec carry_steps ; use one less step dec j ja top_inner_loop_16 add n1p, 2 ; increase by word size add di, 2 mov si, di ; start with si=di dec doublesteps ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_16 ; result is now r, a double wide bignumber jmp bottom use_32_bit: .386 mov ax, word ptr n1 ; load pointers mov n1p, ax mov dx, bnlength ; set outer loop counter shr dx, 2 ; byte = 1/4 dword mov steps, dx ; save in steps mov i, dx shl dx, 1 ; double steps sub eax, eax ; clear eax mov cx, dx ; size of doublebignumber in dwords mov di, word ptr r ; es already set rep stosd ; initialize r to 0 sub dx, 2 ; only 2*s-2 steps are really needed mov doublesteps, dx mov carry_steps, dx mov di, word ptr r mov si, di ; both si and di are used here top_outer_loop_32: mov ax, word ptr n2 ; set n2p pointer mov n2p, ax mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_32: mov bx, n1p mov eax, es:[bx] mov bx, n2p mul dword ptr es:[bx] mov bx, si add es:[bx], eax ; add low dword lahf ; save carry flag add bx, 4 ; increase by size of dword sahf ; restor carry flag adc es:[bx], edx ; add high dword jnc no_more_carry_32 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_32 add bx, 4 ; move pointer to next dword ; loop until no more carry or until end of double big number top_carry_loop_32: add dword ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_32 add bx, 4 ; increase by size of dword loop top_carry_loop_32 no_more_carry_32: add n2p, 4 ; increase by dword size add si, 4 dec carry_steps ; use one less step dec j ja top_inner_loop_32 add n1p, 4 ; increase by dword size add di, 4 mov si, di ; start with si=di dec doublesteps ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_32 ; result is now r, a double wide bignumber bottom: .8086 cmp samevar, 1 ; were the variable the same ones? je pos_answer ; if yes, then jump mov al, sign1 ; is result + or - ? cmp al, sign2 ; sign(n1) == sign(n2) ? je pos_answer ; yes push bnlength ; save bnlength shl bnlength, 1 ; temporarily double bnlength ; for double wide bignumber invoke neg_a_bn, r ; does not affect ES pop bnlength ; restore bnlength pos_answer: mov dx, es ; return r in dx:ax mov ax, word ptr r ret unsafe_full_mult_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n1 * n2 calculating only the top rlength bytes ; Note: r will be of length rlength ; 2*bnlength <= rlength < bnlength ; n1 and n2 can be the same pointer ; SIDE-EFFECTS: n1 and n2 are changed to their absolute values ; unsafe_mult_bn PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t LOCAL sign1:byte, sign2:byte, samevar:byte, \ i:word, j:word, steps:word, doublesteps:word, \ carry_steps:word, skips:word, \ n1p: ptr byte, n2p: ptr byte ; By forcing the bignumber to be positive and keeping track of the sign ; bits separately, quite a few multiplies are saved. ; set the ES register here les bx, n1 ; check for sign bits add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit mov sign1, al jz already_pos1 invoke neg_a_bn, n1 already_pos1: ; Test to see if n1 and n2 are the same variable. It would be better to ; use square_bn(), but it could happen. mov samevar, 1 ; assume they are the same mov bx, word ptr n2 cmp bx, word ptr n1 ; are they the same? je already_pos2 ; if so, it has already been negated mov samevar, 0 ; they weren't the same after all add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit mov sign2, al jz already_pos2 invoke neg_a_bn, n2 already_pos2: mov ax, word ptr n1 ; load pointers mov n1p, ax mov ax, bnlength shl ax, 1 ; 2*bnlength sub ax, rlength ; 2*bnlength-rlength add word ptr n2, ax ; n2 = n2+2*bnlength-rlength ; in the following loops, the following pointers are used ; n1p, n2p = points to the part of n1, n2 being used ; di = points to part of doublebignumber used in outer loop ; si = points to part of doublebignumber used in inner loop ; bx = points to part of doublebignumber for carry flag loop cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible sub ax, ax ; clear ax mov cx, rlength ; size of r in bytes shr cx, 1 ; byte = 1/2 word mov di, word ptr r ; es already set rep stosw ; initialize r to 0 mov ax, rlength ; set steps for first loop sub ax, bnlength shr ax, 1 ; byte = 1/2 word mov steps, ax ; save in steps mov ax, bnlength shr ax, 1 ; byte = 1/2 word mov i, ax sub ax, steps mov skips, ax ; how long to skip over pointer shifts mov ax, rlength ; set steps for first loop shr ax, 1 ; byte = 1/2 word sub ax, 2 ; only rlength/2-2 steps are really needed mov doublesteps, ax mov carry_steps, ax mov di, word ptr r ; this is r mov si, di ; both si and di are used here top_outer_loop_16: mov ax, word ptr n2 ; set n2p pointer mov n2p, ax mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_16: mov bx, n1p mov ax, es:[bx] mov bx, n2p mul word ptr es:[bx] mov bx, si add es:[bx], ax ; add low word inc bx ; increase by size of word inc bx adc es:[bx], dx ; add high word jnc no_more_carry_16 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_16 add bx, 2 ; move pointer to next word ; loop until no more carry or until end of double big number top_carry_loop_16: add word ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_16 add bx, 2 ; increase by size of word loop top_carry_loop_16 no_more_carry_16: add n2p, 2 ; increase by word size add si, 2 dec carry_steps ; use one less step dec j ja top_inner_loop_16 add n1p, 2 ; increase by word size cmp skips, 0 je type2_shifts_16 sub word ptr n2, 2 ; shift n2 back a word inc steps ; one more step this time ; leave di and doublesteps where they are dec skips ; keep track of how many times we've done this jmp shifts_bottom_16 type2_shifts_16: add di, 2 ; shift di forward a word dec doublesteps ; reduce the carry steps needed shifts_bottom_16: mov si, di ; start with si=di mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_16 ; result is in r jmp bottom use_32_bit: .386 sub eax, eax ; clear eax mov cx, rlength ; size of r in bytes shr cx, 2 ; byte = 1/4 dword mov di, word ptr r ; es already set rep stosd ; initialize r to 0 mov ax, rlength ; set steps for first loop sub ax, bnlength shr ax, 2 ; byte = 1/4 dword mov steps, ax ; save in steps mov ax, bnlength shr ax, 2 ; byte = 1/4 dword mov i, ax sub ax, steps mov skips, ax ; how long to skip over pointer shifts mov ax, rlength ; set steps for first loop shr ax, 2 ; byte = 1/4 dword sub ax, 2 ; only rlength/4-2 steps are really needed mov doublesteps, ax mov carry_steps, ax mov di, word ptr r ; this is r mov si, di ; both si and di are used here top_outer_loop_32: mov ax, word ptr n2 ; set n2p pointer mov n2p, ax mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_32: mov bx, n1p mov eax, es:[bx] mov bx, n2p mul dword ptr es:[bx] mov bx, si add es:[bx], eax ; add low dword lahf ; save carry flag add bx, 4 ; increase by size of dword sahf ; restor carry flag adc es:[bx], edx ; add high dword jnc no_more_carry_32 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_32 add bx, 4 ; move pointer to next dword ; loop until no more carry or until end of r top_carry_loop_32: add dword ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_32 add bx, 4 ; increase by size of dword loop top_carry_loop_32 no_more_carry_32: add n2p, 4 ; increase by dword size add si, 4 dec carry_steps ; use one less step dec j ja top_inner_loop_32 add n1p, 4 ; increase by dword size cmp skips, 0 je type2_shifts_32 sub word ptr n2, 4 ; shift n2 back a dword inc steps ; one more step this time ; leave di and doublesteps where they are dec skips ; keep track of how many times we've done this jmp shifts_bottom_32 type2_shifts_32: add di, 4 ; shift di forward a dword dec doublesteps ; reduce the carry steps needed shifts_bottom_32: mov si, di ; start with si=di mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_32 ; result is in r bottom: .8086 cmp samevar, 1 ; were the variable the same ones? je pos_answer ; if yes, then jump mov al, sign1 ; is result + or - ? cmp al, sign2 ; sign(n1) == sign(n2) ? je pos_answer ; yes push bnlength ; save bnlength mov ax, rlength mov bnlength, ax ; set bnlength = rlength invoke neg_a_bn, r ; does not affect ES pop bnlength ; restore bnlength pos_answer: mov dx, es ; return r in dx:ax mov ax, word ptr r ret unsafe_mult_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n^2 ; because of the symetry involved, n^2 is much faster than n*n ; for a bignumber of length l ; n*n takes l^2 multiplications ; n^2 takes (l^2+l)/2 multiplications ; which is about 1/2 n*n as l gets large ; uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...) ; ; Note: r will be a double wide result, 2*bnlength ; SIDE-EFFECTS: n is changed to its absolute value ; unsafe_full_square_bn PROC USES di si, r:bn_t, n:bn_t LOCAL i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \ rp1: ptr byte, rp2: ptr byte ; This routine uses near variables and far pointers together in the ; inner-most loops. Therefore, there is little to gain by setting DS ; to the far segment. Instead, set ES to the far segment and use segment ; overrides where necessary. ; By forcing the bignumber to be positive and keeping track of the sign ; bits separately, quite a few multiplies are saved. ; load ES here les bx, n ; check for sign bit add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit jz already_pos invoke neg_a_bn, n already_pos: ; in the following loops, the following pointers are used ; n1p(di), n2p(si) = points to the parts of n being used ; rp1 = points to part of doublebignumber used in outer loop ; rp2 = points to part of doublebignumber used in inner loop ; bx = points to part of doublebignumber for carry flag loop mov cx, bnlength ; size of doublebignumber in words cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible sub ax, ax ; clear ax ; 2{twice the size}*bnlength/2{bytes per word} mov di, word ptr r ; es already set rep stosw ; initialize r to 0 mov dx, bnlength ; set outer loop counter shr dx, 1 ; byte = 1/2 word dec dx ; don't need to do last one mov i, dx ; loop counter mov steps, dx ; save in steps shl dx, 1 ; double steps sub dx, 1 ; only 2*s-1 steps are really needed mov doublesteps, dx mov carry_steps, dx mov ax, word ptr r add ax, 2 ; start with second word mov rp1, ax mov rp2, ax ; start with rp2=rp1 mov di, word ptr n ; load n1p pointer cmp i, 0 ; if bignumberlength is 2 je skip_middle_terms_16 top_outer_loop_16: mov si, di ; set n2p pointer add si, 2 ; to 1 word beyond n1p(di) mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_16: mov ax, es:[di] mul word ptr es:[si] mov bx, rp2 add es:[bx], ax ; add low word inc bx ; increase by size of word inc bx adc es:[bx], dx ; add high word jnc no_more_carry_16 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_16 add bx, 2 ; move pointer to next word ; loop until no more carry or until end of double big number top_carry_loop_16: add word ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_16 add bx, 2 ; increase by size of word loop top_carry_loop_16 no_more_carry_16: add si, 2 ; increase by word size add rp2, 2 dec carry_steps ; use one less step dec j ja top_inner_loop_16 add di, 2 ; increase by word size add rp1, 4 ; increase by 2*word size mov ax, rp1 mov rp2, ax ; start with rp2=rp1 sub doublesteps,2 ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec steps ; use one less step dec i ja top_outer_loop_16 ; All the middle terms have been multiplied. Now double it. push bnlength shl bnlength, 1 ; r is a double wide bignumber invoke double_a_bn, r pop bnlength skip_middle_terms_16: ; Now go back and add in the squared terms. ; In the following loops, the following pointers are used ; n1p(di) = points to the parts of n being used ; rp1(si) = points to part of doublebignumber used in outer loop ; bx = points to part of doublebignumber for carry flag loop mov di, word ptr n ; load n1p pointer mov dx, bnlength ; set outer loop counter shr dx, 1 ; 1 bytes = 1/2 word mov i, dx ; loop counter shl dx, 1 ; double steps sub dx, 2 ; only 2*s-2 steps are really needed mov doublesteps, dx mov carry_steps, dx mov si, word ptr r ; set rp1 top_outer_loop_squares_16: mov ax, es:[di] mul ax ; square it mov bx, si add es:[bx], ax ; add low word inc bx ; increase by size of word inc bx adc es:[bx], dx ; add high word jnc no_more_carry_squares_16 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_squares_16 add bx, 2 ; move pointer to next word ; loop until no more carry or until end of double big number top_carry_loop_squares_16: add word ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_squares_16 add bx, 2 ; increase by size of word loop top_carry_loop_squares_16 no_more_carry_squares_16: add di, 2 ; increase by word size add si, 4 ; increase by 2*word size sub doublesteps,2 ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_squares_16 ; result is in r, a double wide bignumber jmp bottom use_32_bit: .386 sub eax, eax ; clear eax ; 2{twice the size}*bnlength/4{bytes per word} shr cx, 1 ; size of doublebignumber in dwords mov di, word ptr r ; es already set rep stosd ; initialize r to 0 mov dx, bnlength ; set outer loop counter shr dx, 2 ; byte = 1/4 dword dec dx ; don't need to do last one mov i, dx ; loop counter mov steps, dx ; save in steps shl dx, 1 ; double steps sub dx, 1 ; only 2*s-1 steps are really needed mov doublesteps, dx mov carry_steps, dx mov ax, word ptr r add ax, 4 ; start with second dword mov rp1, ax mov rp2, ax ; start with rp2=rp1 mov di, word ptr n ; load n1p pointer cmp i, 0 ; if bignumberlength is 4 je skip_middle_terms_32 top_outer_loop_32: mov si, di ; set n2p pointer add si, 4 ; to 1 dword beyond n1p(di) mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_32: mov eax, es:[di] mul dword ptr es:[si] mov bx, rp2 add es:[bx], eax ; add low dword lahf add bx, 4 ; increase by size of dword sahf adc es:[bx], edx ; add high dword jnc no_more_carry_32 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_32 add bx, 4 ; move pointer to next dword ; loop until no more carry or until end of double big number top_carry_loop_32: add dword ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_32 add bx, 4 ; increase by size of dword loop top_carry_loop_32 no_more_carry_32: add si, 4 ; increase by dword size add rp2, 4 dec carry_steps ; use one less step dec j ja top_inner_loop_32 add di, 4 ; increase by dword size add rp1, 8 ; increase by 2*dword size mov ax, rp1 mov rp2, ax ; start with rp2=rp1 sub doublesteps,2 ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec steps ; use one less step dec i ja top_outer_loop_32 ; All the middle terms have been multiplied. Now double it. push bnlength shl bnlength, 1 ; r is a double wide bignumber invoke double_a_bn, r pop bnlength skip_middle_terms_32: ; Now go back and add in the squared terms. ; In the following loops, the following pointers are used ; n1p(di) = points to the parts of n being used ; rp1(si) = points to part of doublebignumber used in outer loop ; bx = points to part of doublebignumber for carry flag loop mov di, word ptr n ; load n1p pointer mov dx, bnlength ; set outer loop counter shr dx, 2 ; 1 bytes = 1/4 dword mov i, dx ; loop counter shl dx, 1 ; double steps sub dx, 2 ; only 2*s-2 steps are really needed mov doublesteps, dx mov carry_steps, dx mov si, word ptr r ; set rp1 top_outer_loop_squares_32: mov eax, es:[di] mul eax ; square it mov bx, si add es:[bx], eax ; add low dword lahf add bx, 4 ; increase by size of dword sahf adc es:[bx], edx ; add high dword jnc no_more_carry_squares_32 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_squares_32 add bx, 4 ; move pointer to next dword ; loop until no more carry or until end of double big number top_carry_loop_squares_32: add dword ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_squares_32 add bx, 4 ; increase by size of dword loop top_carry_loop_squares_32 no_more_carry_squares_32: add di, 4 ; increase by dword size add si, 8 ; increase by 2*dword size sub doublesteps,2 ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_squares_32 ; result is in r, a double wide bignumber bottom: .8086 ; since it is a square, the result has to already be positive mov dx, es ; return r in dx:ax mov ax, word ptr r ret unsafe_full_square_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n^2 ; because of the symetry involved, n^2 is much faster than n*n ; for a bignumber of length l ; n*n takes l^2 multiplications ; n^2 takes (l^2+l)/2 multiplications ; which is about 1/2 n*n as l gets large ; uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...) ; ; Note: r will be of length rlength ; 2*bnlength >= rlength > bnlength ; SIDE-EFFECTS: n is changed to its absolute value ; unsafe_square_bn PROC USES di si, r:bn_t, n:bn_t LOCAL i:word, j:word, steps:word, doublesteps:word, carry_steps:word, \ skips:word, rodd:word, \ n3p: ptr byte, \ rp1: ptr byte, rp2: ptr byte ; This whole procedure would be a great deal simpler if we could assume that ; rlength < 2*bnlength (that is, not =). Therefore, we will take the ; easy way out and call full_square_bn() if it is. mov ax, rlength shr ax, 1 ; 1/2 * rlength cmp ax, bnlength ; 1/2 * rlength == bnlength? jne not_full_square ifndef ??version invoke unsafe_full_square_bn, r, n else invoke2 unsafe_full_square_bn, r, n endif ; dx:ax is still loaded with return value jmp quit_proc ; we're outa here not_full_square: ; This routine uses near variables and far pointers together in the ; inner-most loops. Therefore, there is little to gain by setting DS ; to the far segment. Instead, set ES to the far segment and use segment ; overrides where necessary. ; By forcing the bignumber to be positive and keeping track of the sign ; bits separately, quite a few multiplies are saved. ; set ES here les bx, n ; check for sign bit add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit jz already_pos invoke neg_a_bn, n already_pos: ; in the following loops, the following pointers are used ; n1p(di), n2p(si) = points to the parts of n being used ; rp1 = points to part of doublebignumber used in outer loop ; rp2 = points to part of doublebignumber used in inner loop ; bx = points to part of doublebignumber for carry flag loop cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible sub ax, ax ; clear ax mov cx, rlength ; size of rlength in bytes shr cx, 1 ; byte = 1/2 word mov di, word ptr r ; es already set rep stosw ; initialize r to 0 ; determine whether r is on an odd or even word in the number ; (even if rlength==2*bnlength, dec r alternates odd/even) mov ax, bnlength shl ax, 1 ; double wide width sub ax, rlength ; 2*bnlength-rlength shr ax, 1 ; 1 byte = 1/2 word and ax, 0001h ; check the odd sign bit mov rodd, ax mov ax, bnlength ; set outer loop counter shr ax, 1 ; byte = 1/2 word dec ax ; don't need to do last one mov i, ax ; loop counter mov ax, rlength ; set steps for first loop sub ax, bnlength shr ax, 1 ; byte = 1/2 word mov steps, ax ; save in steps mov dx, bnlength shr dx, 1 ; bnlength/2 add ax, dx ; steps+bnlength/2 sub ax, 2 ; steps+bnlength/2-2 mov doublesteps, ax mov carry_steps, ax mov ax, i sub ax, steps shr ax, 1 ; for both words and dwords mov skips, ax ; how long to skip over pointer shifts mov ax, word ptr r mov rp1, ax mov rp2, ax ; start with rp2=rp1 mov di, word ptr n ; load n1p pointer mov si, di mov ax, bnlength shr ax, 1 ; 1 byte = 1/2 word sub ax, steps shl ax, 1 ; 1 byte = 1/2 word add si, ax ; n2p = n1p + 2*(bnlength/2 - steps) mov n3p, si ; save for later use cmp i, 0 ; if bignumberlength is 2 je skip_middle_terms_16 top_outer_loop_16: mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_16: mov ax, es:[di] mul word ptr es:[si] mov bx, rp2 add es:[bx], ax ; add low word inc bx ; increase by size of word inc bx adc es:[bx], dx ; add high word jnc no_more_carry_16 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_16 add bx, 2 ; move pointer to next word ; loop until no more carry or until end of double big number top_carry_loop_16: add word ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_16 add bx, 2 ; increase by size of word loop top_carry_loop_16 no_more_carry_16: add si, 2 ; increase by word size add rp2, 2 dec carry_steps ; use one less step dec j ja top_inner_loop_16 add di, 2 ; increase by word size mov ax, rodd ; whether r is on an odd or even word cmp skips, 0 jle type2_shifts_16 sub n3p, 2 ; point to previous word mov si, n3p inc steps ; one more step this time ; leave rp1 and doublesteps where they are dec skips jmp shifts_bottom_16 type2_shifts_16: ; only gets executed once jl type3_shifts_16 sub steps, ax ; steps -= (0 or 1) inc ax ; ax = 1 or 2 now sub doublesteps, ax ; decrease double steps by 1 or 2 shl ax, 1 ; 1 byte = 1/2 word add rp1, ax ; add 1 or 2 words mov si, di add si, 2 ; si = di + word dec skips ; make skips negative jmp shifts_bottom_16 type3_shifts_16: dec steps sub doublesteps, 2 add rp1, 4 ; + two words mov si, di add si, 2 ; si = di + word shifts_bottom_16: mov ax, rp1 mov rp2, ax ; start with rp2=rp1 mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_16 ; All the middle terms have been multiplied. Now double it. push bnlength ; save bnlength mov ax, rlength mov bnlength, ax ; r is of length rlength invoke double_a_bn, r pop bnlength skip_middle_terms_16: ; Now go back and add in the squared terms. ; In the following loops, the following pointers are used ; n1p(di) = points to the parts of n being used ; rp1(si) = points to part of doublebignumber used in outer loop ; bx = points to part of doublebignumber for carry flag loop ; be careful, the next dozen or so lines are confusing! ; determine whether r is on an odd or even word in the number mov ax, bnlength shl ax, 1 ; double wide width sub ax, rlength ; 2*bnlength-rlength mov dx, ax ; save this for a moment and ax, 0002h ; check the odd sign bit mov si, word ptr r add si, ax ; depending on odd or even byte shr dx, 1 ; assumes word size inc dx and dx, 0FFFEh ; ~2+1, turn off last bit, mult of 2 mov di, word ptr n add di, dx mov ax, bnlength sub ax, dx shr ax, 1 ; 1 byte = 1/2 word mov i, ax shl ax, 1 ; double steps sub ax, 2 ; only 2*s-2 steps are really needed mov doublesteps, ax mov carry_steps, ax top_outer_loop_squares_16: mov ax, es:[di] mul ax ; square it mov bx, si add es:[bx], ax ; add low word inc bx ; increase by size of word inc bx adc es:[bx], dx ; add high word jnc no_more_carry_squares_16 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_squares_16 add bx, 2 ; move pointer to next word ; loop until no more carry or until end of double big number top_carry_loop_squares_16: add word ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_squares_16 add bx, 2 ; increase by size of word loop top_carry_loop_squares_16 no_more_carry_squares_16: add di, 2 ; increase by word size add si, 4 ; increase by 2*word size sub doublesteps,2 ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_squares_16 ; result is in r jmp bottom use_32_bit: .386 sub eax, eax ; clear eax mov cx, rlength ; size of rlength in bytes shr cx, 2 ; byte = 1/4 dword mov di, word ptr r ; es already set rep stosd ; initialize r to 0 ; determine whether r is on an odd or even dword in the number ; (even if rlength==2*bnlength, dec r alternates odd/even) mov ax, bnlength shl ax, 1 ; double wide width sub ax, rlength ; 2*bnlength-rlength shr ax, 2 ; 1 byte = 1/4 dword and ax, 0001h ; check the odd sign bit mov rodd, ax mov ax, bnlength ; set outer loop counter shr ax, 2 ; byte = 1/4 dword dec ax ; don't need to do last one mov i, ax ; loop counter mov ax, rlength ; set steps for first loop sub ax, bnlength shr ax, 2 ; byte = 1/4 dword mov steps, ax ; save in steps mov dx, bnlength shr dx, 2 ; bnlength/4 add ax, dx ; steps+bnlength/4 sub ax, 2 ; steps+bnlength/4-2 mov doublesteps, ax mov carry_steps, ax mov ax, i sub ax, steps shr ax, 1 ; for both words and dwords mov skips, ax ; how long to skip over pointer shifts mov ax, word ptr r mov rp1, ax mov rp2, ax ; start with rp2=rp1 mov di, word ptr n ; load n1p pointer mov si, di mov ax, bnlength shr ax, 2 ; 1 byte = 1/4 dword sub ax, steps shl ax, 2 ; 1 byte = 1/4 dword add si, ax ; n2p = n1p + bnlength/4 - steps mov n3p, si ; save for later use cmp i, 0 ; if bignumberlength is 2 je skip_middle_terms_32 top_outer_loop_32: mov ax, steps ; set inner loop counter mov j, ax top_inner_loop_32: mov eax, es:[di] mul dword ptr es:[si] mov bx, rp2 add es:[bx], eax ; add low dword lahf ; save carry flag add bx, 4 ; increase by size of dword sahf ; restore carry flag adc es:[bx], edx ; add high dword jnc no_more_carry_32 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_32 add bx, 4 ; move pointer to next dword ; loop until no more carry or until end of double big number top_carry_loop_32: add dword ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_32 add bx, 4 ; increase by size of dword loop top_carry_loop_32 no_more_carry_32: add si, 4 ; increase by dword size add rp2, 4 dec carry_steps ; use one less step dec j ja top_inner_loop_32 add di, 4 ; increase by dword size mov ax, rodd ; whether r is on an odd or even dword cmp skips, 0 jle type2_shifts_32 sub n3p, 4 ; point to previous dword mov si, n3p inc steps ; one more step this time ; leave rp1 and doublesteps where they are dec skips jmp shifts_bottom_32 type2_shifts_32: ; only gets executed once jl type3_shifts_32 sub steps, ax ; steps -= (0 or 1) inc ax ; ax = 1 or 2 now sub doublesteps, ax ; decrease double steps by 1 or 2 shl ax, 2 ; 1 byte = 1/4 dword add rp1, ax ; add 1 or 2 dwords mov si, di add si, 4 ; si = di + dword dec skips ; make skips negative jmp shifts_bottom_32 type3_shifts_32: dec steps sub doublesteps, 2 add rp1, 8 ; + two dwords mov si, di add si, 4 ; si = di + dword shifts_bottom_32: mov ax, rp1 mov rp2, ax ; start with rp2=rp1 mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_32 ; All the middle terms have been multiplied. Now double it. push bnlength ; save bnlength mov ax, rlength mov bnlength, ax ; r is of length rlength invoke double_a_bn, r pop bnlength skip_middle_terms_32: ; Now go back and add in the squared terms. ; In the following loops, the following pointers are used ; n1p(di) = points to the parts of n being used ; rp1(si) = points to part of doublebignumber used in outer loop ; bx = points to part of doublebignumber for carry flag loop ; be careful, the next dozen or so lines are confusing! ; determine whether r is on an odd or even word in the number mov ax, bnlength shl ax, 1 ; double wide width sub ax, rlength ; 2*bnlength-rlength mov dx, ax ; save this for a moment and ax, 0004h ; check the odd sign bit mov si, word ptr r add si, ax ; depending on odd or even byte shr dx, 2 ; assumes dword size inc dx and dx, 0FFFEh ; ~2+1, turn off last bit, mult of 2 shl dx, 1 mov di, word ptr n add di, dx mov ax, bnlength sub ax, dx shr ax, 2 ; 1 byte = 1/4 dword mov i, ax shl ax, 1 ; double steps sub ax, 2 ; only 2*s-2 steps are really needed mov doublesteps, ax mov carry_steps, ax top_outer_loop_squares_32: mov eax, es:[di] mul eax ; square it mov bx, si add es:[bx], eax ; add low dword lahf ; save carry flag add bx, 4 ; increase by size of dword sahf ; restore carry flag adc es:[bx], edx ; add high dword jnc no_more_carry_squares_32 ; carry loop not necessary mov cx, carry_steps ; how many till end of double big number jcxz no_more_carry_squares_32 add bx, 4 ; move pointer to next dword ; loop until no more carry or until end of double big number top_carry_loop_squares_32: add dword ptr es:[bx], 1 ; use add, not inc jnc no_more_carry_squares_32 add bx, 4 ; increase by size of dword loop top_carry_loop_squares_32 no_more_carry_squares_32: add di, 4 ; increase by dword size add si, 8 ; increase by 2*dword size sub doublesteps,2 ; reduce the carry steps needed mov ax, doublesteps mov carry_steps, ax dec i ja top_outer_loop_squares_32 ; result is in r bottom: .8086 ; since it is a square, the result has to already be positive mov dx, es ; return r in dx:ax mov ax, word ptr r quit_proc: ret unsafe_square_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n * u where u is an unsigned integer mult_bn_int PROC USES di si, r:bn_t, n:bn_t, u:word LOCAL lu:dword ; long unsigned integer in 32 bit math mov cx, bnlength ; set outer loop counter mov ax, ds ; mov DS to ES for one segment override mov es, ax mov si, word ptr n ; load pointers ds:si lds di, r ; ds:di cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible ; no need to clear r shr cx, 1 ; byte = 1/2 word sub bx, bx ; use bx for temp holding carried word top_loop_16: mov ax, [si] ; load next word from n mul es:u ; n * u add ax, bx ; add last carried upper word adc dx, 0 ; inc the carried word if carry flag set mov bx, dx ; save high word in bx mov [di], ax ; save low word add di, 2 ; next word in r add si, 2 ; next word in n loop top_loop_16 jmp bottom use_32_bit: .386 ; no need to clear r shr cx, 2 ; byte = 1/4 dword sub ebx, ebx ; use ebx for temp holding carried dword sub eax, eax ; clear upper eax mov ax, es:u ; convert u (unsigned int) mov es:lu, eax ; to lu (long unsigned int) top_loop_32: mov eax, [si] ; load next dword from n mul es:lu ; n * lu add eax, ebx ; add last carried upper dword adc edx, 0 ; inc the carried dword if carry flag set mov ebx, edx ; save high dword in ebx mov [di], eax ; save low dword add di, 4 ; next dword in r add si, 4 ; next dword in n loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax mov ax, es ; restore ds mov ds, ax mov ax, word ptr r ret mult_bn_int ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r *= u where u is an unsigned integer mult_a_bn_int PROC USES di si, r:bn_t, u:word mov cx, bnlength ; set outer loop counter mov ax, ds ; save ds mov es, ax mov si, u ; save u in ds:si lds di, r ; load pointer ds:di cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible ; no need to clear r shr cx, 1 ; byte = 1/2 word sub bx, bx ; use bx for temp holding carried word top_loop_16: mov ax, [di] ; load next word from r mul si ; r * u add ax, bx ; add last carried upper word adc dx, 0 ; inc the carried word if carry flag set mov bx, dx ; save high word in bx mov [di], ax ; save low word add di, 2 ; next word in r loop top_loop_16 jmp bottom use_32_bit: .386 ; no need to clear r shr cx, 2 ; byte = 1/4 dword sub ebx, ebx ; use ebx for temp holding carried dword sub esi, esi ; clear upper esi top_loop_32: mov eax, [di] ; load next dword from r mul esi ; r * u add eax, ebx ; add last carried upper dword adc edx, 0 ; inc the carried dword if carry flag set mov ebx, edx ; save high dword in ebx mov [di], eax ; save low dword add di, 4 ; next dword in r loop top_loop_32 bottom: .8086 mov dx, ds ; return r in dx:ax mov ax, es ; restore ds mov ds, ax mov ax, word ptr r ret mult_a_bn_int ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n / u where u is an unsigned integer unsafe_div_bn_int PROC USES di si, r:bn_t, n:bn_t, u:word LOCAL sign:byte les bx, n ; check for sign bits add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit mov sign, al jz already_pos invoke neg_a_bn, n already_pos: mov cx, bnlength ; set outer loop counter mov ax, ds ; use es for default ds mov es, ax ; past most significant portion of the number mov si, word ptr n ; load pointers ds:si add si, cx lds di, r ; ds:di add di, cx cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible ; no need to clear r here, values get mov'ed, not add'ed shr cx, 1 ; byte = 1/2 word mov bx, es:u ; need to start with most significant portion of the number sub si, 2 ; most sig word sub di, 2 ; most sig word sub dx, dx ; clear dx register ; for first time through loop top_loop_16: mov ax, [si] ; load next word from n div bx mov [di], ax ; store low word ; leave remainder in dx sub si, 2 ; next word in n sub di, 2 ; next word in r loop top_loop_16 jmp bottom use_32_bit: .386 ; no need to clear r here, values get mov'ed, not add'ed shr cx, 2 ; byte = 1/4 dword sub ebx, ebx ; clear upper word or ebx mov bx, es:u ; need to start with most significant portion of the number sub si, 4 ; most sig dword sub di, 4 ; most sig dword sub edx, edx ; clear edx register ; for first time through loop top_loop_32: mov eax, [si] ; load next dword from n div ebx mov [di], eax ; store low dword ; leave remainder in edx sub si, 4 ; next dword in n sub di, 4 ; next dword in r loop top_loop_32 bottom: .8086 mov ax, es ; swap es & ds mov bx, ds mov ds, ax mov es, bx cmp sign, 0 ; is result + or - ? je pos_answer ; yes invoke neg_a_bn, r ; does not affect ES pos_answer: mov dx, es ; return r in dx:ax mov ax, word ptr r ; ret unsafe_div_bn_int ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r /= u where u is an unsigned integer div_a_bn_int PROC USES di, r:bn_t, u:word LOCAL sign:byte les bx, r ; check for sign bits add bx, bnlength dec bx mov al, es:[bx] and al, 80h ; check the sign bit mov sign, al jz already_pos invoke neg_a_bn, r already_pos: mov cx, bnlength ; set outer loop counter mov ax, ds ; save ds in es mov es, ax lds di, r ; load pointer r in ds:di add di, cx cmp cpu, 386 ; check cpu je use_32_bit ; use faster 32 bit code if possible ; no need to clear r here, values get mov'ed, not add'ed shr cx, 1 ; byte = 1/2 word mov bx, es:u ; need to start with most significant portion of the number sub di, 2 ; most sig word sub dx, dx ; clear dx register ; for first time through loop top_loop_16: mov ax, [di] ; load next word from r div bx mov [di], ax ; store low word ; leave remainder in dx sub di, 2 ; next word in r loop top_loop_16 jmp bottom use_32_bit: .386 ; no need to clear r here, values get mov'ed, not add'ed shr cx, 2 ; byte = 1/4 dword sub ebx, ebx ; clear upper word or ebx mov bx, es:u ; need to start with most significant portion of the number sub di, 4 ; most sig dword sub edx, edx ; clear edx register ; for first time through loop top_loop_32: mov eax, [di] ; load next dword from r div ebx mov [di], eax ; store low dword ; leave remainder in edx sub di, 4 ; next dword in r loop top_loop_32 bottom: .8086 mov ax, es ; swap es & ds mov bx, ds mov ds, ax mov es, bx cmp sign, 0 ; is result + or - ? je pos_answer ; yes invoke neg_a_bn, r ; does not affect ES pos_answer: mov dx, es ; return r in dx:ax mov ax, word ptr r ; ret div_a_bn_int ENDP END