home *** CD-ROM | disk | FTP | other *** search
-
- ; MEM.ASM
-
- public _lBZero ; Zero a block of memory (sets a fill byte of 0 and enters _lBSet)
- public _lBSet ; Set a block of memory to (byte val)
- public _lBMov ; Move a block of memory, picking the copy direction from the two addresses
- public _lBCmp ; Compare two blocks of memory (D0 = 1 if identical, 0 if not)
-
- ; BSET(buffer:D0, len:D1, byte:A0.B)  store byte into each of the len bytes starting at buffer
- ; BZERO(buffer:D0, len:D1)            same routine with the fill byte forced to 0
- ; Fills from the end of the buffer downwards.  D0, D1, A0 (and A1 on the long path) are overwritten; D2-D7/A2-A6 are preserved.
- _lBZero: move.w #0,A0 ; fill byte = 0, then fall through into _lBSet
- _lBSet: exg A0,D0 ; A0 = buffer (D0=byte)
- exg D0,D1 ; D0 = length (D1=byte)
- add.l D0,A0 ; A0 = buffer+len; every store below uses predecrement
- cmp.l #40,D0 ; cutoff for the long path, unscientifically chosen
- bls .bs2 ; len <= 40: simple byte loop
- bra .bs10 ; len > 40: fill through registers
- .bs1 move.b D1,-(A0) ; short fill, one byte per pass (len <= 40 here)
- .bs2 dbf D0,.bs1 ; performs the store D0.W times (0 times for len = 0)
- rts
- ; ---- long path, entered only when len > 40 ----
- ; on entry: D0 = byte count, D1 = fill byte, A0 = address just past the buffer
- .bs10 movem.l D2-D7/A2-A6,-(sp) ; save the registers that will hold fill data
- move.l A0,D2 ; copy the end address so its low bit can be tested
- btst.l #0,D2 ; is it aligned?
- beq .bs22 ; even: go straight to building the pattern
- move.b D1,-(A0) ; no, store one byte so that A0 becomes even
- subq.l #1,D0 ; and count it
- ; ---- replicate the fill byte into all four bytes of D2, then into 11 more registers ----
- .bs22 andi.l #$FF,D1 ; expand data D1.B -> D2-D7/A1-A6
- move.l D1,D2 ; D1 000000xx D2 000000xx
- asl.w #8,D2 ; D1 000000xx D2 0000xx00
- or.w D2,D1 ; D1 0000xxxx D2 0000xx00
- move.w D1,D2 ; D1 0000xxxx D2 0000xxxx
- swap D2 ; D1 0000xxxx D2 xxxx0000
- or.l D1,D2 ; D2.L = xxxxxxxx
- move.l D2,D3
- move.l D2,D4
- move.l D2,D5
- move.l D2,D6
- move.l D2,D7
- move.l D2,A1
- move.l D2,A2
- move.l D2,A3
- move.l D2,A4
- move.l D2,A5
- move.l D2,A6 ; D2-D7/A1-A6 (12 registers)
- move.l #12*4,D1 ; bytes per transfer (48)
- .bs30 sub.l D1,D0 ; pre subtract
- bmi .bs40 ; fewer than 48 bytes left: skip the block loop
- .bs31 movem.l D2-D7/A1-A6,-(A0) ; store 48 bytes with one instruction
- sub.l D1,D0
- bpl .bs31 ; repeat while a further 48 bytes were available
- .bs40 add.w D1,D0 ; undo the final subtract: D0.W = bytes left (less than 48)
- ; ---- remainder: whole longwords first, then single bytes; D0 is used as a word from here on ----
- move.w #4,D1 ; by 4's
- sub.w D1,D0 ; pre subtract, same scheme as the 48 byte loop
- bmi .bs50 ; fewer than 4 bytes left
- .bs41 move.l D2,-(A0)
- sub.w D1,D0
- bpl .bs41
- .bs50 add.w D1,D0 ; undo the final subtract: D0.W = 0 to 3 bytes left
- bra .bs52 ; enter the byte loop at its counter test
- .bs51 move.b D2,-(A0) ; by 1's
- .bs52 dbf D0,.bs51
- movem.l (sp)+,D2-D7/A2-A6 ; restore what was saved at .bs10 (A1 is not restored)
- rts
-
- ; BCMP(src:D0, dst:D1, len:A0)  compare len bytes at src and dst; returns D0 = 1 if all match (or len = 0), D0 = 0 on a mismatch
- ; The length is a full 32 bit count: DBNE runs on the low word of D0 and SUB.L #$10000 / BCC steps through the high word.
- _lBCmp: exg A0,D0 ;A0 = src, D0 = len
- exg A1,D1 ;A1 = dst
- ; D1 now holds the caller's old A1 and is not used again
- tst.l D0
- beq .bcsucc ;zero length is reported as a match
- cmp.w D0,D0 ;force Z bit so the first DBNE sees "equal" and tests the counter
- bra .bc2 ;enter the loop at the counter test
- .bc1 cmpm.b (A0)+,(A1)+ ;compare one byte and advance both pointers
- .bc2 dbne D0,.bc1 ;falls through on a mismatch or once D0.W is used up
- bne .bcfail ;flags are still those of the last compare: NE = mismatch
- sub.l #$10000,D0 ;D0.W is $FFFF here, so this borrows one from the high word
- bcc .bc1 ;no borrow: 65536 more bytes to compare
- .bcsucc moveq.l #1,D0 ;success!
- rts
- .bcfail moveq.l #0,D0 ;failure!
- rts
-
- ; BMOV(src:D0, dst:D1, len:A0)  copy len bytes from src to dst
- ;
- ; The memory move algorithm is somewhat more of a mess
- ; since we must do it either ascending or descending.
- ; dst above src copies downwards from the end (.bmdown), dst below src copies upwards (.bmup), dst = src returns at once.
- _lBMov: exg A0,D0 ;A0 = src, D0 = len
- exg A1,D1 ;A1 = dst
- cmp.l A0,A1 ;flags from dst - src
- beq .bmend ;same address: nothing to do
- bls .bmup ;dst below src (unsigned compare): ascending copy
- .bmdown adda.l D0,A0 ;descending copy
- adda.l D0,A1 ;both pointers now sit just past the end of their blocks
- move.w A0,D1 ;CHECK WORD ALIGNED
- btst.l #0,D1
- bne .bmdown1 ;odd source end: copy everything byte by byte
- move.w A1,D1
- btst.l #0,D1
- bne .bmdown1 ;odd destination end: copy everything byte by byte
- cmp.l #259,D0 ;chosen by calculation.
- blo .bmdown8 ;fewer than 259 bytes: skip the 44 byte block loop
- ; ---- 44 bytes per pass through 11 registers; DIVU leaves the pass count in D1.W and the remainder in D1's upper word ----
- move.l D0,D1 ;overhead for bmd44: ~360
- divu #44,D1
- bvs .bmdown8 ;quotient does not fit in 16 bits (len >= 2,883,584): use the 8 byte loop
- movem.l D2-D7/A2-A6,-(sp) ;use D2-D7/A2-A6 (11 regs)
- move.l #11*4,D0 ;D0 = 44, the amount to step the source back per pass
- bra .bmd44b ;enter the loop at the counter test
- .bmd44a sub.l D0,A0 ;8 total 214/44bytes
- movem.l (A0),D2-D7/A2-A6 ;12 + 8*11 4.86 cycles/byte
- movem.l D2-D7/A2-A6,-(A1) ; 8 + 8*11
- .bmd44b dbf D1,.bmd44a ;10
- swap D1 ;bring the remainder into the low word; D0's upper word is already 0 (D0 = 44)
- move.w D1,D0 ;D0 = remainder
- movem.l (sp)+,D2-D7/A2-A6
- ; ---- 8 bytes per pass; entered with D0 = number of bytes still to copy ----
- .bmdown8 move.w D0,D1 ;D1<2:0> = #bytes left later
- lsr.l #3,D0 ;divide by 8
- bra .bmd8b ;enter the loop at the counter test
- .bmd8a move.l -(A0),-(A1) ;20 total 50/8bytes
- move.l -(A0),-(A1) ;20 = 6.25 cycles/byte
- .bmd8b dbf D0,.bmd8a ;10
- sub.l #$10000,D0 ;D0.W is used up: borrow one from the high word
- bcc .bmd8a ;no borrow: 65536 more passes
- move.w D1,D0 ;D0 = 0 to 7 bytes
- and.l #7,D0 ;keep only the 0 to 7 leftover bytes (also clears the upper word)
- bne .bmdown1 ;copy the leftover bytes one at a time
- rts
- ; ---- descending byte copy; D0 is a full 32 bit byte count ----
- .bmd1a move.b -(A0),-(A1) ;12 total 22/byte
- .bmdown1 ; = 22 cycles/byte
- .bmd1b dbf D0,.bmd1a ;10
- sub.l #$10000,D0 ;D0.W is used up: borrow one from the high word
- bcc .bmd1a ;no borrow: 65536 more bytes
- rts
- ; ---- ascending copy: same three stages as above, with postincrement addressing ----
- .bmup move.w A0,D1 ;CHECK WORD ALIGNED
- btst.l #0,D1
- bne .bmup1 ;odd source: copy everything byte by byte
- move.w A1,D1
- btst.l #0,D1
- bne .bmup1 ;odd destination: copy everything byte by byte
- cmp.l #259,D0 ;chosen by calculation
- blo .bmup8 ;fewer than 259 bytes: skip the 44 byte block loop
- ; ---- 44 bytes per pass through 11 registers; DIVU leaves the pass count in D1.W and the remainder in D1's upper word ----
- move.l D0,D1 ;overhead for bmu44: ~360
- divu #44,D1
- bvs .bmup8 ;quotient does not fit in 16 bits (len >= 2,883,584): use the 8 byte loop
- movem.l D2-D7/A2-A6,-(sp) ;use D2-D7/A2-A6 (11 regs)
- move.l #11*4,D0 ;D0 = 44, the amount to advance the destination per pass
- bra .bmu44b ;enter the loop at the counter test
- .bmu44a movem.l (A0)+,D2-D7/A2-A6 ;12 + 8*11 ttl 214/44bytes
- movem.l D2-D7/A2-A6,(A1) ;8 + 8*11 4.86 cycles/byte
- add.l D0,A1 ;8
- .bmu44b dbf D1,.bmu44a ;10
- swap D1 ;bring the remainder into the low word; D0's upper word is already 0 (D0 = 44)
- move.w D1,D0 ;D0 = remainder
- movem.l (sp)+,D2-D7/A2-A6
- ; ---- 8 bytes per pass; entered with D0 = number of bytes still to copy ----
- .bmup8 move.w D0,D1 ;D1<2:0> = #bytes left later
- lsr.l #3,D0 ;divide by 8
- bra .bmu8b ;enter the loop at the counter test
- .bmu8a move.l (A0)+,(A1)+ ;20 total 50/8bytes
- move.l (A0)+,(A1)+ ;20 = 6.25 cycles/byte
- .bmu8b dbf D0,.bmu8a ;10
- sub.l #$10000,D0 ;D0.W is used up: borrow one from the high word
- bcc .bmu8a ;no borrow: 65536 more passes
- move.w D1,D0 ;D0 = 0 to 7 bytes
- and.l #7,D0 ;keep only the 0 to 7 leftover bytes (also clears the upper word)
- bne .bmup1 ;copy the leftover bytes one at a time
- rts
- ; ---- ascending byte copy; D0 is a full 32 bit byte count ----
- .bmu1a move.b (A0)+,(A1)+ ;copy one byte and advance both pointers
- .bmup1 ;entry point of the byte loop (at the counter test)
- .bmu1b dbf D0,.bmu1a
- sub.l #$10000,D0 ;D0.W is used up: borrow one from the high word
- bcc .bmu1a ;no borrow: 65536 more bytes
- .bmend rts
-
-