home *** CD-ROM | disk | FTP | other *** search
- nolist
- **********************************************************************
- * CopyMemQuicker 2.1 - (C) 1991, 1992 Arthur Hagen *
- * Parts of code: (C) 1985-1991 Commodore Business Machines Ltd. *
- * Posted to the Public Domain *
- **********************************************************************
-
- xref _LVOOldOpenLibrary
- xref _LVOCloseLibrary
- xref _LVOOutput
- xref _LVOWrite
- xref _LVOAllocMem
- xref _LVOFreeMem
- xref _LVOSetFunction
- xref _LVOCopyMem
- xref _LVOCopyMemQuick
- include 'exec/execbase.i'
- include 'exec/memory.i'
-
- public _QuickMem
- list
- *---------------------------------------------------------------------
- * _QuickMem - program entry: install the patch, or undo it if present.
- *
- * Prints the title, then looks at the code the exec CopyMem vector
- * currently points to. If the longword just in front of that code is
- * '*Art', a copy of this patch is already installed: the two saved
- * vectors are put back with SetFunction (the resident code itself is
- * deliberately left allocated). Otherwise the CopyStart..CopyEnd block
- * is copied into newly allocated memory, its two size thresholds are
- * tuned from ExecBase AttnFlags, and CopyMemQuick / CopyMem are
- * redirected to the copy.
- *
- * Register roles: a6 = ExecBase (swapped with a5 = DOSBase around dos
- * calls), d7 = output file handle, d4 = size of the resident block,
- * a3 = base of the resident block.
- * No return code is loaded explicitly; the program leaves through a
- * tail jump into CloseLibrary.
- *---------------------------------------------------------------------
- _QuickMem
- movea.l (4).w,a6 ; ExecBase
- lea dosname(pc),a1
- jsr _LVOOldOpenLibrary(a6) ; Any version will do
- movea.l d0,a5 ; a5 = DOSBase (the result is not checked)
- exg.l a5,a6 ; a6 = DOSBase for the dos call
- jsr _LVOOutput(a6)
- exg.l a5,a6 ; back to a6 = ExecBase
- move.l d0,d7 ; d7 = output handle, consumed by wrt
- moveq #title_end-title,d3
- lea title(pc),a0
- bsr.s wrt ; print the title
- move.l #(CopyEnd-CopyStart),d4 ; d4 = size of the resident block
- movea.l _LVOCopyMem+2(a6),a0 ; longword 2 bytes into the CopyMem vector slot
- lea CopyStart-CopyMemQuicker(a0),a3 ; where CopyStart would be if that is our code
- move.l Identity-CopyStart(a3),d1
- cmpi.l #'*Art',d1
- beq.s isquicker ; signature found: already installed
-
- move.l d4,d0
- moveq #MEMF_PUBLIC,d1 ; the block is entered by every task, so it must be public memory
- jsr _LVOAllocMem(a6)
- tst.l d0
- beq.s nomem
-
- movea.l d0,a3 ; a3 = base of the new resident block
- movea.l a3,a1 ; destination
- lea CopyStart(pc),a0 ; source
- move.l d4,d0 ; length
- bsr.s CopyMemQuicker ; beats jsr _LVOCopyMem(a6)
- * NOTE(review): by hand count CopyMemQuicker lies about 128 bytes ahead,
- * at the very edge of the short-branch range - confirm the assembler
- * accepts or widens this bsr.s.
-
- * The movem-trick uses some extra cycles for setting up, so
- * if we run the loop < n times, we will actually slow things down.
- * For 68000: n = 2; for 68010: n = 8; for 68020+: n = 4
- * The reason is that the 68010 has a loop mode for dbf-loops, but no
- * cache, whereas the 68020+'es run all code faster.
-
- moveq #44*2,d0 ; threshold for a plain 68000
- btst #AFB_68020,AttnFlags+1(a6)
- beq.s tst10
- add.w d0,d0 ; We have a 020 or higher
- bra.s cmpset
- tst10 btst #AFB_68010,AttnFlags+1(a6)
- bne.s cmpok ; We have a 010: keep the assembled 44*8
- cmpset move.w d0,CmpValS-CopyStart(a3) ; patch both compare immediates
- move.w d0,CmpValQ-CopyStart(a3) ; inside the allocated copy
- cmpok moveq #255-(CopyMemQuickest-CopyStart),d0
- not.b d0 ; d0 = CopyMemQuickest-CopyStart (needs that offset in 128..255)
- add.l a3,d0 ; address of the resident CopyMemQuickest
- bsr.s setcmq
- move.l d0,(a3) ; OldCopyMemQuick = value left in d0 by SetFunction
- moveq #CopyMemQuicker-CopyStart,d0
- add.l a3,d0 ; address of the resident CopyMemQuicker
- bsr.s setcm
- move.l d0,4(a3) ; OldCopyMem = value left in d0 by SetFunction
- quit movea.l a5,a1 ; a1 = DOSBase
- jmp _LVOCloseLibrary(a6) ; faster than jsr + rts
-
- nomem moveq #memerr_end-memerr,d3
- lea memerr(pc),a0
- pea quit(pc) ; faster than 'bsr.s wrt' + 'bra.s quit'
-
- * wrt - write d3 bytes starting at a0 to the handle in d7 (dos Write).
- * a5/a6 are swapped around the call so a6 is ExecBase again on return.
- wrt move.l a0,d2
- move.l d7,d1
- exg.l a5,a6
- jsr _LVOWrite(a6)
- exg.l a5,a6
- rts
-
- * Already installed: a3 = base of the resident block found above.
- isquicker
- moveq #already_end-already,d3
- lea already(pc),a0
- bsr.s wrt
- move.l 4(a3),d0 ; saved OldCopyMem
- bsr.s setcm ; put it back
- move.l (a3),d0 ; saved OldCopyMemQuick, restored by the fall-through below
-
- * Too darn dangerous! Some other program might be using the routine
- * in the background, and freeing the code it is running just won't do!
- ; bsr.s setcmq
- ; move.l d4,d0
- ; movea.l a3,a1
- ; jsr _LVOFreeMem(a6)
- ; bra.s quit
-
- pea quit(pc) ; faster than 'bsr.s setcmq' + 'bra.s quit'
-
- * setcmq / setcm - point CopyMemQuick / CopyMem at the address in d0.
- * Loads a0 = vector offset and a1 = a6 (ExecBase), then tail-jumps to
- * SetFunction; callers store the d0 it leaves as the previous vector.
- setcmq lea (_LVOCopyMemQuick).w,a0
- bra.s setit
-
- setcm lea (_LVOCopyMem).w,a0
- setit movea.l a6,a1
- jmp _LVOSetFunction(a6)
-
-
- ************************************************************
-
- * Start of the block the installer copies into allocated memory.
- * The installer reaches the three longwords below by their offset
- * from CopyStart (0, 4 and Identity-CopyStart).
- CopyStart
-
- OldCopyMemQuick
- dc.l 0 ; receives the previous CopyMemQuick vector at install time
- OldCopyMem
- dc.l 0 ; receives the previous CopyMem vector at install time
- Identity
- dc.l '*Art' ; signature the installer checks just in front of CopyMemQuicker
- * CopyMemQuicker - general byte copy, installed as the CopyMem vector.
- * In: a0 = source, a1 = destination, d0 = byte count.
- * Clobbers d0, d1, a0, a1; every other register it uses is saved on
- * the stack and restored.
- * Strategy: counts below 12 are copied bytewise. Otherwise the source
- * is made even (one byte copied if needed); if the destination is then
- * even too, control passes to CopyMemEvenQuicker. With an even source
- * and an odd destination, counts below 108 are copied bytewise and
- * larger ones go through the 36-byte shifting loop at uloop.
- CopyMemQuicker
- moveq #12,d1
- cmp.l d1,d0
- bcs.s tinycpy ; too small to gain anything
- move.w a0,d1
- lsr.b #1,d1 ; fastest test for evenness
- bcc.s evena0 ; source already even
- move.b (a0)+,(a1)+ ; copy one byte so the source becomes even
- subq.l #1,d0
- evena0 move.w a1,d1
- lsr.b #1,d1
- bcc.s CopyMemEvenQuicker ; destination even as well: aligned copier
-
- moveq #36*3,d1 ; under three loop passes: plain byte copy
- cmp.l d1,d0
- bcs.s tinycpy
-
- * This is tricky! They said it couldn't be done...
- unevcpy movem.l a2-a4/d2-d7,-(sp)
- moveq #32,d1 ; 8 registers of 4 bytes
- move.w d1,a3 ; a3 = 32, destination step after the movem store
- moveq #36,d1 ; as above plus 4 "roundoff" bytes
- move.w d1,a4 ; a4 = 36, bytes handled per pass
- sub.l d1,d0
- move.l d0,a2 ; a2 = count - 36, the running counter
- uloop movem.l (a0)+,d0-d7 ; fetch 32 source bytes
- rol.l #8,d0 ; rotate each register so its first byte
- rol.l #8,d1 ; ends up in the low byte
- rol.l #8,d2
- rol.l #8,d3
- rol.l #8,d4
- rol.l #8,d5
- rol.l #8,d6
- rol.l #8,d7
- move.b d0,(a1)+ ; store byte 0 alone: destination is now even
- move.b d1,d0 ; hand each register's first byte to the
- move.b d2,d1 ; register before it, so d0-d7 hold
- move.b d3,d2 ; source bytes 1..31 ...
- move.b d4,d3
- move.b d5,d4
- move.b d6,d5
- move.b d7,d6
- move.b (a0)+,d7 ; ... plus byte 32 fetched here
- movem.l d0-d7,(a1) ; store 32 bytes at the even destination
- adda.w a3,a1
- move.b (a0)+,(a1)+ ; even up to next longword
- move.b (a0)+,(a1)+
- move.b (a0)+,(a1)+
- move.l a2,d0
- sub.l a4,d0 ; counter -= 36, carry set on borrow
- movea.l d0,a2 ; movea leaves the flags from sub.l intact
- bcc.s uloop ; no borrow: at least 36 bytes were left
- add.w a4,d0 ; undo the last subtraction: d0.w = leftover (0..35)
- movem.l (sp)+,a2-a4/d2-d7
- subq.b #1,d0
- bcs.s tdone ; leftover was zero
-
- tloop move.b (a0)+,(a1)+
- tinycpy dbf d0,tloop ; entered here with d0 = count: dbf tests before the first copy
- tdone rts
-
- * CopyMemEvenQuicker - copier for even source and destination.
- * Reached from CopyMemQuicker once a0 and a1 are both even.
- * In: a0 = source, a1 = destination, d0 = byte count.
- * Counts at or above the patched threshold first go through a loop
- * moving 44 bytes per pass with movem; what is left is copied as
- * longwords followed by 0-3 trailing bytes, selected from the two low
- * bits of the count.
- CopyMemEvenQuicker
- dc.w $0c80 ; cmpi.l #nnnn,d0
- dc.w 0 ; Need 8 loops to be economical on 68010
- CmpValS dc.w 44*8 ; (on 68000 this will be set to 44*2
- ; and on 68020+ this will be 44*4)
- bcs.s smlmov ; below the threshold: skip the movem loop
- moveq #44,d1 ; 11 registers of 4 bytes
- sub.l d1,d0
- movem.l d2-d7/a2-a6,-(sp)
- bigmov movem.l (a0)+,d2-d7/a2-a6
- movem.l d2-d7/a2-a6,(a1)
- adda.w d1,a1
- sub.l d1,d0
- bcc.s bigmov ; no borrow: at least 44 bytes were left
- add.w d1,d0 ; undo the last subtraction: d0.w = leftover (0..43)
- movem.l (sp)+,d2-d7/a2-a6
-
- smlmov lsr.w #1,d0 ; carry = bit 0 of the count
- beq.s even01 ; count was 0 or 1
- bcs.s sm13 ; odd count
- lsr.w #1,d0 ; carry = bit 1, d0 = number of longwords
- beq.s even2 ; count was exactly 2
- bcs.s sm2
- sm0 subq.w #1,d0 ; count is a multiple of 4
- loop0 move.l (a0)+,(a1)+
- dbf d0,loop0
- even0 rts
- sm2 subq.w #1,d0 ; longwords, then one word
- loop2 move.l (a0)+,(a1)+
- dbf d0,loop2
- even2 move.w (a0),(a1)
- rts
- sm13 lsr.w #1,d0 ; carry = bit 1, d0 = number of longwords
- beq.s even3 ; count was exactly 3
- bcs.s sm3
- sm1 subq.w #1,d0 ; longwords, then one byte
- loop1 move.l (a0)+,(a1)+
- dbf d0,loop1
- even1 move.b (a0),(a1)
- rts
- sm3 subq.w #1,d0 ; longwords, then a word and a byte
- loop3 move.l (a0)+,(a1)+
- dbf d0,loop3
- even3 move.w (a0)+,(a1)+
- move.b (a0),(a1)
- rts
- even01 bcs.s even1 ; carry still holds bit 0: one byte to copy
- rts
-
- * CopyMemQuickest - longword copier, installed as the CopyMemQuick
- * vector.
- * In: a0 = source, a1 = destination, d0 = byte count.
- * Counts at or above the patched threshold first go through a loop
- * moving 44 bytes per pass with movem; the rest is copied as whole
- * longwords. The two low bits of the count are shifted out, so no
- * trailing bytes are copied.
- CopyMemQuickest
- dc.w $0c80 ; cmpi.l #nnnn,d0
- dc.w 0 ; Need 8 loops to be economical on 68010
- CmpValQ dc.w 44*8 ; (on 68000 this will be set to 44*2
- ; and on 68020+ this will be 44*4)
- bcs.s smlmovQ ; below the threshold: skip the movem loop
- moveq #44,d1 ; 11 registers of 4 bytes
- sub.l d1,d0
- movem.l d2-d7/a2-a6,-(sp)
- bigmovQ movem.l (a0)+,d2-d7/a2-a6
- movem.l d2-d7/a2-a6,(a1)
- adda.w d1,a1
- sub.l d1,d0
- bcc.s bigmovQ ; no borrow: at least 44 bytes were left
- add.w d1,d0 ; undo the last subtraction: d0.w = leftover (0..43)
- movem.l (sp)+,d2-d7/a2-a6
- smlmovQ lsr.w #2,d0 ; d0 = number of longwords left
- beq.s done
- subq.w #1,d0 ; dbf runs count+1 times
- qloop move.l (a0)+,(a1)+
- dbf d0,qloop
- done rts
- CopyEnd
-
- ************************************************************
-
- * Leave this for the 2.0 Version function!!!
- * Text data for the installer. Each message length is loaded with
- * moveq as <label>_end-<label>.
- version dc.b '$VER: ' ; sits directly in front of title; not part of the printed text
- title dc.b 'CopyMemQuicker 2.1 (8 Feb 1992)',$0A,$0D
- dc.b 'Copyright ',$A9,' 1992 Arthur Hagen.',$0A
- title_end
- already dc.b 'Restoring vectors.',$0A ; printed when the patch is found installed
- already_end
- memerr dc.b 'No memory!',$0A ; printed when AllocMem returns 0
- memerr_end
- dosname dc.b 'dos.library',0 ; name passed to OldOpenLibrary
- even
- end
-