Otherware

home *** CD-ROM | disk | FTP | other *** search

/ Otherware / Otherware_1_SB_Development.iso / amiga / utility / v2 / copymem.lzh / CopyMemQuicker.asm next >

Wrap

Assembly Source File | 1992-02-09 | 5.7 KB | 274 lines

* CopyMemQuicker.asm - Motorola 68000 assembly source for AmigaOS
	nolist
**********************************************************************
* CopyMemQuicker 2.1 - (C) 1991, 1992 Arthur Hagen                   *
* Parts of code: (C) 1985-1991 Commodore Business Machines Ltd.      *
* Posted to the Public Domain                                        *
**********************************************************************
*
* Overview
* --------
* _QuickMem is the program entry point.  It prints a banner, then looks
* at the code the exec CopyMem vector currently points to:
*
*  - If the longword just in front of that code is the tag '*Art', the
*    patch is taken to be installed already.  The two saved vectors are
*    put back with SetFunction ("Restoring vectors.").  The resident
*    block is deliberately NOT freed (see the note at 'isquicker').
*
*  - Otherwise a block of CopyEnd-CopyStart bytes is allocated, the
*    routines between CopyStart and CopyEnd are copied into it, the two
*    size thresholds are patched according to the CPU flags in
*    AttnFlags, and the CopyMem / CopyMemQuick vectors are pointed at
*    the copy.  The value SetFunction hands back in d0 is stored in the
*    first two longwords of the block.
*
* Layout sensitivity
* ------------------
* The installer addresses the resident block through assemble-time
* offsets (Identity-CopyStart, CmpValS-CopyStart, ...), and the
* 'moveq/not.b' sequence at 'cmpok' only produces the right offset
* while CopyMemQuickest-CopyStart is in the range 128..255.  Do not
* add or remove instructions between CopyStart and CopyMemQuickest
* without re-checking those.
*
* Register roles inside the installer
*	a6 = ExecBase (swapped with a5 around dos.library calls)
*	a5 = dos.library base
*	d7 = file handle returned by Output()
*	d4 = size of the resident block (CopyEnd-CopyStart)
*	a3 = base address of the resident block (new or already installed)
**********************************************************************

	xref	_LVOOldOpenLibrary
	xref	_LVOCloseLibrary
	xref	_LVOOutput
	xref	_LVOWrite
	xref	_LVOAllocMem
	xref	_LVOFreeMem
	xref	_LVOSetFunction
	xref	_LVOCopyMem
	xref	_LVOCopyMemQuick

	include	'exec/execbase.i'

	public	_QuickMem
	list

_QuickMem
	movea.l	(4).w,a6		; ExecBase
	lea	dosname(pc),a1
	jsr	_LVOOldOpenLibrary(a6)	; Any version will do
	movea.l	d0,a5			; a5 = dos.library base
	exg.l	a5,a6			; a6 = DOS for the call below
	jsr	_LVOOutput(a6)
	exg.l	a5,a6			; a6 = ExecBase again
	move.l	d0,d7			; d7 = output handle used by 'wrt'
	moveq	#title_end-title,d3
	lea	title(pc),a0
	bsr.s	wrt			; print the banner

	move.l	#(CopyEnd-CopyStart),d4	; d4 = size of resident block
	movea.l	_LVOCopyMem+2(a6),a0	; address field of the CopyMem vector
	lea	CopyStart-CopyMemQuicker(a0),a3	; where CopyStart would be
	move.l	Identity-CopyStart(a3),d1	; longword in front of the code
	cmpi.l	#'*Art',d1
	beq.s	isquicker		; our tag: already installed

	move.l	d4,d0
	moveq	#0,d1			; Any memory type
	jsr	_LVOAllocMem(a6)
	tst.l	d0
	beq.s	nomem
	movea.l	d0,a3			; a3 = base of the new block
	movea.l	a3,a1			; destination
	lea	CopyStart(pc),a0	; source
	move.l	d4,d0			; length
* Unsized branch on purpose: counting the encoded instructions,
* CopyMemQuicker starts 128 bytes past this instruction's PC, which is
* beyond what the 8-bit displacement of 'bsr.s' can express.
	bsr	CopyMemQuicker		; beats jsr _LVOCopyMem(a6)

* The movem-trick uses some extra cycles for setting up, so
* if we run the loop fewer than n times, we will actually slow things
* down.  For 68000: n = 2; for 68010: n = 8; for 68020+: n = 4
* The reason is that the 68010 has a loop mode for dbf-loops, but no
* cache, whereas the 68020+'es run all code faster.
	moveq	#44*2,d0		; threshold for a plain 68000
	btst	#AFB_68020,AttnFlags+1(a6)
	beq.s	tst10
	add.w	d0,d0			; We have a 020 or higher: 44*4
	bra.s	cmpset
tst10	btst	#AFB_68010,AttnFlags+1(a6)
	bne.s	cmpok			; We have a 010: keep assembled 44*8
cmpset	move.w	d0,CmpValS-CopyStart(a3)	; patch both thresholds in
	move.w	d0,CmpValQ-CopyStart(a3)	; the copied block
* d0 = CopyMemQuickest-CopyStart.  moveq cannot load 128..255 directly,
* so the complement is loaded and the low byte inverted.
cmpok	moveq	#255-(CopyMemQuickest-CopyStart),d0
	not.b	d0
	add.l	a3,d0			; d0 = address of copied CopyMemQuickest
	bsr.s	setcmq			; install as CopyMemQuick
	move.l	d0,(a3)			; save returned value in OldCopyMemQuick
	moveq	#CopyMemQuicker-CopyStart,d0
	add.l	a3,d0			; d0 = address of copied CopyMemQuicker
	bsr.s	setcm			; install as CopyMem
	move.l	d0,4(a3)		; save returned value in OldCopyMem

* Common exit: close dos.library; CloseLibrary's rts ends the program.
quit	movea.l	a5,a1
	jmp	_LVOCloseLibrary(a6)	; faster than jsr + rts

nomem	moveq	#memerr_end-memerr,d3
	lea	memerr(pc),a0
	pea	quit(pc)		; faster than 'bsr.s wrt' + 'bra.s quit'
					; (falls into wrt; its rts goes to quit)

* wrt - write a0/d3 (buffer/length) to the handle in d7 via dos Write.
* Expects a6 = ExecBase and a5 = DOS base; both are restored on return.
wrt	move.l	a0,d2			; buffer
	move.l	d7,d1			; file handle
	exg.l	a5,a6
	jsr	_LVOWrite(a6)
	exg.l	a5,a6
	rts

* Already installed: put the saved vectors back.
isquicker
	moveq	#already_end-already,d3
	lea	already(pc),a0
	bsr.s	wrt
	move.l	4(a3),d0		; saved OldCopyMem
	bsr.s	setcm
	move.l	(a3),d0			; saved OldCopyMemQuick
* Too darn dangerous!  Some other program might be using the routine
* in the background, and freeing the code it is running just won't do!
;	bsr.s	setcmq
;	move.l	d4,d0
;	movea.l	a3,a1
;	jsr	_LVOFreeMem(a6)
;	bra.s	quit
	pea	quit(pc)		; faster than 'bsr.s setcmq' + 'bra.s quit'
					; (falls into setcmq; returns to quit)

* setcmq / setcm - SetFunction on exec itself (a1 = a6) for the
* CopyMemQuick / CopyMem vector.  In: d0 = new entry point.
* Whatever SetFunction returns in d0 is passed back to the caller.
setcmq	lea	(_LVOCopyMemQuick).w,a0
	bra.s	setit
setcm	lea	(_LVOCopyMem).w,a0
setit	movea.l	a6,a1
	jmp	_LVOSetFunction(a6)

************************************************************
* Resident block.  Everything from CopyStart up to CopyEnd is copied
* into allocated memory by the installer and runs from there.
************************************************************
CopyStart
OldCopyMemQuick	dc.l	0		; filled in by the installer
OldCopyMem	dc.l	0		; filled in by the installer
Identity	dc.l	'*Art'		; tag the installer looks for

* CopyMemQuicker - general copy.
* In:	a0 = source, a1 = destination, d0 = byte count
* Uses:	d1 as scratch; every other register is preserved
*	(the fast paths save and restore what they use).
CopyMemQuicker
	moveq	#12,d1
	cmp.l	d1,d0
	bcs.s	tinycpy			; too small to gain anything
	move.w	a0,d1
	lsr.b	#1,d1			; fastest test for evenness
	bcc.s	evena0
	move.b	(a0)+,(a1)+		; odd source: one byte makes it even
	subq.l	#1,d0
evena0	move.w	a1,d1
	lsr.b	#1,d1
	bcc.s	CopyMemEvenQuicker	; both pointers even
	moveq	#36*3,d1		; even source, odd destination:
	cmp.l	d1,d0			; the shifting loop only pays off
	bcs.s	tinycpy			; for 108 bytes or more

* This is tricky!  They said it couldn't be done...
* Source is even, destination is odd.  Each pass moves 36 bytes:
* 32 bytes are loaded with movem, every register is rotated so that
* its first byte sits in the low byte, the very first byte is stored
* on its own (the destination is even from then on), the low bytes
* are handed down one register so the registers hold source bytes
* 1..32, and those are stored with a single movem.  Three single-byte
* moves finish the pass.
* a2 = bytes left minus 36, a3 = 32, a4 = 36 during the loop.
unevcpy	movem.l	a2-a4/d2-d7,-(sp)
	moveq	#32,d1			; 8 registers of 4 bytes
	move.w	d1,a3
	moveq	#36,d1			; as above plus 4 "roundoff" bytes
	move.w	d1,a4
	sub.l	d1,d0
	move.l	d0,a2
uloop	movem.l	(a0)+,d0-d7
	rol.l	#8,d0
	rol.l	#8,d1
	rol.l	#8,d2
	rol.l	#8,d3
	rol.l	#8,d4
	rol.l	#8,d5
	rol.l	#8,d6
	rol.l	#8,d7
	move.b	d0,(a1)+		; source byte 0
	move.b	d1,d0
	move.b	d2,d1
	move.b	d3,d2
	move.b	d4,d3
	move.b	d5,d4
	move.b	d6,d5
	move.b	d7,d6
	move.b	(a0)+,d7		; source byte 32
	movem.l	d0-d7,(a1)		; source bytes 1..32
	adda.w	a3,a1
	move.b	(a0)+,(a1)+		; even up to next longword
	move.b	(a0)+,(a1)+
	move.b	(a0)+,(a1)+
	move.l	a2,d0
	sub.l	a4,d0
	movea.l	d0,a2			; movea leaves the sub.l flags alone
	bcc.s	uloop
	add.w	a4,d0			; d0.w = 0..35 bytes still to copy
	movem.l	(sp)+,a2-a4/d2-d7
	subq.b	#1,d0
	bcs.s	tdone			; nothing left
* Byte loop.  Entered at 'tloop' with d0.w = count-1, or at 'tinycpy'
* with d0.w = count.  Only the low word of d0 is used; every path that
* gets here has a count below 108.
tloop	move.b	(a0)+,(a1)+
tinycpy	dbf	d0,tloop
tdone	rts

* CopyMemEvenQuicker - both pointers even.
* In:	a0 = source, a1 = destination, d0 = byte count
* Counts at or above the threshold go through a 44-byte movem loop
* first; what is left is finished with longword moves plus a final
* word and/or byte selected by the two low bits of the count.
* The compare is hand-assembled so that CmpValS labels the low word of
* its immediate operand, which the installer patches.
CopyMemEvenQuicker
	dc.w	$0c80			; cmpi.l #nnnn,d0
	dc.w	0			;   high word of nnnn
CmpValS	dc.w	44*8			;   low word: 8 loops for the 68010;
					;   patched to 44*2 on a 68000 and
					;   to 44*4 on a 68020+
	bcs.s	smlmov
	moveq	#44,d1			; 11 registers of 4 bytes
	sub.l	d1,d0
	movem.l	d2-d7/a2-a6,-(sp)
bigmov	movem.l	(a0)+,d2-d7/a2-a6
	movem.l	d2-d7/a2-a6,(a1)
	adda.w	d1,a1
	sub.l	d1,d0
	bcc.s	bigmov
	add.w	d1,d0			; d0.w = 0..43 bytes still to copy
	movem.l	(sp)+,d2-d7/a2-a6
smlmov	lsr.w	#1,d0			; carry = bit 0 of the count
	beq.s	even01			; count was 0 or 1
	bcs.s	sm13			; odd count
	lsr.w	#1,d0			; carry = bit 1 of the count
	beq.s	even2			; count was exactly 2
	bcs.s	sm2
sm0	subq.w	#1,d0			; count = 4n
loop0	move.l	(a0)+,(a1)+
	dbf	d0,loop0
even0	rts
sm2	subq.w	#1,d0			; count = 4n+2
loop2	move.l	(a0)+,(a1)+
	dbf	d0,loop2
even2	move.w	(a0),(a1)
	rts
sm13	lsr.w	#1,d0			; carry = bit 1 of the count
	beq.s	even3			; count was exactly 3
	bcs.s	sm3
sm1	subq.w	#1,d0			; count = 4n+1
loop1	move.l	(a0)+,(a1)+
	dbf	d0,loop1
even1	move.b	(a0),(a1)
	rts
sm3	subq.w	#1,d0			; count = 4n+3
loop3	move.l	(a0)+,(a1)+
	dbf	d0,loop3
even3	move.w	(a0)+,(a1)+
	move.b	(a0),(a1)
	rts
even01	bcs.s	even1			; count was 1: copy the byte
	rts				; count was 0

* CopyMemQuickest - installed as CopyMemQuick.
* In:	a0 = source, a1 = destination, d0 = byte count
* Same movem loop as above; the remainder is copied in whole
* longwords only (count shifted right by two), with no alignment or
* odd-byte handling.
CopyMemQuickest
	dc.w	$0c80			; cmpi.l #nnnn,d0
	dc.w	0			;   high word of nnnn
CmpValQ	dc.w	44*8			;   low word: 8 loops for the 68010;
					;   patched to 44*2 on a 68000 and
					;   to 44*4 on a 68020+
	bcs.s	smlmovQ
	moveq	#44,d1			; 11 registers of 4 bytes
	sub.l	d1,d0
	movem.l	d2-d7/a2-a6,-(sp)
bigmovQ	movem.l	(a0)+,d2-d7/a2-a6
	movem.l	d2-d7/a2-a6,(a1)
	adda.w	d1,a1
	sub.l	d1,d0
	bcc.s	bigmovQ
	add.w	d1,d0
	movem.l	(sp)+,d2-d7/a2-a6
smlmovQ
	lsr.w	#2,d0			; longwords left
	beq.s	done
	subq.w	#1,d0
qloop	move.l	(a0)+,(a1)+
	dbf	d0,qloop
done	rts
CopyEnd
************************************************************

* Leave this for the 2.0 Version function!!!
* ('version' sits directly in front of 'title', so the two read as one
* string in the binary; the banner printed at start-up begins at
* 'title'.)
version	dc.b	'$VER: '
title	dc.b	'CopyMemQuicker 2.1 (8 Feb 1992)',$0A,$0D
	dc.b	'Copyright ',$A9,' 1992 Arthur Hagen.',$0A
title_end
already	dc.b	'Restoring vectors.',$0A
already_end
memerr	dc.b	'No memory!',$0A
memerr_end
dosname	dc.b	'dos.library',0
	even
	end