home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: comp.sys.mac.programmer
- Path: sparky!uunet!zaphod.mps.ohio-state.edu!cis.ohio-state.edu!magnus.acs.ohio-state.edu!usenet.ins.cwru.edu!agate!apple!mumbo.apple.com!gallant.apple.com!nuntius
- From: Steve Christensen <stevec@apple.com>
- Subject: Re: Help! making an assembly routine faster
- Sender: news@gallant.apple.com
- Message-ID: <1992Nov20.040608.25002@gallant.apple.com>
- X-Useragent: Nuntius v1.1
- Date: Fri, 20 Nov 1992 04:06:08 GMT
- References: <1992Nov18.010815.6649@cs.uoregon.edu>
- Organization: Apple Computer, Inc.
- Lines: 211
-
- I took a pass at the code and here's what I came up with (changed lines
- will have a * in the comment field).
-
- ; Masked block copy. Each mask bit decides whether one source byte is
- ; copied to the destination; the most significant bit of a mask byte
- ; governs the first of its eight bytes.
- ;
- ; Register roles:
- ;   D0 = row counter (DBF, so the row loop runs h+1 times)
- ;   D1 = inner counter: 8-byte groups per row (from w), then tail bytes (from e)
- ;   D2 = current mask byte, reused as the handler offset during dispatch
- ;   D3 = low mask nibble in @hardway, bit index in the tail loop
- ;   A0 = source pointer, A1 = destination pointer, A2 = mask pointer
- ;   A3 = return address handed to the @subN handlers
- ;
- ; NOTE(review): D2 is only byte-loaded after the initial CLR.L, so the
- ; indexing in @hardway relies on bits 8-15 of D2 still being zero, i.e. on
- ; every @subN-@table offset being below 256 -- confirm if handlers grow.
-
- MOVE.W h, D0 ; put height loop variable in D0
- MOVEA.L src, A0 ; put the source pixmap address in A0
- MOVEA.L dst, A1 ; put the destination address in A1
- MOVEA.L mask, A2 ; put the mask address in A2
- CLR.L D2 ; clear the mask register
- @1: ; copy the next row
- MOVE.W w, D1 ; counter for the 8-byte groups in this row
- @2: ; copy the next eight bytes in the row
- MOVE.B (A2)+, D2 ; copy the next mask byte
- BEQ.S @nocopy ;*if zero, don't copy anything
-
- CMPI.B #0xFF, D2
- BNE.S @hardway ;*don't copy everything
-
- MOVE.L (A0)+, (A1)+ ; mask = 0xFF: copy all eight bytes
- MOVE.L (A0)+, (A1)+
- DBF D1, @2
- BRA.S @endloop ;*
-
- @nocopy: ; mask = 0x00: skip eight bytes in both buffers
- ADDQ.L #8, A0
- ADDQ.L #8, A1
- DBF D1, @2
- BRA.S @endloop ;*
-
- @hardway: ; mixed mask: dispatch once per nibble
- MOVEQ #0xF, D3 ;*mask off the lower nibble for later
- AND.B D2, D3 ;*D3 = low nibble of the mask byte
- LSR.W #4, D2 ; shift bits 4-7 into bits 0-3
- ADD.W D2, D2 ; double the index (table entries are words)
- MOVE.W @table(D2.W), D2 ;*fetch the handler's offset from @table
- LEA @rts1, A3 ; save the return address
- JMP @table(D2.W) ; plot the first four pixels
- @rts1:
-
- ;*******MOVE.B -1(A2), D2 ; copy the next mask byte
- ;*******ANDI.B #0xF, D2 ; mask off the high four bits
- ADD.W D3, D3 ; double the low-nibble index (fix: was ADD.W D2, D2,
- ; which doubled the stale handler offset and left D3 unscaled)
- MOVE.W @table(D3.W), D2 ;*fetch the handler's offset from @table
- LEA @rts2, A3 ; save the return address
- JMP @table(D2.W) ; plot the last four pixels
- @rts2:
- DBF D1, @2
-
- @endloop:
-
- MOVE.W e, D1 ;*tail counter; DBF copies/skips e+1 bytes
- BLT.S @4 ;*continue if e is less than 0
-
- MOVE.B (A2)+, D2 ; copy the next mask byte
- ;*******MOVE.W e, D1 ; initialize the loop counter
- MOVEQ.L #7, D3 ; initialize the bit counter (start at the top bit)
-
- @3: ; copy the next byte
- BTST D3, D2 ; test the next bit in the mask
- BEQ.S @skip ;*if zero, continue
- MOVE.B (A0)+, (A1)+ ; else copy the pixel
- SUBQ.L #1, D3 ; decrement the bit counter
- DBF D1, @3
- BRA.S @4 ;*
- @skip: ; mask bit clear: step over one byte in both buffers
- ADDQ.L #1, A0
- ADDQ.L #1, A1
- SUBQ.L #1, D3 ; decrement the bit counter
- DBF D1, @3
-
- @4:
- ADDA.L srcNewline, A0 ; bring the src pointer to the start of the next row
- ADDA.L dstNewline, A1 ; bring the dst pointer to the start of the next row
-
- DBF D0, @1
-
- JMP @end ; skip over the table and handlers to the end
-
- @table:
- ; Dispatch table for the four-pixel handlers. Each entry is the 16-bit
- ; offset of a handler measured from @table; the caller indexes it with
- ; (mask nibble * 2) and then jumps to @table + entry.
- DC.W @sub0-@table ;* mask = 0000
- DC.W @sub1-@table ;* mask = 0001
- DC.W @sub2-@table ;* mask = 0010
- DC.W @sub3-@table ;* mask = 0011
- DC.W @sub4-@table ;* mask = 0100
- DC.W @sub5-@table ;* mask = 0101
- DC.W @sub6-@table ;* mask = 0110
- DC.W @sub7-@table ;* mask = 0111
- DC.W @sub8-@table ;* mask = 1000
- DC.W @sub9-@table ;* mask = 1001
- DC.W @sub10-@table ;* mask = 1010
- DC.W @sub11-@table ;* mask = 1011
- DC.W @sub12-@table ;* mask = 1100
- DC.W @sub13-@table ;* mask = 1101
- DC.W @sub14-@table ;* mask = 1110
- DC.W @sub15-@table ;* mask = 1111
-
- ; Four-pixel handlers, one per mask nibble. On entry A0/A1 point at the
- ; first of four source/destination bytes and A3 holds the return address.
- ; Each handler copies the bytes whose mask bit is set (leftmost bit =
- ; first byte), leaves A0 and A1 advanced by four, and returns via JMP (A3).
-
- @sub0: ; 0000 - nothing selected, just step over the group
- ADDQ.L #4, A1
- ADDQ.L #4, A0
- JMP (A3) ; return to dispatcher
-
- @sub1: ; 0001 - byte 3
- MOVE.B 3(A0), 3(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub2: ; 0010 - byte 2
- MOVE.B 2(A0), 2(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub3: ; 0011 - bytes 2-3 as one word
- MOVE.W 2(A0), 2(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub4: ; 0100 - byte 1
- MOVE.B 1(A0), 1(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub5: ; 0101 - byte 1, then byte 3
- MOVE.B 1(A0), 1(A1)
- MOVE.B 3(A0), 3(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub6: ; 0110 - bytes 1-2 as one word
- MOVE.W 1(A0), 1(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub7: ; 0111 - byte 1, then bytes 2-3 as one word
- MOVE.B 1(A0), 1(A1)
- MOVE.W 2(A0), 2(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub8: ; 1000 - byte 0
- MOVE.B (A0)+, (A1)+
- ADDQ.L #3, A0
- ADDQ.L #3, A1
- JMP (A3) ; return to dispatcher
-
- @sub9: ; 1001 - byte 0, then byte 3
- MOVE.B (A0), (A1)
- MOVE.B 3(A0), 3(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub10: ; 1010 - byte 0, then byte 2
- MOVE.B (A0), (A1)
- MOVE.B 2(A0), 2(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub11: ; 1011 - byte 0, then bytes 2-3 as one word
- MOVE.B (A0), (A1)
- MOVE.W 2(A0), 2(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub12: ; 1100 - bytes 0-1 as one word
- MOVE.W (A0)+, (A1)+
- ADDQ.L #2, A0
- ADDQ.L #2, A1
- JMP (A3) ; return to dispatcher
-
- @sub13: ; 1101 - bytes 0-1 as one word, then byte 3
- MOVE.W (A0), (A1)
- MOVE.B 3(A0), 3(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub14: ; 1110 - bytes 0-1 as one word, then byte 2
- MOVE.W (A0), (A1)
- MOVE.B 2(A0), 2(A1)
- ADDQ.L #4, A0
- ADDQ.L #4, A1
- JMP (A3) ; return to dispatcher
-
- @sub15: ; 1111 - all four bytes as one long
- MOVE.L (A0)+, (A1)+
- JMP (A3) ; return to dispatcher
-
- @end:
-