The Datafile PD-CD 5

home *** CD-ROM | disk | FTP | other *** search

/ The Datafile PD-CD 5 / DATAFILE_PDCD5.iso / demos / baah / Damn! / damn!src / damn!src < prev next >

Wrap

Text File | 1997-02-15 | 93.5 KB | 1,997 lines

;                                    \|/
;                                    O O
; --------------------------------oOO--U--OOo--------------------------------
; -                                                                         -
; -                               damn!demo                                 -
; -                 © Alain BROBECKER (baah/Arm'sTeack)                     -
; -                              July-Sept 96                               -
; ---------------------------------------------------------------------------
;
; This source is given for free and is widely commented, so I hope some
; people will look at it and (maybe) improve their own code. Re-use of my
; routines is allowed (though not recommended, cos you'll understand more
; if you write your owns...) as long as it is not for commercial purposes,
; as long as you credit me and send me a free copy of your proggy. Oh, btw
; the assembler I used is ExtASM 0.50b. You'll have to make changes in
; macros if you use a newer version of ExtASM.
;
;       Alain BROBECKER         Dracula / Positivity (STe)
;       rte de Dardagny         baah / Arm's Tech (Archie)
;       01630 CHALLEX           baah (PC-C64)
;       FRANCE
;
; 15 Feb 97 - StrongARM compatible version... FastBox and CopyBox routines
;             are no more using generated code when SA is detected.
;           - Added exit handler.

#name damn!

#set zizik = 1                          ; 1 for the beauty, 0 for the beast.

;------ Constants -----------------------------------------------------------
#set XScreenBlanker_Control = &ef063100 ; Value of the swi.
#set XOS_PlatformFeatures = &2006d      ; Value for the swi.
#set screennb_min = 3                   ; Minimum number of mode13 screens.
#set screennb_max = 8                   ; Maximum number.
#set inv_N = 18                         ; Shift for fixed point inverses.
#set inv_nb = 4096+1024                 ; Nb of inverses to calculate.
#set sin_N = 14                         ; Shift for fixed point sinuses.
#set sin_pow = 10                       ; 2^sin_pow is the nb of angles.
#set sin_nb = 1<<(sin_pow-3)            ; Nb of angles between [0;pi/4[.
#set bg_M = 8                           ; Well, texture must be a power of 2, so
#set bg_N = 1<<bg_M                     ; bg_N=2^bg_M is the size of texture.
#set bg_middle = 8                      ; Intensity for null relief*2.
#set fractal = 30                       ; Value for shift of rnd nb in fracland.
#set hclip = 304                        ; Values for horizontal and vertical
#set vclip = 200                        ; clipping in poly rout.
#set max_checker_height = 7
#set nb_pts_objects = 13+9+8+1          ; Nb of points for objects.
#set nb_pts = 1024                      ; Total nb of points for vectorworld.
#set nb_faces = 1024                    ; And the total nb of faces.

;------ BSS Offsets ---------------------------------------------------------
#set stack = 128*4                      ; Top of stack.
#set extra = stack                      ; Extra storage, just before inverses.
#set inverses = extra+12*4              ; Inverses table.
#set sinus = inverses+inv_nb*4          ; Sinus table.
#set background = sinus+10*sin_nb*4     ; Background.
#set clsboxes = background+320*256      ; Coords for clear boxes.
#set misc = clsboxes+4*screennb_max*4   ; Extra.
#set end = misc+8*1024

; The 3d part re-uses the memory starting at the background offset.
#set screen_xy = background             ; Projected x&y coords.
#set space_z = screen_xy+2*nb_pts*4     ; z coord of points.
#set static_sort = space_z+nb_pts*4     ; Z table.
#set dynamic_sort = static_sort+inv_nb*4 ; Faces table with link.
#set end3d = dynamic_sort+nb_faces*2*4

;****************************************************************************
;****************************************************************************
;*****                                                                  *****
;*****                              MACROS                              *****
;*****                                                                  *****
;****************************************************************************
;****************************************************************************

;====> Umul64 <=====================
; This macro performs an unsigned 32*32 bits multiply.
;   [m0|m1]=m2*m3. You can choose [m2|m3]=[m0|m1]
;   It destroys m0,m1,m4,m5,m6 and the flags. (C is the carry flag)
macro umul64 m0,m1,m2,m3,m4,m5,m6
{ mov m4,m2,lsr #16                     ; m4=up(m2).
  sub m5,m2,m4,lsl #16                  ; m5=down(m2).
  mov m0,m3,lsr #16                     ; m0=up(m3).
  sub m1,m3,m0,lsl #16                  ; m1=down(m3).
  mul m6,m5,m0                          ; m6=down(m2)*up(m3).
  mul m0,m4,m0                          ; m0=up(m2)*up(m3).
  mlaS m6,m1,m4,m6                      ; C+m6=up(m2)*down(m3)+down(m2)*up(m3).
  adc m0,m0,m6,lsr #16
  mul m1,m5,m1                          ; m1=down(m2)*down(m3).
  addS m1,m1,m6,lsl #16
  adc m0,m0,#0                          ; [m0|m1]=m2*m3.
}

;====> Adjust64 <===================
; This macro adjusts the 64 bits result to 32 bits, according to the fixed
; point shift factor. (c0)
;   m0=[m1|m2]>>c0. You can choose m1=m0.
;   It destroys m0.
macro adjust64 m0,m1,m2,c0
{ mov m0,m1,lsl #32-c0
  add m0,m0,m2,lsr #c0
}

;====> Add64 <======================
; This macro performs a 64 bits addition.
;   [m0|m1]=[m2|m3]+[m4|m5]. You can choose [m2|m3] or [m4|m5]=[m0|m1].
;   It destroys [m0|m1] and the flags.
macro add64 m0,m1,m2,m3,m4,m5
{ addS m1,m3,m5
  adc m0,m2,m4
}

;====> Sub64 <======================
; This macro performs a 64 bits subtract.
;   [m0|m1]=[m2|m3]-[m4|m5]. You can choose [m2|m3] or [m4|m5]=[m0|m1].
;   It destroys [m0|m1] and the flags.
macro sub64 m0,m1,m2,m3,m4,m5
{ subS m1,m3,m5
  sbc m0,m2,m4
}

;====> Sqrt32 <=====================
; This macro extracts the squareroot of a 32 bits register.
;   m0=sqrt(m1).
;   It destroys m0,m1,m2 and the flags.
macro sqrt32 m0,m1,m2
{ mov m0,#0                             ; This is ripped from Jan/BASS.
  mov m2,#1<<30                         ; |
  cmp m1,m2                             ; |
  subCS m1,m1,m2                        ; |
  adc m0,m0,m0                          ; |
#set N=2
#rept 15
  add m2,m0,#1<<30                      ; |
  cmp m1,m2,ror #N                      ; |
  subCS m1,m1,m2,ror #N                 ; |
  adc m0,m0,m0                          ; End of ripped code.
#set N=N+2
#endr
  cmp m1,m0                             ; Flags=m1-m0 (what is left of val, minus root).
  addPL m0,m0,#1                        ; Round to nearest integer.
}

;====> Random32 <===================
; This macro takes a random number, and makes a new one.
; It reads and modifies the carry flag (rrx / adc).
macro random32 m0
{ eor m0,m0,m0,rrx
  adc m0,m0,m0,ror #7
}

;====> Swap32 <=====================
; This macro exchanges the contents of m0 and m1.
macro swap32 m0,m1
{ add m0,m0,m1                          ; m0=a+b.
  sub m1,m0,m1                          ; m1=a+b-b=a.
  sub m0,m0,m1                          ; m0=a+b-a=b.
}

;====> YX Matrix <==================
; This macro calculates the coefficient of the XY rotation matrix.
; IN  - m0 address of sinus table
;       m1=ay. (In top bits (for modulo) other bits set to 0)
;       m4=ax.
; OUT - m1=A | m2=C | m3=D | m4=E | m5=F | m6=G | m7=H | m8=I
;       m0 is advanced by 2*sin_nb*4 (it points on the cosinus table).
macro yx_matrix m0,m1,m2,m3,m4,m5,m6,m7,m8
{ sub m3,m1,m4                          ; m3=ay-ax.
  add m8,m1,m4                          ; m8=ay+ax.
  ldr m2,[m0,m1,lsr #32-sin_pow-2]      ; m2=sin(ay)=-C.
  rsb m2,m2,#0                          ; m2=-sin(ay)=C.
  ldr m7,[m0,m4,lsr #32-sin_pow-2]      ; m7=sin(ax)=-H.
  rsb m7,m7,#0                          ; m7=-sin(ax)=H.
  ldr m6,[m0,m3,lsr #32-sin_pow-2]      ; m6=sin(ay-ax).
  ldr m5,[m0,m8,lsr #32-sin_pow-2]      ; m5=sin(ay+ax).
  sub m5,m5,m6                          ; m5=sin(ay+ax)-sin(ay-ax)=2*F.
  mov m5,m5,asr #1                      ; m5=int((sin(ay+ax)-sin(ay-ax))/2)=F.
  add m6,m5,m6                          ; m6=int((sin(ay+ax)+sin(ay-ax))/2)=G.
  add m0,m0,#2*sin_nb*4                 ; m0 points on cosinus table.
  ldr m1,[m0,m1,lsr #32-sin_pow-2]      ; m1=cos(ay)=A.
  ldr m4,[m0,m4,lsr #32-sin_pow-2]      ; m4=cos(ax)=E.
  ldr m3,[m0,m3,lsr #32-sin_pow-2]      ; m3=cos(ay-ax).
  ldr m8,[m0,m8,lsr #32-sin_pow-2]      ; m8=cos(ay+ax).
  sub m3,m3,m8                          ; m3=cos(ay-ax)-cos(ay+ax)=2*D.
  mov m3,m3,asr #1                      ; m3=int((cos(ay-ax)-cos(ay+ax))/2)=D.
  add m8,m3,m8                          ; m8=int((cos(ay-ax)+cos(ay+ax))/2)=I.
}

;****************************************************************************
;****************************************************************************
;*****                                                                  *****
;*****                               CODE                               *****
;*****                                                                  *****
;****************************************************************************
;****************************************************************************

; Initialise stack, deinit screen blanker and ask for slot+screen memory.
.proggy_start
  adr r13,bss+stack                     ; Initialise stack pointer.
  mov r0,#1                             ; Read screenblanker control.
  swi XScreenBlanker_Control            ; r0=screenblanker infos.
  movVS r0,#0                           ; Error, then set them to 0.
  str r0,old_scrblank                   ; Save infos.
  mov r0,#3                             ; Write screenblanker control.
  mov r1,#0                             ; Set it to 0.
  swi XScreenBlanker_Control
  mov r0,#bss+end-&8000                 ; Ask for needed slot memory.
  mov r1,#-1                            ; Don't change next slot size.
  swi Wimp_SlotSize
  mov r0,#2                             ; Ask for ScreenMem size.
  swi OS_ReadDynamicArea                ; r1=current ScreenMem size.
  rsbS r1,r1,#screennb_min*81920        ; r1=minimum-current screenmem.
  movGT r0,#2                           ; Ask for it if we don't have enough.
  swiGT OS_ChangeDynamicArea
  mov r0,#screennb_min                  ; Count the number of screen banks.
._count_screens
  addS r1,r1,#81920                     ; r1+=size of one mode13 screen.
  addLE r0,r0,#1                        ; If r1 still <=0, we have one more bank.
  bLT _count_screens                    ; If r1<0, maybe there is even more...
  cmp r0,#screennb_max                  ; More than the maximum amount?
  movGE r0,#screennb_max                ; Then don't use extra ones.
; The three byte stores below patch the immediate of "mov" instructions
; located at the given labels (self-modifying code).
; NOTE(review): on a CPU with separate instruction/data caches such patches
; normally need a code-area synchronise before the patched code runs - confirm.
  strB r0,vbl_screennb_mov              ; Modify code which needs ScreenNb
  strB r0,vsync_screennb_mov            ; for multiple buffering,
  strB r0,clear_one_screen_mov          ; and for screen clearing.

;----------------------------------------------------------------------------
; If StrongARM is not there, modify branches in routines so that we use
; versions with generated code.
  mov r0,#0
  swi XOS_PlatformFeatures              ; Read platform feature.
  bvs noStrongARM                       ; If swi is unknown, no StrongARM.
  tst r0,#1                             ; Separated instruction and data caches.
  bne StrongARMed
.noStrongARM
  mov r0,#&e0800002                     ; r0="add r0,r0,r2".
  str r0,StrongBranch2
  eor r0,r0,#&02042003                  ; r0=&e2842001="add r2,r4,#1".
  str r0,StrongBranch1
.StrongARMed

;----------------------------------------------------------------------------
; Switch to the good video mode, and some more stupid bits.
  swi 256+22                            ; Vdu 22, set screenmode.
  swi 256+13                            ; Switch to mode 13.
  swi OS_RemoveCursors                  ; Who needs them?
  adr r0,videoram_adress                ; Get videoram address.
  mov r1,r0
  swi OS_ReadVduVariables

#if zizik
  mov r0,#0                             ; Load music from memory.
  adr r1,music                          ; Start of music.
  mov r2,r1                             ; It is not packed.
  mov r3,#bss-music                     ; Length of dest address.
  adr r4,bss+background                 ; Address of a 32Kb buffer.
  mov r11,#0                            ; DSym_Load=swi#0.
  bl player+&3C8                        ; Jump into symphony's swi-jumptable.
#endif

;----------------------------------------------------------------------------
; Clear the bss section, since some routines need it. I assume that bss+end
; is longword aligned. (So you must take care when defining bss offsets)
; The loop walks r1 from end-stack down to 0, four bytes at a time.
  adr r0,bss+stack                      ; Begin to clear here.
  mov r1,#end-stack                     ; Nb of bytes to clear.
  mov r2,#0
.clear_bss
  subS r1,r1,#4                         ; One long will be cleared.
; (Tail of the .clear_bss loop: store one zero long, loop until offset r1=0.)
  str r2,[r0,r1]
  bNE clear_bss

;----------------------------------------------------------------------------
; Creates the inverses table. This routine has been previously released
; through the "Memory War" article. (October 95)
; Entry k (k=1..inv_nb-1) receives round(2^inv_N/k); entry 0 is not written.
.make_inverses
  adr r0,bss+inverses+4                 ; Create table here.
  mov r1,#1                             ; Used by the division routine.
  mov r2,#1                             ; r2 is the current divisor.
._make_one_inverse
  mov r3,#1<<inv_N                      ; r3=dividend.
  mov r4,#0                             ; r4 will contain the quotient.
  mov r5,#15                            ; r5=Shift for the division.
._divide_one_step
  cmp r3,r2,lsl r5                      ; dividend bigger than divisor<<r5?
  subGE r3,r3,r2,lsl r5                 ; Yes, then dividend-=divisor<<r5,
  addGE r4,r4,r1,lsl r5                 ; and add 1<<r5 to the quotient.
  subS r5,r5,#1                         ; Next shift.
  bGE _divide_one_step
  cmp r2,r3,lsl #1                      ; Flags=divisor-2*rest.
  addLE r4,r4,#1                        ; Round to nearest integer.
  str r4,[r0],#4                        ; Save dividend/divisor.
  add r2,r2,#1                          ; One inverse calculated.
  cmp r2,#inv_nb                        ; It was the last one?
  bNE _make_one_inverse

;----------------------------------------------------------------------------
; Creates the sinus table. As for the inverses creation, the routine has
; already been released through "Memory War".
; Each step rotates (sinN,cosN) by the angle A with the addition formulas,
; keeping both values as 2^28 fixed point in r3 (sin) and r4 (cos).
.make_sinus
  adr r0,bss+sinus
  ldr r1,sinA                           ; r1=sinA*2^28.
  ldr r2,cosA                           ; r2=cosA*2^28.
  mov r3,#0                             ; r3=sin0*2^28.
  mov r4,#1<<28                         ; r4=cos0*2^28.
  mov r5,#sin_nb+1
.make_one_sinus
  mov r6,r4,lsr #28-sin_N               ; r6=cosN*2^sin_N.
  str r6,[r0,#sin_nb*2*4]               ; Save sin(N+pi/2)=cosN.
  mov r6,r3,lsr #28-sin_N               ; r6=sinN*2^sin_N.
  str r6,[r0],#4                        ; Save sinN.
  umul64 r6,r7,r1,r3,r8,r9,r10          ; [r6|r7]=sinN*sinA.
  umul64 r8,r9,r2,r4,r10,r11,r12        ; [r8|r9]=cosN*cosA.
  sub64 r6,r7,r8,r9,r6,r7               ; [r6|r7]=cos(N+1)=cosN*cosA-sinN*sinA.
  umul64 r3,r8,r3,r2,r9,r10,r11         ; [r3|r8]=sinN*cosA.
  umul64 r4,r9,r4,r1,r10,r11,r12        ; [r4|r9]=cosN*sinA.
  add64 r3,r8,r3,r8,r4,r9               ; [r3|r8]=sin(N+1)=sinN*cosA+cosN*sinA.
  adjust64 r3,r3,r8,28                  ; r3=sin(N+1)*2^28.
  adjust64 r4,r6,r7,28                  ; r4=cos(N+1)*2^28.
  subS r5,r5,#1                         ; One sinus processed.
; (Tail of the .make_one_sinus loop, then completion of the table by symmetry.)
  bNE make_one_sinus
  sub r0,r0,#4                          ; Complete the table by stupid copy.
  mov r1,r0                             ; Point on the position which are like
  add r2,r0,#sin_nb*8                   ; (pi/4+k*(pi/2)) 0<=k<=4
  mov r3,r2
  add r4,r2,#sin_nb*8
  mov r5,r4
  add r6,r4,#sin_nb*8
  mov r7,r6
  add r8,r6,#sin_nb*8
  mov r9,r8
  mov r10,#sin_nb+1                     ; Copy sin_nb+1 values.
._make_sinus_copy
  ldr r11,[r0],#-4
  str r11,[r3],#4                       ; sin(pi-X)=sinX.
  str r11,[r8],#-4                      ; sin(2*pi+X)=sinX.
  rsb r11,r11,#0
  str r11,[r4],#-4                      ; sin(pi+X)=-sinX.
  str r11,[r7],#4                       ; sin(2*pi-X)=-sinX.
  ldr r11,[r2],#-4
  str r11,[r1],#4                       ; cos(-X)=cosX.
  subS r10,r10,#1                       ; One value copied.
  strNE r11,[r9],#4                     ; cos(2*pi+X)=cosX. No copy if r10=0.
  rsb r11,r11,#0
  str r11,[r5],#4                       ; cos(pi-X)=-cosX.
  str r11,[r6],#-4                      ; cos(pi+X)=-cosX.
  bNE _make_sinus_copy

;----------------------------------------------------------------------------
; Creates the background texture. The main idea is to make a N*N fractal
; landscape, take the modulo of it, smooth, emboss and re-smooth it. The
; emboss and smoothing routines were already released in "Graphics War".
.make_fracland
  adr r0,bss+background
  ldr r1,random_germ                    ; Load the random germ.
  mov r2,#1                             ; This will be used by routine.
  strB r2,[r0]                          ; Also save 1 as upper left pixel.
  mov r3,#bg_M                          ; Iteration=bg_M.
  mov r4,#0                             ; Position of upper left pixel.
  mov r5,#0
  bl recursive_landscape
  ldr r1,videoram_adress                ; Address of a second buffer.
  add r1,r1,#320*256
  bl smooth_texture                     ; Smooth texture.

; Emboss: each destination pixel is (pixel "down" - pixel "up") + bg_middle,
; both source pixels being taken modulo 32. The y counter lives in the top
; bg_M bits of r2 so that it wraps for free.
.emboss_texture
  mov r2,#0                             ; r2=y counter<<(32-bg_M).
._emboss_one_line
  sub r3,r2,#1<<(32-bg_M)               ; r3=(y-1) mod bg_N <<(32-bg_M). (Wrapping)
  add r4,r3,#1<<(32-bg_M)               ; r4=r3+one line, i.e. y <<(32-bg_M).
                                        ; NOTE(review): original comment said
                                        ; (y+1), but the add is based on r3
                                        ; (y-1), not r2 (y) - confirm intent.
  add r3,r1,r3,lsr #(32-2*bg_M)         ; r3 points on src_line up.
  add r4,r1,r4,lsr #(32-2*bg_M)         ; r4 points on src_line down.
  mov r5,#bg_N                          ; r5=nb of pixels per line.
._emboss_one
  ldrB r14,[r3],#1                      ; r14=pixie up.
  ldrB r6,[r4],#1                       ; r6=pixie down.
  and r14,r14,#&1f                      ; Take the modulo(32).
  and r6,r6,#&1f
  sub r6,r6,r14                         ; r6=delta.
; (Tail of the ._emboss_one pixel loop: r6 holds the delta of two pixels.)
  addS r6,r6,#bg_middle                 ; Add the middle constant.
  movMI r6,#0                           ; Make sure intensity is between 0-31.
  cmp r6,#31
  rsbGE r6,r6,#63                       ; NOTE(review): with GE, r6=31 becomes 32,
                                        ; which is outside 0-31 - confirm.
  strB r6,[r0],#1                       ; Save it.
  subS r5,r5,#1                         ; One pixel done
  bNE _emboss_one
  addS r2,r2,#1<<(32-bg_M)              ; Line done.
  bNE _emboss_one_line
  sub r0,r0,#bg_N*bg_N                  ; r0 points back on buffer.
  bl smooth_texture                     ; Smooth texture.
  swap32 r0,r1
  bl smooth_texture
  swap32 r0,r1
  bl smooth_texture

; Now convert the texture to 320*256 with bg_colors. The texture is only
; bg_N pixels wide, so its first 320-bg_N columns are repeated on the right.
  adr r2,bg_colors
  mov r3,#bg_N                          ; y counter.
._bg_convert_line
  mov r4,#bg_N                          ; x counter. (Counts down, r4=bg_N-x)
._bg_convert_one
  ldrB r5,[r1],#1                       ; Load pixel.
  ldrB r5,[r2,r5]                       ; Load color.
  cmp r4,#bg_N-(320-bg_N)               ; If r4>2*bg_N-320 (ie x<320-bg_N), then
  strGTB r5,[r0,#bg_N]                  ; copy pixel at r0+bg_N.
  strB r5,[r0],#1                       ; Draw pixel.
  subS r4,r4,#1                         ; One pixel done.
  bNE _bg_convert_one
  add r0,r0,#320-bg_N                   ; Next line.
  subS r3,r3,#1                         ; One line done.
  bNE _bg_convert_line

; Copy the arm's tech logo on background.
; Bytes are read forward but written at offsets 191 down to 1.
  adr r0,bss+background+10*320+64       ; Address where to copy logo.
  adr r1,logo191_24
  mov r2,#24                            ; Nb of lines to copy.
._copy_logo_line
  mov r3,#191                           ; Nb of bytes to copy.
._copy_logo_byte
  ldrB r4,[r1],#1                       ; Load one byte.
  cmp r4,#0                             ; Transparent pixel?
  strNEB r4,[r0,r3]                     ; No, then copy it.
  subS r3,r3,#1                         ; One byte drawn.
  bNE _copy_logo_byte
  add r0,r0,#320                        ; Next line.
  subS r2,r2,#1
  bNE _copy_logo_line

;----------------------------------------------------------------------------
; Clear all screen banks.
  ldr r0,videoram_adress                ; Set parameters for the FastBox
  mov r1,#0                             ; routine, so that it clears the
  mov r2,#0                             ; whole screen.
  mov r3,#0
  mov r4,#319
  mov r5,#255
.clear_one_screen_mov
  mov r6,#0                             ; r6=ScreenNb. (Immediate is patched
                                        ; by a strB to clear_one_screen_mov.)
.clear_one_screen
  bl FastBox256
  add r0,r0,#320*256                    ; Next screen bank.
  subS r6,r6,#1                         ; One screen cleared.
  bNE clear_one_screen

;----------------------------------------------------------------------------
; Enables our Vertical Blanking (VBl) interrupt and change exit handler.
  mov r0,#&10                           ; Claim event vector. (&10)
  adr r1,vbl_routine                    ; Address of claiming routine.
  adr r2,workscr_nb                     ; Value to pass in r12 when rout is called.
  swi OS_Claim
  mov r0,#&e                            ; Enable an event.
  mov r1,#4                             ; Event 4=VBl.
  swi OS_Byte
  mov r0,#11                            ; Assign exit handler.
  adr r1,proggy_exit                    ; This is the terminal routine.
  mov r2,r13                            ; Will be in r12 when OS_Exit called.
  swi OS_ChangeEnvironment
  stmfd r13!,{r1-r3}                    ; Save old environment.

;############################################################################
;#####                        CRASHING LOGO PART                        #####
;############################################################################

; At first we create the movements for the crashing 'pixels' of the logo.
; One record of five longs (x,y,vx,vy,color) is stored per block; x goes
; from -35 to 35 and y from -8 to 8, both in steps of 2 (36*9 blocks).
  adr r0,bss+misc                       ; Store calculi here.
  adr r1,bss+inverses
  ldr r2,random_germ
  adr r3,logo_1bpp                      ; Here is the logo.
  mov r4,#1                             ; Counter for 1bpp logo longs.
  mov r5,#-8<<9                         ; r5=y_counter*4*256.
.calc_crash_y
  mov r6,#-35<<9                        ; r6=x_counter*4*256.
  mul r7,r5,r5                          ; r7=y_counter^2*(4*256)^2.
.calc_crash_x
  add r8,r6,#158<<8                     ; Recenter.
  add r9,r5,#124<<8
  mla r10,r6,r6,r7                      ; r10=(x^2+y^2)*(4*256)^2.
  sqrt32 r11,r10,r12                    ; r11=sqrt(x^2+y^2)*4*256=dist*4*256.
  mov r11,r11,lsr #6
  ldr r12,[r1,r11,lsl #2]               ; r12=inv_N/(dist*4*16).
  mul r10,r12,r6                        ; r10=x*4*256*inv_N/(dist*4*16).
  mul r11,r12,r5                        ; r11=y*4*256*inv_N/(dist*4*16).
  mov r10,r10,asr #inv_N-2              ; r10=256*x/dist.
  add r10,r10,r2,asr #32-6              ; r10=256*x/dist+rnd(64)=vx.
  random32 r2
  mov r11,r11,asr #inv_N-2              ; r11=256*y/dist.
  add r11,r11,r2,asr #32-7              ; r11=256*y/dist+rnd(128)=vy.
  random32 r2
  subS r4,r4,#1                         ; All pixels in long seen?
  ldrEQ r14,[r3],#4                     ; Then load a new logo long,
  movEQ r4,#32                          ; and reinit counter.
  addS r14,r14,r14                      ; Put pixel in carry.
  movCS r12,#0                          ; Set color according to pixel.
  movCC r12,#-1
  stmia r0!,{r8-r12}                    ; Save x,y,vx,vy,color.
  add r6,r6,#2<<9                       ; Next x.
  cmp r6,#35<<9
  bLE calc_crash_x
  add r5,r5,#2<<9                       ; Next y.
  cmp r5,#8<<9
  bLE calc_crash_y

; Logo is falling on the floor.
.falling_logo
  bl get_workscr_adr                    ; r0=workscr_adr | r1=clsbox address.
; (Body of one .falling_logo frame: r0=workscr_adr and r1=clsbox address here.)
; First restore the background under the previously drawn box.
  ldmia r1,{r2-r5}                      ; Load xleft,yup,xright,ydown.
  sub r2,r2,#1                          ; Always better to clear a bigger box.
  sub r3,r3,#1
  mov r1,r0
  adr r0,bss+background
  bl CopyBigBox256                      ; Go for clearing.

  ldr r5,fall_height                    ; r5=old height.
  cmp r5,#512                           ; Is it last one?
  bEQ crashing_logo                     ; In such case this part is over.
  add r5,r5,#4                          ; r5=new height.
  str r5,fall_height                    ; Save it.
  adr r6,bss+inverses
  ldr r6,[r6,r5,lsl #1]                 ; r6=2^inv_N/(height/2).
  rsb r5,r5,#512                        ; r5=512-height=shadow position.

; Zoom the main logo box by r6 and recenter it on screen.
  adr r7,logo_definition
  ldmia r7!,{r8-r11}                    ; Load x1,y1,x2,y2.
  mul r8,r6,r8                          ; Multiply x1 by zoom coef.
  mov r8,r8,asr #inv_N-10               ; ..re-scale..
  add r8,r8,#160                        ; ..recenter..
  mul r9,r6,r9                          ; Do the same for y1.
  mov r9,r9,asr #inv_N-10
  add r9,r9,#128
  mul r10,r6,r10                        ; .. for x2..
  mov r10,r10,asr #inv_N-10
  add r10,r10,#159
  mul r11,r6,r11                        ; .. and for y2..
  mov r11,r11,asr #inv_N-10
  add r11,r11,#127

; The new clear box spans from the logo's upper left corner to the shadow's
; lower right corner.
  bl get_workscr_adr                    ; r0=workscr_adr | r1=clsbox address.
  stmia r1!,{r8,r9}                     ; Save minimum x,y.
  add r2,r8,r5                          ; r2=x1 of shadow.
  add r3,r9,r5                          ; r3=y1.
  add r4,r10,r5                         ; r4=x2.
  add r5,r11,r5                         ; r5=y2.
  stmia r1!,{r4,r5}                     ; Save maximum x,y.
  ldr r1,[r7],#4                        ; Load shadow color.
  bl FastBox256                         ; Draw shadow.
  mov r2,r8                             ; r2=x1 of logo box.
  mov r3,r9                             ; r3=y1.
  mov r4,r10                            ; r4=x2.
  mov r5,r11                            ; r5=y2.
  mov r1,#-1                            ; White color.
  bl FastBox256                         ; Draw logo box.

; Then draw all the black boxes making up the inside of the logo.
  ldr r8,[r7],#4                        ; Load nb of boxes.
  mov r1,#0                             ; Black color for all other boxes.
  mov r9,#160<<(inv_N-10)               ; Values used for boxes recentering.
  mov r10,#128<<(inv_N-10)
  mov r11,#159<<(inv_N-10)
  mov r12,#127<<(inv_N-10)
._one_box
  ldmia r7!,{r2-r5}                     ; Load x1,y1,x2,y2.
  mla r2,r6,r2,r9                       ; Multiply x1 by zoom coef and recenter.
  mov r2,r2,asr #inv_N-10               ; ..re-scale..
  mla r3,r6,r3,r10                      ; Do the same for y1.
  mov r3,r3,asr #inv_N-10
  mla r4,r6,r4,r11                      ; .. for x2..
  mov r4,r4,asr #inv_N-10
  mla r5,r6,r5,r12                      ; .. and for y2..
  mov r5,r5,asr #inv_N-10
  bl FastBox256
  subS r8,r8,#1                         ; One box drawn.
  bNE _one_box
  adr r14,falling_logo                  ; Return address is this one.
; (End of one falling-logo frame: r14 was preset, so vsync_routine returns
; to the start of the next frame.)
  b vsync_routine                       ; Wait until next workscr is ready.

; The logo eventually hits the floor, and breaks into several parts.
; At the same time, the music begins.
.crashing_logo
#if zizik
  mov r11,#1                            ; DSym_RestartSong=swi#1.
  bl player+&3C8                        ; Jump into symphony's swi-jumptable.
#endif

.logo_breaks
  bl get_workscr_adr                    ; r0=workscr_adr | r1=clsbox address.
  ldmia r1,{r2-r5}                      ; Load xleft,yup,xright,ydown.
  sub r2,r2,#1                          ; Always better to clear a bigger box.
  sub r3,r3,#1
  mov r1,r0
  adr r0,bss+background
  bl CopyBigBox256                      ; Go for clearing.

  bl get_workscr_adr                    ; r0=workscr_adr | r1=clsbox address.
  adr r2,bss+misc                       ; Here are points coords.
  mov r3,#36*9                          ; Nb of blocks.
  mov r4,#320                           ; r4=xmin.
  mov r5,#256                           ; r5=ymin.
  mov r6,#0                             ; r6=xmax.
  mov r7,#0                             ; r7=ymax.
._one_block
  ldmia r2,{r8-r12}                     ; Load x,y,vx,vy,color.
  add r8,r8,r10                         ; x+=vx.
  add r9,r9,r11                         ; y+=vy.
  cmp r4,r8,asr #8                      ; Flags=xmin-x.
  movPL r4,r8,asr #8
  cmp r5,r9,asr #8                      ; Flags=ymin-y.
  movPL r5,r9,asr #8
  cmp r6,r8,asr #8                      ; Flags=xmax-x.
  movMI r6,r8,asr #8
  cmp r7,r9,asr #8                      ; Flags=ymax-y.
  movMI r7,r9,asr #8

; Slow the block down: move each speed component towards 0 and clamp it
; to 0 as soon as it would change sign.
  cmp r10,#0                            ; Check sign of vx.
  bGE _vxpos
  addS r10,r10,#4                       ; If vx<0, then add 4 to it,
  movPL r10,#0                          ; and stop when vx>=0.
  b _vxok
._vxpos
  subS r10,r10,#4                       ; If vx>=0, then subtract 4 from it,
  movMI r10,#0                          ; and stop when vx<0.
._vxok
  cmp r11,#0                            ; Same kind of things with vy.
  bGE _vypos
  addS r11,r11,#5
  movPL r11,#0
  b _vyok
._vypos
  subS r11,r11,#5
  movMI r11,#0
._vyok
  stmia r2!,{r8-r12}                    ; Save with new values.

; Draw the block as a 4*4 pixels square of color r12.
  mov r9,r9,asr #8                      ; r9=int(pos_y).
  add r9,r9,r9,lsl #2
  add r8,r8,r9,lsl #6+8                 ; r8=256*(x+320*int(y)).
  add r8,r0,r8,asr #8
  strB r12,[r8]:strB r12,[r8,#1]:strB r12,[r8,#2]:strB r12,[r8,#3]
  strB r12,[r8,#320]:strB r12,[r8,#321]:strB r12,[r8,#322]:strB r12,[r8,#323]
  strB r12,[r8,#640]:strB r12,[r8,#641]:strB r12,[r8,#642]:strB r12,[r8,#643]
  strB r12,[r8,#960]:strB r12,[r8,#961]:strB r12,[r8,#962]:strB r12,[r8,#963]
  subS r3,r3,#1
  bNE _one_block
  add r6,r6,#3                          ; Add size of block to xmax, ymax.
; (End of one logo_breaks frame: r4-r7=xmin,ymin,xmax,ymax of drawn blocks.)
  add r7,r7,#3
  stmia r1,{r4-r7}                      ; Save cls_box coords.
  bl vsync_routine                      ; Wait until next workscr is ready.
#if zizik
  mov r0,#-1                            ; Read pattern pos.
  mov r1,#-1                            ; Read pos in pattern.
  mov r11,#12                           ; DSym_SongPos=swi#12.
  bl player+&3C8                        ; Jump into symphony's swi-jumptable.
  cmp r0,#2                             ; Reached pattern 2?
  bNE logo_breaks                       ; No, then continue.
#else
  ldrB r12,crash_counter
  subS r12,r12,#1
  strB r12,crash_counter
  bNE logo_breaks
#endif

;############################################################################
;#####                        SENTINEL LIKE PART                        #####
;############################################################################
.sentinel_one_frame
  bl get_workscr_adr                    ; r0=workscr_adr | r1=clsbox address.
  mov r1,#&0a                           ; r1=sky color.
  add r1,r1,r1,lsl #8
  add r1,r1,r1,lsl #16
  mov r2,#(320-hclip)/2                 ; Clear 3d screen.
  mov r3,#256-8-vclip
  mov r4,#(320-hclip)/2+hclip-1
  mov r5,#256-8-vclip+vclip-1
  bl FastBox256
  adr r0,bss+static_sort                ; Clear the static sort table.
  mov r1,#1<<30                         ; Fill it with end marker.
  mov r2,#0                             ; 320*64/4=5120 longwords.
  mov r3,#0
  mov r4,#319
  mov r5,#63
  bl FastBox256                         ; It was not its primary purpose, wow!

; Perform movements of viewer. For each mvt we have a control long containing
; nb_steps*2 and bit0 contains 0 for a linear head mvt, and 1 for a jump.
; (Performed with cubic splines) In first case, control long is followed by
; add_ax&add_ay, else by a0x,a2x,a3x,a0y,a1y,a2y,a3y,a0z,a2z,a3z,a,b,c which
; are the coefficients for the movements splines. (a1x,a1z are null)
  adr r0,viewer_angles                  ; Points on ax,ay.
  ldr r1,mvt_counter                    ; Load mvt_counter.
  add r1,r1,#1                          ; Increment it.
  ldr r2,mvt_pointer                    ; r2 points on current mvt in mvts_table.
  ldr r3,[r2],#4                        ; r3=control for this mvt.
  cmp r1,r3,lsr #1                      ; Flags=mvt_counter-mvt_length.
  bMI no_mvt_change                     ; If mvt_counter<mvt_length, no change.
  add r2,r2,#2*4                        ; Head mvts, so pass 2 longs.
  movS r3,r3,asr #1                     ; Carry=bit 0 of control.
  addCS r2,r2,#11*4                     ; Mvt was a jump so pass more longs.
.mvt_change_next
  str r2,mvt_pointer                    ; Save new mvt pointer.
  ldr r3,[r2],#4                        ; Load new control long.
  cmp r3,#0                             ; Flags=control-0.
  adrMI r2,mvts_table                   ; If control=-1, reinit mvts and
  bMI mvt_change_next                   ; re-load control long.
  mov r1,#0                             ; Reinit mvt_counter.
.no_mvt_change
  str r1,mvt_counter                    ; Save modified mvt_counter.
  movS r3,r3,lsr #1                     ; Carry=bit 0 of control.
  bCS jumping_mvt
  ldmia r2,{r3-r4}                      ; Head_mvt, load add_ax,add_ay.
  ldmia r0,{r5-r6}                      ; Load ax,ay.
  add r5,r5,r3                          ; ax+=add_ax.
  add r6,r6,r4                          ; ay+=add_ay.
  stmia r0,{r5-r6}                      ; Save new ones.
  b mvts_done

.jumping_mvt                            ; Fixed point shift is indicated in parenthesis.
  mul r3,r1,r1                          ; r3=mvt_counter^2. (14)
  mul r4,r3,r1                          ; r4=mvt_counter^3. (21)
  mov r1,r1,lsl #4                      ; r1=mvt_counter. (11)
  mov r3,r3,lsr #3                      ; r3=mvt_counter^2. (11)
  mov r4,r4,lsr #7+3                    ; r4=mvt_counter^3. (11)
  ldmia r2!,{r5-r11}                    ; Load a0x,...,a3y. (18,7,7,18,7,7,7)
  mla r5,r3,r6,r5                       ; r5+=a2x*t^2. (18)
  mla r5,r4,r7,r5                       ; r5+=a3x*t^3. (18)
  mla r8,r1,r9,r8                       ; r8=a1y*t+a0y. (18)
  mla r8,r3,r10,r8                      ; r8+=a2y*t^2. (18)
  mla r8,r4,r11,r8                      ; r8+=a3y*t^3. (18)
  mov r8,r8,asr #18                     ; r8=new_y.
  mov r9,r5,asr #18                     ; r9=new_x.
  ldmia r2!,{r5-r7}                     ; Load a0z,a2z,a3z. (18,7,7)
  mla r5,r3,r6,r5                       ; r5+=a2z*t^2. (18)
  mla r5,r4,r7,r5                       ; r5+=a3z*t^3. (18)
  mov r5,r5,asr #18                     ; r5=new_z.
  mul r10,r5,r9                         ; r10=x*z.
  add r9,r5,r9,lsl #16                  ; r9=x|z.
  adr r5,viewer_pos+4
  stmia r5,{r8-r10}                     ; Save y, x|z, x*z.
  ldmia r2!,{r5-r7}                     ; Load a,b,c. (18,7,7)
  mla r5,r1,r6,r5                       ; r5=a+b*t. (18)
  mla r5,r3,r7,r5                       ; r5+=c*t^2. (18)
  mov r5,r5,lsl #32-sin_pow-18          ; r5=ax. (32-sin_pow)
  str r5,[r0]                           ; Save it.
  ldr r6,[r0,#4]                        ; Load ay. (32-sin_pow)

; Here we have r5=ax, r6=ay. Calculate the rotation matrix and rotate points
; for all the objects which will be at a given position on the checkerboard.
; r13 is saved in old_stack and then used as a general register.
.mvts_done
  str r13,old_stack                     ; What about an Arm with 32 registers?
  mov r1,r6,lsr #32-sin_pow             ; r1=ay. (Frac part removed)
  mov r1,r1,lsl #32-sin_pow             ; Back in upper bits. (easier modulo)
  mov r4,r5,lsr #32-sin_pow             ; r4=ax. (idem)
  mov r4,r4,lsl #32-sin_pow
  adr r0,bss+sinus
  yx_matrix r0,r1,r6,r2,r4,r7,r3,r5,r8  ; r1=A,r2=D,r3=G,r4=E,r5=H,r6=C,r7=F,r8=I

; Storage used below, relative to r0=bss+inverses-4*4:
;   [r0-16..r0-4] = E, H, -D*F, -G*I      [r0..r0+12] = C, F, I, A
  adr r0,bss+inverses-4*4               ; We'll use this buffer.
  stmia r0,{r6-r8}                      ; Save C,F,I.
  str r1,[r0,#12]                       ; Save A.
  mul r7,r2,r7                          ; r7=D*F.
  rsb r7,r7,#0                          ; r7=-D*F.
  mul r8,r3,r8                          ; r8=G*I.
  rsb r8,r8,#0                          ; r8=-G*I.
  stmdb r0,{r4,r5,r7-r8}                ; Save E,H,-D*F,-G*I.
  mul r1,r6,r1                          ; r1=A*C.
  rsb r1,r1,#0                          ; r1=-A*C.
  adr r4,objects_pts_src                ; Address of source coords,
  adr r5,objects_pts_dest               ; and of destination.
.rotate_one_y_set
  ldmia r4!,{r6,r7}                     ; r6=nb pts with this y | r7=y.
  cmp r6,#0                             ; No more points?
  bEQ _rotate_end
  ldmdb r0,{r8-r11}                     ; Load E,H,-D*F,-G*I.
  mla r8,r7,r8,r10                      ; r8=-D*F+E*y.
  mla r7,r9,r7,r11                      ; r7=-G*I+H*y.
._rotate_one_point
  ldmia r4!,{r9,r10}                    ; r9=[x|z] | r10=x*z.
  ldmia r0,{r11-r14}                    ; Load C,F,I,A.
  add r11,r11,r9,asr #16                ; r11=C+x.
  add r12,r12,r9,asr #16                ; r12=F+x.
  add r13,r13,r9,asr #16                ; r13=I+x.
  mov r9,r9,lsl #16                     ; r9=z<<16.
  add r14,r14,r9,asr #16                ; r14=A+z.
  mla r11,r14,r11,r1                    ; r11=(A+z)*(C+x)-A*C.
  sub r11,r11,r10                       ; r11=x'=A*x+C*z.
  mov r11,r11,asr #sin_N
  add r14,r2,r9,asr #16                 ; r14=D+z.
  mla r12,r14,r12,r8                    ; r12=(D+z)*(F+x)-D*F+E*y.
  sub r12,r12,r10                       ; r12=y'=D*x+E*y+F*z.
  mov r12,r12,asr #sin_N
  add r14,r3,r9,asr #16                 ; r14=G+z.
  mla r13,r14,r13,r7                    ; r13=(G+z)*(I+x)-G*I+H*y.
  sub r13,r13,r10                       ; r13=z'=G*x+H*y+I*z.
  mov r13,r13,asr #sin_N
  stmia r5!,{r11-r13}                   ; Save x',y',z'.
  subS r6,r6,#1                         ; One point rotated.
  bNE _rotate_one_point
  b rotate_one_y_set                    ; Next y set.
._rotate_end

; Here r11-r13=coord of checkerboard extremity, because last object was the
; viewer's position. We now compute and store all possible heights for
; checkerboard points, and we also fill the extra storage buffer with:
;   wx=C*3,wy=F*3,wz=I*3,?,?,?,?,@checker_height,ux=A*3,uy=D*3,uz=G*3
; (Note that all values A-I are fixed point real with shift=sin_N.)
  mov r11,r11,lsl #sin_N-6              ; Convert current coord to fixed point.
; (Continuation: r11-r13 are being converted to fixed point, r0 points on
; the C,F,I,A longs and r2=D, r3=G.)
  mov r12,r12,lsl #sin_N-6
  mov r13,r13,lsl #sin_N-6
  adr r1,checker_heights                ; Address of precalculated heights buffer.
  ldmia r0,{r4-r7}                      ; Load C,F,I,A.
  add r4,r4,r4,lsl #1                   ; r4=C*3.
  add r5,r5,r5,lsl #1                   ; r5=F*3.
  add r6,r6,r6,lsl #1                   ; r6=I*3.
  add r7,r7,r7,lsl #1                   ; r7=A*3.
  stmia r0!,{r1,r7}                     ; Save @checker_heights and A*3.
  add r2,r2,r2,lsl #1                   ; r2=D*3.
  add r3,r3,r3,lsl #1                   ; r3=G*3.
  stmia r0!,{r2-r3}                     ; Save D*3,G*3 & r0 points on inverses.
  sub r2,r0,#6*4
  ldmdb r2!,{r7-r8}                     ; Load E,H.
  add r7,r7,r7,lsl #1                   ; r7=E*3.
  add r8,r8,r8,lsl #1                   ; r8=H*3.
  stmdb r2,{r4-r6}                      ; Save C*3, F*3, I*3.
  mov r4,#0                             ; First height is null.
  mov r5,#0
  stmia r1!,{r4-r5}                     ; Save first height.
  mov r6,#max_checker_height            ; Nb of height to compute.
._one_checker_height
  sub r4,r4,r7                          ; r4-=E*3.
  sub r5,r5,r8                          ; r5-=H*3.
  stmia r1!,{r4-r5}                     ; Save heights.
  subS r6,r6,#1
  bNE _one_checker_height

; Here's the last part related to vectorworld rotations and translations.
; Main idea is to go through a grid (with v=0) and for each point of this
; board adding its height by using the checker_heights table. Also we can
; have an object on this square, and in such case we translate the coords of
; object at origin (computed before) by simply adding the grid point coords.
  adr r1,heights&objects_nb
  adr r2,bss+screen_xy                  ; Will contain x&y screen coords.
  adr r3,bss+space_z                    ; Will contain z spatial coord.
  mov r7,#17                            ; w_counter.
  sub r0,r0,#16                         ; r0 points just after the 4 empty longs.
._checker_w_loop
  stmdb r0,{r7,r11-r13}                 ; Save w_counter and current row.
  mov r4,#17                            ; u_counter.
  ldr r5,[r0],#16                       ; r5=@checker_heights & restore r0.
._checker_u_loop
  ldrB r6,[r1],#1                       ; r6=height | object_nb.
  and r7,r6,#&f0                        ; r7=height<<4.
  add r7,r5,r7,lsr #1                   ; r7 points on good height vector.
  ldmia r7,{r7-r8}                      ; r7=vy | r8=vz.
  add r7,r12,r7,asr #1                  ; r7=current_y+vy*96.
  add r8,r13,r8,asr #1                  ; r8=current_z+vz*96.
  andS r6,r6,#&f                        ; r6=object_nb.
  bEQ _no_object_here
  sub r5,r5,r6,lsl #3                   ; r5 points on object block.
  ldmia r5,{r5-r6}                      ; r5=@objects_coords | r6=nb_pts.
._object_one_point
  ldmia r5!,{r9-r10,r14}                ; Load coords of point.
  add r9,r9,r11,asr #sin_N-6            ; r9=x=int(object_x+current_x).
  add r10,r10,r7,asr #sin_N-6           ; r10=y=int(object_y+current_y+vy).
  add r14,r14,r8,asr #sin_N-6           ; r14=z=int(object_z+current_z+vz).
  addS r14,r14,#256                     ; z+=dist.
  str r14,[r3],#4                       ; Save it in space_z.
  bLE _object_point_next                ; We won't need x'&y' if z+dist<=0.
  ldr r14,[r0,r14,lsl #2]               ; r14=inv_N/(z+dist).
  mul r9,r14,r9                         ; r9=x*inv_N/(z+dist).
  mov r9,r9,asr #inv_N-8                ; r9=x'=x*dist/(z+dist).
  add r9,r9,#hclip/2                    ; Recenter.
  mul r10,r14,r10                       ; r10=y*inv_N/(z+dist).
  mov r10,r10,asr #inv_N-8              ; r10=y'=y*dist/(z+dist).
  add r10,r10,#vclip/2                  ; Recenter.
  stmia r2,{r9-r10}                     ; Save screen x&y.
._object_point_next
  add r2,r2,#8                          ; Next position for screen_xy.
  subS r6,r6,#1                         ; One point performed.
  bNE _object_one_point

; Now project the checkerboard point itself (r11=x, r7=y+vy, r8=z+vz).
._no_object_here
  mov r8,r8,asr #sin_N-6                ; r8=int(z).
  addS r8,r8,#256                       ; checker_z+=dist.
  str r8,[r3],#4                        ; Save it in space_z.
  bLE _next_u                           ; We won't need x'&y' if z+dist<=0.
  ldr r8,[r0,r8,lsl #2]                 ; r8=inv_N/(z+dist).
  mov r6,r11,asr #sin_N-6               ; r6=int(x).
  mul r6,r8,r6                          ; r6=checker_x*inv_N/(z+dist).
  mov r6,r6,asr #inv_N-8                ; r6=x'=checker_x*dist/(z+dist).
  add r6,r6,#hclip/2                    ; Recenter.
  mov r7,r7,asr #sin_N-6                ; r7=int(y).
  mul r7,r8,r7                          ; r7=checker_y*inv_N/(z+dist).
  mov r7,r7,asr #inv_N-8                ; r7=y'=checker_y*dist/(z+dist).
  add r7,r7,#vclip/2                    ; Recenter.
  stmia r2,{r6-r7}                      ; Save screen x&y.
._next_u
  add r2,r2,#8                          ; Next position for screen_xy.
  ldmdb r0,{r5-r8}                      ; Load @checker_heights, ux, uy, uz.
  add r11,r11,r6                        ; current_x+=ux*192.
  add r12,r12,r7                        ; current_y+=uy*192.
  add r13,r13,r8                        ; current_z+=uz*192.
  subS r4,r4,#1                         ; One checker point done.
  bNE _checker_u_loop
  sub r0,r0,#16                         ; r0 points on middle of storage.
  ldmdb r0,{r4-r7,r11-r13}              ; Load w(x,y,z);w_counter;row(x,y,z).
  add r11,r11,r4                        ; row_x+=wx*192.
  add r12,r12,r5                        ; row_y+=wy*192.
  add r13,r13,r6                        ; row_z+=wz*192.
  subS r7,r7,#1                         ; One checker row done.
; (Tail of the ._checker_w_loop row loop.)
  bNE _checker_w_loop

; Eliminate backfaces (z coord of vectorial product of two edges is <0) and
; invisible polygons, (one z is <=0) then sort visible polys according to
; (zmax+sum(z)/nb_points)/2. (sorting with only zmax or sum(z)/nb_points is
; not as good, so..) The method used to sort the polygons is one I invented
; some years ago when doing vectorballs on my STe. (In the demo "CakeHead",
; yup the first one). I've now greatly improved it, and also discovered
; other fellows invented the same method which is called "Counting Sort".
; But enough bullshits, here are the explanations...
;   I suppose all the polygons have a z-coord between zmin and zmax, (not
; very restrictive for 3d) and we have a table with one pointer for each z,
; called the static table, which is initialised with an end marker. We also
; have another table, called the dynamic one, which will contain couples of
; pointers to a polygon definition and to the next couple with a polygon
; having the same z-coord. For each visible polygon, we calculate its z-coord
; we save address of the polygon definition & static(z) in the dynamic table,
; and we save the dynamic address of the couple in static(z). I think you got
; the point, for each z-coord we have a chained list containing addresses
; of the polygons. Then, using the sorted datas won't be very hard, since
; we'll only need to go through a list until we reach the end-marker. In
; case you don't know what a chained list is, I recommend you get a look
; in computer science books since it is a classical datatype.
.faces_sorting
  adr r0,bss+inverses
  adr r2,bss+space_z
  adr r3,faces_definition
  adr r4,bss+dynamic_sort
  adr r5,bss+static_sort
  adr r6,bss+screen_xy
  ldmia r3!,{r1,r7-r9}                  ; Load nb_points-2 and 3 first points.
; I assume there is at least one valid polygon, (what would be the need of
; sorting, else?) so I don't check here if nb_points-3 is positive.
._sort_one_face
  ldr r10,[r2,r7,lsl #2]                ; r10=z1.
  cmp r10,#0                            ; Is z1<=0?
ldrGT r11,[r2,r8,lsl #2] ; r11=z2. cmpGT r11,#0 ; ..or z2<=0? ldrGT r12,[r2,r9,lsl #2] ; r12=z3. cmpGT r12,#0 ; ..or z3<=0? bLE _sort_invisible ; Then face is invisible. mov r13,r10,lsr #1 ; r13 will contain zmax/2. cmp r13,r11,lsr #1 ; Flags=zmax/2-z2/2. movMI r13,r11,lsr #1 ; If z2/2>zmax/2, then it' s new zmax/2. cmp r13,r12,lsr #1 ; Idem with z3. movMI r13,r12,lsr #1 add r10,r10,r11 ; r10=z1+z2. add r10,r10,r12 ; r10=z1+z2+z3. add r7,r6,r7,lsl #3 ; r7=@screen_xy(pt0) ldmia r7,{r7,r11} ; r7=x0 | r11=y0. add r8,r6,r8,lsl #3 ; r8=@screen_xy(pt1) ldmia r8,{r8,r12} ; r8=x1 | r12=y1. sub r7,r7,r8 ; r7=x0-x1. sub r11,r12,r11 ; r11=y0-y1. add r9,r6,r9,lsl #3 ; r9=@screen_xy(pt2) ldmia r9,{r9,r14} ; r9=x2 | r14=y2. sub r9,r9,r8 ; r9=x2-x1. mul r9,r11,r9 ; r9=(x2-x1)*(y0-y1). sub r14,r14,r12 ; r14=y2-y1. mlaS r7,r14,r7,r9 ; r7=(x0-x1)*(y2-y1)+(x2-x1)*(y0-y1). bMI _sort_invisible ; Result<0, then it is a backface. mov r7,r3 ; r7=r3=face_definition+16. subS r8,r1,#2 ; r8=nb_points-4. bMI _sort_compute_z ; No more if nb_points=3. ._sort_one_point ldr r9,[r7],#4 ; Load point nb. ldr r9,[r2,r9,lsl #2] ; r9=z of point. cmp r9,#0 ; z<0? bLE _sort_invisible ; Then face is invisible. cmp r13,r9,lsr #1 ; Flags=zmax/2-z/2. movMI r13,r9,lsr #1 ; If z/2>zmax/2, then it' s new zmax/2. add r10,r10,r9 ; r10+=z. subS r8,r8,#1 ; One point seen. bGE _sort_one_point ._sort_compute_z add r7,r1,#2 ; r7=nb_points. ldr r7,[r0,r7,lsl #2] ; r7=2^inv_N/nb_points. mul r10,r7,r10 ; r10=sum(z)*2^inv_N/nb_points. mov r7,r10,lsr #inv_N ; r7=sum(z)/nb_points. add r7,r13,r7,lsr #1 ; r7=(zmax+sum(z)/nb_points)/2. ldr r8,[r5,r7,lsl #2] ; Load static(z)=@previous_dynamic. str r4,[r5,r7,lsl #2] ; static(z)=@this_dynamic. stmia r4!,{r3,r8} ; Save face_adress & static(z). ._sort_invisible add r3,r3,r1,lsl #2 ; Pass nb_points-3*points and color. ldmia r3!,{r1,r7-r9} ; Load nb_points-2 and 3 first points. subS r10,r1,#1 ; r10=nb_points-3. bGE _sort_one_face ; No, then this is a valid polygon. 
ldr r13,old_stack ; Pfiuu, he' s back. ; Last, but not least we have to draw the polygons on screen. As stated ; above, we go through the chained lists corresponding to the z-coords of ; faces. The important part for drawing is the polygon routine. Here we ; have r5=bss+static_sort and r6=bss+screen_xy. bl get_workscr_adr ; r0=videoram adress. add r0,r0,#320*48+8 adr r2,bss+extra ; r2 points on extra+inverses. adr r3,poly_coords ; Save coords here. mov r7,#inv_nb-1 ; r7=z counter. ._draw_one_z ldr r9,[r5,r7,lsl #2] ; Load static(r7). cmp r9,#1<<30 ; static(r7)=end marker? subGES r7,r7,#1 ; Then next z. bGT _draw_one_z ; And continue if z>0. cmp r7,#0 ; z=0? bLE _drawing_end ; Then we have finished to draw. ._one_face_z ldmia r9,{r8,r9} ; r8=polygon adress | r9=@next. mov r1,r3 ; Don' t modify r3. ldmdb r8,{r10-r12,r14} ; Load nb_points-2 & 3 first points. add r4,r6,r11,lsl #3 ; r4=@screen_xy(pt0). ldmia r4,{r4,r11} ; r4=x0 | r11=y0. stmia r1!,{r4,r11} ; Save them in coords table. add r4,r6,r12,lsl #3 ; r4=@screen_xy(pt1). ldmia r4,{r4,r11} ; r4=x1 | r11=y1. add r12,r6,r14,lsl #3 ; r12=@screen_xy(pt2). ldmia r12,{r12,r14} ; r12=x2 | r14=y2. stmia r1!,{r4,r11-r12,r14} ; Save them. add r4,r10,#2 ; r4=nb_points. subS r10,r10,#2 ; r10=nb_points-4. bMI _draw_face ; No more points if nb_points=3. ._copy_face_one ldr r11,[r8],#4 ; Load point nb. add r11,r6,r11,lsl #3 ; r11=@screen_xy(pt). ldmia r11,{r11,r12} ; r11=x | r12=y. stmia r1!,{r11,r12} ; And copy them. subS r10,r10,#1 ; One point left? bGE _copy_face_one ._draw_face ldr r1,[r8],#4 ; Load face color. bl FastPoly256 ; And draw the polygon. cmp r9,#1<<30 ; @next=end marker? bNE _one_face_z ; Else we have continue in this list. subS r7,r7,#1 ; No more face with this z, so next z. b _draw_one_z ._drawing_end bl vsync_routine ; Wait until next workscr is ready. swi OS_ReadEscapeState ; Escape key pressed? bCC sentinel_one_frame ; No, then loop. 
swi OS_Exit ;**************************************************************************** ;**************************************************************************** ;***** ***** ;***** MAIN DATAS ***** ;***** ***** ;**************************************************************************** ;**************************************************************************** .old_stack ; 15 registers are not enough. .random_germ dcd &eb1a2c34 ; The magical random number. .old_scrblank dcd 0 .bg_colors dcb &08,&09,&0a,&0b,&a4,&a5,&a6,&a7,&d8,&d9,&da,&db ; Blue. dcb &db,&da,&d9,&d8,&a7,&a6,&a5,&a4,&0b,&0a,&09,&08 .sinA dcd 1647089 ; sin((pi/4)/sin_nb)*2^28. .cosA dcd 268430403 ; cos((pi/4)/sin_nb)*2^28. .crash_counter ; Contains .logo_1bpp dcd %00000000000000000000000000000000,%00000000001100000000000000000000 dcd %00000110000000110000000000000000,%00000000011000111011001111100111 dcd %11100110111001100110011101100011,%01101011011100110110011000110110 dcd %00110110101101100011000001100011,%01100111011010110110001101100011 dcd %11100011101101100011011000110110,%00000000000000000000000000000000 dcd %00000000000000000000000000000000 .fall_height dcd 0 .logo_definition dcd -18,-5,18,4 ; Coords of 1st box. dcd &0a0a0a0a ; Color of shadow box. dcd 20 ; Nb of boxes dcd -17,-1,-15,2, -16,2,-11,3, -13,-1,-12,0, -12,-4,-10,2, -16,-2,-13,-1 dcd -9,-1,-7,2, -8,2,-5,3, -5,1,-4,2, -4,-1,-2,3, -8,-2,-3,-1 dcd -1,-2,1,3, 1,-2,5,-1, 4,-1,6,3, 2,-1,3,2 dcd 7,-2,9,3, 9,-1,10,0, 10,-2,13,-1, 12,-1,14,3 dcd 15,-4,17,0, 15,1,17,3 ; .................................... ; ......XX.........................XX. ; ......XX.........................XX. ; ..XXX.XX..XXXXX..XXXXXX..XX.XXX..XX. ; .XX..XXX.XX...XX.XX.X.XX.XXX..XX.XX. ; .XX...XX.XX...XX.XX.X.XX.XX...XX.... ; .XX...XX.XX..XXX.XX.X.XX.XX...XX.XX. ; ..XXXXX...XXX.XX.XX...XX.XX...XX.XX. ; .................................... .viewer_angles dcd &20000000,&80000000 ; ax,ay (<<32-sin_pow). .mvt_counter dcd -1 ; Counter for current mvt. 
.mvt_pointer dcd mvts_table ; Points on current mvt. ; Table containing movements datas, in the following way. ; 1 long = nb_steps*2 + bit0 ; If bit0=0, then we have add_ax, add_ay. (Head mvt) ; If bit0=1, we have a0x,a2x,...,a2z,a3z,a,b,c. (Splines coefs for jump) .mvts_table incbin "mvts" dcd -1 ; End marker. ; Coords for objects' points. They are arranged in the following way: ; 1 long = nb of points with this y. ; 1 long = y position. ; nb_pts * 1 long = [x|z] (16 bits each) ; 1 long = x*z. ; Last point is the checkerboard extremity. (relative position to viewer) .objects_pts_src dcd 4, 0, ( 86<<16)+ 86, 86* 86, ( 86<<16)+106, 86*106 ; Obj1 = Tree. dcd (106<<16)+106,106*106, (106<<16)+ 86,106* 86 dcd 8, -48, ( 86<<16)+ 86, 86* 86, ( 86<<16)+106, 86*106 dcd (106<<16)+106,106*106, (106<<16)+ 86,106* 86 dcd ( 96<<16)+ 10, 96* 10, (182<<16)+ 96,182* 96 dcd ( 96<<16)+182, 96*182, ( 10<<16)+ 96, 10* 96 dcd 1,-256, ( 96<<16)+ 96, 96* 96 ; Polys for Tree - 4,5,1,0 5,6,2,1 6,7,3,2 7,4,0,3 (trunk) ; 11,10,9,8 (underneath) 8,9,12 9,10,12 10,11,12 11,8,12 (leaves) dcd 4, 0, ( 66<<16)+ 66, 66* 66, (126<<16)+ 66,126* 66 ; Obj2 = Monolith. dcd (126<<16)+126,126*126, ( 66<<16)+126, 66*126 dcd 4,-192, ( 76<<16)+ 76, 76* 76, (116<<16)+ 76,116* 76 dcd (116<<16)+116,116*116, ( 76<<16)+116, 76*116 dcd 1,-256, ( 96<<16)+ 96, 96* 96 ; Polys for Monolith - 0,1,5,4 1,2,6,5 2,3,7,6 3,0,4,7 (body) ; 4,5,8 5,6,8 6,7,8 7,4,8 (hat) dcd 4, 0, ( 36<<16)+ 36, 36* 36, (156<<16)+ 36,156* 36 ; Obj3 = Block. dcd (156<<16)+156,156*156, ( 36<<16)+156, 36*156 dcd 4, -64, ( 96<<16)+ 12, 96* 12, (180<<16)+ 96,180* 96 dcd ( 96<<16)+180, 96*180, ( 12<<16)+ 96, 12* 96 ; Polys for Block - 0,1,4 1,2,5 2,3,6 3,0,7 ; 4,1,5 5,2,6 6,3,7 7,0,4 4,5,6,7 .viewer_pos dcd 1, 96*2+112, ((-96*9)<<16)-96*3,96*9*96*3 ; Checkerboard start pos. dcd 0 ; End marker. ; Table containing for each object the pointer to its rotated coords and its ; nb_points. Will be accessed from checker_heights with inverse offset. 
dcd objects_pts_dest+(13+9)*12,8 ; Block. dcd objects_pts_dest+13*12,9 ; Monolith. dcd objects_pts_dest,13 ; Tree. .checker_heights ; Will contain the vectors for checker heights. dbd (max_checker_height+1)*2 .heights&objects_nb ; Table containing heights&objects_nb. (nibbles) incbin "heights" ALIGN ; Table containing the faces definition... nb_points-2, points and color. .faces_definition incbin "polys" dcd -1 ; End marker. .objects_pts_dest ; Will contain rotated coords of 3d objects. .poly_coords ; Will contain brows coords of poly. .logo191_24 ; Not very mysterious. incbin "mirrorlogo" ;**************************************************************************** ;**************************************************************************** ;***** ***** ;***** ROUTINES ***** ;***** ***** ;**************************************************************************** ;**************************************************************************** .videoram_adress dcd 148,-1 ; Values for the swi. .workscr_nb dcd 2 ; This two vars must be left together. .displayscr_nb dcd 1 ; --------------------------------------------------------------------------- ; --- Routine for Vertical Blank interrupt. --- ; --------------------------------------------------------------------------- ; We check if the next screen which will be displayed is entirely drawn ; (ie display_scr_nb-1<>workscr_nb), and in this case we use a swi to change ; the screen bank to display_scr_nb-1. When displayscr_nb-1 reachs 0 it is ; set back to ScreenNb by using self-modified code. (Op2 in vbl_screennb_mov ; was changed to ScreenNb) ; At first I was accessing directly the MemC to change the display screen, ; but since this isn' t compatible coding, I had to spent some time with ; ArmOric and his PRMs in order to use a swi during this interrupt, and I ; recommend you get a look at PRMs. ; When this routine is called we must have r12 pointing on a buffer which ; contains workscr_nb and displayscr_nb. 
.vbl_routine
  cmp r0,#4                       ; Is it event number 4 (VBl)?
  movNE pc,r14                    ; Any other event is passed on untouched.
  stmfd r13!,{r0,r1,r14}          ; We need two work registers.
  ldmia r12,{r0,r1}               ; r0=workscr_nb | r1=displayscr_nb.
  subS r1,r1,#1                   ; r1=candidate bank=displayscr_nb-1.
.vbl_screennb_mov
  movEQ r1,#0                     ; Candidate=0, then wrap. (Op2 is self-modified)
  cmp r0,r1                       ; Is the candidate the screen being drawn?
  ldmEQfd r13!,{r0,r1,pc}         ; Yes, so keep displaying the current one.
  str r1,[r12,#4]                 ; Candidate is the new displayscr_nb.
  mov r12,pc                      ; r12=PC+PSR, ie current status/mode.
  orr r0,r12,#%11                 ; Same status with supervisor mode bits.
  teqp r0,#0                      ; Go to supervisor mode.
  mov r0,r0                       ; Nop after the mode change.
  str r14,[r13,#-4]!              ; Push supervisor r14, the swi overwrites it.
  mov r0,#113                     ; OS_Byte &71, bank number is still in r1.
  swi OS_Byte
  ldr r14,[r13],#4                ; Pop supervisor r14.
  teqp r12,#0                     ; Back to the mode we were called in.
  mov r0,r0                       ; Nop after the mode change.
  ldmfd r13!,{r0,r1,pc}           ; Restore work registers and return.

; ---------------------------------------------------------------------------
; ---              Routine for Vertical Synchronisation.                  ---
; ---------------------------------------------------------------------------
; Called by the main loop once the current workscreen is completely drawn.
; The work screen number is decremented (wrapping when it reaches 0, the
; wrap value being the self-modified operand of vsync_screennb_mov) and
; stored, then we spin until displayscr_nb is different from this new
; workscr_nb. displayscr_nb is updated by vbl_routine above.
; All registers are preserved.
.vsync_routine
  stmfd r13!,{r0,r14}
  ldr r0,workscr_nb
  subS r0,r0,#1                   ; r0=workscr_nb-1.
.vsync_screennb_mov
  movEQ r0,#0                     ; Reached 0, then wrap. (Op2 is self-modified)
  str r0,workscr_nb               ; This is the new workscreen number.
._vsync_spin
  ldr r14,displayscr_nb
  cmp r0,r14                      ; Is the new workscreen still displayed?
  bEQ _vsync_spin                 ; Yes, wait for the VBl to move on.
  ldmfd r13!,{r0,pc}

; ---------------------------------------------------------------------------
; ---                Routine to get WorkScreen adress.                    ---
; ---------------------------------------------------------------------------
; Returns in r0 the adress of the current workscreen and in r1 the adress
; of the clsboxes entry belonging to it. (The clsboxes buffer contains a
; succession of xleft,yup,xright,ydown used for cls)
; Only r0 and r1 are modified.
.get_workscr_adr
  stmfd r13!,{r14}                ; r14 is our scratch register.
  ldr r14,workscr_nb
  ldr r0,videoram_adress
  adr r1,bss+clsboxes
  sub r14,r14,#1                  ; r14=workscr_nb-1.
  add r1,r1,r14,lsl #4            ; Skip 16 bytes per previous workscreen.
; NOTE(review): the original comment here said "16 longs per cls boxes", but
; lsl #4 only skips 16 bytes (4 longs). Confirm against the clsboxes layout.
  add r14,r14,r14,lsl #2          ; r14=(workscr_nb-1)*5.
  add r0,r0,r14,lsl #14           ; r0=video+(workscr_nb-1)*320*256.
  ldmfd r13!,{pc}

; ---------------------------------------------------------------------------
; ---                         Exit Handler.                               ---
; ---------------------------------------------------------------------------
; Puts back the previous exit handler (its three words are read just below
; the adress given in r12), disables and releases our VBl event routine,
; stops the music when zizik is set, restores the screenblanker setting and
; finally quits.
.proggy_exit
  mov r0,#11                      ; Handler number 11.
  ldmdb r12,{r1-r3}               ; Old exit handler environment.
  swi OS_ChangeEnvironment        ; ReAssign it.
  mov r1,#4                       ; Event 4=VBl.
  mov r0,#13                      ; OS_Byte 13=disable an event.
  swi OS_Byte
  adr r2,workscr_nb               ; Give same values as when claiming.
  adr r1,vbl_routine
  mov r0,#16                      ; Release Event Vector. (&10)
  swi XOS_Release
#if zizik
  mvn r0,#0                       ; Deinit symphony replayrout.
  bl player+&9F0C
#endif
  ldr r1,old_scrblank             ; Load old infos.
  mov r0,#3                       ; Write screenblanker control.
  add r1,r1,r1,lsl #2
  mov r1,r1,lsl #2                ; r1=infos*5*4.
  swi XScreenBlanker_Control
  mvn r0,#0
  swi Wimp_CommandWindow          ; Avoid the 'Press Space' from Wimp
  swi OS_Exit

; ---------------------------------------------------------------------------
; ---                   Routine smoothing a texture                       ---
; ---                       © Alain BROBECKER                             ---
; ---------------------------------------------------------------------------
; This routine works by applying the following 3*3 matrix...
;          ( 1 2 1 )   ( pix0 pix1 pix2 )
;   1/16 * ( 2 4 2 ) * ( pix3 pix4 pix5 ) = new pix.
;          ( 1 2 1 )   ( pix6 pix7 pix8 )
; Parameters are...
;       r0 = adress of initial N*N texture.
;       r1 = adress of N*N buffer for smoothed result.
.smooth_texture
  stmdb r13!,{r0-r9,r14}
  mov r2,#0                             ; r2=y<<(32-M), start with line 0.
; The x and y counters live in the top bg_M bits of their register, so
; adding or subtracting one step wraps modulo N for free.
._smooth_row
  add r5,r0,r2,lsr #(32-2*bg_M)         ; r5 points on current src_line.
  sub r4,r2,#1<<(32-bg_M)               ; r4=(y-1) mod N <<(32-M).
  add r6,r2,#1<<(32-bg_M)               ; r6=(y+1) mod N <<(32-M).
  add r4,r0,r4,lsr #(32-2*bg_M)         ; r4 points on src_line above.
  add r6,r0,r6,lsr #(32-2*bg_M)         ; r6 points on src_line below.
  mov r3,#0                             ; r3=x<<(32-M), start with column 0.
._smooth_pixel
  sub r7,r3,#1<<(32-bg_M)               ; r7=(x-1) mod N <<(32-M).
  add r8,r3,#1<<(32-bg_M)               ; r8=(x+1) mod N <<(32-M).
; The four direct neighbours have weight 2, so they are summed first...
  ldrB r9,[r4,r3,lsr #(32-bg_M)]        ; Up.
  ldrB r14,[r6,r3,lsr #(32-bg_M)]       ; Down.
  add r9,r9,r14
  ldrB r14,[r5,r7,lsr #(32-bg_M)]       ; Left.
  add r9,r9,r14
  ldrB r14,[r5,r8,lsr #(32-bg_M)]       ; Right.
  add r9,r9,r14
; ...then comes the centre, which weighs twice as much...
  ldrB r14,[r5,r3,lsr #(32-bg_M)]       ; Centre.
  add r9,r9,r14,lsl #1                  ; r9=neighbours+2*centre.
; ...and doubling the lot while adding the corners gives the 1-2-4 weights.
  ldrB r14,[r4,r7,lsr #(32-bg_M)]       ; Up left.
  add r9,r14,r9,lsl #1                  ; r9=2*neighbours+4*centre+corner.
  ldrB r14,[r4,r8,lsr #(32-bg_M)]       ; Up right.
  add r9,r9,r14
  ldrB r14,[r6,r7,lsr #(32-bg_M)]       ; Down left.
  add r9,r9,r14
  ldrB r14,[r6,r8,lsr #(32-bg_M)]       ; Down right.
  add r9,r9,r14
  mov r9,r9,lsr #4                      ; r9=weighted sum/16.
  strB r9,[r1],#1                       ; Save smoothed pixel.
  addS r3,r3,#1<<(32-bg_M)              ; Next pixel, result is 0 after N steps.
  bNE _smooth_pixel
  addS r2,r2,#1<<(32-bg_M)              ; Next line, result is 0 after N steps.
  bNE _smooth_row
  ldmia r13!,{r0-r9,pc}                 ; r0-r9 are given back unchanged.

; ---------------------------------------------------------------------------
; ---              Routine creating a fractal landscape                   ---
; ---                       © Alain BROBECKER                             ---
; ---------------------------------------------------------------------------
; Recursive landscape creation. Considering a point in the landscape and
; the iteration (2^iteration=width of square) we construct the points m4-m8
; and go on recursively on all resulting four squares.
;       m0--m4--m1    h4=0.5*(h0+h1)+rnd
;       |   |   |     h5=0.5*(h1+h2)+rnd
;       m7--m8--m5    h6=0.5*(h2+h3)+rnd
;       |   |   |     h7=0.5*(h3+h0)+rnd
;       m3--m6--m2    h8=0.25*(h0+h1+h2+h3)+rnd
; Parameters are...
;       r0=adress of buffer for landscape.
;       r1=random number.
;       r2=1.
;       r3=iteration.
;       r4=posx.
;       r5=posy.
.recursive_landscape
; On exit r3-r5 are restored, r1 holds the updated random number and
; r6-r12 have been used as work registers. A height of 0 in the buffer
; means "point not set yet", so every computed height is clamped to 1..255.
  stmfd r13!,{r3-r5,r14}
; At first, we calculate h4,h5,h6,h7 and h8.
  add r6,r4,r2,lsl r3
  and r6,r6,#bg_N-1               ; r6=(posx+2^iteration) mod(bg_N).
  add r7,r5,r2,lsl r3
  and r7,r7,#bg_N-1               ; r7=(posy+2^iteration) mod(bg_N).
  add r9,r4,r7,lsl #bg_M          ; r9 points on m3.
  add r8,r6,r7,lsl #bg_M          ; r8 points on m2.
  add r7,r6,r5,lsl #bg_M          ; r7 points on m1.
  add r6,r4,r5,lsl #bg_M          ; r6 points on m0.
  ldrB r6,[r0,r6]                 ; r6=h0.
  ldrB r7,[r0,r7]                 ; r7=h1.
  ldrB r8,[r0,r8]                 ; r8=h2.
  ldrB r9,[r0,r9]                 ; r9=h3.
  sub r10,r3,#1
  mov r10,r2,lsl r10              ; r10=2^(iteration-1).
; Calculation of m8.
  add r14,r6,r7
  add r14,r14,r8
  add r14,r14,r9                  ; r14=h0+h1+h2+h3.
  mov r14,r14,asr #2              ; r14=0.25*(h0+h1+h2+h3).
  random32 r1                     ; New random number.
  rsb r11,r3,#fractal+1           ; r11=fractal+1-iteration=shift for rnd.
  addS r14,r14,r1,asr r11         ; r14=0.25*(h0+h1+h2+h3)+rnd.
  movLE r14,#1                    ; Make sure 1<=r14<=255.
  cmp r14,#255
  movGE r14,#255
  add r12,r5,r10                  ; Make r12 point on m8.
  add r12,r4,r12,lsl #bg_M
  add r12,r12,r10
  strB r14,[r0,r12]               ; Save h8.
; Calculation of m6.
  add r14,r8,r9                   ; r14=h2+h3.
  mov r14,r14,asr #1              ; r14=0.5*(h2+h3).
  random32 r1                     ; New random number.
  rsb r11,r3,#fractal             ; r11=fractal-iteration=shift for rnd.
  addS r14,r14,r1,asr r11         ; r14=0.5*(h2+h3)+rnd.
  movLE r14,#1                    ; Make sure 1<=r14<=255.
  cmp r14,#255
  movGE r14,#255
; NOTE(review): unlike the corners above, posy+2^iteration is not wrapped
; with bg_N-1 here. If it can reach bg_N, h6 is written in row bg_N, ie past
; an N*N buffer. Confirm the buffer has room for it.
  add r12,r5,r2,lsl r3            ; Make r12 point on m6.
  add r12,r4,r12,lsl #bg_M
  add r12,r12,r10
  strB r14,[r0,r12]               ; Save h6.
; Calculation of m5.
  add r14,r7,r8                   ; r14=h1+h2.
  mov r14,r14,asr #1              ; r14=0.5*(h1+h2).
  random32 r1                     ; New random number.
  addS r14,r14,r1,asr r11         ; r14=0.5*(h1+h2)+rnd.
  movLE r14,#1                    ; Make sure 1<=r14<=255.
  cmp r14,#255
  movGE r14,#255
; NOTE(review): posx+2^iteration is not wrapped either. If it can reach bg_N
; the offset addresses column 0 of the following row. To be confirmed.
  add r12,r4,r2,lsl r3            ; Make r12 point on m5.
  add r12,r12,r5,lsl #bg_M
  add r12,r12,r10,lsl #bg_M
  ldrB r8,[r0,r12]                ; Load value at m5. (h2 no more needed)
  cmp r8,#0                       ; Pixel already set?
  strEQB r14,[r0,r12]             ; Else save h5.
; Calculation of m4.
  add r14,r6,r7                   ; r14=h0+h1.
  mov r14,r14,asr #1              ; r14=0.5*(h0+h1).
  random32 r1                     ; New random number.
  addS r14,r14,r1,asr r11         ; r14=0.5*(h0+h1)+rnd.
  movLE r14,#1                    ; Make sure 1<=r14<=255.
  cmp r14,#255
  movGE r14,#255
  add r12,r4,r10                  ; Make r12 point on m4.
  add r12,r12,r5,lsl #bg_M
  ldrB r8,[r0,r12]                ; Load value at m4.
  cmp r8,#0                       ; Pixel already set?
  strEQB r14,[r0,r12]             ; Else save h4.
; Calculation of m7.
  add r14,r6,r9                   ; r14=h0+h3.
  mov r14,r14,asr #1              ; r14=0.5*(h0+h3).
  random32 r1                     ; New random number.
  addS r14,r14,r1,asr r11         ; r14=0.5*(h0+h3)+rnd.
; The lower bound was 0 here, which is the "not set" marker: the point was
; then saved as unset and used as a null height by the subsquares. It is
; now clamped to 1 like the four other points.
  movLE r14,#1                    ; Make sure 1<=r14<=255.
  cmp r14,#255
  movGE r14,#255
  add r12,r5,r10                  ; Make r12 point on m7.
  add r12,r4,r12,lsl #bg_M
  ldrB r8,[r0,r12]                ; Load value at m7.
  cmp r8,#0                       ; Pixel already set?
  strEQB r14,[r0,r12]             ; Else save h7.
; Second part, recursive call.
  subS r3,r3,#1
  ldmEQfd r13!,{r3-r5,pc}         ; Stop recursion when iter=0.
  bl recursive_landscape          ; Else go on with four subsquares.
  add r4,r4,r2,lsl r3             ; start pos=m4.
  bl recursive_landscape
  add r5,r5,r2,lsl r3             ; start pos=m8.
  bl recursive_landscape
  sub r4,r4,r2,lsl r3             ; start pos=m7.
  bl recursive_landscape
  ldmfd r13!,{r3-r5,pc}

; ---------------------------------------------------------------------------
; ---               Routine copying an aligned 8 bpp box                  ---
; ---                  © Alain BROBECKER        May 96                    ---
; ---------------------------------------------------------------------------
; * This routine copies the box from the source to the destination, working
;   with whole longwords (4 pixels): columns x1-(x1 mod 4) up to
;   x2+3-(x2 mod 4) are copied for each line between y1 and y2. This routine
;   is mostly aimed at screenparts clearing.
; * r13 is saved just after the generated code, so we can use it for the
;   ldmia-stmia copy. But we have to generate the instructions which will
;   restore it at the end of routine.
; * There are many other tricks (for the generation of 'bGE'...) but I won't
;   explain them since code is widely commented.
;
; Parameters are...
;       r0 = source adress.
;       r1 = destination adress.
;       r2 = x1.          1------+
;       r3 = y1.          |      |
;       r4 = x2.          |      |
;       r5 = y2.          +------2
.CopyBigBox256
  cmp r2,#320                     ; At first check if the box is
  cmpLT r3,#256                   ; completly out of screen, and in
  movGE pc,r14                    ; such case we quit.
; (CopyBigBox256 continued: second half of the "box out of screen" test.)
  cmp r4,#0
  cmpGE r5,#0
  movLT pc,r14                    ; Quit if x2<0 or y2<0.
  stmfd r13!,{r0-r12,r14}         ; Be clean or die.
  cmp r2,#0                       ; Perform the clipping.
  movLT r2,#0
  cmp r3,#0
  movLT r3,#0
  cmp r4,#320
  movGE r4,#319
  cmp r5,#256
  movGE r5,#255
  mov r2,r2,lsr #2                ; r2=x1>>2.
  rsbS r4,r2,r4,lsr #2            ; r4=x2>>2-x1>>2=nb_longs-1.
  subGES r14,r5,r3                ; r14=dy=y2-y1.
  ldmMIfd r13!,{r0-r12,pc}        ; Quit if nb_longs-1<0 or dy<0.
  add r3,r3,r3,lsl #2             ; r3=y1*5.
  add r3,r2,r3,lsl #4             ; r3=y1*80+x1>>2.
  add r0,r0,r3,lsl #2             ; r0=source+y1*320+4*(x1>>2).
  add r1,r1,r3,lsl #2             ; r1=dest+y1*320+4*(x1>>2).
.StrongBranch1
  b _StrongCopyBox                ; If SA is absent, will be replaced by...
;  add r2,r4,#1                   ; r2=nb_longs.
; Everything from here down to _opcodes builds, then runs, the copy loop
; for one line. The loop is written in the space reserved after _code.
  rsb r3,r2,#80                   ; r3=nb longs to pass each line=offset/4.
  adr r4,_code+4                  ; Code will be generated here.
  adr r5,_opcodes
  ldmia r5,{r6-r11}               ; Load the six words stored at _opcodes.
._one_copy_max
  subS r2,r2,#12                  ; More than 12 longs left?
  stmGEia r4!,{r6-r7}             ; Yes then save one ldmia+stmia max.
  bGT _one_copy_max               ; r2>0? Then test again.
  bEQ _generate_add               ; r2=0? Then no more copy.
; Here r2=-1..-11, and 12+r2 longs are left. The ldmia/stmia pair copying
; n longs is stored at _opcodes-100+8*n and _opcodes-96+8*n.
  add r5,r5,r2,lsl #3             ; r5 point on opcodes for last copy.
  ldmda r5,{r6-r7}                ; Load them. (from r5-4 and r5)
  cmp r2,#-11                     ; Last fill instruction is a str?
  addEQ r6,r6,r3,lsl #2           ; Then r6='ldr r2,[r0],#offset+4',
  addEQ r7,r7,r3,lsl #2           ; and r7='str r2,[r1],#offset+4'.
  stmia r4!,{r6-r7}               ; Save last fill instruction.
  bEQ _end_generate_add           ; No need of an add if we have an str.
._generate_add
  cmp r3,#0                       ; Offset is null?
  addNE r8,r8,r3                  ; No, then r8='add r0,r0,#(offset/4)<<2',
  addNE r9,r9,r3                  ; r9='add r1,r1,#(offset/4)<<2',
  stmNEia r4!,{r8-r9}             ; and save instructions.
._end_generate_add
; The bGE is saved at r4 and must jump back on _code, so its offset field
; is (_code-(r4+8))/4.
  adr r9,_code-2*4                ; Beware the pipeline.
  sub r9,r9,r4                    ; r9=offset for the bGE.
  mov r9,r9,asr #2                ; r9=offset/4. (higher byte=&ff)
  eor r9,r9,#&55<<24              ; r9=&AAxxxxxx='bGE offset'.
  stmia r4!,{r9-r11,r13}          ; Save instructions and stack.
; We fall through into the loop: one line is copied per pass, and the
; generated bGE makes it run dy+1 times.
._code
  subS r14,r14,#1                 ; One line will be drawn.
  dbd 8*2+4                       ; Space for the code and stack.
; Table of the ldmia/stmia pairs copying 1 up to 11 longs. It is only read
; as data by the generator above, and its position just before _opcodes
; matters.
  ldr r2,[r0],#4                  ; Opcodes for last copy instruction.
  str r2,[r1],#4
  ldmia r0!,{r2-r3}
  stmia r1!,{r2-r3}
  ldmia r0!,{r2-r4}
  stmia r1!,{r2-r4}
  ldmia r0!,{r2-r5}
  stmia r1!,{r2-r5}
  ldmia r0!,{r2-r6}
  stmia r1!,{r2-r6}
  ldmia r0!,{r2-r7}
  stmia r1!,{r2-r7}
  ldmia r0!,{r2-r8}
  stmia r1!,{r2-r8}
  ldmia r0!,{r2-r9}
  stmia r1!,{r2-r9}
  ldmia r0!,{r2-r10}
  stmia r1!,{r2-r10}
  ldmia r0!,{r2-r11}
  stmia r1!,{r2-r11}
  ldmia r0!,{r2-r12}
  stmia r1!,{r2-r12}
  stmia r0!,{r1-r12}              ; Never loaded by the ldmda above, but it
                                  ; keeps the last pair 12 bytes before _opcodes.
._opcodes
  ldmia r0!,{r2-r13}              ; Maximum copying instructions.
  stmia r1!,{r2-r13}
  dcd &e2800f00                   ; Opcode of 'add r0,r0,#0<<2'.
  dcd &e2811f00                   ; Opcode of 'add r1,r1,#0<<2'.
  ldr r13,[pc,#0]                 ; Load stack which is 8 bytes after.
  ldmfd r13!,{r0-r12,pc}          ; And quit.

;---- StrongARM version. No generated code. ---------------------------------
; Here r4=nb_longs-1 and r14=dy. For each line we jump into a run of 80
; ldr/str pairs, nb_longs pairs before its end.
._StrongCopyBox
  adr r6,_next_hline
  add r14,r14,#1                  ; r14=nb of lines=dy+1.
._one_hline
  mov r2,r0                       ; r2=source for this line.
  mov r3,r1                       ; r3=destination for this line.
  sub pc,r6,r4,lsl #3             ; Skip all but the last nb_longs pairs.
#rept 79
  ldr r5,[r2],#4
  str r5,[r3],#4
#endr
._next_hline
  ldr r5,[r2],#4                  ; Last pair, always executed.
  str r5,[r3],#4
  add r0,r0,#320                  ; Next line.
  add r1,r1,#320
  subS r14,r14,#1                 ; One line done.
  bNE _one_hline
  ldmfd r13!,{r0-r12,pc}

; ---------------------------------------------------------------------------
; ---                  Routine drawing a 8 bpp box                        ---
; ---                  © Alain BROBECKER        May 96                    ---
; ---------------------------------------------------------------------------
; * This routine draws the box between (x1;y1) and (x2;y2) on the mode13
;   screen with the given filling pattern.
; * r13 is saved just after the generated code, so we can use it for the
;   stmia filling. But we have to generate the instructions which will
;   restore it at the end of routine.
; * The last filling instruction (str, strB or add, to modify the adress)
;   is generated with the stmia used for the endcode generation.
; * Most times (>75%) we won't need an add to modify offsets, so I chose
;   to branch in such cases instead of cases when we have a str(B).
; * There are many other tricks (for the generation of 'bGE'..) but I won't
;   explain them since code is widely commented.
;
; Parameters are...
;       r0 = screen adress.
;       r1 = filling pattern.
;       r2 = x1.          1------+
;       r3 = y1.          |      |
;       r4 = x2.          |      |
;       r5 = y2.          +------2
.FastBox256
  cmp r2,#320                     ; At first check if the box is
  cmpLT r3,#256                   ; completly out of screen, and in
  movGE pc,r14                    ; such case we quit.
  cmp r4,#0
  cmpGE r5,#0
  movLT pc,r14
  stmfd r13!,{r0-r12,r14}         ; Be clean or die.
  cmp r2,#0                       ; Perform the clipping.
  movLT r2,#0
  cmp r3,#0
  movLT r3,#0
  cmp r4,#320
  movGE r4,#319
  cmp r5,#256
  movGE r5,#255
  subS r14,r5,r3                  ; r14=dy=y2-y1.
  subGES r5,r4,r2                 ; r5=dx=x2-x1.
  ldmMIfd r13!,{r0-r12,pc}        ; Quit if dy<0 or dx<0.
  add r3,r3,r3,lsl #2             ; r3=y1*5.
  add r0,r0,r3,lsl #6             ; r0=screen+y1*320.
.StrongBranch2
  b _StrongFastBox                ; If SA is absent, will be replaced by...
;  add r0,r0,r2                   ; r0=screen+y1*320+x1.
  mov r3,r2,lsr #2                ; r3=x1/4.
  rsbS r3,r3,r4,lsr #2            ; r3=x2/4-x1/4=nb_longs.
  adr r7,_small_adr               ; r7 points on adresses for small boxes.
  ldrEQ pc,[r7,r5,lsl #2]         ; nb_longs=0, then execute small box rout.
  rsb r5,r5,#319                  ; r5=319-dx=nb of bytes to pass each line.
  adr r6,_code+4                  ; Generate code here.
; The four words before _small_adr are r8='strB r1,[r0],#1', r9='stmia max',
; r10='ldr r13,[pc,#0]' and r11='ldmfd r13!,{r0-r12,pc}'. After the
; writeback r7 points on _opcodes.
  ldmdb r7!,{r8-r11}              ; Load some opcodes.
; Here we begin to care about first longword filling.
  andS r2,r2,#%11                 ; r2=x1 AND 3. (x1 mod 4)
  bEQ _first_long_full            ; If x1 AND 3=0, first long is full.
  sub r3,r3,#1                    ; Else first long mustn't be drawn.
  tst r2,#%01                     ; Down bit of x1 set?
  strNE r8,[r6],#4                ; Then we have an odd nb of strB.
  tst r2,r2,lsl #1                ; bit1 AND bit0 of x1 cleared?
  strEQ r8,[r6],#4                ; Then x1 AND 3=1 or 2, so we must
  strEQ r8,[r6],#4                ; generate two strB more.
._first_long_full
  and r4,r4,#%11                  ; r4=x2 AND 3. (x2 mod 4)
  and r2,r4,r4,lsr #1             ; r2=bit1 AND bit0 of x2.
  addS r3,r3,r2                   ; If x2 AND 3=%11, last long is full.
  bEQ _last_longword              ; If nb_longs=0 go to last long.
._one_stmia_max
  subS r3,r3,#13                  ; More than 13 longs left?
  strGE r9,[r6],#4                ; Yes, then save one stmia max.
  bGT _one_stmia_max              ; r3>0? Then test again.
; Here r3=-1..-12. The opcode filling n longs is at _opcodes-52+4*n.
  ldrMI r9,[r7,r3,lsl #2]         ; If r3<0 then load opcode of last long
  strMI r9,[r6],#4                ; fill instruction and save it.
._last_longword
  teq r4,#%11                     ; x2 AND 3=%11?
  bEQ _last_long_full             ; Then last long is full.
  tst r4,#%01                     ; Down bit clear?
  strEQ r8,[r6],#4                ; Then we have an odd nb of strB.
  teq r4,r4,lsl #1                ; NE if x2 AND 3=1 or 2. (3 was handled above)
  strNE r8,[r6],#4                ; Then there are two strB more.
  strNE r8,[r6],#4
._last_long_full
  ldr r8,[r6,#-4]!                ; Load last saved instruction.
  tst r8,#1<<26                   ; Is it a str or strB?
  bEQ _generate_add               ; No, then we'll need an add.
  add r8,r8,r5                    ; Yes, then add 319-dx to its offset.
._generate_endcode
; r8 is saved at r6 and the bGE at r6+4. It must jump back on _code, so
; its offset field is (_code-(r6+4+8))/4.
  adr r9,_code-3*4                ; Beware the pipeline and last instruction.
  sub r9,r9,r6                    ; r9=offset for the bGE.
  mov r9,r9,asr #2                ; r9=offset/4. (higher byte=&ff)
  eor r9,r9,#&55<<24              ; r9=&AAxxxxxx='bGE offset'.
  stmia r6!,{r8-r11,r13}          ; Save instructions and stack.
  mov r2,r1                       ; Put pattern in other longwords.
  mov r3,r1
  mov r4,r1
  mov r5,r1
  mov r6,r1
  mov r7,r1
  mov r8,r1
  mov r9,r1
  mov r10,r1
  mov r11,r1
  mov r12,r1
  mov r13,r1                      ; r13 will be reloaded by the generated code.
; We fall through into the loop: one line is drawn per pass, and the
; generated bGE makes it run dy+1 times.
._code
  subS r14,r14,#1                 ; One line will be drawn.
  dbd 3+6+3+4                     ; Space for the code and stack.
; Only reached from _last_long_full, when the last filling instruction is
; a stmia: r0 must then be moved to the next line with one or two adds.
._generate_add
  cmp r5,#0                       ; Offset is null?
  bEQ _generate_endcode           ; Then go on...
  add r6,r6,#4                    ; Don't modify loaded instruction.
  mov r8,#&e28<<20                ; r8=opcode of 'add r0,r0,#0'.
  cmp r5,#255                     ; Offset bigger than 255?
  addGE r2,r8,#255                ; Then generate an 'add r0,r0,#255'
  strGE r2,[r6],#4
  subGE r5,r5,#255                ; and substract 255 to offset.
  add r8,r8,r5                    ; r8='add r0,r0,#offset'.
  b _generate_endcode

; Table of the opcodes filling 1 up to 12 longs. It is only read as data by
; the generator, and its position just before _opcodes matters.
  str r1,[r0],#4                  ; Opcodes for lasts longs filling.
  stmia r0!,{r1-r2}
  stmia r0!,{r1-r3}
  stmia r0!,{r1-r4}
  stmia r0!,{r1-r5}
  stmia r0!,{r1-r6}
  stmia r0!,{r1-r7}
  stmia r0!,{r1-r8}
  stmia r0!,{r1-r9}
  stmia r0!,{r1-r10}
  stmia r0!,{r1-r11}
  stmia r0!,{r1-r12}
._opcodes
  strB r1,[r0],#1                 ; Byte filling instruction.
  stmia r0!,{r1-r13}              ; Maximum filling instruction.
  ldr r13,[pc,#0]                 ; Load stack which is 8 bytes after.
  ldmfd r13!,{r0-r12,pc}          ; And quit.
._small_adr
  dcd _small1                     ; Adresses for the routines corresponding
  dcd _small2                     ; to x1-x2 being in same longword. They
  dcd _small3                     ; are indexed with dx=0..3.
  dcd _small4

; Here are the routines for small boxes. All of them draw dy+1 lines.
._small1
  strB r1,[r0],#320
  subS r14,r14,#1
  bGE _small1
  ldmfd r13!,{r0-r12,pc}

._small2
  strB r1,[r0],#1
  strB r1,[r0],#319
  subS r14,r14,#1
  bGE _small2
  ldmfd r13!,{r0-r12,pc}

._small3
  strB r1,[r0],#1
  strB r1,[r0],#1
  strB r1,[r0],#318
  subS r14,r14,#1
  bGE _small3
  ldmfd r13!,{r0-r12,pc}

._small4
  str r1,[r0],#320
  subS r14,r14,#1
  bGE _small4
  ldmfd r13!,{r0-r12,pc}

;---- StrongARM version. No generated code. ---------------------------------
; Here r0=screen+y1*320, r2=x1, r4=x2 and r14=dy. On each line the edges
; are filled with bytes until both ends are longword aligned, then we jump
; into a run of 80 str, as far from its end as there are longs to fill.
._StrongFastBox
  adr r6,_next_hline
  add r4,r4,#1                    ; r4=x2+1, right border is now exclusive.
  add r14,r14,#1                  ; r14=nb of lines=dy+1.
._one_hline
  add r3,r0,r2                    ; r3=@xleft.
  add r5,r0,r4                    ; r5=@xright.
  cmp r5,r3                       ; xright=<xleft?
  bLE _next_hline
  tst r3,#%01                     ; Left adress is odd?
  strNEB r1,[r3],#1               ; Then fill one byte going right.
  tst r5,#%01                     ; Right adress is odd?
  strNEB r1,[r5,#-1]!             ; Then fill one byte going left.
  cmp r5,r3                       ; Nothing left between them?
  bLE _next_hline
  tst r3,#%10                     ; Left adress not longword aligned?
  strNEB r1,[r3],#1               ; Then fill two bytes going right.
  strNEB r1,[r3],#1
  tst r5,#%10                     ; Right adress not longword aligned?
  strNEB r1,[r5,#-1]!             ; Then fill two bytes going left.
  strNEB r1,[r5,#-1]!
  sub r5,r5,r3                    ; r5=nb of longs to fill*4.
  sub pc,r6,r5                    ; Execute only the last r5/4 str.
#rept 80
  str r1,[r3],#4
#endr
._next_hline
  add r0,r0,#320                  ; Next line.
  subS r14,r14,#1                 ; One line done.
  bNE _one_hline
  ldmfd r13!,{r0-r12,pc}

; ---------------------------------------------------------------------------
; ---                   256 colors Polygon routine.                       ---
; ---               © Alain BROBECKER     ???-August 96                   ---
; ---------------------------------------------------------------------------
; * The hline filling part is greatly inspired from the one by Jan/BASS.
;   (Some will say entirely =) Too bad, but when writing my own I got a look
;   at Jan's routine, and I must admit I can't do better. I was not all
;   that far behind though, but having lotsa problems with clipping and Jan
;   has solved them in an elegant way. (drawing from xleft to xright-1 instead
;   of xright) ThanX again, Jan...
; * The routine expects you to give a list of the polygon brows in anticlock
;   cycle, and have extra storage just after the list. (Because I copy the
;   brows one more time)
; * The upper clipping is performed by calculating slopes of the first partly
;   visibles edges, and passing all invisibles lines with a multiply. The lower
;   clipping is made by just reducing the nb of hlines to draw.
; * Horizontal clipping (performed while drawing) depends upon hclip and vclip
;   parameters, but I assume the screen is 320 bytes wide anyway.
;
; Parameters are...
;     r0 = videoram address.
;     r1 = filling pattern.
;     r2 = address of 13 longs for storage + inverses table.
;     r3 = brows coords in anticlock cycle. (+extra storage)
;     r4 = nb of brows.
;
; Notes on the interface, as used by the code below:
;   - A brow is two longs, x then y. The r4 brows at r3 are copied a second
;     time right after themselves, so the table needs room for 2*r4 brows.
;   - The first instruction stores r0 and r3-r14 (13 longs) at r2; the table
;     of inverses must follow immediately and is indexed by dy (one long per
;     entry, value 2^inv_N/dy).
;   - Every exit is a conditional "ldm..db r2!,{r0,r3-r13,pc}": r0 and r2-r13
;     get their entry values back, r1 is never written, and the saved r14 is
;     loaded into pc. r14 itself is not restored.
;   - The copy loop decrements r4 before testing it, so it assumes r4>=2.
.FastPoly256
  stmia     r2!,{r0,r3-r14}         ; Save registers | r2 points on inverses.
; Copy the brows a second time and search bounding box coords. Also since
; we want r3 to point on upper_left brow (anticlock cycle) and r4 point on
; upper_right brow, (clock) we already prepare for this. (will be continued
; a bit after)
  add       r5,r3,r4,lsl #3         ; r5 points after coords table.
  ldmia     r3!,{r6,r7}             ; Load first brow.
  stmia     r5!,{r6,r7}             ; And copy it.
  mov       r8,r6                   ; r6=xmax | r8=xmin.
  mov       r10,r7                  ; r7=ymax | r10=ymin.
  sub       r9,r3,#8                ; r9 will point on upper_left brow.
  sub       r11,r5,#8               ; r11 will point on upper_right brow.
  sub       r4,r4,#1                ; First brow copied.
._copy_and_search
  ldmia     r3!,{r12,r13}           ; Load brow.
  stmia     r5!,{r12,r13}           ; Copy it.
  cmp       r6,r12                  ; Flags=xmax-x.
  movMI     r6,r12                  ; If xmax-x<0 then x is new xmax.
  cmp       r12,r8                  ; Flags=x-xmin.
  movMI     r8,r12                  ; If x-xmin<0 then x is new xmin.
  cmp       r7,r13                  ; Flags=ymax-y.
  movMI     r7,r13                  ; If ymax-y<0 then y is new ymax.
  cmp       r13,r10                 ; Flags=y-ymin. (used by the 3 next instr)
  movMI     r10,r13                 ; If y-ymin<0 then y is new ymin.
  subLE     r9,r3,#8                ; y<=ymin => it's new upper_left brow.
  subMI     r11,r5,#8               ; y<ymin => it's new upper_right brow.
  subS      r4,r4,#1                ; One brow copied.
  bNE       _copy_and_search
; Now, according to xmin,xmax,ymin & ymax, we can see if the polygon is
; totally out of the screen and in such case we quit. Also we quit if
; ymin=ymax or ymax-ymin>=inv_nb.
  cmp       r6,#0                   ; Flags=xmax-0.
  cmpGE     r7,#0                   ; If xmax>=0, then flags=ymax-0.
  ldmMIdb   r2!,{r0,r3-r13,pc}      ; Quit if xmax<0 or ymax<0.
  cmp       r8,#hclip               ; Flags=xmin-hclip.
  cmpMI     r10,#vclip              ; If xmin<hclip, then flags=ymin-vclip.
  ldmGEdb   r2!,{r0,r3-r13,pc}      ; Quit if xmin>=hclip or ymin>=vclip.
  subS      r14,r10,r7              ; r14=ymin-ymax.
  cmnNE     r14,#inv_nb             ; If ymin<>ymax, check ymin-ymax+inv_nb.
  ldmLEdb   r2!,{r0,r3-r13,pc}      ; Quit if ymin=ymax or ymax-ymin>=inv_nb.
; We made r9 point on the last upper brow we saw, but if next point has the
; same y (can happen when 1st brow(s) and last one(s) are ymin) then this
; one is on the left, so we must take it as new upper_left brow.
  ldr       r14,[r9,#12]!           ; Load next_y coord. (anticlock)
._upleft_search
  cmp       r10,r14                 ; next_y=ymin?
  ldrEQ     r14,[r9,#8]!            ; Then this point is on the left, take
  bEQ       _upleft_search          ; it as new upper_left brow and go on..
  subS      r3,r9,#12               ; r3 points on upper_left brow. (anticlock)
; Same comment for upper_right brow except that we go in clock cycle.
  ldr       r14,[r11,#-4]!          ; Load next y coord. (clock)
._upright_search
  cmp       r10,r14                 ; next_y=ymin?
  ldrEQ     r14,[r11,#-8]!          ; Then this point is on the right, take
  bEQ       _upright_search         ; it as new upper_right brow and go on..
  addS      r4,r11,#4               ; r4 points on upper_right brow. (clock)
; Some more inits, and checking for upper clipping.
  adr       r13,_draw_hline         ; Address for filling jump.
  mov       r14,r1                  ; Two registers for stmia2 filling.
  addS      r11,r10,r10,lsl #2      ; r11=ymin*5.
  bMI       _upclip                 ; Perform up clipping if ymin<0.
  add       r0,r0,r11,lsl #6        ; r0=video+320*ymin. (5*64=320)
  ldr       r9,[r3],#8              ; r9=upper_left x and r3 points after.
  ldr       r11,[r4]                ; r11=upper_right x.
  b         _change_both            ; r9=xleft, r10=y, r11=xright are set.

; Poly is clipped up!. At first we search the first left edge which is partly
; visible, we compute its slope and its intersection with the upper border.
._upclip
  mov       r11,r10                 ; r11=r10=ymin.
  ldr       r5,[r3],#8              ; r5=xleft.
._upclip_search_left
  ldmia     r3!,{r7,r9}             ; Load next_xleft,next_yleft.
  cmp       r9,#0                   ; next_yleft>=0?
  movLT     r5,r7                   ; No, continue to search, with this brow
  movLT     r10,r9                  ; as the new start of the left edge.
  bLT       _upclip_search_left
  sub       r7,r7,r5                ; r7=dxleft.
  mov       r5,r5,lsl #inv_N        ; r5=xleft<<inv_N.
  subS      r12,r9,r10              ; r12=dyleft.
  ldmLEdb   r2!,{r0,r3-r13,pc}      ; Quit if dyleft=<0.
  ldr       r12,[r2,r12,lsl #2]     ; r12=2^inv_N/dyleft.
  mul       r7,r12,r7               ; r7=int(dxleft)/dyleft<<inv_N.
  rsb       r10,r10,#0              ; r10=nb of lines to pass (>=0).
  mla       r5,r10,r7,r5            ; r5=clipped_xleft<<inv_N.
; Do the upper clipping with right edge in the same way.
  ldr       r6,[r4]                 ; r6=xright.
._upclip_search_right
  ldmdb     r4!,{r8,r10}            ; Load next_xright,next_yright.
  cmp       r10,#0                  ; next_yright>=0?
  movLT     r6,r8                   ; No, then continue to search, with this
  movLT     r11,r10                 ; brow as the new start of the right edge.
  bLT       _upclip_search_right
  sub       r8,r8,r6                ; r8=dxright.
  mov       r6,r6,lsl #inv_N        ; r6=xright<<inv_N.
  sub       r12,r10,r11             ; r12=dyright.
  ldr       r12,[r2,r12,lsl #2]     ; r12=2^inv_N/dyright.
  mul       r8,r12,r8               ; r8=int(dxright)/dyright<<inv_N.
  rsb       r11,r11,#0              ; r11=nb of lines to pass (>=0).
  mla       r6,r11,r8,r6            ; r6=clipped_xright<<inv_N.
; While making the upclip we have computed the slopes, so we can now use
; them for edges tracking and begin directly by drawing the poly.
  cmp       r9,r10                  ; Flags=next_yleft-next_yright.
  movGE     r9,r10                  ; Minimum in r9=nb of lines to draw.
  cmp       r9,#vclip               ; Is it greater than vclip?
  movGE     r9,#vclip               ; Then we won't go after vclip-1.
  add       r10,r0,r5,asr #inv_N    ; r10=address of xleft pixel.
  cmp       r5,#0                   ; Flags=xleft-0.
  movLT     r10,r0                  ; If xleft<0, then clip to left.
  b         _slopes_changed         ; And start to draw.

; Here are the routines for edges tracking and polygon drawing.
;   r0=address of first hline    |  r1 & r14=filling pattern
;   r2=inverses table address    |  r3=points after end brow of left edge
;   r4=points on end brow of     |  r5=xleft<<inv_N
;      right edge                |  r7=dxleft/dyleft<<inv_N
;   r6=xright<<inv_N             |  r9=nb of lines to draw
;   r8=dxright/dyright<<inv_N    |  r13=address of _draw_hline
;   r10 & r11 used as temp       |
._change_slopes
  ldmdb     r3,{r9,r10}             ; r9=xleft | r10=yleft.
  ldmia     r4,{r11,r12}            ; r11=xright | r12=yright.
  cmp       r10,r12                 ; Which edge(s) did we reach the end of?
  bGT       _change_right
  bEQ       _change_both
; Here we have r10=yleft<r12=yright.
._change_left
  cmp       r10,#vclip              ; Current yleft is out of screen?
  ldmGEdb   r2!,{r0,r3-r13,pc}      ; Then quit.
  mov       r5,r9,lsl #inv_N        ; r5=xleft<<inv_N.
  ldmia     r3!,{r7,r11}            ; r7=next_xleft | r11=next_yleft.
  sub       r7,r7,r5,asr #inv_N     ; r7=int(dxleft).
  sub       r9,r11,r10              ; r9=next_yleft-yleft=dyleft.
  ldr       r9,[r2,r9,lsl #2]       ; r9=2^inv_N/dyleft.
  mul       r7,r9,r7                ; r7=int(dxleft)/dyleft<<inv_N.
  cmp       r11,r12                 ; Flags=next_yleft-yright.
  movGT     r11,r12                 ; r11=min(next_yleft;yright).
  cmp       r11,#vclip              ; Is it greater than vclip?
  movGE     r11,#vclip              ; Then we won't go after vclip-1.
  sub       r9,r11,r10              ; r9=dy=min(...)-yleft.
  add       r10,r0,r5,asr #inv_N    ; r10=address of xleft pixel.
  cmp       r5,#0                   ; Flags=xleft-0.
  movLT     r10,r0                  ; If xleft<0, then clip to left.
  b         _slopes_changed
; Here we have r12=yright<r10=yleft.
._change_right
  cmp       r12,#vclip              ; Current yright is out of screen?
  ldmGEdb   r2!,{r0,r3-r13,pc}      ; Then quit.
  mov       r6,r11,lsl #inv_N       ; r6=xright<<inv_N.
  ldmdb     r4!,{r8,r9}             ; r8=next_xright | r9=next_yright.
  subS      r8,r8,r6,asr #inv_N     ; r8=int(dxright).
  sub       r11,r9,r12              ; r11=next_yright-yright.
  subPL     r11,r11,#1              ; If dxright>=0 use dyright-1 instead.
                                    ; (This is for better result.)
  ldr       r11,[r2,r11,lsl #2]     ; r11=2^inv_N/(dyright).
  mul       r8,r11,r8               ; r8=int(dxright)/dyright<<inv_N.
  cmp       r9,r10                  ; Flags=next_yright-yleft.
  movGT     r9,r10                  ; r9=min(next_yright;yleft).
  cmp       r9,#vclip               ; Is it greater than vclip?
  movGE     r9,#vclip               ; Then we won't go after vclip-1.
  sub       r9,r9,r12               ; r9=dy=min(...)-yright.
  add       r10,r0,r5,asr #inv_N    ; r10=address of xleft pixel.
  cmp       r5,#0                   ; Flags=xleft-0.
  movLT     r10,r0                  ; If xleft<0, then clip to left.
  b         _slopes_changed
; Here we have r10=yleft=r12=yright=y.
._change_both
  cmp       r10,#vclip              ; Current y is out of screen?
  ldmGEdb   r2!,{r0,r3-r13,pc}      ; Then quit.
  mov       r5,r9,lsl #inv_N        ; r5=xleft<<inv_N.
  mov       r6,r11,lsl #inv_N       ; r6=xright<<inv_N.
  ldmia     r3!,{r7,r9}             ; r7=next_xleft | r9=next_yleft.
  ldmdb     r4!,{r8,r11}            ; r8=next_xright | r11=next_yright.
  sub       r7,r7,r5,asr #inv_N     ; r7=int(dxleft).
  subS      r12,r9,r10              ; r12=next_yleft-y=dyleft.
  ldmLEdb   r2!,{r0,r3-r13,pc}      ; Quit if dyleft=<0.
  ldr       r12,[r2,r12,lsl #2]     ; r12=2^inv_N/dyleft.
  mul       r7,r12,r7               ; r7=int(dxleft)/dyleft<<inv_N.
  subS      r8,r8,r6,asr #inv_N     ; r8=int(dxright).
  sub       r12,r11,r10             ; r12=next_yright-y=dyright.
  subPL     r12,r12,#1              ; If dxright>=0 use dyright-1 instead.
                                    ; (This is for better result.)
  ldr       r12,[r2,r12,lsl #2]     ; r12=2^inv_N/(dyright).
  mul       r8,r12,r8               ; r8=int(dxright)/dyright<<inv_N.
  cmp       r9,r11                  ; Flags=next_yleft-next_yright.
  movGT     r9,r11                  ; r9=min(next_yleft;next_yright).
  cmp       r9,#vclip               ; Is it greater than vclip?
  movGE     r9,#vclip               ; Then we won't go after vclip-1.
  sub       r9,r9,r10               ; r9=dy=min(...)-y.
  add       r10,r0,r5,asr #inv_N    ; r10=address of xleft pixel.
  cmp       r5,#0                   ; Flags=xleft-0.
  movLT     r10,r0                  ; If xleft<0, then clip to left.
  b         _slopes_changed

; Here's the filling routine, inspired (ripped?) from the one by Jan/BASS.
; The main difference being it is commented! =)
; The 40 stmia below each write 8 bytes. The hline code jumps into the middle
; of them (see the "sub pc" at the very end) so that only the needed number
; is executed before falling through into _draw_hline.
#rept 40
  stmia     r10!,{r1,r14}
#endr
._draw_hline
  add       r0,r0,#320              ; Next hline.
  addS      r5,r5,r7                ; xleft+=dxleft/dyleft.
  add       r10,r0,r5,asr #inv_N    ; r10=address of xleft pixel.
  movLT     r10,r0                  ; If xleft<0, then clip to left.
  add       r6,r6,r8                ; xright+=dxright/dyright.
  subS      r9,r9,#1                ; One line drawn.
  bLE       _change_slopes          ; No line left with these slopes.
._slopes_changed
  add       r11,r0,r6,asr #inv_N    ; r11=address of xright pixel.
  cmp       r6,#hclip<<inv_N        ; xright>hclip?
  addGT     r11,r0,#hclip           ; Then clip to right.
  cmp       r11,r10                 ; xright=<xleft?
  bLE       _draw_hline             ; Then nothing to draw on this line.
  tst       r10,#%01                ; Left address is odd?
  strNEB    r1,[r10],#1             ; Then fill one byte to make it even.
  tst       r11,#%01                ; Right address is odd?
  strNEB    r1,[r11,#-1]!           ; Then fill one byte to make it even.
  cmp       r11,r10                 ; Anything left between them?
  bLE       _draw_hline
  tst       r10,#%10                ; Left address not long aligned?
  strNEB    r1,[r10],#1             ; Then fill two bytes to align it.
  strNEB    r1,[r10],#1
  tst       r11,#%10                ; Right address not long aligned?
  strNEB    r1,[r11,#-1]!           ; Then fill two bytes to align it.
  strNEB    r1,[r11,#-1]!
  sub       r11,r11,r10             ; r11=nb of bytes left to fill.
  movS      r11,r11,lsr #3          ; r11=nb of 8 bytes groups | carry=bit 2.
  strCS     r1,[r10],#4             ; Carry set => one odd long to fill first.
  sub       pc,r13,r11,lsl #2       ; Execute the r11 last stmia of the block.

#if zizik
.player
  incbin "DSymPlay"                 ; BASS' productions are so good.
  ALIGN
.music
  incbin "z00m!n"                   ; Wow!
  ALIGN
#endif

;-----------------------> THIS MUST BE AT VERY END <-----------------------
.bss