home *** CD-ROM | disk | FTP | other *** search
/ EuroCD 3 / EuroCD 3.iso / Programming / vbcc / machines / amiga68k / libsrc / math / math_040 / cos.s < prev    next >
Encoding:
Text File  |  1998-06-24  |  22.3 KB  |  530 lines

  1. *
  2. *   $VER: cos.s 33.1 (22.1.97)
  3. *
  4. *   calculates the sine or cosine of the source operand
  5. *
  6. *   Version history:
  7. *
  8. *   33.1    22.1.97 (c) Motorola
  9. *
  10. *           - snipped from M68060SP
  11. *
  12. *   33.2    24.1.97 laukkanen
  13. *
  14. *           - cos(0) was broken, found out this when trying
  15. *             to link mp3decoder with the new library, fixed.
  16. *
  17.  
  18.     machine 68040
  19.     fpu     1
  20.  
  21.     XDEF    _sin
  22.     XDEF    @sin
  23.     XDEF    _cos
  24.     XDEF    @cos
  25.     XDEF    PITBL
  26.  
  27. *************************************************************************
  28. * sin():      computes the sine of a normalized input                   *
  29. * cos():      computes the cosine of a normalized input                 *
  30. *                                                                       *
  31. * INPUT *************************************************************** *
  32. *       fp0 = extended precision input                                  *
  33. *                                                                       *
  34. * OUTPUT ************************************************************** *
  35. *       fp0 = sin(X) or cos(X)                                          *
  36. *                                                                       *
  37. * ACCURACY and MONOTONICITY ******************************************* *
  38. *       The returned result is within 1 ulp in 64 significant bit, i.e. *
  39. *       within 0.5001 ulp to 53 bits if the result is subsequently      *
  40. *       rounded to double precision. The result is provably monotonic   *
  41. *       in double precision.                                            *
  42. *                                                                       *
  43. * ALGORITHM *********************************************************** *
  44. *                                                                       *
  45. *       SIN and COS:                                                    *
  46. *       1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.  *
  47. *                                                                       *
  48. *       2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.                   *
  49. *                                                                       *
  50. *       3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let        *
  51. *               k = N mod 4, so in particular, k = 0,1,2,or 3.          *
  52. *               Overwrite k by k := k + AdjN.                           *
  53. *                                                                       *
  54. *       4. If k is even, go to 6.                                       *
  55. *                                                                       *
  56. *       5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.                 *
  57. *               Return sgn*cos(r) where cos(r) is approximated by an    *
  58. *               even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),  *
  59. *               s = r*r.                                                *
  60. *               Exit.                                                   *
  61. *                                                                       *
  62. *       6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)  *
  63. *               where sin(r) is approximated by an odd polynomial in r  *
  64. *               r + r*s*(A1+s*(A2+ ... + s*A7)),        s = r*r.        *
  65. *               Exit.                                                   *
  66. *                                                                       *
  67. *       7. If |X| > 1, go to 9.                                         *
  68. *                                                                       *
  69. *       8. (|X|<2**(-40)) If SIN is invoked, return X;                  *
  70. *               otherwise return 1.                                     *
  71. *                                                                       *
  72. *       9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,           *
  73. *               go back to 3.                                           *
  74. *                                                                       *
  75. *************************************************************************
  76.  
  77. SINA7   dc.l            $BD6AAA77,$CCC994F5
  78. SINA6   dc.l            $3DE61209,$7AAE8DA1
  79. SINA5   dc.l            $BE5AE645,$2A118AE4
  80. SINA4   dc.l            $3EC71DE3,$A5341531
  81. SINA3   dc.l            $BF2A01A0,$1A018B59,$00000000,$00000000
  82. SINA2   dc.l            $3FF80000,$88888888,$888859AF,$00000000
  83. SINA1   dc.l            $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000
  84. TWOBYPI dc.l            $3FE45F30,$6DC9C883
  85. COSB8   dc.l            $3D2AC4D0,$D6011EE3
  86. COSB7   dc.l            $BDA9396F,$9F45AC19
  87. COSB6   dc.l            $3E21EED9,$0612C972
  88. COSB5   dc.l            $BE927E4F,$B79D9FCF
  89. COSB4   dc.l            $3EFA01A0,$1A01D423,$00000000,$00000000
  90. COSB3   dc.l            $BFF50000,$B60B60B6,$0B61D438,$00000000
  91. COSB2   dc.l            $3FFA0000,$AAAAAAAA,$AAAAAB5E
  92. COSB1   dc.l            $BF000000
  93.  
  94. PITBL   dc.l            $C0040000,$C90FDAA2,$2168C235,$21800000
  95.         dc.l            $C0040000,$C2C75BCD,$105D7C23,$A0D00000
  96.         dc.l            $C0040000,$BC7EDCF7,$FF523611,$A1E80000
  97.         dc.l            $C0040000,$B6365E22,$EE46F000,$21480000
  98.         dc.l            $C0040000,$AFEDDF4D,$DD3BA9EE,$A1200000
  99.         dc.l            $C0040000,$A9A56078,$CC3063DD,$21FC0000
  100.         dc.l            $C0040000,$A35CE1A3,$BB251DCB,$21100000
  101.         dc.l            $C0040000,$9D1462CE,$AA19D7B9,$A1580000
  102.         dc.l            $C0040000,$96CBE3F9,$990E91A8,$21E00000
  103.         dc.l            $C0040000,$90836524,$88034B96,$20B00000
  104.         dc.l            $C0040000,$8A3AE64F,$76F80584,$A1880000
  105.         dc.l            $C0040000,$83F2677A,$65ECBF73,$21C40000
  106.         dc.l            $C0030000,$FB53D14A,$A9C2F2C2,$20000000
  107.         dc.l            $C0030000,$EEC2D3A0,$87AC669F,$21380000
  108.         dc.l            $C0030000,$E231D5F6,$6595DA7B,$A1300000
  109.         dc.l            $C0030000,$D5A0D84C,$437F4E58,$9FC00000
  110.         dc.l            $C0030000,$C90FDAA2,$2168C235,$21000000
  111.         dc.l            $C0030000,$BC7EDCF7,$FF523611,$A1680000
  112.         dc.l            $C0030000,$AFEDDF4D,$DD3BA9EE,$A0A00000
  113.         dc.l            $C0030000,$A35CE1A3,$BB251DCB,$20900000
  114.         dc.l            $C0030000,$96CBE3F9,$990E91A8,$21600000
  115.         dc.l            $C0030000,$8A3AE64F,$76F80584,$A1080000
  116.         dc.l            $C0020000,$FB53D14A,$A9C2F2C2,$1F800000
  117.         dc.l            $C0020000,$E231D5F6,$6595DA7B,$A0B00000
  118.         dc.l            $C0020000,$C90FDAA2,$2168C235,$20800000
  119.         dc.l            $C0020000,$AFEDDF4D,$DD3BA9EE,$A0200000
  120.         dc.l            $C0020000,$96CBE3F9,$990E91A8,$20E00000
  121.         dc.l            $C0010000,$FB53D14A,$A9C2F2C2,$1F000000
  122.         dc.l            $C0010000,$C90FDAA2,$2168C235,$20000000
  123.         dc.l            $C0010000,$96CBE3F9,$990E91A8,$20600000
  124.         dc.l            $C0000000,$C90FDAA2,$2168C235,$1F800000
  125.         dc.l            $BFFF0000,$C90FDAA2,$2168C235,$1F000000
  126.         dc.l            $00000000,$00000000,$00000000,$00000000
  127.         dc.l            $3FFF0000,$C90FDAA2,$2168C235,$9F000000
  128.         dc.l            $40000000,$C90FDAA2,$2168C235,$9F800000
  129.         dc.l            $40010000,$96CBE3F9,$990E91A8,$A0600000
  130.         dc.l            $40010000,$C90FDAA2,$2168C235,$A0000000
  131.         dc.l            $40010000,$FB53D14A,$A9C2F2C2,$9F000000
  132.         dc.l            $40020000,$96CBE3F9,$990E91A8,$A0E00000
  133.         dc.l            $40020000,$AFEDDF4D,$DD3BA9EE,$20200000
  134.         dc.l            $40020000,$C90FDAA2,$2168C235,$A0800000
  135.         dc.l            $40020000,$E231D5F6,$6595DA7B,$20B00000
  136.         dc.l            $40020000,$FB53D14A,$A9C2F2C2,$9F800000
  137.         dc.l            $40030000,$8A3AE64F,$76F80584,$21080000
  138.         dc.l            $40030000,$96CBE3F9,$990E91A8,$A1600000
  139.         dc.l            $40030000,$A35CE1A3,$BB251DCB,$A0900000
  140.         dc.l            $40030000,$AFEDDF4D,$DD3BA9EE,$20A00000
  141.         dc.l            $40030000,$BC7EDCF7,$FF523611,$21680000
  142.         dc.l            $40030000,$C90FDAA2,$2168C235,$A1000000
  143.         dc.l            $40030000,$D5A0D84C,$437F4E58,$1FC00000
  144.         dc.l            $40030000,$E231D5F6,$6595DA7B,$21300000
  145.         dc.l            $40030000,$EEC2D3A0,$87AC669F,$A1380000
  146.         dc.l            $40030000,$FB53D14A,$A9C2F2C2,$A0000000
  147.         dc.l            $40040000,$83F2677A,$65ECBF73,$A1C40000
  148.         dc.l            $40040000,$8A3AE64F,$76F80584,$21880000
  149.         dc.l            $40040000,$90836524,$88034B96,$A0B00000
  150.         dc.l            $40040000,$96CBE3F9,$990E91A8,$A1E00000
  151.         dc.l            $40040000,$9D1462CE,$AA19D7B9,$21580000
  152.         dc.l            $40040000,$A35CE1A3,$BB251DCB,$A1100000
  153.         dc.l            $40040000,$A9A56078,$CC3063DD,$A1FC0000
  154.         dc.l            $40040000,$AFEDDF4D,$DD3BA9EE,$21200000
  155.         dc.l            $40040000,$B6365E22,$EE46F000,$A1480000
  156.         dc.l            $40040000,$BC7EDCF7,$FF523611,$21E80000
  157.         dc.l            $40040000,$C2C75BCD,$105D7C23,$20D00000
  158.         dc.l            $40040000,$C90FDAA2,$2168C235,$A1800000
  159.  
  160. X           EQU         -12
  161. XFRAC       EQU         X+4
  162. POSNEG1     EQU         -16
  163. ADJN        EQU         -20
  164. INT         EQU         -16
  165. TEMP_SIZE   EQU         20
  166.  
  167. ********************************************
  168. _sin
  169.         fmove.d         (4,sp),fp0
  170. @sin
  171.         link            a0,#-TEMP_SIZE
  172.         clr.l           (ADJN,a0)               ; yes; SET ADJN TO 0
  173.         bra.b           SINBGN
  174.  
  175. ********************************************
  176.  
  177. _cos
  178.         fmove.d         (4,sp),fp0
  179. @cos
  180.         link            a0,#-TEMP_SIZE
  181.         moveq           #1,d0
  182.         move.l          d0,(ADJN,a0)            ; yes; SET ADJN TO 1
  183.  
  184. ********************************************
  185.  
  186. SINBGN
  187.  
  188. *--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
  189.  
  190.         fmove.x         fp0,(X,a0)              ; save input at X
  191.  
  192. * "COMPACTIFY" X
  193.         move.l          (X,a0),d1               ; put exp in hi word
  194.         move.w          (X+4,a0),d1             ; fetch hi(man)
  195.         and.l           #$7FFFFFFF,d1           ; strip sign
  196.  
  197.         cmpi.l          #$3FD78000,d1           ; is |X| >= 2**(-40)?
  198.         bge.b           .SOK1                   ; no
  199.         bra.w           .SINSM                  ; yes; input is very small
  200.  
  201. .SOK1
  202.         cmp.l           #$4004BC7E,d1           ; is |X| < 15 PI?
  203.         blt.b           .SINMAIN                ; no
  204.         bra.w           .SREDUCEX               ; yes; input is very large
  205.  
  206. *--THIS IS THE USUAL CASE, |X| <= 15 PI.
  207. *--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
  208. .SINMAIN
  209.         fmove.x         fp0,fp1
  210.         fmul.d          (TWOBYPI,pc),fp1        ; X*2/PI
  211.         lea             (PITBL+$200,pc),a1      ; TABLE OF N*PI/2, N = -32,...,32
  212.  
  213.         fmove.l         fp1,(INT,a0)            ; CONVERT TO INTEGER
  214.  
  215.         move.l          (INT,a0),d1             ; make a copy of N
  216.         asl.l           #4,d1                   ; N *= 16
  217.         add.l           d1,a1                   ; tbl_addr = a1 + (N*16)
  218.  
  219. * A1 IS THE ADDRESS OF N*PIBY2
  220. * ...WHICH IS IN TWO PIECES Y1 # Y2
  221.         fsub.x          (a1)+,fp0               ; X-Y1
  222.         fsub.s          (a1),fp0                ; fp0 = R = (X-Y1)-Y2
  223.         asr.l           #4,d1
  224. .SINCONT
  225. *--continuation from REDUCEX
  226.  
  227. *--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
  228.         add.l           (ADJN,a0),d1            ; SEE IF D0 IS ODD OR EVEN
  229.         ror.l           #1,d1                   ; D0 WAS ODD IFF D0 IS NEGATIVE
  230.         tst.l           d1
  231.         blt.w           .COSPOLY
  232.  
  233. *--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
  234. *--THEN WE RETURN       SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
  235. *--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
  236. *--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
  237. *--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
  238. *--WHERE T=S*S.
  239. *--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
  240. *--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
  241. .SINPOLY
  242.         fmovem.x        fp2/fp3,-(sp)           ; save fp2/fp3
  243.  
  244.         fmove.x         fp0,(X,a0)              ; X IS R
  245.         fmul.x          fp0,fp0                 ; FP0 IS S
  246.  
  247.         fmove.d         (SINA7,pc),fp3
  248.         fmove.d         (SINA6,pc),fp2
  249.  
  250.         fmove.x         fp0,fp1
  251.         fmul.x          fp1,fp1                 ; FP1 IS T
  252.  
  253.         ror.l           #1,d1
  254.         and.l           #$80000000,d1
  255. * ...LEAST SIG. BIT OF D0 IN SIGN POSITION
  256.         eor.l           d1,(X,a0)               ; X IS NOW R'= SGN*R
  257.         fmul.x          fp1,fp3                 ; TA7
  258.         fmul.x          fp1,fp2                 ; TA6
  259.  
  260.         fadd.d          (SINA5,pc),fp3          ; A5+TA7
  261.         fadd.d          (SINA4,pc),fp2          ; A4+TA6
  262.  
  263.         fmul.x          fp1,fp3                 ; T(A5+TA7)
  264.         fmul.x          fp1,fp2                 ; T(A4+TA6)
  265.  
  266.         fadd.d          (SINA3,pc),fp3          ; A3+T(A5+TA7)
  267.         fadd.x          (SINA2,pc),fp2          ; A2+T(A4+TA6)
  268.  
  269.         fmul.x          fp3,fp1                 ; T(A3+T(A5+TA7))
  270.  
  271.         fmul.x          fp0,fp2                 ; S(A2+T(A4+TA6))
  272.         fadd.x          (SINA1,pc),fp1          ; A1+T(A3+T(A5+TA7))
  273.         fmul.x          (X,a0),fp0              ; R'*S
  274.  
  275.         fadd.x          fp2,fp1                 ; [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
  276.  
  277.         fmul.x          fp1,fp0                 ; SIN(R')-R'
  278.  
  279.         fmovem.x        (sp)+,fp2/fp3           ; restore fp2/fp3
  280.  
  281.         fadd.x          (X,a0),fp0              ; last inst - possible exception set
  282.         unlk            a0
  283.         rts
  284.  
  285. *--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
  286. *--THEN WE RETURN       SGN*COS(R). SGN*COS(R) IS COMPUTED BY
  287. *--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
  288. *--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
  289. *--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
  290. *--WHERE T=S*S.
  291. *--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
  292. *--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
  293. *--AND IS THEREFORE STORED AS SINGLE PRECISION.
  294. .COSPOLY
  295.         fmovem.x        fp2/fp3,-(sp)           ; save fp2/fp3
  296.  
  297.         fmul.x          fp0,fp0                 ; FP0 IS S
  298.  
  299.         fmove.d         (COSB8,pc),fp2
  300.         fmove.d         (COSB7,pc),fp3
  301.  
  302.         fmove.x         fp0,fp1
  303.         fmul.x          fp1,fp1                 ; FP1 IS T
  304.         fmove.x         fp0,(X,a0)              ; X IS S
  305.         ror.l           #1,d1
  306.         and.l           #$80000000,d1
  307. * ...LEAST SIG. BIT OF D0 IN SIGN POSITION
  308.  
  309.         fmul.x          fp1,fp2                 ; TB8
  310.  
  311.         eor.l           d1,(X,a0)               ; X IS NOW S'= SGN*S
  312.         and.l           #$80000000,d1
  313.  
  314.         fmul.x          fp1,fp3                 ; TB7
  315.  
  316.         or.l            #$3F800000,d1           ; D0 IS SGN IN SINGLE
  317.         move.l          d1,(POSNEG1,a0)
  318.  
  319.         fadd.d          (COSB6,pc),fp2          ; B6+TB8
  320.         fadd.d          (COSB5,pc),fp3          ; B5+TB7
  321.  
  322.         fmul.x          fp1,fp2                 ; T(B6+TB8)
  323.         fmul.x          fp1,fp3                 ; T(B5+TB7)
  324.  
  325.         fadd.d          (COSB4,pc),fp2          ; B4+T(B6+TB8)
  326.         fadd.x          (COSB3,pc),fp3          ; B3+T(B5+TB7)
  327.  
  328.         fmul.x          fp1,fp2                 ; T(B4+T(B6+TB8))
  329.         fmul.x          fp3,fp1                 ; T(B3+T(B5+TB7))
  330.  
  331.         fadd.x          (COSB2,pc),fp2          ; B2+T(B4+T(B6+TB8))
  332.         fadd.s          (COSB1,pc),fp1          ; B1+T(B3+T(B5+TB7))
  333.  
  334.         fmul.x          fp2,fp0                 ; S(B2+T(B4+T(B6+TB8)))
  335.  
  336.         fadd.x          fp1,fp0
  337.  
  338.         fmul.x          (X,a0),fp0
  339.  
  340.         fmovem.x        (sp)+,fp2/fp3           ; restore fp2/fp3
  341.  
  342.         fadd.s          (POSNEG1,a0),fp0        ; last inst - possible exception set
  343.         unlk            a0
  344.         rts
  345.  
  346. **********************************************
  347.  
  348. * SINe: Big OR Small?
  349. *--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
  350. *--IF |X| < 2**(-40), RETURN X OR 1.
  351. .SINSM
  352.         move.l          (ADJN,a0),d1
  353.         tst.l           d1
  354.         bgt.b           .COSTINY
  355.  
  356. * here, the operation may underflow iff the precision is sgl or dbl.
  357. * extended denorms are handled through another entry point.
  358. .SINTINY
  359.         fmove.x         (X,a0),fp0              ; last inst - possible exception set
  360.         unlk            a0
  361.         rts
  362.  
  363. .COSTINY
  364.         fmove.s         #$3F800000,fp0          ; fp0 = 1.0
  365.         unlk            a0
  366.         rts
  367.  
  368.  
  369.  
  370. FP_SCR0_LO  EQU         -4
  371. FP_SCR0_HI  EQU         -8
  372. FP_SCR0_EX  EQU         -12
  373. FP_SCR0     EQU         -12
  374. FP_SCR1_LO  EQU         -16
  375. FP_SCR1_HI  EQU         -20
  376. FP_SCR1_EX  EQU         -24
  377. FP_SCR1     EQU         -24
  378. INARG       EQU         -12
  379. ENDFLAG     EQU         -28
  380. TWOTO63     EQU         -32
  381.  
  382. REDUCE_SIZE EQU         32
  383.  
  384. ;--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
  385. ;--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
  386. ;--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
  387. .SREDUCEX
  388.         fmovem.x        fp2-fp5,-(sp)           ; save {fp2-fp5}
  389.         movem.l         d2/a0,-(sp)             ; save d2
  390.         fmove.s         #$00000000,fp1          ; fp1 = 0
  391.         link            a0,#-REDUCE_SIZE
  392.  
  393. ;--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
  394. ;--there is a danger of unwanted overflow in first LOOP iteration.  In this
  395. ;--case, reduce argument by one remainder step to make subsequent reduction
  396. ;--safe.
  397.  
  398.         cmp.l           #$7ffeffff,d1           ; is arg dangerously large?
  399.         bne.b           .SLOOP                  ; no
  400.  
  401. ; yes; create 2**16383*PI/2
  402.  
  403.         move.w          #$7ffe,(FP_SCR0_EX,a0)
  404.         move.l          #$c90fdaa2,(FP_SCR0_HI,a0)
  405.         clr.l           (FP_SCR0_LO,a0)
  406.  
  407. ; create low half of 2**16383*PI/2 at FP_SCR1
  408.  
  409.         move.w          #$7fdc,(FP_SCR1_EX,a0)
  410.         move.l          #$85a308d3,(FP_SCR1_HI,a0)
  411.         clr.l           (FP_SCR1_LO,a0)
  412.  
  413.         fcmp.s          #0,fp0                  ; test sign of argument
  414.         fblt.w          .sred_neg
  415.  
  416.         or.b            #$80,(FP_SCR0_EX,a0)    ; positive arg
  417.         or.b            #$80,(FP_SCR1_EX,a0)
  418. .sred_neg
  419.         fadd.x          (FP_SCR0,a0),fp0        ; high part of reduction is ex
  420. .act
  421.         fmove.x         fp0,fp1                 ; save high result in fp1
  422.         fadd.x          (FP_SCR1,a0),fp0        ; low part of reduction
  423.         fsub.x          fp0,fp1                 ; determine low component of r
  424. .esult
  425.         fadd.x          (FP_SCR1,a0),fp1        ; fp0/fp1 are reduced argument
  426. .
  427.  
  428. ;--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
  429. ;--integer quotient will be stored in N
  430. ;--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
  431. .SLOOP
  432.         fmove.x         fp0,(INARG,a0)          ; +-2**K * F, 1 <= F < 2
  433.         move.w          (INARG,a0),d1
  434.         move.l          d1,a1                   ; save a copy of D0
  435.         and.l           #$00007FFF,d1
  436.         sub.l           #$00003FFF,d1           ; d0 = K
  437.         cmp.l           #28,d1
  438.         ble.b           .SLASTLOOP
  439. .SCONTLOOP
  440.         sub.l           #27,d1                  ; d0 = L := K-27
  441.         clr.b           (ENDFLAG,a0)
  442.         bra.b           .SWORK
  443. .SLASTLOOP
  444.         clr.l           d1                      ; d0 = L := 0
  445.         move.b          #1,(ENDFLAG,a0)
  446.  
  447. .SWORK
  448. ;--FIND THE REMAINDER OF (R,r) W.R.T.   2**L * (PI/2). L IS SO CHOSEN
  449. ;--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
  450.  
  451. ;--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
  452. ;--2**L * (PIby2_1), 2**L * (PIby2_2)
  453.  
  454.         move.l          #$00003FFE,d2           ; BIASED EXP OF 2/PI
  455.         sub.l           d1,d2                   ; BIASED EXP OF 2**(-L)*(2/PI)
  456.  
  457.         move.l          #$A2F9836E,(FP_SCR0_HI,a0)
  458.         move.l          #$4E44152A,(FP_SCR0_LO,a0)
  459.         move.w          d2,(FP_SCR0_EX,a0)      ; FP_SCR0 = 2**(-L)*(2/PI)
  460.  
  461.         fmove.x         fp0,fp2
  462.         fmul.x          (FP_SCR0,a0),fp2        ; fp2 = X * 2**(-L)*(2/PI)
  463.  
  464. ;--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
  465. ;--FLOATING POINT FORMAT, THE TWO FMOVE'S       FMOVE.L FP <--> N
  466. ;--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
  467. ;--(SIGN(INARG)*2**63   +       FP2) - SIGN(INARG)*2**63 WILL GIVE
  468. ;--US THE DESIRED VALUE IN FLOATING POINT.
  469.         move.l          a1,d2
  470.         swap            d2
  471.         and.l           #$80000000,d2
  472.         or.l            #$5F000000,d2           ; d2 = SIGN(INARG)*2**63 IN SGL
  473.         move.l          d2,(TWOTO63,a0)
  474.         fadd.s          (TWOTO63,a0),fp2        ; THE FRACTIONAL PART OF FP1 IS ROUNDED
  475.         fsub.s          (TWOTO63,a0),fp2        ; fp2 = N
  476.  
  477. ;--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
  478.         move.l          d1,d2                   ; d2 = L
  479.  
  480.         add.l           #$00003FFF,d2           ; BIASED EXP OF 2**L * (PI/2)
  481.         move.w          d2,(FP_SCR0_EX,a0)
  482.         move.l          #$C90FDAA2,(FP_SCR0_HI,a0)
  483.         clr.l           (FP_SCR0_LO,a0)         ; FP_SCR0 = 2**(L) * Piby2_1
  484.  
  485.         add.l           #$00003FDD,d1
  486.         move.w          d1,(FP_SCR1_EX,a0)
  487.         move.l          #$85A308D3,(FP_SCR1_HI,a0)
  488.         clr.l           (FP_SCR1_LO,a0)         ; FP_SCR1 = 2**(L) * Piby2_2
  489.  
  490.         move.b          (ENDFLAG,a0),d1
  491.  
  492. ;--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
  493. ;--P2 = 2**(L) * Piby2_2
  494.         fmove.x         fp2,fp4                 ; fp4 = N
  495.         fmul.x          (FP_SCR0,a0),fp4        ; fp4 = W = N*P1
  496.         fmove.x         fp2,fp5                 ; fp5 = N
  497.         fmul.x          (FP_SCR1,a0),fp5        ; fp5 = w = N*P2
  498.         fmove.x         fp4,fp3                 ; fp3 = W = N*P1
  499.  
  500. ;--we want P+p = W+w  but  |p| <= half ulp of P
  501. ;--Then, we need to compute  A := R-P   and  a := r-p
  502.         fadd.x          fp5,fp3                 ; fp3 = P
  503.         fsub.x          fp3,fp4                 ; fp4 = W-P
  504.  
  505.         fsub.x          fp3,fp0                 ; fp0 = A := R - P
  506.         fadd.x          fp5,fp4                 ; fp4 = p = (W-P)+w
  507.  
  508.         fmove.x         fp0,fp3                 ; fp3 = A
  509.         fsub.x          fp4,fp1                 ; fp1 = a := r - p
  510.  
  511. ;--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
  512. ;--|r| <= half ulp of R.
  513.         fadd.x          fp1,fp0                 ; fp0 = R := A+a
  514. ;--No need to calculate r if this is the last loop
  515.         tst.b           d1
  516.         bgt.w           .SRESTORE
  517.  
  518. ;--Need to calculate r
  519.         fsub.x          fp0,fp3                 ; fp3 = A-R
  520.         fadd.x          fp3,fp1                 ; fp1 = r := (A-R)+a
  521.         bra.w           .SLOOP
  522.  
  523. .SRESTORE
  524.         fmove.l         fp2,d1
  525.         unlk            a0
  526.         movem.l         (sp)+,d2/a0             ; restore d2
  527.         fmovem.x        (sp)+,fp2-fp5           ; restore {fp2-fp5}
  528.  
  529.         bra.w           .SINCONT
  530.