Main Page   Class Hierarchy   Compound List   File List   Compound Members  

qsqrt.h

00001 /*
00002     Copyright (C) 2000 by Andrew Zabolotny
00003     Fast computation of sqrt(x) and 1/sqrt(x)
00004   
00005     This library is free software; you can redistribute it and/or
00006     modify it under the terms of the GNU Library General Public
00007     License as published by the Free Software Foundation; either
00008     version 2 of the License, or (at your option) any later version.
00009   
00010     This library is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013     Library General Public License for more details.
00014   
00015     You should have received a copy of the GNU Library General Public
00016     License along with this library; if not, write to the Free
00017     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018 */
00019 
00020 // Uncomment the following line to define CS_NO_QSQRT if you experience
00021 // mysterious problems with CS which you think are related to this
00022 // version of sqrt not behaving properly. If you find something like
00023 // that I'd like to be notified of this so we can make sure this really
00024 // is the problem.
00025 //#define CS_NO_QSQRT
00026 
00027 #ifndef __QSQRT_H__
00028 #define __QSQRT_H__
00029 
00030 #if (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_GCC)
00031 
00032 /*
00033   NB: Single-precision floating-point format (32 bits):
00034         SEEEEEEE.EMMMMMMM.MMMMMMMM.MMMMMMMM
00035         S: Sign (0 - positive, 1 - negative)
00036         E: Exponent (plus 127, 8 bits)
00037         M: Mantissa (23 bits)
00038 */
00039 
00048 static inline float qsqrt (float x)
00049 {
00050   float ret;
00051 
00052 // Original C++ formulae:
00053 // float tmp = x;
00054 // *((unsigned *)&tmp) = (0xbe6f0000 - *((unsigned *)&tmp)) >> 1;
00055 // double h = x * 0.5;
00056 // double a = tmp;
00057 // a *= 1.5 - a * a * h;
00058 // a *= 1.5 - a * a * h;
00059 // return a * x;
00060 
00061   __asm__ (
00062                 "flds   %1\n"                   // x
00063                 "movl   $0xbe6f0000,%%eax\n"
00064                 "subl   %1,%%eax\n"
00065                 "shrl   $1,%%eax\n"
00066                 "movl   %%eax,%1\n"
00067                 "flds   %2\n"                   // x 0.5
00068                 "fmul   %%st(1)\n"              // x h
00069                 "flds   %3\n"                   // x h 1.5
00070                 "flds   %1\n"                   // x h 1.5 a
00071                 "fld    %%st\n"                 // x h 1.5 a a
00072                 "fmul   %%st\n"                 // x h 1.5 a a*a
00073                 "fmul   %%st(3)\n"              // x h 1.5 a a*a*h
00074                 "fsubr  %%st(2)\n"              // x h 1.5 a 1.5-a*a*h
00075                 "fmulp  %%st(1)\n"              // x h 1.5 a
00076                 "fld    %%st\n"                 // x h 1.5 a a
00077                 "fmul   %%st\n"                 // x h 1.5 a a*a
00078                 "fmulp  %%st(3)\n"              // x a*a*h 1.5 a
00079                 "fxch\n"                        // x a*a*h a 1.5
00080                 "fsubp  %%st,%%st(2)\n"         // x 1.5-a*a*h a
00081                 "fmulp  %%st(1)\n"              // x a
00082                 "fmulp  %%st(1)\n"              // a
00083         : "=&t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F)
00084         : "eax", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00085   );
00086   return ret;
00087 }
00088 
00096 static inline float qisqrt (float x)
00097 {
00098   float ret;
00099   __asm__ (
00100                 "flds   %1\n"                   // x
00101                 "movl   $0xbe6f0000,%%eax\n"
00102                 "subl   %1,%%eax\n"
00103                 "shrl   $1,%%eax\n"
00104                 "movl   %%eax,%1\n"
00105                 "flds   %2\n"                   // x 0.5
00106                 "fmulp  %%st(1)\n"              // h
00107                 "flds   %3\n"                   // h 1.5
00108                 "flds   %1\n"                   // h 1.5 a
00109                 "fld    %%st\n"                 // h 1.5 a a
00110                 "fmul   %%st\n"                 // h 1.5 a a*a
00111                 "fmul   %%st(3)\n"              // h 1.5 a a*a*h
00112                 "fsubr  %%st(2)\n"              // h 1.5 a 1.5-a*a*h
00113                 "fmulp  %%st(1)\n"              // h 1.5 a
00114                 "fld    %%st\n"                 // h 1.5 a a
00115                 "fmul   %%st\n"                 // h 1.5 a a*a
00116                 "fmulp  %%st(3)\n"              // a*a*h 1.5 a
00117                 "fxch\n"                        // a*a*h a 1.5
00118                 "fsubp  %%st,%%st(2)\n"         // 1.5-a*a*h a
00119                 "fmulp  %%st(1)\n"              // a
00120         : "=t" (ret), "+m" (x) : "m" (0.5F), "m" (1.5F)
00121         : "eax", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
00122   );
00123   return ret;
00124 }
00125 
00126 #else
00127 
00128 #include <math.h>
00129 #define qsqrt(x)  sqrt(x)
00130 #define qisqrt(x) (1.0/sqrt(x))
00131 
00132 #endif
00133 
00134 #endif // __QSQRT_H__

Generated for Crystal Space by doxygen 1.2.5 written by Dimitri van Heesch, ©1997-2000