Re-style fdlibm to conform to jerry guidelines

* First re-style was done automatically by indent to minimize the
  chance of errors during rewrite.

* Manual changes were applied to non-critical places only (comments
  and spaces):
  * Replaced all tabs with spaces.
  * Fixed tab stops in formulae in function comments.
    (Note: ASCII art for math formulae (especially for super- and
    subscripts) is a terrible idea.)
  * Unified the style of function comments.
  * Moved some in-code comments to their right places, which indent
    couldn't handle.
  * Added spaces to formulae of in-code comments to make them more
    readable.
  * Added braces mandated by jerry style guidelines.
  * Added parentheses to multiline #ifdef.

JerryScript-DCO-1.0-Signed-off-by: Akos Kiss akiss@inf.u-szeged.hu
This commit is contained in:
Akos Kiss 2016-03-17 10:42:00 +01:00
parent b39474c746
commit 8dd5186a0d
19 changed files with 2726 additions and 1887 deletions

View File

@ -52,29 +52,29 @@ extern "C"
#define M_2_SQRTPI 1.1283791670955125738961589031215452
// Trigonometric functions
double cos(double);
double sin(double);
double tan(double);
double acos(double);
double asin(double);
double atan(double);
double atan2(double, double);
double cos (double);
double sin (double);
double tan (double);
double acos (double);
double asin (double);
double atan (double);
double atan2 (double, double);
// Exponential and logarithmic functions
double exp(double);
double log(double);
double exp (double);
double log (double);
// Power functions
double pow(double, double);
double sqrt(double);
double pow (double, double);
double sqrt (double);
// Rounding and remainder functions
double ceil(double);
double floor(double);
double ceil (double);
double floor (double);
// Other functions
double fabs(double);
double fmod(double, double);
double fabs (double);
double fmod (double, double);
#ifdef __cplusplus
}

View File

@ -13,43 +13,43 @@
/* Sometimes it's necessary to define __LITTLE_ENDIAN explicitly
but these catch some common cases. */
#if defined(i386) || defined(__i386) || defined(__i386__) || \
defined(i486) || defined(__i486) || defined(__i486__) || \
defined(intel) || defined(x86) || defined(i86pc) || \
defined(__alpha) || defined(__osf__) || \
defined(__x86_64__) || defined(__arm__)
#if (defined (i386) || defined (__i386) || defined (__i386__) || \
defined (i486) || defined (__i486) || defined (__i486__) || \
defined (intel) || defined (x86) || defined (i86pc) || \
defined (__alpha) || defined (__osf__) || \
defined (__x86_64__) || defined (__arm__))
#define __LITTLE_ENDIAN
#endif
#ifdef __LITTLE_ENDIAN
#define __HI(x) *(1+(int*)&x)
#define __LO(x) *(int*)&x
#define __HI(x) *(1 + (int *) &x)
#define __LO(x) *(int *) &x
#else
#define __HI(x) *(int*)&x
#define __LO(x) *(1+(int*)&x)
#define __HI(x) *(int *) &x
#define __LO(x) *(1 + (int *) &x)
#endif
/*
* ANSI/POSIX
*/
#define MAXFLOAT ((float)3.40282346638528860e+38)
#define MAXFLOAT ((float) 3.40282346638528860e+38)
#define HUGE MAXFLOAT
#define HUGE MAXFLOAT
/*
* set X_TLOSS = pi*2**52, which is possibly defined in <values.h>
* (one may replace the following line by "#include <values.h>")
*/
#define X_TLOSS 1.41484755040568800000e+16
#define X_TLOSS 1.41484755040568800000e+16
#define DOMAIN 1
#define SING 2
#define OVERFLOW 3
#define UNDERFLOW 4
#define TLOSS 5
#define PLOSS 6
#define DOMAIN 1
#define SING 2
#define OVERFLOW 3
#define UNDERFLOW 4
#define TLOSS 5
#define PLOSS 6
/*
* ANSI/POSIX

View File

@ -6,31 +6,32 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* acos(x)
* Method :
* acos(x) = pi/2 - asin(x)
* acos(-x) = pi/2 + asin(x)
*
* Method:
* acos(x) = pi/2 - asin(x)
* acos(-x) = pi/2 + asin(x)
* For |x|<=0.5
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
* For x>0.5
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
* = 2asin(sqrt((1-x)/2))
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
* = 2f + (2c + 2s*z*R(z))
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
* = 2asin(sqrt((1-x)/2))
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
* = 2f + (2c + 2s*z*R(z))
* where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
* for f so that f+c ~ sqrt(z).
* For x<-0.5
* acos(x) = pi - 2asin(sqrt((1-|x|)/2))
* = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
* acos(x) = pi - 2asin(sqrt((1-|x|)/2))
* = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
*
* Special cases:
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
*
* Function needed: sqrt
*/
@ -52,44 +53,62 @@
#define qS3 -6.88283971605453293030e-01 /* 0xBFE6066C, 0x1B8D0159 */
#define qS4 7.70381505559019352791e-02 /* 0x3FB3B8C5, 0xB12E9282 */
double acos(double x)
double
acos (double x)
{
double z,p,q,r,w,s,c,df;
int hx,ix;
hx = __HI(x);
ix = hx&0x7fffffff;
if(ix>=0x3ff00000) { /* |x| >= 1 */
if(((ix-0x3ff00000)|__LO(x))==0) { /* |x|==1 */
if(hx>0) return 0.0; /* acos(1) = 0 */
else return pi+2.0*pio2_lo; /* acos(-1)= pi */
}
return (x-x)/(x-x); /* acos(|x|>1) is NaN */
}
if(ix<0x3fe00000) { /* |x| < 0.5 */
if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
z = x*x;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
r = p/q;
return pio2_hi - (x - (pio2_lo-x*r));
} else if (hx<0) { /* x < -0.5 */
z = (one+x)*0.5;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
s = sqrt(z);
r = p/q;
w = r*s-pio2_lo;
return pi - 2.0*(s+w);
} else { /* x > 0.5 */
z = (one-x)*0.5;
s = sqrt(z);
df = s;
__LO(df) = 0;
c = (z-df*df)/(s+df);
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
r = p/q;
w = r*s+c;
return 2.0*(df+w);
}
}
double z, p, q, r, w, s, c, df;
int hx, ix;
hx = __HI (x);
ix = hx & 0x7fffffff;
if (ix >= 0x3ff00000) /* |x| >= 1 */
{
if (((ix - 0x3ff00000) | __LO (x)) == 0) /* |x| == 1 */
{
if (hx > 0) /* acos(1) = 0 */
{
return 0.0;
}
else /* acos(-1) = pi */
{
return pi + 2.0 * pio2_lo;
}
}
return (x - x) / (x - x); /* acos(|x|>1) is NaN */
}
if (ix < 0x3fe00000) /* |x| < 0.5 */
{
if (ix <= 0x3c600000) /* if |x| < 2**-57 */
{
return pio2_hi + pio2_lo;
}
z = x * x;
p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
r = p / q;
return pio2_hi - (x - (pio2_lo - x * r));
}
else if (hx < 0) /* x < -0.5 */
{
z = (one + x) * 0.5;
p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
s = sqrt (z);
r = p / q;
w = r * s - pio2_lo;
return pi - 2.0 * (s + w);
}
else /* x > 0.5 */
{
z = (one - x) * 0.5;
s = sqrt (z);
df = s;
__LO (df) = 0;
c = (z - df * df) / (s + df);
p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
r = p / q;
w = r * s + c;
return 2.0 * (df + w);
}
} /* acos */

View File

@ -6,42 +6,41 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* asin(x)
* Method :
* Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
* we approximate asin(x) on [0,0.5] by
* asin(x) = x + x*x^2*R(x^2)
* where
* R(x^2) is a rational approximation of (asin(x)-x)/x^3
* and its remez error is bounded by
* |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
*
* For x in [0.5,1]
* asin(x) = pi/2-2*asin(sqrt((1-x)/2))
* Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
* then for x>0.98
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
* For x<=0.98, let pio4_hi = pio2_hi/2, then
* f = hi part of s;
* c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
* and
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
* = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
* Method:
* Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
* we approximate asin(x) on [0,0.5] by
* asin(x) = x + x*x^2*R(x^2)
* where
* R(x^2) is a rational approximation of (asin(x)-x)/x^3
* and its remez error is bounded by
* |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
*
* For x in [0.5,1]
* asin(x) = pi/2-2*asin(sqrt((1-x)/2))
* Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
* then for x>0.98
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
* For x<=0.98, let pio4_hi = pio2_hi/2, then
* f = hi part of s;
* c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
* and
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
* = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
*
* Special cases:
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
*
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
*/
#include "fdlibm.h"
#define one 1.00000000000000000000e+00 /* 0x3FF00000, 0x00000000 */
@ -49,7 +48,7 @@
#define pio2_hi 1.57079632679489655800e+00 /* 0x3FF921FB, 0x54442D18 */
#define pio2_lo 6.12323399573676603587e-17 /* 0x3C91A626, 0x33145C07 */
#define pio4_hi 7.85398163397448278999e-01 /* 0x3FE921FB, 0x54442D18 */
/* coefficient for R(x^2) */
/* coefficient for R(x^2) */
#define pS0 1.66666666666666657415e-01 /* 0x3FC55555, 0x55555555 */
#define pS1 -3.25565818622400915405e-01 /* 0xBFD4D612, 0x03EB6F7D */
#define pS2 2.01212532134862925881e-01 /* 0x3FC9C155, 0x0E884455 */
@ -61,44 +60,67 @@
#define qS3 -6.88283971605453293030e-01 /* 0xBFE6066C, 0x1B8D0159 */
#define qS4 7.70381505559019352791e-02 /* 0x3FB3B8C5, 0xB12E9282 */
double asin(double x)
double
asin (double x)
{
double t = 0,w,p,q,c,r,s;
int hx,ix;
hx = __HI(x);
ix = hx&0x7fffffff;
if(ix>= 0x3ff00000) { /* |x|>= 1 */
if(((ix-0x3ff00000)|__LO(x))==0)
/* asin(1)=+-pi/2 with inexact */
return x*pio2_hi+x*pio2_lo;
return (x-x)/(x-x); /* asin(|x|>1) is NaN */
} else if (ix<0x3fe00000) { /* |x|<0.5 */
if(ix<0x3e400000) { /* if |x| < 2**-27 */
if(huge+x>one) return x;/* return x with inexact if x!=0*/
} else
t = x*x;
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
w = p/q;
return x+x*w;
}
/* 1> |x|>= 0.5 */
w = one-fabs(x);
t = w*0.5;
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
s = sqrt(t);
if(ix>=0x3FEF3333) { /* if |x| > 0.975 */
w = p/q;
t = pio2_hi-(2.0*(s+s*w)-pio2_lo);
} else {
w = s;
__LO(w) = 0;
c = (t-w*w)/(s+w);
r = p/q;
p = 2.0*s*r-(pio2_lo-2.0*c);
q = pio4_hi-2.0*w;
t = pio4_hi-(p-q);
}
if(hx>0) return t; else return -t;
}
double t = 0, w, p, q, c, r, s;
int hx, ix;
hx = __HI (x);
ix = hx & 0x7fffffff;
if (ix >= 0x3ff00000) /* |x| >= 1 */
{
if (((ix - 0x3ff00000) | __LO (x)) == 0) /* asin(1) = +-pi/2 with inexact */
{
return x * pio2_hi + x * pio2_lo;
}
return (x - x) / (x - x); /* asin(|x|>1) is NaN */
}
else if (ix < 0x3fe00000) /* |x| < 0.5 */
{
if (ix < 0x3e400000) /* if |x| < 2**-27 */
{
if (huge + x > one) /* return x with inexact if x != 0 */
{
return x;
}
}
else
{
t = x * x;
}
p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
w = p / q;
return x + x * w;
}
/* 1 > |x| >= 0.5 */
w = one - fabs (x);
t = w * 0.5;
p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
s = sqrt (t);
if (ix >= 0x3FEF3333) /* if |x| > 0.975 */
{
w = p / q;
t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
}
else
{
w = s;
__LO (w) = 0;
c = (t - w * w) / (s + w);
r = p / q;
p = 2.0 * s * r - (pio2_lo - 2.0 * c);
q = pio4_hi - 2.0 * w;
t = pio4_hi - (p - q);
}
if (hx > 0)
{
return t;
}
else
{
return -t;
}
} /* asin */

View File

@ -6,14 +6,14 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* atan(x)
* Method
*
* Method:
* 1. Reduce x to positive by atan(x) = -atan(-x).
* 2. According to the integer k=4t+0.25 chopped, t=x, the argument
* is further reduced to one of the following intervals and the
@ -34,14 +34,16 @@
#include "fdlibm.h"
static const double atanhi[] = {
static const double atanhi[] =
{
4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
};
static const double atanlo[] = {
static const double atanlo[] =
{
2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
@ -63,48 +65,83 @@ static const double atanlo[] = {
#define one 1.0
#define huge 1.0e300
double atan(double x)
double
atan (double x)
{
double w,s1,s2,z;
int ix,hx,id;
double w, s1, s2, z;
int ix, hx, id;
hx = __HI(x);
ix = hx&0x7fffffff;
if(ix>=0x44100000) { /* if |x| >= 2^66 */
if(ix>0x7ff00000||
(ix==0x7ff00000&&(__LO(x)!=0)))
return x+x; /* NaN */
if(hx>0) return atanhi[3]+atanlo[3];
else return -atanhi[3]-atanlo[3];
} if (ix < 0x3fdc0000) { /* |x| < 0.4375 */
if (ix < 0x3e200000) { /* |x| < 2^-29 */
if(huge+x>one) return x; /* raise inexact */
}
id = -1;
} else {
x = fabs(x);
if (ix < 0x3ff30000) { /* |x| < 1.1875 */
if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */
id = 0; x = (2.0*x-one)/(2.0+x);
} else { /* 11/16<=|x|< 19/16 */
id = 1; x = (x-one)/(x+one);
}
} else {
if (ix < 0x40038000) { /* |x| < 2.4375 */
id = 2; x = (x-1.5)/(one+1.5*x);
} else { /* 2.4375 <= |x| < 2^66 */
id = 3; x = -1.0/x;
}
}}
/* end of argument reduction */
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
s1 = z*(aT0+w*(aT2+w*(aT4+w*(aT6+w*(aT8+w*aT10)))));
s2 = w*(aT1+w*(aT3+w*(aT5+w*(aT7+w*aT9))));
if (id<0) return x - x*(s1+s2);
else {
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
return (hx<0)? -z:z;
}
}
hx = __HI (x);
ix = hx & 0x7fffffff;
if (ix >= 0x44100000) /* if |x| >= 2^66 */
{
if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (__LO (x) != 0)))
{
return x + x; /* NaN */
}
if (hx > 0)
{
return atanhi[3] + atanlo[3];
}
else
{
return -atanhi[3] - atanlo[3];
}
}
if (ix < 0x3fdc0000) /* |x| < 0.4375 */
{
if (ix < 0x3e200000) /* |x| < 2^-29 */
{
if (huge + x > one) /* raise inexact */
{
return x;
}
}
id = -1;
}
else
{
x = fabs (x);
if (ix < 0x3ff30000) /* |x| < 1.1875 */
{
if (ix < 0x3fe60000) /* 7/16 <= |x| < 11/16 */
{
id = 0;
x = (2.0 * x - one) / (2.0 + x);
}
else /* 11/16 <= |x| < 19/16 */
{
id = 1;
x = (x - one) / (x + one);
}
}
else
{
if (ix < 0x40038000) /* |x| < 2.4375 */
{
id = 2;
x = (x - 1.5) / (one + 1.5 * x);
}
else /* 2.4375 <= |x| < 2^66 */
{
id = 3;
x = -1.0 / x;
}
}
}
/* end of argument reduction */
z = x * x;
w = z * z;
/* break sum from i=0 to 10 aT[i] z**(i+1) into odd and even poly */
s1 = z * (aT0 + w * (aT2 + w * (aT4 + w * (aT6 + w * (aT8 + w * aT10)))));
s2 = w * (aT1 + w * (aT3 + w * (aT5 + w * (aT7 + w * aT9))));
if (id < 0)
{
return x - x * (s1 + s2);
}
else
{
z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
return (hx < 0) ? -z : z;
}
} /* atan */

View File

@ -6,31 +6,30 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* atan2(y,x)
* Method :
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
* 2. Reduce x to positive by (if x and y are unexceptional):
* ARG (x+iy) = arctan(y/x) ... if x > 0,
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
*
* Method:
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
* 2. Reduce x to positive by (if x and y are unexceptional):
* ARG (x+iy) = arctan(y/x) ... if x > 0,
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
*
* Special cases:
*
* ATAN2((anything), NaN ) is NaN;
* ATAN2(NAN , (anything) ) is NaN;
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
* ATAN2(+-INF,+INF ) is +-pi/4 ;
* ATAN2(+-INF,-INF ) is +-3pi/4;
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
* ATAN2((anything), NaN ) is NaN;
* ATAN2(NAN , (anything) ) is NaN;
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
* ATAN2(+-INF,+INF ) is +-pi/4 ;
* ATAN2(+-INF,-INF ) is +-3pi/4;
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
*
* Constants:
* The hexadecimal values are the intended ones for the following
@ -48,66 +47,142 @@
#define pi 3.1415926535897931160E+00 /* 0x400921FB, 0x54442D18 */
#define pi_lo 1.2246467991473531772E-16 /* 0x3CA1A626, 0x33145C07 */
double atan2(double y, double x)
{
double z;
int k,m,hx,hy,ix,iy;
unsigned lx,ly;
double
atan2 (double y, double x)
{
double z;
int k, m, hx, hy, ix, iy;
unsigned lx, ly;
hx = __HI(x); ix = hx&0x7fffffff;
lx = __LO(x);
hy = __HI(y); iy = hy&0x7fffffff;
ly = __LO(y);
if(((ix|((lx|-lx)>>31))>0x7ff00000)||
((iy|((ly|-ly)>>31))>0x7ff00000)) /* x or y is NaN */
return x+y;
if((hx-0x3ff00000|lx)==0) return atan(y); /* x=1.0 */
m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */
hx = __HI (x);
ix = hx & 0x7fffffff;
lx = __LO (x);
hy = __HI (y);
iy = hy & 0x7fffffff;
ly = __LO (y);
if (((ix | ((lx | -lx) >> 31)) > 0x7ff00000) || ((iy | ((ly | -ly) >> 31)) > 0x7ff00000)) /* x or y is NaN */
{
return x + y;
}
if ((hx - 0x3ff00000 | lx) == 0) /* x = 1.0 */
{
return atan (y);
}
m = ((hy >> 31) & 1) | ((hx >> 30) & 2); /* 2 * sign(x) + sign(y) */
/* when y = 0 */
if((iy|ly)==0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi+tiny;/* atan(+0,-anything) = pi */
case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */
}
}
/* when x = 0 */
if((ix|lx)==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
/* when x is INF */
if(ix==0x7ff00000) {
if(iy==0x7ff00000) {
switch(m) {
case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */
case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */
case 2: return 3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/
case 3: return -3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/
}
} else {
switch(m) {
case 0: return zero ; /* atan(+...,+INF) */
case 1: return -zero ; /* atan(-...,+INF) */
case 2: return pi+tiny ; /* atan(+...,-INF) */
case 3: return -pi-tiny ; /* atan(-...,-INF) */
}
}
}
/* when y is INF */
if(iy==0x7ff00000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
/* when y = 0 */
if ((iy | ly) == 0)
{
switch (m)
{
case 0:
case 1:
{
return y; /* atan(+-0,+anything) = +-0 */
}
case 2:
{
return pi + tiny; /* atan(+0,-anything) = pi */
}
case 3:
{
return -pi - tiny; /* atan(-0,-anything) = -pi */
}
}
}
/* when x = 0 */
if ((ix | lx) == 0)
{
return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
}
/* compute y/x */
k = (iy-ix)>>20;
if(k > 60) z=pi_o_2+0.5*pi_lo; /* |y/x| > 2**60 */
else if(hx<0&&k<-60) z=0.0; /* |y|/x < -2**60 */
else z=atan(fabs(y/x)); /* safe to do y/x */
switch (m) {
case 0: return z ; /* atan(+,+) */
case 1: __HI(z) ^= 0x80000000;
return z ; /* atan(-,+) */
case 2: return pi-(z-pi_lo);/* atan(+,-) */
default: /* case 3 */
return (z-pi_lo)-pi;/* atan(-,-) */
}
}
/* when x is INF */
if (ix == 0x7ff00000)
{
if (iy == 0x7ff00000)
{
switch (m)
{
case 0: /* atan(+INF,+INF) */
{
return pi_o_4 + tiny;
}
case 1: /* atan(-INF,+INF) */
{
return -pi_o_4 - tiny;
}
case 2: /* atan(+INF,-INF) */
{
return 3.0 * pi_o_4 + tiny;
}
case 3: /* atan(-INF,-INF) */
{
return -3.0 * pi_o_4 - tiny;
}
}
}
else
{
switch (m)
{
case 0: /* atan(+...,+INF) */
{
return zero;
}
case 1: /* atan(-...,+INF) */
{
return -zero;
}
case 2: /* atan(+...,-INF) */
{
return pi + tiny;
}
case 3: /* atan(-...,-INF) */
{
return -pi - tiny;
}
}
}
}
/* when y is INF */
if (iy == 0x7ff00000)
{
return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
}
/* compute y / x */
k = (iy - ix) >> 20;
if (k > 60) /* |y / x| > 2**60 */
{
z = pi_o_2 + 0.5 * pi_lo;
}
else if (hx < 0 && k < -60) /* |y| / x < -2**60 */
{
z = 0.0;
}
else /* safe to do y / x */
{
z = atan (fabs (y / x));
}
switch (m)
{
case 0: /* atan(+,+) */
{
return z;
}
case 1: /* atan(-,+) */
{
__HI (z) ^= 0x80000000;
return z;
}
case 2: /* atan(+,-) */
{
return pi - (z - pi_lo);
}
/* case 3: */
default: /* atan(-,-) */
{
return (z - pi_lo) - pi;
}
}
} /* atan2 */

View File

@ -6,64 +6,110 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* ceil(x)
/* ceil(x)
* Return x rounded toward -inf to integral value
*
* Method:
* Bit twiddling.
* Bit twiddling.
*
* Exception:
* Inexact flag raised if x not equal to ceil(x).
* Inexact flag raised if x not equal to ceil(x).
*/
#include "fdlibm.h"
#define huge 1.0e300
double ceil(double x)
double
ceil (double x)
{
int i0,i1,j0;
unsigned i,j;
i0 = __HI(x);
i1 = __LO(x);
j0 = ((i0>>20)&0x7ff)-0x3ff;
if(j0<20) {
if(j0<0) { /* raise inexact if x != 0 */
if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */
if(i0<0) {i0=0x80000000;i1=0;}
else if((i0|i1)!=0) { i0=0x3ff00000;i1=0;}
}
} else {
i = (0x000fffff)>>j0;
if(((i0&i)|i1)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0>0) i0 += (0x00100000)>>j0;
i0 &= (~i); i1=0;
}
}
} else if (j0>51) {
if(j0==0x400) return x+x; /* inf or NaN */
else return x; /* x is integral */
} else {
i = ((unsigned)(0xffffffff))>>(j0-20);
if((i1&i)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0>0) {
if(j0==20) i0+=1;
else {
j = i1 + (1<<(52-j0));
if(j<i1) i0+=1; /* got a carry */
i1 = j;
}
}
i1 &= (~i);
}
}
__HI(x) = i0;
__LO(x) = i1;
return x;
}
int i0, i1, j0;
unsigned i, j;
i0 = __HI (x);
i1 = __LO (x);
j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
if (j0 < 20)
{
if (j0 < 0) /* raise inexact if x != 0 */
{
if (huge + x > 0.0) /* return 0 * sign(x) if |x| < 1 */
{
if (i0 < 0)
{
i0 = 0x80000000;
i1 = 0;
}
else if ((i0 | i1) != 0)
{
i0 = 0x3ff00000;
i1 = 0;
}
}
}
else
{
i = (0x000fffff) >> j0;
if (((i0 & i) | i1) == 0) /* x is integral */
{
return x;
}
if (huge + x > 0.0) /* raise inexact flag */
{
if (i0 > 0)
{
i0 += (0x00100000) >> j0;
}
i0 &= (~i);
i1 = 0;
}
}
}
else if (j0 > 51)
{
if (j0 == 0x400) /* inf or NaN */
{
return x + x;
}
else /* x is integral */
{
return x;
}
}
else
{
i = ((unsigned) (0xffffffff)) >> (j0 - 20);
if ((i1 & i) == 0) /* x is integral */
{
return x;
}
if (huge + x > 0.0) /* raise inexact flag */
{
if (i0 > 0)
{
if (j0 == 20)
{
i0 += 1;
}
else
{
j = i1 + (1 << (52 - j0));
if (j < i1) /* got a carry */
{
i0 += 1;
}
i1 = j;
}
}
i1 &= (~i);
}
}
__HI (x) = i0;
__LO (x) = i1;
return x;
} /* ceil */

View File

@ -6,21 +6,20 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* copysign(double x, double y)
* copysign(x,y) returns a value with the magnitude of x and
/* copysign(x,y) returns a value with the magnitude of x and
* with the sign bit of y.
*/
#include "fdlibm.h"
double copysign(double x, double y)
double
copysign (double x, double y)
{
__HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
return x;
}
__HI (x) = (__HI (x) & 0x7fffffff) | (__HI (y) & 0x80000000);
return x;
} /* copysign */

View File

@ -5,7 +5,7 @@
* Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
@ -13,58 +13,58 @@
/* exp(x)
* Returns the exponential of x.
*
* Method
* Method:
* 1. Argument reduction:
* Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
* Given x, find r and integer k such that
* Given x, find r and integer k such that
*
* x = k*ln2 + r, |r| <= 0.5*ln2.
*
* Here r will be represented as r = hi-lo for better
* accuracy.
* accuracy.
*
* 2. Approximation of exp(r) by a special rational function on
* the interval [0,0.34658]:
* Write
* R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
* the interval [0,0.34658]:
* Write
* R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
* We use a special Remes algorithm on [0,0.34658] to generate
* a polynomial of degree 5 to approximate R. The maximum error
* of this polynomial approximation is bounded by 2**-59. In
* other words,
* R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
* (where z=r*r, and the values of P1 to P5 are listed below)
* and
* | 5 | -59
* | 2.0+P1*z+...+P5*z - R(z) | <= 2
* | |
* The computation of exp(r) thus becomes
* a polynomial of degree 5 to approximate R. The maximum error
* of this polynomial approximation is bounded by 2**-59. In
* other words,
* R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
* (where z=r*r, and the values of P1 to P5 are listed below)
* and
* | 5 | -59
* | 2.0+P1*z+...+P5*z - R(z) | <= 2
* | |
* The computation of exp(r) thus becomes
* 2*r
* exp(r) = 1 + -------
* R - r
* exp(r) = 1 + -------
* R - r
* r*R1(r)
* = 1 + r + ----------- (for better accuracy)
* 2 - R1(r)
* where
* 2 4 10
* R1(r) = r - (P1*r + P2*r + ... + P5*r ).
* = 1 + r + ----------- (for better accuracy)
* 2 - R1(r)
* where
* 2 4 10
* R1(r) = r - (P1*r + P2*r + ... + P5*r ).
*
* 3. Scale back to obtain exp(x):
* From step 1, we have
* exp(x) = 2^k * exp(r)
* From step 1, we have
* exp(x) = 2^k * exp(r)
*
* Special cases:
* exp(INF) is INF, exp(NaN) is NaN;
* exp(-INF) is 0, and
* for finite argument, only exp(0)=1 is exact.
* exp(INF) is INF, exp(NaN) is NaN;
* exp(-INF) is 0, and
* for finite argument, only exp(0)=1 is exact.
*
* Accuracy:
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
*
* Misc. info.
* For IEEE double
* if x > 7.09782712893383973096e+02 then exp(x) overflow
* if x < -7.45133219101941108420e+02 then exp(x) underflow
* Misc. info:
* For IEEE double
* if x > 7.09782712893383973096e+02 then exp(x) overflow
* if x < -7.45133219101941108420e+02 then exp(x) underflow
*
* Constants:
* The hexadecimal values are the intended ones for the following
@ -75,73 +75,118 @@
#include "fdlibm.h"
static const double
halF[2] = {0.5,-0.5,},
ln2HI[2] = { 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
-6.93147180369123816490e-01,}, /* 0xbfe62e42, 0xfee00000 */
ln2LO[2] = { 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
-1.90821492927058770002e-10,}; /* 0xbdea39ef, 0x35793c76 */
static const double halF[2] =
{
0.5,
-0.5,
};
static const double ln2HI[2] =
{
6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
-6.93147180369123816490e-01, /* 0xbfe62e42, 0xfee00000 */
};
static const double ln2LO[2] =
{
1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
-1.90821492927058770002e-10, /* 0xbdea39ef, 0x35793c76 */
};
#define one 1.0
#define huge 1.0e+300
#define twom1000 9.33263618503218878990e-302 /* 2**-1000=0x01700000,0 */
#define o_threshold 7.09782712893383973096e+02 /* 0x40862E42, 0xFEFA39EF */
#define u_threshold -7.45133219101941108420e+02 /* 0xc0874910, 0xD52D3051 */
#define invln2 1.44269504088896338700e+00 /* 0x3ff71547, 0x652b82fe */
#define P1 1.66666666666666019037e-01 /* 0x3FC55555, 0x5555553E */
#define P2 -2.77777777770155933842e-03 /* 0xBF66C16C, 0x16BEBD93 */
#define P3 6.61375632143793436117e-05 /* 0x3F11566A, 0xAF25DE2C */
#define P4 -1.65339022054652515390e-06 /* 0xBEBBBD41, 0xC5D26BF1 */
#define P5 4.13813679705723846039e-08 /* 0x3E663769, 0x72BEA4D0 */
#define o_threshold 7.09782712893383973096e+02 /* 0x40862E42, 0xFEFA39EF */
#define u_threshold -7.45133219101941108420e+02 /* 0xc0874910, 0xD52D3051 */
#define invln2 1.44269504088896338700e+00 /* 0x3ff71547, 0x652b82fe */
#define P1 1.66666666666666019037e-01 /* 0x3FC55555, 0x5555553E */
#define P2 -2.77777777770155933842e-03 /* 0xBF66C16C, 0x16BEBD93 */
#define P3 6.61375632143793436117e-05 /* 0x3F11566A, 0xAF25DE2C */
#define P4 -1.65339022054652515390e-06 /* 0xBEBBBD41, 0xC5D26BF1 */
#define P5 4.13813679705723846039e-08 /* 0x3E663769, 0x72BEA4D0 */
double exp(double x) /* default IEEE double exp */
double
exp (double x) /* default IEEE double exp */
{
double y,hi,lo,c,t;
int k = 0,xsb;
unsigned hx;
double y, hi, lo, c, t;
int k = 0, xsb;
unsigned hx;
hx = __HI(x); /* high word of x */
xsb = (hx>>31)&1; /* sign bit of x */
hx &= 0x7fffffff; /* high word of |x| */
hx = __HI (x); /* high word of x */
xsb = (hx >> 31) & 1; /* sign bit of x */
hx &= 0x7fffffff; /* high word of |x| */
/* filter out non-finite argument */
if(hx >= 0x40862E42) { /* if |x|>=709.78... */
if(hx>=0x7ff00000) {
if(((hx&0xfffff)|__LO(x))!=0)
return x+x; /* NaN */
else return (xsb==0)? x:0.0; /* exp(+-inf)={inf,0} */
}
if(x > o_threshold) return huge*huge; /* overflow */
if(x < u_threshold) return twom1000*twom1000; /* underflow */
}
/* filter out non-finite argument */
if (hx >= 0x40862E42) /* if |x| >= 709.78... */
{
if (hx >= 0x7ff00000)
{
if (((hx & 0xfffff) | __LO (x)) != 0) /* NaN */
{
return x + x;
}
else /* exp(+-inf) = {inf,0} */
{
return (xsb == 0) ? x : 0.0;
}
}
if (x > o_threshold) /* overflow */
{
return huge * huge;
}
if (x < u_threshold) /* underflow */
{
return twom1000 * twom1000;
}
}
/* argument reduction */
if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
} else {
k = (int)(invln2*x+halF[xsb]);
t = k;
hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */
lo = t*ln2LO[0];
}
x = hi - lo;
}
else if(hx < 0x3e300000) { /* when |x|<2**-28 */
if(huge+x>one) return one+x;/* trigger inexact */
}
else k = 0;
/* argument reduction */
if (hx > 0x3fd62e42) /* if |x| > 0.5 ln2 */
{
if (hx < 0x3FF0A2B2) /* and |x| < 1.5 ln2 */
{
hi = x - ln2HI[xsb];
lo = ln2LO[xsb];
k = 1 - xsb - xsb;
}
else
{
k = (int) (invln2 * x + halF[xsb]);
t = k;
hi = x - t * ln2HI[0]; /* t * ln2HI is exact here */
lo = t * ln2LO[0];
}
x = hi - lo;
}
else if (hx < 0x3e300000) /* when |x| < 2**-28 */
{
if (huge + x > one) /* trigger inexact */
{
return one + x;
}
}
else
{
k = 0;
}
/* x is now in primary range */
t = x*x;
c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
if(k==0) return one-((x*c)/(c-2.0)-x);
else y = one-((lo-(x*c)/(2.0-c))-hi);
if(k >= -1021) {
__HI(y) += (k<<20); /* add k to y's exponent */
return y;
} else {
__HI(y) += ((k+1000)<<20);/* add k to y's exponent */
return y*twom1000;
}
}
/* x is now in primary range */
t = x * x;
c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
if (k == 0)
{
return one - ((x * c) / (c - 2.0) - x);
}
else
{
y = one - ((lo - (x * c) / (2.0 - c)) - hi);
}
if (k >= -1021)
{
__HI (y) += (k << 20); /* add k to y's exponent */
return y;
}
else
{
__HI (y) += ((k + 1000) << 20); /* add k to y's exponent */
return y * twom1000;
}
} /* exp */

View File

@ -6,19 +6,19 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* fabs(x) returns the absolute value of x.
/* fabs(x) returns the absolute value of x.
*/
#include "fdlibm.h"
double fabs(double x)
double
fabs (double x)
{
__HI(x) &= 0x7fffffff;
return x;
}
__HI (x) &= 0x7fffffff;
return x;
} /* fabs */

View File

@ -6,21 +6,22 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* finite(x) returns 1 is x is finite, else 0;
/* finite(x) returns 1 is x is finite, else 0;
* no branching!
*/
#include "fdlibm.h"
int finite(double x)
int
finite (double x)
{
int hx;
hx = __HI(x);
return (unsigned)((hx&0x7fffffff)-0x7ff00000)>>31;
}
int hx;
hx = __HI (x);
return (unsigned) ((hx & 0x7fffffff) - 0x7ff00000) >> 31;
} /* finite */

View File

@ -6,65 +6,109 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* floor(x)
/* floor(x)
* Return x rounded toward -inf to integral value
*
* Method:
* Bit twiddling.
* Bit twiddling.
*
* Exception:
* Inexact flag raised if x not equal to floor(x).
* Inexact flag raised if x not equal to floor(x).
*/
#include "fdlibm.h"
#define huge 1.0e300
double floor(double x)
double
floor (double x)
{
int i0,i1,j0;
unsigned i,j;
i0 = __HI(x);
i1 = __LO(x);
j0 = ((i0>>20)&0x7ff)-0x3ff;
if(j0<20) {
if(j0<0) { /* raise inexact if x != 0 */
if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */
if(i0>=0) {i0=i1=0;}
else if(((i0&0x7fffffff)|i1)!=0)
{ i0=0xbff00000;i1=0;}
}
} else {
i = (0x000fffff)>>j0;
if(((i0&i)|i1)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0<0) i0 += (0x00100000)>>j0;
i0 &= (~i); i1=0;
}
}
} else if (j0>51) {
if(j0==0x400) return x+x; /* inf or NaN */
else return x; /* x is integral */
} else {
i = ((unsigned)(0xffffffff))>>(j0-20);
if((i1&i)==0) return x; /* x is integral */
if(huge+x>0.0) { /* raise inexact flag */
if(i0<0) {
if(j0==20) i0+=1;
else {
j = i1+(1<<(52-j0));
if(j<i1) i0 +=1 ; /* got a carry */
i1=j;
}
}
i1 &= (~i);
}
}
__HI(x) = i0;
__LO(x) = i1;
return x;
}
int i0, i1, j0;
unsigned i, j;
i0 = __HI (x);
i1 = __LO (x);
j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
if (j0 < 20)
{
if (j0 < 0) /* raise inexact if x != 0 */
{
if (huge + x > 0.0) /* return 0 * sign(x) if |x| < 1 */
{
if (i0 >= 0)
{
i0 = i1 = 0;
}
else if (((i0 & 0x7fffffff) | i1) != 0)
{
i0 = 0xbff00000;
i1 = 0;
}
}
}
else
{
i = (0x000fffff) >> j0;
if (((i0 & i) | i1) == 0) /* x is integral */
{
return x;
}
if (huge + x > 0.0) /* raise inexact flag */
{
if (i0 < 0)
{
i0 += (0x00100000) >> j0;
}
i0 &= (~i);
i1 = 0;
}
}
}
else if (j0 > 51)
{
if (j0 == 0x400) /* inf or NaN */
{
return x + x;
}
else /* x is integral */
{
return x;
}
}
else
{
i = ((unsigned) (0xffffffff)) >> (j0 - 20);
if ((i1 & i) == 0) /* x is integral */
{
return x;
}
if (huge + x > 0.0) /* raise inexact flag */
{
if (i0 < 0)
{
if (j0 == 20)
{
i0 += 1;
}
else
{
j = i1 + (1 << (52 - j0));
if (j < i1) /* got a carry */
{
i0 += 1;
}
i1 = j;
}
}
i1 &= (~i);
}
}
__HI (x) = i0;
__LO (x) = i1;
return x;
} /* floor */

View File

@ -11,124 +11,209 @@
* ====================================================
*/
/*
* fmod(x,y)
/* fmod(x,y)
* Return x mod y in exact arithmetic
*
* Method: shift and subtract
*/
#include "fdlibm.h"
static const double
Zero[] = {0.0, -0.0,};
static const double Zero[] = { 0.0, -0.0, };
#define one 1.0
double fmod(double x, double y)
double
fmod (double x, double y)
{
int n,hx,hy,hz,ix,iy,sx,i;
unsigned lx,ly,lz;
int n, hx, hy, hz, ix, iy, sx, i;
unsigned lx, ly, lz;
hx = __HI(x); /* high word of x */
lx = __LO(x); /* low word of x */
hy = __HI(y); /* high word of y */
ly = __LO(y); /* low word of y */
sx = hx&0x80000000; /* sign of x */
hx ^=sx; /* |x| */
hy &= 0x7fffffff; /* |y| */
hx = __HI (x); /* high word of x */
lx = __LO (x); /* low word of x */
hy = __HI (y); /* high word of y */
ly = __LO (y); /* low word of y */
sx = hx & 0x80000000; /* sign of x */
hx ^= sx; /* |x| */
hy &= 0x7fffffff; /* |y| */
/* purge off exception values */
if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */
((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
return (x*y)/(x*y);
if(hx<=hy) {
if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
if(lx==ly)
return Zero[(unsigned)sx>>31]; /* |x|=|y| return x*0*/
}
/* purge off exception values */
if ((hy | ly) == 0 || (hx >= 0x7ff00000) || /* y = 0, or x not finite */
((hy | ((ly | -ly) >> 31)) > 0x7ff00000)) /* or y is NaN */
{
return (x * y) / (x * y);
}
if (hx <= hy)
{
if ((hx < hy) || (lx < ly)) /* |x| < |y| return x */
{
return x;
}
if (lx == ly) /* |x| = |y| return x * 0 */
{
return Zero[(unsigned) sx >> 31];
}
}
/* determine ix = ilogb(x) */
if(hx<0x00100000) { /* subnormal x */
if(hx==0) {
for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
} else {
for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
}
} else ix = (hx>>20)-1023;
/* determine ix = ilogb(x) */
if (hx < 0x00100000) /* subnormal x */
{
if (hx == 0)
{
for (ix = -1043, i = lx; i > 0; i <<= 1)
{
ix -= 1;
}
}
else
{
for (ix = -1022, i = (hx << 11); i > 0; i <<= 1)
{
ix -= 1;
}
}
}
else
{
ix = (hx >> 20) - 1023;
}
/* determine iy = ilogb(y) */
if(hy<0x00100000) { /* subnormal y */
if(hy==0) {
for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
} else {
for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
}
} else iy = (hy>>20)-1023;
/* determine iy = ilogb(y) */
if (hy < 0x00100000) /* subnormal y */
{
if (hy == 0)
{
for (iy = -1043, i = ly; i > 0; i <<= 1)
{
iy -= 1;
}
}
else
{
for (iy = -1022, i = (hy << 11); i > 0; i <<= 1)
{
iy -= 1;
}
}
}
else
{
iy = (hy >> 20) - 1023;
}
/* set up {hx,lx}, {hy,ly} and align y to x */
if(ix >= -1022)
hx = 0x00100000|(0x000fffff&hx);
else { /* subnormal x, shift x to normal */
n = -1022-ix;
if(n<=31) {
hx = (hx<<n)|(lx>>(32-n));
lx <<= n;
} else {
hx = lx<<(n-32);
lx = 0;
}
}
if(iy >= -1022)
hy = 0x00100000|(0x000fffff&hy);
else { /* subnormal y, shift y to normal */
n = -1022-iy;
if(n<=31) {
hy = (hy<<n)|(ly>>(32-n));
ly <<= n;
} else {
hy = ly<<(n-32);
ly = 0;
}
}
/* set up {hx,lx}, {hy,ly} and align y to x */
if (ix >= -1022)
{
hx = 0x00100000 | (0x000fffff & hx);
}
else /* subnormal x, shift x to normal */
{
n = -1022 - ix;
if (n <= 31)
{
hx = (hx << n) | (lx >> (32 - n));
lx <<= n;
}
else
{
hx = lx << (n - 32);
lx = 0;
}
}
if (iy >= -1022)
{
hy = 0x00100000 | (0x000fffff & hy);
}
else /* subnormal y, shift y to normal */
{
n = -1022 - iy;
if (n <= 31)
{
hy = (hy << n) | (ly >> (32 - n));
ly <<= n;
}
else
{
hy = ly << (n - 32);
ly = 0;
}
}
/* fix point fmod */
n = ix - iy;
while(n--) {
hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
else {
if((hz|lz)==0) /* return sign(x)*0 */
return Zero[(unsigned)sx>>31];
hx = hz+hz+(lz>>31); lx = lz+lz;
}
}
hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
if(hz>=0) {hx=hz;lx=lz;}
/* fix point fmod */
n = ix - iy;
while (n--)
{
hz = hx - hy;
lz = lx - ly;
if (lx < ly)
{
hz -= 1;
}
if (hz < 0)
{
hx = hx + hx + (lx >> 31);
lx = lx + lx;
}
else
{
if ((hz | lz) == 0) /* return sign(x) * 0 */
{
return Zero[(unsigned) sx >> 31];
}
hx = hz + hz + (lz >> 31);
lx = lz + lz;
}
}
hz = hx - hy;
lz = lx - ly;
if (lx < ly)
{
hz -= 1;
}
if (hz >= 0)
{
hx = hz;
lx = lz;
}
/* convert back to floating value and restore the sign */
if((hx|lx)==0) /* return sign(x)*0 */
return Zero[(unsigned)sx>>31];
while(hx<0x00100000) { /* normalize x */
hx = hx+hx+(lx>>31); lx = lx+lx;
iy -= 1;
}
if(iy>= -1022) { /* normalize output */
hx = ((hx-0x00100000)|((iy+1023)<<20));
__HI(x) = hx|sx;
__LO(x) = lx;
} else { /* subnormal output */
n = -1022 - iy;
if(n<=20) {
lx = (lx>>n)|((unsigned)hx<<(32-n));
hx >>= n;
} else if (n<=31) {
lx = (hx<<(32-n))|(lx>>n); hx = sx;
} else {
lx = hx>>(n-32); hx = sx;
}
__HI(x) = hx|sx;
__LO(x) = lx;
x *= one; /* create necessary signal */
}
return x; /* exact output */
}
/* convert back to floating value and restore the sign */
if ((hx | lx) == 0) /* return sign(x) * 0 */
{
return Zero[(unsigned) sx >> 31];
}
while (hx < 0x00100000) /* normalize x */
{
hx = hx + hx + (lx >> 31);
lx = lx + lx;
iy -= 1;
}
if (iy >= -1022) /* normalize output */
{
hx = ((hx - 0x00100000) | ((iy + 1023) << 20));
__HI (x) = hx | sx;
__LO (x) = lx;
}
else /* subnormal output */
{
n = -1022 - iy;
if (n <= 20)
{
lx = (lx >> n) | ((unsigned) hx << (32 - n));
hx >>= n;
}
else if (n <= 31)
{
lx = (hx << (32 - n)) | (lx >> n);
hx = sx;
}
else
{
lx = hx >> (n - 32);
hx = sx;
}
__HI (x) = hx | sx;
__LO (x) = lx;
x *= one; /* create necessary signal */
}
return x; /* exact output */
} /* fmod */

View File

@ -6,24 +6,25 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* isnan(x) returns 1 is x is nan, else 0;
/* isnan(x) returns 1 is x is nan, else 0;
* no branching!
*/
#include "fdlibm.h"
int isnan(double x)
int
isnan (double x)
{
int hx,lx;
hx = (__HI(x)&0x7fffffff);
lx = __LO(x);
hx |= (unsigned)(lx|(-lx))>>31;
hx = 0x7ff00000 - hx;
return ((unsigned)(hx))>>31;
}
int hx, lx;
hx = (__HI (x) & 0x7fffffff);
lx = __LO (x);
hx |= (unsigned) (lx | (-lx)) >> 31;
hx = 0x7ff00000 - hx;
return ((unsigned) (hx)) >> 31;
} /* isnan */

View File

@ -6,7 +6,7 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
@ -16,44 +16,44 @@
*
* Method :
* 1. Argument Reduction: find k and f such that
* x = 2^k * (1+f),
* where sqrt(2)/2 < 1+f < sqrt(2) .
* x = 2^k * (1+f),
* where sqrt(2)/2 < 1+f < sqrt(2) .
*
* 2. Approximation of log(1+f).
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
* = 2s + s*R
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
* = 2s + s*R
* We use a special Reme algorithm on [0,0.1716] to generate
* a polynomial of degree 14 to approximate R The maximum error
* of this polynomial approximation is bounded by 2**-58.45. In
* other words,
* 2 4 6 8 10 12 14
* R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
* (the values of Lg1 to Lg7 are listed in the program)
* and
* | 2 14 | -58.45
* | Lg1*s +...+Lg7*s - R(z) | <= 2
* | |
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
* In order to guarantee error in log below 1ulp, we compute log
* by
* log(1+f) = f - s*(f - R) (if f is not too large)
* log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
*
* 3. Finally, log(x) = k*ln2 + log(1+f).
* = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
* Here ln2 is split into two floating point number:
* ln2_hi + ln2_lo,
* where n*ln2_hi is always exact for |n| < 2000.
* a polynomial of degree 14 to approximate R The maximum error
* of this polynomial approximation is bounded by 2**-58.45. In
* other words,
* 2 4 6 8 10 12 14
* R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
* (the values of Lg1 to Lg7 are listed in the program)
* and
* | 2 14 | -58.45
* | Lg1*s +...+Lg7*s - R(z) | <= 2
* | |
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
* In order to guarantee error in log below 1ulp, we compute log
* by
* log(1+f) = f - s*(f - R) (if f is not too large)
* log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
*
* 3. Finally, log(x) = k*ln2 + log(1+f).
* = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
* Here ln2 is split into two floating point number:
* ln2_hi + ln2_lo,
* where n*ln2_hi is always exact for |n| < 2000.
*
* Special cases:
* log(x) is NaN with signal if x < 0 (including -INF) ;
* log(+INF) is +INF; log(0) is -INF with signal;
* log(NaN) is that NaN with no signal.
* log(x) is NaN with signal if x < 0 (including -INF) ;
* log(+INF) is +INF; log(0) is -INF with signal;
* log(NaN) is that NaN with no signal.
*
* Accuracy:
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
*
* Constants:
* The hexadecimal values are the intended ones for the following
@ -65,64 +65,108 @@
#include "fdlibm.h"
#define zero 0.0
#define ln2_hi 6.93147180369123816490e-01 /* 3fe62e42 fee00000 */
#define ln2_lo 1.90821492927058770002e-10 /* 3dea39ef 35793c76 */
#define two54 1.80143985094819840000e+16 /* 43500000 00000000 */
#define Lg1 6.666666666666735130e-01 /* 3FE55555 55555593 */
#define Lg2 3.999999999940941908e-01 /* 3FD99999 9997FA04 */
#define Lg3 2.857142874366239149e-01 /* 3FD24924 94229359 */
#define Lg4 2.222219843214978396e-01 /* 3FCC71C5 1D8E78AF */
#define Lg5 1.818357216161805012e-01 /* 3FC74664 96CB03DE */
#define Lg6 1.531383769920937332e-01 /* 3FC39A09 D078C69F */
#define Lg7 1.479819860511658591e-01 /* 3FC2F112 DF3E5244 */
#define ln2_hi 6.93147180369123816490e-01 /* 3fe62e42 fee00000 */
#define ln2_lo 1.90821492927058770002e-10 /* 3dea39ef 35793c76 */
#define two54 1.80143985094819840000e+16 /* 43500000 00000000 */
#define Lg1 6.666666666666735130e-01 /* 3FE55555 55555593 */
#define Lg2 3.999999999940941908e-01 /* 3FD99999 9997FA04 */
#define Lg3 2.857142874366239149e-01 /* 3FD24924 94229359 */
#define Lg4 2.222219843214978396e-01 /* 3FCC71C5 1D8E78AF */
#define Lg5 1.818357216161805012e-01 /* 3FC74664 96CB03DE */
#define Lg6 1.531383769920937332e-01 /* 3FC39A09 D078C69F */
#define Lg7 1.479819860511658591e-01 /* 3FC2F112 DF3E5244 */
double log(double x)
double
log (double x)
{
double hfsq,f,s,z,R,w,t1,t2,dk;
int k,hx,i,j;
unsigned lx;
double hfsq, f, s, z, R, w, t1, t2, dk;
int k, hx, i, j;
unsigned lx;
hx = __HI(x); /* high word of x */
lx = __LO(x); /* low word of x */
hx = __HI (x); /* high word of x */
lx = __LO (x); /* low word of x */
k=0;
if (hx < 0x00100000) { /* x < 2**-1022 */
if (((hx&0x7fffffff)|lx)==0)
return -two54/zero; /* log(+-0)=-inf */
if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
k -= 54; x *= two54; /* subnormal number, scale up x */
hx = __HI(x); /* high word of x */
}
if (hx >= 0x7ff00000) return x+x;
k += (hx>>20)-1023;
hx &= 0x000fffff;
i = (hx+0x95f64)&0x100000;
__HI(x) = hx|(i^0x3ff00000); /* normalize x or x/2 */
k += (i>>20);
f = x-1.0;
if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */
if(f==zero) if(k==0) return zero; else {dk=(double)k;
return dk*ln2_hi+dk*ln2_lo;}
R = f*f*(0.5-0.33333333333333333*f);
if(k==0) return f-R; else {dk=(double)k;
return dk*ln2_hi-((R-dk*ln2_lo)-f);}
}
s = f/(2.0+f);
dk = (double)k;
z = s*s;
i = hx-0x6147a;
w = z*z;
j = 0x6b851-hx;
t1= w*(Lg2+w*(Lg4+w*Lg6));
t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
i |= j;
R = t2+t1;
if(i>0) {
hfsq=0.5*f*f;
if(k==0) return f-(hfsq-s*(hfsq+R)); else
return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
} else {
if(k==0) return f-s*(f-R); else
return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
}
}
k = 0;
if (hx < 0x00100000) /* x < 2**-1022 */
{
if (((hx & 0x7fffffff) | lx) == 0) /* log(+-0) = -inf */
{
return -two54 / zero;
}
if (hx < 0) /* log(-#) = NaN */
{
return (x - x) / zero;
}
k -= 54;
x *= two54; /* subnormal number, scale up x */
hx = __HI (x); /* high word of x */
}
if (hx >= 0x7ff00000)
{
return x + x;
}
k += (hx >> 20) - 1023;
hx &= 0x000fffff;
i = (hx + 0x95f64) & 0x100000;
__HI (x) = hx | (i ^ 0x3ff00000); /* normalize x or x / 2 */
k += (i >> 20);
f = x - 1.0;
if ((0x000fffff & (2 + hx)) < 3) /* |f| < 2**-20 */
{
if (f == zero)
{
if (k == 0)
{
return zero;
}
else
{
dk = (double) k;
return dk * ln2_hi + dk * ln2_lo;
}
}
R = f * f * (0.5 - 0.33333333333333333 * f);
if (k == 0)
{
return f - R;
}
else
{
dk = (double) k;
return dk * ln2_hi - ((R - dk * ln2_lo) - f);
}
}
s = f / (2.0 + f);
dk = (double) k;
z = s * s;
i = hx - 0x6147a;
w = z * z;
j = 0x6b851 - hx;
t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
i |= j;
R = t2 + t1;
if (i > 0)
{
hfsq = 0.5 * f * f;
if (k == 0)
{
return f - (hfsq - s * (hfsq + R));
}
else
{
return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
}
}
else
{
if (k == 0)
{
return f - s * (f - R);
}
else
{
return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
}
}
} /* log */

View File

@ -5,50 +5,50 @@
* Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* pow(x,y) return x**y
*
* n
* n
* Method: Let x = 2 * (1+f)
* 1. Compute and return log2(x) in two pieces:
* log2(x) = w1 + w2,
* where w1 has 53-24 = 29 bit trailing zeros.
* 2. Perform y*log2(x) = n+y' by simulating muti-precision
* arithmetic, where |y'|<=0.5.
* 3. Return x**y = 2**n*exp(y'*log2)
* 1. Compute and return log2(x) in two pieces:
* log2(x) = w1 + w2,
* where w1 has 53-24 = 29 bit trailing zeros.
* 2. Perform y*log2(x) = n+y' by simulating muti-precision
* arithmetic, where |y'|<=0.5.
* 3. Return x**y = 2**n*exp(y'*log2)
*
* Special cases:
* 1. (anything) ** 0 is 1
* 2. (anything) ** 1 is itself
* 3. (anything) ** NAN is NAN
* 4. NAN ** (anything except 0) is NAN
* 5. +-(|x| > 1) ** +INF is +INF
* 6. +-(|x| > 1) ** -INF is +0
* 7. +-(|x| < 1) ** +INF is +0
* 8. +-(|x| < 1) ** -INF is +INF
* 9. +-1 ** +-INF is NAN
* 10. +0 ** (+anything except 0, NAN) is +0
* 11. -0 ** (+anything except 0, NAN, odd integer) is +0
* 12. +0 ** (-anything except 0, NAN) is +INF
* 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
* 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
* 15. +INF ** (+anything except 0,NAN) is +INF
* 16. +INF ** (-anything except 0,NAN) is +0
* 17. -INF ** (anything) = -0 ** (-anything)
* 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
* 19. (-anything except 0 and inf) ** (non-integer) is NAN
* 1. (anything) ** 0 is 1
* 2. (anything) ** 1 is itself
* 3. (anything) ** NAN is NAN
* 4. NAN ** (anything except 0) is NAN
* 5. +-(|x| > 1) ** +INF is +INF
* 6. +-(|x| > 1) ** -INF is +0
* 7. +-(|x| < 1) ** +INF is +0
* 8. +-(|x| < 1) ** -INF is +INF
* 9. +-1 ** +-INF is NAN
* 10. +0 ** (+anything except 0, NAN) is +0
* 11. -0 ** (+anything except 0, NAN, odd integer) is +0
* 12. +0 ** (-anything except 0, NAN) is +INF
* 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
* 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
* 15. +INF ** (+anything except 0,NAN) is +INF
* 16. +INF ** (-anything except 0,NAN) is +0
* 17. -INF ** (anything) = -0 ** (-anything)
* 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
* 19. (-anything except 0 and inf) ** (non-integer) is NAN
*
* Accuracy:
* pow(x,y) returns x**y nearly rounded. In particular
* pow(integer,integer)
* always returns the correct integer provided it is
* representable.
* pow(x,y) returns x**y nearly rounded. In particular
* pow(integer,integer)
* always returns the correct integer provided it is
* representable.
*
* Constants :
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
@ -57,242 +57,371 @@
#include "fdlibm.h"
static const double
one = 1.0,
bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
dp_l[] = { 0.0, 1.35003920212974897128e-08,}; /* 0x3E4CFDEB, 0x43CFD006 */
static const double one = 1.0;
static const double bp[] =
{
1.0,
1.5,
};
static const double dp_h[] =
{
0.0,
5.84962487220764160156e-01, /* 0x3FE2B803, 0x40000000 */
};
static const double dp_l[] =
{
0.0,
1.35003920212974897128e-08, /* 0x3E4CFDEB, 0x43CFD006 */
};
#define zero 0.0
#define two 2.0
#define two53 9007199254740992.0 /* 0x43400000, 0x00000000 */
#define two53 9007199254740992.0 /* 0x43400000, 0x00000000 */
#define huge 1.0e300
#define tiny 1.0e-300
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
#define L1 5.99999999999994648725e-01 /* 0x3FE33333, 0x33333303 */
#define L2 4.28571428578550184252e-01 /* 0x3FDB6DB6, 0xDB6FABFF */
#define L3 3.33333329818377432918e-01 /* 0x3FD55555, 0x518F264D */
#define L4 2.72728123808534006489e-01 /* 0x3FD17460, 0xA91D4101 */
#define L5 2.30660745775561754067e-01 /* 0x3FCD864A, 0x93C9DB65 */
#define L6 2.06975017800338417784e-01 /* 0x3FCA7E28, 0x4A454EEF */
#define P1 1.66666666666666019037e-01 /* 0x3FC55555, 0x5555553E */
#define P2 -2.77777777770155933842e-03 /* 0xBF66C16C, 0x16BEBD93 */
#define P3 6.61375632143793436117e-05 /* 0x3F11566A, 0xAF25DE2C */
#define P4 -1.65339022054652515390e-06 /* 0xBEBBBD41, 0xC5D26BF1 */
#define P5 4.13813679705723846039e-08 /* 0x3E663769, 0x72BEA4D0 */
#define lg2 6.93147180559945286227e-01 /* 0x3FE62E42, 0xFEFA39EF */
#define lg2_h 6.93147182464599609375e-01 /* 0x3FE62E43, 0x00000000 */
#define lg2_l -1.90465429995776804525e-09 /* 0xBE205C61, 0x0CA86C39 */
/* poly coefs for (3/2) * (log(x) - 2s - 2/3 * s**3 */
#define L1 5.99999999999994648725e-01 /* 0x3FE33333, 0x33333303 */
#define L2 4.28571428578550184252e-01 /* 0x3FDB6DB6, 0xDB6FABFF */
#define L3 3.33333329818377432918e-01 /* 0x3FD55555, 0x518F264D */
#define L4 2.72728123808534006489e-01 /* 0x3FD17460, 0xA91D4101 */
#define L5 2.30660745775561754067e-01 /* 0x3FCD864A, 0x93C9DB65 */
#define L6 2.06975017800338417784e-01 /* 0x3FCA7E28, 0x4A454EEF */
#define P1 1.66666666666666019037e-01 /* 0x3FC55555, 0x5555553E */
#define P2 -2.77777777770155933842e-03 /* 0xBF66C16C, 0x16BEBD93 */
#define P3 6.61375632143793436117e-05 /* 0x3F11566A, 0xAF25DE2C */
#define P4 -1.65339022054652515390e-06 /* 0xBEBBBD41, 0xC5D26BF1 */
#define P5 4.13813679705723846039e-08 /* 0x3E663769, 0x72BEA4D0 */
#define lg2 6.93147180559945286227e-01 /* 0x3FE62E42, 0xFEFA39EF */
#define lg2_h 6.93147182464599609375e-01 /* 0x3FE62E43, 0x00000000 */
#define lg2_l -1.90465429995776804525e-09 /* 0xBE205C61, 0x0CA86C39 */
#define ovt 8.0085662595372944372e-0017 /* -(1024-log2(ovfl+.5ulp)) */
#define cp 9.61796693925975554329e-01 /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
#define cp_h 9.61796700954437255859e-01 /* 0x3FEEC709, 0xE0000000 =(float)cp */
#define cp_l -7.02846165095275826516e-09 /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
#define ivln2 1.44269504088896338700e+00 /* 0x3FF71547, 0x652B82FE =1/ln2 */
#define ivln2_h 1.44269502162933349609e+00 /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
#define ivln2_l 1.92596299112661746887e-08 /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
#define cp 9.61796693925975554329e-01 /* 0x3FEEC709, 0xDC3A03FD = 2 / (3 ln2) */
#define cp_h 9.61796700954437255859e-01 /* 0x3FEEC709, 0xE0000000 = (float) cp */
#define cp_l -7.02846165095275826516e-09 /* 0xBE3E2FE0, 0x145B01F5 = tail of cp_h */
#define ivln2 1.44269504088896338700e+00 /* 0x3FF71547, 0x652B82FE = 1 / ln2 */
#define ivln2_h 1.44269502162933349609e+00 /* 0x3FF71547, 0x60000000 = 24b 1 / ln2 */
#define ivln2_l 1.92596299112661746887e-08 /* 0x3E54AE0B, 0xF85DDF44 = 1 / ln2 tail */
double pow(double x, double y)
double
pow (double x, double y)
{
double z,ax,z_h,z_l,p_h,p_l;
double y1,t1,t2,r,s,t,u,v,w;
int i0,i1,i,j,k,yisint,n;
int hx,hy,ix,iy;
unsigned lx,ly;
double z, ax, z_h, z_l, p_h, p_l;
double y1, t1, t2, r, s, t, u, v, w;
int i0, i1, i, j, k, yisint, n;
int hx, hy, ix, iy;
unsigned lx, ly;
i0 = ((*(int*)&one)>>29)^1; i1=1-i0;
hx = __HI(x); lx = __LO(x);
hy = __HI(y); ly = __LO(y);
ix = hx&0x7fffffff; iy = hy&0x7fffffff;
i0 = ((*(int *) &one) >> 29) ^ 1;
i1 = 1 - i0;
hx = __HI (x);
lx = __LO (x);
hy = __HI (y);
ly = __LO (y);
ix = hx & 0x7fffffff;
iy = hy & 0x7fffffff;
/* y==zero: x**0 = 1 */
if((iy|ly)==0) return one;
/* y == zero: x**0 = 1 */
if ((iy | ly) == 0)
{
return one;
}
/* +-NaN return x+y */
if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0)))
return x+y;
/* +-NaN return x + y */
if (ix > 0x7ff00000 || ((ix == 0x7ff00000) && (lx != 0)) || iy > 0x7ff00000 || ((iy == 0x7ff00000) && (ly != 0)))
{
return x + y;
}
/* determine if y is an odd int when x < 0
* yisint = 0 ... y is not an integer
* yisint = 1 ... y is an odd int
* yisint = 2 ... y is an even int
*/
yisint = 0;
if(hx<0) {
if(iy>=0x43400000) yisint = 2; /* even integer y */
else if(iy>=0x3ff00000) {
k = (iy>>20)-0x3ff; /* exponent */
if(k>20) {
j = ly>>(52-k);
if((j<<(52-k))==ly) yisint = 2-(j&1);
} else if(ly==0) {
j = iy>>(20-k);
if((j<<(20-k))==iy) yisint = 2-(j&1);
}
}
}
/* determine if y is an odd int when x < 0
* yisint = 0 ... y is not an integer
* yisint = 1 ... y is an odd int
* yisint = 2 ... y is an even int
*/
yisint = 0;
if (hx < 0)
{
if (iy >= 0x43400000) /* even integer y */
{
yisint = 2;
}
else if (iy >= 0x3ff00000)
{
k = (iy >> 20) - 0x3ff; /* exponent */
if (k > 20)
{
j = ly >> (52 - k);
if ((j << (52 - k)) == ly)
{
yisint = 2 - (j & 1);
}
}
else if (ly == 0)
{
j = iy >> (20 - k);
if ((j << (20 - k)) == iy)
{
yisint = 2 - (j & 1);
}
}
}
}
/* special value of y */
if(ly==0) {
if (iy==0x7ff00000) { /* y is +-inf */
if(((ix-0x3ff00000)|lx)==0)
return y - y; /* inf**+-1 is NaN */
else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */
return (hy>=0)? y: zero;
else /* (|x|<1)**-,+inf = inf,0 */
return (hy<0)?-y: zero;
}
if(iy==0x3ff00000) { /* y is +-1 */
if(hy<0) return one/x; else return x;
}
if(hy==0x40000000) return x*x; /* y is 2 */
if(hy==0x3fe00000) { /* y is 0.5 */
if(hx>=0) /* x >= +0 */
return sqrt(x);
}
}
/* special value of y */
if (ly == 0)
{
if (iy == 0x7ff00000) /* y is +-inf */
{
if (((ix - 0x3ff00000) | lx) == 0) /* inf**+-1 is NaN */
{
return y - y;
}
else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */
{
return (hy >= 0) ? y : zero;
}
else /* (|x|<1)**-,+inf = inf,0 */
{
return (hy < 0) ? -y : zero;
}
}
if (iy == 0x3ff00000) /* y is +-1 */
{
if (hy < 0)
{
return one / x;
}
else
{
return x;
}
}
if (hy == 0x40000000) /* y is 2 */
{
return x * x;
}
if (hy == 0x3fe00000) /* y is 0.5 */
{
if (hx >= 0) /* x >= +0 */
{
return sqrt (x);
}
}
}
ax = fabs(x);
/* special value of x */
if(lx==0) {
if(ix==0x7ff00000||ix==0||ix==0x3ff00000){
z = ax; /*x is +-0,+-inf,+-1*/
if(hy<0) z = one/z; /* z = (1/|x|) */
if(hx<0) {
if(((ix-0x3ff00000)|yisint)==0) {
z = (z-z)/(z-z); /* (-1)**non-int is NaN */
} else if(yisint==1)
z = -z; /* (x<0)**odd = -(|x|**odd) */
}
return z;
}
}
ax = fabs (x);
/* special value of x */
if (lx == 0)
{
if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
{
z = ax; /* x is +-0,+-inf,+-1 */
if (hy < 0)
{
z = one / z; /* z = (1 / |x|) */
}
if (hx < 0)
{
if (((ix - 0x3ff00000) | yisint) == 0)
{
z = (z - z) / (z - z); /* (-1)**non-int is NaN */
}
else if (yisint == 1)
{
z = -z; /* (x<0)**odd = -(|x|**odd) */
}
}
return z;
}
}
n = (hx>>31)+1;
n = (hx >> 31) + 1;
/* (x<0)**(non-int) is NaN */
if((n|yisint)==0) return (x-x)/(x-x);
/* (x<0)**(non-int) is NaN */
if ((n | yisint) == 0)
{
return (x - x) / (x - x);
}
s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
if((n|(yisint-1))==0) s = -one;/* (-ve)**(odd int) */
s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
if ((n | (yisint - 1)) == 0)
{
s = -one; /* (-ve)**(odd int) */
}
/* |y| is huge */
if(iy>0x41e00000) { /* if |y| > 2**31 */
if(iy>0x43f00000){ /* if |y| > 2**64, must o/uflow */
if(ix<=0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
}
/* over/underflow if x is not close to one */
if(ix<0x3fefffff) return (hy<0)? s*huge*huge:s*tiny*tiny;
if(ix>0x3ff00000) return (hy>0)? s*huge*huge:s*tiny*tiny;
/* now |1-x| is tiny <= 2**-20, suffice to compute
log(x) by x-x^2/2+x^3/3-x^4/4 */
t = ax-one; /* t has 20 trailing zeros */
w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
u = ivln2_h*t; /* ivln2_h has 21 sig. bits */
v = t*ivln2_l-w*ivln2;
t1 = u+v;
__LO(t1) = 0;
t2 = v-(t1-u);
} else {
double ss,s2,s_h,s_l,t_h,t_l;
n = 0;
/* take care subnormal number */
if(ix<0x00100000)
{ax *= two53; n -= 53; ix = __HI(ax); }
n += ((ix)>>20)-0x3ff;
j = ix&0x000fffff;
/* determine interval */
ix = j|0x3ff00000; /* normalize ix */
if(j<=0x3988E) k=0; /* |x|<sqrt(3/2) */
else if(j<0xBB67A) k=1; /* |x|<sqrt(3) */
else {k=0;n+=1;ix -= 0x00100000;}
__HI(ax) = ix;
/* |y| is huge */
if (iy > 0x41e00000) /* if |y| > 2**31 */
{
if (iy > 0x43f00000) /* if |y| > 2**64, must o/uflow */
{
if (ix <= 0x3fefffff)
{
return (hy < 0) ? huge * huge : tiny * tiny;
}
if (ix >= 0x3ff00000)
{
return (hy > 0) ? huge * huge : tiny * tiny;
}
}
/* over/underflow if x is not close to one */
if (ix < 0x3fefffff)
{
return (hy < 0) ? s * huge * huge : s * tiny * tiny;
}
if (ix > 0x3ff00000)
{
return (hy > 0) ? s * huge * huge : s * tiny * tiny;
}
/* now |1 - x| is tiny <= 2**-20, suffice to compute
log(x) by x - x^2 / 2 + x^3 / 3 - x^4 / 4 */
t = ax - one; /* t has 20 trailing zeros */
w = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25));
u = ivln2_h * t; /* ivln2_h has 21 sig. bits */
v = t * ivln2_l - w * ivln2;
t1 = u + v;
__LO (t1) = 0;
t2 = v - (t1 - u);
}
else
{
double ss, s2, s_h, s_l, t_h, t_l;
/* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
v = one/(ax+bp[k]);
ss = u*v;
s_h = ss;
__LO(s_h) = 0;
/* t_h=ax+bp[k] High */
t_h = zero;
__HI(t_h)=((ix>>1)|0x20000000)+0x00080000+(k<<18);
t_l = ax - (t_h-bp[k]);
s_l = v*((u-s_h*t_h)-s_h*t_l);
/* compute log(ax) */
s2 = ss*ss;
r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
r += s_l*(s_h+ss);
s2 = s_h*s_h;
t_h = 3.0+s2+r;
__LO(t_h) = 0;
t_l = r-((t_h-3.0)-s2);
/* u+v = ss*(1+...) */
u = s_h*t_h;
v = s_l*t_h+t_l*ss;
/* 2/(3log2)*(ss+...) */
p_h = u+v;
__LO(p_h) = 0;
p_l = v-(p_h-u);
z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
z_l = cp_l*p_h+p_l*cp+dp_l[k];
/* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
t = (double)n;
t1 = (((z_h+z_l)+dp_h[k])+t);
__LO(t1) = 0;
t2 = z_l-(((t1-t)-dp_h[k])-z_h);
}
n = 0;
/* take care subnormal number */
if (ix < 0x00100000)
{
ax *= two53;
n -= 53;
ix = __HI (ax);
}
n += ((ix) >> 20) - 0x3ff;
j = ix & 0x000fffff;
/* determine interval */
ix = j | 0x3ff00000; /* normalize ix */
if (j <= 0x3988E) /* |x| < sqrt(3/2) */
{
k = 0;
}
else if (j < 0xBB67A) /* |x| < sqrt(3) */
{
k = 1;
}
else
{
k = 0;
n += 1;
ix -= 0x00100000;
}
__HI (ax) = ix;
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
y1 = y;
__LO(y1) = 0;
p_l = (y-y1)*t1+y*t2;
p_h = y1*t1;
z = p_l+p_h;
j = __HI(z);
i = __LO(z);
if (j>=0x40900000) { /* z >= 1024 */
if(((j-0x40900000)|i)!=0) /* if z > 1024 */
return s*huge*huge; /* overflow */
else {
if(p_l+ovt>z-p_h) return s*huge*huge; /* overflow */
}
} else if((j&0x7fffffff)>=0x4090cc00 ) { /* z <= -1075 */
if(((j-0xc090cc00)|i)!=0) /* z < -1075 */
return s*tiny*tiny; /* underflow */
else {
if(p_l<=z-p_h) return s*tiny*tiny; /* underflow */
}
}
/*
* compute 2**(p_h+p_l)
*/
i = j&0x7fffffff;
k = (i>>20)-0x3ff;
n = 0;
if(i>0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */
n = j+(0x00100000>>(k+1));
k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */
t = zero;
__HI(t) = (n&~(0x000fffff>>k));
n = ((n&0x000fffff)|0x00100000)>>(20-k);
if(j<0) n = -n;
p_h -= t;
}
t = p_l+p_h;
__LO(t) = 0;
u = t*lg2_h;
v = (p_l-(t-p_h))*lg2+t*lg2_l;
z = u+v;
w = v-(z-u);
t = z*z;
t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
r = (z*t1)/(t1-two)-(w+z*w);
z = one-(r-z);
j = __HI(z);
j += (n<<20);
if((j>>20)<=0) z = scalbn(z,n); /* subnormal output */
else __HI(z) += (n<<20);
return s*z;
}
/* compute ss = s_h + s_l = (x - 1) / (x + 1) or (x - 1.5) / (x + 1.5) */
u = ax - bp[k]; /* bp[0] = 1.0, bp[1] = 1.5 */
v = one / (ax + bp[k]);
ss = u * v;
s_h = ss;
__LO (s_h) = 0;
/* t_h = ax + bp[k] High */
t_h = zero;
__HI (t_h) = ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18);
t_l = ax - (t_h - bp[k]);
s_l = v * ((u - s_h * t_h) - s_h * t_l);
/* compute log(ax) */
s2 = ss * ss;
r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));
r += s_l * (s_h + ss);
s2 = s_h * s_h;
t_h = 3.0 + s2 + r;
__LO (t_h) = 0;
t_l = r - ((t_h - 3.0) - s2);
/* u + v = ss * (1 + ...) */
u = s_h * t_h;
v = s_l * t_h + t_l * ss;
/* 2 / (3 * log2) * (ss + ...) */
p_h = u + v;
__LO (p_h) = 0;
p_l = v - (p_h - u);
z_h = cp_h * p_h; /* cp_h + cp_l = 2 / (3 * log2) */
z_l = cp_l * p_h + p_l * cp + dp_l[k];
/* log2(ax) = (ss + ...) * 2 / (3 * log2) = n + dp_h + z_h + z_l */
t = (double) n;
t1 = (((z_h + z_l) + dp_h[k]) + t);
__LO (t1) = 0;
t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
}
/* split up y into y1 + y2 and compute (y1 + y2) * (t1 + t2) */
y1 = y;
__LO (y1) = 0;
p_l = (y - y1) * t1 + y * t2;
p_h = y1 * t1;
z = p_l + p_h;
j = __HI (z);
i = __LO (z);
if (j >= 0x40900000) /* z >= 1024 */
{
if (((j - 0x40900000) | i) != 0) /* if z > 1024 */
{
return s * huge * huge; /* overflow */
}
else
{
if (p_l + ovt > z - p_h)
{
return s * huge * huge; /* overflow */
}
}
}
else if ((j & 0x7fffffff) >= 0x4090cc00) /* z <= -1075 */
{
if (((j - 0xc090cc00) | i) != 0) /* z < -1075 */
{
return s * tiny * tiny; /* underflow */
}
else
{
if (p_l <= z - p_h)
{
return s * tiny * tiny; /* underflow */
}
}
}
/*
* compute 2**(p_h + p_l)
*/
i = j & 0x7fffffff;
k = (i >> 20) - 0x3ff;
n = 0;
if (i > 0x3fe00000) /* if |z| > 0.5, set n = [z + 0.5] */
{
n = j + (0x00100000 >> (k + 1));
k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */
t = zero;
__HI (t) = (n & ~(0x000fffff >> k));
n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
if (j < 0)
{
n = -n;
}
p_h -= t;
}
t = p_l + p_h;
__LO (t) = 0;
u = t * lg2_h;
v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
z = u + v;
w = v - (z - u);
t = z * z;
t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
r = (z * t1) / (t1 - two) - (w + z * w);
z = one - (r - z);
j = __HI (z);
j += (n << 20);
if ((j >> 20) <= 0) /* subnormal output */
{
z = scalbn (z, n);
}
else
{
__HI (z) += (n << 20);
}
return s * z;
} /* pow */

View File

@ -6,14 +6,12 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* scalbn (double x, int n)
* scalbn(x,n) returns x* 2**n computed by exponent
/* scalbn(x,n) returns x* 2**n computed by exponent
* manipulation rather than by actually performing an
* exponentiation or a multiplication.
*/
@ -25,29 +23,54 @@
#define huge 1.0e+300
#define tiny 1.0e-300
double scalbn (double x, int n)
double
scalbn (double x, int n)
{
int k,hx,lx;
hx = __HI(x);
lx = __LO(x);
k = (hx&0x7ff00000)>>20; /* extract exponent */
if (k==0) { /* 0 or subnormal x */
if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
x *= two54;
hx = __HI(x);
k = ((hx&0x7ff00000)>>20) - 54;
if (n< -50000) return tiny*x; /*underflow*/
}
if (k==0x7ff) return x+x; /* NaN or Inf */
k = k+n;
if (k > 0x7fe) return huge*copysign(huge,x); /* overflow */
if (k > 0) /* normal result */
{__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
if (k <= -54)
if (n > 50000) /* in case integer overflow in n+k */
return huge*copysign(huge,x); /*overflow*/
else return tiny*copysign(tiny,x); /*underflow*/
k += 54; /* subnormal result */
__HI(x) = (hx&0x800fffff)|(k<<20);
return x*twom54;
}
int k, hx, lx;
hx = __HI (x);
lx = __LO (x);
k = (hx & 0x7ff00000) >> 20; /* extract exponent */
if (k == 0) /* 0 or subnormal x */
{
if ((lx | (hx & 0x7fffffff)) == 0) /* +-0 */
{
return x;
}
x *= two54;
hx = __HI (x);
k = ((hx & 0x7ff00000) >> 20) - 54;
if (n < -50000) /*underflow */
{
return tiny * x;
}
}
if (k == 0x7ff) /* NaN or Inf */
{
return x + x;
}
k = k + n;
if (k > 0x7fe) /* overflow */
{
return huge * copysign (huge, x);
}
if (k > 0) /* normal result */
{
__HI (x) = (hx & 0x800fffff) | (k << 20);
return x;
}
if (k <= -54)
{
if (n > 50000) /* in case integer overflow in n + k */
{
return huge * copysign (huge, x); /*overflow */
}
else
{
return tiny * copysign (tiny, x); /*underflow */
}
}
k += 54; /* subnormal result */
__HI (x) = (hx & 0x800fffff) | (k << 20);
return x * twom54;
} /* scalbn */

View File

@ -6,79 +6,80 @@
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* sqrt(x)
* Return correctly rounded sqrt.
*
* ------------------------------------------
* | Use the hardware sqrt if you have one |
* | Use the hardware sqrt if you have one |
* ------------------------------------------
*
* Method:
* Bit by bit method using integer arithmetic. (Slow, but portable)
* 1. Normalization
* Scale x to y in [1,4) with even powers of 2:
* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
* sqrt(x) = 2^k * sqrt(y)
* Scale x to y in [1,4) with even powers of 2:
* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
* sqrt(x) = 2^k * sqrt(y)
* 2. Bit by bit computation
* Let q = sqrt(y) truncated to i bit after binary point (q = 1),
* i 0
* Let q = sqrt(y) truncated to i bit after binary point (q = 1),
* i 0
* i+1 2
* s = 2*q , and y = 2 * ( y - q ). (1)
* i i i i
* s = 2*q , and y = 2 * ( y - q ). (1)
* i i i i
*
* To compute q from q , one checks whether
* i+1 i
* To compute q from q , one checks whether
* i+1 i
*
* -(i+1) 2
* (q + 2 ) <= y. (2)
* i
* -(i+1)
* If (2) is false, then q = q ; otherwise q = q + 2 .
* i+1 i i+1 i
* -(i+1) 2
* (q + 2 ) <= y. (2)
* i
* -(i+1)
* If (2) is false, then q = q ; otherwise q = q + 2 .
* i+1 i i+1 i
*
* With some algebric manipulation, it is not difficult to see
* that (2) is equivalent to
* With some algebric manipulation, it is not difficult to see
* that (2) is equivalent to
* -(i+1)
* s + 2 <= y (3)
* i i
* s + 2 <= y (3)
* i i
*
* The advantage of (3) is that s and y can be computed by
* i i
* the following recurrence formula:
* if (3) is false
* The advantage of (3) is that s and y can be computed by
* i i
* the following recurrence formula:
* if (3) is false
*
* s = s , y = y ; (4)
* i+1 i i+1 i
* s = s , y = y ; (4)
* i+1 i i+1 i
*
* otherwise,
* otherwise,
* -i -(i+1)
* s = s + 2 , y = y - s - 2 (5)
* s = s + 2 , y = y - s - 2 (5)
* i+1 i i+1 i i
*
* One may easily use induction to prove (4) and (5).
* Note. Since the left hand side of (3) contain only i+2 bits,
* it does not necessary to do a full (53-bit) comparison
* in (3).
* One may easily use induction to prove (4) and (5).
* Note. Since the left hand side of (3) contain only i+2 bits,
* it does not necessary to do a full (53-bit) comparison
* in (3).
* 3. Final rounding
* After generating the 53 bits result, we compute one more bit.
* Together with the remainder, we can decide whether the
* result is exact, bigger than 1/2ulp, or less than 1/2ulp
* (it will never equal to 1/2ulp).
* The rounding mode can be detected by checking whether
* huge + tiny is equal to huge, and whether huge - tiny is
* equal to huge for some floating point number "huge" and "tiny".
* After generating the 53 bits result, we compute one more bit.
* Together with the remainder, we can decide whether the
* result is exact, bigger than 1/2ulp, or less than 1/2ulp
* (it will never equal to 1/2ulp).
* The rounding mode can be detected by checking whether
* huge + tiny is equal to huge, and whether huge - tiny is
* equal to huge for some floating point number "huge" and "tiny".
*
* Special cases:
* sqrt(+-0) = +-0 ... exact
* sqrt(inf) = inf
* sqrt(-ve) = NaN ... with invalid signal
* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
* sqrt(+-0) = +-0 ... exact
* sqrt(inf) = inf
* sqrt(-ve) = NaN ... with invalid signal
* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
*
* Other methods : see the appended file at the end of the program below.
*---------------
* Other methods: see the appended file at the end of the program below.
*/
#include "fdlibm.h"
@ -86,103 +87,143 @@
#define one 1.0
#define tiny 1.0e-300
double sqrt(double x)
double
sqrt (double x)
{
double z;
int sign = (int)0x80000000;
unsigned r,t1,s1,ix1,q1;
int ix0,s0,q,m,t,i;
double z;
int sign = (int) 0x80000000;
unsigned r, t1, s1, ix1, q1;
int ix0, s0, q, m, t, i;
ix0 = __HI(x); /* high word of x */
ix1 = __LO(x); /* low word of x */
ix0 = __HI (x); /* high word of x */
ix1 = __LO (x); /* low word of x */
/* take care of Inf and NaN */
if((ix0&0x7ff00000)==0x7ff00000) {
return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
sqrt(-inf)=sNaN */
}
/* take care of zero */
if(ix0<=0) {
if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
else if(ix0<0)
return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
}
/* normalize x */
m = (ix0>>20);
if(m==0) { /* subnormal x */
while(ix0==0) {
m -= 21;
ix0 |= (ix1>>11); ix1 <<= 21;
}
for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
m -= i-1;
ix0 |= (ix1>>(32-i));
ix1 <<= i;
}
m -= 1023; /* unbias exponent */
ix0 = (ix0&0x000fffff)|0x00100000;
if(m&1){ /* odd m, double x to make it even */
ix0 += ix0 + ((ix1&sign)>>31);
ix1 += ix1;
}
m >>= 1; /* m = [m/2] */
/* take care of Inf and NaN */
if ((ix0 & 0x7ff00000) == 0x7ff00000)
{
return x * x + x; /* sqrt(NaN) = NaN, sqrt(+inf) = +inf, sqrt(-inf) = sNaN */
}
/* take care of zero */
if (ix0 <= 0)
{
if (((ix0 & (~sign)) | ix1) == 0) /* sqrt(+-0) = +-0 */
{
return x;
}
else if (ix0 < 0) /* sqrt(-ve) = sNaN */
{
return (x - x) / (x - x);
}
}
/* normalize x */
m = (ix0 >> 20);
if (m == 0) /* subnormal x */
{
while (ix0 == 0)
{
m -= 21;
ix0 |= (ix1 >> 11);
ix1 <<= 21;
}
for (i = 0; (ix0 & 0x00100000) == 0; i++)
{
ix0 <<= 1;
}
m -= i - 1;
ix0 |= (ix1 >> (32 - i));
ix1 <<= i;
}
m -= 1023; /* unbias exponent */
ix0 = (ix0 & 0x000fffff) | 0x00100000;
if (m & 1) /* odd m, double x to make it even */
{
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
}
m >>= 1; /* m = [m / 2] */
/* generate sqrt(x) bit by bit */
ix0 += ix0 + ((ix1&sign)>>31);
ix1 += ix1;
q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
r = 0x00200000; /* r = moving bit from right to left */
/* generate sqrt(x) bit by bit */
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
r = 0x00200000; /* r = moving bit from right to left */
while(r!=0) {
t = s0+r;
if(t<=ix0) {
s0 = t+r;
ix0 -= t;
q += r;
}
ix0 += ix0 + ((ix1&sign)>>31);
ix1 += ix1;
r>>=1;
}
while (r != 0)
{
t = s0 + r;
if (t <= ix0)
{
s0 = t + r;
ix0 -= t;
q += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
r >>= 1;
}
r = sign;
while(r!=0) {
t1 = s1+r;
t = s0;
if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
s1 = t1+r;
if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
ix0 -= t;
if (ix1 < t1) ix0 -= 1;
ix1 -= t1;
q1 += r;
}
ix0 += ix0 + ((ix1&sign)>>31);
ix1 += ix1;
r>>=1;
}
r = sign;
while (r != 0)
{
t1 = s1 + r;
t = s0;
if ((t < ix0) || ((t == ix0) && (t1 <= ix1)))
{
s1 = t1 + r;
if (((t1 & sign) == sign) && (s1 & sign) == 0)
{
s0 += 1;
}
ix0 -= t;
if (ix1 < t1)
{
ix0 -= 1;
}
ix1 -= t1;
q1 += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31);
ix1 += ix1;
r >>= 1;
}
/* use floating add to find out rounding direction */
if((ix0|ix1)!=0) {
z = one-tiny; /* trigger inexact flag */
if (z>=one) {
z = one+tiny;
if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
else if (z>one) {
if (q1==(unsigned)0xfffffffe) q+=1;
q1+=2;
} else
q1 += (q1&1);
}
}
ix0 = (q>>1)+0x3fe00000;
ix1 = q1>>1;
if ((q&1)==1) ix1 |= sign;
ix0 += (m <<20);
__HI(z) = ix0;
__LO(z) = ix1;
return z;
}
/* use floating add to find out rounding direction */
if ((ix0 | ix1) != 0)
{
z = one - tiny; /* trigger inexact flag */
if (z >= one)
{
z = one + tiny;
if (q1 == (unsigned) 0xffffffff)
{
q1 = 0;
q += 1;
}
else if (z > one)
{
if (q1 == (unsigned) 0xfffffffe)
{
q += 1;
}
q1 += 2;
}
else
{
q1 += (q1 & 1);
}
}
}
ix0 = (q >> 1) + 0x3fe00000;
ix1 = q1 >> 1;
if ((q & 1) == 1)
{
ix1 |= sign;
}
ix0 += (m << 20);
__HI (z) = ix0;
__LO (z) = ix1;
return z;
} /* sqrt */
/*
Other methods (use floating-point arithmetic)
@ -190,253 +231,252 @@ Other methods (use floating-point arithmetic)
(This is a copy of a drafted paper by Prof W. Kahan
and K.C. Ng, written in May, 1986)
Two algorithms are given here to implement sqrt(x)
(IEEE double precision arithmetic) in software.
Both supply sqrt(x) correctly rounded. The first algorithm (in
Section A) uses newton iterations and involves four divisions.
The second one uses reciproot iterations to avoid division, but
requires more multiplications. Both algorithms need the ability
to chop results of arithmetic operations instead of round them,
and the INEXACT flag to indicate when an arithmetic operation
is executed exactly with no roundoff error, all part of the
standard (IEEE 754-1985). The ability to perform shift, add,
subtract and logical AND operations upon 32-bit words is needed
too, though not part of the standard.
Two algorithms are given here to implement sqrt(x)
(IEEE double precision arithmetic) in software.
Both supply sqrt(x) correctly rounded. The first algorithm (in
Section A) uses newton iterations and involves four divisions.
The second one uses reciproot iterations to avoid division, but
requires more multiplications. Both algorithms need the ability
to chop results of arithmetic operations instead of round them,
and the INEXACT flag to indicate when an arithmetic operation
is executed exactly with no roundoff error, all part of the
standard (IEEE 754-1985). The ability to perform shift, add,
subtract and logical AND operations upon 32-bit words is needed
too, though not part of the standard.
A. sqrt(x) by Newton Iteration
(1) Initial approximation
(1) Initial approximation
Let x0 and x1 be the leading and the trailing 32-bit words of
a floating point number x (in IEEE double format) respectively
Let x0 and x1 be the leading and the trailing 32-bit words of
a floating point number x (in IEEE double format) respectively
1 11 52 ...widths
------------------------------------------------------
x: |s| e | f |
------------------------------------------------------
msb lsb msb lsb ...order
1 11 52 ...widths
------------------------------------------------------
x: |s| e | f |
------------------------------------------------------
msb lsb msb lsb ...order
------------------------ ------------------------
x0: |s| e | f1 | x1: | f2 |
------------------------ ------------------------
------------------------ ------------------------
x0: |s| e | f1 | x1: | f2 |
------------------------ ------------------------
By performing shifts and subtracts on x0 and x1 (both regarded
as integers), we obtain an 8-bit approximation of sqrt(x) as
follows.
By performing shifts and subtracts on x0 and x1 (both regarded
as integers), we obtain an 8-bit approximation of sqrt(x) as
follows.
k := (x0>>1) + 0x1ff80000;
y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits
Here k is a 32-bit integer and T1[] is an integer array containing
correction terms. Now magically the floating value of y (y's
leading 32-bit word is y0, the value of its trailing word is 0)
approximates sqrt(x) to almost 8-bit.
k := (x0>>1) + 0x1ff80000;
y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits
Here k is a 32-bit integer and T1[] is an integer array containing
correction terms. Now magically the floating value of y (y's
leading 32-bit word is y0, the value of its trailing word is 0)
approximates sqrt(x) to almost 8-bit.
Value of T1:
static int T1[32]= {
0, 1024, 3062, 5746, 9193, 13348, 18162, 23592,
29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215,
83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581,
16499, 12183, 8588, 5674, 3403, 1742, 661, 130,};
Value of T1:
static int T1[32]= {
0, 1024, 3062, 5746, 9193, 13348, 18162, 23592,
29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215,
83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581,
16499, 12183, 8588, 5674, 3403, 1742, 661, 130,};
(2) Iterative refinement
(2) Iterative refinement
Apply Heron's rule three times to y, we have y approximates
sqrt(x) to within 1 ulp (Unit in the Last Place):
Apply Heron's rule three times to y, we have y approximates
sqrt(x) to within 1 ulp (Unit in the Last Place):
y := (y+x/y)/2 ... almost 17 sig. bits
y := (y+x/y)/2 ... almost 35 sig. bits
y := y-(y-x/y)/2 ... within 1 ulp
y := (y+x/y)/2 ... almost 17 sig. bits
y := (y+x/y)/2 ... almost 35 sig. bits
y := y-(y-x/y)/2 ... within 1 ulp
Remark 1.
Another way to improve y to within 1 ulp is:
Remark 1.
Another way to improve y to within 1 ulp is:
y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x)
y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x)
y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
2
(x-y )*y
y := y + 2* ---------- ...within 1 ulp
2
3y + x
2
(x-y )*y
y := y + 2* ---------- ...within 1 ulp
2
3y + x
This formula has one division fewer than the one above; however,
it requires more multiplications and additions. Also x must be
scaled in advance to avoid spurious overflow in evaluating the
expression 3y*y+x. Hence it is not recommended uless division
is slow. If division is very slow, then one should use the
reciproot algorithm given in section B.
This formula has one division fewer than the one above; however,
it requires more multiplications and additions. Also x must be
scaled in advance to avoid spurious overflow in evaluating the
expression 3y*y+x. Hence it is not recommended uless division
is slow. If division is very slow, then one should use the
reciproot algorithm given in section B.
(3) Final adjustment
By twiddling y's last bit it is possible to force y to be
correctly rounded according to the prevailing rounding mode
as follows. Let r and i be copies of the rounding mode and
inexact flag before entering the square root program. Also we
use the expression y+-ulp for the next representable floating
numbers (up and down) of y. Note that y+-ulp = either fixed
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
mode.
By twiddling y's last bit it is possible to force y to be
correctly rounded according to the prevailing rounding mode
as follows. Let r and i be copies of the rounding mode and
inexact flag before entering the square root program. Also we
use the expression y+-ulp for the next representable floating
numbers (up and down) of y. Note that y+-ulp = either fixed
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
mode.
I := FALSE; ... reset INEXACT flag I
R := RZ; ... set rounding mode to round-toward-zero
z := x/y; ... chopped quotient, possibly inexact
If(not I) then { ... if the quotient is exact
if(z=y) {
I := i; ... restore inexact flag
R := r; ... restore rounded mode
return sqrt(x):=y.
} else {
z := z - ulp; ... special rounding
}
}
i := TRUE; ... sqrt(x) is inexact
If (r=RN) then z=z+ulp ... rounded-to-nearest
If (r=RP) then { ... round-toward-+inf
y = y+ulp; z=z+ulp;
}
y := y+z; ... chopped sum
y0:=y0-0x00100000; ... y := y/2 is correctly rounded.
I := i; ... restore inexact flag
R := r; ... restore rounded mode
return sqrt(x):=y.
I := FALSE; ... reset INEXACT flag I
R := RZ; ... set rounding mode to round-toward-zero
z := x/y; ... chopped quotient, possibly inexact
If(not I) then { ... if the quotient is exact
if(z=y) {
I := i; ... restore inexact flag
R := r; ... restore rounded mode
return sqrt(x):=y.
} else {
z := z - ulp; ... special rounding
}
}
i := TRUE; ... sqrt(x) is inexact
If (r=RN) then z=z+ulp ... rounded-to-nearest
If (r=RP) then { ... round-toward-+inf
y = y+ulp; z=z+ulp;
}
y := y+z; ... chopped sum
y0:=y0-0x00100000; ... y := y/2 is correctly rounded.
I := i; ... restore inexact flag
R := r; ... restore rounded mode
return sqrt(x):=y.
(4) Special cases
(4) Special cases
Square root of +inf, +-0, or NaN is itself;
Square root of a negative number is NaN with invalid signal.
Square root of +inf, +-0, or NaN is itself;
Square root of a negative number is NaN with invalid signal.
B. sqrt(x) by Reciproot Iteration
(1) Initial approximation
(1) Initial approximation
Let x0 and x1 be the leading and the trailing 32-bit words of
a floating point number x (in IEEE double format) respectively
(see section A). By performing shifs and subtracts on x0 and y0,
we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
Let x0 and x1 be the leading and the trailing 32-bit words of
a floating point number x (in IEEE double format) respectively
(see section A). By performing shifs and subtracts on x0 and y0,
we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
k := 0x5fe80000 - (x0>>1);
y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits
k := 0x5fe80000 - (x0>>1);
y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits
Here k is a 32-bit integer and T2[] is an integer array
containing correction terms. Now magically the floating
value of y (y's leading 32-bit word is y0, the value of
its trailing word y1 is set to zero) approximates 1/sqrt(x)
to almost 7.8-bit.
Here k is a 32-bit integer and T2[] is an integer array
containing correction terms. Now magically the floating
value of y (y's leading 32-bit word is y0, the value of
its trailing word y1 is set to zero) approximates 1/sqrt(x)
to almost 7.8-bit.
Value of T2:
static int T2[64]= {
0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
Value of T2:
static int T2[64]= {
0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
(2) Iterative refinement
(2) Iterative refinement
Apply Reciproot iteration three times to y and multiply the
result by x to get an approximation z that matches sqrt(x)
to about 1 ulp. To be exact, we will have
-1ulp < sqrt(x)-z<1.0625ulp.
Apply Reciproot iteration three times to y and multiply the
result by x to get an approximation z that matches sqrt(x)
to about 1 ulp. To be exact, we will have
-1ulp < sqrt(x)-z<1.0625ulp.
... set rounding mode to Round-to-nearest
y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x)
y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
... special arrangement for better accuracy
z := x*y ... 29 bits to sqrt(x), with z*y<1
z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x)
... set rounding mode to Round-to-nearest
y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x)
y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
... special arrangement for better accuracy
z := x*y ... 29 bits to sqrt(x), with z*y<1
z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x)
Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
(a) the term z*y in the final iteration is always less than 1;
(b) the error in the final result is biased upward so that
-1 ulp < sqrt(x) - z < 1.0625 ulp
instead of |sqrt(x)-z|<1.03125ulp.
Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
(a) the term z*y in the final iteration is always less than 1;
(b) the error in the final result is biased upward so that
-1 ulp < sqrt(x) - z < 1.0625 ulp
instead of |sqrt(x)-z|<1.03125ulp.
(3) Final adjustment
(3) Final adjustment
By twiddling y's last bit it is possible to force y to be
correctly rounded according to the prevailing rounding mode
as follows. Let r and i be copies of the rounding mode and
inexact flag before entering the square root program. Also we
use the expression y+-ulp for the next representable floating
numbers (up and down) of y. Note that y+-ulp = either fixed
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
mode.
By twiddling y's last bit it is possible to force y to be
correctly rounded according to the prevailing rounding mode
as follows. Let r and i be copies of the rounding mode and
inexact flag before entering the square root program. Also we
use the expression y+-ulp for the next representable floating
numbers (up and down) of y. Note that y+-ulp = either fixed
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
mode.
R := RZ; ... set rounding mode to round-toward-zero
switch(r) {
case RN: ... round-to-nearest
if(x<= z*(z-ulp)...chopped) z = z - ulp; else
if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
break;
case RZ:case RM: ... round-to-zero or round-to--inf
R:=RP; ... reset rounding mod to round-to-+inf
if(x<z*z ... rounded up) z = z - ulp; else
if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
break;
case RP: ... round-to-+inf
if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
if(x>z*z ...chopped) z = z+ulp;
break;
}
R := RZ; ... set rounding mode to round-toward-zero
switch(r) {
case RN: ... round-to-nearest
if(x<= z*(z-ulp)...chopped) z = z - ulp; else
if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
break;
case RZ:case RM: ... round-to-zero or round-to--inf
R:=RP; ... reset rounding mod to round-to-+inf
if(x<z*z ... rounded up) z = z - ulp; else
if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
break;
case RP: ... round-to-+inf
if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
if(x>z*z ...chopped) z = z+ulp;
break;
}
Remark 3. The above comparisons can be done in fixed point. For
example, to compare x and w=z*z chopped, it suffices to compare
x1 and w1 (the trailing parts of x and w), regarding them as
two's complement integers.
Remark 3. The above comparisons can be done in fixed point. For
example, to compare x and w=z*z chopped, it suffices to compare
x1 and w1 (the trailing parts of x and w), regarding them as
two's complement integers.
...Is z an exact square root?
To determine whether z is an exact square root of x, let z1 be the
trailing part of z, and also let x0 and x1 be the leading and
trailing parts of x.
...Is z an exact square root?
To determine whether z is an exact square root of x, let z1 be the
trailing part of z, and also let x0 and x1 be the leading and
trailing parts of x.
If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
I := 1; ... Raise Inexact flag: z is not exact
else {
j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2
k := z1 >> 26; ... get z's 25-th and 26-th
fraction bits
I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
}
R:= r ... restore rounded mode
return sqrt(x):=z.
If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
I := 1; ... Raise Inexact flag: z is not exact
else {
j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2
k := z1 >> 26; ... get z's 25-th and 26-th
fraction bits
I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
}
R:= r ... restore rounded mode
return sqrt(x):=z.
If multiplication is cheaper then the foregoing red tape, the
Inexact flag can be evaluated by
If multiplication is cheaper then the foregoing red tape, the
Inexact flag can be evaluated by
I := i;
I := (z*z!=x) or I.
I := i;
I := (z*z!=x) or I.
Note that z*z can overwrite I; this value must be sensed if it is
True.
Note that z*z can overwrite I; this value must be sensed if it is
True.
Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
zero.
Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
zero.
--------------------
z1: | f2 |
--------------------
bit 31 bit 0
--------------------
z1: | f2 |
--------------------
bit 31 bit 0
Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
or even of logb(x) have the following relations:
Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
or even of logb(x) have the following relations:
-------------------------------------------------
bit 27,26 of z1 bit 1,0 of x1 logb(x)
-------------------------------------------------
00 00 odd and even
01 01 even
10 10 odd
10 00 even
11 01 even
-------------------------------------------------
(4) Special cases (see (4) of Section A).
-------------------------------------------------
bit 27,26 of z1 bit 1,0 of x1 logb(x)
-------------------------------------------------
00 00 odd and even
01 01 even
10 10 odd
10 00 even
11 01 even
-------------------------------------------------
(4) Special cases (see (4) of Section A).
*/

File diff suppressed because it is too large Load Diff