source: trunk/libs/newlib/src/newlib/libm/machine/spu/headers/sqrtd2.h

Last change on this file was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 5.8 KB
Line 
1/* --------------------------------------------------------------  */
2/* (C)Copyright 2001,2008,                                         */
3/* International Business Machines Corporation,                    */
4/* Sony Computer Entertainment, Incorporated,                      */
5/* Toshiba Corporation,                                            */
6/*                                                                 */
7/* All Rights Reserved.                                            */
8/*                                                                 */
9/* Redistribution and use in source and binary forms, with or      */
10/* without modification, are permitted provided that the           */
11/* following conditions are met:                                   */
12/*                                                                 */
13/* - Redistributions of source code must retain the above copyright*/
14/*   notice, this list of conditions and the following disclaimer. */
15/*                                                                 */
16/* - Redistributions in binary form must reproduce the above       */
17/*   copyright notice, this list of conditions and the following   */
18/*   disclaimer in the documentation and/or other materials        */
19/*   provided with the distribution.                               */
20/*                                                                 */
21/* - Neither the name of IBM Corporation nor the names of its      */
22/*   contributors may be used to endorse or promote products       */
23/*   derived from this software without specific prior written     */
24/*   permission.                                                   */
25/*                                                                 */
26/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND          */
27/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,     */
28/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF        */
29/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE        */
30/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR            */
31/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,    */
32/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT    */
33/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;    */
34/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        */
35/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN       */
36/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR    */
37/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  */
38/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              */
39/* --------------------------------------------------------------  */
40/* PROLOG END TAG zYx                                              */
41#ifdef __SPU__
42#ifndef _SQRTD2_H_
43#define _SQRTD2_H_      1
44
45#include <spu_intrinsics.h>
46
47/*
48 * FUNCTION
49 *      vector double _sqrtd2(vector double in)
50 *
51 * DESCRIPTION
52 *      The _sqrtd2 function computes the square root of the vector input "in"
53 *      and returns the result.
54 *
55 */
56static __inline vector double _sqrtd2(vector double in)
57{
58  vec_int4 bias_exp;
59  vec_uint4 exp;
60  vec_float4 fx, fg, fy, fd, fe, fy2, fhalf;
61  vec_ullong2 nochange, denorm;
62  vec_ullong2 mask = spu_splats(0x7FE0000000000000ULL);
63  vec_double2 dx, de, dd, dy, dg, dy2, dhalf;
64  vec_double2 neg;
65  vec_double2 one = spu_splats(1.0);
66  vec_double2 two_pow_52 = (vec_double2)spu_splats(0x4330000000000000ULL);
67
68  /* If the input is a denorm, then multiply it by 2^52 so that the input is no
69   * longer denormal.
70   */
71  exp = (vec_uint4)spu_and((vec_ullong2)in, spu_splats(0xFFF0000000000000ULL));
72  denorm = (vec_ullong2)spu_cmpeq(exp,0);
73
74  in = spu_mul(in, spu_sel(one, two_pow_52, denorm));
75
76  fhalf = spu_splats(0.5f);
77  dhalf = spu_splats(0.5);
78
79  /* Coerce the input, in, into the argument reduced space [0.5, 2.0).
80   */
81  dx = spu_sel(in, dhalf, mask);
82
83  /* Compute an initial single precision guess for the square root (fg)
84   * and half reciprocal (fy2).
85   */
86  fx = spu_roundtf(dx);
87
88  fy2 = spu_rsqrte(fx);
89  fy = spu_mul(fy2, fhalf);
90  fg = spu_mul(fy2, fx);        /* 12-bit approximation to sqrt(cx) */
91 
92  /* Perform one single precision Newton-Raphson iteration to improve
93   * accuracy to about 22 bits.
94   */
95  fe = spu_nmsub(fy, fg, fhalf);
96  fd = spu_nmsub(fg, fg, fx);
97
98  fy = spu_madd(fy2, fe, fy);
99  fg = spu_madd(fy, fd, fg);    /* 22-bit approximation */
100
101  dy = spu_extend(fy);
102  dg = spu_extend(fg);
103
104  /* Perform two double precision Newton-Raphson iteration to improve
105   * accuracy to about 44 and 88 bits repectively.
106   */
107  dy2 = spu_add(dy, dy);
108  de = spu_nmsub(dy, dg, dhalf);
109  dd = spu_nmsub(dg, dg, dx);
110  dy = spu_madd(dy2, de, dy);
111  dg = spu_madd(dy, dd, dg);    /* 44 bit approximation */
112
113  dd = spu_nmsub(dg, dg, dx);
114  dg = spu_madd(dy, dd, dg);    /* full double precision approximation */
115
116
117  /* Compute the expected exponent assuming that it is not a special value.
118   * See special value handling below.
119   */
120  bias_exp = spu_rlmaska(spu_sub((vec_int4)spu_and((vec_ullong2)in, mask), 
121                                 (vec_int4)spu_splats(0x3FE0000000000000ULL)),
122                         -1);
123
124  /* Adjust the exponent bias if the input was denormalized */
125  bias_exp = spu_sub(bias_exp, (vec_int4)spu_and(spu_splats(0x01A0000000000000ULL), denorm));
126
127  dg = (vec_double2)spu_add((vec_int4)dg, bias_exp);
128
129  /* Handle special inputs. These include:
130   *
131   *   input             output
132   * =========          =========
133   *    -0                -0
134   *     0                 0
135   * +infinity          +infinity
136   *    NaN               NaN
137   *    <0                NaN
138   */
139  exp = spu_shuffle(exp, exp, ((vec_uchar16) { 0,1,2,3,0,1,2,3, 8,9,10,11,8,9,10,11 }));
140
141  neg = (vec_double2)spu_rlmaska((vec_int4)exp, -31);
142  nochange = spu_or((vec_ullong2)spu_cmpeq(exp, 0x7FF00000), 
143                    spu_cmpeq(in, spu_splats(0.0)));
144
145  dg = spu_sel(spu_or(dg, neg), in, nochange);
146
147  return (dg);
148}
149#endif /* _SQRTD2_H_ */
150#endif /* __SPU__ */
Note: See TracBrowser for help on using the repository browser.