source: trunk/libs/newlib/src/newlib/libm/machine/spu/headers/recipd2.h

Last change on this file was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 7.3 KB
Line 
1/* --------------------------------------------------------------  */
2/* (C)Copyright 2001,2008,                                         */
3/* International Business Machines Corporation,                    */
4/* Sony Computer Entertainment, Incorporated,                      */
5/* Toshiba Corporation,                                            */
6/*                                                                 */
7/* All Rights Reserved.                                            */
8/*                                                                 */
9/* Redistribution and use in source and binary forms, with or      */
10/* without modification, are permitted provided that the           */
11/* following conditions are met:                                   */
12/*                                                                 */
13/* - Redistributions of source code must retain the above copyright*/
14/*   notice, this list of conditions and the following disclaimer. */
15/*                                                                 */
16/* - Redistributions in binary form must reproduce the above       */
17/*   copyright notice, this list of conditions and the following   */
18/*   disclaimer in the documentation and/or other materials        */
19/*   provided with the distribution.                               */
20/*                                                                 */
21/* - Neither the name of IBM Corporation nor the names of its      */
22/*   contributors may be used to endorse or promote products       */
23/*   derived from this software without specific prior written     */
24/*   permission.                                                   */
25/*                                                                 */
26/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND          */
27/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,     */
28/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF        */
29/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE        */
30/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR            */
31/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,    */
32/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT    */
33/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;    */
34/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        */
35/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN       */
36/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR    */
37/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  */
38/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              */
39/* --------------------------------------------------------------  */
40/* PROLOG END TAG zYx                                              */
41#ifdef __SPU__
42
43#ifndef _RECIPD2_H_
44#define _RECIPD2_H_             1
45
46#include <spu_intrinsics.h>
47
48
49/*
50 * FUNCTION
51 *      vector double _recipd2(vector double value)
52 *
53 * DESCRIPTION
54 *      The _recipd2 function inverts "value" and returns the result.
55 *      Computation is performed using the single precision reciprocal
56 *      estimate and interpolate instructions to produce a 12 accurate
57 *      estimate.
58 *
59 *      One (1) iteration of a Newton-Raphson is performed to improve
60 *      accuracy to single precision floating point. Two additional double
61 *      precision iterations are  needed to achieve a full double
62 *      preicision result.
63 *
64 *      The Newton-Raphson iteration is of the form:
65 *      a)      X[i+1] = X[i] * (2.0 - b*X[i])
66 *          or
67 *      b)      X[i+1] = X[i] + X[i]*(1.0 - X[i]*b)
68 *      where b is the input value to be inverted
69 *
70 *      The later (b) form improves the accuracy to 99.95% correctly rounded.
71 */ 
72static __inline vector double _recipd2(vector double value_in)
73{
74  vec_float4  x0;
75  vec_float4  value;
76  vec_float4  one   = spu_splats(1.0f);
77  vec_double2 one_d = spu_splats(1.0);
78  vec_double2 x1, x2, x3;
79  vec_double2 scale;
80  vec_double2 exp, value_d;
81  vec_ullong2 expmask = spu_splats(0x7FF0000000000000ULL);
82  vec_ullong2 is0inf;
83
84#ifdef __SPU_EDP__
85  vec_ullong2 isdenorm;
86  vec_ullong2 expmask_minus1 = spu_splats(0x7FE0000000000000ULL);
87
88  /* Determine special input values. For example, if the input is a denorm, infinity or 0 */
89
90  isdenorm = spu_testsv(value_in, (SPU_SV_POS_DENORM   | SPU_SV_NEG_DENORM));
91  is0inf   = spu_testsv(value_in, (SPU_SV_NEG_ZERO     | SPU_SV_POS_ZERO |
92                                   SPU_SV_NEG_INFINITY | SPU_SV_POS_INFINITY));
93
94  /* Scale the divisor to correct for double precision floating
95   * point exponents that are out of single precision range.
96   */
97  exp = spu_and(value_in, (vec_double2)expmask);
98  scale = spu_xor(exp, (vec_double2)spu_sel(expmask, expmask_minus1, isdenorm));
99  value_d = spu_mul(value_in, scale);
100  value = spu_roundtf(value_d);
101
102  /* Perform reciprocal with 1 single precision and 2 double precision
103   * Newton-Raphson iterations.
104   */
105  x0 = spu_re(value);
106  x1 = spu_extend(spu_madd(spu_nmsub(value, x0, one), x0, x0));
107  x2 = spu_madd(spu_nmsub(value_d, x1, one_d), x1, x1);
108  x3 = spu_madd(spu_nmsub(value_d, x2, one_d), x2, x2);
109  x3 = spu_sel(spu_mul(x3, scale), spu_xor(value_in, (vector double)expmask), is0inf);
110
111#else /* !__SPU_EDP__ */
112
113  vec_uint4 isinf, iszero, isdenorm0;
114  vec_double2 value_abs;
115  vec_double2 sign = spu_splats(-0.0);
116  vec_double2 denorm_scale = (vec_double2)spu_splats(0x4330000000000000ULL);
117  vec_double2 exp_53 = (vec_double2)spu_splats(0x0350000000000000ULL);
118  vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11};
119  vec_uchar16 swap = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
120
121  value_abs = spu_andc(value_in, sign);
122  exp = spu_and(value_in, (vec_double2)expmask);
123
124  /* Determine if the input is a special value. These include:
125   *  denorm   - then we must coerce it to a normal value.
126   *  zero     - then we must return an infinity
127   *  infinity - then we must return a zero.
128   */
129  isdenorm0 = spu_cmpeq(spu_shuffle((vec_uint4)exp, (vec_uint4)exp, splat_hi), 0);
130
131  isinf  = spu_cmpeq((vec_uint4)value_abs, (vec_uint4)expmask);
132  iszero = spu_cmpeq((vec_uint4)value_abs, 0);
133  isinf  = spu_and(isinf,  spu_shuffle(isinf, isinf, swap));
134  iszero = spu_and(iszero, spu_shuffle(iszero, iszero, swap));
135  is0inf = (vec_ullong2)spu_or(isinf, iszero);
136
137  /* If the inputs is a denorm, we must first convert it to a normal number since
138   * arithmetic operations on denormals produces 0 on Cell/B.E.
139   */
140  value_d = spu_sub(spu_or(value_abs, exp_53), exp_53);
141  value_d = spu_sel(value_abs, value_d, (vec_ullong2)isdenorm0);
142
143  /* Scale the divisor to correct for double precision floating
144   * point exponents that are out of single precision range.
145   */
146  scale = spu_xor(spu_and(value_d, (vec_double2)expmask), (vec_double2)expmask);
147  value_d = spu_mul(value_d, scale);
148  value = spu_roundtf(value_d);
149
150  /* Perform reciprocal with 1 single precision and 2 double precision
151   * Newton-Raphson iterations. The bias is removed after the single
152   * precision iteration.
153   */
154  x0 = spu_re(value);
155  x1 = spu_extend(spu_madd(spu_nmsub(value, x0, one), x0, x0));
156  x2 = spu_madd(spu_nmsub(value_d, x1, one_d), x1, x1);
157  x3 = spu_madd(spu_nmsub(value_d, x2, one_d), x2, x2);
158  x3 = spu_mul(x3, spu_sel(scale, value_in, (vec_ullong2)sign));
159  x3 = spu_sel(x3, spu_mul(x3, denorm_scale), (vec_ullong2)isdenorm0);
160  x3 = spu_sel(x3, spu_xor(value_in, (vector double)expmask), is0inf);
161
162#endif /* __SPU_EDP__ */
163
164  return (x3);
165}
166
167#endif /* _RECIPD2_H_ */
168#endif /* __SPU__ */
Note: See TracBrowser for help on using the repository browser.