Context Navigation

sqrtd2.h

Last change on this file was 444, checked in by satin@…, 6 years ago
add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc
File size: 5.8 KB

Line
1	/* -------------------------------------------------------------- */
2	/* (C)Copyright 2001,2008, */
3	/* International Business Machines Corporation, */
4	/* Sony Computer Entertainment, Incorporated, */
5	/* Toshiba Corporation, */
6	/* */
7	/* All Rights Reserved. */
8	/* */
9	/* Redistribution and use in source and binary forms, with or */
10	/* without modification, are permitted provided that the */
11	/* following conditions are met: */
12	/* */
13	/* - Redistributions of source code must retain the above copyright*/
14	/* notice, this list of conditions and the following disclaimer. */
15	/* */
16	/* - Redistributions in binary form must reproduce the above */
17	/* copyright notice, this list of conditions and the following */
18	/* disclaimer in the documentation and/or other materials */
19	/* provided with the distribution. */
20	/* */
21	/* - Neither the name of IBM Corporation nor the names of its */
22	/* contributors may be used to endorse or promote products */
23	/* derived from this software without specific prior written */
24	/* permission. */
25	/* */
26	/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
27	/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
28	/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
29	/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
30	/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
31	/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
32	/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
33	/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
34	/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
35	/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
36	/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
37	/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
38	/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
39	/* -------------------------------------------------------------- */
40	/* PROLOG END TAG zYx */
41	#ifdef __SPU__
42	#ifndef _SQRTD2_H_
43	#define _SQRTD2_H_ 1
44
45	#include <spu_intrinsics.h>
46
47	/*
48	* FUNCTION
49	* vector double _sqrtd2(vector double in)
50	*
51	* DESCRIPTION
52	* The _sqrtd2 function computes the square root of the vector input "in"
53	* and returns the result.
54	*
55	*/
56	static __inline vector double _sqrtd2(vector double in)
57	{
58	vec_int4 bias_exp;
59	vec_uint4 exp;
60	vec_float4 fx, fg, fy, fd, fe, fy2, fhalf;
61	vec_ullong2 nochange, denorm;
62	vec_ullong2 mask = spu_splats(0x7FE0000000000000ULL);
63	vec_double2 dx, de, dd, dy, dg, dy2, dhalf;
64	vec_double2 neg;
65	vec_double2 one = spu_splats(1.0);
66	vec_double2 two_pow_52 = (vec_double2)spu_splats(0x4330000000000000ULL);
67
68	/* If the input is a denorm, then multiply it by 2^52 so that the input is no
69	* longer denormal.
70	*/
71	exp = (vec_uint4)spu_and((vec_ullong2)in, spu_splats(0xFFF0000000000000ULL));
72	denorm = (vec_ullong2)spu_cmpeq(exp,0);
73
74	in = spu_mul(in, spu_sel(one, two_pow_52, denorm));
75
76	fhalf = spu_splats(0.5f);
77	dhalf = spu_splats(0.5);
78
79	/* Coerce the input, in, into the argument reduced space [0.5, 2.0).
80	*/
81	dx = spu_sel(in, dhalf, mask);
82
83	/* Compute an initial single precision guess for the square root (fg)
84	* and half reciprocal (fy2).
85	*/
86	fx = spu_roundtf(dx);
87
88	fy2 = spu_rsqrte(fx);
89	fy = spu_mul(fy2, fhalf);
90	fg = spu_mul(fy2, fx); /* 12-bit approximation to sqrt(cx) */
91
92	/* Perform one single precision Newton-Raphson iteration to improve
93	* accuracy to about 22 bits.
94	*/
95	fe = spu_nmsub(fy, fg, fhalf);
96	fd = spu_nmsub(fg, fg, fx);
97
98	fy = spu_madd(fy2, fe, fy);
99	fg = spu_madd(fy, fd, fg); /* 22-bit approximation */
100
101	dy = spu_extend(fy);
102	dg = spu_extend(fg);
103
104	/* Perform two double precision Newton-Raphson iteration to improve
105	* accuracy to about 44 and 88 bits repectively.
106	*/
107	dy2 = spu_add(dy, dy);
108	de = spu_nmsub(dy, dg, dhalf);
109	dd = spu_nmsub(dg, dg, dx);
110	dy = spu_madd(dy2, de, dy);
111	dg = spu_madd(dy, dd, dg); /* 44 bit approximation */
112
113	dd = spu_nmsub(dg, dg, dx);
114	dg = spu_madd(dy, dd, dg); /* full double precision approximation */
115
116
117	/* Compute the expected exponent assuming that it is not a special value.
118	* See special value handling below.
119	*/
120	bias_exp = spu_rlmaska(spu_sub((vec_int4)spu_and((vec_ullong2)in, mask),
121	(vec_int4)spu_splats(0x3FE0000000000000ULL)),
122	-1);
123
124	/* Adjust the exponent bias if the input was denormalized */
125	bias_exp = spu_sub(bias_exp, (vec_int4)spu_and(spu_splats(0x01A0000000000000ULL), denorm));
126
127	dg = (vec_double2)spu_add((vec_int4)dg, bias_exp);
128
129	/* Handle special inputs. These include:
130	*
131	* input output
132	* ========= =========
133	* -0 -0
134	* 0 0
135	* +infinity +infinity
136	* NaN NaN
137	* <0 NaN
138	*/
139	exp = spu_shuffle(exp, exp, ((vec_uchar16) { 0,1,2,3,0,1,2,3, 8,9,10,11,8,9,10,11 }));
140
141	neg = (vec_double2)spu_rlmaska((vec_int4)exp, -31);
142	nochange = spu_or((vec_ullong2)spu_cmpeq(exp, 0x7FF00000),
143	spu_cmpeq(in, spu_splats(0.0)));
144
145	dg = spu_sel(spu_or(dg, neg), in, nochange);
146
147	return (dg);
148	}
149	#endif /* _SQRTD2_H_ */
150	#endif /* __SPU__ */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/libs/newlib/src/newlib/libm/machine/spu/headers/sqrtd2.h

Download in other formats: