1 | /* -------------------------------------------------------------- */ |
---|
2 | /* (C)Copyright 2001,2008, */ |
---|
3 | /* International Business Machines Corporation, */ |
---|
4 | /* Sony Computer Entertainment, Incorporated, */ |
---|
5 | /* Toshiba Corporation, */ |
---|
6 | /* */ |
---|
7 | /* All Rights Reserved. */ |
---|
8 | /* */ |
---|
9 | /* Redistribution and use in source and binary forms, with or */ |
---|
10 | /* without modification, are permitted provided that the */ |
---|
11 | /* following conditions are met: */ |
---|
12 | /* */ |
---|
13 | /* - Redistributions of source code must retain the above copyright*/ |
---|
14 | /* notice, this list of conditions and the following disclaimer. */ |
---|
15 | /* */ |
---|
16 | /* - Redistributions in binary form must reproduce the above */ |
---|
17 | /* copyright notice, this list of conditions and the following */ |
---|
18 | /* disclaimer in the documentation and/or other materials */ |
---|
19 | /* provided with the distribution. */ |
---|
20 | /* */ |
---|
21 | /* - Neither the name of IBM Corporation nor the names of its */ |
---|
22 | /* contributors may be used to endorse or promote products */ |
---|
23 | /* derived from this software without specific prior written */ |
---|
24 | /* permission. */ |
---|
25 | /* */ |
---|
26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ |
---|
27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
---|
28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
---|
29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
---|
30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ |
---|
31 | /* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ |
---|
32 | /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ |
---|
33 | /* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ |
---|
34 | /* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ |
---|
35 | /* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ |
---|
36 | /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ |
---|
37 | /* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ |
---|
38 | /* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ |
---|
39 | /* -------------------------------------------------------------- */ |
---|
40 | /* PROLOG END TAG zYx */ |
---|
41 | #ifdef __SPU__ |
---|
42 | #ifndef _EXPF4_H_ |
---|
43 | #define _EXPF4_H_ 1 |
---|
44 | |
---|
45 | |
---|
46 | #include "floorf4.h" |
---|
47 | #include "ldexpf4.h" |
---|
48 | |
---|
49 | /* |
---|
50 | * FUNCTION |
---|
51 | * vector float _expf4(vector float x) |
---|
52 | * |
---|
53 | * DESCRIPTION |
---|
54 | * The _expf4 function computes e raised to the input x for |
---|
55 | * each of the element of the float vector. |
---|
56 | * |
---|
57 | */ |
---|
58 | static __inline vector float _expf4(vector float x) |
---|
59 | { |
---|
60 | |
---|
61 | // log2(e) |
---|
62 | vec_float4 log2e = spu_splats(1.4426950408889634074f); |
---|
63 | |
---|
64 | // Extra precision for the ln2 multiply |
---|
65 | vec_float4 ln2_hi = spu_splats(0.693359375f); |
---|
66 | vec_float4 ln2_lo = spu_splats(-2.12194440E-4f); |
---|
67 | |
---|
68 | // Coefficents for the Taylor series |
---|
69 | vec_float4 f02 = spu_splats(5.0000000000000000E-1f); // 1/2! |
---|
70 | vec_float4 f03 = spu_splats(1.6666666666666667E-1f); // 1/3! |
---|
71 | vec_float4 f04 = spu_splats(4.1666666666666667E-2f); // 1/4! |
---|
72 | vec_float4 f05 = spu_splats(8.3333333333333333E-3f); // 1/5! |
---|
73 | vec_float4 f06 = spu_splats(1.3888888888888889E-3f); // 1/6! |
---|
74 | vec_float4 f07 = spu_splats(1.9841269841269841E-4f); // 1/7! |
---|
75 | |
---|
76 | // Range reduce input, so that: |
---|
77 | // e^x = e^z * 2^n |
---|
78 | // e^x = e^z * e^(n * ln(2)) |
---|
79 | // e^x = e^(z + (n * ln(2))) |
---|
80 | |
---|
81 | vec_int4 n; // exponent of reduction |
---|
82 | vec_float4 q; // range reduced result |
---|
83 | |
---|
84 | vec_float4 z; |
---|
85 | vec_float4 r; |
---|
86 | |
---|
87 | z = spu_madd(x,log2e,spu_splats(0.5f)); |
---|
88 | z = _floorf4(z); |
---|
89 | r = spu_nmsub(z,ln2_hi,x); |
---|
90 | r = spu_nmsub(z,ln2_lo,r); |
---|
91 | n = spu_convts(z,0); |
---|
92 | z = spu_mul(r,r); |
---|
93 | |
---|
94 | // Use Horner's method on the Taylor series |
---|
95 | q = spu_madd(r,f07,f06); |
---|
96 | q = spu_madd(q,r,f05); |
---|
97 | q = spu_madd(q,r,f04); |
---|
98 | q = spu_madd(q,r,f03); |
---|
99 | q = spu_madd(q,r,f02); |
---|
100 | q = spu_madd(q,z,r); |
---|
101 | q = spu_add(q,spu_splats(1.0f)); |
---|
102 | |
---|
103 | // Adjust the result by the range reduction |
---|
104 | r = _ldexpf4( q, n ); |
---|
105 | |
---|
106 | return(r); |
---|
107 | |
---|
108 | } |
---|
109 | |
---|
110 | #endif /* _EXPF4_H_ */ |
---|
111 | #endif /* __SPU__ */ |
---|
112 | |
---|