[444] | 1 | /* -------------------------------------------------------------- */ |
---|
| 2 | /* (C)Copyright 2001,2008, */ |
---|
| 3 | /* International Business Machines Corporation, */ |
---|
| 4 | /* Sony Computer Entertainment, Incorporated, */ |
---|
| 5 | /* Toshiba Corporation, */ |
---|
| 6 | /* */ |
---|
| 7 | /* All Rights Reserved. */ |
---|
| 8 | /* */ |
---|
| 9 | /* Redistribution and use in source and binary forms, with or */ |
---|
| 10 | /* without modification, are permitted provided that the */ |
---|
| 11 | /* following conditions are met: */ |
---|
| 12 | /* */ |
---|
| 13 | /* - Redistributions of source code must retain the above copyright*/ |
---|
| 14 | /* notice, this list of conditions and the following disclaimer. */ |
---|
| 15 | /* */ |
---|
| 16 | /* - Redistributions in binary form must reproduce the above */ |
---|
| 17 | /* copyright notice, this list of conditions and the following */ |
---|
| 18 | /* disclaimer in the documentation and/or other materials */ |
---|
| 19 | /* provided with the distribution. */ |
---|
| 20 | /* */ |
---|
| 21 | /* - Neither the name of IBM Corporation nor the names of its */ |
---|
| 22 | /* contributors may be used to endorse or promote products */ |
---|
| 23 | /* derived from this software without specific prior written */ |
---|
| 24 | /* permission. */ |
---|
| 25 | /* */ |
---|
| 26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ |
---|
| 27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
---|
| 28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
---|
| 29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
---|
| 30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ |
---|
| 31 | /* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ |
---|
| 32 | /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ |
---|
| 33 | /* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ |
---|
| 34 | /* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ |
---|
| 35 | /* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ |
---|
| 36 | /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ |
---|
| 37 | /* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ |
---|
| 38 | /* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ |
---|
| 39 | /* -------------------------------------------------------------- */ |
---|
| 40 | /* PROLOG END TAG zYx */ |
---|
| 41 | #ifdef __SPU__ |
---|
| 42 | #ifndef _POWF4_H_ |
---|
| 43 | #define _POWF4_H_ 1 |
---|
| 44 | |
---|
| 45 | #include <spu_intrinsics.h> |
---|
| 46 | #include <vec_types.h> |
---|
| 47 | |
---|
| 48 | #include "exp2f4.h" |
---|
| 49 | #include "log2f4.h" |
---|
| 50 | |
---|
| 51 | /* |
---|
| 52 | * FUNCTION |
---|
| 53 | * vector float _powf4(vector float x, vector float y) |
---|
| 54 | * |
---|
| 55 | * DESCRIPTION |
---|
| 56 | * The _powf4 function computes x raised to the power y for the set of |
---|
| 57 | * vectors. The powf4 function is computed as by decomposing |
---|
| 58 | * the problem into: |
---|
| 59 | * |
---|
| 60 | * x^y = 2^(y*log2(x)) |
---|
| 61 | * |
---|
| 62 | */ |
---|
| 63 | static __inline vector float _powf4(vector float x, vector float y) |
---|
| 64 | { |
---|
| 65 | vec_uint4 y_exp; |
---|
| 66 | vec_uint4 y_mantissa; |
---|
| 67 | vec_uint4 mant_shift; |
---|
| 68 | vec_uint4 y_is_int; |
---|
| 69 | vec_uint4 y_is_odd; |
---|
| 70 | vec_uint4 x_sign_bit; |
---|
| 71 | vec_uint4 zero = (vec_uint4)spu_splats(0); |
---|
| 72 | vec_uint4 bit0 = (vec_uint4)spu_splats(0x80000000); |
---|
| 73 | vec_int4 error = spu_splats(-1); |
---|
| 74 | vec_float4 out; |
---|
| 75 | |
---|
| 76 | y_exp = spu_and(spu_rlmask((vec_uint4)y, -23), 0x000000FF); |
---|
| 77 | |
---|
| 78 | /* Need the implied bit in the mantissa to catch |
---|
| 79 | * y = 1 case later |
---|
| 80 | */ |
---|
| 81 | y_mantissa = spu_or(spu_sl((vec_uint4)y, (unsigned int)8), bit0); |
---|
| 82 | |
---|
| 83 | x_sign_bit = spu_and((vec_uint4)x, bit0); |
---|
| 84 | |
---|
| 85 | /* We are going to shift the mantissa over enough to |
---|
| 86 | * determine if we have an integer. |
---|
| 87 | */ |
---|
| 88 | mant_shift = spu_add(y_exp, -127); |
---|
| 89 | |
---|
| 90 | |
---|
| 91 | /* Leave the lowest-order integer bit of mantissa on the |
---|
| 92 | * high end so we can see if the integer is odd. |
---|
| 93 | */ |
---|
| 94 | y_mantissa = spu_sl(y_mantissa, mant_shift); |
---|
| 95 | |
---|
| 96 | y_is_int = spu_cmpeq(spu_andc(y_mantissa, bit0), 0); |
---|
| 97 | y_is_int = spu_and(y_is_int, spu_cmpgt(y_exp, 126)); |
---|
| 98 | |
---|
| 99 | y_is_odd = spu_and(spu_cmpeq(y_mantissa, bit0), y_is_int); |
---|
| 100 | |
---|
| 101 | out = _exp2f4(spu_mul(y, _log2f4(spu_andc(x, (vec_float4)bit0)))); |
---|
| 102 | |
---|
| 103 | /* x < 0 is only ok when y integer |
---|
| 104 | */ |
---|
| 105 | out = spu_sel(out, (vec_float4)error, |
---|
| 106 | spu_andc(spu_cmpeq(x_sign_bit, bit0), y_is_int)); |
---|
| 107 | |
---|
| 108 | /* Preserve the sign of x if y is an odd integer |
---|
| 109 | */ |
---|
| 110 | out = spu_sel(out, spu_or(out, (vec_float4)x_sign_bit), y_is_odd); |
---|
| 111 | |
---|
| 112 | /* x = anything, y = +/- 0, returns 1 |
---|
| 113 | */ |
---|
| 114 | out = spu_sel(out, spu_splats(1.0f), spu_cmpabseq(y, (vec_float4)zero)); |
---|
| 115 | |
---|
| 116 | return(out); |
---|
| 117 | } |
---|
| 118 | |
---|
| 119 | #endif /* _POWF4_H_ */ |
---|
| 120 | #endif /* __SPU__ */ |
---|