[772] | 1 | /* ------------------------- */ |
---|
| 2 | /* --- macro_mpar_SIMD.h --- */ |
---|
| 3 | /* ------------------------- */ |
---|
| 4 | |
---|
| 5 | /* |
---|
| 6 | * Copyright (c) 2015-2015, Lionel Lacassagne, All rights reserved |
---|
| 7 | * Univ Paris Sud XI, CNRS |
---|
| 8 | */ |
---|
| 9 | |
---|
| 10 | #ifndef _MACRO_MPAR_SIMD_H_ |
---|
| 11 | #define _MACRO_MPAR_SIMD_H_ |
---|
| 12 | |
---|
// vec_change(x0, x1): expands to vec_xor(x0, x1).
// NOTE(review): presumably vec_xor is a bitwise XOR, so set bits mark where
// x0 and x1 differ -- confirm against the vec_xor definition.
#define vec_change(x0, x1) vec_xor((x0), (x1))
---|
| 14 | |
---|
| 15 | // ========================== // |
---|
| 16 | // === conditional update === // |
---|
| 17 | // ========================== // |
---|
| 18 | |
---|
// vec0_update_cond(new_state, previous_state): Mpar-0 conditional update.
// Expands to vec_andnot(vec_cmpeq(previous_state, zero), new_state), i.e.
// new_state masked by the "previous_state == zero" comparison.
// NOTE(review): presumably vec_andnot(a, b) is (~a & b), so lanes whose
// previous state is zero come out as zero -- confirm against vec_andnot.
// Uses the identifier `zero`, which must be visible at the expansion site.
#define vec0_update_cond(new_state, previous_state) \
    vec_andnot(vec_cmpeq((previous_state), zero), (new_state))
---|
| 21 | |
---|
// vec1_update_cond(new_state, previous_state): Mpar-1 conditional update.
// Selects between new_state and previous_state under the mask
// vec_cmpeq(previous_state, ff).
// NOTE(review): presumably vec_sel(a, b, m) returns b where m is set, so lanes
// already equal to ff keep that value -- confirm against vec_sel.
// Uses the identifier `ff`, which must be visible at the expansion site.
// Alternative form noted by the author:
//   vec_sel(new_state, ff, vec_cmpeq(previous_state, ff))
#define vec1_update_cond(new_state, previous_state) \
    vec_sel((new_state), (previous_state), vec_cmpeq((previous_state), ff))
---|
| 25 | |
---|
| 26 | // ============================ // |
---|
| 27 | // === min and positive min === // |
---|
| 28 | // ============================ // |
---|
| 29 | |
---|
// vec0_positive_min3(a, b, c, pm): statement macro storing into pm the minimum
// of those of a, b, c that are not equal to zero.
//   - pm is first set to ff;
//   - for each input v, in the order a, b, c, pm becomes vec_min(pm, v), except
//     where v == zero, in which case the previous pm is kept;
//   - where a, b and c are all zero, pm is therefore left equal to ff.
// NOTE(review): assumes vec_sel(x, y, m) yields y where m is set -- confirm
// against the vec_sel definition.
// Each step is a single nested expression: the macro declares no local
// variable, so no argument name can be captured by a macro-local.
// pm must be an lvalue distinct from a, b and c (it is overwritten first);
// a, b and c are expanded twice each and must be free of side effects.
#define vec0_positive_min3(a, b, c, pm) \
do { \
    pm = ff; \
    pm = vec_sel(vec_min(pm, a), pm, vec_cmpeq(a, zero)); \
    pm = vec_sel(vec_min(pm, b), pm, vec_cmpeq(b, zero)); \
    pm = vec_sel(vec_min(pm, c), pm, vec_cmpeq(c, zero)); \
} while (0)
---|
| 44 | |
---|
// vec0_positive_min3_cond(a, b, c, pm): statement macro, conditional variant
// of the positive minimum of three, where b is the centre value.
//   - pm is first set to ff;
//   - for each input v, in the order a, c, b, pm becomes vec_min(pm, v), except
//     where v == zero, in which case the previous pm is kept;
//   - finally pm goes through vec0_update_cond(pm, b), i.e.
//     vec_andnot(vec_cmpeq(b, zero), pm).
// NOTE(review): assumes vec_sel(x, y, m) yields y where m is set and
// vec_andnot(x, y) is (~x & y), so that pm ends up zero where b is zero --
// confirm against their definitions.
// Each step is a single nested expression: the macro declares no local
// variable, so no argument name can be captured by a macro-local.
// pm must be an lvalue distinct from a, b and c (it is overwritten first);
// a, b and c are expanded several times and must be free of side effects.
#define vec0_positive_min3_cond(a, b, c, pm) \
do { \
    pm = ff; \
    pm = vec_sel(vec_min(pm, a), pm, vec_cmpeq(a, zero)); \
    pm = vec_sel(vec_min(pm, c), pm, vec_cmpeq(c, zero)); \
    pm = vec_sel(vec_min(pm, b), pm, vec_cmpeq(b, zero)); \
    pm = vec0_update_cond(pm, b); \
} while (0)
---|
| 63 | |
---|
// Mpar-1 minimum helpers (expression macros).
// vec1_min2(a, b): expands to vec_min(a, b).
#define vec1_min2(a, b) \
    vec_min((a), (b))

// vec1_min3(a, b, c): minimum of three values as two chained vec1_min2.
#define vec1_min3(a, b, c) \
    vec1_min2(vec1_min2(a, b), c)

// vec1_min3_cond(a, b, c): vec1_min3(a, b, c) passed through vec1_update_cond
// with b as the previous state. b is expanded several times, so it must be
// free of side effects.
#define vec1_min3_cond(a, b, c) \
    vec1_update_cond(vec1_min3(a, b, c), b)
---|
// Macro hygiene: an argument passed to a statement macro of this file must not
// share its name with a local variable declared by that macro (the `_`-prefixed
// names, e.g. _l, _r, _sflag).
---|
| 70 | |
---|
| 71 | // ================== // |
---|
| 72 | // === diffusion1 === // |
---|
| 73 | // ================== // |
---|
// vec0_diffusion1_expand(x, xd): one Mpar-0 diffusion step (statement macro).
// Stores vec_ldup(x) and vec_rdup(x) in temporaries, then writes into xd the
// result of vec0_positive_min3_cond(left, x, right, xd).
// NOTE(review): vec_ldup / vec_rdup presumably produce the left / right
// neighbours of x with the border element duplicated -- confirm against
// their definitions.
// xd must be an lvalue; the argument names must differ from _l and _r.
#define vec0_diffusion1_expand(x, xd) \
do { \
    vuint32 _l = vec_ldup(x); \
    vuint32 _r = vec_rdup(x); \
    vec0_positive_min3_cond(_l, x, _r, xd); \
} while (0)
---|
| 81 | |
---|
// vec0_diffusion1(x, xd): one Mpar-0 diffusion step (statement macro); writes
// into xd the result of vec0_positive_min3_cond over vec_ldup(x), x and
// vec_rdup(x).
// Do not fold the temporaries into the call: _l and _r are used several times
// by vec0_positive_min3_cond.
// xd must be an lvalue; the argument names must differ from _l and _r.
#define vec0_diffusion1(x, xd) \
do { \
    vuint32 _l = vec_ldup(x); \
    vuint32 _r = vec_rdup(x); \
    vec0_positive_min3_cond(_l, x, _r, xd); \
} while (0)
---|
| 90 | |
---|
// if the duplication is too complex:
// vec0_positive_min3_cond(vec_left(zero, x), x, vec_right(x, zero), xd)
---|
| 93 | |
---|
// vec1_diffusion1_expand(x, xd): one Mpar-1 diffusion step (statement macro).
// xd first receives vec1_min3 over vec_ldup(x), x and vec_rdup(x), then is
// passed through vec1_update_cond with x as the previous state.
// The two assignments to xd are kept as separate statements on purpose: the
// second one re-reads x after xd has been written.
// xd must be an lvalue; the argument names must differ from _l and _r.
#define vec1_diffusion1_expand(x, xd) \
do { \
    vuint32 _l = vec_ldup(x); \
    vuint32 _r = vec_rdup(x); \
    xd = vec1_min3(_l, x, _r); \
    xd = vec1_update_cond(xd, x); \
} while (0)
---|
| 102 | |
---|
// vec1_diffusion1(x): one Mpar-1 diffusion step as an expression macro:
// vec1_min3_cond over vec_left1(ff, x), x and vec_right1(x, ff).
// x is expanded several times and must be free of side effects.
#define vec1_diffusion1(x) \
    vec1_min3_cond(vec_left1(ff, (x)), (x), vec_right1((x), ff))
---|
| 105 | |
---|
// if the duplication is too complex:
// vec1_min3_cond(vec_left1(ff, x), x, vec_right1(x, ff), xd)
---|
| 108 | |
---|
| 109 | // ================ // |
---|
| 110 | // === mask neq === // |
---|
| 111 | // ================ // |
---|
| 112 | |
---|
// vec_maskneq(x1, x0): vec_movemask applied to the complement of
// vec_cmpeq(x1, x0), the complement being taken as vec_andnot(eq, ff).
// The diff2stab macros of this file store the result in a uint32 and keep
// iterating while it is non-zero.
#define vec_maskneq(x1, x0) \
    vec_movemask(vec_andnot(vec_cmpeq((x1), (x0)), ff))
---|
| 115 | |
---|
| 116 | // ========================== // |
---|
| 117 | // === positive_diffusion === // |
---|
| 118 | // ========================== // |
---|
| 119 | |
---|
| 120 | // --- Mpar-0 --- |
---|
// vec0_positive_diff2stab_old1(pm): repeats an Mpar-0 diffusion step on pm, in
// place, until a step no longer changes it.
// Each iteration:
//   1. _md = vec0_positive_min3 over vec_ldup(pm), pm and vec_rdup(pm);
//   2. _cm = vec0_update_cond(_md, pm), i.e.
//      vec_andnot(vec_cmpeq(pm, zero), _md);
//   3. _sflag = vec_maskneq(pm, _cm), then pm = _cm.
// The loop runs at least once and stops when _sflag is zero.
// pm must be an lvalue whose name differs from the locals declared here.
#define vec0_positive_diff2stab_old1(pm) \
do { \
    uint32 _sflag; \
    do { \
        vuint32 _l = vec_ldup(pm); \
        vuint32 _r = vec_rdup(pm); \
        vuint32 _md, _cm; \
        vec0_positive_min3(_l, pm, _r, _md); \
        _cm = vec0_update_cond(_md, pm); \
        _sflag = vec_maskneq(pm, _cm); \
        pm = _cm; \
    } while (_sflag); \
} while (0)
---|
| 139 | |
---|
// vec0_positive_diff2stab_old(pm): repeats an Mpar-0 diffusion step on pm, in
// place, until a step no longer changes it.
// Each iteration computes _cm with vec0_positive_min3_cond over vec_ldup(pm),
// pm and vec_rdup(pm), records vec_maskneq(pm, _cm) in _sflag, then assigns
// _cm to pm. The loop runs at least once and stops when _sflag is zero.
// pm must be an lvalue whose name differs from the locals declared here.
#define vec0_positive_diff2stab_old(pm) \
do { \
    uint32 _sflag; \
    do { \
        vuint32 _l = vec_ldup(pm); \
        vuint32 _r = vec_rdup(pm); \
        vuint32 _cm; \
        vec0_positive_min3_cond(_l, pm, _r, _cm); \
        _sflag = vec_maskneq(pm, _cm); \
        pm = _cm; \
    } while (_sflag); \
} while (0)
---|
| 156 | |
---|
// vec0_diff2stab_1param(pm): in-place Mpar-0 diffusion to stability.
// Applies vec0_diffusion1 to pm, compares the result with pm through
// vec_maskneq, stores the result back into pm, and repeats while the
// comparison flag is non-zero. The loop body runs at least once.
// pm must be an lvalue whose name differs from _sflag and _dm.
#define vec0_diff2stab_1param(pm) \
do { \
    uint32 _sflag; \
    do { \
        vuint32 _dm; \
        vec0_diffusion1(pm, _dm); \
        _sflag = vec_maskneq(pm, _dm); \
        pm = _dm; \
    } while (_sflag != 0); \
} while (0)
---|
| 167 | |
---|
// vec0_diff2stab_2param(pm, dm): Mpar-0 diffusion to stability, out of place.
// pm is read once into the local _x and is never written; every step writes
// vec0_diffusion1(_x, dm) into dm, compares _x with dm through vec_maskneq and
// copies dm into _x. The loop runs at least once and stops when the
// comparison flag is zero, leaving the last result in dm.
// dm must be an lvalue; argument names must differ from _sflag and _x.
#define vec0_diff2stab_2param(pm, dm) \
do { \
    vuint32 _x = pm; \
    uint32 _sflag; \
    do { \
        vec0_diffusion1(_x, dm); \
        _sflag = vec_maskneq(_x, dm); \
        _x = dm; \
    } while (_sflag != 0); \
} while (0)
---|
| 178 | |
---|
// vec0_diff2stab(pm): Mpar-0 "diffuse until stable" entry point; currently the
// in-place, one-argument form.
#define vec0_diff2stab(pm) vec0_diff2stab_1param(pm)
// Two-argument alternative (disabled):
//#define vec0_diff2stab(pm, dm) vec0_diff2stab_2param(pm, dm)
---|
| 181 | |
---|
| 182 | // --- Mpar-1 --- |
---|
| 183 | |
---|
// vec1_diff2stab_1param(pm): in-place Mpar-1 diffusion to stability.
// Evaluates vec1_diffusion1(pm), compares the result with pm through
// vec_maskneq, stores the result back into pm, and repeats while the
// comparison flag is non-zero. The loop body runs at least once.
// pm must be an lvalue whose name differs from _sflag and _dm.
#define vec1_diff2stab_1param(pm) \
do { \
    uint32 _sflag; \
    do { \
        vuint32 _dm = vec1_diffusion1(pm); \
        _sflag = vec_maskneq(pm, _dm); \
        pm = _dm; \
    } while (_sflag != 0); \
} while (0)
---|
| 194 | |
---|
// vec1_diff2stab_2param(pm, dm): Mpar-1 diffusion to stability, out of place.
// pm is read once into the local _x and is never written; every step assigns
// vec1_diffusion1(_x) to dm, compares _x with dm through vec_maskneq and copies
// dm into _x. The loop runs at least once and stops when the comparison flag
// is zero, leaving the last result in dm.
// dm must be an lvalue; argument names must differ from _sflag and _x.
#define vec1_diff2stab_2param(pm, dm) \
do { \
    vuint32 _x = pm; \
    uint32 _sflag; \
    do { \
        dm = vec1_diffusion1(_x); \
        _sflag = vec_maskneq(_x, dm); \
        _x = dm; \
    } while (_sflag != 0); \
} while (0)
---|
| 205 | |
---|
// vec1_diff2stab(pm): Mpar-1 "diffuse until stable" entry point; the in-place,
// one-argument form, mirroring vec0_diff2stab.
// It forwards to vec1_diff2stab_1param: the two-argument
// vec1_diff2stab_2param(pm, dm) cannot be invoked with a single argument.
#define vec1_diff2stab(pm) vec1_diff2stab_1param(pm)
// Two-argument alternative (disabled):
//#define vec1_diff2stab(pm, dm) vec1_diff2stab_2param(pm, dm)
---|
| 208 | |
---|
| 209 | |
---|
| 210 | // =========================== // |
---|
| 211 | // === increment decrement === // |
---|
| 212 | // =========================== // |
---|
| 213 | |
---|
// vec_inc(x) / vec_dec(x): vec_add / vec_sub of x with the identifier `one`,
// which must be visible at the expansion site.
#define vec_inc(x) vec_add((x), one)
#define vec_dec(x) vec_sub((x), one)
---|
| 216 | |
---|
#endif // _MACRO_MPAR_SIMD_H_
---|