1 | /* ------------------------- */ |
---|
2 | /* --- macro_mpar_SIMD.h --- */ |
---|
3 | /* ------------------------- */ |
---|
4 | |
---|
5 | /* |
---|
6 | * Copyright (c) 2015-2015, Lionel Lacassagne, All rights reserved |
---|
7 | * Univ Paris Sud XI, CNRS |
---|
8 | */ |
---|
9 | |
---|
10 | #ifndef _MACRO_MPAR_SIMD_H_ |
---|
11 | #define _MACRO_MPAR_SIMD_H_ |
---|
12 | |
---|
// vec_change(x0, x1): thin alias for vec_xor(x0, x1).
// Presumably used as a "has changed" indicator (result non-zero where x0 and
// x1 differ) -- NOTE(review): confirm that vec_xor is a bitwise XOR.
#define vec_change(x0, x1) vec_xor(x0, x1)
---|
14 | |
---|
15 | // ========================== // |
---|
16 | // === conditional update === // |
---|
17 | // ========================== // |
---|
18 | |
---|
// vec0_update_cond(new_state, previous_state):
// yields new_state masked by the complement of (previous_state == zero).
// Assuming vec_andnot(m, v) computes (~m) & v (SSE operand order -- TODO
// confirm for other back-ends), lanes whose previous state is zero are forced
// to zero and every other lane takes new_state.
// `zero` must be visible at the expansion site.
#define vec0_update_cond(new_state, previous_state) \
    vec_andnot(vec_cmpeq(previous_state, zero), new_state)
---|
21 | |
---|
// vec1_update_cond(new_state, previous_state):
// selects between new_state and previous_state using the mask
// (previous_state == ff). Assuming vec_sel(a, b, m) takes b where m is set and
// a elsewhere (TODO confirm), lanes already equal to ff keep ff and every
// other lane takes new_state.
// `ff` must be visible at the expansion site.
#define vec1_update_cond(new_state, previous_state) \
    vec_sel(new_state, previous_state, vec_cmpeq(previous_state, ff))
// equivalent form (the selected lanes are exactly those equal to ff):
// vec_sel(new_state, ff, vec_cmpeq(previous_state, ff))
---|
25 | |
---|
26 | // ============================ // |
---|
27 | // === min and positive min === // |
---|
28 | // ============================ // |
---|
29 | |
---|
// vec0_positive_min3(a, b, c, pm):
// statement macro that writes into pm the running minimum of a, b and c,
// skipping every operand lane that equals zero. pm starts at ff, so a lane
// where a, b and c are all zero ends up equal to ff.
// Assumes vec_sel(x, y, m) takes y where m is set -- TODO confirm.
// Constraints: `ff` and `zero` must be visible at the expansion site; a, b and
// c are each evaluated twice; pm must be an lvalue distinct from a, b, c (it
// is overwritten first) and must not be named _m or _eq (macro locals).
#define vec0_positive_min3(a, b, c, pm) \
do { \
    vuint32 _m, _eq; \
    pm = ff; \
    _m = vec_min(pm, a); \
    _eq = vec_cmpeq(a, zero); \
    pm = vec_sel(_m, pm, _eq);   /* keep previous pm where a == zero */ \
    _m = vec_min(pm, b); \
    _eq = vec_cmpeq(b, zero); \
    pm = vec_sel(_m, pm, _eq);   /* keep previous pm where b == zero */ \
    _m = vec_min(pm, c); \
    _eq = vec_cmpeq(c, zero); \
    pm = vec_sel(_m, pm, _eq);   /* keep previous pm where c == zero */ \
} while(0)
---|
44 | |
---|
// The variable passed as `pm` must not be named _m or _eq:
// those names are used by the macro's own local temporaries.
---|
47 | |
---|
// vec0_positive_min3_cond(a, b, c, pm):
// same zero-skipping running minimum as vec0_positive_min3, followed by a
// conditional reset on the middle operand b. The operands are folded in the
// order a, c, b so that _eq still holds (b == zero) for the final step, which
// masks pm with its complement: assuming vec_andnot(m, v) is (~m) & v (TODO
// confirm), pm becomes zero wherever b is zero.
// Assumes vec_sel(x, y, m) takes y where m is set -- TODO confirm.
// Constraints: `ff` and `zero` must be visible at the expansion site; a, b and
// c are each evaluated twice; pm must be an lvalue distinct from a, b, c and
// must not be named _m or _eq (macro locals).
#define vec0_positive_min3_cond(a, b, c, pm) \
do { \
    vuint32 _m, _eq; \
    pm = ff; \
    _m = vec_min(pm, a); \
    _eq = vec_cmpeq(a, zero); \
    pm = vec_sel(_m, pm, _eq); \
    _m = vec_min(pm, c); \
    _eq = vec_cmpeq(c, zero); \
    pm = vec_sel(_m, pm, _eq); \
    _m = vec_min(pm, b); \
    _eq = vec_cmpeq(b, zero);    /* b goes last: _eq is reused below */ \
    pm = vec_sel(_m, pm, _eq); \
    pm = vec_andnot(_eq, pm);    /* clear pm where b == zero */ \
} while(0)
---|
63 | |
---|
// vec1_min2(a, b): thin alias for vec_min(a, b).
#define vec1_min2(a, b) vec_min(a, b)
// vec1_min3(a, b, c): minimum of three vectors, built from two vec1_min2 steps.
#define vec1_min3(a, b, c) vec1_min2(vec1_min2(a, b), c)
---|
66 | |
---|
// vec1_min3_cond(a, b, c): vec1_min3(a, b, c) passed through
// vec1_update_cond with b as the previous state, i.e. the three-way minimum
// is only taken where vec1_update_cond lets the new value through.
// Expression macro: b is evaluated several times.
#define vec1_min3_cond(a, b, c) vec1_update_cond(vec1_min3(a, b, c), b)
---|
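// NOTE: the restriction on the name of the `pm` argument (it must differ from
// the hidden locals _m and _eq) concerns the vec0_positive_min3* macros; the
// vec1_min* macros are plain expressions and declare no locals.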
---|
70 | |
---|
71 | // ================== // |
---|
72 | // === diffusion1 === // |
---|
73 | // ================== // |
---|
// vec0_diffusion1_expand(x, xd):
// one diffusion step: combines x with its two neighbour vectors, obtained
// through vec_ldup(x) and vec_rdup(x), using vec0_positive_min3_cond with x
// as the middle operand, and stores the result in xd.
// The neighbours are kept in the locals _l and _r because the callee
// evaluates its operands more than once.
// Constraints: xd must be an lvalue distinct from x, and neither argument may
// be named _l, _r, _m or _eq (locals of this macro and of the callee).
#define vec0_diffusion1_expand(x, xd) \
do { \
    vuint32 _l, _r; \
    _l = vec_ldup(x); \
    _r = vec_rdup(x); \
    vec0_positive_min3_cond(_l, x, _r, xd); \
} while(0)
---|
81 | |
---|
// keep the _l and _r temporaries (do not fold them into the call): they are used several times
---|
// vec0_diffusion1(x, xd):
// one diffusion step: xd receives vec0_positive_min3_cond applied to
// (vec_ldup(x), x, vec_rdup(x)). In this file the expansion is identical to
// vec0_diffusion1_expand.
// Constraints: xd must be an lvalue distinct from x, and neither argument may
// be named _l, _r, _m or _eq (locals of this macro and of the callee).
#define vec0_diffusion1(x, xd) \
do { \
    vuint32 _l, _r; \
    _l = vec_ldup(x); \
    _r = vec_rdup(x); \
    vec0_positive_min3_cond(_l, x, _r, xd); \
} while(0)
---|
90 | |
---|
// if edge duplication (vec_ldup / vec_rdup) is too complex, use instead:
// vec0_positive_min3_cond(vec_left(zero, x), x, vec_right(x, zero), xd)
---|
93 | |
---|
// vec1_diffusion1_expand(x, xd):
// one diffusion step: xd receives vec1_min3_cond applied to
// (vec_ldup(x), x, vec_rdup(x)), i.e. the three-way minimum filtered by
// vec1_update_cond with x as the previous state.
// The result is computed as a single expression before xd is written, so the
// macro stays correct when xd and x are the same variable (a two-step
// "xd = min3; xd = update_cond(xd, x)" would read an already overwritten x).
// Constraints: neither argument may be named _l or _r (macro locals).
#define vec1_diffusion1_expand(x, xd) \
do { \
    vuint32 _l, _r; \
    _l = vec_ldup(x); \
    _r = vec_rdup(x); \
    xd = vec1_min3_cond(_l, x, _r); \
} while(0)
---|
102 | |
---|
// vec1_diffusion1(x):
// expression form of one diffusion step: vec1_min3_cond applied to
// (vec_left1(ff, x), x, vec_right1(x, ff)).
// NOTE(review): vec_left1 / vec_right1 presumably shift x by one element and
// fill the vacated lane from the ff operand -- confirm against their
// definition. x is evaluated several times; `ff` must be visible at the
// expansion site.
#define vec1_diffusion1(x) \
    vec1_min3_cond(vec_left1(ff, x), x, vec_right1(x, ff))
---|
105 | |
---|
// if edge duplication (vec_ldup / vec_rdup) is too complex, use instead:
// xd = vec1_min3_cond(vec_left1(ff, x), x, vec_right1(x, ff))
---|
108 | |
---|
109 | // ================ // |
---|
110 | // === mask neq === // |
---|
111 | // ================ // |
---|
112 | |
---|
// vec_maskneq(x1, x0):
// vec_movemask of the complement of (x1 == x0). Assuming vec_andnot(m, v) is
// (~m) & v (TODO confirm), the result is zero exactly when every lane of x1
// equals the matching lane of x0, which is how the diff2stab loops use it.
// `ff` must be visible at the expansion site.
#define vec_maskneq(x1, x0) \
    vec_movemask(vec_andnot(vec_cmpeq(x1, x0), ff))
---|
115 | |
---|
116 | // ========================== // |
---|
117 | // === positive_diffusion === // |
---|
118 | // ========================== // |
---|
119 | |
---|
120 | // --- Mpar-0 --- |
---|
// vec0_positive_diff2stab_old1(pm):
// legacy Mpar-0 "diffuse until stable" loop, in place on pm. Each pass takes
// vec0_positive_min3 of (vec_ldup(pm), pm, vec_rdup(pm)), masks the result
// with the complement of (pm == zero), and repeats while vec_maskneq reports
// a difference between pm and the new value.
// The not-equal test goes through vec_maskneq, which expands to the same
// cmpeq / andnot / movemask sequence that used to be written out here.
// Constraints: pm must be an lvalue not named _l, _r, _md, _cm, _eq0,
// _sflag, _m or _eq; `ff` and `zero` must be visible at the expansion site.
#define vec0_positive_diff2stab_old1(pm) \
do { \
    vuint32 _l, _r; \
    vuint32 _md, _cm; \
    vuint32 _eq0; \
    uint32 _sflag; \
    do { \
        _l = vec_ldup(pm); \
        _r = vec_rdup(pm); \
        vec0_positive_min3(_l, pm, _r, _md); \
        _eq0 = vec_cmpeq(pm, zero); \
        _cm = vec_andnot(_eq0, _md);      /* masked with ~(pm == zero) */ \
        _sflag = vec_maskneq(pm, _cm);    /* non-zero while pm still changes */ \
        pm = _cm; \
    } while(_sflag); \
} while(0)
---|
139 | |
---|
// vec0_positive_diff2stab_old(pm):
// legacy Mpar-0 "diffuse until stable" loop, in place on pm. Each pass takes
// vec0_positive_min3_cond of (vec_ldup(pm), pm, vec_rdup(pm)) and repeats
// while vec_maskneq reports a difference between pm and the new value.
// The not-equal test goes through vec_maskneq, which expands to the same
// cmpeq / andnot / movemask sequence that used to be written out here.
// Constraints: pm must be an lvalue not named _l, _r, _cm, _sflag, _m or
// _eq; `ff` and `zero` must be visible at the expansion site.
#define vec0_positive_diff2stab_old(pm) \
do { \
    vuint32 _l, _r; \
    vuint32 _cm; \
    uint32 _sflag; \
    do { \
        _l = vec_ldup(pm); \
        _r = vec_rdup(pm); \
        vec0_positive_min3_cond(_l, pm, _r, _cm); \
        _sflag = vec_maskneq(pm, _cm);    /* non-zero while pm still changes */ \
        pm = _cm; \
    } while(_sflag); \
} while(0)
---|
156 | |
---|
// vec0_diff2stab_1param(pm):
// Mpar-0 "diffuse until stable", in place: applies vec0_diffusion1 to pm
// repeatedly, writing each result back to pm, until vec_maskneq reports no
// difference between two successive values.
// Constraints: pm must be an lvalue not named _sflag, _dm, or any local of
// the macros expanded inside the loop (_l, _r, _m, _eq).
#define vec0_diff2stab_1param(pm) \
do { \
    uint32 _sflag; \
    vuint32 _dm; \
    do { \
        vec0_diffusion1(pm, _dm); \
        _sflag = vec_maskneq(pm, _dm);    /* non-zero while pm still changes */ \
        pm = _dm; \
    } while(_sflag); \
} while(0)
---|
167 | |
---|
// vec0_diff2stab_2param(pm, dm):
// Mpar-0 "diffuse until stable", out of place: iterates vec0_diffusion1
// starting from a private copy of pm and leaves the stable value in dm.
// pm itself is only read, once, to initialise the copy.
// Constraints: dm must be an lvalue not named _sflag, _x, or any local of the
// macros expanded inside the loop (_l, _r, _m, _eq).
#define vec0_diff2stab_2param(pm, dm) \
do { \
    uint32 _sflag; \
    vuint32 _x = pm; \
    do { \
        vec0_diffusion1(_x, dm); \
        _sflag = vec_maskneq(_x, dm);     /* non-zero while the value changes */ \
        _x = dm; \
    } while(_sflag); \
} while(0)
---|
178 | |
---|
// vec0_diff2stab(pm): default Mpar-0 entry point, bound to the in-place
// one-parameter variant. The two-parameter binding is kept below for reference.
#define vec0_diff2stab(pm) vec0_diff2stab_1param(pm)
//#define vec0_diff2stab(pm, dm) vec0_diff2stab_2param(pm, dm)
---|
181 | |
---|
182 | // --- Mpar-1 --- |
---|
183 | |
---|
// vec1_diff2stab_1param(pm):
// Mpar-1 "diffuse until stable", in place: replaces pm by vec1_diffusion1(pm)
// repeatedly until vec_maskneq reports no difference between two successive
// values.
// Constraints: pm must be an lvalue not named _sflag or _dm.
#define vec1_diff2stab_1param(pm) \
do { \
    uint32 _sflag; \
    vuint32 _dm; \
    do { \
        _dm = vec1_diffusion1(pm); \
        _sflag = vec_maskneq(pm, _dm);    /* non-zero while pm still changes */ \
        pm = _dm; \
    } while(_sflag); \
} while(0)
---|
194 | |
---|
// vec1_diff2stab_2param(pm, dm):
// Mpar-1 "diffuse until stable", out of place: iterates vec1_diffusion1
// starting from a private copy of pm and leaves the stable value in dm.
// pm itself is only read, once, to initialise the copy.
// Constraints: dm must be an lvalue not named _sflag or _x.
#define vec1_diff2stab_2param(pm, dm) \
do { \
    uint32 _sflag; \
    vuint32 _x = pm; \
    do { \
        dm = vec1_diffusion1(_x); \
        _sflag = vec_maskneq(_x, dm);     /* non-zero while the value changes */ \
        _x = dm; \
    } while(_sflag); \
} while(0)
---|
205 | |
---|
// vec1_diff2stab(pm): default Mpar-1 entry point, bound to the in-place
// one-parameter variant (same convention as vec0_diff2stab). It must not
// forward to vec1_diff2stab_2param, which requires two arguments.
// The two-parameter binding is kept below for reference.
#define vec1_diff2stab(pm) vec1_diff2stab_1param(pm)
//#define vec1_diff2stab(pm, dm) vec1_diff2stab_2param(pm, dm)
---|
208 | |
---|
209 | |
---|
210 | // =========================== // |
---|
211 | // === increment decrement === // |
---|
212 | // =========================== // |
---|
213 | |
---|
// vec_inc(x) / vec_dec(x): x plus / minus the vector constant `one`, via
// vec_add / vec_sub. `one` must be visible at the expansion site.
#define vec_inc(x) vec_add(x, one)
#define vec_dec(x) vec_sub(x, one)
---|
216 | |
---|
#endif // _MACRO_MPAR_SIMD_H_
---|