/* ------------------------- */
/* --- macro_mpar_SIMD.h --- */
/* ------------------------- */

/*
 * Copyright (c) 2015-2015, Lionel Lacassagne, All rights reserved
 * Univ Paris Sud XI, CNRS
 */

#ifndef _MACRO_MPAR_SIMD_H_
#define _MACRO_MPAR_SIMD_H_

// Requirements on the including translation unit (nothing below defines them):
//   - the types vuint32 and uint32;
//   - the primitives vec_xor, vec_andnot, vec_cmpeq, vec_sel, vec_min,
//     vec_ldup, vec_rdup, vec_left1, vec_right1, vec_movemask, vec_add, vec_sub;
//   - the identifiers zero, ff and one, visible at every expansion site.
// The descriptions below assume vec_andnot(a, b) computes (~a & b) and
// vec_sel(x, y, m) takes y where m is set and x elsewhere -- confirm against
// the header that provides these primitives.
//
// Statement-like macros declare locals named _l, _r, _m, _eq, _md, _cm, _neq,
// _dm, _x and _sflag: arguments passed by the caller must not use these names.
// Arguments are expanded textually and may be evaluated several times, so they
// must be free of side effects.

// vec_change(x0, x1): vec_xor of the two states.
#define vec_change(x0, x1) vec_xor(x0, x1)

// ========================== //
// === conditional update === //
// ========================== //

// Mpar-0: result is masked (vec_andnot) by the lanes where previous_state == zero.
#define vec0_update_cond(new_state, previous_state) \
    vec_andnot(vec_cmpeq(previous_state, zero), new_state)

// Mpar-1: selects between new_state and previous_state on the mask
// (previous_state == ff).
// Alternative: vec_sel(new_state, ff, vec_cmpeq(previous_state, ff))
#define vec1_update_cond(new_state, previous_state) \
    vec_sel(new_state, previous_state, vec_cmpeq(previous_state, ff))

// ============================ //
// === min and positive min === //
// ============================ //

// pm receives the running vec_min of a, b, c starting from ff; an operand is
// skipped (pm kept as is) on the lanes where that operand equals zero.
// pm must be an lvalue whose name differs from the locals _m and _eq.
#define vec0_positive_min3(a, b, c, pm) \
    do {                                \
        vuint32 _m, _eq;                \
        pm  = ff;                       \
        _m  = vec_min(pm, a);           \
        _eq = vec_cmpeq(a, zero);       \
        pm  = vec_sel(_m, pm, _eq);     \
        _m  = vec_min(pm, b);           \
        _eq = vec_cmpeq(b, zero);       \
        pm  = vec_sel(_m, pm, _eq);     \
        _m  = vec_min(pm, c);           \
        _eq = vec_cmpeq(c, zero);       \
        pm  = vec_sel(_m, pm, _eq);     \
    } while (0)

// Same accumulation as vec0_positive_min3, followed by a mask on (b == zero).
// The operands are processed in the order a, c, b on purpose: b goes last so
// that _eq still holds vec_cmpeq(b, zero) for the final vec_andnot.
// pm must be an lvalue whose name differs from the locals _m and _eq.
#define vec0_positive_min3_cond(a, b, c, pm) \
    do {                                     \
        vuint32 _m, _eq;                     \
        pm  = ff;                            \
        _m  = vec_min(pm, a);                \
        _eq = vec_cmpeq(a, zero);            \
        pm  = vec_sel(_m, pm, _eq);          \
        _m  = vec_min(pm, c);                \
        _eq = vec_cmpeq(c, zero);            \
        pm  = vec_sel(_m, pm, _eq);          \
        _m  = vec_min(pm, b);                \
        _eq = vec_cmpeq(b, zero);            \
        pm  = vec_sel(_m, pm, _eq);          \
        pm  = vec_andnot(_eq, pm);           \
    } while (0)

// Mpar-1 minima are plain vec_min reductions; the _cond variant then applies
// vec1_update_cond with b as the previous state (b is therefore evaluated twice).
#define vec1_min2(a, b)         vec_min(a, b)
#define vec1_min3(a, b, c)      vec1_min2(vec1_min2(a, b), c)
#define vec1_min3_cond(a, b, c) vec1_update_cond(vec1_min3(a, b, c), b)

// ================== //
// === diffusion1 === //
// ================== //

// One diffusion step of x into xd, using the vec_ldup / vec_rdup neighbours.
// The neighbours are stored in locals rather than passed inline because
// vec0_positive_min3_cond uses each of its operands several times.
#define vec0_diffusion1_expand(x, xd)           \
    do {                                        \
        vuint32 _l, _r;                         \
        _l = vec_ldup(x);                       \
        _r = vec_rdup(x);                       \
        vec0_positive_min3_cond(_l, x, _r, xd); \
    } while (0)

// Same expansion as vec0_diffusion1_expand.
// If the duplication proves too complex, an alternative is:
// vec0_positive_min3_cond(vec_left(zero, x), x, vec_right(x, zero), xd)
#define vec0_diffusion1(x, xd)                  \
    do {                                        \
        vuint32 _l, _r;                         \
        _l = vec_ldup(x);                       \
        _r = vec_rdup(x);                       \
        vec0_positive_min3_cond(_l, x, _r, xd); \
    } while (0)

// Statement form of the Mpar-1 step: xd = vec1_update_cond(min3(l, x, r), x)
// with the vec_ldup / vec_rdup neighbours.
#define vec1_diffusion1_expand(x, xd)  \
    do {                               \
        vuint32 _l, _r;                \
        _l = vec_ldup(x);              \
        _r = vec_rdup(x);              \
        xd = vec1_min3(_l, x, _r);     \
        xd = vec1_update_cond(xd, x);  \
    } while (0)

// Expression form of the Mpar-1 step, using vec_left1 / vec_right1 with ff.
// NOTE(review): this form uses vec_left1/vec_right1 whereas the _expand form
// uses vec_ldup/vec_rdup -- confirm that both are meant to be equivalent.
#define vec1_diffusion1(x) \
    vec1_min3_cond(vec_left1(ff, x), x, vec_right1(x, ff))

// ================ //
// === mask neq === //
// ================ //

// vec_movemask of the complement of (x1 == x0).
#define vec_maskneq(x1, x0) \
    vec_movemask(vec_andnot(vec_cmpeq(x1, x0), ff))

// ========================== //
// === positive_diffusion === //
// ========================== //

// --- Mpar-0 ---

// Legacy variant: repeats the diffusion step on pm (in place) until
// vec_movemask reports no difference between two successive states.
// The (pm == zero) mask is applied explicitly after vec0_positive_min3.
#define vec0_positive_diff2stab_old1(pm)          \
    do {                                          \
        vuint32 _l, _r;                           \
        vuint32 _md, _cm;                         \
        vuint32 _eq, _neq, _eq0;                  \
        uint32  _sflag;                           \
        do {                                      \
            _l = vec_ldup(pm);                    \
            _r = vec_rdup(pm);                    \
            vec0_positive_min3(_l, pm, _r, _md);  \
            _eq0   = vec_cmpeq(pm, zero);         \
            _cm    = vec_andnot(_eq0, _md);       \
            _eq    = vec_cmpeq(pm, _cm);          \
            _neq   = vec_andnot(_eq, ff);         \
            _sflag = vec_movemask(_neq);          \
            pm     = _cm;                         \
        } while (_sflag);                         \
    } while (0)

// Legacy variant built on vec0_positive_min3_cond; iterates on pm (in place)
// until vec_movemask reports no difference between two successive states.
#define vec0_positive_diff2stab_old(pm)               \
    do {                                              \
        vuint32 _l, _r;                               \
        vuint32 _cm;                                  \
        vuint32 _eq, _neq;                            \
        uint32  _sflag;                               \
        do {                                          \
            _l = vec_ldup(pm);                        \
            _r = vec_rdup(pm);                        \
            vec0_positive_min3_cond(_l, pm, _r, _cm); \
            _eq    = vec_cmpeq(pm, _cm);              \
            _neq   = vec_andnot(_eq, ff);             \
            _sflag = vec_movemask(_neq);              \
            pm     = _cm;                             \
        } while (_sflag);                             \
    } while (0)

// Applies vec0_diffusion1 to pm, in place, until vec_maskneq(pm, next) is 0.
#define vec0_diff2stab_1param(pm)            \
    do {                                     \
        uint32  _sflag;                      \
        vuint32 _dm;                         \
        do {                                 \
            vec0_diffusion1(pm, _dm);        \
            _sflag = vec_maskneq(pm, _dm);   \
            pm     = _dm;                    \
        } while (_sflag);                    \
    } while (0)

// Same iteration, but pm is only read (copied into _x) and the stabilised
// state is left in dm.
#define vec0_diff2stab_2param(pm, dm)        \
    do {                                     \
        uint32  _sflag;                      \
        vuint32 _x = pm;                     \
        do {                                 \
            vec0_diffusion1(_x, dm);         \
            _sflag = vec_maskneq(_x, dm);    \
            _x     = dm;                     \
        } while (_sflag);                    \
    } while (0)

#define vec0_diff2stab(pm) vec0_diff2stab_1param(pm)
// Two-parameter alternative:
//#define vec0_diff2stab(pm, dm) vec0_diff2stab_2param(pm, dm)

// --- Mpar-1 ---

// Applies vec1_diffusion1 to pm, in place, until vec_maskneq(pm, next) is 0.
#define vec1_diff2stab_1param(pm)            \
    do {                                     \
        uint32  _sflag;                      \
        vuint32 _dm;                         \
        do {                                 \
            _dm    = vec1_diffusion1(pm);    \
            _sflag = vec_maskneq(pm, _dm);   \
            pm     = _dm;                    \
        } while (_sflag);                    \
    } while (0)

// Same iteration, but pm is only read (copied into _x) and the stabilised
// state is left in dm.
#define vec1_diff2stab_2param(pm, dm)        \
    do {                                     \
        uint32  _sflag;                      \
        vuint32 _x = pm;                     \
        do {                                 \
            dm     = vec1_diffusion1(_x);    \
            _sflag = vec_maskneq(_x, dm);    \
            _x     = dm;                     \
        } while (_sflag);                    \
    } while (0)

// The one-argument entry point must map to the one-parameter implementation:
// forwarding a single argument to vec1_diff2stab_2param does not preprocess.
#define vec1_diff2stab(pm) vec1_diff2stab_1param(pm)
// Two-parameter alternative:
//#define vec1_diff2stab(pm, dm) vec1_diff2stab_2param(pm, dm)

// =========================== //
// === increment decrement === //
// =========================== //

#define vec_inc(x) vec_add(x, one)
#define vec_dec(x) vec_sub(x, one)

#endif // _MACRO_MPAR_SIMD_H_