/* ------------------------- */
/* --- macro_mpar_SIMD.h --- */
/* ------------------------- */

/*
 * Copyright (c) 2015-2015, Lionel Lacassagne, All rights reserved
 * Univ Paris Sud XI, CNRS
 */

#ifndef _MACRO_MPAR_SIMD_H_
#define _MACRO_MPAR_SIMD_H_

// Change detector between two states: forwards both operands to vec_xor
// (presumably a bitwise XOR, i.e. non-zero where x0 and x1 differ).
#define vec_change(x0, x1) \
    vec_xor((x0), (x1))

// ========================== //
// === conditional update === //
// ========================== //

// Mpar-0 conditional update.
// The equality mask of previous_state against `zero` is and-not'ed with
// new_state. Assuming vec_andnot(m, v) computes (~m & v), the result is
// new_state, cleared wherever previous_state equals zero.
// `zero` is not a parameter: it must be visible at the expansion site.
#define vec0_update_cond(new_state, previous_state) \
    vec_andnot(vec_cmpeq((previous_state), zero), (new_state))

// Mpar-1 conditional update.
// Selects between new_state and previous_state using the equality mask of
// previous_state against `ff`. Assuming vec_sel(x, y, m) takes y where m is
// set and x elsewhere, elements already equal to ff keep their previous
// value and all others take new_state.
// previous_state appears twice in the expansion; `ff` must be visible at the
// expansion site.
#define vec1_update_cond(new_state, previous_state) \
    vec_sel((new_state), (previous_state),          \
            vec_cmpeq((previous_state), ff))
// or, equivalently: vec_sel(new_state, ff, vec_cmpeq(previous_state, ff))

// ============================ //
// === min and positive min === //
// ============================ //

// vec0_positive_min3(a, b, c, pm): stores in pm the "positive minimum" of
// a, b and c.
// Starting from `ff`, each operand in turn (a, then b, then c) replaces the
// running value by vec_min(running, operand), except where the operand
// compares equal to `zero`, where the running value is kept. Assuming
// vec_sel(x, y, m) takes y where m is set and x elsewhere, pm ends up as the
// minimum of the non-zero operands, or ff where all three are zero.
//
// Macro hygiene:
//   - each operand is copied once into a temporary prefixed `_pmin3_`, so
//     arguments are evaluated exactly once;
//   - pm is written only at the very end, so pm may be the same variable as
//     a, b or c;
//   - caller variables named _m or _eq (the former temporaries) are safe.
// `ff` and `zero` must be visible at the expansion site; operands must be
// assignable to vuint32.
#define vec0_positive_min3(a, b, c, pm)                                   \
do {                                                                      \
    vuint32 _pmin3_a   = (a);                                             \
    vuint32 _pmin3_b   = (b);                                             \
    vuint32 _pmin3_c   = (c);                                             \
    vuint32 _pmin3_acc = ff;                                              \
    _pmin3_acc = vec_sel(vec_min(_pmin3_acc, _pmin3_a), _pmin3_acc,       \
                         vec_cmpeq(_pmin3_a, zero));                      \
    _pmin3_acc = vec_sel(vec_min(_pmin3_acc, _pmin3_b), _pmin3_acc,       \
                         vec_cmpeq(_pmin3_b, zero));                      \
    _pmin3_acc = vec_sel(vec_min(_pmin3_acc, _pmin3_c), _pmin3_acc,       \
                         vec_cmpeq(_pmin3_c, zero));                      \
    (pm) = _pmin3_acc;                                                    \
} while (0)

// pm real var name should be different from local variables:
// _m and _eq

// vec0_positive_min3_cond(a, b, c, pm): positive minimum of a, b and c,
// conditioned on the centre operand b.
// Starting from `ff`, the operands are folded in the order a, c, b: each
// replaces the running value by vec_min(running, operand) except where the
// operand compares equal to `zero`. The result is finally and-not'ed with
// the "b equals zero" mask. Assuming vec_sel(x, y, m) takes y where m is set
// and vec_andnot(m, v) computes (~m & v), pm is the minimum of the non-zero
// operands where b is non-zero, and zero where b is zero.
//
// Macro hygiene:
//   - each operand is copied once into a temporary prefixed `_pmin3c_`, so
//     arguments are evaluated exactly once;
//   - pm is written only at the very end, so pm may be the same variable as
//     a, b or c;
//   - caller variables named _m or _eq (the former temporaries) are safe.
// `ff` and `zero` must be visible at the expansion site; operands must be
// assignable to vuint32.
#define vec0_positive_min3_cond(a, b, c, pm)                              \
do {                                                                      \
    vuint32 _pmin3c_a   = (a);                                            \
    vuint32 _pmin3c_b   = (b);                                            \
    vuint32 _pmin3c_c   = (c);                                            \
    vuint32 _pmin3c_b0  = vec_cmpeq(_pmin3c_b, zero);                     \
    vuint32 _pmin3c_acc = ff;                                             \
    _pmin3c_acc = vec_sel(vec_min(_pmin3c_acc, _pmin3c_a), _pmin3c_acc,   \
                          vec_cmpeq(_pmin3c_a, zero));                    \
    _pmin3c_acc = vec_sel(vec_min(_pmin3c_acc, _pmin3c_c), _pmin3c_acc,   \
                          vec_cmpeq(_pmin3c_c, zero));                    \
    _pmin3c_acc = vec_sel(vec_min(_pmin3c_acc, _pmin3c_b), _pmin3c_acc,   \
                          _pmin3c_b0);                                    \
    (pm) = vec_andnot(_pmin3c_b0, _pmin3c_acc);                           \
} while (0)

// Mpar-1 two-operand minimum: thin alias of vec_min.
#define vec1_min2(a, b) \
    vec_min((a), (b))
// Mpar-1 three-operand minimum, built from two nested vec1_min2 calls:
// min(min(a, b), c).
#define vec1_min3(a, b, c) \
    vec1_min2(vec1_min2((a), (b)), (c))

// Mpar-1 conditional three-operand minimum: vec1_min3(a, b, c) filtered
// through vec1_update_cond with the centre operand b as previous state.
// b appears several times in the expansion, so it should be free of side
// effects.
#define vec1_min3_cond(a, b, c) \
    vec1_update_cond(vec1_min3((a), (b), (c)), (b))
// pm real var name should be different from local variables:
// _m and _eq

// ================== //
// === diffusion1 === //
// ================== //
// One Mpar-0 diffusion step (expanded form): xd receives
// vec0_positive_min3_cond(vec_ldup(x), x, vec_rdup(x)).
// vec_ldup / vec_rdup are defined outside this file (presumably the left /
// right neighbour vectors of x with edge duplication - to be confirmed).
// Declares block-local temporaries _l and _r: arguments must not use these
// names. x is expanded several times.
#define vec0_diffusion1_expand(x, xd)                \
do {                                                 \
    vuint32 _l = vec_ldup(x);                        \
    vuint32 _r = vec_rdup(x);                        \
    vec0_positive_min3_cond(_l, x, _r, xd);          \
} while (0)

// Do not factor out: _l and _r are used several times.
//
// One Mpar-0 diffusion step: xd receives
// vec0_positive_min3_cond(vec_ldup(x), x, vec_rdup(x)).
// Declares block-local temporaries _l and _r: arguments must not use these
// names. x is expanded several times.
#define vec0_diffusion1(x, xd)                       \
do {                                                 \
    vuint32 _l = vec_ldup(x), _r = vec_rdup(x);      \
    vec0_positive_min3_cond(_l, x, _r, xd);          \
} while (0)

// if the duplication is too complex, use instead:
// vec0_positive_min3_cond(vec_left(zero, x), x, vec_right(x, zero), xd)

// One Mpar-1 diffusion step (expanded form).
// xd first receives vec1_min3(vec_ldup(x), x, vec_rdup(x)) and is then
// passed through vec1_update_cond with x as the previous state.
// The two writes to xd are kept separate on purpose: merging them would
// change the result when xd and x are the same variable.
// Declares block-local temporaries _l and _r: arguments must not use these
// names. x and xd are expanded several times.
#define vec1_diffusion1_expand(x, xd)     \
do {                                      \
    vuint32 _l = vec_ldup(x);             \
    vuint32 _r = vec_rdup(x);             \
    xd = vec1_min3(_l, x, _r);            \
    xd = vec1_update_cond(xd, x);         \
} while (0)

// One Mpar-1 diffusion step, as an expression:
// vec1_min3_cond(vec_left1(ff, x), x, vec_right1(x, ff)).
// vec_left1 / vec_right1 are defined outside this file (presumably one-element
// shifts of x filled with ff - to be confirmed).
// x appears several times in the expansion; `ff` must be visible at the
// expansion site.
#define vec1_diffusion1(x)                 \
    vec1_min3_cond(vec_left1(ff, (x)),     \
                   (x),                    \
                   vec_right1((x), ff))

// if the duplication is too complex, use instead:
// vec1_min3_cond(vec_left1(ff, x), x, vec_right1(x, ff), xd)

// ================ //
// === mask neq === //
// ================ //

// Scalar "not equal" mask of two vectors: the equality mask of x1 and x0 is
// inverted (and-not against `ff`) and reduced with vec_movemask.
// The result is zero exactly when the inverted mask has no bit picked up by
// vec_movemask (presumably: when x1 and x0 are equal everywhere).
// `ff` must be visible at the expansion site.
#define vec_maskneq(x1, x0) \
    vec_movemask(vec_andnot(vec_cmpeq((x1), (x0)), ff))

// ========================== //
// === positive_diffusion === //
// ========================== //

// --- Mpar-0 ---
// Older Mpar-0 "diffuse until stable" loop, operating in place on pm.
// Each pass:
//   1. takes the positive minimum of vec_ldup(pm), pm and vec_rdup(pm),
//   2. clears it wherever pm equals `zero` (and-not with the zero mask),
//   3. builds the movemask of the "pm differs from the new value" mask,
//   4. stores the new value in pm.
// The loop stops after the first pass whose movemask is zero.
// Declares block-local temporaries _l, _r, _md, _cm, _eq, _neq, _eq0 and
// _sflag: pm must not use any of these names. `zero` and `ff` must be
// visible at the expansion site.
#define vec0_positive_diff2stab_old1(pm)           \
do {                                               \
    vuint32 _l, _r;                                \
    vuint32 _md, _cm;                              \
    vuint32 _eq, _neq, _eq0;                       \
    uint32 _sflag;                                 \
    for (;;) {                                     \
        _l = vec_ldup(pm);                         \
        _r = vec_rdup(pm);                         \
        vec0_positive_min3(_l, pm, _r, _md);       \
        _eq0 = vec_cmpeq(pm, zero);                \
        _cm  = vec_andnot(_eq0, _md);              \
        _eq  = vec_cmpeq(pm, _cm);                 \
        _neq = vec_andnot(_eq, ff);                \
        _sflag = vec_movemask(_neq);               \
        pm = _cm;                                  \
        if (!_sflag) break;                        \
    }                                              \
} while (0)

// Older Mpar-0 "diffuse until stable" loop, operating in place on pm.
// Each pass computes vec0_positive_min3_cond(vec_ldup(pm), pm, vec_rdup(pm))
// into _cm, builds the movemask of the "pm differs from _cm" mask, then
// stores _cm in pm. The loop stops after the first pass whose movemask is
// zero.
// Declares block-local temporaries _l, _r, _md, _cm, _eq, _neq and _sflag
// (_md is declared but unused): pm must not use any of these names.
// `ff` must be visible at the expansion site.
#define vec0_positive_diff2stab_old(pm)                 \
do {                                                    \
    vuint32 _l, _r;                                     \
    vuint32 _md, _cm;                                   \
    vuint32 _eq, _neq;                                  \
    uint32 _sflag;                                      \
    for (;;) {                                          \
        _l = vec_ldup(pm);                              \
        _r = vec_rdup(pm);                              \
        vec0_positive_min3_cond(_l, pm, _r, _cm);       \
        _eq  = vec_cmpeq(pm, _cm);                      \
        _neq = vec_andnot(_eq, ff);                     \
        _sflag = vec_movemask(_neq);                    \
        pm = _cm;                                       \
        if (!_sflag) break;                             \
    }                                                   \
} while (0)

// Mpar-0 diffusion to stability, in place.
// Repeats vec0_diffusion1 on pm; after each step pm takes the diffused value.
// The loop stops after the first step for which vec_maskneq(pm, diffused)
// is zero.
// Declares block-local temporaries _sflag and _dm: pm must not use these
// names.
#define vec0_diff2stab_1param(pm)          \
do {                                       \
    uint32 _sflag;                         \
    vuint32 _dm;                           \
    for (;;) {                             \
        vec0_diffusion1(pm, _dm);          \
        _sflag = vec_maskneq(pm, _dm);     \
        pm = _dm;                          \
        if (!_sflag) break;                \
    }                                      \
} while (0)

// Mpar-0 diffusion to stability, out of place.
// Works on a private copy _x of pm: each step diffuses _x into dm, then
// copies dm back into _x. The loop stops after the first step for which
// vec_maskneq(_x, dm) is zero. pm is only read; the result is left in dm.
// Declares block-local temporaries _sflag and _x: pm and dm must not use
// these names.
#define vec0_diff2stab_2param(pm, dm)     \
do {                                      \
    uint32 _sflag;                        \
    vuint32 _x = pm;                      \
    for (;;) {                            \
        vec0_diffusion1(_x, dm);          \
        _sflag = vec_maskneq(_x, dm);     \
        _x = dm;                          \
        if (!_sflag) break;               \
    }                                     \
} while (0)

// Default Mpar-0 diffusion-to-stability entry point: bound to the in-place,
// single-parameter variant.
#define vec0_diff2stab(pm) \
    vec0_diff2stab_1param(pm)
// Alternative binding to the two-parameter variant (disabled):
//#define vec0_diff2stab(pm, dm) vec0_diff2stab_2param(pm, dm)

// --- Mpar-1 ---

// Mpar-1 diffusion to stability, in place.
// Repeats vec1_diffusion1 on pm; after each step pm takes the diffused value.
// The loop stops after the first step for which vec_maskneq(pm, diffused)
// is zero.
// Declares block-local temporaries _sflag and _dm: pm must not use these
// names.
#define vec1_diff2stab_1param(pm)          \
do {                                       \
    uint32 _sflag;                         \
    vuint32 _dm;                           \
    for (;;) {                             \
        _dm = vec1_diffusion1(pm);         \
        _sflag = vec_maskneq(pm, _dm);     \
        pm = _dm;                          \
        if (!_sflag) break;                \
    }                                      \
} while (0)

// Mpar-1 diffusion to stability, out of place.
// Works on a private copy _x of pm: each step stores vec1_diffusion1(_x) in
// dm, then copies dm back into _x. The loop stops after the first step for
// which vec_maskneq(_x, dm) is zero. pm is only read; the result is left in
// dm.
// Declares block-local temporaries _sflag and _x: pm and dm must not use
// these names.
#define vec1_diff2stab_2param(pm, dm)     \
do {                                      \
    uint32 _sflag;                        \
    vuint32 _x = pm;                      \
    for (;;) {                            \
        dm = vec1_diffusion1(_x);         \
        _sflag = vec_maskneq(_x, dm);     \
        _x = dm;                          \
        if (!_sflag) break;               \
    }                                     \
} while (0)

// Default Mpar-1 diffusion-to-stability entry point: bound to the in-place,
// single-parameter variant, mirroring vec0_diff2stab.
// (It used to forward its single argument to vec1_diff2stab_2param, which
// takes two parameters, so any use failed to preprocess.)
#define vec1_diff2stab(pm)     vec1_diff2stab_1param(pm)
// Alternative binding to the two-parameter variant (disabled):
//#define vec1_diff2stab(pm, dm) vec1_diff2stab_2param(pm, dm)


// =========================== //
// === increment decrement === //
// =========================== //

// Increment: vec_add of x and `one` (which must be visible at the expansion
// site).
#define vec_inc(x) \
    vec_add((x), one)
// Decrement: vec_sub of x and `one` (which must be visible at the expansion
// site).
#define vec_dec(x) \
    vec_sub((x), one)

#endif // _MACRO_MPAR_SIMD_H_