/* ----------------------- */
/* --- mca_rosenfeld.c --- */
/* ----------------------- */

/*
 * Copyright (c) 2016 Lionel Lacassagne, LIP6, UPMC, CNRS
 * Init  : 2016/03/03
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <user_barrier.h>
#include <user_lock.h>

#ifdef CLI
#include "nrc_os_config.h"
#include "nrc.h"
#endif

#include "util.h"
#include "ecc_common.h"

#include "palette.h"
#include "bmpNR.h"

#include "str_ext.h"

// -----------
// -- local --
// -----------

#include "mca.h"

extern giet_barrier_t main_barrier;

// ----------------------------------
uint32 FindRoot(uint32 * T, uint32 e)
// ----------------------------------
{
    // Returns the root of label e in the equivalence table T.
    // Starting from e, the entries of T are followed as long as the entry
    // read is strictly smaller than the label it was read from; the label
    // where the walk stops is returned. T is not modified.
    uint32 root = e;

    for (uint32 parent = T[root]; parent < root; parent = T[root]) {
        root = parent;
    }
    return root;
}


// ---------------------------------------------------
uint32 FindRoot_Dist(uint32 ** D, uint32 r, int shift)
// ---------------------------------------------------
{
    // Root lookup in the two-level table D.
    // A label is split into a row index (label >> shift) and a column index
    // (the `shift` low-order bits of the label). Entries are followed until
    // the value read is no longer smaller than the label it was read from;
    // that value is returned. At least one entry of D is always read.
    uint32 label;
    uint32 row;
    uint32 col;

    int low_bits = (1 << shift) - 1;

    MCA_VERBOSE2(printf("FindRoot_Dist(%d) (alpha = %d) \n", r, shift));
    for (;;) {
        label = r;
        row = label >> shift;
        col = label & low_bits;
        r = D[row][col];
        MCA_VERBOSE2(printf("FindRoot: D(%d) = D[%d,%d] = %d (alpha = %d)\n", label, row, col, r, shift));
        if (r >= label) {
            break;
        }
    }
    MCA_VERBOSE2(printf("FindRoot_Dist = %d \n\n", r));
    return r;
}


// -----------------------------------------
void SetRoot(uint32 * T, uint32 e, uint32 r)
// -----------------------------------------
{
    // Walks from label e up to the root of its class in T (same walk as
    // FindRoot) and overwrites the entry of that root with r.
    uint32 top = e;

    for (;;) {
        uint32 parent = T[top];
        if (parent >= top) {
            break;
        }
        top = parent;
    }
    T[top] = r;
}


// ----------------------------------------------------------
void SetRoot_Dist(uint32 ** D, uint32 e, uint32 r, int shift)
// ----------------------------------------------------------
{
    // Stores r in the entry of the two-level table D addressed by label e:
    // row = e >> shift, column = the `shift` low-order bits of e.
    // Unlike SetRoot, no root search is performed: the entry of e itself
    // is overwritten.
    int low_bits = (1 << shift) - 1;

    D[e >> shift][e & low_bits] = r;
}


// --------------------------------------------
uint32 Union0(uint32 * T, uint32 ei, uint32 ej)
// --------------------------------------------
{
    // Version from the publication.
    // Merges the classes of labels ei and ej in T: both roots are redirected
    // to the smaller of the two roots, which is returned. A label equal to 0
    // is given root 0 without looking it up in T.
    // @QM: should the == 0 case also be tested here?
    uint32 root = (ei == 0) ? 0 : FindRoot(T, ei);

    if (ei != ej) {
        uint32 other = (ej == 0) ? 0 : FindRoot(T, ej);
        if (other < root) {
            root = other;
        }
        SetRoot(T, ej, root);
    }
    SetRoot(T, ei, root);
    return root;
}


// -------------------------------------------------
uint32 QuickUnion2(uint32 * T, uint32 e1, uint32 e2)
// -------------------------------------------------
{
    // Quick-Union flavour of the two-label union.
    // Finds the roots of e1 and e2 (a label equal to 0 gets root 0 without
    // any lookup), takes the smaller one with ui32Min2, and makes the other
    // root point to it. Returns the smaller root.
    uint32 root1 = 0;
    uint32 root2 = 0;
    uint32 low;

    if (e1 != 0) {
        root1 = FindRoot(T, e1);
    }
    if (e2 != 0) {
        root2 = FindRoot(T, e2);
    }

    low = ui32Min2(root1, root2);

    // equivalent to SetRoot, but the roots are already known
    if (root1 != low) {
        T[root1] = low;
    }
    if (root2 != low) {
        T[root2] = low;
    }

    return low;
}


// --------------------------------------------
uint32 use1_QU_Rosenfeld(uint32 e1, uint32 * T)
// --------------------------------------------
{
    // Single-neighbour case: the result is the entry of e1 in T
    // (one table read, no root search).
    uint32 label = T[e1];

    return label;
}


// -------------------------------------------------------
uint32 use2_QU_Rosenfeld(uint32 e1, uint32 e2, uint32 * T)
// -------------------------------------------------------
{
    // Two-neighbour case: merges the classes of e1 and e2 with QuickUnion2
    // and returns the root it reports.
    uint32 merged = QuickUnion2(T, e1, e2);

    return merged;
}


// ----------------------------------------------------------------
uint32 updateTable_Rosenfeld(uint32 * T, uint32 e, uint32 epsilon)
// ----------------------------------------------------------------
{
    // Records the equivalence between e and epsilon in T by delegating to
    // Union0 (the original version) and returns Union0's result.
    // Notation: e == v, epsilon == u with v > u (v necessarily differs from u).
    uint32 root = Union0(T, e, epsilon);

    return root;
}


// ----------------------------------------------------------------
void vuse2_Rosenfeld(uint32 e1, uint32 e2, uint32 * T, uint32 ** D)
// ----------------------------------------------------------------
{
    // Merges the classes of e1 and e2 in T when their roots differ.
    // A label equal to 0 is given root 0 without any lookup.
    // D is accepted for signature compatibility but is not used here.
    uint32 root1 = (e1 == 0) ? 0 : FindRoot(T, e1);
    uint32 root2 = (e2 == 0) ? 0 : FindRoot(T, e2);
    uint32 smaller;
    uint32 larger;

    if (root1 == root2) {
        return; // already in the same class: nothing to record
    }

    // Labels are expected to be positive here: the caller
    // (optimizedBorder) has already tested them.
    if (root1 < root2) {
        smaller = root1;
        larger = root2;
    }
    else {
        smaller = root2;
        larger = root1;
    }
    updateTable_Rosenfeld(T, larger, smaller);
}


// ---------------------------------------------------------------------------
void vuse3_Rosenfeld(uint32 e1, uint32 e2, uint32 e3, uint32 * T, uint32 ** D)
// ---------------------------------------------------------------------------
{
    // Merges the classes of e1, e2 and e3 in T towards the smallest of their
    // three roots. A label equal to 0 is given root 0 without any lookup.
    // D is accepted for signature compatibility but is not used here.
    uint32 root1 = (e1 == 0) ? 0 : FindRoot(T, e1);
    uint32 root2 = (e2 == 0) ? 0 : FindRoot(T, e2);
    uint32 root3 = (e3 == 0) ? 0 : FindRoot(T, e3);

    if (root1 != root2 || root2 != root3) {
        // Labels are expected to be positive here: the caller
        // (optimizedBorder) has already tested them.
        uint32 lowest = ui32Min3(root1, root2, root3);

        if (root1 > lowest) {
            updateTable_Rosenfeld(T, root1, lowest);
        }
        // re-read the entry: the previous update may have redirected it
        root2 = T[root2];
        if (root2 > lowest) {
            updateTable_Rosenfeld(T, root2, lowest);
        }
        // same re-read for the third root
        root3 = T[root3];
        if (root3 > lowest) {
            updateTable_Rosenfeld(T, root3, lowest);
        }
    }
}


// ----------------------------------------------
uint32 solveTable_Rosenfeld(uint32 * T, uint32 ne)
// ----------------------------------------------
{
    // Equivalent to Flatten: transitive closure without packing
    // (label numbers may keep holes). For e = 1..ne in increasing order,
    // T[e] is replaced by the entry of its current parent. Returns ne.
    uint32 e = 1;

    while (e <= ne) {
        uint32 parent = T[e];
        T[e] = T[parent];
        e++;
    }
    return ne;
}


// ----------------------------------------------------------------------------------
uint32 optimizedAccess_DT_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ne)
// ----------------------------------------------------------------------------------
{
    // 8-connected decision tree with Quick-Union.
    // Chooses the label of pixel (i, j) from its already-labeled neighbours
    //   up_left = E[i-1][j-1]   up = E[i-1][j]   up_right = E[i-1][j+1]
    //   left    = E[i][j-1]
    // reading only the neighbours the decision actually needs, stores the
    // label in E[i][j], and returns the (possibly incremented) label
    // counter ne.
    uint32 label;
    uint32 up = E[i - 1][j];

    if (up) {
        label = use1_QU_Rosenfeld(up, T);
    }
    else {
        uint32 up_right = E[i - 1][j + 1];
        uint32 up_left = E[i - 1][j - 1];

        if (up_left) {
            if (up_right) {
                label = use2_QU_Rosenfeld(up_left, up_right, T);
            }
            else {
                label = use1_QU_Rosenfeld(up_left, T);
            }
        }
        else {
            uint32 left = E[i][j - 1];

            if (up_right) {
                if (left) {
                    label = use2_QU_Rosenfeld(up_right, left, T);
                }
                else {
                    label = use1_QU_Rosenfeld(up_right, T);
                }
            }
            else if (left) {
                label = use1_QU_Rosenfeld(left, T);
            }
            else {
                // no labeled neighbour: create a new label
                label = ++ne;
            }
        }
    }
    E[i][j] = label;
    return ne;
}


// ------------------------------------------------------------------------------------------
void optimizedBorder_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha)
// ------------------------------------------------------------------------------------------
{
    // Records the equivalences between the label of pixel (i, j) and the
    // labels of its neighbours on line i - 1, using the same decision order
    // as the labeling decision tree: the pixel right above first, then the
    // two diagonal neighbours. alpha is not used by this function.
    uint32 up = E[i - 1][j];
    uint32 current = E[i][j];
    uint32 up_right;
    uint32 up_left;

    if (up) {
        vuse2_Rosenfeld(up, current, T, D);
        return;
    }

    up_right = E[i - 1][j + 1];
    up_left = E[i - 1][j - 1];

    if (up_right && up_left) {
        vuse3_Rosenfeld(up_left, up_right, current, T, D);
    }
    else if (up_right) {
        vuse2_Rosenfeld(up_right, current, T, D);
    }
    else if (up_left) {
        vuse2_Rosenfeld(up_left, current, T, D);
    }
}


// -----------------------------------------------------------------------------------------------------------------
void borderMerging_Fast_Rosenfeld_Dist(uint8 **X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha)
// -----------------------------------------------------------------------------------------------------------------
{
    // Decision-tree variant of the border merging: for every non-zero pixel
    // of line i of X (columns 0..width-1), records the equivalences with
    // line i - 1 through optimizedBorder_Rosenfeld.
    int j = 0;

    while (j < width) {
        if (X[i][j] != 0) {
            optimizedBorder_Rosenfeld(E, i, j, T, D, alpha);
        }
        j++;
    }
}


// ------------------------------------------------------------------------------------------------------------------
void borderMerging_Slow_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha)
// ------------------------------------------------------------------------------------------------------------------
{
    // Copy of borderMerging_Rosenfeld_UF_Fast2_8C.
    //
    // Merges the labels of line i with those of line i - 1.
    // For every labeled pixel of line i, the roots of its neighbours on line
    // i - 1 are looked up in the two-level table D (FindRoot_Dist), the root
    // of the pixel itself is looked up in T (FindRoot), and every root larger
    // than the smallest non-null one is redirected to it.
    //
    // The line is processed in three parts so that column 0 never reads
    // column -1 and column width-1 never reads column width:
    //   prologue  (j = 0)            neighbours j, j+1
    //   main loop (j = 1..width-2)   neighbours j-1, j, j+1
    //   epilogue  (j = width-1)      neighbours j-1, j
    //
    // X is not read by this variant; a pixel is considered set when its
    // label E[i][j] is non-zero.
    //
    // NOTE(review): FindRoot_Dist is called even when a neighbour label is 0,
    // which relies on the entry D[0][0] — presumably 0, to be confirmed.

    int j;

    uint32 e;

    uint32 e1, e2, e3, ex;
    uint32 r1, r2, r3, rx;

    // --------------
    // -- prologue --
    // --------------
    MCA_VERBOSE2(printf("[borderMerging_Slow_Rosenfeld_Dist] i = %d\n", i));

    j = 0;
    ex = E[i][j];

    if (ex) {

        MCA_VERBOSE2(printf("[borderMerging_Slow_Rosenfeld_Dist] j = %d\n", j));

        e2 = E[i - 1][j];
        e3 = E[i - 1][j + 1];

        r2 = FindRoot_Dist(D, e2, alpha);
        r3 = FindRoot_Dist(D, e3, alpha);
        rx = FindRoot(T, ex); // we already tested that ex != 0

        MCA_VERBOSE2(printf("\n"));
        MCA_VERBOSE2(printf("e2 = %4d -> %4d\n", e2, r2));
        MCA_VERBOSE2(printf("e3 = %4d -> %4d\n", e3, r3));
        MCA_VERBOSE2(printf("ex = %4d -> %4d\n", ex, rx));

        e = ui32MinNonNul3(r2, r3, rx);

        // Quick-Union
        if (r2 > e) {
            SetRoot_Dist(D, r2, e, alpha);
            MCA_VERBOSE2(printf("D[%4d] <- %d\n", r2, e));
        }
        if (r3 > e) {
            SetRoot_Dist(D, r3, e, alpha);
            MCA_VERBOSE2(printf("D[%4d] <- %d\n", r3, e));
        }
        if (rx > e) {
            SetRoot(T, rx, e);
            MCA_VERBOSE2(printf("D[%4d] <- %d\n", rx, e));
        }
        MCA_VERBOSE2(printf("\n"));
        // note: SetRoot performs a root walk that is not needed here
    }

    // ---------------
    // -- main loop --
    // ---------------

    for (j = 0 + 1; j < width - 1; j++) {

        ex = E[i][j];

        // general case only (to keep the code simple)
        if (ex) {
            MCA_VERBOSE2(printf("[borderMerging_Slow_Rosenfeld_Dist] j = %d\n", j));

            e1 = E[i - 1][j - 1];
            e2 = E[i - 1][j];
            e3 = E[i - 1][j + 1];

            r1 = FindRoot_Dist(D, e1, alpha);
            r2 = FindRoot_Dist(D, e2, alpha);
            r3 = FindRoot_Dist(D, e3, alpha);
            rx = FindRoot(T, ex); // we already tested that ex != 0

            MCA_VERBOSE2(printf("\n"));
            MCA_VERBOSE2(printf("e1 = %4d -> %4d\n", e1, r1));
            MCA_VERBOSE2(printf("e2 = %4d -> %4d\n", e2, r2));
            MCA_VERBOSE2(printf("e3 = %4d -> %4d\n", e3, r3));
            MCA_VERBOSE2(printf("ex = %4d -> %4d\n", ex, rx));

            e = ui32MinNonNul4(r1, r2, r3, rx);

            // Quick-Union
            if (r1 > e) {
                SetRoot_Dist(D, r1, e, alpha);
                MCA_VERBOSE2(printf("D[%4d] <- %d\n", r1, e));
            }
            if (r2 > e) {
                SetRoot_Dist(D, r2, e, alpha);
                MCA_VERBOSE2(printf("D[%4d] <- %d\n", r2, e));
            }
            if (r3 > e) {
                SetRoot_Dist(D, r3, e, alpha);
                MCA_VERBOSE2(printf("D[%4d] <- %d\n", r3, e));
            }
            if (rx > e) {
                // The larger root rx must be redirected to the smaller root e,
                // exactly as in the prologue and the epilogue. Writing
                // T[e] = rx instead would point the small root at the large
                // one, which breaks the "entry <= label" invariant FindRoot
                // depends on and loses the merge.
                SetRoot(T, rx, e);
                MCA_VERBOSE2(printf("D[%4d] <- %d\n", rx, e));
            }
            MCA_VERBOSE2(printf("\n"));
            // note: SetRoot performs a root walk that is not needed here
        }
    }

    // --------------
    // -- epilogue --
    // --------------

    j = width - 1;
    ex = E[i][j];

    if (ex) {

        MCA_VERBOSE2(printf("[borderMerging_Slow_Rosenfeld_Dist] j = %d\n", j));

        e1 = E[i - 1][j - 1];
        e2 = E[i - 1][j];

        r1 = FindRoot_Dist(D, e1, alpha);
        r2 = FindRoot_Dist(D, e2, alpha);
        rx = FindRoot(T, ex); // we already tested that ex != 0

        MCA_VERBOSE2(printf("\n"));
        MCA_VERBOSE2(printf("e1 = %4d -> %4d\n", e1, r1));
        MCA_VERBOSE2(printf("e2 = %4d -> %4d\n", e2, r2));
        MCA_VERBOSE2(printf("ex = %4d -> %4d\n", ex, rx));

        e = ui32MinNonNul3(r1, r2, rx);

        // Quick-Union
        if (r1 > e) {
            SetRoot_Dist(D, r1, e, alpha);
            MCA_VERBOSE2(printf("D[%4d] <- %d\n", r1, e));
        }
        if (r2 > e) {
            SetRoot_Dist(D, r2, e, alpha);
            MCA_VERBOSE2(printf("D[%4d] <- %d\n", r2, e));
        }
        if (rx > e) {
            SetRoot(T, rx, e);
            MCA_VERBOSE2(printf("D[%4d] <- %d\n", rx, e));
        }
        MCA_VERBOSE2(printf("\n"));
    }
    return;
}


// -------------------------------------------------------------------------------------------------------------
void borderMerging_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha)
// -------------------------------------------------------------------------------------------------------------
{
    // Entry point used for border merging between line i and line i - 1.
    // Currently forwards every argument, unchanged, to the "Slow" variant.
    borderMerging_Slow_Rosenfeld_Dist(X, i, width, E, T, D, alpha);
    return;
}


// ---------------------------------------------------------------------------------------------
uint32 line0Labeling_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// ---------------------------------------------------------------------------------------------
{
    // Labels line i of X into E using only the left neighbour (no line above
    // is consulted). A set pixel takes the label of the pixel on its left
    // when that one is labeled, otherwise a new label ++ne; an unset pixel
    // gets 0. T is not used. Returns the updated label counter ne.
    // Column 0 is always processed, whatever the value of width.
    uint32 previous; // label just written at column j - 1

    // column 0: no left neighbour
    if (X[i][0]) {
        previous = ++ne;
    }
    else {
        previous = 0;
    }
    E[i][0] = previous;

    // columns 1..width-1
    for (int j = 1; j < width; j++) {
        if (X[i][j]) {
            if (previous == 0) {
                previous = ++ne;
            }
        }
        else {
            previous = 0;
        }
        E[i][j] = previous;
    }
    return ne;
}


// -------------------------------------------------------------------------------------------------
uint32 lineLabeling_Slow_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// -------------------------------------------------------------------------------------------------
{
    // lineLabeling_Rosenfeld_UF_QU_8C version, with Quick-Union.
    //
    // Labels line i of X into E from the labels already present on line
    // i - 1 and on the left of the current pixel. For each set pixel:
    //   - no labeled neighbour        -> new label ++ne
    //   - all neighbour labels equal  -> that label
    //   - otherwise                   -> smallest non-null neighbour root;
    //                                    larger roots are redirected to it in T
    // An unset pixel gets label 0. Returns the updated label counter ne.
    //
    // The line is split in three parts so that column 0 does not read
    // column -1 and column width-1 does not read column width.
    int j;

    uint32 label;
    uint32 up_left, up, up_right, left;
    uint32 root_ul, root_u, root_ur, root_l;

    // ---------------------------
    // -- first column (j = 0)  --
    // ---------------------------

    j = 0;
    label = 0;

    if (X[i][j]) {
        up = E[i - 1][j];
        up_right = E[i - 1][j + 1];

        if (up == 0 && up_right == 0) {
            // new element
            label = ++ne;
        }
        else if (up == up_right) {
            // identical labels
            label = up;
        }
        else {
            // general case
            root_u = (up == 0) ? 0 : FindRoot(T, up);
            root_ur = (up_right == 0) ? 0 : FindRoot(T, up_right);

            label = ui32MinNonNul2(root_u, root_ur);

            // Quick-Union
            if (root_u > label) {
                T[root_u] = label;
            }
            if (root_ur > label) {
                T[root_ur] = label;
            }
        }
    }
    E[i][j] = label;

    // ---------------------------------------
    // -- inner columns (j = 1..width - 2)  --
    // ---------------------------------------

    for (j = 0 + 1; j < width - 1; j++) {

        label = 0;

        if (X[i][j]) {
            up_left = E[i - 1][j - 1];
            up = E[i - 1][j];
            up_right = E[i - 1][j + 1];
            left = E[i][j - 1];

            if (up_left == 0 && up == 0 && up_right == 0 && left == 0) {
                // new element
                label = ++ne;
            }
            else if (up_left == up && up_left == up_right && up_left == left) {
                // identical labels
                label = up_left;
            }
            else {
                // general case
                root_ul = (up_left == 0) ? 0 : FindRoot(T, up_left);
                root_u = (up == 0) ? 0 : FindRoot(T, up);
                root_ur = (up_right == 0) ? 0 : FindRoot(T, up_right);
                root_l = (left == 0) ? 0 : FindRoot(T, left);

                label = ui32MinNonNul4(root_ul, root_u, root_ur, root_l);
                giet_pthread_assert(label != 0, "e = 0\n");

                // Quick-Union
                if (root_ul > label) {
                    T[root_ul] = label;
                }
                if (root_u > label) {
                    T[root_u] = label;
                }
                if (root_ur > label) {
                    T[root_ur] = label;
                }
                if (root_l > label) {
                    T[root_l] = label;
                }
            }
        }
        E[i][j] = label;
    } // j

    // ---------------------------------
    // -- last column (j = width - 1) --
    // ---------------------------------

    j = width - 1;
    label = 0;

    if (X[i][j]) {
        up_left = E[i - 1][j - 1];
        up = E[i - 1][j];
        left = E[i][j - 1];

        if (up_left == 0 && up == 0 && left == 0) {
            // new element
            label = ++ne;
        }
        else if (up_left == up && up_left == left) {
            // identical labels
            label = up_left;
        }
        else {
            // general case
            root_ul = (up_left == 0) ? 0 : FindRoot(T, up_left);
            root_u = (up == 0) ? 0 : FindRoot(T, up);
            root_l = (left == 0) ? 0 : FindRoot(T, left);

            label = ui32MinNonNul3(root_ul, root_u, root_l);

            // Quick-Union
            if (root_ul > label) {
                T[root_ul] = label;
            }
            if (root_u > label) {
                T[root_u] = label;
            }
            if (root_l > label) {
                T[root_l] = label;
            }
        }
    }
    E[i][j] = label;

    return ne;
}


// -------------------------------------------------------------------------------------------------
uint32 lineLabeling_Fast_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// -------------------------------------------------------------------------------------------------
{
    // Decision-tree + Quick-Union variant of the line labeling.
    // Every unset pixel of line i gets label 0; every set pixel is labeled
    // by optimizedAccess_DT_Rosenfeld, which also returns the updated label
    // counter. Returns the final counter ne.
    for (int j = 0; j < width; j++) {
        if (X[i][j] == 0) {
            E[i][j] = 0;
            continue;
        }
        ne = optimizedAccess_DT_Rosenfeld(E, i, j, T, ne);
    }
    return ne;
}


// --------------------------------------------------------------------------------------------
uint32 lineLabeling_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// --------------------------------------------------------------------------------------------
{
    // Entry point used to label one line. Currently forwards to the "Slow"
    // variant; lineLabeling_Fast_Rosenfeld takes the same arguments and is
    // the decision-tree alternative.
    uint32 updated = lineLabeling_Slow_Rosenfeld(X, i, width, E, T, ne);

    return updated;
}


// ----------------------------------------------------------------
uint32 countTable_Range_Rosenfeld(uint32 * T, uint32 e0, uint32 e1)
// ----------------------------------------------------------------
{
    // Counts the labels e in [e0..e1] that are their own entry in T
    // (T[e] == e), i.e. the number of roots = of connected components
    // in that label range. T is not modified.
    uint32 roots = 0;
    uint32 e = e0;

    while (e <= e1) {
        if (T[e] == e) {
            roots++;
        }
        e++;
    }
    return roots;
}


// --------------------------------------------------------------
void solveTable_Range_Rosenfeld(uint32 * T, uint32 e0, uint32 e1)
// --------------------------------------------------------------
{
    // Transitive closure of T restricted to the labels e0..e1, visited in
    // increasing order: T[e] is replaced by the entry of its parent when
    // that value is smaller than e; otherwise T[e] is left untouched.
    for (uint32 e = e0; e <= e1; e++) {
        uint32 grandparent = T[T[e]];

        if (grandparent >= e) {
            continue;
        }
        T[e] = grandparent; // root of the equivalence class
    }
}


// -------------------------------------
void MCA_Label_Rosenfeld_PAR1(MCA * mca)
// -------------------------------------
{
    // Phase 1: labels the lines i0..i1 described by mca, independently of
    // the other workers.
    //   1. initializes the label range of T with set_ui32vector_j
    //   2. labels line i0 with line0Labeling_Rosenfeld, then the remaining
    //      lines with lineLabeling_Rosenfeld
    //   3. runs the transitive closure on the labels actually created
    //   4. stores the largest label used in mca->ne
    const int is_first = (mca->p == 0);

    if (is_first) {
        MCA_VERBOSE1(printf("------------------------------\n"));
        MCA_VERBOSE1(printf("-- MCA_Label_Rosenfeld_PAR1 --\n"));
        MCA_VERBOSE1(printf("------------------------------\n"));
    }

    // local memory zones
    uint8 **  X = mca->X;
    uint32 ** E = mca->E;
    uint32 *  T = mca->T;

    int i0 = mca->i0;
    int i1 = mca->i1;
    int width = mca->width;
    uint32 e0 = mca->e0;
    uint32 e1 = mca->e1;
    uint32 ne = e0 - 1; // last label used so far (none yet)
    uint32 nr = 0;

    // For p == 0 the range also covers e0 - 1: since e0 = 1 there, T[0] = 0
    // is needed by FindRoot.
    // @QM: now that the zero case is tested everywhere, is this still
    // needed? Presumably not (to be tested).
    set_ui32vector_j(T, is_first ? e0 - 1 : e0, e1);

    MCA_VERBOSE2(display_ui8matrix_positive(X, i0, i1, 0, width - 1, 5, "Xp"); printf("\n"));

    ne = line0Labeling_Rosenfeld(X, i0, width, E, T, ne);
    {
        int i = i0 + 1;
        while (i <= i1) {
            ne = lineLabeling_Rosenfeld(X, i, width, E, T, ne);
            i++;
        }
    }

    MCA_VERBOSE2(display_ui32matrix_positive(E, i0, i1, 0, width - 1, 5, "Ep"); printf("\n"));
    if (is_first) {
        MCA_VERBOSE2(display_ui32vector_number(T, e0, ne, "%5d", "Tp_avant"));
    }

    // transitive closure without packing
    solveTable_Range_Rosenfeld(T, e0, ne);
    nr = countTable_Range_Rosenfeld(T, e0, ne);
    mca->ne = ne; // largest label of the interval [e0..e1]

    MCA_VERBOSE2(printf("p = %d : e = [%d..%d] -> ne = %d -> nr = %d\n", mca->p, e0, ne, (ne - e0 + 1), nr));
    if (is_first) {
        MCA_VERBOSE2(display_ui32vector_number(T, e0, ne, "%5d", "Tp_apres"));
    }
}


// -------------------------------------
void MCA_Label_Rosenfeld_PYR2(MCA * mca)
// -------------------------------------
{
    // Phase 2: pyramidal border merging followed by a transitive closure of
    // the local label range through the two-level table D.

    // number of pyramid levels: log2(np), rounded up when np is not a
    // power of two
    int np = mca->mca->np;
    int nb_level = i32log2(np);

    if ((1 << nb_level) < np) {
        nb_level++;
    }

    if (mca->p == 0) {
        MCA_VERBOSE1(printf("------------------------------\n"));
        MCA_VERBOSE1(printf("-- MCA_Label_Rosenfeld_PYR2 --\n"));
        MCA_VERBOSE1(printf("------------------------------\n"));
    }

    // ------------------------------
    // -- pyramidal border merging --
    // ------------------------------

    // local memory zones
    uint8 **  X = mca->X;
    uint32 ** E = mca->E;
    uint32 *  T = mca->T;
    uint32 ** D = mca->D;

    // local variables
    int i = mca->i0;
    int width = mca->width;
    int alpha = mca->alpha;
    uint32 first = mca->e0;
    uint32 last = mca->ne;

    // @QM
    // This is actually complicated.
    // One could optimize by letting the procs that will never have another
    // iteration to run "break" out of the loop, but then each barrier would
    // have to wait for a different number of procs, and those barriers
    // would have to be initialized => all these values would need to be
    // precomputed, with a dynamic allocation of the number of barriers.
    // The problem gets much worse when the number of lines is not a power
    // of 2, because some threads must then do nothing at the current
    // iteration i but be active at i + 1 => it is even harder to compute
    // the number of threads to wait for at each barrier, and above all to
    // know whether to break or to continue.
    for (int level = 1; level <= nb_level; level++) {
        int half = 1 << (level - 1);
        int full = 1 << level;

        if ((mca->p + half) % full == 0) {
            // active thread: merges lines (i) and (i - 1)
            borderMerging_Rosenfeld_Dist(X, i, width, E, T, D, alpha);
        }
        // every thread, active or not, waits at each level
        barrier_wait(&main_barrier);
    }

    // ---------------------------------
    // -- parallel transitive closure --
    // ---------------------------------

    {
        uint32 e = first;

        while (e <= last) {
            uint32 r = T[e]; // local access
            if (r < e) {
                r = FindRoot_Dist(D, e, alpha); // remote access
            }
            T[e] = r;
            MCA_VERBOSE2(printf("p%d : T[%d] <- %d\n", mca->p, e, r));
            e++;
        }
    }
}


// -------------------------------------
void MCA_Label_Rosenfeld_PAR3(MCA * mca)
// -------------------------------------
{
    // Phase 3: relabeling. Every non-zero label of E on lines i0..i1,
    // columns 0..width-1, is replaced by its entry in T.
    if (mca->p == 0) {
        MCA_VERBOSE1(printf("------------------------------\n"));
        MCA_VERBOSE1(printf("-- MCA_Label_Rosenfeld_PAR3 --\n"));
        MCA_VERBOSE1(printf("------------------------------\n"));
    }

    uint32 ** E = mca->E;
    uint32 * T = mca->T;

    int first_line = mca->i0;
    int last_line = mca->i1;
    int first_col = 0;
    int last_col = mca->width - 1;

    for (int i = first_line; i <= last_line; i++) {
        for (int j = first_col; j <= last_col; j++) {
            uint32 * cell = &E[i][j];

            if (*cell == 0) {
                continue; // background pixel: stays 0
            }
            *cell = T[*cell];
        }
    }
}


// =============================================================
__attribute__((constructor)) void MCA_Label_Rosenfeld(MCA * mca)
// =============================================================
{
    // Runs the whole labeling pipeline for the worker described by mca.
    // Each phase is followed by a wait on the shared main_barrier:
    //   scatter of image X -> PAR1 (local labeling)
    //                      -> PYR2 (pyramidal border merging + closure)
    //                      -> PAR3 (relabeling of E through T)
    //                      -> gather of the labeled image
    // At the end, every worker whose index p is not 0 calls
    // giet_pthread_exit; worker 0 returns to its caller.
    //
    // NOTE(review): the constructor attribute is applied to a function that
    // takes an argument; presumably a platform (GIET) convention for thread
    // entry points — confirm before changing it.
    MCA_Scatter_ImageX(mca);
    barrier_wait(&main_barrier);

    // phase 1: independent labeling of the local lines
    MCA_Label_Rosenfeld_PAR1(mca);
    barrier_wait(&main_barrier);
    
    //MCA_Gather_ImageL(mca);
    //barrier_wait(&main_barrier);
    //MCA_VERBOSE2(display_ui32matrix_positive(mca->E, mca->i0, mca->i1, 0, mca->width - 1, 5, "E2"));
    //barrier_wait(&main_barrier);
    
    // phase 2: border merging across workers, then transitive closure
    //MCA_Label_Rosenfeld_SEQ2(mca);
    MCA_Label_Rosenfeld_PYR2(mca);
    barrier_wait(&main_barrier);
    //MCA_VERBOSE2(display_ui32matrix_positive(mca->E, mca->i0, mca->i1, 0, mca->width - 1, 5, "EPYR"));
    //barrier_wait(&main_barrier);
    
    // phase 3: final relabeling of the local lines
    MCA_Label_Rosenfeld_PAR3(mca);
    barrier_wait(&main_barrier);

    MCA_Gather_ImageL(mca);
    barrier_wait(&main_barrier);
    //MCA_VERBOSE2(display_ui32matrix_positive(mca->E, mca->i0, mca->i1, 0, mca->width - 1, 5, "E3"));
    //barrier_wait(&main_barrier);
    
    // only worker 0 returns; the others terminate here
    if (mca->p != 0) {
        giet_pthread_exit(NULL);
    }
}

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

