/* ----------------------- */
/* --- mca_rosenfeld.c --- */
/* ----------------------- */

/*
 * Copyright (c) 2016 Lionel Lacassagne, LIP6, UPMC, CNRS
 * Init  : 2016/03/03
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#if PARMERGE
#include <pthread.h>
#endif

#include "nrc_os_config.h"
#include "config.h"
#include "nrc.h"

#if TARGET_OS == GIETVM
    #include <user_barrier.h>
    #include <user_lock.h>
    #include <giet_config.h>
#else
    #include <stdbool.h>
#endif


#include "util.h"
#include "ecc_common.h"
#include "palette.h"
#include "bmpNR.h"
#include "clock.h"
#include "str_ext.h"
#include "ecc_features.h"

// -----------
// -- local --
// -----------

#include "mca.h"

// Barrier object defined in another translation unit.
// NOTE(review): <pthread.h> is only included at the top of this file when
// PARMERGE is set, while this declaration is unconditional - confirm that
// another header provides pthread_barrier_t in the other configurations.
extern pthread_barrier_t main_barrier;
// Integer flag defined in another translation unit
// (presumably enables the display of region features - TODO confirm).
extern int display_features;

// Clock declaration macro (presumably provided by clock.h - TODO confirm).
CLOCK_DEC;


// -----------------------------------------
static uint32 FindRoot(uint32 * T, uint32 e)
// -----------------------------------------
{
    // Walks the equivalence table T starting at label e, replacing the current
    // label r by T[r] for as long as T[r] < r, and returns the label on which
    // the walk stops.
    //
    // T : equivalence table, indexed by label
    // e : starting label, must be non-zero (asserted)
    //
    // If the walk ends on label 0, the starting label is printed and an
    // assertion fails.
    uint32 r;

    assert(e != 0);
    r = e;
    while (T[r] < r) {
        r = T[r];
    }
    if (r == 0) {
        // The label is unsigned: cast it and print it with %u so that the
        // conversion specifier matches the argument type.
        printf("e = %u\n", (unsigned) e);
        assert(0);
    }
    return r;
}


// ----------------------------------------------------------
static uint32 FindRoot_Dist(uint32 ** D, uint32 r, int shift)
// ----------------------------------------------------------
{
    // Root search in the two-level table D.
    //
    // A label is split into a high part (label >> shift), selecting a row of D,
    // and a low part (label & mask, the 'shift' low bits), selecting the entry
    // inside that row. Starting from r, the entry of the current label is
    // fetched repeatedly while the fetched value is strictly smaller than the
    // label it was fetched for.
    //
    // D     : two-level equivalence table
    // r     : starting label, must be non-zero (asserted)
    // shift : number of low bits used as the index inside a row of D
    //
    // Returns the last fetched value (asserted non-zero).
    uint32 e;
    uint32 e1;
    uint32 e0;

    assert(r != 0);

    // Build the mask in unsigned arithmetic: with a signed int,
    // (1 << shift) - 1 is undefined as soon as shift reaches 31.
    uint32 mask = ((uint32) 1 << shift) - 1;

    MCA_VERBOSE2(printf("%s(%d, %d) \n", __func__, r, shift));
    do {
        e  = r;
        e1 = r >> shift;
        e0 = r & mask;
        r = D[e1][e0];
        MCA_VERBOSE2(printf("%s: D(%d) = D[%d,%d] = %d (alpha = %d)\n", __func__, e, e1, e0, r, shift));
    } while (r < e);
    MCA_VERBOSE2(printf("%s = %d \n\n", __func__, r));
    assert(r != 0);
    return r;
}


#if !FEATURES
// --------------------------------------------------------------------------------
static void SetRoot_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift)
// --------------------------------------------------------------------------------
{
    // Stores eps in the entry of label 'root' in the two-level table D.
    //
    // D     : two-level equivalence table
    // root  : label whose entry is overwritten, must be non-zero (asserted)
    // eps   : value written into that entry, must be non-zero (asserted)
    // shift : number of low bits of a label used as the index inside a row of D

    // Build the mask in unsigned arithmetic: with a signed int,
    // (1 << shift) - 1 is undefined as soon as shift reaches 31.
    uint32 mask = ((uint32) 1 << shift) - 1;
    assert(root != 0 && eps != 0);

    uint32 r1 = root >> shift;  // row of D
    uint32 r0 = root & mask;    // entry inside the row

    D[r1][r0] = eps;
}
#endif // !FEATURES


#if FEATURES && !PARMERGE
// ----------------------------------------------------------------------------------------------------
void SetRoot_Features_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift, RegionStats ** F)
// ----------------------------------------------------------------------------------------------------
{
    // Stores eps in the entry of label 'root' in the two-level table D, then
    // accumulates the statistics of 'root' into those of 'eps':
    // F(eps) = F(eps) U F(root).
    //
    // D     : two-level equivalence table
    // root  : label whose entry is overwritten, must be non-zero (asserted)
    // eps   : value written into that entry, must be non-zero (asserted)
    // shift : number of low bits of a label used as the index inside a row
    // F     : two-level table of region statistics, indexed like D
    assert(root != 0 && eps != 0);

    MCA_VERBOSE2(printf("F(%d) += F(%d)\n", eps, root));

    // Build the mask in unsigned arithmetic: with a signed int,
    // (1 << shift) - 1 is undefined as soon as shift reaches 31.
    uint32 mask = ((uint32) 1 << shift) - 1;

    // Same update as SetRoot_Rosenfeld_Dist
    uint32 r1 = root >> shift;
    uint32 r0 = root & mask;

    D[r1][r0] = eps;

    uint32 e1 = eps >> shift;
    uint32 e0 = eps & mask;

    // Dist version of "RegionStats_Accumulate_Stats1_From_Index"

    // Bounding box: keep the extreme coordinates of both regions
    F[e1][e0].xmin = ui16min2(F[e1][e0].xmin, F[r1][r0].xmin);
    F[e1][e0].xmax = ui16max2(F[e1][e0].xmax, F[r1][r0].xmax);
    F[e1][e0].ymin = ui16min2(F[e1][e0].ymin, F[r1][r0].ymin);
    F[e1][e0].ymax = ui16max2(F[e1][e0].ymax, F[r1][r0].ymax);

    // Accumulators: add those of 'root' to those of 'eps'
    F[e1][e0].S  += F[r1][r0].S;
    F[e1][e0].Sx += F[r1][r0].Sx;
    F[e1][e0].Sy += F[r1][r0].Sy;
}
#endif // FEATURES && !PARMERGE


#if FEATURES && PARMERGE
// -------------------------------------------------------------------------------------------------------------
bool SetRoot_Parallel_Features_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift, RegionStats ** F)
// -------------------------------------------------------------------------------------------------------------
{
    // Lock-protected version of SetRoot_Features_Rosenfeld_Dist.
    //
    // Takes the spin locks stored in F(root) and F(eps), checks that both
    // labels still point to themselves in D, and only then stores eps in the
    // entry of 'root' and accumulates F(root) into F(eps).
    //
    // D     : two-level equivalence table
    // root  : label whose entry is overwritten, must be non-zero (asserted)
    // eps   : value written into that entry, must be non-zero (asserted)
    // shift : number of low bits of a label used as the index inside a row
    // F     : two-level table of region statistics (with locks), indexed like D
    //
    // Returns true when the merge was performed, false when one of the two
    // checks failed (nothing is modified in that case).
    assert(root != 0 && eps != 0);

    MCA_VERBOSE2(printf("F(%d) += F(%d)\n", eps, root));

    // Build the mask in unsigned arithmetic: with a signed int,
    // (1 << shift) - 1 is undefined as soon as shift reaches 31.
    uint32 mask = ((uint32) 1 << shift) - 1;

    uint32 r1 = root >> shift;
    uint32 r0 = root & mask;

    uint32 e1 = eps >> shift;
    uint32 e0 = eps & mask;

    bool merged = false;

    // Locking towards the root (first root, then eps)
    pthread_spin_lock(&F[r1][r0].lock);
    pthread_spin_lock(&F[e1][e0].lock);
    // FIXME: merge these conditions later, when they both appear
    if (D[e1][e0] != eps) {
        // Someone changed the root of eps: the caller must find the new root
        printf("race cond 1\n");
    }
    else if (D[r1][r0] != root) {
        // Someone changed the root of 'root': the caller must find the new root
        printf("race cond 2\n");
    }
    else {
        D[r1][r0] = eps;

        // F(eps) = F(eps) U F(root)
        F[e1][e0].xmin = ui16min2(F[e1][e0].xmin, F[r1][r0].xmin);
        F[e1][e0].xmax = ui16max2(F[e1][e0].xmax, F[r1][r0].xmax);
        F[e1][e0].ymin = ui16min2(F[e1][e0].ymin, F[r1][r0].ymin);
        F[e1][e0].ymax = ui16max2(F[e1][e0].ymax, F[r1][r0].ymax);

        F[e1][e0].S  += F[r1][r0].S;
        F[e1][e0].Sx += F[r1][r0].Sx;
        F[e1][e0].Sy += F[r1][r0].Sy;

        merged = true;
    }

    // Single exit path: release in the reverse order of acquisition
    pthread_spin_unlock(&F[e1][e0].lock);
    pthread_spin_unlock(&F[r1][r0].lock);
    return merged;
}
#endif // FEATURES && PARMERGE



#if FAST
// --------------------------------------------------------
static uint32 QuickUnion2(uint32 * T, uint32 e1, uint32 e2)
// --------------------------------------------------------
{
    // Quick-Union version of Union2: finds the roots of e1 and e2 in T and
    // links each root that is not the smaller one directly to the smaller one.
    //
    // Returns the smaller of the two roots.
    uint32 r1 = FindRoot(T, e1);
    uint32 r2 = FindRoot(T, e2);
    uint32 eps;

    assert(e1 != 0 && e2 != 0 && r1 != 0 && r2 != 0);
    eps = ui32Min2(r1, r2);

    // Direct SetRoot: both operands are already roots, no need to climb again
    if (eps < r1) {
        T[r1] = eps;
    }
    if (eps < r2) {
        T[r2] = eps;
    }
    assert(e1 != 0 && e2 != 0 && r1 != 0 && r2 != 0);

    return eps;
}
#endif // FAST


#if FAST
// ---------------------------------------------------
static uint32 use1_QU_Rosenfeld(uint32 e1, uint32 * T)
// ---------------------------------------------------
{
    // Single-label case: the result is the root of e1 in T.
    uint32 root = FindRoot(T, e1);

    return root;
}
#endif // FAST


#if FAST
// --------------------------------------------------------------
static uint32 use2_QU_Rosenfeld(uint32 e1, uint32 e2, uint32 * T)
// --------------------------------------------------------------
{
    // Two-label case: merges the classes of e1 and e2 in T with QuickUnion2
    // and returns the value QuickUnion2 returns.
    uint32 eps = QuickUnion2(T, e1, e2);

    return eps;
}
#endif // FAST


#if FAST && !FEATURES
// ---------------------------------------------------------------------------------------
static void vuse2_Rosenfeld_Dist(uint32 ed, uint32 el, uint32 * T, uint32 ** D, int alpha)
// ---------------------------------------------------------------------------------------
{
    // Merges, in the two-level table D, the class of the distant label ed with
    // the class of the local label el.
    //
    // ed    : distant label, looked up directly in D
    // el    : local label, first translated through T, then looked up in D
    // alpha : shift passed to the D accessors
    uint32 rd = FindRoot_Dist(D, ed, alpha);
    uint32 rl = FindRoot_Dist(D, T[el], alpha); // the first access is local

    assert(ed != 0 && el != 0 && rd != 0 && rl != 0);

    // Nothing to do when both labels already share the same root
    // (avoids the "backdoor").
    if (rd != rl) {
        // Labels are necessarily non-zero: the caller (optimizedBorder) has
        // already tested them. The larger root is linked to the smaller one.
        uint32 lo = (rd < rl) ? rd : rl;
        uint32 hi = (rd < rl) ? rl : rd;

        SetRoot_Rosenfeld_Dist(D, hi, lo, alpha);
    }
}
#endif // FAST && !FEATURES


#if FAST && !FEATURES
// -----------------------------------------------------------------------------------------------------
static void vuse3_Rosenfeld_Dist(uint32 ed1, uint32 ed2, uint32 el3, uint32 * T, uint32 ** D, int alpha)
// -----------------------------------------------------------------------------------------------------
{
    // Merges, in the two-level table D, the classes of two distant labels
    // (ed1, ed2) and of one local label (el3).
    //
    // ed1, ed2 : distant labels, looked up directly in D
    // el3      : local label, first translated through T, then looked up in D
    // alpha    : shift passed to the D accessors
    uint32 r1 = FindRoot_Dist(D, ed1, alpha);
    uint32 r2 = FindRoot_Dist(D, ed2, alpha);
    uint32 r3 = FindRoot_Dist(D, T[el3], alpha); // local, then distant

    assert(ed1 != 0 && ed2 != 0 && el3 != 0 && r1 != 0 && r2 != 0 && r3 != 0);

    if (r1 != r2 || r2 != r3) {
        // Roots are necessarily non-zero: the caller (optimizedBorder) has
        // already tested the labels.
        uint32 eps = ui32Min3(r1, r2, r3);
        uint32 roots[3];
        int k;

        roots[0] = r1;
        roots[1] = r2;
        roots[2] = r3;

        // Every root above the minimum is linked to it, in the order r1, r2, r3.
        // Open questions left by the original author (@QM): is re-reading
        // T[r2] / T[r3] needed when there are no features (cf. the slow version
        // without features)? How can we be sure that r2 (or r3) is local?
        for (k = 0; k < 3; k++) {
            if (roots[k] > eps) {
                SetRoot_Rosenfeld_Dist(D, roots[k], eps, alpha);
            }
        }
    }
}
#endif // FAST && !FEATURES


#if FAST && FEATURES && !PARMERGE
// -----------------------------------------------------------------------------------------------------------
void vuse2_Features_Rosenfeld_Dist(uint32 ed, uint32 el, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// -----------------------------------------------------------------------------------------------------------
{
    // Merges, in the two-level table D, the class of the distant label ed with
    // the class of the local label el, accumulating the region statistics F
    // of the absorbed root into the surviving one.
    //
    // ed    : distant label, looked up directly in D
    // el    : local label, first translated through T, then looked up in D
    // alpha : shift passed to the D / F accessors
    uint32 rd;
    uint32 rl;

    assert(ed != 0 && el != 0);

    rd = FindRoot_Dist(D, ed, alpha);

    rl = T[el]; // the first access is local
    assert(rl != 0);
    rl = FindRoot_Dist(D, rl, alpha);

    assert(rd != 0 && rl != 0);

    // Nothing to do when both labels already share the same root
    // (avoids the "backdoor").
    if (rd != rl) {
        // Labels are necessarily non-zero: the caller (optimizedBorder) has
        // already tested them. The larger root is merged into the smaller one.
        uint32 lo = (rd < rl) ? rd : rl;
        uint32 hi = (rd < rl) ? rl : rd;

        SetRoot_Features_Rosenfeld_Dist(D, hi, lo, alpha, F);
    }
}
#endif // FAST && FEATURES && !PARMERGE


#if FAST && FEATURES && !PARMERGE
// -------------------------------------------------------------------------------------------------------------------------
void vuse3_Features_Rosenfeld_Dist(uint32 ed1, uint32 ed2, uint32 el3, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// -------------------------------------------------------------------------------------------------------------------------
{
    // Merges, in the two-level table D, the classes of two distant labels
    // (ed1, ed2) and of one local label (el3), accumulating the region
    // statistics F of each absorbed root into the surviving one.
    //
    // ed1, ed2 : distant labels, looked up directly in D
    // el3      : local label, first translated through T, then looked up in D
    // alpha    : shift passed to the D / F accessors
    uint32 r1;
    uint32 r2;
    uint32 r3;

    assert(ed1 != 0 && ed2 != 0 && el3 != 0);

    r1 = FindRoot_Dist(D, ed1, alpha);
    r2 = FindRoot_Dist(D, ed2, alpha);

    r3 = T[el3]; // local, then distant
    assert(r3 != 0);
    r3 = FindRoot_Dist(D, r3, alpha);

    assert(r1 != 0 && r2 != 0 && r3 != 0);

    if (r1 != r2 || r2 != r3) {
        // Roots are necessarily non-zero: the caller (optimizedBorder) has
        // already tested the labels.
        uint32 eps = ui32Min3(r1, r2, r3);

        // A root already merged under another name is skipped, so that its
        // statistics are accumulated only once.
        if (r1 > eps) {
            SetRoot_Features_Rosenfeld_Dist(D, r1, eps, alpha, F);
        }
        if (r2 != r1 && r2 > eps) {
            SetRoot_Features_Rosenfeld_Dist(D, r2, eps, alpha, F);
        }
        if (r3 != r1 && r3 != r2 && r3 > eps) {
            SetRoot_Features_Rosenfeld_Dist(D, r3, eps, alpha, F);
        }
    }
}
#endif // FAST && FEATURES && !PARMERGE


#if FAST && FEATURES && PARMERGE
// --------------------------------------------------------------------------------------------------------------------
void vuse2_Parallel_Features_Rosenfeld_Dist(uint32 ed, uint32 el, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// --------------------------------------------------------------------------------------------------------------------
{
    // Retrying version of the two-label merge: the roots of the distant label
    // ed and of the local label el are searched without any lock, then the
    // lock-protected SetRoot is attempted; when it reports a failure, the
    // roots are searched again and the merge is retried.
    //
    // ed    : distant label, looked up directly in D
    // el    : local label, first translated through T, then looked up in D
    // alpha : shift passed to the D / F accessors
    uint32 rd;
    uint32 rl;

    assert(ed != 0 && el != 0);
    rl = T[el]; // the first access is local
    assert(rl != 0);

    for (;;) {
        uint32 lo;
        uint32 hi;

        rd = FindRoot_Dist(D, ed, alpha); // no lock
        rl = FindRoot_Dist(D, rl, alpha);

        assert(rd != 0 && rl != 0);

        if (rd == rl) {
            return; // same root already (avoids the "backdoor")
        }

        // Labels are necessarily non-zero: the caller (optimizedBorder) has
        // already tested them. The larger root is merged into the smaller one.
        lo = (rd < rl) ? rd : rl;
        hi = (rd < rl) ? rl : rd;

        if (SetRoot_Parallel_Features_Rosenfeld_Dist(D, hi, lo, alpha, F)) {
            break;
        }
    }
}
#endif // FAST && FEATURES && PARMERGE


#if FAST && FEATURES && PARMERGE
// ----------------------------------------------------------------------------------------------------------------------------------
void vuse3_Parallel_Features_Rosenfeld_Dist(uint32 ed1, uint32 ed2, uint32 el3, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ----------------------------------------------------------------------------------------------------------------------------------
{
    // Retrying version of the three-label merge: the roots of the two distant
    // labels (ed1, ed2) and of the local label el3 are searched, then up to
    // three lock-protected SetRoot calls are attempted; when any of them
    // reports a failure, the whole sequence is run again.
    //
    // ed1, ed2 : distant labels, looked up directly in D
    // el3      : local label, first translated through T, then looked up in D
    // alpha    : shift passed to the D / F accessors
    bool done = false;
    uint32 r1;
    uint32 r2;
    uint32 r3;

    assert(ed1 != 0 && ed2 != 0 && el3 != 0);

    r3 = T[el3]; // local, then distant
    assert(r3 != 0);

    while (!done) {
        bool ok1 = true;
        bool ok2 = true;
        bool ok3 = true;
        uint32 eps;

        r1 = FindRoot_Dist(D, ed1, alpha);
        r2 = FindRoot_Dist(D, ed2, alpha);
        r3 = FindRoot_Dist(D, r3, alpha);

        assert(r1 != 0 && r2 != 0 && r3 != 0);

        if (r1 == r2 && r2 == r3) {
            return;
        }

        // Roots are necessarily non-zero: the caller (optimizedBorder) has
        // already tested the labels.
        eps = ui32Min3(r1, r2, r3);

        // All applicable merges are attempted, even after a failure; a root
        // equal to a previously handled one is skipped.
        if (r1 > eps) {
            ok1 = SetRoot_Parallel_Features_Rosenfeld_Dist(D, r1, eps, alpha, F);
        }
        if (r2 != r1 && r2 > eps) {
            ok2 = SetRoot_Parallel_Features_Rosenfeld_Dist(D, r2, eps, alpha, F);
        }
        if (r3 != r1 && r3 != r2 && r3 > eps) {
            ok3 = SetRoot_Parallel_Features_Rosenfeld_Dist(D, r3, eps, alpha, F);
        }

        done = ok1 && ok2 && ok3;
    }
}
#endif // FAST && FEATURES && PARMERGE




#if FAST && !FEATURES
// ------------------------------------------------------------------------------------------------------
static void optimizedBorder_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha)
// ------------------------------------------------------------------------------------------------------
{
    // Border merge for one pixel with both a left and a right neighbour.
    //
    // x = E[i][j] is the current label; a, b, c are the labels of the row
    // above at columns j-1, j, j+1. Nothing happens when x is zero. When b is
    // set, merging with b alone is performed; otherwise x is merged with
    // whichever of a and c are set. Every merge call takes the label(s) of
    // row i-1 first ("distant") and x last ("local").
    const uint32 x = E[i][j];

    if (!x) {
        return;
    }

    const uint32 b = E[i - 1][j];

    if (b) {
        vuse2_Rosenfeld_Dist(b, x, T, D, alpha); // distant, local
        return;
    }

    const uint32 c = E[i - 1][j + 1];
    const uint32 a = E[i - 1][j - 1];

    if (a && c) {
        vuse3_Rosenfeld_Dist(a, c, x, T, D, alpha); // distant, distant, local
    }
    else if (c) {
        vuse2_Rosenfeld_Dist(c, x, T, D, alpha); // distant, local
    }
    else if (a) {
        vuse2_Rosenfeld_Dist(a, x, T, D, alpha); // distant, local
    }
}
#endif // FAST && !FEATURES


#if FAST && !FEATURES
// ---------------------------------------------------------------------------------------------------
static void optimizedBorderLeft_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha)
// ---------------------------------------------------------------------------------------------------
{
    // Border merge for a pixel without left neighbour: only the labels of the
    // row above at columns j (b) and j+1 (c) are examined. Nothing happens
    // when the current label E[i][j] is zero; b takes precedence over c.
    const uint32 x = E[i][j];

    if (!x) {
        return;
    }

    const uint32 b = E[i - 1][j];

    if (b) {
        vuse2_Rosenfeld_Dist(b, x, T, D, alpha); // distant, local
        return;
    }

    const uint32 c = E[i - 1][j + 1];

    if (c) {
        vuse2_Rosenfeld_Dist(c, x, T, D, alpha); // distant, local
    }
}
#endif // FAST && !FEATURES


#if FAST && !FEATURES
// -----------------------------------------------------------------------------------------------------------
static void optimizedBorderRight_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha)
// -----------------------------------------------------------------------------------------------------------
{
    // Border merge for a pixel without right neighbour (derived from
    // optimizedBorder_Rosenfeld): only the labels of the row above at columns
    // j (b) and j-1 (a) are examined. Nothing happens when the current label
    // E[i][j] is zero; b takes precedence over a.
    const uint32 b = E[i - 1][j];
    const uint32 x = E[i][j];

    if (!x) {
        return;
    }

    if (b) {
        vuse2_Rosenfeld_Dist(b, x, T, D, alpha); // distant, local
        return;
    }

    const uint32 a = E[i - 1][j - 1];

    if (a) {
        vuse2_Rosenfeld_Dist(a, x, T, D, alpha); // distant, local
    }
}
#endif // FAST && !FEATURES


#if FAST && !FEATURES
// ------------------------------------------------------------------------------------------------------------------------
static void borderMerging_Fast_Rosenfeld_Dist(uint8 **X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha)
// ------------------------------------------------------------------------------------------------------------------------
{
    // Merges row i of E with row i-1, column by column: the first column uses
    // the "Left" variant, the last column the "Right" variant and every column
    // in between the general variant.
    //
    // X is not used by this function.
    const int last = width - 1;
    int j = 1;

    // Prologue: first column
    optimizedBorderLeft_Rosenfeld_Dist(E, i, 0, T, D, alpha);

    // Main loop: inner columns
    while (j < last) {
        optimizedBorder_Rosenfeld_Dist(E, i, j, T, D, alpha);
        j++;
    }

    // Epilogue: last column
    optimizedBorderRight_Rosenfeld_Dist(E, i, last, T, D, alpha);
}
#endif // FAST && !FEATURES


#if SLOW && !FEATURES
// Quick-Union step of the slow border merge: links root r to eps when r is
// strictly greater than eps, then traces the link. A zero r (used for an
// absent neighbour) is never greater than eps and is therefore left alone.
static void linkRoot_Slow_Rosenfeld_Dist(uint32 ** D, uint32 r, uint32 eps, int alpha)
{
    if (r > eps) {
        SetRoot_Rosenfeld_Dist(D, r, eps, alpha);
        MCA_VERBOSE2(printf("D[%4d] <- %d\n", r, eps));
    }
}

// -------------------------------------------------------------------------------------------------------------------------
static void borderMerging_Slow_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha)
// -------------------------------------------------------------------------------------------------------------------------
{
    // Merges row i of E with row i-1 in the two-level table D.
    //
    // For every non-zero label ex of row i, the roots of the neighbouring
    // labels of row i-1 (columns j-1, j, j+1 when they exist) and the root of
    // T[ex] are searched in D; every root greater than the smallest non-zero
    // one (eps) is then linked to eps.
    //
    // X is not used by this function.
    const int last = width - 1;
    int j;

    uint32 eps;
    uint32 e1, e2, e3, ex;
    uint32 r1, r2, r3, rx;

    MCA_VERBOSE2(printf("[%s] i = %d\n", __func__, i));

    // ------------------------------------------------
    // -- prologue: first column, no left neighbour  --
    // ------------------------------------------------
    j = 0;
    ex = E[i][j];

    if (ex) {
        MCA_VERBOSE2(printf("[%s] j = %d\n", __func__, j));

        e2 = E[i - 1][j];
        e3 = E[i - 1][j + 1];

        // a zero label is not looked up, to avoid a remote access
        r2 = 0;
        if (e2) {
            r2 = FindRoot_Dist(D, e2, alpha);
        }
        r3 = 0;
        if (e3) {
            r3 = FindRoot_Dist(D, e3, alpha);
        }

        rx = FindRoot_Dist(D, T[ex], alpha);

        MCA_VERBOSE2(printf("\n"));
        MCA_VERBOSE2(printf("e2 = %4d -> %4d\n", e2, r2));
        MCA_VERBOSE2(printf("e3 = %4d -> %4d\n", e3, r3));
        MCA_VERBOSE2(printf("ex = %4d -> %4d\n", ex, rx));

        eps = ui32MinNonNul3(r2, r3, rx);

        // Quick-Union
        linkRoot_Slow_Rosenfeld_Dist(D, r2, eps, alpha);
        linkRoot_Slow_Rosenfeld_Dist(D, r3, eps, alpha);
        linkRoot_Slow_Rosenfeld_Dist(D, rx, eps, alpha);
        MCA_VERBOSE2(printf("\n"));
    }

    // --------------------------------
    // -- main loop: inner columns   --
    // --------------------------------
    for (j = 1; j < last; j++) {

        ex = E[i][j];

        // general case only (to keep the code simple)
        if (!ex) {
            continue;
        }

        MCA_VERBOSE2(printf("[%s] j = %d\n", __func__, j));

        e1 = E[i - 1][j - 1];
        e2 = E[i - 1][j];
        e3 = E[i - 1][j + 1];

        // a zero label is not looked up, to avoid a remote access
        r1 = 0;
        if (e1) {
            r1 = FindRoot_Dist(D, e1, alpha);
        }
        r2 = 0;
        if (e2) {
            r2 = FindRoot_Dist(D, e2, alpha);
        }
        r3 = 0;
        if (e3) {
            r3 = FindRoot_Dist(D, e3, alpha);
        }

        rx = FindRoot_Dist(D, T[ex], alpha);

        MCA_VERBOSE2(printf("\n"));
        MCA_VERBOSE2(printf("e1 = %4d -> %4d\n", e1, r1));
        MCA_VERBOSE2(printf("e2 = %4d -> %4d\n", e2, r2));
        MCA_VERBOSE2(printf("e3 = %4d -> %4d\n", e3, r3));
        MCA_VERBOSE2(printf("ex = %4d -> %4d\n", ex, rx));

        eps = ui32MinNonNul4(r1, r2, r3, rx);

        // Quick-Union
        linkRoot_Slow_Rosenfeld_Dist(D, r1, eps, alpha);
        linkRoot_Slow_Rosenfeld_Dist(D, r2, eps, alpha);
        linkRoot_Slow_Rosenfeld_Dist(D, r3, eps, alpha);
        linkRoot_Slow_Rosenfeld_Dist(D, rx, eps, alpha);
        MCA_VERBOSE2(printf("\n"));
        // original author's note: beware, SetRoot performs a useless while
    }

    // ------------------------------------------------
    // -- epilogue: last column, no right neighbour  --
    // ------------------------------------------------
    j = last;
    ex = E[i][j];

    if (ex) {
        MCA_VERBOSE2(printf("[%s] j = %d\n", __func__, j));

        e1 = E[i - 1][j - 1];
        e2 = E[i - 1][j];

        // a zero label is not looked up, to avoid a remote access
        r1 = 0;
        if (e1) {
            r1 = FindRoot_Dist(D, e1, alpha);
        }
        r2 = 0;
        if (e2) {
            r2 = FindRoot_Dist(D, e2, alpha);
        }

        rx = FindRoot_Dist(D, T[ex], alpha);

        MCA_VERBOSE2(printf("\n"));
        MCA_VERBOSE2(printf("e1 = %4d -> %4d\n", e1, r1));
        MCA_VERBOSE2(printf("e2 = %4d -> %4d\n", e2, r2));
        MCA_VERBOSE2(printf("ex = %4d -> %4d\n", ex, rx));

        eps = ui32MinNonNul3(r1, r2, rx);

        // Quick-Union
        linkRoot_Slow_Rosenfeld_Dist(D, r1, eps, alpha);
        linkRoot_Slow_Rosenfeld_Dist(D, r2, eps, alpha);
        linkRoot_Slow_Rosenfeld_Dist(D, rx, eps, alpha);
        MCA_VERBOSE2(printf("\n"));
    }
}
#endif // SLOW && !FEATURES


#if SLOW && FEATURES
// Quick-Union step of the slow border merge with features: merges root r into
// eps (table entry and statistics) when r is strictly greater than eps, then
// traces the link. A zero r (absent neighbour) is never greater than eps.
static void linkRoot_Slow_Features_Rosenfeld_Dist(uint32 ** D, uint32 r, uint32 eps, int alpha, RegionStats ** F)
{
    if (r > eps) {
        SetRoot_Features_Rosenfeld_Dist(D, r, eps, alpha, F);
        MCA_VERBOSE2(printf("D[%5d] <- %d\n", r, eps));
    }
}

// ----------------------------------------------------------------------------------------------------------------------------------------------------
static void borderMerging_Slow_Features_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ----------------------------------------------------------------------------------------------------------------------------------------------------
{
    // Merges row i of E with row i-1 in the two-level table D, accumulating
    // the region statistics F of every absorbed root.
    //
    // For every non-zero label ex of row i having at least one non-zero
    // neighbour in row i-1 (columns j-1, j, j+1 when they exist), the roots of
    // those neighbours and of T[ex] are searched in D, and every root greater
    // than the smallest non-zero one (eps) is merged into eps.
    //
    // After each merge, the next root is searched again before being tested:
    // a root that was equal to one already merged then resolves to eps and is
    // skipped, so its statistics are not added twice.
    //
    // X is not used by this function.
    const int last = width - 1;
    int j = 0;

    uint32 eps;

    uint32 e1, e2, e3, ex;
    uint32 r1, r2, r3, rx;

    MCA_VERBOSE2(printf("[%s] i = %d\n", __func__, i));

    // ------------------------------------------------
    // -- prologue: first column, no left neighbour  --
    // ------------------------------------------------
    ex = E[i][j];

    if (ex) {
        MCA_VERBOSE2(printf("[%s] j = %d\n", __func__, j));

        e2 = E[i - 1][j];
        e3 = E[i - 1][j + 1];

        if (e2 || e3) {
            // a zero label is not looked up, to avoid a remote access
            r2 = 0;
            if (e2) {
                r2 = FindRoot_Dist(D, e2, alpha);
            }
            r3 = 0;
            if (e3) {
                r3 = FindRoot_Dist(D, e3, alpha);
            }

            rx = FindRoot_Dist(D, T[ex], alpha);

            eps = ui32MinNonNul3(r2, r3, rx);

            MCA_VERBOSE2(printf("\n"));
            MCA_VERBOSE2(printf("e2  = %5d -> r2 = %5d\n", e2, r2));
            MCA_VERBOSE2(printf("e3  = %5d -> r3 = %5d\n", e3, r3));
            MCA_VERBOSE2(printf("ex  = %5d -> rx = %5d\n", ex, rx));
            MCA_VERBOSE2(printf("eps = %5d\n", eps));

            // Quick-Union (@QM)
            linkRoot_Slow_Features_Rosenfeld_Dist(D, r2, eps, alpha, F);

            // When r2 == r3, the features must not be added twice:
            // search the root again before testing it.
            if (r3 != 0) {
                r3 = FindRoot_Dist(D, r3, alpha);
            }
            linkRoot_Slow_Features_Rosenfeld_Dist(D, r3, eps, alpha, F);

            rx = FindRoot_Dist(D, rx, alpha);
            linkRoot_Slow_Features_Rosenfeld_Dist(D, rx, eps, alpha, F);

            MCA_VERBOSE2(printf("---------------------------\n"));
        }
    }

    // --------------------------------
    // -- main loop: inner columns   --
    // --------------------------------
    for (j = 1; j < last; j++) {

        ex = E[i][j];
        if (!ex) {
            continue;
        }

        MCA_VERBOSE2(printf("[%s] j = %d\n", __func__, j));

        e1 = E[i - 1][j - 1];
        e2 = E[i - 1][j];
        e3 = E[i - 1][j + 1];

        if (!(e1 || e2 || e3)) {
            continue;
        }

        // a zero label is not looked up, to avoid a remote access
        r1 = 0;
        if (e1) {
            r1 = FindRoot_Dist(D, e1, alpha);
        }
        r2 = 0;
        if (e2) {
            r2 = FindRoot_Dist(D, e2, alpha);
        }
        r3 = 0;
        if (e3) {
            r3 = FindRoot_Dist(D, e3, alpha);
        }

        rx = FindRoot_Dist(D, T[ex], alpha);

        eps = ui32MinNonNul4(r1, r2, r3, rx);

        MCA_VERBOSE2(printf("\n"));
        MCA_VERBOSE2(printf("e1  = %5d -> r1 = %5d\n", e1, r1));
        MCA_VERBOSE2(printf("e2  = %5d -> r2 = %5d\n", e2, r2));
        MCA_VERBOSE2(printf("e3  = %5d -> r3 = %5d\n", e3, r3));
        MCA_VERBOSE2(printf("ex  = %5d -> rx = %5d\n", ex, rx));
        MCA_VERBOSE2(printf("eps = %5d\n", eps));

        // Quick-Union (@QM): each root is searched again after the
        // previous merge, then merged if it is still above eps.
        linkRoot_Slow_Features_Rosenfeld_Dist(D, r1, eps, alpha, F);

        if (r2 != 0) {
            r2 = FindRoot_Dist(D, r2, alpha);
        }
        linkRoot_Slow_Features_Rosenfeld_Dist(D, r2, eps, alpha, F);

        if (r3 != 0) {
            r3 = FindRoot_Dist(D, r3, alpha);
        }
        linkRoot_Slow_Features_Rosenfeld_Dist(D, r3, eps, alpha, F);

        rx = FindRoot_Dist(D, rx, alpha);
        linkRoot_Slow_Features_Rosenfeld_Dist(D, rx, eps, alpha, F);

        MCA_VERBOSE2(puts("---------------------------\n"));

        // original author's note: beware, SetRoot performs a useless while
    }

    // ------------------------------------------------
    // -- epilogue: last column, no right neighbour  --
    // ------------------------------------------------
    j = last;
    ex = E[i][j];

    if (ex) {
        MCA_VERBOSE2(printf("[%s] j = %d\n", __func__, j));

        e1 = E[i - 1][j - 1];
        e2 = E[i - 1][j];

        if (e1 || e2) {
            // a zero label is not looked up, to avoid a remote access
            r1 = 0;
            if (e1) {
                r1 = FindRoot_Dist(D, e1, alpha);
            }
            r2 = 0;
            if (e2) {
                r2 = FindRoot_Dist(D, e2, alpha);
            }

            rx = FindRoot_Dist(D, T[ex], alpha);

            eps = ui32MinNonNul3(r1, r2, rx);

            MCA_VERBOSE2(printf("\n"));
            MCA_VERBOSE2(printf("e1  = %5d -> r1 = %5d\n", e1, r1));
            MCA_VERBOSE2(printf("e2  = %5d -> r2 = %5d\n", e2, r2));
            MCA_VERBOSE2(printf("ex  = %5d -> rx = %5d\n", ex, rx));
            MCA_VERBOSE2(printf("eps = %5d\n", eps));

            // Quick-Union
            linkRoot_Slow_Features_Rosenfeld_Dist(D, r1, eps, alpha, F);

            if (r2 != 0) {
                r2 = FindRoot_Dist(D, r2, alpha);
            }
            linkRoot_Slow_Features_Rosenfeld_Dist(D, r2, eps, alpha, F);

            rx = FindRoot_Dist(D, rx, alpha);
            linkRoot_Slow_Features_Rosenfeld_Dist(D, rx, eps, alpha, F);

            MCA_VERBOSE2(printf("---------------------------\n"));
        }
    }
}
#endif // SLOW && FEATURES


#if FAST && FEATURES && !PARMERGE
// --------------------------------------------------------------------------------------------------------------------------
void optimizedBorder_Features_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// --------------------------------------------------------------------------------------------------------------------------
{
    // Border merge with features for one pixel with both a left and a right
    // neighbour (derived from optimizedBorder_Rosenfeld).
    //
    // x = E[i][j] is the current label; a, b, c are the labels of the row
    // above at columns j-1, j, j+1. Nothing happens when x is zero. When b is
    // set, merging with b alone is performed; otherwise x is merged with
    // whichever of a and c are set. Every merge call takes the label(s) of
    // row i-1 first ("distant") and x last ("local").
    const uint32 x = E[i][j];

    if (!x) {
        return;
    }

    const uint32 b = E[i - 1][j];

    if (b) {
        vuse2_Features_Rosenfeld_Dist(b, x, T, D, alpha, F); // distant, local
        return;
    }

    const uint32 c = E[i - 1][j + 1];
    const uint32 a = E[i - 1][j - 1];

    if (a && c) {
        vuse3_Features_Rosenfeld_Dist(a, c, x, T, D, alpha, F); // distant, distant, local
    }
    else if (c) {
        vuse2_Features_Rosenfeld_Dist(c, x, T, D, alpha, F); // distant, local
    }
    else if (a) {
        vuse2_Features_Rosenfeld_Dist(a, x, T, D, alpha, F); // distant, local
    }
}
#endif // FAST && FEATURES && !PARMERGE


#if FAST && FEATURES && !PARMERGE
// ------------------------------------------------------------------------------------------------------------------------------
void optimizedBorderLeft_Features_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ------------------------------------------------------------------------------------------------------------------------------
{
    // Border merge with features for a pixel without left neighbour: only the
    // labels of the row above at columns j (b) and j+1 (c) are examined.
    // Nothing happens when the current label E[i][j] is zero; b takes
    // precedence over c.
    const uint32 x = E[i][j];

    if (!x) {
        return;
    }

    const uint32 b = E[i - 1][j];

    if (b) {
        vuse2_Features_Rosenfeld_Dist(b, x, T, D, alpha, F); // distant, local
        return;
    }

    const uint32 c = E[i - 1][j + 1];

    if (c) {
        vuse2_Features_Rosenfeld_Dist(c, x, T, D, alpha, F); // distant, local
    }
}
#endif // FAST && FEATURES && !PARMERGE


#if FAST && FEATURES && !PARMERGE
// -------------------------------------------------------------------------------------------------------------------------------
void optimizedBorderRight_Features_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// -------------------------------------------------------------------------------------------------------------------------------
{
    // Border merge with features for a pixel without right neighbour (derived
    // from optimizedBorder_Rosenfeld): only the labels of the row above at
    // columns j (b) and j-1 (a) are examined. Nothing happens when the current
    // label E[i][j] is zero; b takes precedence over a.
    const uint32 x = E[i][j];

    if (!x) {
        return;
    }

    const uint32 b = E[i - 1][j];

    if (b) {
        vuse2_Features_Rosenfeld_Dist(b, x, T, D, alpha, F); // distant, local
        return;
    }

    const uint32 a = E[i - 1][j - 1];

    if (a) {
        vuse2_Features_Rosenfeld_Dist(a, x, T, D, alpha, F); // distant, local
    }
}
#endif // FAST && FEATURES && !PARMERGE


#if FAST && FEATURES && PARMERGE
// -----------------------------------------------------------------------------------------------------------------------------------
void optimizedBorder_Parallel_Features_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// -----------------------------------------------------------------------------------------------------------------------------------
{
    // Border merge for an interior column (PARMERGE build), derived from
    // optimizedBorder_Rosenfeld. A labelled E[i][j] is paired with the labels
    // of row i - 1 that touch it:
    //   - the label above, if any (the diagonals are then not read);
    //   - otherwise both diagonals (vuse3) or whichever one is labelled (vuse2).
    uint32 current = E[i][j];
    if (current == 0) {
        return;
    }

    uint32 up = E[i - 1][j];
    if (up != 0) {
        vuse2_Parallel_Features_Rosenfeld_Dist(up, current, T, D, alpha, F); // dist, local
        return;
    }

    // nothing above: look at the two diagonals (above-right is read first)
    uint32 up_right = E[i - 1][j + 1];
    uint32 up_left  = E[i - 1][j - 1];

    if (up_right != 0 && up_left != 0) {
        vuse3_Parallel_Features_Rosenfeld_Dist(up_left, up_right, current, T, D, alpha, F); // dist, local
    }
    else if (up_right != 0) {
        vuse2_Parallel_Features_Rosenfeld_Dist(up_right, current, T, D, alpha, F); // dist, local
    }
    else if (up_left != 0) {
        vuse2_Parallel_Features_Rosenfeld_Dist(up_left, current, T, D, alpha, F); // dist, local
    }
}
#endif // FAST && FEATURES && PARMERGE


#if FAST && FEATURES && PARMERGE
// ---------------------------------------------------------------------------------------------------------------------------------------
void optimizedBorderLeft_Parallel_Features_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ---------------------------------------------------------------------------------------------------------------------------------------
{
    // PARMERGE variant of the left-border merge: a labelled E[i][j] is paired
    // with the label above it, or with the label above-right when the former
    // is 0. The upper-left neighbour is never read.
    uint32 current = E[i][j];
    if (current == 0) {
        return;
    }

    // first non-zero label among: above, then above-right
    uint32 neighbor = E[i - 1][j];
    if (neighbor == 0) {
        neighbor = E[i - 1][j + 1];
    }

    if (neighbor != 0) {
        vuse2_Parallel_Features_Rosenfeld_Dist(neighbor, current, T, D, alpha, F); // dist, local
    }
}
#endif // FAST && FEATURES && PARMERGE


#if FAST && FEATURES && PARMERGE
// ----------------------------------------------------------------------------------------------------------------------------------------
void optimizedBorderRight_Parallel_Features_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ----------------------------------------------------------------------------------------------------------------------------------------
{
    // PARMERGE variant of the right-border merge, derived from
    // optimizedBorder_Rosenfeld: a labelled E[i][j] is paired with the label
    // above it, or with the label above-left when the former is 0.
    // The upper-right neighbour is never read.
    uint32 current = E[i][j];
    if (current == 0) {
        return;
    }

    // first non-zero label among: above, then above-left
    uint32 neighbor = E[i - 1][j];
    if (neighbor == 0) {
        neighbor = E[i - 1][j - 1];
    }

    if (neighbor != 0) {
        vuse2_Parallel_Features_Rosenfeld_Dist(neighbor, current, T, D, alpha, F); // dist, local
    }
}
#endif // FAST && FEATURES && PARMERGE


#if FAST && FEATURES
// ---------------------------------------------------------------------------------------------------------------------------------------------
void borderMerging_Fast_Features_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ---------------------------------------------------------------------------------------------------------------------------------------------
{
    // Merges row i with row i - 1 across the whole line: column 0 uses the
    // "Left" helper, columns 1 .. width - 2 the generic helper and column
    // width - 1 the "Right" helper. PARMERGE selects the *_Parallel_* helpers.
    // X is not used here.
    MCA_VERBOSE2(printf("[%s]", __func__));

    int last = width - 1;

#if PARMERGE
    optimizedBorderLeft_Parallel_Features_Rosenfeld_Dist(E, i, 0, T, D, alpha, F);
    for (int col = 1; col < last; col++) {
        optimizedBorder_Parallel_Features_Rosenfeld_Dist(E, i, col, T, D, alpha, F);
    }
    optimizedBorderRight_Parallel_Features_Rosenfeld_Dist(E, i, last, T, D, alpha, F);
#else
    optimizedBorderLeft_Features_Rosenfeld_Dist(E, i, 0, T, D, alpha, F);
    for (int col = 1; col < last; col++) {
        optimizedBorder_Features_Rosenfeld_Dist(E, i, col, T, D, alpha, F);
    }
    optimizedBorderRight_Features_Rosenfeld_Dist(E, i, last, T, D, alpha, F);
#endif
}
#endif // FAST && FEATURES


#if !FEATURES
// --------------------------------------------------------------------------------------------------------------------
static void borderMerging_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha)
// --------------------------------------------------------------------------------------------------------------------
{
    // Compile-time dispatch to the SLOW or FAST border-merging routine
    // (SLOW takes precedence when both are set).
#if !SLOW && !FAST
#error "Please define SLOW or FAST for the Rosenfeld version"
#endif

#if SLOW
    borderMerging_Slow_Rosenfeld_Dist(X, i, width, E, T, D, alpha);
#else
    borderMerging_Fast_Rosenfeld_Dist(X, i, width, E, T, D, alpha);
#endif
}
#endif // !FEATURES


#if FEATURES
// -----------------------------------------------------------------------------------------------------------------------------------------------
static void borderMerging_Features_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// -----------------------------------------------------------------------------------------------------------------------------------------------
{
    // Compile-time dispatch to the SLOW or FAST border-merging routine of the
    // FEATURES build (SLOW takes precedence when both are set).
#if !SLOW && !FAST
#error "Please define SLOW or FAST for the Rosenfeld version"
#endif

#if SLOW
    borderMerging_Slow_Features_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F);
#else
    borderMerging_Fast_Features_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F);
#endif
}
#endif // FEATURES


// ----------------------------------------------------------------------------------------------------
static uint32 line0Labeling_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// ----------------------------------------------------------------------------------------------------
{
    // Labels the first line of a band, where only the left neighbour exists.
    //
    // X     : input image, X[i][j] != 0 marks a foreground pixel
    // i     : index of the line to label
    // width : number of columns; nothing is read or written when width <= 0
    // E     : label image, E[i][0 .. width - 1] is overwritten
    // T     : equivalence table, unused on this line (kept for a uniform signature)
    // ne    : last label allocated so far
    //
    // Each horizontal run of foreground pixels receives one fresh label (++ne);
    // background pixels receive 0. Returns the updated value of ne.
    (void) T;

    const uint8 * src = X[i];
    uint32 * dst = E[i];
    uint32 run_label = 0; // label of the run ending at the previous column, 0 if none

    for (int j = 0; j < width; j++) {
        if (src[j]) {
            if (run_label == 0) {
                run_label = ++ne; // a new run starts here
            }
        }
        else {
            run_label = 0;
        }
        dst[j] = run_label;
    }
    return ne;
}


#if SLOW
// --------------------------------------------------------------------------------------------------------
static uint32 lineLabeling_Slow_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// --------------------------------------------------------------------------------------------------------
{
    // lineLabeling_Rosenfeld_UF_QU_8C variant, with Quick-Union.
    //
    // Labels line i of E from line i of X, using the labels of line i - 1 and
    // of the pixel to the left (8-connectivity). The line is processed in three
    // parts so that no column outside [0 .. width - 1] is read:
    //   prologue (j = 0)          : neighbours e2 (above) and e3 (above-right)
    //   main loop (1 .. width - 2): neighbours e1 (above-left), e2, e3, e4 (left)
    //   epilogue (j = width - 1)  : neighbours e1, e2, e4
    // Returns ne, the last label allocated, incremented once per new label.
    // NOTE(review): the prologue reads column 1 and the epilogue reads column
    // width - 2, so this looks like it assumes width >= 2 -- confirm with callers.
    
    int j;
    
    uint8 x;
    uint32 e;
    uint32 e1, e2, e3, e4;
    uint32 r1, r2, r3, r4;
    
    // --------------
    // -- prologue --
    // --------------
    
    j = 0;
    x = X[i][j];
    
    if (x) {
        
        e2 = E[i - 1][j];
        e3 = E[i - 1][j + 1];
        
        // new label: no labelled neighbour
        if (e2 == 0 && e3 == 0) {
            e = ++ne;
            E[i][j] = e;
        }
        else {
            // identical labels: reuse it without touching T
            if (e2 == e3) {
                e = e2;
                E[i][j] = e; 
            }
            else {    
                // general case: resolve the roots (0 stands for "no neighbour")
                r2 = (e2 == 0) ? 0 : FindRoot(T, e2);
                r3 = (e3 == 0) ? 0 : FindRoot(T, e3);
                
                // presumably the smallest non-zero root (helper defined elsewhere)
                e = ui32MinNonNul2(r2, r3);
                
                // Quick-Union: attach every larger root to e
                if (r2 > e) {
                    T[r2] = e;
                }
                if (r3 > e) {
                    T[r3] = e;
                }
                E[i][j] = e;
            }
        }
    }
    else {
        E[i][j] = 0;
    } // x
    
    // ---------------
    // -- main loop --
    // ---------------
    
    for (j = 0 + 1; j < width - 1; j++) {
        
        x = X[i][j];
        
        if (x)  {
            e1 = E[i - 1][j - 1];
            e2 = E[i - 1][j];
            e3 = E[i - 1][j + 1];
            e4 = E[i][j - 1];
            
            // new label: no labelled neighbour
            if (e1 == 0 && e2 == 0 && e3 == 0 && e4 == 0) {
                e = ++ne;
                E[i][j] = e;
            }
            else {
                // identical labels: reuse it without touching T
                if (e1 == e2 && e1 == e3 && e1 == e4) {
                    e = e1;
                    E[i][j] = e;
                }
                else {
                    // general case: resolve the roots (0 stands for "no neighbour")
                    r1 = (e1 == 0) ? 0 : FindRoot(T, e1);
                    r2 = (e2 == 0) ? 0 : FindRoot(T, e2);
                    r3 = (e3 == 0) ? 0 : FindRoot(T, e3);
                    r4 = (e4 == 0) ? 0 : FindRoot(T, e4);
                    
                    // presumably the smallest non-zero root (helper defined elsewhere)
                    e = ui32MinNonNul4(r1, r2, r3, r4);
                    
                    // Quick-Union: attach every larger root to e
                    if (r1 > e) {
                        T[r1] = e;
                    }
                    if (r2 > e) {
                        T[r2] = e;
                    }
                    if (r3 > e) {
                        T[r3] = e;
                    }
                    if (r4 > e) {
                        T[r4] = e;
                    }
                    E[i][j] = e;
                }
            }
        }
        else {
            E[i][j] = 0;
        } // x
    } // j
    
    // --------------
    // -- epilogue --
    // --------------
    j = width - 1;
    x = X[i][j];
    
    if (x) {
        e1 = E[i - 1][j - 1];
        e2 = E[i - 1][j];
        e4 = E[i][j - 1];
        
        // new label: no labelled neighbour
        if (e1 == 0 && e2 == 0 && e4 == 0) {
            e = ++ne;
            E[i][j] = e;
        }
        else {
            // identical labels: reuse it without touching T
            if (e1 == e2 && e1 == e4) {
                e = e1;
                E[i][j] = e;
            }
            else {
                // general case: resolve the roots (0 stands for "no neighbour")
                r1 = (e1 == 0) ? 0 : FindRoot(T, e1);
                r2 = (e2 == 0) ? 0 : FindRoot(T, e2);
                r4 = (e4 == 0) ? 0 : FindRoot(T, e4);
                
                // presumably the smallest non-zero root (helper defined elsewhere)
                e = ui32MinNonNul3(r1, r2, r4);
                
                // Quick-Union: attach every larger root to e
                if (r1 > e) {
                    T[r1] = e;
                }
                if (r2 > e) {
                    T[r2] = e;
                }
                if (r4 > e) {
                    T[r4] = e;
                }
                E[i][j] = e;
            }
        }
    }
    else {
        E[i][j] = 0;
    } // x
    
    return ne;
}
#endif // SLOW


#if FAST
// ---------------------------------------------------------------------------------------------
static uint32 optimizedAccessLeft_DT_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ne)
// ---------------------------------------------------------------------------------------------
{
    // 8-connected decision tree with Quick-Union, left-border version:
    // only the pixels above and above-right are examined.
    // E[i][j] receives use1_QU_Rosenfeld() of the first labelled neighbour,
    // or a fresh label (++ne) when neither is labelled. Returns the updated ne.
    uint32 neighbor = E[i - 1][j];          // above
    if (neighbor == 0) {
        neighbor = E[i - 1][j + 1];         // above-right
    }

    E[i][j] = (neighbor != 0) ? use1_QU_Rosenfeld(neighbor, T) : ++ne;
    return ne;
}
#endif // FAST


#if FAST
// ----------------------------------------------------------------------------------------------
static uint32 optimizedAccessRight_DT_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ne)
// ----------------------------------------------------------------------------------------------
{
    // 8-connected decision tree with Quick-Union, right-border version:
    // the pixels above, above-left and left are examined, in that order.
    // E[i][j] receives use1_QU_Rosenfeld() of the first labelled neighbour,
    // or a fresh label (++ne) when none is labelled. Returns the updated ne.
    uint32 neighbor = E[i - 1][j];          // above
    if (neighbor == 0) {
        neighbor = E[i - 1][j - 1];         // above-left
    }
    if (neighbor == 0) {
        neighbor = E[i][j - 1];             // left
    }

    E[i][j] = (neighbor != 0) ? use1_QU_Rosenfeld(neighbor, T) : ++ne;
    return ne;
}
#endif // FAST


#if FAST
// -----------------------------------------------------------------------------------------
static uint32 optimizedAccess_DT_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ne)
// -----------------------------------------------------------------------------------------
{
    // 8-connected decision tree with Quick-Union, interior-column version.
    //   above labelled                  -> use1(above)
    //   above-right + above-left        -> use2(above-left, above-right)
    //   above-right + left              -> use2(above-right, left)
    //   a single labelled neighbour     -> use1(that neighbour)
    //   no labelled neighbour           -> fresh label (++ne)
    // The left pixel is only read when the above-left pixel is unlabelled.
    // The result is stored in E[i][j]; the updated ne is returned.
    uint32 label;
    uint32 up = E[i - 1][j];

    if (up != 0) {
        label = use1_QU_Rosenfeld(up, T);
    }
    else {
        uint32 up_right = E[i - 1][j + 1];
        uint32 up_left  = E[i - 1][j - 1];
        uint32 left     = (up_left == 0) ? E[i][j - 1] : 0;

        if (up_right != 0) {
            if (up_left != 0) {
                label = use2_QU_Rosenfeld(up_left, up_right, T);
            }
            else if (left != 0) {
                label = use2_QU_Rosenfeld(up_right, left, T);
            }
            else {
                label = use1_QU_Rosenfeld(up_right, T);
            }
        }
        else {
            // at most one of up_left / left is non-zero at this point
            uint32 single = (up_left != 0) ? up_left : left;
            label = (single != 0) ? use1_QU_Rosenfeld(single, T) : ++ne;
        }
    }

    E[i][j] = label;
    return ne;
}
#endif // FAST



#if FAST
// --------------------------------------------------------------------------------------------------------
static uint32 lineLabeling_Fast_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// --------------------------------------------------------------------------------------------------------
{
    // Labels line i with the decision-tree (DT) + Quick-Union (QU) helpers.
    // Column 0 and column width - 1 use the border helpers, which do not read
    // outside [0 .. width - 1]; the other columns use the generic helper.
    // Background pixels (X[i][j] == 0) get label 0.
    // Returns the last label allocated.
    const int last = width - 1;

    // Left border
    if (X[i][0]) {
        ne = optimizedAccessLeft_DT_Rosenfeld(E, i, 0, T, ne);
    }
    else {
        E[i][0] = 0;
    }

    // Middle
    for (int j = 1; j < last; j++) {
        if (X[i][j]) {
            ne = optimizedAccess_DT_Rosenfeld(E, i, j, T, ne);
        }
        else {
            E[i][j] = 0;
        }
    }

    // Right border
    if (X[i][last]) {
        ne = optimizedAccessRight_DT_Rosenfeld(E, i, last, T, ne);
    }
    else {
        E[i][last] = 0;
    }
    return ne;
}
#endif // FAST


// ---------------------------------------------------------------------------------------------------
static uint32 lineLabeling_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne)
// ---------------------------------------------------------------------------------------------------
{
    // Compile-time dispatch to the SLOW or FAST line-labeling routine
    // (SLOW takes precedence when both are set). Returns the last label allocated.
#if !SLOW && !FAST
#error "Please define SLOW or FAST for the Rosenfeld version"
#endif

    uint32 last_label;
#if SLOW
    last_label = lineLabeling_Slow_Rosenfeld(X, i, width, E, T, ne);
#else
    last_label = lineLabeling_Fast_Rosenfeld(X, i, width, E, T, ne);
#endif
    return last_label;
}


// -----------------------------------------------------------------------
static uint32 countTable_Range_Rosenfeld(uint32 * T, uint32 e0, uint32 e1)
// -----------------------------------------------------------------------
{
    // Counts the roots of T in [e0 .. e1], i.e. the labels e with T[e] == e.
    // Each root stands for one connected component.
    uint32 roots = 0;

    for (uint32 label = e0; label <= e1; label++) {
        if (T[label] != label) {
            continue;
        }
        roots++;
    }
    return roots;
}


#if !FEATURES
// ---------------------------------------------------------------------
static void solveTable_Range_Rosenfeld(uint32 * T, uint32 e0, uint32 e1)
// ---------------------------------------------------------------------
{
    // Transitive closure of T over [e0 .. e1], in increasing label order:
    // each label is redirected to the entry of its parent, T[T[e]], whenever
    // that value is smaller than the label itself.
    for (uint32 label = e0; label <= e1; label++) {
        uint32 parent = T[label];
        uint32 root = T[parent];
        if (root < label) {
            T[label] = root; // root of the equivalence class
        }
    }
}
#endif // !FEATURES


#if FEATURES
// ----------------------------------------------------------------------------------------------------------
static void solveTable_solveFeatures_Range_Rosenfeld(uint32 * T, uint32 e0, uint32 e1, RegionStats * Stats)
// ----------------------------------------------------------------------------------------------------------
{
    // Transitive closure of T over [e0 .. e1], in increasing label order.
    // Whenever a label is redirected to a smaller root, the pair (root, label)
    // is passed to RegionStats_Accumulate_Stats1_From_Index so that the
    // statistics follow the merge. A root of 0 is rejected by assertion.
    for (uint32 label = e0; label <= e1; label++) {
        uint32 parent = T[label];
        uint32 root = T[parent];
        assert(root != 0);
        if (root < label) {
            T[label] = root; // root of the equivalence class
            RegionStats_Accumulate_Stats1_From_Index(Stats, root, label);
        }
    }
}
#endif // FEATURES


#if !FEATURES
// -------------------------------------
void MCA_Label_Rosenfeld_PAR1(MCA * mca)
// -------------------------------------
{
    // Step 0: labels the band [i0 .. i1] owned by this thread, using the label
    // interval starting at e0, then closes the equivalence table over the
    // labels actually used. The last label used is published in mca->ne.
    if (mca->p == 0) { 
        printf("*** %s ***\n", __func__);
    }
    
    CLOCK_THREAD_START_STEP(mca->p, 0);

    // local memory zones
    uint8 **  X = mca->X;
    uint32 ** E = mca->E;
    uint32 *  T = mca->T;

    // band geometry and label interval
    int width = mca->width;
    int row_first = mca->i0;
    int row_last = mca->i1;
    uint32 label_first = mca->e0;
    uint32 label_max = mca->e1;
    uint32 label_last = label_first - 1; // no label allocated yet
    uint32 root_count = 0;

    // T[e] = e over the interval. For thread 0, e0 = 1 and the range is
    // extended by one entry because FindRoot needs T[0] = 0.
    uint32 init_from = (mca->p == 0) ? label_first - 1 : label_first;
    set_ui32vector_j(T, init_from, label_max);

    MCA_VERBOSE2(display_ui8matrix_positive(X, row_first, row_last, 0, width - 1, 5, "Xp"); printf("\n"));

    // the first line has no line above it
    label_last = line0Labeling_Rosenfeld(X, row_first, width, E, T, label_last);
    for (int row = row_first + 1; row <= row_last; row++) {
        label_last = lineLabeling_Rosenfeld(X, row, width, E, T, label_last);
    }

    MCA_VERBOSE2(display_ui32matrix_positive(E, row_first, row_last, 0, width - 1, 5, "Ep"); printf("\n"));
    if (mca->p == 0) {
        MCA_VERBOSE2(display_ui32vector_number(T, label_first, label_last, "%5d", "Tp_avant"));
    }

    // transitive closure, without packing
    solveTable_Range_Rosenfeld(T, label_first, label_last);
    mca->ne = label_last; // largest label used in the interval [e0..e1]

    MCA_VERBOSE2(root_count = countTable_Range_Rosenfeld(T, label_first, label_last));
    MCA_VERBOSE2(printf("p = %d : e = [%d..%d] -> ne = %d -> nr = %d\n", mca->p, label_first, label_last, (label_last - label_first + 1), root_count));
    if (mca->p == 0) {
        MCA_VERBOSE2(display_ui32vector_number(T, label_first, label_last, "%5d", "Tp_apres"));
    }
    
    CLOCK_THREAD_END_STEP(mca->p, 0);
}
#endif // !FEATURES


#if !FEATURES
// -------------------------------------
void MCA_Label_Rosenfeld_PYR2(MCA * mca)
// -------------------------------------
{
    // Step 1: pyramidal merging of the border between this thread's band and
    // the band above it (lines i0 and i0 - 1), one pyramid level at a time.
    // Step 2: transitive closure of the local table T through the distributed
    // table D, over the labels [e0 .. mca->ne] used by this thread.

    // input
    int p = mca->p;
    int nb_level = mca->nb_level;

    if (mca->p == 0) {
        printf("*** %s ***\n", __func__);
    }
    
    // ------------------------------
    // -- pyramidal border merging --
    // ------------------------------
    
    // local variables
    int i = mca->i0;
    int width = mca->width;
    int alpha = mca->alpha;
    uint32 e0 = mca->e0;
    uint32 e1 = mca->ne;

    // local memory zones
    uint8 **  X = mca->X;
    uint32 ** E = mca->E;
    uint32 *  T = mca->T;
    uint32 ** D = mca->D;

    CLOCK_THREAD_START_STEP(p, 1);
#if PYR_BARRIERS
    // Optimized version in which the threads that have no more merging to do
    // break out of the loop.
    // Requires pre-computing the number of threads expected at each barrier.
    if (p != 0) { // thread 0 never has any merge to do
        int been_active = 0;
        for (int level = 0; level < nb_level; level++) {
            // active at this level when p is an odd multiple of 2^level
            if ((p + (1 << level)) % (1 << (level + 1)) == 0) {
                borderMerging_Rosenfeld_Dist(X, i, width, E, T, D, alpha);  // on lines (i) and (i-1)
                been_active = 1;
            }
            else if (been_active) {
                // already merged at an earlier level: leave without waiting
                break;
            }
            pthread_barrier_wait(&mca->barriers[level]);
        }
    }
    pthread_barrier_wait(&main_barrier);
#else
    // every thread waits on the main barrier once per level
    for (int level = 1; level <= nb_level; level++) {
        if ((p + (1 << (level - 1))) % (1 << level) == 0) {
            // active thread
            borderMerging_Rosenfeld_Dist(X, i, width, E, T, D, alpha);  // on lines (i) and (i-1)
        }
        pthread_barrier_wait(&main_barrier);
    }
#endif
    CLOCK_THREAD_END_STEP(p, 1);
    

    // ---------------------------------
    // -- parallel transitive closure --
    // ---------------------------------
    
    CLOCK_THREAD_START_STEP(p, 2);
    for (uint32 e = e0; e <= e1; e++) {
        uint32 r = T[e]; // local access
        if (r < e) {
            r = FindRoot_Dist(D, e, alpha); // remote access
            T[e] = r; // @QM: this used to be outside the "if" (I think this was already asked)
        }
        MCA_VERBOSE2(printf("p%d : T[%d] <- %d\n", p, e, r));
    }
    CLOCK_THREAD_END_STEP(p, 2);
}
#endif // !FEATURES


// -------------------------------------
void MCA_Label_Rosenfeld_PAR3(MCA * mca)
// -------------------------------------
{
    // Step 3: relabeling. Every non-zero label of the band [i0 .. i1] is
    // replaced by its entry in the table T; zero (background) is left as is.
    if (mca->p == 0) {
        printf("*** %s ***\n", __func__);
    }
    
    uint32 ** E = mca->E;
    uint32 * T = mca->T;

    int row_first = mca->i0;
    int row_last = mca->i1;
    int width = mca->width;

    CLOCK_THREAD_START_STEP(mca->p, 3);
    for (int row = row_first; row <= row_last; row++) {
        uint32 * line = E[row];
        for (int col = 0; col < width; col++) {
            uint32 label = line[col];
            if (label == 0) {
                continue;
            }
            line[col] = T[label];
        }
    }
    CLOCK_THREAD_END_STEP(mca->p, 3);
}


#if FEATURES
// -----------------------------------------------------
static void MCA_Label_Features_Rosenfeld_PAR1(MCA * mca)
// -----------------------------------------------------
{
    // Step 0 of the FEATURES build: labels the band [i0 .. i1] owned by this
    // thread, calls lineFeaturesComputation on every labelled line, then closes
    // the equivalence table and merges the statistics accordingly.
    // The last label used is published in mca->ne.
    if (mca->p == 0) { 
        printf("*** %s ***\n", __func__);
    }
    
    CLOCK_THREAD_START_STEP(mca->p, 0);

    // local memory zones
    uint8 **  X = mca->X;
    uint32 ** E = mca->E;
    uint32 *  T = mca->T;
    RegionStats * stats = mca->stats;

    // band geometry and label interval
    int width = mca->width;
    int row_first = mca->i0;
    int row_last = mca->i1;
    uint32 label_first = mca->e0;
    uint32 label_max = mca->e1;
    uint32 label_last = label_first - 1; // no label allocated yet
    uint32 root_count = 0;

    // Sub-optimal reset (for now; see region32).
    // For thread 0, e0 = 1 and the range is extended by one entry because
    // FindRoot needs T[0] = 0.
    uint32 init_from = (mca->p == 0) ? label_first - 1 : label_first;
    set_ui32vector_j(T, init_from, label_max);
    zero_RegionStatsVector(stats, init_from, label_max);

    if (mca->p == 0) {
        MCA_DISPLAY2(display_ui8matrix_positive(X, row_first, row_last, 0, width - 1, 5, "Xp"); printf("\n"));
    }

    // ------------------------ //
    // -- Labeling of a band -- //
    // ------------------------ //

    // the first line has no line above it
    label_last = line0Labeling_Rosenfeld(X, row_first, width, E, T, label_last);
    lineFeaturesComputation(E, row_first, width, stats);

    for (int row = row_first + 1; row <= row_last; row++) {
        label_last = lineLabeling_Rosenfeld(X, row, width, E, T, label_last); // Slow or Fast
        lineFeaturesComputation(E, row, width, stats);
    }
    mca->ne = label_last; // largest label used in the interval [e0..e1]

    if (mca->p == 0) {
        MCA_VERBOSE2(printf("ne = %d\n", label_last));
        MCA_DISPLAY2(display_ui32matrix_positive(E, row_first, row_last, 0, width - 1, 5, "Ep"); printf("\n"));
        MCA_DISPLAY2(display_ui32vector_number(T, label_first, label_last, "%5d", "Tp_avant"));
    }

    // ------------------------------------------------------- //
    // -- Transitive closure of each table T, without pack  -- //
    // ------------------------------------------------------- //

    solveTable_solveFeatures_Range_Rosenfeld(T, label_first, label_last, stats);

    if (mca->p == 0) {
        MCA_VERBOSE2(root_count = countTable_Range_Rosenfeld(T, label_first, label_last);
                printf("p = %d : e = [%d..%d] -> ne = %d -> nr = %d\n", mca->p, label_first, label_last, (label_last - label_first + 1), root_count));
        MCA_DISPLAY2(display_ui32vector_number(T, label_first, label_last, "%5d", "Tp_apres"));
    }
    CLOCK_THREAD_END_STEP(mca->p, 0);
}
#endif // FEATURES


#if FEATURES && !PARMERGE
// -----------------------------------------------------
static void MCA_Label_Features_Rosenfeld_PYR2(MCA * mca)
// -----------------------------------------------------
{
    // FEATURES build without PARMERGE.
    // Step 1: pyramidal merging of the border between this thread's band and
    // the band above it (lines i0 and i0 - 1), one pyramid level at a time.
    // Step 2: transitive closure of the local table T through the distributed
    // table D, over the labels [e0 .. mca->ne] used by this thread.
    // Step 3 is opened and closed empty so that its clock entries are set.
    int p = mca->p;
    int nb_level = mca->nb_level;

    if (mca->p == 0) {
        printf("*** %s ***\n", __func__);
    }
    
    // ------------------------------
    // -- pyramidal border merging --
    // ------------------------------
    
    // local variables
    int i = mca->i0;
    int width = mca->width;
    int alpha = mca->alpha;
    uint32 e0 = mca->e0;
    uint32 e1 = mca->ne;

    // local memory zones
    uint8 **  X = mca->X;
    uint32 ** E = mca->E;
    uint32 *  T = mca->T;
    uint32 ** D = mca->D;
    RegionStats ** F = mca->F;

    CLOCK_THREAD_START_STEP(p, 1);
#if PYR_BARRIERS
    // Optimized version in which the threads that have no more merging to do
    // break out of the loop.
    // Requires pre-computing the number of threads expected at each barrier.
    if (p != 0) { // thread 0 never has any merge to do
        int been_active = 0;
        for (int level = 0; level < nb_level; level++) {
            // active at this level when p is an odd multiple of 2^level
            if ((p + (1 << level)) % (1 << (level + 1)) == 0) {
                borderMerging_Features_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F);  // (i) and (i-1)
                been_active = 1;
            }
            else if (been_active) {
                // already merged at an earlier level: leave without waiting
                break;
            }
            pthread_barrier_wait(&mca->barriers[level]);
        }
    }
    pthread_barrier_wait(&main_barrier);
#else
    // every thread waits on the main barrier once per level
    for (int level = 1; level <= nb_level; level++) {
        if ((p + (1 << (level - 1))) % (1 << level) == 0) {
            // active thread
            borderMerging_Features_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F);  // (i) and (i-1)
        }
        pthread_barrier_wait(&main_barrier);
    }
#endif
    CLOCK_THREAD_END_STEP(p, 1);


    /**
     * To remove?
    // -- Debug display
    if (mca->p == 0) {
        MCA_VERBOSE1(puts("-----------------------------"));
        MCA_VERBOSE1(puts("[PYR2]: avant pack sequentiel"));
        MCA_VERBOSE1(puts("-----------------------------"));
    
        for (int p = 0; p < mca->np; p++) {
    
            MCA* mca_par = mcas[p];
            uint32 e0 = mca_par->e0;
            uint32 e1 = mca_par->ne;
            
            uint32*  T = mca_par->T;
            RegionStats* Stats = mca_par->Stats;
        
            RegionStats_DisplayStats_Sparse(T, e0, e1, Stats, NULL);
            puts("");
        }
    }
    */

    // ---------------------------------
    // -- parallel transitive closure --
    // ---------------------------------
    // identical to the version without Features
      
    CLOCK_THREAD_START_STEP(p, 2);
    for (uint32 e = e0; e <= e1; e++) {
        uint32 r = T[e]; // local access
        if (r < e) {
            r = FindRoot_Dist(D, e, alpha); // remote access
            T[e] = r;
        }
        MCA_VERBOSE2(printf("p%d : T[%d] <- %d\n", p, e, r));
    }
    CLOCK_THREAD_END_STEP(p, 2);

    // To avoid uninitialized accesses
    CLOCK_THREAD_START_STEP(p, 3);
    CLOCK_THREAD_END_STEP(p, 3);
}
#endif // FEATURES && !PARMERGE


#if FEATURES && PARMERGE
// -----------------------------------------------------
static void MCA_Label_Features_Rosenfeld_PAR2(MCA * mca)
// -----------------------------------------------------
{
    // FEATURES + PARMERGE build.
    // Step 1: every thread except thread 0 merges the border between its band
    // and the band above it (lines i0 and i0 - 1); all threads then meet on
    // the main barrier.
    // Step 2: transitive closure of the local table T through the distributed
    // table D, over the labels [e0 .. mca->ne] used by this thread.
    // Step 3 is opened and closed empty so that its clock entries are set.
    int p = mca->p;

    if (mca->p == 0) {
        printf("*** %s ***\n", __func__);
    }
    
    // local memory zones
    uint8 **  X = mca->X;
    uint32 ** E = mca->E;
    uint32 *  T = mca->T;
    uint32 ** D = mca->D;
    RegionStats ** F = mca->F;

    // local variables
    int border_row = mca->i0;
    int width = mca->width;
    int alpha = mca->alpha;
    uint32 label_first = mca->e0;
    uint32 label_last = mca->ne;

    // -----------------------------
    // -- parallel border merging --
    // -----------------------------

    CLOCK_THREAD_START_STEP(p, 1);
    if (p != 0) { // thread 0 never has any merge to do
        borderMerging_Features_Rosenfeld_Dist(X, border_row, width, E, T, D, alpha, F);  // (i) and (i-1)
    }
    pthread_barrier_wait(&main_barrier);
    CLOCK_THREAD_END_STEP(p, 1);


    // ---------------------------------
    // -- parallel transitive closure --
    // ---------------------------------
    // identical to the version without Features
     
    CLOCK_THREAD_START_STEP(p, 2);
    for (uint32 label = label_first; label <= label_last; label++) {
        uint32 root = T[label]; // local access
        if (root < label) {
            root = FindRoot_Dist(D, label, alpha); // remote access
            T[label] = root;
        }
        MCA_VERBOSE2(printf("p%d : T[%d] <- %d\n", p, label, root));
    }
    CLOCK_THREAD_END_STEP(p, 2);

    // To avoid uninitialized accesses
    CLOCK_THREAD_START_STEP(p, 3);
    CLOCK_THREAD_END_STEP(p, 3);
}
#endif // FEATURES && PARMERGE




#if !FEATURES
// =============================================================
#if TARGET_OS == GIETVM
__attribute__((constructor)) void MCA_Label_Rosenfeld(MCA * mca)
#else
void MCA_Label_Rosenfeld(MCA * mca)
#endif
// =============================================================
{
    // Per-thread entry point of the labeling without features.
    // Runs, in order: scatter of the input image, PAR1 (band labeling),
    // PYR2 (border merging + transitive closure), PAR3 (relabeling) and
    // gather of the label image. Every step is followed by a wait on
    // main_barrier, so all threads finish a step before any starts the next.
#if TARGET_OS == GIETVM
    unsigned int x, y, lpid;
    giet_proc_xyp(&x, &y, &lpid);
    // Update mca->p from x, y, lpid
    // so that the allocations made by main are local,
    // i.e. 
    mca->p = (x * Y_SIZE + y) * NB_PROCS_MAX + lpid;
    // We have :
    // mca->p = 4 for (x = 0, y = 1, lpid = 0)
    // mca->p = 5 for (x = 0, y = 1, lpid = 1)
    MCA_VERBOSE2(printf("mca->p = %d pour (x = %d, y = %d, lpid = %d)\n", mca->p, x, y, lpid));
#endif

    CLOCK_THREAD_START(mca->p);
    CLOCK_THREAD_COMPUTE_START(mca->p);

    MCA_Scatter_ImageX(mca);
    pthread_barrier_wait(&main_barrier);

    MCA_Label_Rosenfeld_PAR1(mca);
    pthread_barrier_wait(&main_barrier);
    
    MCA_Label_Rosenfeld_PYR2(mca);
    pthread_barrier_wait(&main_barrier);
    
    MCA_Label_Rosenfeld_PAR3(mca);
    pthread_barrier_wait(&main_barrier);

    MCA_Gather_ImageL(mca);
    pthread_barrier_wait(&main_barrier);

    CLOCK_THREAD_COMPUTE_END(mca->p);
    CLOCK_THREAD_END(mca->p);

#if TARGET_OS == GIETVM
    // on GIETVM every thread except thread 0 terminates here
    if (mca->p != 0) {
        exit(0);
    }
#endif
}
#endif // !FEATURES


#if FEATURES
// ======================================================================
#if TARGET_OS == GIETVM
__attribute__((constructor)) void * MCA_Label_Features_Rosenfeld(void * arg)
#else
void * MCA_Label_Features_Rosenfeld(void * arg)
#endif
// ======================================================================
{
    // Per-thread entry point of the labeling with features; arg is the MCA
    // descriptor of the calling thread. Runs, in order: scatter of the input
    // image, PAR1 (band labeling + features), PAR2 or PYR2 (border merging +
    // transitive closure, depending on PARMERGE), PAR3 (relabeling) and gather
    // of the label image, each followed by a wait on main_barrier.
    // When display_features is set, thread 0 then prints the statistics of
    // every thread between "[STATS]" and "[/STATS]". Always returns NULL.
    MCA * mca = (MCA *) arg;
#if TARGET_OS == GIETVM
    unsigned int x, y, lpid;
    giet_proc_xyp(&x, &y, &lpid);
    // Update mca->p from x, y, lpid so that the allocations made by main
    // are local, i.e.
    mca->p = (x * Y_SIZE + y) * NB_PROCS_MAX + lpid;
    // We have :
    // mca->p = 4 for (x = 0, y = 1, lpid = 0)
    // mca->p = 5 for (x = 0, y = 1, lpid = 1)
    MCA_VERBOSE2(printf("mca->p = %d pour (x = %d, y = %d, lpid = %d)\n", mca->p, x, y, lpid));
#endif

    CLOCK_THREAD_START(mca->p);
    CLOCK_THREAD_COMPUTE_START(mca->p);

    MCA_Scatter_ImageX(mca);
    pthread_barrier_wait(&main_barrier);

    MCA_Label_Features_Rosenfeld_PAR1(mca);
    pthread_barrier_wait(&main_barrier);

#if PARMERGE
    MCA_Label_Features_Rosenfeld_PAR2(mca);
#else
    MCA_Label_Features_Rosenfeld_PYR2(mca);
#endif
    pthread_barrier_wait(&main_barrier);

    MCA_Label_Rosenfeld_PAR3(mca);
    pthread_barrier_wait(&main_barrier);

    MCA_Gather_ImageL(mca);
    pthread_barrier_wait(&main_barrier);

    CLOCK_THREAD_COMPUTE_END(mca->p);

    if (display_features && mca->p == 0) {
        int index = 1; // running counter shared by all the display calls
        printf("[STATS]\n");
        for (int worker = 0; worker < mca->np; worker++) {
            MCA * mca_par = mca->mca->mcas[worker];
            uint32 * T = mca_par->T;
            RegionStats * stats = mca_par->stats;
            uint32 label_first = mca_par->e0;
            uint32 label_last = mca_par->ne; // last label used by that thread
            RegionStats_DisplayStats_Sparse(T, label_first, label_last, stats, NULL, &index);
        }
        printf("[/STATS]\n");
    }

    CLOCK_THREAD_END(mca->p);

#if TARGET_OS == GIETVM
    // on GIETVM every thread except thread 0 terminates here
    if (mca->p != 0) {
        exit(0);
    }
#endif

    return NULL;
}
#endif // FEATURES


// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

