/* ------------- */
/* --- mca.c --- */
/* ------------- */

/*
 * Copyright (c) 2016 Lionel Lacassagne, LIP6, UPMC, CNRS
 * Init  : 2016/03/03
 * modif : 2016/03/07
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <malloc.h>

#include "nrc_os_config.h"
#include "config.h"
#include "nrc.h"

#if TARGET_OS == GIETVM
    #include <giet_config.h>
#endif

#include "util.h"
#include "ecc_common.h"
#include "ecc_features.h"
#include "mca_matrix_dist.h"

#include "palette.h"
#include "bmpNR.h"
#include "str_ext.h"


// -- local --
#include "mca.h"


/*
 * Report a fatal MCA error and terminate the program.
 *
 * msg : human-readable description of the failure. A NULL pointer is
 *       reported as "(null)" instead of being handed to printf's %s
 *       conversion, which would be undefined behavior.
 *
 * The message is written with printf (standard output) and the process
 * then exits with status 1, so this function never returns.
 */
// -----------------------
void MCA_Error(char * msg)
// -----------------------
{
    printf("MCA ERROR: %s\n", (msg != NULL) ? msg : "(null)");
    exit(1);
}


/*
 * Allocate one MCA descriptor on the heap and return it.
 *
 * The structure is zero-filled, so every field starts from a known state
 * (integers at 0, pointers with all bits zero) rather than indeterminate
 * heap contents. The caller fills it in afterwards.
 *
 * An allocation failure is fatal: MCA_Error() prints a message and exits,
 * so the returned pointer is never NULL.
 */
// -------------------------------
MCA * MCA_pConstructor_Empty(void)
// -------------------------------
{
    MCA * mca = (MCA *) calloc(1, sizeof(MCA));

    if (mca == NULL) {
        MCA_Error("allocation failed in MCA_pConstructor_Empty");
    }
    return mca;
}


/*
 * Attach the image X to the descriptor.
 * Only the pointer is stored; no pixel is copied.
 */
// ---------------------------------------
void MCA_Set_ImageX(MCA * mca, uint8 ** X)
// ---------------------------------------
{
    uint8 ** image = X;

    mca->X = image;
}


/*
 * Attach the uint32 image E to the descriptor (stored in the field E).
 * Only the pointer is stored; no element is copied.
 */
// ---------------------------------------
void MCA_Set_ImageL(MCA * mca, uint32 ** E)
// ---------------------------------------
{
    uint32 ** labels = E;

    mca->E = labels;
}


/*
 * Record the image width and the matching column interval [j0..j1],
 * which is set to [0..width-1].
 */
// ------------------------------------
void MCA_Set_Width(MCA * mca, int width)
// ------------------------------------
{
    const int last_column = width - 1;

    // column bounds first, then the width itself (independent stores)
    mca->j0 = 0;
    mca->j1 = last_column;
    mca->width = width;
}


/*
 * Record the image height and the matching row interval [i0..i1],
 * which is set to [0..height-1].
 */
// --------------------------------------
void MCA_Set_Height(MCA * mca, int height)
// --------------------------------------
{
    const int last_row = height - 1;

    // row bounds first, then the height itself (independent stores)
    mca->i0 = 0;
    mca->i1 = last_row;
    mca->height = height;
}


/*
 * Set both dimensions of the descriptor at once: width/j0/j1 through
 * MCA_Set_Width and height/i0/i1 through MCA_Set_Height.
 */
// ------------------------------------------------
void MCA_Set_Size(MCA * mca, int width, int height)
// ------------------------------------------------
{
    // the two setters write disjoint fields, so their order is irrelevant
    MCA_Set_Height(mca, height);
    MCA_Set_Width(mca, width);
}


/*
 * Set both dimensions of the descriptor at once.
 * Same effect as MCA_Set_Size, to which it delegates.
 */
// -----------------------------------------------------
void MCA_Set_Dimension(MCA * mca, int width, int height)
// -----------------------------------------------------
{
    MCA_Set_Size(mca, width, height);
}


/*
 * Record np in the descriptor.
 * MCA_Initialize uses this value as the number of worker descriptors
 * (horizontal bands) it creates.
 */
// -------------------------------
void MCA_Set_NP(MCA * mca, int np)
// -------------------------------
{
    const int nb_workers = np;

    mca->np = nb_workers;
}


/*
 * Record nr in the descriptor.
 * MCA_Initialize copies this value into every worker descriptor.
 * NOTE(review): the meaning of nr is not visible in this file
 * (presumably a run/repetition count) -- confirm against the callers.
 */
// -------------------------------
void MCA_Set_NR(MCA * mca, int nr)
// -------------------------------
{
    const int nr_value = nr;

    mca->nr = nr_value;
}



/*
 * Compute the label-count bound for a height x width image.
 *
 * connection : 4 or 8 (connectivity); any other value yields 0.
 * height     : number of rows.
 * width      : number of columns.
 *
 * Returns
 *   connection == 4 : ceil(h/2) * ceil(w/2) + floor(h/2) * floor(w/2)
 *   connection == 8 : ceil(h/2) * ceil(w/2)
 *   otherwise       : 0
 */
// ------------------------------------------------------------------
uint32 MCA_CalcMaxLabels(int connection, uint32 height, uint32 width)
// ------------------------------------------------------------------
{
    // term shared by both connectivities: ceil(h/2) * ceil(w/2)
    const uint32 ceil_product = ((height + 1) / 2) * ((width + 1) / 2);

    switch (connection) {
    case 4:
        return ceil_product + (height / 2) * (width / 2); // 4C
    case 8:
        return ceil_product; // 8C
    default:
        return 0;
    }
}


/*
 * Build the np worker descriptors attached to the master descriptor `mca`.
 *
 * Reads np, nr, width and height from `mca`, then:
 *   - allocates the array of worker pointers and stores it in mca->mcas;
 *   - splits the rows into np consecutive horizontal bands; the first
 *     (height % np) bands receive one extra row;
 *   - gives every band a label range of nemax_par entries, where
 *     nemax_par = 1 << pw2 is meant to be the power of two
 *     >= (height / np) * width + 1 (assuming i32log2 returns
 *     floor(log2(x)) -- confirm); band 0 starts at label 1;
 *   - allocates, for every band, the images X and E, the table T, the
 *     stats vector and the pointer vectors D and F;
 *   - initializes every T with set_ui32vector_j, fills D and F, and makes
 *     the border rows of each E alias the edge rows of its neighbours.
 *
 * np < 1 and the allocation failures checked here are fatal: MCA_Error()
 * prints a message and exits.
 */
// ---------------------------
void MCA_Initialize(MCA * mca)
// ---------------------------
{
    // input
    int np     = mca->np;
    int nr     = mca->nr;
    int width  = mca->width;
    int height = mca->height;

    // variables
    int i0_par, i1_par, i1_par_previous;
    int height_par, height_mod;

    int pw2;
    int32 ne_par;          // label slots needed per band (height_par * width + 1)
    uint32 nemax_par;      // 1 << pw2, intended as the power of 2 >= ne_par
    uint32 e0_par, e1_par; // label index range of one band [start..end]
    int nb_level;

    MCA ** mcas;
    MCA *  mca_par;

    MCA_VERBOSE1(printf("*** %s ***\n", __func__));
    MCA_VERBOSE2(printf("   height = %d\n", height));
    MCA_VERBOSE2(printf("   width  = %d\n", width));

    // np is used below as an array size and as a divisor: reject np < 1
    // instead of running into a division by zero or a bogus allocation
    if (np < 1) {
        MCA_Error("MCA_Initialize: np must be >= 1");
    }

    // array of pointers to mca workers
    mcas = (MCA **) malloc(np * sizeof(MCA *));
    if (mcas == NULL) {
        MCA_Error("MCA_Initialize1");
    }
    mca->mcas = mcas;

    // height of each band
    height_par = height / np;
    height_mod = height % np;

    MCA_VERBOSE2(printf("   height_par = %d x %d + %d\n", height_par, np, height_mod));
    MCA_VERBOSE2(printf("   ========================\n"));

    i1_par_previous = 0;

    // power of 2 for each band: round ne_par up to 1 << pw2
    ne_par = height_par * width + 1;
    MCA_VERBOSE2(printf("   ne_par    = %d\n", ne_par));
    pw2 = i32log2(ne_par);
    if (ne_par > (1 << pw2)) {
        pw2++;
    }
    nemax_par = 1 << pw2;
    mca->alpha = pw2;

    MCA_VERBOSE2(printf("   nemax_par = %d\n", nemax_par));

    // number of levels: smallest nb_level such that (1 << nb_level) >= np
    // (assuming i32log2 returns floor(log2(x)) -- confirm)
    nb_level = i32log2(np);
    if ((1 << nb_level) < np) {
        nb_level++;
    }

#if PYR_BARRIERS
    // ------------------------------------------
    // -- Allocation of the pyramidal barriers --
    // ------------------------------------------

    pthread_barrier_t * barriers = NULL;
    if (nb_level > 0) {
        barriers = malloc(sizeof(pthread_barrier_t) * nb_level);
        if (barriers == NULL) {
            // without this check pthread_barrier_init would write through NULL
            MCA_Error("MCA_Initialize: barriers allocation failed");
        }

        // Initially all threads are active except thread 0
        int nb_active = np - 1;
        pthread_barrier_init(&barriers[0], NULL, nb_active);
        for (int i = 1; i < nb_level; i++) {
            // thread 0 never does any merge
            for (int p = 1; p < np; p++) {
                if ((p + (1 << (i - 1))) % (1 << i) == 0) {
                    // thread inactive at level i
                    nb_active -= 1;
                }
            }
            pthread_barrier_init(&barriers[i], NULL, nb_active);
        }
    }
#endif

    for (int p = 0; p < np; p++) {

        // ----------------- //
        // -- constructor -- //
        // ----------------- //
        MCA_VERBOSE3(printf("-- p = %d ----------------\n", p));

        // alloc of mca workers into array of pointers
        mca_par = MCA_pConstructor_Empty();
        if (mca_par == NULL) {
            MCA_Error("MCA_Initialize2\n");
        }
        mcas[p]      = mca_par;
        mca_par->p   = p;
        mca_par->mca = mca; // pointer to master
#if TARGET_OS == GIETVM
        int x, y; // cluster coordinates
        // We have p == 4 => x = 0; y = 1
        x = (p / NB_PROCS_MAX) / Y_SIZE;
        y = (p / NB_PROCS_MAX) % Y_SIZE;
        MCA_VERBOSE3(printf("p = %d (x = %d, y = %d)\n", p, x, y));
#endif

        // ------------------------------------ //
        // -- parameter computation: pass #1 -- //
        // ------------------------------------ //

        // rows of each band: bands are consecutive, and the first
        // (height % np) bands take one extra row
        if (p == 0) {
            i0_par = 0;
        }
        else {
            i0_par = i1_par_previous + 1;
        }
        if (height_mod) {
            i1_par = i0_par + height_par;
            height_mod = height_mod - 1;
        }
        else {
            i1_par = i0_par + height_par - 1;
        }
        i1_par_previous = i1_par;

        MCA_VERBOSE3(printf("i0_par = %d\n", i0_par));
        MCA_VERBOSE3(printf("i1_par = %d\n", i1_par));

        // labels: band p owns [p * nemax_par .. (p + 1) * nemax_par - 1],
        // except band 0 whose range starts at 1
        if (p == 0) {
            e0_par = 1;
            e1_par = nemax_par - 1;
        }
        else {
            e0_par = p * nemax_par;
            e1_par = e0_par + nemax_par - 1;
        }

        MCA_VERBOSE3(printf("e0_par = %d\n", e0_par));
        MCA_VERBOSE3(printf("e1_par = %d\n", e1_par));

        mca_par->width  = width;
        // NOTE(review): height is set to height_par even for the bands that
        // received an extra row (i1 - i0 + 1 == height_par + 1) -- confirm
        // that no user of this field expects the real row count
        mca_par->height = height_par;
        mca_par->i0 = i0_par;
        mca_par->i1 = i1_par;
        mca_par->j0 = 0;
        mca_par->j1 = width - 1;
        mca_par->e0 = e0_par;
        mca_par->e1 = e1_par;
        // on the first iteration, the whole table T is reset
        mca_par->ne_prev = e1_par;
        mca_par->alpha = pw2;
        mca_par->np = np;
        mca_par->nr = nr;
        // for the pyramidal barriers
        mca_par->nb_level = nb_level;
#if PYR_BARRIERS
        mca_par->barriers = barriers; // one barrier array shared by all workers
#else
        mca_par->barriers = NULL;
#endif
        mca_par->F = NULL; // default init
        mca_par->stats = NULL; // default init

        // ---------------- //
        // -- allocation -- //
        // ---------------- //
#if TARGET_OS == GIETVM
        mca_par->X = remote_ui8matrix(i0_par, i1_par, 0, width - 1, x, y);
        mca_par->E = remote_dist_ui32matrix(i0_par, i1_par, 0, width - 1, x, y); // distributed matrix with border

        mca_par->T = remote_ui32vector(e0_par, e1_par, x, y);
        mca_par->stats = remote_RegionStatsVector(e0_par, e1_par, x, y);

        mca_par->D = (uint32 **) remote_vvector(0, np - 1, x, y);
        mca_par->F = (RegionStats **) remote_vvector(0, np - 1, x, y);
#else // !GIETVM
        mca_par->X = ui8matrix (i0_par, i1_par, 0, width - 1);
        mca_par->E = dist_ui32matrix(i0_par, i1_par, 0, width - 1); // distributed matrix with border

        mca_par->T = ui32vector(e0_par, e1_par);
        mca_par->stats = RegionStatsVector(e0_par, e1_par);

        mca_par->D = (uint32 **) vvector(0, np - 1);
        mca_par->F = (RegionStats **) vvector(0, np - 1);
#endif
        MCA_VERBOSE3(printf("X = %p\n", mca_par->X));
        MCA_VERBOSE3(printf("E = %p\n", mca_par->E));
        MCA_VERBOSE3(printf("T = %p\n", mca_par->T));
        MCA_VERBOSE3(printf("D = %p\n", mca_par->D));
    } // p


    // initialize every table T over its own label range
    // (set_ui32vector_j presumably stores T[j] = j -- confirm)
    for (int p = 0; p < np; p++) {
        MCA * mca_par = mcas[p];

        uint32 * T = mca_par->T;
        uint32 e0 = mca_par->e0;
        uint32 e1 = mca_par->e1;

        MCA_VERBOSE3(printf("p = %d T[%d..%d]\n", p, e0, e1));
        set_ui32vector_j(T, e0, e1);
    }

    MCA_VERBOSE3(printf("display des tables d'EQ\n"));
    for (int p = 0; p < np; p++) {
        MCA * mca_par = mcas[p];

        uint32 * T = mca_par->T;
        uint32 e0 = mca_par->e0;
        uint32 e1 = mca_par->e1;

        MCA_VERBOSE3(printf("p = %d T[%d..%d]\n", p, e0, e1));
        MCA_VERBOSE3(display_ui32vector_number(T, e0, e0 + 10, "%5d", "T"));
        MCA_VERBOSE3(printf("\n"));
    }

    // ------------------------------------------------------------ //
    // -- parameter computation: pass #2 = distributed parameters -- //
    // ------------------------------------------------------------ //

    // distributed indirection table D: every worker p gets, for each band k,
    // a pointer into band k's table T (D) and stats vector (F)
    MCA_VERBOSE3(printf("nemax_par = %d\n", nemax_par));
    for (int p = 0; p < np; p++) {
        MCA * mca_p = mcas[p];
        uint32 ** D = mca_p->D;
        RegionStats ** F  = mca_p->F;

        for (int k = 0; k < np; k++) {
            MCA * mca_k = mcas[k];
            uint32 * T = mca_k->T;
            // original note: the "MSB" must be subtracted (presumably the
            // band part of a label before indexing D[k] -- confirm)
            D[k] = T + k * nemax_par;
            RegionStats * stat = mca_k->stats;
            F[k] = stat + k * nemax_par; // same offset as D[k]
        } // k
    } // p

    MCA_VERBOSE3(printf("table d'indirection distribuee D\n"));

    for (int p = 0; p < np; p++) {
        MCA_VERBOSE3(printf("== p = %d ==========\n", p));

        MCA * mca_p = mcas[p];
        uint32 ** D = mca_p->D;

        for (int k = 0; k < np; k++) {
            MCA * mca_k = mcas[k];
            uint32 * T = mca_k->T;

            uint32 e0 = mca_k->e0;
            uint32 e1 = mca_k->e1;
            MCA_VERBOSE3(display_ui32vector_number(T, e0, e0 + 9, "%5d", "T"));
            MCA_VERBOSE3(display_ui32vector(D[k], 0, 9, "%5d", "D\n"));
        }
        MCA_VERBOSE3(printf("\n"));
    }

    // link the border rows of E between neighbouring bands
    for (int p = 0; p < np; p++) {
        if (p > 0) {
            // row just above band p aliases the last row of band p - 1
            mcas[p]->E[mcas[p]->i0 - 1] = mcas[p - 1]->E[mcas[p - 1]->i1];
        }
        if (p < np - 1) {
            // row just below band p aliases the first row of band p + 1
            mcas[p]->E[mcas[p]->i1 + 1] = mcas[p + 1]->E[mcas[p + 1]->i0];
        }
    }
}


/*
 * Print the parameters of the master descriptor (height, width, np) and,
 * for each of its np workers, the worker index and its [i0..i1], [j0..j1]
 * and [e0..e1] intervals.
 *
 * All output goes through the MCA_VERBOSE1/2/3 macros.
 */
// -----------------------------------
void MCA_Display_Parameters(MCA * mca)
// -----------------------------------
{
    MCA ** workers = mca->mcas;
    const int nb_workers = mca->np;

    MCA_VERBOSE1(printf("*** MCA_Display_Parameters ***\n"));

    MCA_VERBOSE2(printf("   height = %d\n", mca->height));
    MCA_VERBOSE2(printf("   width  = %d\n", mca->width));
    MCA_VERBOSE2(printf("   np     = %d\n", mca->np));

    for (int p = 0; p < nb_workers; p++) {
        MCA * worker = workers[p];
        (void) worker; // referenced only inside the MCA_VERBOSE3 arguments

        MCA_VERBOSE3(printf("Display MCA[%d]\n", p));
        MCA_VERBOSE3(printf("p = %d\n", worker->p));
        MCA_VERBOSE3(printf("i0 = %8d  i1 = %8d\n", worker->i0, worker->i1));
        MCA_VERBOSE3(printf("j0 = %8d  j1 = %8d\n", worker->j0, worker->j1));
        MCA_VERBOSE3(printf("e0 = %8d  e1 = %8d\n", worker->e0, worker->e1));
    }
}


/*
 * Release the workers of the master descriptor `mca`, then `mca` itself.
 *
 * For each of the np workers this frees X, E, T, stats, D and F with the
 * matching free_* helper, then the worker descriptor. The worker array
 * and the master descriptor are freed last, so `mca` must not be used
 * after this call.
 *
 * With PYR_BARRIERS, the barrier array is freed once, through worker 0.
 * NOTE(review): the barriers are freed without a prior
 * pthread_barrier_destroy -- confirm whether that is intended.
 */
// -------------------------
void MCA_Finalize(MCA * mca)
// -------------------------
{
    MCA ** workers = mca->mcas;
    const int nb_workers = mca->np;

    MCA_VERBOSE1(printf("*** MCA_Finalize ***\n"));

#if PYR_BARRIERS
    free(workers[0]->barriers);
#endif

    for (int p = 0; p < nb_workers; p++) {
        MCA * worker = workers[p];

        // bounds the free_* helpers are called with
        const int row_first = worker->i0;
        const int row_last  = worker->i1;
        const int col_first = worker->j0;
        const int col_last  = worker->j1;
        const uint32 label_first = worker->e0;
        const uint32 label_last  = worker->e1;

        // images
        free_ui8matrix (worker->X, row_first, row_last, col_first, col_last);
        free_dist_ui32matrix(worker->E, row_first, row_last, col_first, col_last);

        // per-label tables
        free_ui32vector(worker->T, label_first, label_last);
        free_RegionStatsVector(worker->stats, label_first, label_last);

        // per-band pointer vectors
        free_vvector((void **) worker->D, 0, nb_workers - 1);
        free_vvector((void **) worker->F, 0, nb_workers - 1);

        free(worker);
    }

    free(workers);
    free(mca);
}


/*
 * Copy this worker's band of the master source image into the worker's
 * own image X, and write 0 into the same positions of its image E.
 *
 * mca : worker descriptor. mca->mca must point to the master descriptor,
 *       whose X is read. Rows i0..i1 and columns 0..width-1 of the worker
 *       are processed.
 *
 * Worker 0 prints the banner (through MCA_VERBOSE1).
 */
// -------------------------------
void MCA_Scatter_ImageX(MCA * mca)
// -------------------------------
{
    // scatter of the source binary image
    uint8 ** X = mca->mca->X;

    if (mca->p == 0) {
        MCA_VERBOSE1(printf("*** MCA_Scatter_ImageX ***\n"));
    }

    int i0    = mca->i0;
    int i1    = mca->i1;
    int width = mca->width;

    uint8 **  X_par = mca->X;
    uint32 ** E_par = mca->E;

    for (int i = i0; i <= i1; i++) {
        // row pointers do not change inside the column loop
        const uint8 * src = X[i];
        uint8 *  dst = X_par[i];
        uint32 * lab = E_par[i];

        for (int j = 0; j <= width - 1; j++) {
            dst[j] = src[j];
            lab[j] = 0; // original note: normally unnecessary, since 0 is written anyway
        }
    }
}


/*
 * Copy this worker's band of its image E back into the master image E.
 *
 * mca : worker descriptor. mca->mca must point to the master descriptor,
 *       whose E is written. Rows i0..i1 and columns 0..width-1 of the
 *       worker are copied.
 *
 * Worker 0 prints the banner (through MCA_VERBOSE1).
 */
// ------------------------------
void MCA_Gather_ImageL(MCA * mca)
// ------------------------------
{
    // gathering of the label image
    uint32 ** E = mca->mca->E;

    if (mca->p == 0) {
        MCA_VERBOSE1(printf("*** MCA_Gather_ImageL ***\n"));
    }

    int i0 = mca->i0;
    int i1 = mca->i1;
    int width = mca->width;

    uint32 ** E_par = mca->E;

    for (int i = i0; i <= i1; i++) {
        // row pointers do not change inside the column loop
        const uint32 * src = E_par[i];
        uint32 * dst = E[i];

        for (int j = 0; j <= width - 1; j++) {
            dst[j] = src[j];
        }
    }
}


// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4


