/* ------------------ */
/* --- mca_main.c --- */
/* ------------------ */

/*
 * Copyright (c) 2016 Lionel Lacassagne, LIP6, UPMC, CNRS
 * Init  : 2016/03/03
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <malloc.h>

#include "nrc_os_config.h"
#include "config.h"
#include "nrc.h"

#if TARGET_OS == GIETVM
    #include <user_lock.h>
    #include <malloc.h>
    #include <giet_config.h>
    #include <user_barrier.h>
#else
    #include <unistd.h>
#endif

#include "util.h"
#include "ecc_common.h"
#include "ecc_features.h"
#include "palette.h"
#include "bmpNR.h"
#include "mca_matrix_dist.h"
#include "mca_rosenfeld.h"
#include "clock.h"
#include "str_ext.h"


/* -- local -- */
#include "mca.h"

// Capacity of thread_table; main() aborts when more threads are requested.
#define MAX_THREADS 256
// Defaults used by main() when the matching option is not given
// (the same values are printed by usage()).
#define DEFAULT_NTHREADS 1
#define DEFAULT_IN_FILENAME "/misc/cadastre.pgm"
#define DEFAULT_OUT_FILENAME "out.bmp"

// Handles of the threads created by mca_test1() / mca_test2().
// Slot 0 is never written in this file: the calling thread runs the
// labeling for mcas[0] itself.
pthread_t thread_table[MAX_THREADS];
// Initialized for num_threads participants at the start of mca_test1() and
// mca_test2(); it is not waited on anywhere in this file.
// NOTE(review): presumably used by the labeling code in another file -- confirm.
pthread_barrier_t main_barrier;
// Set to 1 by the -d command-line option; not read in this file.
int display_features = 0;
// Set to 1 by the -g command-line option; makes mca_test2() write the output image.
int generate_output_image = 0;

// NOTE(review): CLOCK_DEC is not defined in this file (presumably clock.h);
// it looks like it declares the storage used by the other CLOCK_* macros -- confirm.
CLOCK_DEC;

/*
 * Prints the command-line help on standard output.
 *
 * name : program name inserted in the first line (main() passes argv[0]).
 *
 * The whole text is emitted by one printf call; the adjacent string
 * literals are concatenated by the compiler.
 */
static void usage(char * name) {
    printf("Usage: %s <options>\n"
           "options:\n"
           "  -i <input_file>  : Input file (default = %s)\n"
           "  -o <output_file> : Output file (default = %s)\n"
           "  -nN              : N = number of threads (default = %d).\n"
           "  -d               : Display features (default = false, requires features computation).\n"
           "  -g               : Generate output image (default = false).\n"
           "  -h               : Print out command line options.\n\n",
           name,
           DEFAULT_IN_FILENAME,
           DEFAULT_OUT_FILENAME,
           DEFAULT_NTHREADS);
}



// --------------------------------------------------------------------------
void init_forme_boulon1(uint8 *** X0, int * i0, int * i1, int * j0, int * j1)
// --------------------------------------------------------------------------
// Builds the hard-coded "boulon1" test shape.
//
// A 28 x 30 uint8 matrix is allocated, cleared, and then filled one row at a
// time from the character pattern below through set_ui8vector_str().
// NOTE(review): presumably '1' marks a foreground pixel and ' ' background;
// the exact conversion is done by set_ui8vector_str -- confirm there.
//
// Outputs:
//   *X0        : the allocated matrix (this function does not free it)
//   *i0, *i1   : first / last row index    (0 and 27)
//   *j0, *j1   : first / last column index (0 and 29)
//
// MCA_Error() is called if the number of pattern rows differs from the
// declared height.
{
    //   column index (tens, then units):
    //   000000000011111111112222222222
    //   012345678901234567890123456789
    static char * const pattern[] = {
        "                         111  ", // 00
        "                        11111 ", // 01
        "                      1111111 ", // 02
        "                     11111111 ", // 03
        "                    1111111111", // 04
        "                   11111111111", // 05
        "                 1111111111111", // 06
        "               11111111111111 ", // 07
        "              11111111111111  ", // 08
        "             11111111111111   ", // 09
        "     11    11111111111111     ", // 10
        "    111   11111111111111      ", // 11
        "   11111111111111111111       ", // 12
        " 11111111111111111111         ", // 13
        "1111111111111111111           ", // 14
        " 11111111111111111            ", // 15
        " 1111111111111111             ", // 16
        " 111111111111111              ", // 17
        "  111111111111                ", // 18
        "  1111111111                  ", // 19
        "  1111111111                  ", // 20
        "   111111111                  ", // 21
        "   111111111                  ", // 22
        "    11111111                  ", // 23
        "    1111111                   ", // 24
        "     11111                    ", // 25
        "     111                      ", // 26
        "                              ", // 27
    };
    const int pattern_rows = (int) (sizeof(pattern) / sizeof(pattern[0]));
    const int h = 28;
    const int w = 30;
    const int last_row = h - 1;
    const int last_col = w - 1;
    int row;

    uint8 ** shape = ui8matrix(0, last_row, 0, last_col);
    zero_ui8matrix(shape, 0, last_row, 0, last_col);

    // Publish the matrix and its index ranges to the caller.
    *X0 = shape;
    *i0 = 0;
    *i1 = last_row;
    *j0 = 0;
    *j1 = last_col;

    // One pattern string per matrix row, top to bottom.
    for (row = 0; row < pattern_rows; row++) {
        set_ui8vector_str(shape[row], 0, last_col, pattern[row]);
    }

    // Sanity check: the pattern must describe exactly h rows.
    if (row != h) {
        MCA_Error("init_forme_boulon1 i != h");
    }
}


// Renumbers objects in a contiguous way, for an image which has already
// been processed with several threads.
//
// Parameters:
//   mca            : top-level descriptor; this function reads mca->E,
//                    mca->alpha, mca->np and the per-thread descriptors
//                    mca->mcas[p] (their T, e0 and ne fields)
//   i0..i1, j0..j1 : inclusive row / column ranges of E to relabel
//
// A label is split in two parts: (label >> alpha) selects a row of D and
// (label & mask) selects the entry inside that row.
//
// Pass 1 rewrites every per-thread table T in place, pass 2 replaces each
// non-zero pixel of E by the D entry addressed by its current label.
// NOTE(review): D is taken from mcas[0] only; it presumably gives access to
// the T tables of all threads, which is what makes the values written in
// pass 1 visible through D in pass 2 -- confirm against the MCA setup code.
// ------------------------------------------------------------------
static void renumber_image(MCA * mca, int i0, int i1, int j0, int j1)
// ------------------------------------------------------------------
{
    // Number of roots met so far; also the new label given to the latest root.
    int32_t na = 0;
    uint32_t ** E = mca->E;
    uint32_t ** D = mca->mcas[0]->D;

    // Bit layout of a label: low `shift` bits = index in a row of D, upper bits = row.
    uint32_t shift = mca->alpha;
    uint32_t mask = (1 << shift) - 1;
    
    // Pass 1: walk the label range [e0, ne] of every thread, in thread order.
    for (int32_t p = 0; p < mca->np; p++) {
        MCA * mca_par = mca->mcas[p];
        uint32 * T = mca_par->T;
        for (uint32_t e = mca_par->e0; e <= mca_par->ne; e++) {
            if (T[e] != e) {
                // FindRoot_Dist
                // Follow the chain through D while the value keeps decreasing,
                // then store the value where the walk stopped.
                uint32_t r = T[e];
                // The initial value of `a` is overwritten before the first comparison.
                uint32_t a = e;
                do {
                    uint32_t e1 = r >> shift;
                    uint32_t e0 = r & mask;
                    a = r;
                    r = D[e1][e0];
                } while (r < a);
                T[e] = r;
            }
            else {
                // Root entry (T[e] == e): give it the next contiguous number, starting at 1.
                na += 1;
                T[e] = na;
            }
        }
    }

    // Pass 2: relabel the image; pixels equal to 0 are left untouched.
    for (int32_t i = i0; i <= i1; i++) {
        for (int32_t j = j0; j <= j1; j++) {
            if (E[i][j] != 0) {
                uint32_t e0 = E[i][j] & mask;
                uint32_t e1 = E[i][j] >> shift;
                E[i][j] = D[e1][e0];
            }
        }
    }
}


// ----------------------------
void mca_test1(int num_threads)
// ----------------------------
// Labels the built-in shape produced by init_forme_boulon1() with
// num_threads threads and displays the input and the resulting label matrix.
//
// The calling thread processes mcas[0]; threads 1 .. num_threads-1 are
// created for the other sub-descriptors and joined afterwards.
// All matrices allocated here are released before returning.
{
    uint8 ** shape;
    int row_lo, row_hi, col_lo, col_hi;

    pthread_barrier_init(&main_barrier, NULL, num_threads);

    // -- Allocation --
    init_forme_boulon1(&shape, &row_lo, &row_hi, &col_lo, &col_hi);

    const int height = row_hi - row_lo + 1;
    const int width  = col_hi - col_lo + 1;

    uint32 ** labels = ui32matrix(row_lo, row_hi, col_lo, col_hi);
    zero_ui32matrix(labels, row_lo, row_hi, col_lo, col_hi);

    // -- Descriptor creation and parameters --
    MCA * mca = MCA_pConstructor_Empty();
    MCA_Set_Size(mca, width, height);
    MCA_Set_ImageX(mca, shape);
    MCA_Set_ImageL(mca, labels);
    MCA_Set_NP(mca, num_threads);

    // -- MCA init --
    MCA_Initialize(mca);
    MCA_Display_Parameters(mca);

    display_ui8matrix_positive(mca->X, row_lo, row_hi, col_lo, col_hi, 5, "X0");

    // Start the additional threads, then do the share of thread 0 right here.
    int worker = 1;
    while (worker < num_threads) {
        pthread_create(&thread_table[worker], NULL, MCA_Label_Rosenfeld, (void *) mca->mcas[worker]);
        worker++;
    }

    MCA_Label_Rosenfeld(mca->mcas[0]);

    // Wait for every thread started above.
    worker = 1;
    while (worker < num_threads) {
        pthread_join(thread_table[worker], NULL);
        worker++;
    }

    display_ui32matrix_positive(mca->E, row_lo, row_hi, col_lo, col_hi, 5, "Efinal");

    // -- free --
    MCA_VERBOSE1(printf("Finalize\n"));
    MCA_Finalize(mca);

    MCA_VERBOSE1(printf("Free_matrix\n"));
    free_ui8matrix (shape,  row_lo, row_hi, col_lo, col_hi);
    free_ui32matrix(labels, row_lo, row_hi, col_lo, col_hi);
}



// -----------------------------------------------------------
void mca_test2(int num_threads, char * infile, char * outfile)
// -----------------------------------------------------------
// Loads the PGM image `infile`, binarizes it in place, and labels it with
// num_threads threads (the calling thread processes mcas[0]).
//
// When the global generate_output_image is set, the label matrix is reduced
// to 8 bits and saved as a BMP file named `outfile`; outside GIET-VM the
// labels are first renumbered contiguously by renumber_image().
//
// All matrices allocated here are released before returning.
{
    int row_lo, row_hi, col_lo, col_hi;
    RGBQuad colors[256];

    pthread_barrier_init(&main_barrier, NULL, num_threads);

    Palette_18ColorsBW(colors);

    MCA_VERBOSE1(printf("Loading file %s... ", infile));
    uint8 ** image = LoadPGM_ui8matrix(infile, &row_lo, &row_hi, &col_lo, &col_hi);
    MCA_VERBOSE1(printf("done.\n"));

    MCA_VERBOSE1(printf("Allocating memory... "));
    const int height = row_hi - row_lo + 1;
    const int width  = col_hi - col_lo + 1;

    // 8-bit copy of the labels (used only for the output file) and the label matrix itself.
    uint8  ** labels8 = ui8matrix (row_lo, row_hi, col_lo, col_hi);
    uint32 ** labels  = ui32matrix(row_lo, row_hi, col_lo, col_hi);

    zero_ui8matrix(labels8, row_lo, row_hi, col_lo, col_hi);
    zero_ui32matrix(labels, row_lo, row_hi, col_lo, col_hi);

    // Pre-processing: binarize the input in place before labeling.
    binarisation_ui8matrix(image, row_lo, row_hi, col_lo, col_hi, 20, 1, image);
    MCA_VERBOSE1(printf("done.\n"));

    MCA_VERBOSE1(printf("Allocating and initializing MCA... \n"));
    MCA * mca = MCA_pConstructor_Empty();

    // -- Parameters --
    MCA_Set_Size(mca, width, height);
    MCA_Set_ImageX(mca, image);
    MCA_Set_ImageL(mca, labels);
    MCA_Set_NP(mca, num_threads);

    // -- MCA init --
    MCA_Initialize(mca);
    MCA_Display_Parameters(mca);
    MCA_VERBOSE1(printf("End of MCA allocation and initialization.\n"));

    // Start the additional threads, run the share of thread 0 here, then join.
    CLOCK_APP_CREATE;
    for (int worker = 1; worker < num_threads; worker++) {
        pthread_create(&thread_table[worker], NULL, MCA_Label_Rosenfeld, (void *) mca->mcas[worker]);
    }

    MCA_Label_Rosenfeld(mca->mcas[0]);

    for (int worker = 1; worker < num_threads; worker++) {
        pthread_join(thread_table[worker], NULL);
    }
    CLOCK_APP_JOIN;

    if (generate_output_image) {
#if TARGET_OS != GIETVM
        renumber_image(mca, row_lo, row_hi, col_lo, col_hi);
#else
        printf("Warning: the output image has not been renumbered, it cannot be used as a comparison with the reference\n");
#endif
        mod_ui32matrix_ui8matrix(mca->E, row_lo, row_hi, col_lo, col_hi, labels8);
        MCA_VERBOSE1(printf("Saving file %s for verification... ", outfile));
        SaveBMP2_ui8matrix(labels8, width, height, colors, outfile);
        MCA_VERBOSE1(printf("done.\n"));
    }

    MCA_Finalize(mca);
    MCA_VERBOSE1(printf("Deallocating memory..."));
    free_ui8matrix (image,   row_lo, row_hi, col_lo, col_hi);
    free_ui8matrix (labels8, row_lo, row_hi, col_lo, col_hi);
    free_ui32matrix(labels,  row_lo, row_hi, col_lo, col_hi);
    MCA_VERBOSE1(printf("done.\n"));
}


// --------------------------------------------------------------
int main_test_mca(int num_threads, char * infile, char * outfile)
// --------------------------------------------------------------
// Runs mca_test2() on the given input / output files, bracketed by the
// CLOCK_* instrumentation macros. Always returns 0.
// NOTE(review): the CLOCK_* / PRINT_CLOCK macros are not defined in this file
// (presumably clock.h); their statement order is kept exactly as is.
{
    CLOCK_INIT(num_threads, 4); // 4 = Number of steps in body
    CLOCK_APP_START;

    // The measured work: load, label, optionally save.
    mca_test2(num_threads, infile, outfile);

    CLOCK_APP_END;
    CLOCK_FINALIZE;
    PRINT_CLOCK;
    CLOCK_FREE;
    
    return 0;
}


#if TARGET_OS == GIETVM
// ------------------------------------
__attribute__((constructor)) int main()
// ------------------------------------
#else
// -----------------------------
int main(int argc, char ** argv)
// -----------------------------
#endif
// Program entry point.
//
// Outside GIET-VM the options -i, -o, -n, -d, -g and -h are parsed with
// getopt (see usage()). On GIET-VM the command line is ignored and the
// number of threads is the number of processors reported by
// giet_procs_number().
//
// After printing the compile-time configuration, the function initializes
// the GIET-VM heaps (GIET-VM only) and the print lock, then runs
// main_test_mca().
//
// Returns 0 on success or after -h, 1 on an invalid option or option value,
// -1 when the number of threads is below 1. Exits with status 1 when the
// number of threads exceeds MAX_THREADS.
{
    char * infile = DEFAULT_IN_FILENAME;
    char * outfile = DEFAULT_OUT_FILENAME;
    int num_threads = DEFAULT_NTHREADS;

    MCA_VERBOSE1(printf("*** Starting application Rosenfeld ***\n"));

#if TARGET_OS != GIETVM // @QM I think the giet has some random (uninitialized) values for argc and argv
    int ch;

    // getopt() reports the end of the options with -1.
    while ((ch = getopt(argc, argv, "i:o:n:hdg")) != -1) {
        switch (ch) {
        case 'i':
            infile = optarg;
            break;
        case 'o':
            outfile = optarg;
            break;
        case 'n': {
            // strtol instead of atoi: trailing garbage is detected and
            // out-of-range input has a defined result.
            char * end = NULL;
            long value = strtol(optarg, &end, 10);
            if (end == optarg || *end != '\0') {
                fprintf(stderr, "*** Error: Invalid number of threads: %s\n", optarg);
                usage(argv[0]);
                return 1;
            }
            // Clamp before narrowing to int; the range checks further down
            // reject both clamped values with their usual messages.
            if (value < 0) {
                value = 0;
            }
            if (value > MAX_THREADS) {
                value = (long) MAX_THREADS + 1;
            }
            num_threads = (int) value;
            break;
        }
        case 'h':
            usage(argv[0]);
            return 0;
        case 'd':
#if !FEATURES
            fprintf(stderr, "*** Error: Features display requires features computation\n");
            return 1;
#else
            display_features = 1;
            break;
#endif
        case 'g':
            generate_output_image = 1;
            break;
        default:
            usage(argv[0]);
            return 1;
        }
    }

    // Check arguments
    if (num_threads < 1) {
        fprintf(stderr, "*** Error: The number of threads must at least be 1\n");
        usage(argv[0]);
        return -1;
    }
#endif

#if TARGET_OS == GIETVM
    // One thread per processor of the platform. The mesh dimensions are kept
    // for the heap initialization below.
    unsigned int xsize, ysize, nprocs;
    giet_procs_number(&xsize, &ysize, &nprocs);
    num_threads = xsize * ysize * nprocs;
#endif

    if (num_threads > MAX_THREADS) {
        printf("*** Error: The maximum number of threads is %d, i.e. less than the current number of threads.\n", MAX_THREADS);
        printf("Please recompile with a bigger MAX_THREADS value.\n");
        exit(1);
    }

    MCA_VERBOSE1(printf("Parameters:\n"));
    MCA_VERBOSE1(printf("- Number of threads: %d\n", num_threads));
    MCA_VERBOSE1(printf("- Input file: %s\n", infile));
    MCA_VERBOSE1(printf("- Output file: %s\n", outfile));
#if FAST
    MCA_VERBOSE1(printf("- Using decision trees (fast): yes\n"));
#elif SLOW
    MCA_VERBOSE1(printf("- Using decision trees (fast): no\n"));
#endif
#if FEATURES
    MCA_VERBOSE1(printf("- Computing features: yes\n"));
#else
    MCA_VERBOSE1(printf("- Computing features: no\n"));
#endif
#if PARMERGE
    MCA_VERBOSE1(printf("- Parallel Merge: yes\n"));
#else
    MCA_VERBOSE1(printf("- Parallel Merge: no\n"));
#endif
#if ARSP
    MCA_VERBOSE1(printf("- Optimization ARemSP: yes\n"));
#else
    MCA_VERBOSE1(printf("- Optimization ARemSP: no\n"));
#endif
#if PYR_BARRIERS
    MCA_VERBOSE1(printf("- Pyramidal Barriers: yes\n"));
#else
    MCA_VERBOSE1(printf("- Pyramidal Barriers: no\n"));
#endif


#if TARGET_OS == GIETVM
    giet_tty_alloc(1);
    MCA_VERBOSE1(printf("Initializing heaps... "));
    // One heap per (x, y) pair of the mesh. The inner loop is bounded by the
    // y dimension (it previously reused the x bound).
    for (unsigned int x = 0; x < xsize; x++) {
        for (unsigned int y = 0; y < ysize; y++) {
            heap_init(x, y);
        }
    }
    MCA_VERBOSE1(printf("done.\n"));
#endif

    // Default mutex attributes: the second argument is an attribute pointer,
    // not a process-shared constant.
    pthread_mutex_init(&print_lock, NULL);
    main_test_mca(num_threads, infile, outfile);

    return 0;
}

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

