///////////////////////////////////////////////////////////////////////////////
// File   :  main.c
// Date   :  November 2013
// Author :  Cesar Fuguet Tortolero <cesar.fuguet-tortolero@lip6.fr>
//
// Description :
//
//      Sort application using the GIET-VM OS. This application uses the
//      barrier routines to apply a sort algorithm in several stages.
//
//      Considerations :
//
//          - It supports up to 256 processors and the number of processors
//            must be a power of 2.
//
//          - If there is only one TTY available, this application uses a spin
//            lock to avoid several threads writting at the same time.
//
//          - This application must be executed on a cache coherent
//            architecture. Otherwise some modifications must be applied
//
//          - The processors executing this application must have a contiguous
//            processor id and the first processor must have id 0.
//
///////////////////////////////////////////////////////////////////////////////

#include "stdio.h"
#include "mapping_info.h"
#include "hard_config.h"
#include "barrier.h"

#define ARRAY_LENGTH    512 
#define IPT             (ARRAY_LENGTH / *nb_thread) // ITEMS PER THREAD

////////////////////////////////////////////////////////////////////////////////
// Processors other than 0 display algorithm state
// The processor 0 always displays some information so this does not affect him 

#define VERBOSE         1

////////////////////////////////////////////////////////////////////////////////
// Define printf according to verbosity option and number of available
// TTY

#if (VERBOSE == 1)
#   define printf(...)     giet_shr_printf(__VA_ARGS__)
#else      
#   define printf(...)
#endif

#define task0_printf(...) if(thread_id == 0) giet_shr_printf(__VA_ARGS__)

#define exit    giet_exit
#define procid  giet_procid
#define rand    giet_rand

int array0[ARRAY_LENGTH];
int array1[ARRAY_LENGTH];

int init_ok = 0;

void bubbleSort(
        int * array,
        unsigned int length,
        unsigned int init_pos);

void merge(
        int * array,
        int * result,
        int length,
        int init_pos_a,
        int init_pos_b,
        int init_pos_result);

///////////////////////////////////////////////////
// This application support at most 256 processors
// Number of barriers = log2(nb_thread)

giet_barrier_t barrier[8];

//////////////////////////////////////////
__attribute__ ((constructor)) void sort()
{
    int thread_id = giet_thread_id();
    unsigned int* nb_thread;
    int * src_array;
    int * dst_array;
    int i;
    
    unsigned int time_start = giet_proctime();
    unsigned int time_end;   

    giet_vobj_get_vbase( "sort" , 
                         "sort_args", 
                         (unsigned int*)&nb_thread );
    
    task0_printf("\n[ Thread 0 ] Starting sort application with %d threads "
                 "at cycle %d\n", *nb_thread, time_start);

    ///////////////////////////
    // Barriers Initialization

    if (thread_id == 0)
    {
        for (i = 0; i < __builtin_ctz(*nb_thread); i++)
        {
            barrier_init(&barrier[i], *nb_thread >> i);
        }

        init_ok = 1;
    }
    else
    {
        while(!init_ok);
    }

    ////////////////////////
    // Array Initialization

    for (i = IPT * thread_id; i < IPT * (thread_id + 1); i++)
    {
        array0[i] = rand();
    }

    ///////////////////////////////////
    // Parallel sort of array elements

    printf("[ Thread %d ] Stage 0: Sorting...\n\r", thread_id);

    bubbleSort(array0, IPT, IPT * thread_id);

    printf("[ Thread %d ] Finishing Stage 0\n\r", thread_id);

    for (i = 0; i < __builtin_ctz(*nb_thread); i++)
    {
        asm volatile ("sync");
        barrier_wait(&barrier[i]);

        if((thread_id % (2 << i)) != 0)
        {
            printf("[ Thread %d ] Quit\n\r", thread_id );
            exit("Completed");
        }

        printf("[ Thread %d ] Stage %d: Sorting...\n\r", thread_id, i+1);

        if((i % 2) == 0)
        {
            src_array = &array0[0];
            dst_array = &array1[0];
        }
        else
        {
            src_array = &array1[0];
            dst_array = &array0[0];
        }

        merge(src_array, dst_array
                , IPT << i
                , IPT * thread_id
                , IPT * (thread_id + (1 << i))
                , IPT * thread_id
                );

        printf("[ Thread %d ] Finishing Stage %d\n\r", thread_id, i + 1);
    }

    int success;
    int failure_index;

    //////////////////////////////
    // Verify the resulting array

    if(thread_id == 0)
    {
        success = 1;

        for(i=0; i<(ARRAY_LENGTH-1); i++)
        {
            if(dst_array[i] > dst_array[i+1])
            {

                success = 0;
                failure_index = i;
                break;
            }
        }

        time_end = giet_proctime();

        printf("[ Thread 0 ] Finishing sort application at cycle %d\n"
               "[ Thread 0 ] Time elapsed = %d\n",
                time_end, (time_end - time_start) );

        if (success)
        {
            exit("!!! Success !!!");
        }
        else
        {
            printf("[ Thread 0 ] Failure!! Incorrect element: %d\n\r", 
                   failure_index);
            for(i=0; i<ARRAY_LENGTH; i++)
            {
                printf("array[%d] = %d\n", i, dst_array[i]);
            }
            exit("!!!  Failure !!!");
        }
    }
    exit("Completed");
}

////////////////////////////////////
void bubbleSort( int *        array,
                 unsigned int length,
                 unsigned int init_pos )
{
    int i;
    int j;
    int aux;

    for(i = 0; i < length; i++)
    {
        for(j = init_pos; j < (init_pos + length - i - 1); j++)
        {
            if(array[j] > array[j + 1])
            {
                aux          = array[j + 1];
                array[j + 1] = array[j];
                array[j]     = aux;
            }
        }
    }
}

/////////////
void merge(
        int * array,
        int * result,
        int length,
        int init_pos_a,
        int init_pos_b,
        int init_pos_result)
{
    int i;
    int j;
    int k;

    i = 0;
    j = 0;
    k = init_pos_result;

    while((i < length) || (j < length))
    {
        if((i < length) && (j < length))
        {
            if(array[init_pos_a + i] < array[init_pos_b + j])
            {
                result[k++] = array[init_pos_a + i];
                i++;
            }
            else
            {
                result[k++] = array[init_pos_b + j];
                j++;
            }
        }
        else if(i < length)
        {
            result[k++] = array[init_pos_a + i];
            i++;
        }
        else
        {
            result[k++] = array[init_pos_b + j];
            j++;
        }
    }
}

/* vim: tabstop=4 : shiftwidth=4 : expandtab
*/
