#include "benchmark.h"
#include "system.h"
#include "stdlib.h"
#include "stdio.h"
#include "stdint.h"
#include "../matrix_multiplication/matrix_multiplication.h"
#include "../common/common.h"

/* Lock shared by both benchmarks in this file, passed by address to
   lock_lock()/lock_unlock(): the ST benchmark holds it while allocating
   its matrices, the MT benchmark holds it around the shared matrices'
   allocation/release and the start/stop/end bookkeeping. */
static uint32_t matrix_lock;

/*
 * Allocate the rows of three square int matrices.
 *
 * matrix_a, matrix_b, matrix_d : arrays of at least `size` row pointers,
 *                                provided by the caller
 * size                         : number of rows, and number of ints per row
 *
 * For every row index the a, b and d rows are allocated in that order.
 * The malloc results are stored without being checked, so a failed
 * allocation leaves NULL in the corresponding slot.  Nothing is written
 * when size <= 0.
 */
void benchmark_matrix_malloc (int ** matrix_a,
                              int ** matrix_b,
                              int ** matrix_d,
                              int    size)
{
  /* Every row has the same length, so compute its byte size once. */
  const size_t row_bytes = (size_t) size * sizeof **matrix_a;
  int row = 0;

  while (row < size)
    {
      matrix_a[row] = malloc(row_bytes);
      matrix_b[row] = malloc(row_bytes);
      matrix_d[row] = malloc(row_bytes);
      row++;
    }
}

/*
 * Release the rows of three square int matrices.
 *
 * matrix_a, matrix_b, matrix_d : arrays of at least `size` row pointers
 * size                         : number of rows to release in each matrix
 *
 * For every row index the a, b and d rows are freed in that order.  Only
 * the rows are released; the arrays of row pointers themselves are left
 * to the caller.  Nothing is touched when size <= 0.
 */
void benchmark_matrix_free (int ** matrix_a,
                            int ** matrix_b,
                            int ** matrix_d,
                            int    size)
{
  /* Walk the three row-pointer arrays in step, front to back. */
  for (int remaining = size; remaining > 0; --remaining)
    {
      free(*matrix_a++);
      free(*matrix_b++);
      free(*matrix_d++);
    }
}

/*
 * Fill the two operand matrices with a running counter.
 *
 * matrix_a, matrix_b : size x size matrices with all rows already allocated
 * size               : number of rows and columns
 *
 * The counter starts at 0 and increases by one per element, visiting
 * matrix_a in row-major order, so matrix_a[i][j] = i*size + j.  The same
 * value is stored at the mirrored position of matrix_b, which makes
 * matrix_b the transpose of matrix_a.
 */
void benchmark_matrix_init (int ** matrix_a,
                            int ** matrix_b,
                            int    size)
{
  int value = 0;

  for (int row = 0; row < size; row++)
    {
      int * a_row = matrix_a[row];

      for (int col = 0; col < size; col++, value++)
        {
          a_row[col]         = value;
          matrix_b[col][row] = value;
        }
    }
}

/*
 * Check a product matrix and report the outcome on stdout.
 *
 * matrix_d : size x size matrix to verify
 * size     : number of rows and columns
 *
 * The value expected at [i][j] is the sum over k of
 * (i*size + k) * (j*size + k), i.e. the product of the two operands as
 * filled by benchmark_matrix_init in this file.  The scan stops at the
 * first mismatch and prints a "KO !!!" line with its position, the value
 * found and the value expected; when every element matches (including the
 * case size <= 0) it prints "OK".
 */
void benchmark_matrix_validation (int ** matrix_d,
                                  int    size)
{
  for (int row = 0; row < size; row++)
    {
      const int a_base = row * size;

      for (int col = 0; col < size; col++)
        {
          const int b_base = col * size;
          int expected = 0;

          for (int k = 0; k < size; k++)
            expected += (a_base + k) * (b_base + k);

          if (matrix_d[row][col] == expected)
            continue;

          /* First mismatch: report it and stop without printing "OK". */
          printf("KO !!! (matrix_d[%d][%d] = %d (!= %d))\n",row,col,matrix_d[row][col],expected);
          return;
        }
    }

  printf("OK\n");
}

/*
 * Single-threaded matrix multiplication benchmark.
 *
 * Allocates three size x size int matrices, fills the two operands with
 * benchmark_matrix_init, times matrix_multiplication_st with cpu_cycles(),
 * checks the product with benchmark_matrix_validation and releases all the
 * memory again.  Progress is reported on stdout.
 *
 * size : number of rows and columns of each matrix
 *
 * Returns the cpu_cycles() difference measured around the multiplication
 * only (allocation, init, verification and free are not timed), or 0 when
 * the matrices could not be allocated.
 */
int _benchmark_matrix_multiplication_st (unsigned int size)
{
  printf("\n");
  printf("================================\n");
  printf("Benchmark Matrix Multiplication (ST)\n");
  printf("================================\n");
  printf("\n");

  /* size is unsigned, so it is printed with %u. */
  printf(" * Size : %u\n",size);
  
  int cycle_begin, result;
  int i, rows, alloc_ok;

  result = 0;

  /* The matrix helpers take the dimension as int; the checks below use the
     same converted value so they walk exactly the rows that
     benchmark_matrix_malloc fills in. */
  rows = (int) size;

  printf(" * Malloc...\n");

  lock_lock(&matrix_lock);

  int ** matrix_local_a = (int**) malloc(size*sizeof(int*));
  int ** matrix_local_b = (int**) malloc(size*sizeof(int*));
  int ** matrix_local_d = (int**) malloc(size*sizeof(int*));

  /* With no rows to hold, malloc may legitimately return NULL. */
  alloc_ok = (rows <= 0)
    || ((matrix_local_a != NULL) &&
        (matrix_local_b != NULL) &&
        (matrix_local_d != NULL));

  if (alloc_ok)
    {
      benchmark_matrix_malloc (matrix_local_a,
                               matrix_local_b,
                               matrix_local_d,
                               size);

      /* benchmark_matrix_malloc stores the malloc results unchecked. */
      for (i=0; i<rows; ++i)
        if ((matrix_local_a[i] == NULL) ||
            (matrix_local_b[i] == NULL) ||
            (matrix_local_d[i] == NULL))
          alloc_ok = 0;

      /* free(NULL) is a no-op, so a partially allocated set of rows can be
         released as a whole. */
      if (!alloc_ok)
        benchmark_matrix_free (matrix_local_a,
                               matrix_local_b,
                               matrix_local_d,
                               size);
    }

  if (!alloc_ok)
    {
      free(matrix_local_a);
      free(matrix_local_b);
      free(matrix_local_d);

      lock_unlock(&matrix_lock);

      printf("KO !!! (malloc failed)\n");

      return 0;
    }

  lock_unlock(&matrix_lock);
  
  printf(" * Init...\n");

  benchmark_matrix_init (matrix_local_a,
                         matrix_local_b,
                         size);
  
  printf(" * Matrix multiplication (size : %u)... \n",size);

  /* Only the multiplication itself is timed. */
  cycle_begin = cpu_cycles();
  matrix_multiplication_st(matrix_local_a,
                           matrix_local_b,
                           matrix_local_d,
                           size);
  result += cpu_cycles()-cycle_begin;

  printf(" * Verification... ");
  
  benchmark_matrix_validation (matrix_local_d, size);
  
  printf(" * Free...\n");

  /* The allocation above is serialised by matrix_lock, presumably because
     the heap is shared with other callers (TODO confirm); release the
     memory under the same lock so that every heap call in this file is
     guarded the same way. */
  lock_lock(&matrix_lock);

  benchmark_matrix_free (matrix_local_a,
                         matrix_local_b,
                         matrix_local_d,
                         size);
  
  free(matrix_local_a);
  free(matrix_local_b);
  free(matrix_local_d);

  lock_unlock(&matrix_lock);

  return result;
}

/* Matrices shared by all callers of _benchmark_matrix_multiplication_mt:
   allocated and initialised by the first caller, validated and freed by
   the caller that brings matrix_nb_thread_stop up to
   matrix_nb_thread_start. */
static int ** matrix_global_a;
static int ** matrix_global_b;
static int ** matrix_global_d;

/* Bookkeeping for the MT benchmark.  All three start at 0 (static storage)
   and, in this file, are only read or written while matrix_lock is held. */
static int matrix_nb_thread_start; /* callers that have entered the benchmark */
static int matrix_nb_thread_stop;  /* callers that have returned from matrix_multiplication_mt */
static int matrix_end;             /* set to 1 once the shared matrices are freed; later callers return at once */

/*
 * Multi-threaded matrix multiplication benchmark; meant to be called by
 * every participating thread.
 *
 * The first caller allocates and initialises the shared matrices.  Every
 * caller then runs matrix_multiplication_mt on them, timed with
 * cpu_cycles().  The caller whose completion makes the stop count equal to
 * the start count validates the result, frees the shared matrices and
 * marks the benchmark as finished.  All shared state is accessed with
 * matrix_lock held.  Progress is reported on stdout.
 *
 * size         : number of rows and columns of each matrix
 * lock_by_line : forwarded unchanged to matrix_multiplication_mt
 *
 * Returns the cpu_cycles() difference measured around this caller's
 * matrix_multiplication_mt call, or 0 when the benchmark was already
 * finished or the shared matrices could not be allocated.  After a failed
 * allocation the shared state is left untouched, so a later caller will
 * try again as the first thread.
 */
int _benchmark_matrix_multiplication_mt (int size, int lock_by_line)
{
  printf("\n");
  printf("================================\n");
  printf("Benchmark Matrix Multiplication (MT)\n");
  printf("================================\n");
  printf("\n");
  
  printf(" * Size         : %d\n",size);
  printf(" * Lock by line : %d\n",lock_by_line);
  
  int cycle_begin, result;

  result = 0;

  lock_lock(&matrix_lock);

  if (matrix_end != 0)
    {
      printf("Benchmark is already finished\n");

      lock_unlock(&matrix_lock);
      
      return 0;
    }
      
  printf(" * Start number %d\n",matrix_nb_thread_start);
  
  // first thread: allocate and fill the shared matrices
  if (matrix_nb_thread_start==0)
    {
      int i;
      int alloc_ok;

      printf(" * Malloc...\n");

      matrix_global_a = (int**) malloc(size*sizeof(int*));
      matrix_global_b = (int**) malloc(size*sizeof(int*));
      matrix_global_d = (int**) malloc(size*sizeof(int*));

      // with no rows to hold, malloc may legitimately return NULL
      alloc_ok = (size <= 0)
        || ((matrix_global_a != NULL) &&
            (matrix_global_b != NULL) &&
            (matrix_global_d != NULL));

      if (alloc_ok)
        {
          benchmark_matrix_malloc (matrix_global_a,
                                   matrix_global_b,
                                   matrix_global_d,
                                   size);

          // benchmark_matrix_malloc stores the malloc results unchecked
          for (i=0; i<size; ++i)
            if ((matrix_global_a[i] == NULL) ||
                (matrix_global_b[i] == NULL) ||
                (matrix_global_d[i] == NULL))
              alloc_ok = 0;

          // free(NULL) is a no-op, so a partial set of rows can be
          // released as a whole
          if (!alloc_ok)
            benchmark_matrix_free (matrix_global_a,
                                   matrix_global_b,
                                   matrix_global_d,
                                   size);
        }

      if (!alloc_ok)
        {
          free(matrix_global_a);
          free(matrix_global_b);
          free(matrix_global_d);

          matrix_global_a = NULL;
          matrix_global_b = NULL;
          matrix_global_d = NULL;

          printf("KO !!! (malloc failed)\n");

          // matrix_nb_thread_start is still 0: the next caller retries
          lock_unlock(&matrix_lock);

          return 0;
        }
           
      printf(" * Init...\n");

      benchmark_matrix_init (matrix_global_a,
                             matrix_global_b,
                             size);
    }

  matrix_nb_thread_start ++;

  lock_unlock(&matrix_lock);
  
  printf(" * Matrix multiplication (size : %d)... \n",size);

  // only the multiplication itself is timed
  cycle_begin = cpu_cycles();
  matrix_multiplication_mt(matrix_global_a,
                           matrix_global_b,
                           matrix_global_d,
                           size,
                           lock_by_line);
  result += cpu_cycles()-cycle_begin;

  lock_lock(&matrix_lock);

  printf(" * Stop number %d\n",matrix_nb_thread_stop);
  
  matrix_nb_thread_stop ++;
    
  // last thread to finish: every caller that started has now stopped
  if (matrix_nb_thread_stop == matrix_nb_thread_start)
    {
      printf(" * Verification... ");

      benchmark_matrix_validation (matrix_global_d, size);
      
      printf(" * Free...\n");
   
      benchmark_matrix_free (matrix_global_a,
                             matrix_global_b,
                             matrix_global_d,
                             size);
      
      free(matrix_global_a);
      free(matrix_global_b);
      free(matrix_global_d);

      // do not leave dangling pointers in the shared state
      matrix_global_a = NULL;
      matrix_global_b = NULL;
      matrix_global_d = NULL;

      matrix_end = 1;
    }

  lock_unlock(&matrix_lock);
  
  return result;
}

/* Run the single-threaded benchmark with the MATRIX_MULTIPLICATION_ST_SIZE
   setting (defined outside this file) and return its result unchanged. */
int benchmark_matrix_multiplication_st (void)
{
  return _benchmark_matrix_multiplication_st (MATRIX_MULTIPLICATION_ST_SIZE);
}
/* Run the multi-threaded benchmark with the MATRIX_MULTIPLICATION_MT_SIZE
   and MATRIX_MULTIPLICATION_MT_LOCK_BY_LINE settings (defined outside this
   file) and return its result unchanged. */
int benchmark_matrix_multiplication_mt (void)
{
  return _benchmark_matrix_multiplication_mt (MATRIX_MULTIPLICATION_MT_SIZE,
                                              MATRIX_MULTIPLICATION_MT_LOCK_BY_LINE);
}


