#include "system.h"
#include "stdio.h"
#include "stdlib.h"
#include "matrice.h"

#include "../segmentation.h"

/* Number of processors taking part in the benchmark: sizes the per-processor
 * arrays below and is the initial value of the barrier counter. */
#define NPROCS 16
/* Number of unsigned int elements each processor sorts in main(). */
#define SIZE 1000 
/* NOTE(review): SORT_TYPE is not referenced in the part of the file visible
 * here (main() iterates over sort types 0..3 itself) — confirm it is still needed. */
#define SORT_TYPE 0

/* DMA mapped registers offset, expressed in unsigned int words from DMA_BASE
 * (see how dma_memcpy() indexes them). */
#define DMA_SRC_REG          0
#define DMA_DST_REG          1
#define DMA_LEN_REG          2
#define DMA_RESET_REG        3
#define DMA_IRQ_DISABLED     4

/* Lock word handed to lock_lock()/lock_unlock(); main() takes it around the
 * barrier counter updates and around each DMA transfer. */
uint32_t lock=0;
/* Barrier counter: decremented towards 0 for the first barrier in main(),
 * then incremented back to NPROCS for the second one. */
volatile int compteur=NPROCS;

/* Run-time copy of the processor count, compared against p+1 in main(). */
volatile int nprocs=NPROCS;

/* Sort work area: processor p uses the slice starting at p*(SIZE+200). */
unsigned int SortArr0[NPROCS*(SIZE+200)];
//unsigned int SortArr0[4*4*SIZE];
/* DMA test buffers: processor p uses bytes [512*p, 512*p+511] of each. */
unsigned char DmaSourceArray[NPROCS*512]; 
unsigned char DmaDestArray[NPROCS*512]; 

/* Sort dispatcher and the algorithms it selects; the "type N" notes give the
 * value of SORT()'s `type` argument that selects each algorithm. */
void SORT(unsigned int *base, unsigned int n, int type);
void insertion_sort(unsigned int *base, unsigned int n); // type 2
void selection_sort(unsigned int *base, unsigned int n); // type 1
void bubble_sort(unsigned int *base, unsigned int n);    // type 3
void shellSortPhase(unsigned int a[],unsigned int length, int gap);
void shellSort(unsigned int *base, unsigned int n);     // type 0

/* DMA helper and end-of-simulation routines (both end_simulation_* functions
 * print a message and then spin forever).
 * NOTE(review): the parameter is spelled `restval` here but `retval` in the
 * definition of end_simulation_error() — harmless, but probably a typo. */
void dma_memcpy(unsigned char *source,unsigned char *dest,unsigned int length);
void end_simulation_error(unsigned int restval);
void end_simulation_good();

/*
 * Per-processor entry point of the benchmark.
 *
 * Every processor whose number is below NPROCS:
 *   1. runs four sort passes (SORT types 0..3) on its own slice of SortArr0,
 *      checks each result and prints the cycles elapsed since the previous
 *      measurement;
 *   2. (processor 0 only) fills DmaSourceArray from gQSortNum0;
 *   3. waits on a counter-based barrier;
 *   4. performs 200 DMA copies with pseudo-random length and offsets, each
 *      verified by comparing byte sums of source and destination;
 *   5. waits on a second barrier, then calls end_simulation_good().
 * Processors numbered NPROCS or above print the greeting and then spin forever.
 * The function never returns: it ends in end_simulation_good() or
 * end_simulation_error(), both of which loop forever.
 */
int main()
{
  register int p; 

  int beg_cycle, end_cycle;

  beg_cycle = cpu_cycles();
   
  /* p identifies the calling processor; it selects every per-processor slice below. */
  p=procnum();

  puts("Hello from processor ");
  puti(p);
  putchar('\n');

  int i;
  int j;
  unsigned int* SortArray;
   
  /* Extra processors (beyond NPROCS) are parked here and take no part in the test. */
  if(p >= NPROCS){
        while(1) {
            asm volatile("nop");
        }
  }

//*------------------------------------
  /* Sort phase: i is passed to SORT() as the algorithm selector (0..3). */
  for(i=0;i<4;i++){
    puts("Memory copy \n");
    SortArray = SortArr0 + p*(SIZE+200);
    /* Copies SIZE*4 bytes starting at element p*SIZE+10*i of gQSortNum0.
     * NOTE(review): the 4 is presumably sizeof(unsigned int) on the target — confirm. */
    memcpy(SortArray, gQSortNum0 + p*SIZE+10*i,SIZE*4);
    puts("Sort... \n");
    SORT((unsigned int *) (SortArray), (unsigned int) SIZE, i);

    /* Verify ascending order; end_simulation_error() does not return. */
    for (j = 1; j < SIZE; j++)
    {
      if (SortArray[j] < SortArray[j-1])
      {
        puts("ucbqsort: failed\n");
        end_simulation_error((p+1)*i);
      }

    }

    puts("ucbqsort: success\n");
    /* Report the cycles spent since the previous measurement and restart the timer. */
    end_cycle = cpu_cycles();
    printf( "nombre cycles cpu : %i\n", end_cycle-beg_cycle);
    beg_cycle = end_cycle;
  }


//
 /* Processor 0 alone builds the DMA source data: one byte per gQSortNum0
  * element (each element narrowed through a char cast). */
 if(!p){
    puts("Create the DMA source array");
    unsigned int *isptr = gQSortNum0;
    unsigned char *cdptr = DmaSourceArray;
    int k;
    for(k=0; k<512*NPROCS;k++){
      *cdptr = (char) *isptr;
      cdptr++;
      isptr++;
      //puti(k); // added for debug
      /* NOTE(review): a newline is still printed on every iteration; this looks
       * like a leftover of the debug print above — confirm it is wanted. */
      putchar('\n'); 
    }
  }


  /* First barrier: each processor decrements compteur under the lock; every
   * processor except the one that brings it to 0 then spins until it reads 0.
   * This keeps the other processors from using DmaSourceArray before
   * processor 0 has decremented, i.e. before it has finished filling it. */
  puts("Decrement the counter");
  int wait=0;
  if(p+1 <= nprocs){
    lock_lock(&lock);
    compteur--;
    putchar('\n');
    puti(compteur);
    putchar('\n');
    if(compteur)
      wait=1;
    lock_unlock(&lock);
  }

  puts("Waiting for barrier");
  while(compteur&&wait);
  puts("Barrier released");

  /* Pseudo-random generator: each step shifts right by one and XORs in
   * 0xd0000001 when the bit shifted out was 1. The seed does not depend on p,
   * so every processor draws the same sequence. The first 1000 steps are
   * discarded. */
  unsigned int m_lfsr = -1;
  for(i=0; i<1000;i++)
    m_lfsr = (m_lfsr >> 1) ^ ((-(m_lfsr & 1)) & 0xd0000001);
  /* DMA phase: 200 copies inside this processor's 512-byte slices.
   * length <= 255 and both offsets <= 127, so offset+length <= 382 < 512 and
   * a copy never leaves the slice. */
  for(i=0; i<200 ; i++){  
    m_lfsr = (m_lfsr >> 1) ^ ((-(m_lfsr & 1)) & 0xd0000001);
    unsigned int length = m_lfsr % (256);
    m_lfsr = (m_lfsr >> 1) ^ ((-(m_lfsr & 1)) & 0xd0000001);
    unsigned int src_offset = m_lfsr %(128);
    m_lfsr = (m_lfsr >> 1) ^ ((-(m_lfsr & 1)) & 0xd0000001);
    unsigned int dest_offset = m_lfsr %(128);
    unsigned int source_sum=0;
    unsigned int dest_sum=0;
    /* Reference value: byte sum of the region about to be copied. */
    for(j=0;j<length;j++){
      source_sum+=(unsigned int)DmaSourceArray[512*p+j+src_offset];
    }
    /* Clear the whole destination slice so that, after the copy, its byte sum
     * is exactly the sum of the copied bytes. */
    for(j=0;j<512;j++){
      DmaDestArray[p*512+j] = 0;
    }
    /* The lock is held for the whole transfer; presumably there is a single
     * DMA engine shared by all processors — confirm. */
    lock_lock(&lock);
    puts("Copying with the DMA... ");
    dma_memcpy(DmaSourceArray+src_offset+512*p,DmaDestArray+dest_offset+512*p,length);
    lock_unlock(&lock);
    for(j=0;j<512;j++){
      dest_sum+=(unsigned int)DmaDestArray[512*p+j];
    }
    /* A mismatch means bytes were lost, altered or written outside the copied
     * region but inside the slice; the simulation is stopped. */
    if(source_sum!=dest_sum){
    /*
      puts("Source sum, Dest sum :");
      puti(source_sum);
      putchar('\n');
      puti(dest_sum);
      putchar('\n');
      puts("Source addr, Dest addr :");
      puti(DmaSourceArray+src_offset+512*p);
      putchar('\n');
      puti(DmaDestArray+dest_offset+512*p);
      putchar('\n');
      puts("Length :");
      puti(length);
      putchar('\n');
    */
      end_simulation_error((p+1)*5);
    }
    puts("done !");
  }
  /* Second barrier: compteur is counted back up to NPROCS.
   * `wait` is not reset here, but the spin loop below also exits as soon as
   * compteur == NPROCS, so a stale 1 is harmless.
   * NOTE(review): this reuses the counter the first barrier spins on. If a
   * processor finished the DMA phase and incremented compteur before another
   * one had observed compteur == 0 in the first wait loop, the latter would
   * never leave that loop — confirm the timing rules this out. */
  if(p+1 <= nprocs){
    lock_lock(&lock);
    compteur++;
    putchar('\n');
    puti(compteur);
    putchar('\n');
    if(compteur != NPROCS)
      wait=1;
    lock_unlock(&lock);
  }

  while((compteur!=NPROCS) && wait);

  end_simulation_good();
}

/*
 * Copies `length` bytes from `source` to `dest` using the DMA device whose
 * registers are mapped at DMA_BASE, and returns once the device's length
 * register reads back 0.
 *
 * source, dest: addresses handed to the device as unsigned int values.
 * length:       value written to the length register.
 *
 * The function does no locking itself; main() calls it with `lock` held.
 * NOTE(review): the register semantics (length write starts the transfer,
 * length register reads 0 on completion) are inferred from the polling loop
 * below — confirm against the DMA model.
 */
void dma_memcpy(unsigned char *source,unsigned char *dest,unsigned int length){
  int i;
  /* Registers are accessed as volatile unsigned int words at DMA_BASE + offset. */
  volatile unsigned int *dma_base = (volatile unsigned int*) DMA_BASE;
  *(dma_base + DMA_RESET_REG) = 0;
  /* The pointer-to-unsigned-int casts assume addresses fit in an unsigned int. */
  *(dma_base + DMA_SRC_REG) = (unsigned int) source;
  *(dma_base + DMA_DST_REG) = (unsigned int) dest;
  /* Completion is detected by polling below, not by interrupt. */
  *(dma_base + DMA_IRQ_DISABLED) = 1;
  /* Length is written last, after source and destination are set up. */
  *(dma_base + DMA_LEN_REG) = length;
  /* Poll: pause for 300 nops between two reads of the length register. */
  do{
    for(i=0 ; i<300; i++){
      asm volatile("nop");
    } 
  }while( *(dma_base + DMA_LEN_REG)!= 0 );

}

/*
 * Prints the success banner, then parks the calling processor in an endless
 * loop. Never returns.
 */
void end_simulation_good(){
    puts("\n Simulation Good !!!!!!!!");
    for (;;) {
        /* spin forever */
    }
}

/*
 * Prints the error banner, then parks the calling processor in an endless
 * loop. Never returns. `retval` is accepted but not used.
 */
void end_simulation_error(unsigned int retval){
    puts("\n Simulation error ********");
    for (;;) {
        /* spin forever */
    }
}

//---- insertion sort: not suited to large arrays (> 100 elements) --
/*
 * Sorts base[0..n-1] in ascending order, in place, by sequential insertion.
 *
 * base: array to sort.
 * n:    number of elements in base.
 *
 * Prints "Insertion Sort" first, then a '-' before and a '+' after each
 * element insertion (debug trace).
 */
void insertion_sort(unsigned int *base, unsigned int n) 
{
  int cur;   /* index of the element being inserted */
  int slot;  /* position where that element must go */
  int k;     /* cursor used while shifting */
  int held;  /* value being inserted */

  puts("Insertion Sort\n");

  cur = 1;
  while (cur < n)
  {
    putchar('-'); // added for debug

    /* Remember the value at cur: its cell is overwritten by the shift below. */
    held = base[cur];

    /* Scan from the front for the first element that is not smaller than
       held; the scan stops at cur at the latest, since base[cur] == held. */
    slot = 0;
    while (base[slot] < held)
      slot++;

    /* Move base[slot..cur-1] one cell up, starting from the top. */
    k = cur;
    while (k > slot) {
      base[k] = base[k-1];
      k--;
    }

    /* Drop the saved value into the freed cell. */
    base[slot] = held;

    putchar('+'); // added for debug

    cur++;
  }
}

//------ selection sort -------------------------------
/*
 * Sorts base[0..n-1] in ascending order, in place, by selection: for each
 * position i, the smallest element of base[i..n-1] is swapped into i.
 *
 * base: array to sort.
 * n:    number of elements in base; 0 and 1 are accepted and leave the
 *       array untouched.
 *
 * Prints "Selection Sort" first, then a '-' before and a '+' after each
 * outer pass (debug trace).
 */
void selection_sort(unsigned int *base, unsigned int n)
{
  unsigned int i;
  unsigned int j;
  unsigned int min;   /* index of the smallest element seen in this pass */
  unsigned int x;     /* swap temporary, same type as the elements */

  puts("Selection Sort\n");

  /* "i + 1 < n" rather than "i < n - 1": n is unsigned, so n - 1 wraps to
     UINT_MAX when n == 0 and the loop would run far past the array. */
  for (i = 0; i + 1 < n; i++)
  {
    putchar('-'); // added for debug

    min = i;
    for (j = i + 1; j < n; j++)
    {
      if (base[j] < base[min])
        min = j;
    }

    if (min != i)
    {
      x = base[i];
      base[i] = base[min];
      base[min] = x;
    }

    putchar('+'); // added for debug
  }
}
//-------------------------------
/*
 * Sorts base[0..n-1] in ascending order, in place, with a bubble sort that
 * stops as soon as a full pass performs no swap.
 *
 * base: array to sort.
 * n:    number of elements in base.
 *
 * Prints "Bubble Sort" first, then a '-' before and a '+' after each pass
 * (debug trace).
 */
void bubble_sort(unsigned int *base, unsigned int n)
{
        int pass;     /* number of passes completed so far */
        int k;        /* index of the upper element of the compared pair */
        int held;     /* swap temporary */
        int swapped;  /* non-zero when the last pass exchanged something */

        puts("Bubble Sort\n");

        pass = 0;
        swapped = 1;
        /* Keep going while passes remain and the previous one changed the array. */
        while ((pass < n) && swapped)
        {
                putchar('-'); // added for debug

                /* Assume the array is sorted until a swap proves otherwise. */
                swapped = 0;

                /* After `pass` passes the last `pass` cells are final, so only
                   the first n - pass cells are compared. */
                for (k = 1; k < n - pass; k++)
                {
                        if (base[k-1] > base[k])
                        {
                                /* Adjacent pair out of order: exchange it. */
                                held = base[k-1];
                                base[k-1] = base[k];
                                base[k] = held;

                                swapped = 1;
                        }
                }

                putchar('+'); // added for debug

                pass++;
        }

}
//------------------------------------------------------
/*
 * Runs one insertion-sort pass over a[0..length-1] with the given gap:
 * elements that are `gap` cells apart are put in ascending order.
 * If gap == 1, this is an ordinary insertion sort.
 * If gap >= length, nothing is moved.
 *
 * Prints the gap first, then a '+' before and a '-' after every element
 * move (debug trace).
 */
void shellSortPhase(unsigned int a[],unsigned int length, int gap) {
    int pos;

    puti(gap);

    pos = gap;
    while (pos < length) {
        unsigned int held = a[pos];  /* value being inserted */
        int hole = pos;              /* cell currently free for it */

        /* Walk down in steps of gap, pulling larger elements up into the hole. */
        while (hole - gap >= 0 && a[hole - gap] > held) {
	    putchar('+');
            a[hole] = a[hole - gap];
	    putchar('-');
            hole -= gap;
        }
        a[hole] = held;

        ++pos;
    }
}
 
/*
 * Sorts base[0..n-1] in ascending order, in place, with a Shell sort: one
 * shellSortPhase() pass per gap, from the largest gap down to 1.
 *
 * Prints "Shell Sort" before the first pass.
 */
void shellSort(unsigned int *base, unsigned int n) {
    /*
     * gaps[] must approximate a geometric series.
     * The sequence below is the best one known in terms of the average
     * number of comparisons. See:
     * http://www.research.att.com/~njas/sequences/A102549
     */
    static const int gaps[] = {
        1, 4, 10, 23, 57, 132, 301, 701
    };
    int remaining = sizeof(gaps)/sizeof(gaps[0]);  /* gaps not yet applied */

    puts("Shell Sort\n");

    /* Consume the table from its last (largest) entry to its first. */
    while (remaining > 0) {
        --remaining;
        shellSortPhase(base, n, gaps[remaining]);
    }
}

//-------------------------------------*/
/*
 * Sorts base[0..n-1] with the algorithm selected by `type`:
 *   0 -> shellSort, 1 -> selection_sort, 2 -> insertion_sort, 3 -> bubble_sort.
 * Any other value leaves the array untouched.
 */
void SORT(unsigned int *base, unsigned int n, int type)
{
  if (type == 0) {
    shellSort(base, n);
  } else if (type == 1) {
    selection_sort(base, n);
  } else if (type == 2) {
    insertion_sort(base, n);
  } else if (type == 3) {
    bubble_sort(base, n);
  }
  /* unknown type: nothing to do */
}

