/////////////////////////////////////////////////////////////////////////////// // File : main.c // Date : November 2013 // Author : Cesar Fuguet Tortolero // // Description : // // Sort application using the GIET-VM OS. This application uses the // barrier routines to apply a sort algorithm in several stages. // // Constraints : // // - It supports up to 1024 processors and the number of processors // must be a power of 2. // // _ The array of values to be sorted (ARRAY_LENGTH) must be power of 2 // larger than the number of processors. // // - This application must be executed on a cache coherent architecture. // /////////////////////////////////////////////////////////////////////////////// #include "stdio.h" #include "mapping_info.h" #include "hard_config.h" #include "user_barrier.h" #define ARRAY_LENGTH 0x1000 #define IPT (ARRAY_LENGTH / threads) // ITEMS PER THREAD //////////////////////////////////////////////////////////////////////////////// // Processors other than 0 display algorithm state if VERBOSE non zero #define VERBOSE 1 //////////////////////////////////////////////////////////////////////////////// // Define printf according to verbosity option and number of available TTY #if (VERBOSE == 1) # define printf(...) giet_shr_printf(__VA_ARGS__) #else # define printf(...) #endif #define task0_printf(...) if(thread_id == 0) giet_shr_printf(__VA_ARGS__) int array0[ARRAY_LENGTH]; int array1[ARRAY_LENGTH]; volatile int init_ok = 0; void bubbleSort( int * array, unsigned int length, unsigned int init_pos); void merge( int * array, int * result, int length, int init_pos_a, int init_pos_b, int init_pos_result); /////////////////////////////////////////////////////// // This application supports at most 1024 processors // Number of barriers = log2(threads) giet_barrier_t barrier[10]; ////////////////////////////////////////// __attribute__ ((constructor)) void sort() { int thread_id = giet_thread_id(); int * src_array = NULL; int * dst_array = NULL; int i; unsigned int time_start = giet_proctime(); unsigned int time_end; // compute number of threads (one thread per proc) unsigned int x_size; unsigned int y_size; unsigned int nprocs; unsigned int threads; giet_procs_number( &x_size , &y_size , &nprocs ); threads = x_size * y_size * nprocs; if ( (threads != 1) && (threads != 2) && (threads != 4) && (threads != 8) && (threads != 16 ) && (threads != 32) && (threads != 64) && (threads != 128) && (threads != 256) && (threads != 512) && (threads != 1024) ) { task0_printf("[SORT ERROR] Number of processors must be power of 2\n" " x_size = %d / y_size = %d / nprocs = %d\n", x_size , y_size , nprocs ); giet_exit("error"); } task0_printf("\n[ Thread 0 ] Starting sort application with %d threads " "at cycle %d\n", threads, time_start); /////////////////////////// // Barriers Initialization if (thread_id == 0) { for (i = 0; i < __builtin_ctz( threads ); i++) { barrier_init(&barrier[i], threads >> i); } init_ok = 1; } else { while(!init_ok); } //////////////////////// // Array Initialization for (i = IPT * thread_id; i < IPT * (thread_id + 1); i++) { array0[i] = giet_rand(); } /////////////////////////////////// // Parallel sort of array elements printf("[ Thread %d ] Stage 0: Sorting...\n\r", thread_id); bubbleSort(array0, IPT, IPT * thread_id); printf("[ Thread %d ] Finishing Stage 0\n\r", thread_id); for (i = 0; i < __builtin_ctz( threads ); i++) { barrier_wait(&barrier[i]); if((thread_id % (2 << i)) != 0) { printf("[ Thread %d ] Quit\n\r", thread_id ); giet_exit("Completed"); } printf("[ Thread %d ] Stage %d: Sorting...\n\r", thread_id, i+1); if((i % 2) == 0) { src_array = &array0[0]; dst_array = &array1[0]; } else { src_array = &array1[0]; dst_array = &array0[0]; } merge(src_array, dst_array , IPT << i , IPT * thread_id , IPT * (thread_id + (1 << i)) , IPT * thread_id ); printf("[ Thread %d ] Finishing Stage %d\n\r", thread_id, i + 1); } int success; int failure_index; ////////////////////////////// // Verify the resulting array if(thread_id != 0) { giet_exit("error: only thread 0 should get here"); } success = 1; for(i=0; i<(ARRAY_LENGTH-1); i++) { if(dst_array[i] > dst_array[i+1]) { success = 0; failure_index = i; break; } } time_end = giet_proctime(); printf("[ Thread 0 ] Finishing sort application at cycle %d\n" "[ Thread 0 ] Time elapsed = %d\n", time_end, (time_end - time_start) ); if (success) { giet_exit("!!! Success !!!"); } else { printf("[ Thread 0 ] Failure!! Incorrect element: %d\n\r", failure_index); for(i=0; i array[j + 1]) { aux = array[j + 1]; array[j + 1] = array[j]; array[j] = aux; } } } } ///////////// void merge( int * array, int * result, int length, int init_pos_a, int init_pos_b, int init_pos_result) { int i; int j; int k; i = 0; j = 0; k = init_pos_result; while((i < length) || (j < length)) { if((i < length) && (j < length)) { if(array[init_pos_a + i] < array[init_pos_b + j]) { result[k++] = array[init_pos_a + i]; i++; } else { result[k++] = array[init_pos_b + j]; j++; } } else if(i < length) { result[k++] = array[init_pos_a + i]; i++; } else { result[k++] = array[init_pos_b + j]; j++; } } } /* vim: tabstop=4 : shiftwidth=4 : expandtab */