/////////////////////////////////////////////////////////////////////////////// // File : sort.c // Date : November 2013 // Author : Cesar Fuguet Tortolero /////////////////////////////////////////////////////////////////////////////// // This multi-threaded application implement a multi-stage sort application. // The various stages are separated by synchronisation barriers. // There is one thread per physical processors. Computation is organised as // a binary tree: All threads contribute to the first stage of parallel sort // but, the number of participating threads is divided by 2 at each next stage. // Number_of_stages = number of barriers = log2(Number_of_threads) // // Constraints : // - It supports up to 1024 processors and the number of processors // must be a power of 2. // _ The array of values to be sorted (ARRAY_LENGTH) must be power of 2 // larger than the number of processors. // - This application uses a single TTY terminal, shared by all threads, // that is protectted by an user-level SQT lock. /////////////////////////////////////////////////////////////////////////////// #include "stdio.h" #include "mapping_info.h" #include "user_barrier.h" #include "user_lock.h" #define ARRAY_LENGTH 0x400 #define VERBOSE 0 // macro to use a shared TTY #define printf(...); { lock_acquire( &tty_lock ); \ giet_tty_printf(__VA_ARGS__); \ lock_release( &tty_lock ); } // argument for the sort() function typedef struct { unsigned int threads; // number of threads (one per core unsigned int index; // user defined thread index } args_t; ////////////////////////////////////////// // Global variables ////////////////////////////////////////// int array0[ARRAY_LENGTH]; int array1[ARRAY_LENGTH]; giet_barrier_t barrier[10]; user_lock_t tty_lock; //////////////////////////////////// void bubbleSort( int * array, unsigned int length, unsigned int init_pos ) { int i; int j; int aux; for(i = 0; i < length; i++) { for(j = init_pos; j < (init_pos + length - i - 1); j++) { if(array[j] > array[j + 1]) { aux = array[j + 1]; array[j + 1] = array[j]; array[j] = aux; } } } } // end bubbleSort() ///////////////////////// void merge( int * array, int * result, int length, int init_pos_a, int init_pos_b, int init_pos_result ) { int i; int j; int k; i = 0; j = 0; k = init_pos_result; while((i < length) || (j < length)) { if((i < length) && (j < length)) { if(array[init_pos_a + i] < array[init_pos_b + j]) { result[k++] = array[init_pos_a + i]; i++; } else { result[k++] = array[init_pos_b + j]; j++; } } else if(i < length) { result[k++] = array[init_pos_a + i]; i++; } else { result[k++] = array[init_pos_b + j]; j++; } } } // end merge() /////////////////////////////////////////////////////////////////// __attribute__ ((constructor)) void sort( args_t* ptr ) /////////////////////////////////////////////////////////////////// { int * src_array = NULL; int * dst_array = NULL; unsigned int thread_id = ptr->index; unsigned int threads = ptr->threads; unsigned int items = ARRAY_LENGTH / threads; unsigned int stages = __builtin_ctz( threads ); unsigned int i; // all threads contribute to the first stage of parallel sort printf("[SORT] Thread %d / Stage 0: Sorting...\n\r", thread_id ); bubbleSort( array0, items, items * thread_id ); printf("[SORT] Thread %d / Stage 0: Completed\n\r", thread_id); // the number of threads is divided by 2 at each next stage for ( i = 0 ; i < stages ; i++ ) { barrier_wait( &barrier[i] ); if((thread_id % (2 << i)) != 0) giet_pthread_exit("Completed"); printf("[SORT] Thread %d / Stage %d: Sorting...\n\r", thread_id, i+1); if((i % 2) == 0) // even stage { src_array = &array0[0]; dst_array = &array1[0]; } else // odd stage { src_array = &array1[0]; dst_array = &array0[0]; } merge( src_array, dst_array, items << i, items * thread_id, items * (thread_id + (1 << i)), items * thread_id ); printf("[SORT] Thread %d / Stage %d: Completed\n\r", thread_id, i+1); } } // end sort() ////////////////////////////////////////// __attribute__ ((constructor)) void main() ////////////////////////////////////////// { unsigned int x_size; // number of rows unsigned int y_size; // number of columns unsigned int nprocs; // number of procs per cluster unsigned int threads; // total number of threads unsigned int n; // index for loops args_t arg[1024]; // array of arguments for sort() // compute number of threads (one thread per proc) giet_procs_number( &x_size , &y_size , &nprocs ); threads = x_size * y_size * nprocs; // alloc a shared TTY used by all threads giet_tty_alloc(1); lock_init( &tty_lock ); // checks number of threads if ( (threads != 1) && (threads != 2) && (threads != 4) && (threads != 8) && (threads != 16 ) && (threads != 32) && (threads != 64) && (threads != 128) && (threads != 256) && (threads != 512) && (threads != 1024) ) { giet_pthread_exit("[SORT ERROR] : number of cores must be power of 2\n"); } // check array size if ( ARRAY_LENGTH % threads) { giet_pthread_exit("[SORT ERROR] : array size must be multiple of number of cores\n"); } // Barriers initialization (number of participants divided by 2 at each stage) for (n = 0; n < __builtin_ctz( threads ); n++) { barrier_init( &barrier[n], threads >> n ); } // Array to sort initialization for ( n = 0 ; n < ARRAY_LENGTH ; n++ ) { array0[n] = giet_rand(); #if VERBOSE printf("array[%d] = %d\n", n , array0[n] ); #endif } printf("\n[SORT] main completes initialisation at cycle %d / %d threads\n", giet_proctime() , threads ); // launch other threads to run sort() function pthread_t trdid; for ( n = 1 ; n < threads ; n++ ) { arg[n].index = n; arg[n].threads = threads; if ( giet_pthread_create( &trdid, // not used because no join NULL, // no attribute &sort, &arg[n] ) ) // pointer on sort arguments { printf("\n[SORT ERROR] creating thread %d\n", n ); giet_pthread_exit( NULL ); } } // main run also the sort() function arg[0].index = 0; arg[0].threads = threads; sort( &arg[0] ); // Check result int success = 1; int* res_array = ( (threads== 2) || (threads== 8) || (threads== 32) || (threads==128) || (threads==512) ) ? array1 : array0; for( n=0 ; n<(ARRAY_LENGTH-1) ; n++ ) { if ( res_array[n] > res_array[n+1] ) { success = 0; break; } } #if VERBOSE for( n=0; n