[718] | 1 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 2 | // File : sort.c |
---|
| 3 | // Date : November 2013 |
---|
| 4 | // Author : Cesar Fuguet Tortolero <cesar.fuguet-tortolero@lip6.fr> |
---|
| 5 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 6 | // This multi-threaded application implement a multi-stage sort application. |
---|
| 7 | // The various stages are separated by synchronisation barriers. |
---|
| 8 | // There is one thread per physical processors. Computation is organised as |
---|
| 9 | // a binary tree: All threads contribute to the first stage of parallel sort |
---|
| 10 | // but, the number of participating threads is divided by 2 at each next stage. |
---|
| 11 | // Number_of_stages = number of barriers = log2(Number_of_threads) |
---|
| 12 | // |
---|
| 13 | // Constraints : |
---|
| 14 | // - It supports up to 1024 processors and the number of processors |
---|
| 15 | // must be a power of 2. |
---|
| 16 | // _ The array of values to be sorted (ARRAY_LENGTH) must be power of 2 |
---|
| 17 | // larger than the number of processors. |
---|
| 18 | // - This application uses a single TTY terminal, shared by all threads, |
---|
| 19 | // that is protectted by an user-level SQT lock. |
---|
| 20 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 21 | |
---|
| 22 | #include "stdio.h" |
---|
| 23 | #include "mapping_info.h" |
---|
| 24 | #include "user_barrier.h" |
---|
| 25 | #include "user_lock.h" |
---|
| 26 | |
---|
| 27 | #define ARRAY_LENGTH 0x400 |
---|
| 28 | #define VERBOSE 0 |
---|
| 29 | |
---|
| 30 | // macro to use a shared TTY |
---|
| 31 | #define printf(...); { lock_acquire( &tty_lock ); \ |
---|
| 32 | giet_tty_printf(__VA_ARGS__); \ |
---|
| 33 | lock_release( &tty_lock ); } |
---|
| 34 | |
---|
| 35 | // argument for the sort() function |
---|
| 36 | typedef struct |
---|
| 37 | { |
---|
| 38 | unsigned int threads; // number of threads (one per core |
---|
| 39 | unsigned int index; // user defined thread index |
---|
| 40 | } args_t; |
---|
| 41 | |
---|
| 42 | ////////////////////////////////////////// |
---|
| 43 | // Global variables |
---|
| 44 | ////////////////////////////////////////// |
---|
| 45 | |
---|
| 46 | int array0[ARRAY_LENGTH]; |
---|
| 47 | int array1[ARRAY_LENGTH]; |
---|
| 48 | |
---|
| 49 | giet_barrier_t barrier[10]; |
---|
| 50 | |
---|
| 51 | user_lock_t tty_lock; |
---|
| 52 | |
---|
| 53 | |
---|
| 54 | //////////////////////////////////// |
---|
| 55 | void bubbleSort( int * array, |
---|
| 56 | unsigned int length, |
---|
| 57 | unsigned int init_pos ) |
---|
| 58 | { |
---|
| 59 | int i; |
---|
| 60 | int j; |
---|
| 61 | int aux; |
---|
| 62 | |
---|
| 63 | for(i = 0; i < length; i++) |
---|
| 64 | { |
---|
| 65 | for(j = init_pos; j < (init_pos + length - i - 1); j++) |
---|
| 66 | { |
---|
| 67 | if(array[j] > array[j + 1]) |
---|
| 68 | { |
---|
| 69 | aux = array[j + 1]; |
---|
| 70 | array[j + 1] = array[j]; |
---|
| 71 | array[j] = aux; |
---|
| 72 | } |
---|
| 73 | } |
---|
| 74 | } |
---|
| 75 | } // end bubbleSort() |
---|
| 76 | |
---|
| 77 | |
---|
| 78 | ///////////////////////// |
---|
| 79 | void merge( int * array, |
---|
| 80 | int * result, |
---|
| 81 | int length, |
---|
| 82 | int init_pos_a, |
---|
| 83 | int init_pos_b, |
---|
| 84 | int init_pos_result ) |
---|
| 85 | { |
---|
| 86 | int i; |
---|
| 87 | int j; |
---|
| 88 | int k; |
---|
| 89 | |
---|
| 90 | i = 0; |
---|
| 91 | j = 0; |
---|
| 92 | k = init_pos_result; |
---|
| 93 | |
---|
| 94 | while((i < length) || (j < length)) |
---|
| 95 | { |
---|
| 96 | if((i < length) && (j < length)) |
---|
| 97 | { |
---|
| 98 | if(array[init_pos_a + i] < array[init_pos_b + j]) |
---|
| 99 | { |
---|
| 100 | result[k++] = array[init_pos_a + i]; |
---|
| 101 | i++; |
---|
| 102 | } |
---|
| 103 | else |
---|
| 104 | { |
---|
| 105 | result[k++] = array[init_pos_b + j]; |
---|
| 106 | j++; |
---|
| 107 | } |
---|
| 108 | } |
---|
| 109 | else if(i < length) |
---|
| 110 | { |
---|
| 111 | result[k++] = array[init_pos_a + i]; |
---|
| 112 | i++; |
---|
| 113 | } |
---|
| 114 | else |
---|
| 115 | { |
---|
| 116 | result[k++] = array[init_pos_b + j]; |
---|
| 117 | j++; |
---|
| 118 | } |
---|
| 119 | } |
---|
| 120 | } // end merge() |
---|
| 121 | |
---|
| 122 | |
---|
| 123 | /////////////////////////////////////////////////////////////////// |
---|
| 124 | __attribute__ ((constructor)) void sort( args_t* ptr ) |
---|
| 125 | /////////////////////////////////////////////////////////////////// |
---|
| 126 | { |
---|
| 127 | int * src_array = NULL; |
---|
| 128 | int * dst_array = NULL; |
---|
| 129 | |
---|
| 130 | unsigned int thread_id = ptr->index; |
---|
| 131 | unsigned int threads = ptr->threads; |
---|
| 132 | unsigned int items = ARRAY_LENGTH / threads; |
---|
| 133 | unsigned int stages = __builtin_ctz( threads ); |
---|
| 134 | unsigned int i; |
---|
| 135 | |
---|
| 136 | // all threads contribute to the first stage of parallel sort |
---|
| 137 | printf("[SORT] Thread %d / Stage 0: Sorting...\n\r", thread_id ); |
---|
| 138 | |
---|
| 139 | bubbleSort( array0, items, items * thread_id ); |
---|
| 140 | |
---|
| 141 | printf("[SORT] Thread %d / Stage 0: Completed\n\r", thread_id); |
---|
| 142 | |
---|
| 143 | // the number of threads is divided by 2 at each next stage |
---|
| 144 | for ( i = 0 ; i < stages ; i++ ) |
---|
| 145 | { |
---|
| 146 | barrier_wait( &barrier[i] ); |
---|
| 147 | |
---|
| 148 | if((thread_id % (2 << i)) != 0) giet_pthread_exit("Completed"); |
---|
| 149 | |
---|
| 150 | printf("[SORT] Thread %d / Stage %d: Sorting...\n\r", thread_id, i+1); |
---|
| 151 | |
---|
| 152 | if((i % 2) == 0) // even stage |
---|
| 153 | { |
---|
| 154 | src_array = &array0[0]; |
---|
| 155 | dst_array = &array1[0]; |
---|
| 156 | } |
---|
| 157 | else // odd stage |
---|
| 158 | { |
---|
| 159 | src_array = &array1[0]; |
---|
| 160 | dst_array = &array0[0]; |
---|
| 161 | } |
---|
| 162 | |
---|
| 163 | merge( src_array, |
---|
| 164 | dst_array, |
---|
| 165 | items << i, |
---|
| 166 | items * thread_id, |
---|
| 167 | items * (thread_id + (1 << i)), |
---|
| 168 | items * thread_id ); |
---|
| 169 | |
---|
| 170 | printf("[SORT] Thread %d / Stage %d: Completed\n\r", thread_id, i+1); |
---|
| 171 | } |
---|
| 172 | |
---|
| 173 | |
---|
| 174 | } // end sort() |
---|
| 175 | |
---|
| 176 | |
---|
| 177 | ////////////////////////////////////////// |
---|
| 178 | __attribute__ ((constructor)) void main() |
---|
| 179 | ////////////////////////////////////////// |
---|
| 180 | { |
---|
| 181 | unsigned int x_size; // number of rows |
---|
| 182 | unsigned int y_size; // number of columns |
---|
| 183 | unsigned int nprocs; // number of procs per cluster |
---|
| 184 | unsigned int threads; // total number of threads |
---|
| 185 | unsigned int n; // index for loops |
---|
| 186 | |
---|
| 187 | args_t arg[1024]; // array of arguments for sort() |
---|
| 188 | |
---|
| 189 | // compute number of threads (one thread per proc) |
---|
| 190 | giet_procs_number( &x_size , &y_size , &nprocs ); |
---|
| 191 | threads = x_size * y_size * nprocs; |
---|
| 192 | |
---|
| 193 | // alloc a shared TTY used by all threads |
---|
| 194 | giet_tty_alloc(1); |
---|
| 195 | lock_init( &tty_lock ); |
---|
| 196 | |
---|
| 197 | // checks number of threads |
---|
| 198 | if ( (threads != 1) && (threads != 2) && (threads != 4) && |
---|
| 199 | (threads != 8) && (threads != 16 ) && (threads != 32) && |
---|
| 200 | (threads != 64) && (threads != 128) && (threads != 256) && |
---|
| 201 | (threads != 512) && (threads != 1024) ) |
---|
| 202 | { |
---|
| 203 | giet_pthread_exit("[SORT ERROR] : number of cores must be power of 2\n"); |
---|
| 204 | } |
---|
| 205 | |
---|
| 206 | // check array size |
---|
| 207 | if ( ARRAY_LENGTH % threads) |
---|
| 208 | { |
---|
| 209 | giet_pthread_exit("[SORT ERROR] : array size must be multiple of number of cores\n"); |
---|
| 210 | } |
---|
| 211 | |
---|
| 212 | // Barriers initialization (number of participants divided by 2 at each stage) |
---|
| 213 | for (n = 0; n < __builtin_ctz( threads ); n++) |
---|
| 214 | { |
---|
| 215 | barrier_init( &barrier[n], threads >> n ); |
---|
| 216 | } |
---|
| 217 | |
---|
| 218 | // Array to sort initialization |
---|
| 219 | for ( n = 0 ; n < ARRAY_LENGTH ; n++ ) |
---|
| 220 | { |
---|
| 221 | array0[n] = giet_rand(); |
---|
| 222 | |
---|
| 223 | #if VERBOSE |
---|
| 224 | printf("array[%d] = %d\n", n , array0[n] ); |
---|
| 225 | #endif |
---|
| 226 | |
---|
| 227 | } |
---|
| 228 | |
---|
| 229 | printf("\n[SORT] main completes initialisation at cycle %d / %d threads\n", |
---|
| 230 | giet_proctime() , threads ); |
---|
| 231 | |
---|
| 232 | // launch other threads to run sort() function |
---|
| 233 | pthread_t trdid; |
---|
| 234 | for ( n = 1 ; n < threads ; n++ ) |
---|
| 235 | { |
---|
| 236 | arg[n].index = n; |
---|
| 237 | arg[n].threads = threads; |
---|
| 238 | if ( giet_pthread_create( &trdid, // not used because no join |
---|
| 239 | NULL, // no attribute |
---|
| 240 | &sort, |
---|
| 241 | &arg[n] ) ) // pointer on sort arguments |
---|
| 242 | { |
---|
| 243 | printf("\n[SORT ERROR] creating thread %d\n", n ); |
---|
| 244 | giet_pthread_exit( NULL ); |
---|
| 245 | } |
---|
| 246 | } |
---|
| 247 | |
---|
| 248 | // main run also the sort() function |
---|
| 249 | arg[0].index = 0; |
---|
| 250 | arg[0].threads = threads; |
---|
| 251 | sort( &arg[0] ); |
---|
| 252 | |
---|
| 253 | // Check result |
---|
| 254 | int success = 1; |
---|
| 255 | int* res_array = ( (threads== 2) || |
---|
| 256 | (threads== 8) || |
---|
| 257 | (threads== 32) || |
---|
| 258 | (threads==128) || |
---|
| 259 | (threads==512) ) ? array1 : array0; |
---|
| 260 | |
---|
| 261 | for( n=0 ; n<(ARRAY_LENGTH-1) ; n++ ) |
---|
| 262 | { |
---|
| 263 | if ( res_array[n] > res_array[n+1] ) |
---|
| 264 | { |
---|
| 265 | success = 0; |
---|
| 266 | break; |
---|
| 267 | } |
---|
| 268 | } |
---|
| 269 | |
---|
| 270 | #if VERBOSE |
---|
| 271 | for( n=0; n<ARRAY_LENGTH; n++) |
---|
| 272 | { |
---|
| 273 | printf("array[%d] = %d\n", n , res_array[n] ); |
---|
| 274 | } |
---|
| 275 | #endif |
---|
| 276 | |
---|
| 277 | if ( success ) |
---|
| 278 | { |
---|
| 279 | printf("[SORT] Main completes at cycle %d : success\n", giet_proctime() ); |
---|
| 280 | giet_pthread_exit("Success"); |
---|
| 281 | } |
---|
| 282 | else |
---|
| 283 | { |
---|
| 284 | printf("[SORT] Main completes at cycle %d : failure\n", giet_proctime() ); |
---|
| 285 | giet_pthread_exit("Failure"); |
---|
| 286 | } |
---|
| 287 | |
---|
| 288 | } // end main() |
---|
| 289 | |
---|
| 290 | |
---|
| 291 | /* vim: tabstop=4 : shiftwidth=4 : expandtab |
---|
| 292 | */ |
---|