Changeset 826 for soft/giet_vm/applications/rosenfeld/include
- Timestamp: Jul 13, 2017, 11:01:58 AM (7 years ago)
- Location: soft/giet_vm/applications/rosenfeld/include
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
soft/giet_vm/applications/rosenfeld/include/clock.h
r823 r826 9 9 #include <x86intrin.h> 10 10 #include <sys/time.h> 11 12 typedef uint64_t cl_size_t; 13 #define MAX_CLOCK_VAL 0xFFFFFFFFFFFFFFFFLU 14 #elif TARGET_OS == GIETVM 15 typedef uint32_t cl_size_t; 16 #define MAX_CLOCK_VAL 0xFFFFFFFF 11 17 #endif 12 18 … … 37 43 38 44 39 static void local_sort_asc( uint64_t tab[], int32_t size) {40 uint64_t tmp;45 static void local_sort_asc(cl_size_t tab[], int32_t size) { 46 cl_size_t tmp; 41 47 int32_t i, j; 42 48 for (i = 0; i < size; i++) { 43 uint64_t min = tab[i];49 cl_size_t min = tab[i]; 44 50 int32_t jmin = i; 45 51 for (j = i + 1; j < size; j++) { … … 57 63 58 64 59 #define CLOCK_DEC uint64_t app_start; \60 uint64_t app_end; \61 uint64_t app_create; \62 uint64_t app_join; \63 uint64_t * thread_start; \64 uint64_t * thread_end; \65 uint64_t * thread_compute_start; \66 uint64_t * thread_compute_end; \65 #define CLOCK_DEC cl_size_t app_start; \ 66 cl_size_t app_end; \ 67 cl_size_t app_create; \ 68 cl_size_t app_join; \ 69 cl_size_t * thread_start; \ 70 cl_size_t * thread_end; \ 71 cl_size_t * thread_compute_start; \ 72 cl_size_t * thread_compute_end; \ 67 73 int32_t step_number; \ 68 74 int32_t clock_thread_num; \ 69 75 int32_t clock_num_runs; \ 70 uint64_t ** thread_start_step; \71 uint64_t ** thread_end_step; \72 uint64_t global_thread_start; \73 uint64_t global_thread_end; \74 uint64_t global_thread_compute_start; \75 uint64_t global_thread_compute_end; \76 uint64_t accumulated_thread_compute; \77 uint64_t * global_thread_start_step; \78 uint64_t * global_thread_end_step; \79 uint64_t * accumulated_thread_step;76 cl_size_t ** thread_start_step; \ 77 cl_size_t ** thread_end_step; \ 78 cl_size_t global_thread_start; \ 79 cl_size_t global_thread_end; \ 80 cl_size_t global_thread_compute_start; \ 81 cl_size_t global_thread_compute_end; \ 82 cl_size_t accumulated_thread_compute; \ 83 cl_size_t * global_thread_start_step; \ 84 cl_size_t * global_thread_end_step; \ 85 cl_size_t * accumulated_thread_step; 80 86 81 87 #if 
TARGET_OS == GIETVM … … 85 91 struct timeval full_time; \ 86 92 gettimeofday(&full_time, NULL); \ 87 x = ( uint64_t) ((full_time.tv_usec + full_time.tv_sec * 1000000)); \93 x = (cl_size_t) ((full_time.tv_usec + full_time.tv_sec * 1000000)); \ 88 94 }) */ 89 95 #define CLOCK(x) ({ x = __rdtsc(); }) … … 91 97 92 98 // x = number of threads, y = number of steps 93 #define CLOCK_INIT(x, y) ({ \94 clock_thread_num = (x); \95 step_number = (y); \96 clock_num_runs = 0; \97 global_thread_start = 0xFFFFFFFFFFFFFFFFLLU;\98 global_thread_end = 0; \99 global_thread_compute_start = 0xFFFFFFFFFFFFFFFFLLU;\100 global_thread_compute_end = 0; \101 accumulated_thread_compute = 0; \102 if ((x) > 0) { \103 thread_start = ( uint64_t *) malloc(sizeof(uint64_t) * (x)); \104 thread_end = ( uint64_t *) malloc(sizeof(uint64_t) * (x)); \105 thread_compute_start = ( uint64_t *) malloc(sizeof(uint64_t) * (x)); \106 thread_compute_end = ( uint64_t *) malloc(sizeof(uint64_t) * (x)); \107 if ((y) > 0) { \108 global_thread_start_step = ( uint64_t *) malloc(sizeof(uint64_t) * (y)); \109 global_thread_end_step = ( uint64_t *) malloc(sizeof(uint64_t) * (y)); \110 thread_start_step = ( uint64_t **) malloc(sizeof(uint64_t *) * (y)); \111 thread_end_step = ( uint64_t **) malloc(sizeof(uint64_t *) * (y)); \112 accumulated_thread_step = ( uint64_t *) malloc(sizeof(uint64_t) * (y)); \113 for (int32_t j = 0; j < (y); j++) { \114 global_thread_start_step[j] = 0xFFFFFFFFFFFFFFFFLLU;\115 global_thread_end_step[j] = 0; \116 accumulated_thread_step[j] = 0; \117 thread_start_step[j] = ( uint64_t *) malloc(sizeof(uint64_t) * (x)); \118 thread_end_step[j] = ( uint64_t *) malloc(sizeof(uint64_t) * (x)); \119 } \120 } \121 } \99 #define CLOCK_INIT(x, y) ({ \ 100 clock_thread_num = (x); \ 101 step_number = (y); \ 102 clock_num_runs = 0; \ 103 global_thread_start = MAX_CLOCK_VAL; \ 104 global_thread_end = 0; \ 105 global_thread_compute_start = MAX_CLOCK_VAL; \ 106 global_thread_compute_end = 0; \ 107 
accumulated_thread_compute = 0; \ 108 if ((x) > 0) { \ 109 thread_start = (cl_size_t *) malloc(sizeof(cl_size_t) * (x)); \ 110 thread_end = (cl_size_t *) malloc(sizeof(cl_size_t) * (x)); \ 111 thread_compute_start = (cl_size_t *) malloc(sizeof(cl_size_t) * (x)); \ 112 thread_compute_end = (cl_size_t *) malloc(sizeof(cl_size_t) * (x)); \ 113 if ((y) > 0) { \ 114 global_thread_start_step = (cl_size_t *) malloc(sizeof(cl_size_t) * (y)); \ 115 global_thread_end_step = (cl_size_t *) malloc(sizeof(cl_size_t) * (y)); \ 116 thread_start_step = (cl_size_t **) malloc(sizeof(cl_size_t *) * (y)); \ 117 thread_end_step = (cl_size_t **) malloc(sizeof(cl_size_t *) * (y)); \ 118 accumulated_thread_step = (cl_size_t *) malloc(sizeof(cl_size_t) * (y)); \ 119 for (int32_t j = 0; j < (y); j++) { \ 120 global_thread_start_step[j] = MAX_CLOCK_VAL; \ 121 global_thread_end_step[j] = 0; \ 122 accumulated_thread_step[j] = 0; \ 123 thread_start_step[j] = (cl_size_t *) malloc(sizeof(cl_size_t) * (x)); \ 124 thread_end_step[j] = (cl_size_t *) malloc(sizeof(cl_size_t) * (x)); \ 125 } \ 126 } \ 127 } \ 122 128 }) 123 129 … … 153 159 for (int32_t j = 0; j < step_number; j++) { \ 154 160 accumulated_thread_step[j] += (global_thread_end_step[j] - global_thread_start_step[j]); \ 155 global_thread_start_step[j] = 0xFFFFFFFFFFFFFFFFLLU;\161 global_thread_start_step[j] = MAX_CLOCK_VAL; \ 156 162 global_thread_end_step[j] = 0; \ 157 163 } \ 158 164 accumulated_thread_compute += (global_thread_compute_end - global_thread_compute_start); \ 159 global_thread_compute_start = 0xFFFFFFFFFFFFFFFFLLU;\165 global_thread_compute_start = MAX_CLOCK_VAL; \ 160 166 global_thread_compute_end = 0; \ 161 167 clock_num_runs++; \ … … 192 198 193 199 200 #if TARGET_OS == LINUX 201 194 202 #define PRINT_CLOCK ({ \ 195 203 MCA_VERBOSE1(printf("Timestamps:\n")); \ … … 198 206 MCA_VERBOSE1(printf("(are those of the last run)\n")); \ 199 207 } \ 200 MCA_VERBOSE1(printf("[APP_START] : %llu\n", app_start));\201 
MCA_VERBOSE1(printf("[APP_CREATE] : %llu\n", app_create));\202 MCA_VERBOSE1(printf("[THREAD_START] : %llu\n", global_thread_start));\203 MCA_VERBOSE1(printf("[THREAD_COMPUTE_START] : %llu\n", global_thread_compute_start));\208 MCA_VERBOSE1(printf("[APP_START] : %llu\n", (long long unsigned int) app_start)); \ 209 MCA_VERBOSE1(printf("[APP_CREATE] : %llu\n", (long long unsigned int) app_create)); \ 210 MCA_VERBOSE1(printf("[THREAD_START] : %llu\n", (long long unsigned int) global_thread_start)); \ 211 MCA_VERBOSE1(printf("[THREAD_COMPUTE_START] : %llu\n", (long long unsigned int) global_thread_compute_start)); \ 204 212 for (int32_t j = 0; j < step_number; j++) { \ 205 MCA_VERBOSE1(printf("[THREAD_START_STEP_%d] : %llu\n", j, global_thread_start_step[j]));\206 MCA_VERBOSE1(printf("[THREAD_END_STEP_%d] : %llu\n", j, global_thread_end_step[j]));\207 } \ 208 MCA_VERBOSE1(printf("[THREAD_COMPUTE_END] : %llu\n", global_thread_compute_end));\209 MCA_VERBOSE1(printf("[THREAD_END] : %llu\n", global_thread_end));\210 MCA_VERBOSE1(printf("[APP_JOIN] : %llu\n", app_join));\211 MCA_VERBOSE1(printf("[APP_END] : %llu\n", app_end));\213 MCA_VERBOSE1(printf("[THREAD_START_STEP_%d] : %llu\n", j, (long long unsigned int) global_thread_start_step[j])); \ 214 MCA_VERBOSE1(printf("[THREAD_END_STEP_%d] : %llu\n", j, (long long unsigned int) global_thread_end_step[j])); \ 215 } \ 216 MCA_VERBOSE1(printf("[THREAD_COMPUTE_END] : %llu\n", (long long unsigned int) global_thread_compute_end)); \ 217 MCA_VERBOSE1(printf("[THREAD_END] : %llu\n", (long long unsigned int) global_thread_end)); \ 218 MCA_VERBOSE1(printf("[APP_JOIN] : %llu\n", (long long unsigned int) app_join)); \ 219 MCA_VERBOSE1(printf("[APP_END] : %llu\n", (long long unsigned int) app_end)); \ 212 220 MCA_VERBOSE1(printf("Durations (in cycles):\n")); \ 213 221 if (clock_num_runs > 1) { \ 214 222 MCA_VERBOSE1(printf("(PARALLEL_COMPUTE and THREAD_STEPs are averaged over %d runs)\n", clock_num_runs)); \ 215 223 } \ 216 
MCA_VERBOSE1(printf("[TOTAL] : %llu\n", app_end - app_start));\217 MCA_VERBOSE1(printf("[THREAD] : %llu\n", app_join - app_create));\218 MCA_VERBOSE1(printf("[PARALLEL] : %llu\n", global_thread_end - global_thread_start));\219 MCA_VERBOSE1(printf("[PARALLEL_COMPUTE] : %llu\n", accumulated_thread_compute / clock_num_runs));\224 MCA_VERBOSE1(printf("[TOTAL] : %llu\n", (long long unsigned int) app_end - app_start)); \ 225 MCA_VERBOSE1(printf("[THREAD] : %llu\n", (long long unsigned int) app_join - app_create)); \ 226 MCA_VERBOSE1(printf("[PARALLEL] : %llu\n", (long long unsigned int) global_thread_end - global_thread_start));\ 227 MCA_VERBOSE1(printf("[PARALLEL_COMPUTE] : %llu\n", (long long unsigned int) accumulated_thread_compute / clock_num_runs)); \ 220 228 for (int32_t j = 0; j < step_number; j++) { \ 221 MCA_VERBOSE1(printf("[THREAD_STEP_%d] : %llu\n", j, accumulated_thread_step[j] / clock_num_runs));\229 MCA_VERBOSE1(printf("[THREAD_STEP_%d] : %llu\n", j, (long long unsigned int) accumulated_thread_step[j] / clock_num_runs)); \ 222 230 } \ 223 231 MCA_VERBOSE1(printf("\n")); \ … … 233 241 MCA_VERBOSE1(printf("# cycle thread_id\n")); \ 234 242 for (int32_t i = 0; i < clock_thread_num; i++) { \ 235 MCA_VERBOSE1(printf("%llu\t%d\n", thread_start[i] - app_start, i));\236 MCA_VERBOSE1(printf("%llu\t%d\n", thread_compute_start[i] - app_start, i));\243 MCA_VERBOSE1(printf("%llu\t%d\n", (long long unsigned int) thread_start[i] - app_start, i)); \ 244 MCA_VERBOSE1(printf("%llu\t%d\n", (long long unsigned int) thread_compute_start[i] - app_start, i)); \ 237 245 for (int32_t j = 0; j < step_number; j++) { \ 238 MCA_VERBOSE1(printf("%llu\t%d\n", thread_start_step[j][i] - app_start, i));\239 MCA_VERBOSE1(printf("%llu\t%d\n", thread_end_step[j][i] - app_start, i));\246 MCA_VERBOSE1(printf("%llu\t%d\n", (long long unsigned int) thread_start_step[j][i] - app_start, i)); \ 247 MCA_VERBOSE1(printf("%llu\t%d\n", (long long unsigned int) thread_end_step[j][i] - app_start, i)); \ 
240 248 } \ 241 MCA_VERBOSE1(printf("%llu\t%d\n", thread_compute_end[i] - app_start, i)); \ 242 MCA_VERBOSE1(printf("%llu\t%d\n", thread_end[i] - app_start, i)); \ 243 } \ 244 }) 245 246 249 MCA_VERBOSE1(printf("%llu\t%d\n", (long long unsigned int) thread_compute_end[i] - app_start, i)); \ 250 MCA_VERBOSE1(printf("%llu\t%d\n", (long long unsigned int) thread_end[i] - app_start, i)); \ 251 } \ 252 }) 253 254 #elif TARGET_OS == GIETVM 255 256 #define PRINT_CLOCK ({ \ 257 MCA_VERBOSE1(printf("Timestamps:\n")); \ 258 if (clock_num_runs > 1) { \ 259 MCA_VERBOSE1(printf("(THREAD_COMPUTE_START, THREAD_COMPUTE_END, THREAD_START_STEPs and THREAD_END_STEPs)\n")); \ 260 MCA_VERBOSE1(printf("(are those of the last run)\n")); \ 261 } \ 262 MCA_VERBOSE1(printf("[APP_START] : %d\n", app_start)); \ 263 MCA_VERBOSE1(printf("[APP_CREATE] : %d\n", app_create)); \ 264 MCA_VERBOSE1(printf("[THREAD_START] : %d\n", global_thread_start)); \ 265 MCA_VERBOSE1(printf("[THREAD_COMPUTE_START] : %d\n", global_thread_compute_start)); \ 266 for (int32_t j = 0; j < step_number; j++) { \ 267 MCA_VERBOSE1(printf("[THREAD_START_STEP_%d] : %d\n", j, global_thread_start_step[j])); \ 268 MCA_VERBOSE1(printf("[THREAD_END_STEP_%d] : %d\n", j, global_thread_end_step[j])); \ 269 } \ 270 MCA_VERBOSE1(printf("[THREAD_COMPUTE_END] : %d\n", global_thread_compute_end)); \ 271 MCA_VERBOSE1(printf("[THREAD_END] : %d\n", global_thread_end)); \ 272 MCA_VERBOSE1(printf("[APP_JOIN] : %d\n", app_join)); \ 273 MCA_VERBOSE1(printf("[APP_END] : %d\n", app_end)); \ 274 MCA_VERBOSE1(printf("Durations (in cycles):\n")); \ 275 if (clock_num_runs > 1) { \ 276 MCA_VERBOSE1(printf("(PARALLEL_COMPUTE and THREAD_STEPs are averaged over %d runs)\n", clock_num_runs)); \ 277 } \ 278 MCA_VERBOSE1(printf("[TOTAL] : %d\n", app_end - app_start)); \ 279 MCA_VERBOSE1(printf("[THREAD] : %d\n", app_join - app_create)); \ 280 MCA_VERBOSE1(printf("[PARALLEL] : %d\n", global_thread_end - global_thread_start)); \ 281 
MCA_VERBOSE1(printf("[PARALLEL_COMPUTE] : %d\n", accumulated_thread_compute / clock_num_runs)); \ 282 for (int32_t j = 0; j < step_number; j++) { \ 283 MCA_VERBOSE1(printf("[THREAD_STEP_%d] : %d\n", j, accumulated_thread_step[j] / clock_num_runs)); \ 284 } \ 285 MCA_VERBOSE1(printf("\n")); \ 286 MCA_VERBOSE1(printf("*** All threads times output in a gnuplot data-style ***\n")); \ 287 local_sort_asc(thread_start, clock_thread_num); \ 288 local_sort_asc(thread_compute_start, clock_thread_num); \ 289 local_sort_asc(thread_compute_end, clock_thread_num); \ 290 local_sort_asc(thread_end, clock_thread_num); \ 291 for (int32_t j = 0; j < step_number; j++) { \ 292 local_sort_asc(thread_start_step[j], clock_thread_num); \ 293 local_sort_asc(thread_end_step[j], clock_thread_num); \ 294 } \ 295 MCA_VERBOSE1(printf("# cycle thread_id\n")); \ 296 for (int32_t i = 0; i < clock_thread_num; i++) { \ 297 MCA_VERBOSE1(printf("%d\t%d\n", thread_start[i] - app_start, i)); \ 298 MCA_VERBOSE1(printf("%d\t%d\n", thread_compute_start[i] - app_start, i)); \ 299 for (int32_t j = 0; j < step_number; j++) { \ 300 MCA_VERBOSE1(printf("%d\t%d\n", thread_start_step[j][i] - app_start, i)); \ 301 MCA_VERBOSE1(printf("%d\t%d\n", thread_end_step[j][i] - app_start, i)); \ 302 } \ 303 MCA_VERBOSE1(printf("%d\t%d\n", thread_compute_end[i] - app_start, i)); \ 304 MCA_VERBOSE1(printf("%d\t%d\n", thread_end[i] - app_start, i)); \ 305 } \ 306 }) 307 308 309 #endif 247 310 248 311 -
soft/giet_vm/applications/rosenfeld/include/config.h
r823 r826 4 4 5 5 #define SLOW 0 6 #define FEATURES 06 #define FEATURES 1 7 7 #define FAST 1 8 8 #define PYR_BARRIERS 0 9 9 #define PARMERGE 1 10 #define ARSP 010 #define ARSP 1 11 11 12 12 #if FAST && SLOW … … 45 45 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Parallel_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 46 46 #define SetRoot_Parallel_FNF(D, rl, rd, alpha, F) SetRoot_Parallel_Features_Rosenfeld_Dist(D, rl, rd, alpha, F) 47 #elif FEATURES && PARMERGE && ARSP 48 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Parallel_Features_Arsp_Rosenfeld_Dist(e, f, T, D, alpha, F) 49 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Parallel_Features_Arsp_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 50 #error "Configuration Not implemented" 47 #elif FEATURES && PARMERGE && ARSP 48 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Parallel_Arsp_Rosenfeld_Dist(e, f, T, D, alpha, F) 49 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Parallel_Arsp_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 51 50 #endif 52 51 #endif -
soft/giet_vm/applications/rosenfeld/include/ecc_features.h
r822 r826 16 16 #define __ECC_FEATURES_H__ 17 17 18 #if PARMERGE18 #if TARGET_OS == LINUX && PARMERGE 19 19 #include <pthread.h> 20 20 #endif
Note: See TracChangeset for help on using the changeset viewer.