Changeset 822 for soft/giet_vm/applications/rosenfeld/include/clock.h
- Timestamp: Jun 1, 2016, 10:25:43 AM (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
soft/giet_vm/applications/rosenfeld/include/clock.h
r821 r822 31 31 32 32 33 static void local_sort_asc(uint 32_t tab[], int size) {34 int tmp;35 int i, j;33 static void local_sort_asc(uint64_t tab[], int32_t size) { 34 int32_t tmp; 35 int32_t i, j; 36 36 for (i = 0; i < size; i++) { 37 uint 32_t min = tab[i];38 int jmin = i;37 uint64_t min = tab[i]; 38 int32_t jmin = i; 39 39 for (j = i + 1; j < size; j++) { 40 40 if (tab[j] < min) { … … 51 51 52 52 53 #define CLOCK_DEC uint 32_t app_start; \54 uint 32_t app_end; \55 uint 32_t app_create; \56 uint 32_t app_join; \57 uint 32_t * thread_start; \58 uint 32_t * thread_end; \59 uint 32_t * thread_compute_start; \60 uint 32_t * thread_compute_end; \53 #define CLOCK_DEC uint64_t app_start; \ 54 uint64_t app_end; \ 55 uint64_t app_create; \ 56 uint64_t app_join; \ 57 uint64_t * thread_start; \ 58 uint64_t * thread_end; \ 59 uint64_t * thread_compute_start; \ 60 uint64_t * thread_compute_end; \ 61 61 int32_t step_number; \ 62 62 int32_t clock_thread_num; \ 63 uint 32_t ** thread_start_step; \64 uint 32_t ** thread_end_step; \65 uint 32_t global_thread_start; \66 uint 32_t global_thread_end; \67 uint 32_t global_thread_compute_start; \68 uint 32_t global_thread_compute_end; \69 uint 32_t * global_thread_start_step; \70 uint 32_t * global_thread_end_step; \63 uint64_t ** thread_start_step; \ 64 uint64_t ** thread_end_step; \ 65 uint64_t global_thread_start; \ 66 uint64_t global_thread_end; \ 67 uint64_t global_thread_compute_start; \ 68 uint64_t global_thread_compute_end; \ 69 uint64_t * global_thread_start_step; \ 70 uint64_t * global_thread_end_step; 71 71 72 72 #if TARGET_OS == GIETVM … … 76 76 struct timeval full_time; \ 77 77 gettimeofday(&full_time, NULL); \ 78 x = (u nsigned long) ((full_time.tv_usec + full_time.tv_sec * 1000000) / 1000); \78 x = (uint64_t) ((full_time.tv_usec + full_time.tv_sec * 1000000)); \ 79 79 }) 80 80 #endif … … 84 84 clock_thread_num = (x); \ 85 85 step_number = (y); \ 86 global_thread_start = 0xFFFFFFFF LLU;\86 global_thread_start = 
0xFFFFFFFFFFFFFFFFLLU; \ 87 87 global_thread_end = 0; \ 88 global_thread_compute_start = 0xFFFFFFFF LLU;\88 global_thread_compute_start = 0xFFFFFFFFFFFFFFFFLLU; \ 89 89 global_thread_compute_end = 0; \ 90 90 if ((x) > 0) { \ 91 thread_start = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \92 thread_end = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \93 thread_compute_start = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \94 thread_compute_end = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \91 thread_start = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 92 thread_end = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 93 thread_compute_start = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 94 thread_compute_end = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 95 95 if ((y) > 0) { \ 96 global_thread_start_step = (uint 32_t *) malloc(sizeof(uint32_t) * (y)); \97 global_thread_end_step = (uint 32_t *) malloc(sizeof(uint32_t) * (y)); \98 thread_start_step = (uint 32_t **) malloc(sizeof(uint32_t *) * (y)); \99 thread_end_step = (uint 32_t **) malloc(sizeof(uint32_t *) * (y)); \100 for (int j = 0; j < (y); j++) {\101 global_thread_start_step[j] = 0xFFFFFFFF LU;\96 global_thread_start_step = (uint64_t *) malloc(sizeof(uint64_t) * (y)); \ 97 global_thread_end_step = (uint64_t *) malloc(sizeof(uint64_t) * (y)); \ 98 thread_start_step = (uint64_t **) malloc(sizeof(uint64_t *) * (y)); \ 99 thread_end_step = (uint64_t **) malloc(sizeof(uint64_t *) * (y)); \ 100 for (int32_t j = 0; j < (y); j++) { \ 101 global_thread_start_step[j] = 0xFFFFFFFFFFFFFFFFLU; \ 102 102 global_thread_end_step[j] = 0; \ 103 thread_start_step[j] = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \104 thread_end_step[j] = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \103 thread_start_step[j] = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 104 thread_end_step[j] = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 105 105 } \ 106 106 } \ … … 121 121 122 122 123 // x = number of threads124 123 #define 
CLOCK_FINALIZE ({ \ 125 for (int i = 0; i < clock_thread_num; i++) {\124 for (int32_t i = 0; i < clock_thread_num; i++) { \ 126 125 if (thread_start[i] < global_thread_start) { \ 127 126 global_thread_start = thread_start[i]; \ … … 136 135 global_thread_compute_end = thread_compute_end[i]; \ 137 136 } \ 138 for (int j = 0; j < step_number; j++) {\137 for (int32_t j = 0; j < step_number; j++) { \ 139 138 if (thread_start_step[j][i] < global_thread_start_step[j]) { \ 140 139 global_thread_start_step[j] = thread_start_step[j][i]; \ … … 147 146 }) 148 147 149 #define PRINT_CLOCK ({ \150 printf("Timestamps:\n");\151 printf("[APP_START] : %d\n", app_start); \152 printf("[APP_CREATE] : %d\n", app_create); \153 printf("[THREAD_START] : %d\n", global_thread_start); \154 printf("[THREAD_COMPUTE_START] : %d\n", global_thread_compute_start); \155 for (int j = 0; j < step_number; j++) {\156 printf("[THREAD_START_STEP_%d] : %d\n", j, global_thread_start_step[j]); \157 printf("[THREAD_END_STEP_%d] : %d\n", j, global_thread_end_step[j]); \158 } \159 printf("[THREAD_COMPUTE_END] : %d\n", global_thread_compute_end); \160 printf("[THREAD_END] : %d\n", global_thread_end); \161 printf("[APP_JOIN] : %d\n", app_join); \162 printf("[APP_END] : %d\n", app_end); \163 printf("Durations (in cycles):\n");\164 printf("[TOTAL] : %d\n", app_end - app_start); \165 printf("[THREAD] : %d\n", app_join - app_create); \166 printf("[PARALLEL] : %d\n", global_thread_end - global_thread_start); \167 printf("[PARALLEL_COMPUTE] : %d\n", global_thread_compute_end - global_thread_compute_start); \168 for (int j = 0; j < step_number; j++) {\169 printf("[THREAD_STEP_%d] : %d\n", j, global_thread_end_step[j] - global_thread_start_step[j]); \170 } \171 printf("\n");\172 printf("*** All threads times output in a gnuplot data-style ***\n");\173 local_sort_asc(thread_start, clock_thread_num); \174 local_sort_asc(thread_compute_start, clock_thread_num); \175 local_sort_asc(thread_compute_end, clock_thread_num); \176 
local_sort_asc(thread_end, clock_thread_num); \177 for (int j = 0; j < step_number; j++) {\178 local_sort_asc(thread_start_step[j], clock_thread_num); \179 local_sort_asc(thread_end_step[j], clock_thread_num); \180 } \181 printf("# cycle thread_id\n");\182 for (int i = 0; i < clock_thread_num; i++) {\183 printf("%d\t%d\n", thread_start[i], i); \184 printf("%d\t%d\n", thread_compute_start[i], i); \185 for (int j = 0; j < step_number; j++) {\186 printf("%d\t%d\n", thread_start_step[j][i], i); \187 printf("%d\t%d\n", thread_end_step[j][i], i); \188 } \189 printf("%d\t%d\n", thread_compute_end[i], i); \190 printf("%d\t%d\n", thread_end[i], i); \191 } \148 #define PRINT_CLOCK ({ \ 149 MCA_VERBOSE1(printf("Timestamps:\n")); \ 150 MCA_VERBOSE1(printf("[APP_START] : %llu\n", app_start)); \ 151 MCA_VERBOSE1(printf("[APP_CREATE] : %llu\n", app_create)); \ 152 MCA_VERBOSE1(printf("[THREAD_START] : %llu\n", global_thread_start)); \ 153 MCA_VERBOSE1(printf("[THREAD_COMPUTE_START] : %llu\n", global_thread_compute_start)); \ 154 for (int32_t j = 0; j < step_number; j++) { \ 155 MCA_VERBOSE1(printf("[THREAD_START_STEP_%d] : %llu\n", j, global_thread_start_step[j])); \ 156 MCA_VERBOSE1(printf("[THREAD_END_STEP_%d] : %llu\n", j, global_thread_end_step[j])); \ 157 } \ 158 MCA_VERBOSE1(printf("[THREAD_COMPUTE_END] : %llu\n", global_thread_compute_end)); \ 159 MCA_VERBOSE1(printf("[THREAD_END] : %llu\n", global_thread_end)); \ 160 MCA_VERBOSE1(printf("[APP_JOIN] : %llu\n", app_join)); \ 161 MCA_VERBOSE1(printf("[APP_END] : %llu\n", app_end)); \ 162 MCA_VERBOSE1(printf("Durations (in cycles):\n")); \ 163 MCA_VERBOSE1(printf("[TOTAL] : %llu\n", app_end - app_start)); \ 164 MCA_VERBOSE1(printf("[THREAD] : %llu\n", app_join - app_create)); \ 165 MCA_VERBOSE1(printf("[PARALLEL] : %llu\n", global_thread_end - global_thread_start)); \ 166 MCA_VERBOSE1(printf("[PARALLEL_COMPUTE] : %llu\n", global_thread_compute_end - global_thread_compute_start)); \ 167 for (int32_t j = 0; j < step_number; 
j++) { \ 168 MCA_VERBOSE1(printf("[THREAD_STEP_%d] : %llu\n", j, global_thread_end_step[j] - global_thread_start_step[j])); \ 169 } \ 170 MCA_VERBOSE1(printf("\n")); \ 171 MCA_VERBOSE1(printf("*** All threads times output in a gnuplot data-style ***\n")); \ 172 local_sort_asc(thread_start, clock_thread_num); \ 173 local_sort_asc(thread_compute_start, clock_thread_num); \ 174 local_sort_asc(thread_compute_end, clock_thread_num); \ 175 local_sort_asc(thread_end, clock_thread_num); \ 176 for (int32_t j = 0; j < step_number; j++) { \ 177 local_sort_asc(thread_start_step[j], clock_thread_num); \ 178 local_sort_asc(thread_end_step[j], clock_thread_num); \ 179 } \ 180 MCA_VERBOSE1(printf("# cycle thread_id\n")); \ 181 for (int32_t i = 0; i < clock_thread_num; i++) { \ 182 MCA_VERBOSE1(printf("%llu\t%d\n", thread_start[i], i)); \ 183 MCA_VERBOSE1(printf("%llu\t%d\n", thread_compute_start[i], i)); \ 184 for (int32_t j = 0; j < step_number; j++) { \ 185 MCA_VERBOSE1(printf("%llu\t%d\n", thread_start_step[j][i], i)); \ 186 MCA_VERBOSE1(printf("%llu\t%d\n", thread_end_step[j][i], i)); \ 187 } \ 188 MCA_VERBOSE1(printf("%llu\t%d\n", thread_compute_end[i], i)); \ 189 MCA_VERBOSE1(printf("%llu\t%d\n", thread_end[i], i)); \ 190 } \ 192 191 }) 193 192 … … 197 196 198 197 199 #define CLOCK_FREE ({ 200 if (clock_thread_num > 0) { 201 free(thread_start); 202 free(thread_end); 203 free(thread_compute_start); 204 free(thread_compute_end); 205 if (step_number > 0) { 206 free(global_thread_start_step); 207 free(global_thread_end_step); 208 for (int j = 0; j < step_number; j++) {\209 free(thread_start_step[j]); 210 free(thread_end_step[j]); 211 } 212 free(thread_start_step); 213 free(thread_end_step); 214 } 215 } 198 #define CLOCK_FREE ({ \ 199 if (clock_thread_num > 0) { \ 200 free(thread_start); \ 201 free(thread_end); \ 202 free(thread_compute_start); \ 203 free(thread_compute_end); \ 204 if (step_number > 0) { \ 205 free(global_thread_start_step); \ 206 free(global_thread_end_step); \ 
207 for (int32_t j = 0; j < step_number; j++) { \ 208 free(thread_start_step[j]); \ 209 free(thread_end_step[j]); \ 210 } \ 211 free(thread_start_step); \ 212 free(thread_end_step); \ 213 } \ 214 } \ 216 215 }) 217 216
Note: See TracChangeset for help on using the changeset viewer.