Changeset 822 for soft/giet_vm/applications/rosenfeld/include
- Timestamp:
- Jun 1, 2016, 10:25:43 AM (8 years ago)
- Location:
- soft/giet_vm/applications/rosenfeld/include
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/rosenfeld/include/clock.h
r821 r822 31 31 32 32 33 static void local_sort_asc(uint 32_t tab[], int size) {34 int tmp;35 int i, j;33 static void local_sort_asc(uint64_t tab[], int32_t size) { 34 int32_t tmp; 35 int32_t i, j; 36 36 for (i = 0; i < size; i++) { 37 uint 32_t min = tab[i];38 int jmin = i;37 uint64_t min = tab[i]; 38 int32_t jmin = i; 39 39 for (j = i + 1; j < size; j++) { 40 40 if (tab[j] < min) { … … 51 51 52 52 53 #define CLOCK_DEC uint 32_t app_start; \54 uint 32_t app_end; \55 uint 32_t app_create; \56 uint 32_t app_join; \57 uint 32_t * thread_start; \58 uint 32_t * thread_end; \59 uint 32_t * thread_compute_start; \60 uint 32_t * thread_compute_end; \53 #define CLOCK_DEC uint64_t app_start; \ 54 uint64_t app_end; \ 55 uint64_t app_create; \ 56 uint64_t app_join; \ 57 uint64_t * thread_start; \ 58 uint64_t * thread_end; \ 59 uint64_t * thread_compute_start; \ 60 uint64_t * thread_compute_end; \ 61 61 int32_t step_number; \ 62 62 int32_t clock_thread_num; \ 63 uint 32_t ** thread_start_step; \64 uint 32_t ** thread_end_step; \65 uint 32_t global_thread_start; \66 uint 32_t global_thread_end; \67 uint 32_t global_thread_compute_start; \68 uint 32_t global_thread_compute_end; \69 uint 32_t * global_thread_start_step; \70 uint 32_t * global_thread_end_step; \63 uint64_t ** thread_start_step; \ 64 uint64_t ** thread_end_step; \ 65 uint64_t global_thread_start; \ 66 uint64_t global_thread_end; \ 67 uint64_t global_thread_compute_start; \ 68 uint64_t global_thread_compute_end; \ 69 uint64_t * global_thread_start_step; \ 70 uint64_t * global_thread_end_step; 71 71 72 72 #if TARGET_OS == GIETVM … … 76 76 struct timeval full_time; \ 77 77 gettimeofday(&full_time, NULL); \ 78 x = (u nsigned long) ((full_time.tv_usec + full_time.tv_sec * 1000000) / 1000); \78 x = (uint64_t) ((full_time.tv_usec + full_time.tv_sec * 1000000)); \ 79 79 }) 80 80 #endif … … 84 84 clock_thread_num = (x); \ 85 85 step_number = (y); \ 86 global_thread_start = 0xFFFFFFFF LLU;\86 global_thread_start = 
0xFFFFFFFFFFFFFFFFLLU; \ 87 87 global_thread_end = 0; \ 88 global_thread_compute_start = 0xFFFFFFFF LLU;\88 global_thread_compute_start = 0xFFFFFFFFFFFFFFFFLLU; \ 89 89 global_thread_compute_end = 0; \ 90 90 if ((x) > 0) { \ 91 thread_start = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \92 thread_end = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \93 thread_compute_start = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \94 thread_compute_end = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \91 thread_start = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 92 thread_end = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 93 thread_compute_start = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 94 thread_compute_end = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 95 95 if ((y) > 0) { \ 96 global_thread_start_step = (uint 32_t *) malloc(sizeof(uint32_t) * (y)); \97 global_thread_end_step = (uint 32_t *) malloc(sizeof(uint32_t) * (y)); \98 thread_start_step = (uint 32_t **) malloc(sizeof(uint32_t *) * (y)); \99 thread_end_step = (uint 32_t **) malloc(sizeof(uint32_t *) * (y)); \100 for (int j = 0; j < (y); j++) {\101 global_thread_start_step[j] = 0xFFFFFFFF LU;\96 global_thread_start_step = (uint64_t *) malloc(sizeof(uint64_t) * (y)); \ 97 global_thread_end_step = (uint64_t *) malloc(sizeof(uint64_t) * (y)); \ 98 thread_start_step = (uint64_t **) malloc(sizeof(uint64_t *) * (y)); \ 99 thread_end_step = (uint64_t **) malloc(sizeof(uint64_t *) * (y)); \ 100 for (int32_t j = 0; j < (y); j++) { \ 101 global_thread_start_step[j] = 0xFFFFFFFFFFFFFFFFLU; \ 102 102 global_thread_end_step[j] = 0; \ 103 thread_start_step[j] = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \104 thread_end_step[j] = (uint 32_t *) malloc(sizeof(uint32_t) * (x)); \103 thread_start_step[j] = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 104 thread_end_step[j] = (uint64_t *) malloc(sizeof(uint64_t) * (x)); \ 105 105 } \ 106 106 } \ … … 121 121 122 122 123 // x = number of threads124 123 #define 
CLOCK_FINALIZE ({ \ 125 for (int i = 0; i < clock_thread_num; i++) {\124 for (int32_t i = 0; i < clock_thread_num; i++) { \ 126 125 if (thread_start[i] < global_thread_start) { \ 127 126 global_thread_start = thread_start[i]; \ … … 136 135 global_thread_compute_end = thread_compute_end[i]; \ 137 136 } \ 138 for (int j = 0; j < step_number; j++) {\137 for (int32_t j = 0; j < step_number; j++) { \ 139 138 if (thread_start_step[j][i] < global_thread_start_step[j]) { \ 140 139 global_thread_start_step[j] = thread_start_step[j][i]; \ … … 147 146 }) 148 147 149 #define PRINT_CLOCK ({ \150 printf("Timestamps:\n");\151 printf("[APP_START] : %d\n", app_start); \152 printf("[APP_CREATE] : %d\n", app_create); \153 printf("[THREAD_START] : %d\n", global_thread_start); \154 printf("[THREAD_COMPUTE_START] : %d\n", global_thread_compute_start); \155 for (int j = 0; j < step_number; j++) {\156 printf("[THREAD_START_STEP_%d] : %d\n", j, global_thread_start_step[j]); \157 printf("[THREAD_END_STEP_%d] : %d\n", j, global_thread_end_step[j]); \158 } \159 printf("[THREAD_COMPUTE_END] : %d\n", global_thread_compute_end); \160 printf("[THREAD_END] : %d\n", global_thread_end); \161 printf("[APP_JOIN] : %d\n", app_join); \162 printf("[APP_END] : %d\n", app_end); \163 printf("Durations (in cycles):\n");\164 printf("[TOTAL] : %d\n", app_end - app_start); \165 printf("[THREAD] : %d\n", app_join - app_create); \166 printf("[PARALLEL] : %d\n", global_thread_end - global_thread_start); \167 printf("[PARALLEL_COMPUTE] : %d\n", global_thread_compute_end - global_thread_compute_start); \168 for (int j = 0; j < step_number; j++) {\169 printf("[THREAD_STEP_%d] : %d\n", j, global_thread_end_step[j] - global_thread_start_step[j]); \170 } \171 printf("\n");\172 printf("*** All threads times output in a gnuplot data-style ***\n");\173 local_sort_asc(thread_start, clock_thread_num); \174 local_sort_asc(thread_compute_start, clock_thread_num); \175 local_sort_asc(thread_compute_end, clock_thread_num); \176 
local_sort_asc(thread_end, clock_thread_num); \177 for (int j = 0; j < step_number; j++) {\178 local_sort_asc(thread_start_step[j], clock_thread_num); \179 local_sort_asc(thread_end_step[j], clock_thread_num); \180 } \181 printf("# cycle thread_id\n");\182 for (int i = 0; i < clock_thread_num; i++) {\183 printf("%d\t%d\n", thread_start[i], i); \184 printf("%d\t%d\n", thread_compute_start[i], i); \185 for (int j = 0; j < step_number; j++) {\186 printf("%d\t%d\n", thread_start_step[j][i], i); \187 printf("%d\t%d\n", thread_end_step[j][i], i); \188 } \189 printf("%d\t%d\n", thread_compute_end[i], i); \190 printf("%d\t%d\n", thread_end[i], i); \191 } \148 #define PRINT_CLOCK ({ \ 149 MCA_VERBOSE1(printf("Timestamps:\n")); \ 150 MCA_VERBOSE1(printf("[APP_START] : %llu\n", app_start)); \ 151 MCA_VERBOSE1(printf("[APP_CREATE] : %llu\n", app_create)); \ 152 MCA_VERBOSE1(printf("[THREAD_START] : %llu\n", global_thread_start)); \ 153 MCA_VERBOSE1(printf("[THREAD_COMPUTE_START] : %llu\n", global_thread_compute_start)); \ 154 for (int32_t j = 0; j < step_number; j++) { \ 155 MCA_VERBOSE1(printf("[THREAD_START_STEP_%d] : %llu\n", j, global_thread_start_step[j])); \ 156 MCA_VERBOSE1(printf("[THREAD_END_STEP_%d] : %llu\n", j, global_thread_end_step[j])); \ 157 } \ 158 MCA_VERBOSE1(printf("[THREAD_COMPUTE_END] : %llu\n", global_thread_compute_end)); \ 159 MCA_VERBOSE1(printf("[THREAD_END] : %llu\n", global_thread_end)); \ 160 MCA_VERBOSE1(printf("[APP_JOIN] : %llu\n", app_join)); \ 161 MCA_VERBOSE1(printf("[APP_END] : %llu\n", app_end)); \ 162 MCA_VERBOSE1(printf("Durations (in cycles):\n")); \ 163 MCA_VERBOSE1(printf("[TOTAL] : %llu\n", app_end - app_start)); \ 164 MCA_VERBOSE1(printf("[THREAD] : %llu\n", app_join - app_create)); \ 165 MCA_VERBOSE1(printf("[PARALLEL] : %llu\n", global_thread_end - global_thread_start)); \ 166 MCA_VERBOSE1(printf("[PARALLEL_COMPUTE] : %llu\n", global_thread_compute_end - global_thread_compute_start)); \ 167 for (int32_t j = 0; j < step_number; 
j++) { \ 168 MCA_VERBOSE1(printf("[THREAD_STEP_%d] : %llu\n", j, global_thread_end_step[j] - global_thread_start_step[j])); \ 169 } \ 170 MCA_VERBOSE1(printf("\n")); \ 171 MCA_VERBOSE1(printf("*** All threads times output in a gnuplot data-style ***\n")); \ 172 local_sort_asc(thread_start, clock_thread_num); \ 173 local_sort_asc(thread_compute_start, clock_thread_num); \ 174 local_sort_asc(thread_compute_end, clock_thread_num); \ 175 local_sort_asc(thread_end, clock_thread_num); \ 176 for (int32_t j = 0; j < step_number; j++) { \ 177 local_sort_asc(thread_start_step[j], clock_thread_num); \ 178 local_sort_asc(thread_end_step[j], clock_thread_num); \ 179 } \ 180 MCA_VERBOSE1(printf("# cycle thread_id\n")); \ 181 for (int32_t i = 0; i < clock_thread_num; i++) { \ 182 MCA_VERBOSE1(printf("%llu\t%d\n", thread_start[i], i)); \ 183 MCA_VERBOSE1(printf("%llu\t%d\n", thread_compute_start[i], i)); \ 184 for (int32_t j = 0; j < step_number; j++) { \ 185 MCA_VERBOSE1(printf("%llu\t%d\n", thread_start_step[j][i], i)); \ 186 MCA_VERBOSE1(printf("%llu\t%d\n", thread_end_step[j][i], i)); \ 187 } \ 188 MCA_VERBOSE1(printf("%llu\t%d\n", thread_compute_end[i], i)); \ 189 MCA_VERBOSE1(printf("%llu\t%d\n", thread_end[i], i)); \ 190 } \ 192 191 }) 193 192 … … 197 196 198 197 199 #define CLOCK_FREE ({ 200 if (clock_thread_num > 0) { 201 free(thread_start); 202 free(thread_end); 203 free(thread_compute_start); 204 free(thread_compute_end); 205 if (step_number > 0) { 206 free(global_thread_start_step); 207 free(global_thread_end_step); 208 for (int j = 0; j < step_number; j++) {\209 free(thread_start_step[j]); 210 free(thread_end_step[j]); 211 } 212 free(thread_start_step); 213 free(thread_end_step); 214 } 215 } 198 #define CLOCK_FREE ({ \ 199 if (clock_thread_num > 0) { \ 200 free(thread_start); \ 201 free(thread_end); \ 202 free(thread_compute_start); \ 203 free(thread_compute_end); \ 204 if (step_number > 0) { \ 205 free(global_thread_start_step); \ 206 free(global_thread_end_step); \ 
207 for (int32_t j = 0; j < step_number; j++) { \ 208 free(thread_start_step[j]); \ 209 free(thread_end_step[j]); \ 210 } \ 211 free(thread_start_step); \ 212 free(thread_end_step); \ 213 } \ 214 } \ 216 215 }) 217 216 -
soft/giet_vm/applications/rosenfeld/include/config.h
r821 r822 1 2 #ifndef __CONFIG_H_ 3 #define __CONFIG_H_ 1 4 2 5 #define SLOW 0 … … 4 7 #define FAST 1 5 8 #define PYR_BARRIERS 0 6 #define PARMERGE 1 9 #define PARMERGE 0 10 #define ARSP 0 7 11 8 12 #if FAST && SLOW … … 14 18 #endif 15 19 16 #if PARMERGE && (!FEATURES || !FAST) 17 #error "PARMERGE is only supported for the FAST version with FEATURES enabled" 20 21 #if FAST 22 #if !FEATURES && !PARMERGE && !ARSP 23 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Rosenfeld_Dist(e, f, T, D, alpha) 24 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Rosenfeld_Dist(e, f, g, T, D, alpha) 25 #elif !FEATURES && !PARMERGE && ARSP 26 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Arsp_Rosenfeld_Dist(e, f, T, D, alpha) 27 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Arsp_Rosenfeld_Dist(e, f, g, T, D, alpha) 28 #error "Configuration Not implemented" 29 #elif !FEATURES && PARMERGE && !ARSP 30 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Parallel_Rosenfeld_Dist(e, f, T, D, alpha, F) 31 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Parallel_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 32 #error "Configuration Not implemented" 33 #elif !FEATURES && PARMERGE && ARSP 34 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Parallel_Arsp_Rosenfeld_Dist(e, f, T, D, alpha, F) 35 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Parallel_Arsp_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 36 #elif FEATURES && !PARMERGE && !ARSP 37 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Features_Rosenfeld_Dist(e, f, T, D, alpha, F) 38 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Features_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 39 #elif FEATURES && !PARMERGE && ARSP 40 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Features_Arsp_Rosenfeld_Dist(e, f, T, D, alpha, F) 41 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Features_Arsp_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 42 #error "Configuration Not implemented" 43 #elif FEATURES && PARMERGE 
&& !ARSP 44 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Parallel_Features_Rosenfeld_Dist(e, f, T, D, alpha, F) 45 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Parallel_Features_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 46 #elif FEATURES && PARMERGE && ARSP 47 #define vuse2_Rosenfeld(e, f, T, D, alpha, F) vuse2_Parallel_Features_Arsp_Rosenfeld_Dist(e, f, T, D, alpha, F) 48 #define vuse3_Rosenfeld(e, f, g, T, D, alpha, F) vuse3_Parallel_Features_Arsp_Rosenfeld_Dist(e, f, g, T, D, alpha, F) 49 #error "Configuration Not implemented" 50 #endif 18 51 #endif 19 52 53 #if SLOW 54 #if FEATURES 55 #define SetRoot_Rosenfeld(D, r, eps, alpha, F) SetRoot_Features_Rosenfeld_Dist(D, r, eps, alpha, F) 56 #else 57 #define SetRoot_Rosenfeld(D, r, eps, alpha, F) SetRoot_Rosenfeld_Dist(D, r, eps, alpha) 58 #endif 59 #if PARMERGE 60 #error "Configuration SLOW and PARMERGE Not implemented" 61 #endif 62 #if ARSP 63 #error "Configuration SLOW and ARSP Not implemented" 64 #endif 65 #endif 66 67 // Verbose level 68 // 0 : No trace at all 69 // 1 : Traces compatible with execution times measurements, 70 // in particular, there must not be any traces in the 71 // "compute" sections 72 // 2 : Standard level 73 // 3 : Maximum (debug) level 74 #define MCA_VERBOSE_LEVEL 2 75 76 #endif // __CONFIG_H__ 77 78 -
soft/giet_vm/applications/rosenfeld/include/ecc_features.h
r821 r822 25 25 // ------------------------------------------------------------------------ 26 26 27 // @QM 28 // Je m'autorise à mettre des champs de la structure 29 // conditionnellement vis-à-vis de flags, car : 30 // 1. Il ne s'agit pas d'une bibliothèque 31 // 2. Tous les fichiers .c dépendent de tous les .h, et donc 32 // en particulier du fichier config.h qui définit les flags 33 // Il est donc impossible de mélanger deux fichiers binaires 34 // qui ont des définitions différentes de cette structure 27 35 typedef struct { 28 29 uint16 xmin; 30 uint16 xmax; 31 uint16 ymin; 32 uint16 ymax; 33 34 uint32 S; 35 36 uint32 Sx; 37 uint32 Sy; 36 #if FEATURES 37 uint16 xmin; 38 uint16 xmax; 39 uint16 ymin; 40 uint16 ymax; 41 42 uint32 S; 43 44 uint32 Sx; 45 uint32 Sy; 46 #endif 38 47 #if PARMERGE 39 48 pthread_spinlock_t lock; 40 49 #endif 41 42 50 } RegionStats; 43 51 … … 210 218 int RegionStatsVector_Match (RegionStats *S1, int i0, int i1, RegionStats *S2, int j0, int j1); 211 219 212 #ifdef __cplusplus213 }214 #endif215 220 216 221 #endif /* __FEATURES_H__ */ -
soft/giet_vm/applications/rosenfeld/include/mca.h
r821 r822 26 26 27 27 28 // QM : using mutex lock instead of mutexlock,29 // because apparently mutexlocks cause a bug in valgrind28 // QM : using mutex lock instead of spinlock, 29 // because apparently spinlocks cause a bug in valgrind 30 30 // (solved but the installed version is not recent enough) 31 31 // cf. https://bugs.kde.org/show_bug.cgi?id=336435 32 32 pthread_mutex_t print_lock; 33 33 34 #define MCA_VERBOSE0(X) ({ \ 34 35 36 37 #if MCA_VERBOSE_LEVEL >= 1 38 #define MCA_VERBOSE1(X) ({ \ 35 39 pthread_mutex_lock(&print_lock); \ 36 40 X; \ 37 41 pthread_mutex_unlock(&print_lock); \ 38 42 }) 39 #define MCA_VERBOSE1(X) ({ \ 43 #else 44 #define MCA_VERBOSE1(X) 45 #endif 46 47 #if MCA_VERBOSE_LEVEL >= 2 48 #define MCA_VERBOSE2(X) ({ \ 40 49 pthread_mutex_lock(&print_lock); \ 41 50 X; \ 42 51 pthread_mutex_unlock(&print_lock); \ 43 52 }) 44 #define MCA_DISPLAY0(X) ({ \ 53 #else 54 #define MCA_VERBOSE2(X) 55 #endif 56 57 #if MCA_VERBOSE_LEVEL >= 3 58 #define MCA_VERBOSE3(X) ({ \ 45 59 pthread_mutex_lock(&print_lock); \ 46 60 X; \ 47 61 pthread_mutex_unlock(&print_lock); \ 48 62 }) 49 #define MCA_DISPLAY1(X) ({ \ 50 pthread_mutex_lock(&print_lock); \ 51 X; \ 52 pthread_mutex_unlock(&print_lock); \ 53 }) 54 55 56 #define MCA_VERBOSE2(X) 57 /* 58 #define MCA_VERBOSE2(X) ({ \ 59 pthread_mutex_lock(&print_lock); \ 60 X; \ 61 pthread_mutex_unlock(&print_lock); \ 62 }) 63 */ 64 65 #define MCA_DISPLAY2(X) 66 63 #else 64 #define MCA_VERBOSE3(X) 65 #endif 67 66 68 67
Note: See TracChangeset
for help on using the changeset viewer.