#ifndef _CLOCK_H_ #define _CLOCK_H_ #include #include "nrc_os_config.h" #if TARGET_OS == LINUX #include #endif /** * The macros should be called in the following order: * - CLOCK_DEC; * - CLOCK_INIT(num_threads, num_steps); * - CLOCK_APP_START; * - CLOCK_APP_CREATE; * - CLOCK_THREAD_START(thread_id); * - CLOCK_THREAD_COMPUTE_START(thread_id; * - CLOCK_THREAD_START_STEP(thread_id, step_id) * - CLOCK_THREAD_END_STEP(thread_id, step_id) * - (repeat num_steps times) * - CLOCK_THREAD_COMPUTE_END(thread_id); * - CLOCK_THREAD_END(thread_id) * - CLOCK_APP_JOIN; * - CLOCK_APP_END; * - CLOCK_FINALIZE(num_threads); * - PRINT_CLOCK; * - CLOCK_FREE; */ static void local_sort_asc(uint32_t tab[], int size) { int tmp; int i, j; for (i = 0; i < size; i++) { uint32_t min = tab[i]; int jmin = i; for (j = i + 1; j < size; j++) { if (tab[j] < min) { jmin = j; min = tab[j]; } } tmp = tab[i]; tab[i] = min; tab[jmin] = tmp; } } #define CLOCK_DEC uint32_t app_start; \ uint32_t app_end; \ uint32_t app_create; \ uint32_t app_join; \ uint32_t * thread_start; \ uint32_t * thread_end; \ uint32_t * thread_compute_start; \ uint32_t * thread_compute_end; \ int32_t step_number; \ int32_t clock_thread_num; \ uint32_t ** thread_start_step; \ uint32_t ** thread_end_step; \ uint32_t global_thread_start; \ uint32_t global_thread_end; \ uint32_t global_thread_compute_start; \ uint32_t global_thread_compute_end; \ uint32_t * global_thread_start_step; \ uint32_t * global_thread_end_step; \ #if TARGET_OS == GIETVM #define CLOCK(x) ({ x = giet_proctime(); }) #elif TARGET_OS == LINUX #define CLOCK(x) ({ \ struct timeval full_time; \ gettimeofday(&full_time, NULL); \ x = (unsigned long) ((full_time.tv_usec + full_time.tv_sec * 1000000) / 1000); \ }) #endif // x = number of threads, y = number of steps #define CLOCK_INIT(x, y) ({ \ clock_thread_num = (x); \ step_number = (y); \ global_thread_start = 0xFFFFFFFFLLU; \ global_thread_end = 0; \ global_thread_compute_start = 0xFFFFFFFFLLU; \ global_thread_compute_end = 0; \ if ((x) > 0) { \ thread_start = (uint32_t *) malloc(sizeof(uint32_t) * (x)); \ thread_end = (uint32_t *) malloc(sizeof(uint32_t) * (x)); \ thread_compute_start = (uint32_t *) malloc(sizeof(uint32_t) * (x)); \ thread_compute_end = (uint32_t *) malloc(sizeof(uint32_t) * (x)); \ if ((y) > 0) { \ global_thread_start_step = (uint32_t *) malloc(sizeof(uint32_t) * (y)); \ global_thread_end_step = (uint32_t *) malloc(sizeof(uint32_t) * (y)); \ thread_start_step = (uint32_t **) malloc(sizeof(uint32_t *) * (y)); \ thread_end_step = (uint32_t **) malloc(sizeof(uint32_t *) * (y)); \ for (int j = 0; j < (y); j++) { \ global_thread_start_step[j] = 0xFFFFFFFFLU; \ global_thread_end_step[j] = 0; \ thread_start_step[j] = (uint32_t *) malloc(sizeof(uint32_t) * (x)); \ thread_end_step[j] = (uint32_t *) malloc(sizeof(uint32_t) * (x)); \ } \ } \ } \ }) #define CLOCK_APP_START ({ CLOCK(app_start); }) #define CLOCK_APP_END ({ CLOCK(app_end); }) #define CLOCK_APP_CREATE ({ CLOCK(app_create); }) #define CLOCK_APP_JOIN ({ CLOCK(app_join); }) #define CLOCK_THREAD_START(x) ({ CLOCK(thread_start[x]); }) #define CLOCK_THREAD_END(x) ({ CLOCK(thread_end[x]); }) #define CLOCK_THREAD_COMPUTE_START(x) ({ CLOCK(thread_compute_start[x]); }) #define CLOCK_THREAD_COMPUTE_END(x) ({ CLOCK(thread_compute_end[x]); }) #define CLOCK_THREAD_START_STEP(x, y) ({ CLOCK(thread_start_step[y][x]); }) #define CLOCK_THREAD_END_STEP(x, y) ({ CLOCK(thread_end_step[y][x]); }) // x = number of threads #define CLOCK_FINALIZE ({ \ for (int i = 0; i < clock_thread_num; i++) { \ if (thread_start[i] < global_thread_start) { \ global_thread_start = thread_start[i]; \ } \ if (thread_compute_start[i] < global_thread_compute_start) { \ global_thread_compute_start = thread_compute_start[i]; \ } \ if (thread_end[i] > global_thread_end) { \ global_thread_end = thread_end[i]; \ } \ if (thread_compute_end[i] > global_thread_compute_end) { \ global_thread_compute_end = thread_compute_end[i]; \ } \ for (int j = 0; j < step_number; j++) { \ if (thread_start_step[j][i] < global_thread_start_step[j]) { \ global_thread_start_step[j] = thread_start_step[j][i]; \ } \ if (thread_end_step[j][i] > global_thread_end_step[j]) { \ global_thread_end_step[j] = thread_end_step[j][i]; \ } \ } \ } \ }) #define PRINT_CLOCK ({ \ printf("Timestamps:\n"); \ printf("[APP_START] : %d\n", app_start); \ printf("[APP_CREATE] : %d\n", app_create); \ printf("[THREAD_START] : %d\n", global_thread_start); \ printf("[THREAD_COMPUTE_START] : %d\n", global_thread_compute_start); \ for (int j = 0; j < step_number; j++) { \ printf("[THREAD_START_STEP_%d] : %d\n", j, global_thread_start_step[j]); \ printf("[THREAD_END_STEP_%d] : %d\n", j, global_thread_end_step[j]); \ } \ printf("[THREAD_COMPUTE_END] : %d\n", global_thread_compute_end); \ printf("[THREAD_END] : %d\n", global_thread_end); \ printf("[APP_JOIN] : %d\n", app_join); \ printf("[APP_END] : %d\n", app_end); \ printf("Durations (in cycles):\n"); \ printf("[TOTAL] : %d\n", app_end - app_start); \ printf("[THREAD] : %d\n", app_join - app_create); \ printf("[PARALLEL] : %d\n", global_thread_end - global_thread_start); \ printf("[PARALLEL_COMPUTE] : %d\n", global_thread_compute_end - global_thread_compute_start); \ for (int j = 0; j < step_number; j++) { \ printf("[THREAD_STEP_%d] : %d\n", j, global_thread_end_step[j] - global_thread_start_step[j]); \ } \ printf("\n"); \ printf("*** All threads times output in a gnuplot data-style ***\n"); \ local_sort_asc(thread_start, clock_thread_num); \ local_sort_asc(thread_compute_start, clock_thread_num); \ local_sort_asc(thread_compute_end, clock_thread_num); \ local_sort_asc(thread_end, clock_thread_num); \ for (int j = 0; j < step_number; j++) { \ local_sort_asc(thread_start_step[j], clock_thread_num); \ local_sort_asc(thread_end_step[j], clock_thread_num); \ } \ printf("# cycle thread_id\n"); \ for (int i = 0; i < clock_thread_num; i++) { \ printf("%d\t%d\n", thread_start[i], i); \ printf("%d\t%d\n", thread_compute_start[i], i); \ for (int j = 0; j < step_number; j++) { \ printf("%d\t%d\n", thread_start_step[j][i], i); \ printf("%d\t%d\n", thread_end_step[j][i], i); \ } \ printf("%d\t%d\n", thread_compute_end[i], i); \ printf("%d\t%d\n", thread_end[i], i); \ } \ }) #define CLOCK_FREE ({ \ if (clock_thread_num > 0) { \ free(thread_start); \ free(thread_end); \ free(thread_compute_start); \ free(thread_compute_end); \ if (step_number > 0) { \ free(global_thread_start_step); \ free(global_thread_end_step); \ for (int j = 0; j < step_number; j++) { \ free(thread_start_step[j]); \ free(thread_end_step[j]); \ } \ free(thread_start_step); \ free(thread_end_step); \ } \ } \ }) #endif