Changeset 598 for soft/giet_vm/applications/ocean/main.C
- Timestamp:
- Jul 9, 2015, 2:11:17 PM (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/ocean/main.C
r589 r598 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 2 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 16 17 /*************************************************************************/ 18 /* */ 19 /* SPLASH Ocean Code */ 20 /* */ 21 /* This application studies the role of eddy and boundary currents in */ 22 /* influencing large-scale ocean movements. This implementation uses */ 23 /* dynamically allocated four-dimensional arrays for grid data storage. */ 24 /* */ 25 /* Command line options: */ 26 /* */ 27 /* -mM : Simulate MxM ocean. M must be (power of 2) +2. */ 28 /* -nN : N = number of threads. N must be power of 2. */ 29 /* -eE : E = error tolerance for iterative relaxation. */ 30 /* -rR : R = distance between grid points in meters. */ 31 /* -tT : T = timestep in seconds. */ 32 /* -s : Print timing statistics. */ 33 /* -o : Print out relaxation residual values. */ 34 /* -h : Print out command line options. */ 35 /* */ 36 /* Default: OCEAN -m130 -n1 -e1e-7 -r20000.0 -t28800.0 */ 37 /* */ 38 /* NOTE: This code works under both the FORK and SPROC models. */ 39 /* */ 40 /*************************************************************************/ 41 42 MAIN_ENV 43 44 #define DEFAULT_M 514 45 #define DEFAULT_N 4 46 #define DEFAULT_E 1e-7 47 #define DEFAULT_T 28800.0 48 #define DEFAULT_R 20000.0 49 #define UP 0 50 #define DOWN 1 51 #define LEFT 2 52 #define RIGHT 3 53 #define UPLEFT 4 54 #define UPRIGHT 5 55 #define DOWNLEFT 6 56 #define DOWNRIGHT 7 57 #define PAGE_SIZE 4096 58 59 #include <stdio.h> 60 #include <math.h> 61 #include <stdlib.h> 62 63 #include "decs.h" 64 65 struct multi_struct *multi; 66 struct global_struct *global; 67 struct locks_struct *locks; 68 struct bars_struct *bars; 69 70 struct Global_Private *main_gp; 71 double ****main_psi; 72 double ****main_psim; 73 double ***main_psium; 74 double ***main_psilm; 75 double ***main_psib; 76 double ***main_ga; 77 double ***main_gb; 78 double ****main_work1; 79 double ***main_work2; 80 double ***main_work3; 81 double ****main_work4; 82 double ****main_work5; 83 double ***main_work6; 84 double ****main_work7; 85 double ***main_oldga; 86 double ***main_oldgb; 87 double ****main_q_multi; 88 double ****main_rhs_multi; 89 double ****temparray; 90 double ***tauz; 91 long *main_imx; 92 long *main_jmx; 93 94 long nprocs = DEFAULT_N; 95 const double h1 = 1000.0; 96 const double h3 = 4000.0; 97 const double h = 5000.0; 98 const double lf = -5.12e11; 99 double res = DEFAULT_R; 100 double dtau = DEFAULT_T; 101 const double f0 = 8.3e-5; 102 const double beta = 2.0e-11; 103 const double gpr = 0.02; 104 double ysca; 105 long oim; 106 long jmm1; 107 double tolerance = DEFAULT_E; 108 const double pi = 3.141592653589793; 109 const double t0 = 0.5e-4; 110 const double outday0 = 1.0; 111 const double outday1 = 2.0; 112 const double outday2 = 2.0; 113 const double outday3 = 2.0; 114 const double maxwork = 10000.0; 115 double factjacob; 116 double factlap; 117 118 //TODO : répliquer ça : 119 double *main_lev_res; 120 double *main_lev_tol; 121 double *main_i_int_coeff; 122 double *main_j_int_coeff; 123 long *main_xpts_per_proc; 124 long *main_ypts_per_proc; 125 long main_xprocs; 126 long main_yprocs; 127 long main_numlev; 128 double main_eig2; 129 long main_im = DEFAULT_M; 130 long main_jm; 131 132 long minlevel; 133 long do_stats = 1; 134 long do_output = 0; 135 long *ids_procs; 136 137 138 __attribute__ ((constructor)) int main(int argc, char *argv[]) 139 { 140 long i; 141 long j; 142 long k; 143 long x_part; 144 long y_part; 145 long d_size; 146 long itemp; 147 long jtemp; 148 double procsqrt; 149 long temp = 0; 150 double min_total; 151 double max_total; 152 double avg_total; 153 double avg_wait; 154 double max_wait; 155 double min_wait; 156 double min_multi; 157 double max_multi; 158 double avg_multi; 159 double min_frac; 160 double max_frac; 161 double avg_frac; 162 long imax_wait; 163 long imin_wait; 164 long ch; 165 unsigned long long computeend; 166 unsigned long long start; 167 im = main_im; 168 169 CLOCK(start); 170 171 while ((ch = getopt(argc, argv, "m:n:e:r:t:soh")) != -1) { 172 switch (ch) { 173 case 'm': 174 im = atoi(optarg); 175 if (log_2(im - 2) == -1) { 176 printerr("Grid must be ((power of 2)+2) in each dimension\n"); 177 exit(-1); 178 } 179 break; 180 case 'n': 181 nprocs = atoi(optarg); 182 if (nprocs < 1) { 183 printerr("N must be >= 1\n"); 184 exit(-1); 185 } 186 if (log_2(nprocs) == -1) { 187 printerr("N must be a power of 2\n"); 188 exit(-1); 189 } 190 break; 191 case 'e': 192 tolerance = atof(optarg); 193 break; 194 case 'r': 195 res = atof(optarg); 196 break; 197 case 't': 198 dtau = atof(optarg); 199 break; 200 case 's': 201 do_stats = !do_stats; 202 break; 203 case 'o': 204 do_output = !do_output; 205 break; 206 case 'h': 207 printf("Usage: ocean <options>\n\n"); 208 printf("options:\n"); 209 printf(" -mM : Simulate MxM ocean. M must be (power of 2) + 2 (default = %d).\n", DEFAULT_M); 210 printf(" -nN : N = number of threads. N must be power of 2 (default = %d).\n", DEFAULT_N); 211 printf(" -eE : E = error tolerance for iterative relaxation (default = %f).\n", DEFAULT_E); 212 printf(" -rR : R = distance between grid points in meters (default = %f).\n", DEFAULT_R); 213 printf(" -tT : T = timestep in seconds (default = %f).\n", DEFAULT_T); 214 printf(" -s : Print timing statistics.\n"); 215 printf(" -o : Print out relaxation residual values.\n"); 216 printf(" -h : Print out command line options.\n\n"); 217 exit(0); 218 break; 219 } 220 } 221 222 MAIN_INITENV 223 224 jm = im; 225 226 printf("\n"); 227 printf("Ocean simulation with W-cycle multigrid solver\n"); 228 printf(" Processors : %1ld\n", nprocs); 229 printf(" Grid size : %1ld x %1ld\n", im, jm); 230 printf(" Grid resolution (meters) : %0.2f\n", res); 231 printf(" Time between relaxations (seconds) : %0.0f\n", dtau); 232 printf(" Error tolerance : %0.7g\n", tolerance); 233 printf("\n"); 234 235 xprocs = 0; 236 yprocs = 0; 237 238 procsqrt = sqrt((double) nprocs); 239 j = (long) procsqrt; 240 241 while ((xprocs == 0) && (j > 0)) { 242 k = nprocs / j; 243 if (k * j == nprocs) { 244 if (k > j) { 245 xprocs = j; 246 yprocs = k; 247 } else { 248 xprocs = k; 249 yprocs = j; 250 } 251 } 252 j--; 253 } 254 255 if (xprocs == 0) { 256 printerr("Could not find factors for subblocking\n"); 257 exit(-1); 258 } 259 260 minlevel = 0; 261 itemp = 1; 262 jtemp = 1; 263 numlev = 0; 264 minlevel = 0; 265 266 while (itemp < (im - 2)) { 267 itemp = itemp * 2; 268 jtemp = jtemp * 2; 269 if ((itemp / yprocs > 1) && (jtemp / xprocs > 1)) { 270 numlev++; 271 } 272 } 273 274 if (numlev == 0) { 275 printerr("Must have at least 2 grid points per processor in each dimension\n"); 276 exit(-1); 277 } 278 279 main_imx = (long *) G_MALLOC(numlev * sizeof(long), 0); 280 main_jmx = (long *) G_MALLOC(numlev * sizeof(long), 0); 281 main_lev_res = (double *) G_MALLOC(numlev * sizeof(double), 0); 282 main_lev_tol = (double *) G_MALLOC(numlev * sizeof(double), 0); 283 main_i_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0); 284 main_j_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0); 285 main_xpts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0); 286 main_ypts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0); 287 ids_procs = (long *) G_MALLOC(nprocs * sizeof(long), 0); 288 289 imx = main_imx; 290 jmx = main_jmx; 291 lev_res = main_lev_res; 292 lev_tol = main_lev_tol; 293 i_int_coeff = main_i_int_coeff; 294 j_int_coeff = main_j_int_coeff; 295 xpts_per_proc = main_xpts_per_proc; 296 ypts_per_proc = main_ypts_per_proc; 297 298 for (i = 0; i < nprocs; i++) { 299 ids_procs[i] = i; 300 } 301 302 imx[numlev - 1] = im; 303 jmx[numlev - 1] = jm; 304 lev_res[numlev - 1] = res; 305 lev_tol[numlev - 1] = tolerance; 306 307 for (i = numlev - 2; i >= 0; i--) { 308 imx[i] = ((imx[i + 1] - 2) / 2) + 2; 309 jmx[i] = ((jmx[i + 1] - 2) / 2) + 2; 310 lev_res[i] = lev_res[i + 1] * 2; 311 } 312 313 for (i = 0; i < numlev; i++) { 314 xpts_per_proc[i] = (jmx[i] - 2) / xprocs; 315 ypts_per_proc[i] = (imx[i] - 2) / yprocs; 316 } 317 for (i = numlev - 1; i >= 0; i--) { 318 if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) { 319 minlevel = i + 1; 320 break; 321 } 322 } 323 324 for (i = 0; i < numlev; i++) { 325 temp += imx[i]; 326 } 327 temp = 0; 328 j = 0; 329 for (k = 0; k < numlev; k++) { 330 for (i = 0; i < imx[k]; i++) { 331 j++; 332 temp += jmx[k]; 333 } 334 } 335 336 d_size = nprocs * sizeof(double ***); 337 main_psi = (double ****) G_MALLOC(d_size, 0); 338 main_psim = (double ****) G_MALLOC(d_size, 0); 339 main_work1 = (double ****) G_MALLOC(d_size, 0); 340 main_work4 = (double ****) G_MALLOC(d_size, 0); 341 main_work5 = (double ****) G_MALLOC(d_size, 0); 342 main_work7 = (double ****) G_MALLOC(d_size, 0); 343 temparray = (double ****) G_MALLOC(d_size, -1); 344 345 psi = main_psi; 346 psim = main_psim; 347 work1 = main_work1; 348 work4 = main_work4; 349 work5 = main_work5; 350 work7 = main_work7; 351 352 d_size = 2 * sizeof(double **); 353 for (i = 0; i < nprocs; i++) { 354 psi[i] = (double ***) G_MALLOC(d_size, i); 355 psim[i] = (double ***) G_MALLOC(d_size, i); 356 work1[i] = (double ***) G_MALLOC(d_size, i); 357 work4[i] = (double ***) G_MALLOC(d_size, i); 358 work5[i] = (double ***) G_MALLOC(d_size, i); 359 work7[i] = (double ***) G_MALLOC(d_size, i); 360 temparray[i] = (double ***) G_MALLOC(d_size, i); 361 } 362 363 d_size = nprocs * sizeof(double **); 364 main_psium = (double ***) G_MALLOC(d_size, 0); 365 main_psilm = (double ***) G_MALLOC(d_size, 0); 366 main_psib = (double ***) G_MALLOC(d_size, 0); 367 main_ga = (double ***) G_MALLOC(d_size, 0); 368 main_gb = (double ***) G_MALLOC(d_size, 0); 369 main_work2 = (double ***) G_MALLOC(d_size, 0); 370 main_work3 = (double ***) G_MALLOC(d_size, 0); 371 main_work6 = (double ***) G_MALLOC(d_size, 0); 372 tauz = (double ***) G_MALLOC(d_size, 0); 373 main_oldga = (double ***) G_MALLOC(d_size, 0); 374 main_oldgb = (double ***) G_MALLOC(d_size, 0); 375 376 psium = main_psium; 377 psilm = main_psilm; 378 psib = main_psib; 379 ga = main_ga; 380 gb = main_gb; 381 work2 = main_work2; 382 work3 = main_work3; 383 work6 = main_work6; 384 oldga = main_oldga; 385 oldgb = main_oldgb; 386 387 main_gp = (struct Global_Private *) G_MALLOC((nprocs + 1) * sizeof(struct Global_Private), -1); 388 gp = main_gp; 389 390 for (i = 0; i < nprocs; i++) { 391 gp[i].pad = (char *) G_MALLOC(PAGE_SIZE * sizeof(char), i); 392 gp[i].rel_num_x = (long *) G_MALLOC(numlev * sizeof(long), i); 393 gp[i].rel_num_y = (long *) G_MALLOC(numlev * sizeof(long), i); 394 gp[i].eist = (long *) G_MALLOC(numlev * sizeof(long), i); 395 gp[i].ejst = (long *) G_MALLOC(numlev * sizeof(long), i); 396 gp[i].oist = (long *) G_MALLOC(numlev * sizeof(long), i); 397 gp[i].ojst = (long *) G_MALLOC(numlev * sizeof(long), i); 398 gp[i].rlist = (long *) G_MALLOC(numlev * sizeof(long), i); 399 gp[i].rljst = (long *) G_MALLOC(numlev * sizeof(long), i); 400 gp[i].rlien = (long *) G_MALLOC(numlev * sizeof(long), i); 401 gp[i].rljen = (long *) G_MALLOC(numlev * sizeof(long), i); 402 gp[i].neighbors = (long *) G_MALLOC(8 * sizeof(long), i); 403 gp[i].rownum = (long *) G_MALLOC(sizeof(long), i); 404 gp[i].colnum = (long *) G_MALLOC(sizeof(long), i); 405 gp[i].lpid = (long *) G_MALLOC(sizeof(long), i); 406 gp[i].multi_time = (double *) G_MALLOC(sizeof(double), i); 407 gp[i].total_time = (double *) G_MALLOC(sizeof(double), i); 408 gp[i].sync_time = (double *) G_MALLOC(sizeof(double), i); 409 gp[i].process_time = (double *) G_MALLOC(sizeof(double), i); 410 gp[i].step_start = (double *) G_MALLOC(sizeof(double), i); 411 gp[i].steps_time = (double *) G_MALLOC(10 * sizeof(double), i); 412 *gp[i].multi_time = 0; 413 *gp[i].total_time = 0; 414 *gp[i].sync_time = 0; 415 *gp[i].process_time = 0; 416 *gp[i].lpid = i; 417 } 418 419 subblock(); 420 421 x_part = (jm - 2) / xprocs + 2; 422 y_part = (im - 2) / yprocs + 2; 423 424 d_size = x_part * y_part * sizeof(double) + y_part * sizeof(double *); 425 426 global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct), -1); 427 428 for (i = 0; i < nprocs; i++) { 429 psi[i][0] = (double **) G_MALLOC(d_size, i); 430 psi[i][1] = (double **) G_MALLOC(d_size, i); 431 psim[i][0] = (double **) G_MALLOC(d_size, i); 432 psim[i][1] = (double **) G_MALLOC(d_size, i); 433 psium[i] = (double **) G_MALLOC(d_size, i); 434 psilm[i] = (double **) G_MALLOC(d_size, i); 435 psib[i] = (double **) G_MALLOC(d_size, i); 436 ga[i] = (double **) G_MALLOC(d_size, i); 437 gb[i] = (double **) G_MALLOC(d_size, i); 438 work1[i][0] = (double **) G_MALLOC(d_size, i); 439 work1[i][1] = (double **) G_MALLOC(d_size, i); 440 work2[i] = (double **) G_MALLOC(d_size, i); 441 work3[i] = (double **) G_MALLOC(d_size, i); 442 work4[i][0] = (double **) G_MALLOC(d_size, i); 443 work4[i][1] = (double **) G_MALLOC(d_size, i); 444 work5[i][0] = (double **) G_MALLOC(d_size, i); 445 work5[i][1] = (double **) G_MALLOC(d_size, i); 446 work6[i] = (double **) G_MALLOC(d_size, i); 447 work7[i][0] = (double **) G_MALLOC(d_size, i); 448 work7[i][1] = (double **) G_MALLOC(d_size, i); 449 temparray[i][0] = (double **) G_MALLOC(d_size, i); 450 temparray[i][1] = (double **) G_MALLOC(d_size, i); 451 tauz[i] = (double **) G_MALLOC(d_size, i); 452 oldga[i] = (double **) G_MALLOC(d_size, i); 453 oldgb[i] = (double **) G_MALLOC(d_size, i); 454 } 455 456 oim = im; 457 //f = (double *) G_MALLOC(oim*sizeof(double), 0); 458 multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct), -1); 459 460 d_size = numlev * sizeof(double **); 461 if (numlev % 2 == 1) { /* To make sure that the actual data 462 starts double word aligned, add an extra 463 pointer */ 464 d_size += sizeof(double **); 465 } 466 for (i = 0; i < numlev; i++) { 467 d_size += ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *); 468 } 469 470 d_size *= nprocs; 471 472 if (nprocs % 2 == 1) { /* To make sure that the actual data 473 starts double word aligned, add an extra 474 pointer */ 475 d_size += sizeof(double ***); 476 } 477 478 d_size += nprocs * sizeof(double ***); 479 main_q_multi = (double ****) G_MALLOC(d_size, -1); 480 main_rhs_multi = (double ****) G_MALLOC(d_size, -1); 481 q_multi = main_q_multi; 482 rhs_multi = main_rhs_multi; 483 484 485 locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct), -1); 486 bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct), -1); 487 488 LOCKINIT(locks->idlock) 489 LOCKINIT(locks->psiailock) 490 LOCKINIT(locks->psibilock) 491 LOCKINIT(locks->donelock) 492 LOCKINIT(locks->error_lock) 493 LOCKINIT(locks->bar_lock) 494 #if defined(MULTIPLE_BARRIERS) 495 BARINIT(bars->iteration, nprocs) 496 BARINIT(bars->gsudn, nprocs) 497 BARINIT(bars->p_setup, nprocs) 498 BARINIT(bars->p_redph, nprocs) 499 BARINIT(bars->p_soln, nprocs) 500 BARINIT(bars->p_subph, nprocs) 501 BARINIT(bars->sl_prini, nprocs) 502 BARINIT(bars->sl_psini, nprocs) 503 BARINIT(bars->sl_onetime, nprocs) 504 BARINIT(bars->sl_phase_1, nprocs) 505 BARINIT(bars->sl_phase_2, nprocs) 506 BARINIT(bars->sl_phase_3, nprocs) 507 BARINIT(bars->sl_phase_4, nprocs) 508 BARINIT(bars->sl_phase_5, nprocs) 509 BARINIT(bars->sl_phase_6, nprocs) 510 BARINIT(bars->sl_phase_7, nprocs) 511 BARINIT(bars->sl_phase_8, nprocs) 512 BARINIT(bars->sl_phase_9, nprocs) 513 BARINIT(bars->sl_phase_10, nprocs) 514 BARINIT(bars->error_barrier, nprocs) 515 #else 516 BARINIT(bars->barrier, nprocs) 517 #endif 518 link_all(); 519 520 multi->err_multi = 0.0; 521 i_int_coeff[0] = 0.0; 522 j_int_coeff[0] = 0.0; 523 524 for (i = 0; i < numlev; i++) { 525 i_int_coeff[i] = 1.0 / (imx[i] - 1); 526 j_int_coeff[i] = 1.0 / (jmx[i] - 1); 527 } 528 529 /* 530 initialize constants and variables 531 532 id is a global shared variable that has fetch-and-add operations 533 performed on it by processes to obtain their pids. 534 */ 535 536 //global->id = 0; 537 global->trackstart = 0; 538 global->psibi = 0.0; 539 540 factjacob = -1. / (12. * res * res); 541 factlap = 1. / (res * res); 542 eig2 = -h * f0 * f0 / (h1 * h3 * gpr); 543 544 jmm1 = jm - 1; 545 ysca = ((double) jmm1) * res; 546 im = (imx[numlev - 1] - 2) / yprocs + 2; 547 jm = (jmx[numlev - 1] - 2) / xprocs + 2; 548 549 main_im = im; 550 main_jm = jm; 551 main_numlev = numlev; 552 main_xprocs = xprocs; 553 main_yprocs = yprocs; 554 main_eig2 = eig2; 555 556 if (do_output) { 557 printf(" MULTIGRID OUTPUTS\n"); 558 } 559 560 CREATE(slave, nprocs); 561 WAIT_FOR_END(nprocs); 562 CLOCK(computeend); 563 564 printf("\n"); 565 printf(" PROCESS STATISTICS\n"); 566 printf(" Total Multigrid Multigrid\n"); 567 printf(" Proc Time Time Fraction\n"); 568 printf(" 0 %15.0f %15.0f %10.3f\n", (*gp[0].total_time), (*gp[0].multi_time), (*gp[0].multi_time) / (*gp[0].total_time)); 569 570 if (do_stats) { 571 double phase_time; 572 min_total = max_total = avg_total = (*gp[0].total_time); 573 min_multi = max_multi = avg_multi = (*gp[0].multi_time); 574 min_frac = max_frac = avg_frac = (*gp[0].multi_time) / (*gp[0].total_time); 575 avg_wait = *gp[0].sync_time; 576 max_wait = *gp[0].sync_time; 577 min_wait = *gp[0].sync_time; 578 imax_wait = 0; 579 imin_wait = 0; 580 581 for (i = 1; i < nprocs; i++) { 582 if ((*gp[i].total_time) > max_total) { 583 max_total = (*gp[i].total_time); 584 } 585 if ((*gp[i].total_time) < min_total) { 586 min_total = (*gp[i].total_time); 587 } 588 if ((*gp[i].multi_time) > max_multi) { 589 max_multi = (*gp[i].multi_time); 590 } 591 if ((*gp[i].multi_time) < min_multi) { 592 min_multi = (*gp[i].multi_time); 593 } 594 if ((*gp[i].multi_time) / (*gp[i].total_time) > max_frac) { 595 max_frac = (*gp[i].multi_time) / (*gp[i].total_time); 596 } 597 if ((*gp[i].multi_time) / (*gp[i].total_time) < min_frac) { 598 min_frac = (*gp[i].multi_time) / (*gp[i].total_time); 599 } 600 avg_total += (*gp[i].total_time); 601 avg_multi += (*gp[i].multi_time); 602 avg_frac += (*gp[i].multi_time) / (*gp[i].total_time); 603 avg_wait += (*gp[i].sync_time); 604 if (max_wait < (*gp[i].sync_time)) { 605 max_wait = (*gp[i].sync_time); 606 imax_wait = i; 607 } 608 if (min_wait > (*gp[i].sync_time)) { 609 min_wait = (*gp[i].sync_time); 610 imin_wait = i; 611 } 612 } 613 avg_total = avg_total / nprocs; 614 avg_multi = avg_multi / nprocs; 615 avg_frac = avg_frac / nprocs; 616 avg_wait = avg_wait / nprocs; 617 for (i = 1; i < nprocs; i++) { 618 printf(" %3ld %15.0f %15.0f %10.3f\n", i, (*gp[i].total_time), (*gp[i].multi_time), (*gp[i].multi_time) / (*gp[i].total_time)); 619 } 620 printf(" Avg %15.0f %15.0f %10.3f\n", avg_total, avg_multi, avg_frac); 621 printf(" Min %15.0f %15.0f %10.3f\n", min_total, min_multi, min_frac); 622 printf(" Max %15.0f %15.0f %10.3f\n", max_total, max_multi, max_frac); 623 624 printf("\n\n Sync\n"); 625 printf(" Proc Time Fraction\n"); 626 for (i = 0; i < nprocs; i++) { 627 printf(" %ld %u %f\n", i, (unsigned int)*gp[i].sync_time, *gp[i].sync_time / ((long)(*gp[i].total_time))); 628 } 629 630 printf(" Avg %f %f\n", avg_wait, (double) avg_wait / (long) (computeend - global->trackstart)); 631 printf(" Min %f %f\n", min_wait, (double) min_wait / (long) (*gp[imin_wait].total_time)); 632 printf(" Max %f %f\n", max_wait, (double) max_wait / (long) (*gp[imax_wait].total_time)); 633 634 printf("\nPhases Avg :\n\n"); 635 for (i = 0; i < 10; i++) { 636 phase_time = 0; 637 for (j = 0; j < nprocs; j++) { 638 phase_time += gp[j].steps_time[i]; 639 } 640 phase_time /= (double) nprocs; 641 printf(" %d = %f (fraction %f)\n", i + 1, phase_time, phase_time / (long) (computeend - global->trackstart)); 642 } 643 } 644 printf("\n"); 645 646 global->starttime = start; 647 printf(" TIMING INFORMATION\n"); 648 printf("[NPROCS] : %16ld\n", nprocs); 649 printf("[START1] : %16llu\n", global->starttime); 650 printf("[START2] : %16llu\n", global->trackstart); 651 printf("[END] : %16llu\n", computeend); 652 printf("[TOTAL] : %16llu\n", computeend - global->starttime); // With init 653 printf("[PARALLEL_COMPUTE] : %16llu\n", computeend - global->trackstart); // Without init 654 printf("(excludes first timestep)\n"); 655 printf("\n"); 656 657 MAIN_END 658 659 } 660 661 long log_2(long number) 662 { 663 long cumulative = 1; 664 long out = 0; 665 long done = 0; 666 667 while ((cumulative < number) && (!done) && (out < 50)) { 668 if (cumulative == number) { 669 done = 1; 670 } else { 671 cumulative = cumulative * 2; 672 out++; 673 } 674 } 675 676 if (cumulative == number) { 677 return (out); 678 } else { 679 return (-1); 680 } 681 } 682 683 void printerr(char *s) 684 { 685 fprintf(stderr, "ERROR: %s\n", s); 686 } 687 688 689 // Local Variables: 690 // tab-width: 4 691 // c-basic-offset: 4 692 // c-file-offsets:((innamespace . 0)(inline-open . 0)) 693 // indent-tabs-mode: nil 694 // End: 695 696 // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4
Note: See TracChangeset
for help on using the changeset viewer.