Changeset 589 for soft/giet_vm/applications/ocean/main.C
- Timestamp:
- Jul 8, 2015, 3:57:15 PM (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/ocean/main.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /*************************************************************************/18 /* */19 /* SPLASH Ocean Code */20 /* */21 /* This application studies the role of eddy and boundary currents in */22 /* influencing large-scale ocean movements. This implementation uses */23 /* dynamically allocated four-dimensional arrays for grid data storage. */24 /* */25 /* Command line options: */26 /* */27 /* -mM : Simulate MxM ocean. M must be (power of 2) +2. */28 /* -nN : N = number of threads. N must be power of 2. */29 /* -eE : E = error tolerance for iterative relaxation. */30 /* -rR : R = distance between grid points in meters. */31 /* -tT : T = timestep in seconds. */32 /* -s : Print timing statistics. */33 /* -o : Print out relaxation residual values. */34 /* -h : Print out command line options. */35 /* */36 /* Default: OCEAN -m130 -n1 -e1e-7 -r20000.0 -t28800.0 */37 /* */38 /* NOTE: This code works under both the FORK and SPROC models. */39 /* */40 /*************************************************************************/41 42 MAIN_ENV43 44 #define DEFAULT_M 51445 #define DEFAULT_N 446 #define DEFAULT_E 1e-747 #define DEFAULT_T 28800.048 #define DEFAULT_R 20000.049 #define UP 050 #define DOWN 151 #define LEFT 252 #define RIGHT 353 #define UPLEFT 454 #define UPRIGHT 555 #define DOWNLEFT 656 #define DOWNRIGHT 757 #define PAGE_SIZE 409658 59 #include <stdio.h>60 #include <math.h>61 #include <stdlib.h>62 63 #include "decs.h"64 65 struct multi_struct *multi;66 struct global_struct *global;67 struct locks_struct *locks;68 struct bars_struct *bars;69 70 struct Global_Private *main_gp;71 double ****main_psi;72 double ****main_psim;73 double ***main_psium;74 double ***main_psilm;75 double ***main_psib;76 double ***main_ga;77 double ***main_gb;78 double ****main_work1;79 double ***main_work2;80 double ***main_work3;81 double ****main_work4;82 double ****main_work5;83 double ***main_work6;84 double ****main_work7;85 double ***main_oldga;86 double ***main_oldgb;87 double ****main_q_multi;88 double ****main_rhs_multi;89 double ****temparray;90 double ***tauz;91 long *main_imx;92 long *main_jmx;93 94 long nprocs = DEFAULT_N;95 const double h1 = 1000.0;96 const double h3 = 4000.0;97 const double h = 5000.0;98 const double lf = -5.12e11;99 double res = DEFAULT_R;100 double dtau = DEFAULT_T;101 const double f0 = 8.3e-5;102 const double beta = 2.0e-11;103 const double gpr = 0.02;104 double ysca;105 long oim;106 long jmm1;107 double tolerance = DEFAULT_E;108 const double pi = 3.141592653589793;109 const double t0 = 0.5e-4;110 const double outday0 = 1.0;111 const double outday1 = 2.0;112 const double outday2 = 2.0;113 const double outday3 = 2.0;114 const double maxwork = 10000.0;115 double factjacob;116 double factlap;117 118 //TODO : répliquer ça :119 double *main_lev_res;120 double *main_lev_tol;121 double *main_i_int_coeff;122 double *main_j_int_coeff;123 long *main_xpts_per_proc;124 long *main_ypts_per_proc;125 long main_xprocs;126 long main_yprocs;127 long main_numlev;128 double main_eig2;129 long main_im = DEFAULT_M;130 long main_jm;131 132 long minlevel;133 long do_stats = 1;134 long do_output = 0;135 long *ids_procs;136 137 138 __attribute__ ((constructor)) int main(int argc, char *argv[])139 {140 long i;141 long j;142 long k;143 long x_part;144 long y_part;145 long d_size;146 long itemp;147 long jtemp;148 double procsqrt;149 long temp = 0;150 double min_total;151 double max_total;152 double avg_total;153 double avg_wait;154 double max_wait;155 double min_wait;156 double min_multi;157 double max_multi;158 double avg_multi;159 double min_frac;160 double max_frac;161 double avg_frac;162 long imax_wait;163 long imin_wait;164 long ch;165 unsigned long long computeend;166 unsigned long long start;167 im = main_im;168 169 CLOCK(start);170 171 while ((ch = getopt(argc, argv, "m:n:e:r:t:soh")) != -1) {172 switch (ch) {173 case 'm':174 im = atoi(optarg);175 if (log_2(im - 2) == -1) {176 printerr("Grid must be ((power of 2)+2) in each dimension\n");177 exit(-1);178 }179 break;180 case 'n':181 nprocs = atoi(optarg);182 if (nprocs < 1) {183 printerr("N must be >= 1\n");184 exit(-1);185 }186 if (log_2(nprocs) == -1) {187 printerr("N must be a power of 2\n");188 exit(-1);189 }190 break;191 case 'e':192 tolerance = atof(optarg);193 break;194 case 'r':195 res = atof(optarg);196 break;197 case 't':198 dtau = atof(optarg);199 break;200 case 's':201 do_stats = !do_stats;202 break;203 case 'o':204 do_output = !do_output;205 break;206 case 'h':207 printf("Usage: ocean <options>\n\n");208 printf("options:\n");209 printf(" -mM : Simulate MxM ocean. M must be (power of 2) + 2 (default = %d).\n", DEFAULT_M);210 printf(" -nN : N = number of threads. N must be power of 2 (default = %d).\n", DEFAULT_N);211 printf(" -eE : E = error tolerance for iterative relaxation (default = %f).\n", DEFAULT_E);212 printf(" -rR : R = distance between grid points in meters (default = %f).\n", DEFAULT_R);213 printf(" -tT : T = timestep in seconds (default = %f).\n", DEFAULT_T);214 printf(" -s : Print timing statistics.\n");215 printf(" -o : Print out relaxation residual values.\n");216 printf(" -h : Print out command line options.\n\n");217 exit(0);218 break;219 }220 }221 222 MAIN_INITENV223 224 jm = im;225 226 printf("\n");227 printf("Ocean simulation with W-cycle multigrid solver\n");228 printf(" Processors : %1ld\n", nprocs);229 printf(" Grid size : %1ld x %1ld\n", im, jm);230 printf(" Grid resolution (meters) : %0.2f\n", res);231 printf(" Time between relaxations (seconds) : %0.0f\n", dtau);232 printf(" Error tolerance : %0.7g\n", tolerance);233 printf("\n");234 235 xprocs = 0;236 yprocs = 0;237 238 procsqrt = sqrt((double) nprocs);239 j = (long) procsqrt;240 241 while ((xprocs == 0) && (j > 0)) {242 k = nprocs / j;243 if (k * j == nprocs) {244 if (k > j) {245 xprocs = j;246 yprocs = k;247 } else {248 xprocs = k;249 yprocs = j;250 }251 }252 j--;253 }254 255 if (xprocs == 0) {256 printerr("Could not find factors for subblocking\n");257 exit(-1);258 }259 260 minlevel = 0;261 itemp = 1;262 jtemp = 1;263 numlev = 0;264 minlevel = 0;265 266 while (itemp < (im - 2)) {267 itemp = itemp * 2;268 jtemp = jtemp * 2;269 if ((itemp / yprocs > 1) && (jtemp / xprocs > 1)) {270 numlev++;271 }272 }273 274 if (numlev == 0) {275 printerr("Must have at least 2 grid points per processor in each dimension\n");276 exit(-1);277 }278 279 main_imx = (long *) G_MALLOC(numlev * sizeof(long), 0);280 main_jmx = (long *) G_MALLOC(numlev * sizeof(long), 0);281 main_lev_res = (double *) G_MALLOC(numlev * sizeof(double), 0);282 main_lev_tol = (double *) G_MALLOC(numlev * sizeof(double), 0);283 main_i_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0);284 main_j_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0);285 main_xpts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0);286 main_ypts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0);287 ids_procs = (long *) G_MALLOC(nprocs * sizeof(long), 0);288 289 imx = main_imx;290 jmx = main_jmx;291 lev_res = main_lev_res;292 lev_tol = main_lev_tol;293 i_int_coeff = main_i_int_coeff;294 j_int_coeff = main_j_int_coeff;295 xpts_per_proc = main_xpts_per_proc;296 ypts_per_proc = main_ypts_per_proc;297 298 for (i = 0; i < nprocs; i++) {299 ids_procs[i] = i;300 }301 302 imx[numlev - 1] = im;303 jmx[numlev - 1] = jm;304 lev_res[numlev - 1] = res;305 lev_tol[numlev - 1] = tolerance;306 307 for (i = numlev - 2; i >= 0; i--) {308 imx[i] = ((imx[i + 1] - 2) / 2) + 2;309 jmx[i] = ((jmx[i + 1] - 2) / 2) + 2;310 lev_res[i] = lev_res[i + 1] * 2;311 }312 313 for (i = 0; i < numlev; i++) {314 xpts_per_proc[i] = (jmx[i] - 2) / xprocs;315 ypts_per_proc[i] = (imx[i] - 2) / yprocs;316 }317 for (i = numlev - 1; i >= 0; i--) {318 if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) {319 minlevel = i + 1;320 break;321 }322 }323 324 for (i = 0; i < numlev; i++) {325 temp += imx[i];326 }327 temp = 0;328 j = 0;329 for (k = 0; k < numlev; k++) {330 for (i = 0; i < imx[k]; i++) {331 j++;332 temp += jmx[k];333 }334 }335 336 d_size = nprocs * sizeof(double ***);337 main_psi = (double ****) G_MALLOC(d_size, 0);338 main_psim = (double ****) G_MALLOC(d_size, 0);339 main_work1 = (double ****) G_MALLOC(d_size, 0);340 main_work4 = (double ****) G_MALLOC(d_size, 0);341 main_work5 = (double ****) G_MALLOC(d_size, 0);342 main_work7 = (double ****) G_MALLOC(d_size, 0);343 temparray = (double ****) G_MALLOC(d_size, -1);344 345 psi = main_psi;346 psim = main_psim;347 work1 = main_work1;348 work4 = main_work4;349 work5 = main_work5;350 work7 = main_work7;351 352 d_size = 2 * sizeof(double **);353 for (i = 0; i < nprocs; i++) {354 psi[i] = (double ***) G_MALLOC(d_size, i);355 psim[i] = (double ***) G_MALLOC(d_size, i);356 work1[i] = (double ***) G_MALLOC(d_size, i);357 work4[i] = (double ***) G_MALLOC(d_size, i);358 work5[i] = (double ***) G_MALLOC(d_size, i);359 work7[i] = (double ***) G_MALLOC(d_size, i);360 temparray[i] = (double ***) G_MALLOC(d_size, i);361 }362 363 d_size = nprocs * sizeof(double **);364 main_psium = (double ***) G_MALLOC(d_size, 0);365 main_psilm = (double ***) G_MALLOC(d_size, 0);366 main_psib = (double ***) G_MALLOC(d_size, 0);367 main_ga = (double ***) G_MALLOC(d_size, 0);368 main_gb = (double ***) G_MALLOC(d_size, 0);369 main_work2 = (double ***) G_MALLOC(d_size, 0);370 main_work3 = (double ***) G_MALLOC(d_size, 0);371 main_work6 = (double ***) G_MALLOC(d_size, 0);372 tauz = (double ***) G_MALLOC(d_size, 0);373 main_oldga = (double ***) G_MALLOC(d_size, 0);374 main_oldgb = (double ***) G_MALLOC(d_size, 0);375 376 psium = main_psium;377 psilm = main_psilm;378 psib = main_psib;379 ga = main_ga;380 gb = main_gb;381 work2 = main_work2;382 work3 = main_work3;383 work6 = main_work6;384 oldga = main_oldga;385 oldgb = main_oldgb;386 387 main_gp = (struct Global_Private *) G_MALLOC((nprocs + 1) * sizeof(struct Global_Private), -1);388 gp = main_gp;389 390 for (i = 0; i < nprocs; i++) {391 gp[i].pad = (char *) G_MALLOC(PAGE_SIZE * sizeof(char), i);392 gp[i].rel_num_x = (long *) G_MALLOC(numlev * sizeof(long), i);393 gp[i].rel_num_y = (long *) G_MALLOC(numlev * sizeof(long), i);394 gp[i].eist = (long *) G_MALLOC(numlev * sizeof(long), i);395 gp[i].ejst = (long *) G_MALLOC(numlev * sizeof(long), i);396 gp[i].oist = (long *) G_MALLOC(numlev * sizeof(long), i);397 gp[i].ojst = (long *) G_MALLOC(numlev * sizeof(long), i);398 gp[i].rlist = (long *) G_MALLOC(numlev * sizeof(long), i);399 gp[i].rljst = (long *) G_MALLOC(numlev * sizeof(long), i);400 gp[i].rlien = (long *) G_MALLOC(numlev * sizeof(long), i);401 gp[i].rljen = (long *) G_MALLOC(numlev * sizeof(long), i);402 gp[i].neighbors = (long *) G_MALLOC(8 * sizeof(long), i);403 gp[i].rownum = (long *) G_MALLOC(sizeof(long), i);404 gp[i].colnum = (long *) G_MALLOC(sizeof(long), i);405 gp[i].lpid = (long *) G_MALLOC(sizeof(long), i);406 gp[i].multi_time = (double *) G_MALLOC(sizeof(double), i);407 gp[i].total_time = (double *) G_MALLOC(sizeof(double), i);408 gp[i].sync_time = (double *) G_MALLOC(sizeof(double), i);409 gp[i].process_time = (double *) G_MALLOC(sizeof(double), i);410 gp[i].step_start = (double *) G_MALLOC(sizeof(double), i);411 gp[i].steps_time = (double *) G_MALLOC(10 * sizeof(double), i);412 *gp[i].multi_time = 0;413 *gp[i].total_time = 0;414 *gp[i].sync_time = 0;415 *gp[i].process_time = 0;416 *gp[i].lpid = i;417 }418 419 subblock();420 421 x_part = (jm - 2) / xprocs + 2;422 y_part = (im - 2) / yprocs + 2;423 424 d_size = x_part * y_part * sizeof(double) + y_part * sizeof(double *);425 426 global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct), -1);427 428 for (i = 0; i < nprocs; i++) {429 psi[i][0] = (double **) G_MALLOC(d_size, i);430 psi[i][1] = (double **) G_MALLOC(d_size, i);431 psim[i][0] = (double **) G_MALLOC(d_size, i);432 psim[i][1] = (double **) G_MALLOC(d_size, i);433 psium[i] = (double **) G_MALLOC(d_size, i);434 psilm[i] = (double **) G_MALLOC(d_size, i);435 psib[i] = (double **) G_MALLOC(d_size, i);436 ga[i] = (double **) G_MALLOC(d_size, i);437 gb[i] = (double **) G_MALLOC(d_size, i);438 work1[i][0] = (double **) G_MALLOC(d_size, i);439 work1[i][1] = (double **) G_MALLOC(d_size, i);440 work2[i] = (double **) G_MALLOC(d_size, i);441 work3[i] = (double **) G_MALLOC(d_size, i);442 work4[i][0] = (double **) G_MALLOC(d_size, i);443 work4[i][1] = (double **) G_MALLOC(d_size, i);444 work5[i][0] = (double **) G_MALLOC(d_size, i);445 work5[i][1] = (double **) G_MALLOC(d_size, i);446 work6[i] = (double **) G_MALLOC(d_size, i);447 work7[i][0] = (double **) G_MALLOC(d_size, i);448 work7[i][1] = (double **) G_MALLOC(d_size, i);449 temparray[i][0] = (double **) G_MALLOC(d_size, i);450 temparray[i][1] = (double **) G_MALLOC(d_size, i);451 tauz[i] = (double **) G_MALLOC(d_size, i);452 oldga[i] = (double **) G_MALLOC(d_size, i);453 oldgb[i] = (double **) G_MALLOC(d_size, i);454 }455 456 oim = im;457 //f = (double *) G_MALLOC(oim*sizeof(double), 0);458 multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct), -1);459 460 d_size = numlev * sizeof(double **);461 if (numlev % 2 == 1) { /* To make sure that the actual data462 starts double word aligned, add an extra463 pointer */464 d_size += sizeof(double **);465 }466 for (i = 0; i < numlev; i++) {467 d_size += ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *);468 }469 470 d_size *= nprocs;471 472 if (nprocs % 2 == 1) { /* To make sure that the actual data473 starts double word aligned, add an extra474 pointer */475 d_size += sizeof(double ***);476 }477 478 d_size += nprocs * sizeof(double ***);479 main_q_multi = (double ****) G_MALLOC(d_size, -1);480 main_rhs_multi = (double ****) G_MALLOC(d_size, -1);481 q_multi = main_q_multi;482 rhs_multi = main_rhs_multi;483 484 485 locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct), -1);486 bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct), -1);487 488 LOCKINIT(locks->idlock)489 LOCKINIT(locks->psiailock)490 LOCKINIT(locks->psibilock)491 LOCKINIT(locks->donelock)492 LOCKINIT(locks->error_lock)493 LOCKINIT(locks->bar_lock)494 #if defined(MULTIPLE_BARRIERS)495 BARINIT(bars->iteration, nprocs)496 BARINIT(bars->gsudn, nprocs)497 BARINIT(bars->p_setup, nprocs)498 BARINIT(bars->p_redph, nprocs)499 BARINIT(bars->p_soln, nprocs)500 BARINIT(bars->p_subph, nprocs)501 BARINIT(bars->sl_prini, nprocs)502 BARINIT(bars->sl_psini, nprocs)503 BARINIT(bars->sl_onetime, nprocs)504 BARINIT(bars->sl_phase_1, nprocs)505 BARINIT(bars->sl_phase_2, nprocs)506 BARINIT(bars->sl_phase_3, nprocs)507 BARINIT(bars->sl_phase_4, nprocs)508 BARINIT(bars->sl_phase_5, nprocs)509 BARINIT(bars->sl_phase_6, nprocs)510 BARINIT(bars->sl_phase_7, nprocs)511 BARINIT(bars->sl_phase_8, nprocs)512 BARINIT(bars->sl_phase_9, nprocs)513 BARINIT(bars->sl_phase_10, nprocs)514 BARINIT(bars->error_barrier, nprocs)515 #else516 BARINIT(bars->barrier, nprocs)517 #endif518 link_all();519 520 multi->err_multi = 0.0;521 i_int_coeff[0] = 0.0;522 j_int_coeff[0] = 0.0;523 524 for (i = 0; i < numlev; i++) {525 i_int_coeff[i] = 1.0 / (imx[i] - 1);526 j_int_coeff[i] = 1.0 / (jmx[i] - 1);527 }528 529 /*530 initialize constants and variables531 532 id is a global shared variable that has fetch-and-add operations533 performed on it by processes to obtain their pids.534 */535 536 //global->id = 0;537 global->trackstart = 0;538 global->psibi = 0.0;539 540 factjacob = -1. / (12. * res * res);541 factlap = 1. / (res * res);542 eig2 = -h * f0 * f0 / (h1 * h3 * gpr);543 544 jmm1 = jm - 1;545 ysca = ((double) jmm1) * res;546 im = (imx[numlev - 1] - 2) / yprocs + 2;547 jm = (jmx[numlev - 1] - 2) / xprocs + 2;548 549 main_im = im;550 main_jm = jm;551 main_numlev = numlev;552 main_xprocs = xprocs;553 main_yprocs = yprocs;554 main_eig2 = eig2;555 556 if (do_output) {557 printf(" MULTIGRID OUTPUTS\n");558 }559 560 CREATE(slave, nprocs);561 WAIT_FOR_END(nprocs);562 CLOCK(computeend);563 564 printf("\n");565 printf(" PROCESS STATISTICS\n");566 printf(" Total Multigrid Multigrid\n");567 printf(" Proc Time Time Fraction\n");568 printf(" 0 %15.0f %15.0f %10.3f\n", (*gp[0].total_time), (*gp[0].multi_time), (*gp[0].multi_time) / (*gp[0].total_time));569 570 if (do_stats) {571 double phase_time;572 min_total = max_total = avg_total = (*gp[0].total_time);573 min_multi = max_multi = avg_multi = (*gp[0].multi_time);574 min_frac = max_frac = avg_frac = (*gp[0].multi_time) / (*gp[0].total_time);575 avg_wait = *gp[0].sync_time;576 max_wait = *gp[0].sync_time;577 min_wait = *gp[0].sync_time;578 imax_wait = 0;579 imin_wait = 0;580 581 for (i = 1; i < nprocs; i++) {582 if ((*gp[i].total_time) > max_total) {583 max_total = (*gp[i].total_time);584 }585 if ((*gp[i].total_time) < min_total) {586 min_total = (*gp[i].total_time);587 }588 if ((*gp[i].multi_time) > max_multi) {589 max_multi = (*gp[i].multi_time);590 }591 if ((*gp[i].multi_time) < min_multi) {592 min_multi = (*gp[i].multi_time);593 }594 if ((*gp[i].multi_time) / (*gp[i].total_time) > max_frac) {595 max_frac = (*gp[i].multi_time) / (*gp[i].total_time);596 }597 if ((*gp[i].multi_time) / (*gp[i].total_time) < min_frac) {598 min_frac = (*gp[i].multi_time) / (*gp[i].total_time);599 }600 avg_total += (*gp[i].total_time);601 avg_multi += (*gp[i].multi_time);602 avg_frac += (*gp[i].multi_time) / (*gp[i].total_time);603 avg_wait += (*gp[i].sync_time);604 if (max_wait < (*gp[i].sync_time)) {605 max_wait = (*gp[i].sync_time);606 imax_wait = i;607 }608 if (min_wait > (*gp[i].sync_time)) {609 min_wait = (*gp[i].sync_time);610 imin_wait = i;611 }612 }613 avg_total = avg_total / nprocs;614 avg_multi = avg_multi / nprocs;615 avg_frac = avg_frac / nprocs;616 avg_wait = avg_wait / nprocs;617 for (i = 1; i < nprocs; i++) {618 printf(" %3ld %15.0f %15.0f %10.3f\n", i, (*gp[i].total_time), (*gp[i].multi_time), (*gp[i].multi_time) / (*gp[i].total_time));619 }620 printf(" Avg %15.0f %15.0f %10.3f\n", avg_total, avg_multi, avg_frac);621 printf(" Min %15.0f %15.0f %10.3f\n", min_total, min_multi, min_frac);622 printf(" Max %15.0f %15.0f %10.3f\n", max_total, max_multi, max_frac);623 624 printf("\n\n Sync\n");625 printf(" Proc Time Fraction\n");626 for (i = 0; i < nprocs; i++) {627 printf(" %ld %u %f\n", i, (unsigned int)*gp[i].sync_time, *gp[i].sync_time / ((long)(*gp[i].total_time)));628 }629 630 printf(" Avg %f %f\n", avg_wait, (double) avg_wait / (long) (computeend - global->trackstart));631 printf(" Min %f %f\n", min_wait, (double) min_wait / (long) (*gp[imin_wait].total_time));632 printf(" Max %f %f\n", max_wait, (double) max_wait / (long) (*gp[imax_wait].total_time));633 634 printf("\nPhases Avg :\n\n");635 for (i = 0; i < 10; i++) {636 phase_time = 0;637 for (j = 0; j < nprocs; j++) {638 phase_time += gp[j].steps_time[i];639 }640 phase_time /= (double) nprocs;641 printf(" %d = %f (fraction %f)\n", i + 1, phase_time, phase_time / (long) (computeend - global->trackstart));642 }643 }644 printf("\n");645 646 global->starttime = start;647 printf(" TIMING INFORMATION\n");648 printf("[NPROCS] : %16ld\n", nprocs);649 printf("[START1] : %16llu\n", global->starttime);650 printf("[START2] : %16llu\n", global->trackstart);651 printf("[END] : %16llu\n", computeend);652 printf("[TOTAL] : %16llu\n", computeend - global->starttime); // With init653 printf("[PARALLEL_COMPUTE] : %16llu\n", computeend - global->trackstart); // Without init654 printf("(excludes first timestep)\n");655 printf("\n");656 657 MAIN_END658 659 }660 661 long log_2(long number)662 {663 long cumulative = 1;664 long out = 0;665 long done = 0;666 667 while ((cumulative < number) && (!done) && (out < 50)) {668 if (cumulative == number) {669 done = 1;670 } else {671 cumulative = cumulative * 2;672 out++;673 }674 }675 676 if (cumulative == number) {677 return (out);678 } else {679 return (-1);680 }681 }682 683 void printerr(char *s)684 {685 fprintf(stderr, "ERROR: %s\n", s);686 }687 688 689 // Local Variables:690 // tab-width: 4691 // c-basic-offset: 4692 // c-file-offsets:((innamespace . 0)(inline-open . 0))693 // indent-tabs-mode: nil694 // End:695 696 // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4
Note: See TracChangeset
for help on using the changeset viewer.