Changeset 826 for soft/giet_vm/applications/rosenfeld/src-par
- Timestamp:
- Jul 13, 2017, 11:01:58 AM (7 years ago)
- Location:
- soft/giet_vm/applications/rosenfeld/src-par
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/rosenfeld/src-par/mca.c
r823 r826 369 369 MCA_VERBOSE3(printf("\n")); 370 370 } 371 //exit(-1);372 371 373 372 // ------------------------------------------------------------- // … … 442 441 (void) mca_par; 443 442 444 MCA_VERBOSE1(printf("*** MCA_Display_Parameters ***\n"));443 MCA_VERBOSE1(printf("*** %s ***\n", __func__)); 445 444 446 445 MCA_VERBOSE2(printf(" height = %d\n", mca->height)); … … 473 472 uint32 e0, e1; 474 473 475 MCA_VERBOSE1(printf("*** MCA_Finalize ***\n"));474 MCA_VERBOSE1(printf("*** %s ***\n", __func__)); 476 475 477 476 #if PYR_BARRIERS … … 518 517 519 518 if (mca->p == 0) { 520 MCA_VERBOSE1(printf("*** MCA_Scatter_ImageX ***\n"));519 MCA_VERBOSE1(printf("*** %s ***\n", __func__)); 521 520 } 522 521 … … 547 546 548 547 if (mca->p == 0) { 549 MCA_VERBOSE1(printf("*** MCA_Gather_ImageL ***\n"));548 MCA_VERBOSE1(printf("*** %s ***\n", __func__)); 550 549 } 551 550 -
soft/giet_vm/applications/rosenfeld/src-par/mca_main.c
r823 r826 322 322 // ---------------------------------------------------------------------------- 323 323 { 324 CLOCK_INIT(num_threads, 4); // 4 = Number of steps in body324 CLOCK_INIT(num_threads, 5); // 4 = Number of steps in body 325 325 CLOCK_APP_START; 326 326 … … 352 352 int num_threads = DEFAULT_NTHREADS; 353 353 int num_runs = DEFAULT_NRUNS; 354 355 #if TARGET_OS == GIETVM 356 giet_tty_alloc(1); 357 #endif 354 358 355 359 MCA_VERBOSE1(printf("*** Starting application Rosenfeld ***\n")); … … 446 450 447 451 #if TARGET_OS == GIETVM 448 giet_tty_alloc(1);449 452 MCA_VERBOSE1(printf("Initializing heaps... ")); 450 453 for (int i = 0; i < X_SIZE; i++) { … … 458 461 pthread_mutex_init(&print_lock, PTHREAD_PROCESS_PRIVATE); 459 462 main_test_mca(num_threads, num_runs, infile, outfile); 463 464 #if TARGET_OS == GIETVM 465 *(unsigned int *) 0x0 = 0xDEADDEAD; 466 #endif 460 467 461 468 return 0; -
soft/giet_vm/applications/rosenfeld/src-par/mca_rosenfeld.c
r823 r826 12 12 #include <string.h> 13 13 #include <math.h> 14 #include <assert.h>15 14 #if PARMERGE 16 15 #include <pthread.h> … … 27 26 #else 28 27 #include <stdbool.h> 28 #include <assert.h> 29 29 #endif 30 30 … … 165 165 pthread_spin_lock(&F[e1][e0].lock); 166 166 if (D[e1][e0] != eps || D[r1][r0] != root) { 167 // Someone change the root of epsilon or "root", need to find the new root167 // Someone changed the root of epsilon or "root", need to find the new root 168 168 pthread_spin_unlock(&F[e1][e0].lock); 169 169 pthread_spin_unlock(&F[r1][r0].lock); … … 203 203 if (D[e1][e0] != eps) { 204 204 // Someone change the root of epsilon, need to find the new root 205 printf("race cond 1\n");205 //printf("race cond 1\n"); 206 206 pthread_spin_unlock(&F[e1][e0].lock); 207 207 pthread_spin_unlock(&F[r1][r0].lock); … … 210 210 if (D[r1][r0] != root) { 211 211 // Someone change the root of "root", need to find the new root 212 printf("race cond 2\n");212 //printf("race cond 2\n"); 213 213 pthread_spin_unlock(&F[e1][e0].lock); 214 214 pthread_spin_unlock(&F[r1][r0].lock); … … 235 235 236 236 237 #if FEATURES && PARMERGE && ARSP 238 // ------------------------------------------------------------------------------------------ 239 static void Propagate_Features(uint32 e0, uint32 e1, uint32 * T, RegionStats ** F, int shift) 240 // ------------------------------------------------------------------------------------------ 241 { 242 uint32 i; 243 const int mask = (1 << shift) - 1; 244 for (i = e0; i <= e1; i++) { 245 uint32 root = T[i]; 246 if (root != i) { 247 uint32 r1 = root >> shift; 248 uint32 r0 = root & mask; 249 250 uint32 l1 = i >> shift; 251 uint32 l0 = i & mask; 252 // We only lock the destination Features object 253 pthread_spin_lock(&F[r1][r0].lock); 254 255 // F(eps) = F(eps) U F(root) 256 F[r1][r0].xmin = ui16min2(F[l1][l0].xmin, F[r1][r0].xmin); 257 F[r1][r0].xmax = ui16max2(F[l1][l0].xmax, F[r1][r0].xmax); 258 F[r1][r0].ymin = ui16min2(F[l1][l0].ymin, 
F[r1][r0].ymin); 259 F[r1][r0].ymax = ui16max2(F[l1][l0].ymax, F[r1][r0].ymax); 260 261 F[r1][r0].S += F[l1][l0].S; 262 F[r1][r0].Sx += F[l1][l0].Sx; 263 F[r1][r0].Sy += F[l1][l0].Sy; 264 265 pthread_spin_unlock(&F[r1][r0].lock); 266 } 267 } 268 } 269 #endif // FEATURES && PARMERGE && ARSP 270 237 271 238 272 #if FAST … … 405 439 406 440 407 #if FAST && !FEATURES &&PARMERGE && ARSP441 #if FAST && PARMERGE && ARSP 408 442 // ---------------------------------------------------------------------------------------------------------------- 409 443 static bool SetRoot_Parallel_Arsp_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift, RegionStats ** F) 410 444 // ---------------------------------------------------------------------------------------------------------------- 411 445 { 446 // QM : Pour la version avec features, on est obligé de faire l'accumulation à la fin une fois la fermeture 447 // transitive globale réalisée : sinon, on peut perdre des features quand on propage vers un epsilon qui 448 // n'est pas une racine. 
412 449 assert(root != 0 && eps != 0); 413 450 414 MCA_VERBOSE3(printf("F(%d) += F(%d)\n", eps, root));415 416 451 uint32_t mask = (1 << shift) - 1; 417 452 … … 419 454 uint32_t r0 = root & mask; 420 455 456 // @QM 457 // A priori ici il n'y a pas besoin de prendre le lock sur eps 458 // car ce n'est pas une racine 421 459 pthread_spin_lock(&F[r1][r0].lock); 422 460 if (D[r1][r0] != root) { … … 430 468 return true; 431 469 } 432 433 // FAST && !FEATURES && PARMERGE && ARSP 434 470 #endif // FAST && PARMERGE && ARSP 471 472 473 #if FAST && PARMERGE && ARSP 435 474 // ------------------------------------------------------------------------------------------------------------------------------ 436 475 static inline bool FindSmallerAncestor_Link(uint32 ** D, uint32_t rl, uint32_t el, uint32_t rd, uint32_t shift, RegionStats ** F) 437 476 // ------------------------------------------------------------------------------------------------------------------------------ 438 477 { 478 // Fait pointer rd (racine) vers rl (pas racine) a priori 479 // mais il faut que l'élément vers lequel rd pointe soit plus petit que rd 480 // On "remonte" donc vers la racine de rl jusqu'à atteindre un élément plus petit que rd 481 // Si on atteint la racine de rl et que cette dernière est toujours plus grande que rd, 482 // on fait alors pointer rl vers rd 439 483 bool ok; 440 484 uint32_t el1, el0; … … 462 506 } 463 507 464 // FAST && !FEATURES &&PARMERGE && ARSP508 // FAST && PARMERGE && ARSP 465 509 466 510 // ----------------------------------------------------------------------------------------------------------------------- … … 513 557 } 514 558 515 // FAST && !FEATURES &&PARMERGE && ARSP559 // FAST && PARMERGE && ARSP 516 560 517 561 // ------------------------------------------------------------------------------------------------------------------------------------- … … 617 661 } while (!ok); 618 662 } 619 #endif // FAST && !FEATURES &&PARMERGE && ARSP663 #endif // FAST && PARMERGE 
&& ARSP 620 664 621 665 … … 1395 1439 if (r < e) { 1396 1440 T[e] = r; // racine de la classe d'equivalence 1397 #if FEATURES 1441 #if FEATURES && !(PARMERGE && ARSP) 1398 1442 RegionStats_Accumulate_Stats1_From_Index(Stats, r, e); 1399 1443 #endif … … 1519 1563 // -- parallel transitive closure -- 1520 1564 // --------------------------------- 1521 // identique a la version sans Features1522 1565 1523 1566 CLOCK_THREAD_START_STEP(p, 2); … … 1532 1575 CLOCK_THREAD_END_STEP(p, 2); 1533 1576 1577 #if FEATURES && ARSP 1578 pthread_barrier_wait(&main_barrier); 1579 #endif 1580 1534 1581 // To avoid uninitialized accesses 1535 1582 CLOCK_THREAD_START_STEP(p, 3); 1583 // With FEATURES and ARSP, STEP 3 is the Features propagation 1584 #if FEATURES && ARSP 1585 Propagate_Features(e0, e1, T, F, mca->alpha); 1586 #endif 1536 1587 CLOCK_THREAD_END_STEP(p, 3); 1537 1588 } … … 1616 1667 CLOCK_THREAD_END_STEP(p, 2); 1617 1668 } 1618 #endif // ! FEATURES1669 #endif // !PARMERGE 1619 1670 1620 1671 … … 1636 1687 uint32 * T = mca->T; 1637 1688 1638 CLOCK_THREAD_START_STEP(mca->p, 3);1689 CLOCK_THREAD_START_STEP(mca->p, 4); 1639 1690 for (int i = i0; i <= i1; i++) { 1640 1691 for (int j = j0; j <= j1; j++) { … … 1645 1696 } 1646 1697 } 1647 CLOCK_THREAD_END_STEP(mca->p, 3);1698 CLOCK_THREAD_END_STEP(mca->p, 4); 1648 1699 } 1649 1700
Note: See TracChangeset
for help on using the changeset viewer.