/* ----------------------- */ /* --- mca_rosenfeld.c --- */ /* ----------------------- */ /* * Copyright (c) 2016 Lionel Lacassagne, LIP6, UPMC, CNRS * Init : 2016/03/03 */ #include #include #include #include #if PARMERGE #include #endif #include "nrc_os_config.h" #include "config.h" #include "nrc.h" #if TARGET_OS == GIETVM #include #include #include #else #include #include #endif #include "util.h" #include "ecc_common.h" #include "palette.h" #include "bmpNR.h" #include "clock.h" #include "str_ext.h" #include "ecc_features.h" // ----------- // -- local -- // ----------- #include "mca.h" extern pthread_barrier_t main_barrier; extern int display_features; CLOCK_DEC; // ----------------------------------------- static uint32 FindRoot(uint32 * T, uint32 e) // ----------------------------------------- { uint32 r; assert(e != 0); r = e; while (T[r] < r) { r = T[r]; } assert(r != 0); return r; } // ---------------------------------------------------------- static uint32 FindRoot_Dist(uint32 ** D, uint32 r, int shift) // ---------------------------------------------------------- { uint32 e; uint32 e1; uint32 e0; assert(r != 0); int mask = (1 << shift) - 1; MCA_VERBOSE3(printf("%s(%d, %d) \n", __func__, r, shift)); do { e = r; e1 = r >> shift; e0 = r & mask; r = D[e1][e0]; MCA_VERBOSE3(printf("%s: D(%d) = D[%d,%d] = %d (alpha = %d)\n", __func__, e, e1, e0, r, shift)); } while (r < e); MCA_VERBOSE3(printf("%s = %d \n\n", __func__, r)); assert(r != 0); return r; } #if !FEATURES // -------------------------------------------------------------------------------- static void SetRoot_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift) // -------------------------------------------------------------------------------- { int mask = (1 << shift) - 1; assert(root != 0 && eps != 0); uint32 r1 = root >> shift; uint32 r0 = root & mask; D[r1][r0] = eps; } #endif // !FEATURES #if FEATURES && !PARMERGE // 
----------------------------------------------------------------------------------------------------------- static void SetRoot_Features_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift, RegionStats ** F) // ----------------------------------------------------------------------------------------------------------- { assert(root != 0 && eps != 0); MCA_VERBOSE3(printf("F(%d) += F(%d)\n", eps, root)); int mask = (1 << shift) - 1; uint32 r1 = root >> shift; uint32 r0 = root & mask; D[r1][r0] = eps; uint32 e1 = eps >> shift; uint32 e0 = eps & mask; // version Dist de "RegionStats_Accumulate_Stats1_From_Index" // F(eps) = F(eps) U F(root) F[e1][e0].xmin = ui16min2(F[e1][e0].xmin, F[r1][r0].xmin); F[e1][e0].xmax = ui16max2(F[e1][e0].xmax, F[r1][r0].xmax); F[e1][e0].ymin = ui16min2(F[e1][e0].ymin, F[r1][r0].ymin); F[e1][e0].ymax = ui16max2(F[e1][e0].ymax, F[r1][r0].ymax); F[e1][e0].S += F[r1][r0].S; F[e1][e0].Sx += F[r1][r0].Sx; F[e1][e0].Sy += F[r1][r0].Sy; } #endif // FEATURES && !PARMERGE #if !FEATURES && PARMERGE // ----------------------------------------------------------------------------------------------------------- static bool SetRoot_Parallel_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift, RegionStats ** F) // ----------------------------------------------------------------------------------------------------------- { assert(root != 0 && eps != 0); MCA_VERBOSE3(printf("F(%d) += F(%d)\n", eps, root)); int mask = (1 << shift) - 1; uint32 r1 = root >> shift; uint32 r0 = root & mask; uint32 e1 = eps >> shift; uint32 e0 = eps & mask; // Locking towards the root (first root, then eps) pthread_spin_lock(&F[r1][r0].lock); pthread_spin_lock(&F[e1][e0].lock); if (D[e1][e0] != eps || D[r1][r0] != root) { // Someone changed the root of epsilon or "root", need to find the new root pthread_spin_unlock(&F[e1][e0].lock); pthread_spin_unlock(&F[r1][r0].lock); return false; } D[r1][r0] = eps; pthread_spin_unlock(&F[e1][e0].lock); 
pthread_spin_unlock(&F[r1][r0].lock); return true; } #endif // !FEATURES && PARMERGE #if FEATURES && PARMERGE // -------------------------------------------------------------------------------------------------------------------- static bool SetRoot_Parallel_Features_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift, RegionStats ** F) // -------------------------------------------------------------------------------------------------------------------- { assert(root != 0 && eps != 0); MCA_VERBOSE3(printf("F(%d) += F(%d)\n", eps, root)); int mask = (1 << shift) - 1; uint32 r1 = root >> shift; uint32 r0 = root & mask; uint32 e1 = eps >> shift; uint32 e0 = eps & mask; // Locking towards the root (first root, then eps) pthread_spin_lock(&F[r1][r0].lock); pthread_spin_lock(&F[e1][e0].lock); // FIXME: merge these conditions later, when they both appear if (D[e1][e0] != eps) { // Someone change the root of epsilon, need to find the new root //printf("race cond 1\n"); pthread_spin_unlock(&F[e1][e0].lock); pthread_spin_unlock(&F[r1][r0].lock); return false; } if (D[r1][r0] != root) { // Someone change the root of "root", need to find the new root //printf("race cond 2\n"); pthread_spin_unlock(&F[e1][e0].lock); pthread_spin_unlock(&F[r1][r0].lock); return false; } D[r1][r0] = eps; // F(eps) = F(eps) U F(root) F[e1][e0].xmin = ui16min2(F[e1][e0].xmin, F[r1][r0].xmin); F[e1][e0].xmax = ui16max2(F[e1][e0].xmax, F[r1][r0].xmax); F[e1][e0].ymin = ui16min2(F[e1][e0].ymin, F[r1][r0].ymin); F[e1][e0].ymax = ui16max2(F[e1][e0].ymax, F[r1][r0].ymax); F[e1][e0].S += F[r1][r0].S; F[e1][e0].Sx += F[r1][r0].Sx; F[e1][e0].Sy += F[r1][r0].Sy; pthread_spin_unlock(&F[e1][e0].lock); pthread_spin_unlock(&F[r1][r0].lock); return true; } #endif // FEATURES && PARMERGE #if FEATURES && PARMERGE && ARSP // ------------------------------------------------------------------------------------------ static void Propagate_Features(uint32 e0, uint32 e1, uint32 * T, RegionStats ** F, int 
shift) // ------------------------------------------------------------------------------------------ { uint32 i; const int mask = (1 << shift) - 1; for (i = e0; i <= e1; i++) { uint32 root = T[i]; if (root != i) { uint32 r1 = root >> shift; uint32 r0 = root & mask; uint32 l1 = i >> shift; uint32 l0 = i & mask; // We only lock the destination Features object pthread_spin_lock(&F[r1][r0].lock); // F(eps) = F(eps) U F(root) F[r1][r0].xmin = ui16min2(F[l1][l0].xmin, F[r1][r0].xmin); F[r1][r0].xmax = ui16max2(F[l1][l0].xmax, F[r1][r0].xmax); F[r1][r0].ymin = ui16min2(F[l1][l0].ymin, F[r1][r0].ymin); F[r1][r0].ymax = ui16max2(F[l1][l0].ymax, F[r1][r0].ymax); F[r1][r0].S += F[l1][l0].S; F[r1][r0].Sx += F[l1][l0].Sx; F[r1][r0].Sy += F[l1][l0].Sy; pthread_spin_unlock(&F[r1][r0].lock); } } } #endif // FEATURES && PARMERGE && ARSP #if FAST // -------------------------------------------------------- static uint32 QuickUnion2(uint32 * T, uint32 e1, uint32 e2) // -------------------------------------------------------- { // version QU de Union2 uint32 r1 = FindRoot(T, e1); uint32 r2 = FindRoot(T, e2); assert(e1 != 0 && e2 != 0 && r1 != 0 && r2 != 0); uint32 eps = ui32Min2(r1, r2); if (r1 > eps) { T[r1] = eps; // SetRoot sans besoin de remonter } if (r2 > eps) { T[r2] = eps; // SetRoot sans besoin de remonter } assert(e1 != 0 && e2 != 0 && r1 != 0 && r2 != 0); return eps; } #endif // FAST #if FAST // --------------------------------------------------- static uint32 use1_QU_Rosenfeld(uint32 e1, uint32 * T) // --------------------------------------------------- { return FindRoot(T, e1); } #endif // FAST #if FAST // -------------------------------------------------------------- static uint32 use2_QU_Rosenfeld(uint32 e1, uint32 e2, uint32 * T) // -------------------------------------------------------------- { return QuickUnion2(T, e1, e2); } #endif // FAST #if FAST && !FEATURES && !PARMERGE && !ARSP // 
// Merges the class of the remote label ed with the class of the local label el.
// ed is looked up directly in D; el is first translated through the local table
// T and the result is then looked up in D. The larger of the two roots is made
// to point to the smaller one.
// ---------------------------------------------------------------------------------------
static void vuse2_Rosenfeld_Dist(uint32 ed, uint32 el, uint32 * T, uint32 ** D, int alpha)
// ---------------------------------------------------------------------------------------
{
    uint32 rd = FindRoot_Dist(D, ed, alpha);

    uint32 rl = T[el]; // because the first access is local
    rl = FindRoot_Dist(D, rl, alpha);

    assert(ed != 0 && el != 0 && rd != 0 && rl != 0);
    if (rd == rl) {
        return; // avoids the backdoor (both labels already share the same root)
    }

    // necessarily non-zero because the call comes from optimizedBorder,
    // which has already tested them
    if (rd < rl) {
        SetRoot_Rosenfeld_Dist(D, rl, rd, alpha);
    }
    else {
        SetRoot_Rosenfeld_Dist(D, rd, rl, alpha);
    }
}


// FAST && !FEATURES && !PARMERGE && !ARSP
// Three-label version: merges the classes of the remote labels ed1 and ed2 and
// of the local label el3 (translated through T first). Every root larger than
// the smallest of the three is made to point to that smallest root.
// -----------------------------------------------------------------------------------------------------
static void vuse3_Rosenfeld_Dist(uint32 ed1, uint32 ed2, uint32 el3, uint32 * T, uint32 ** D, int alpha)
// -----------------------------------------------------------------------------------------------------
{
    uint32 r1 = FindRoot_Dist(D, ed1, alpha);
    uint32 r2 = FindRoot_Dist(D, ed2, alpha);

    uint32 r3 = T[el3]; // local - remote
    r3 = FindRoot_Dist(D, r3, alpha);

    assert(ed1 != 0 && ed2 != 0 && el3 != 0 && r1 != 0 && r2 != 0 && r3 != 0);

    if (r1 == r2 && r2 == r3) {
        return;
    }

    uint32 eps = ui32Min3(r1, r2, r3); // necessarily non-zero because the call comes from optimizedBorder, which has already tested them

    // No duplicate test here: SetRoot may be applied several times to the same
    // element (no stats are accumulated)
    if (r1 > eps) {
        SetRoot_Rosenfeld_Dist(D, r1, eps, alpha);
    }
    if (r2 > eps) {
        SetRoot_Rosenfeld_Dist(D, r2, eps, alpha);
    }
    if (r3 > eps) {
        SetRoot_Rosenfeld_Dist(D, r3, eps, alpha);
    }
}
#endif // FAST && !FEATURES && !PARMERGE && !ARSP


#if FAST && FEATURES && !PARMERGE && !ARSP
// Same merge as vuse2_Rosenfeld_Dist, but the link is written with
// SetRoot_Features_Rosenfeld_Dist so that the statistics in F are accumulated
// along with it.
// ------------------------------------------------------------------------------------------------------------------
static void vuse2_Features_Rosenfeld_Dist(uint32 ed, uint32 el, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ------------------------------------------------------------------------------------------------------------------
{
    assert(ed != 0 && el != 0);

    uint32 rd = FindRoot_Dist(D, ed, alpha);

    uint32 rl = T[el]; // because the first access is local
    assert(rl != 0);
    rl = FindRoot_Dist(D, rl, alpha);

    assert(rd != 0 && rl != 0);

    if (rd == rl) {
        return; // avoids the backdoor (both labels already share the same root)
    }

    // necessarily non-zero because the call comes from optimizedBorder,
    // which has already tested them
    if (rd < rl) {
        SetRoot_Features_Rosenfeld_Dist(D, rl, rd, alpha, F);
    }
    else {
        SetRoot_Features_Rosenfeld_Dist(D, rd, rl, alpha, F);
    }
}


// FAST && FEATURES && !PARMERGE && !ARSP
// Three-label merge with statistics. Unlike vuse3_Rosenfeld_Dist, each root is
// linked at most once: the extra "!=" tests skip a root that is equal to one
// already handled.
// --------------------------------------------------------------------------------------------------------------------------------
static void vuse3_Features_Rosenfeld_Dist(uint32 ed1, uint32 ed2, uint32 el3, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// --------------------------------------------------------------------------------------------------------------------------------
{
    assert(ed1 != 0 && ed2 != 0 && el3 != 0);

    uint32 r1 = FindRoot_Dist(D, ed1, alpha);
    uint32 r2 = FindRoot_Dist(D, ed2, alpha);

    uint32 r3 = T[el3]; // local - remote
    assert(r3 != 0);
    r3 = FindRoot_Dist(D, r3, alpha);

    assert(r1 != 0 && r2 != 0 && r3 != 0);

    if (r1 == r2 && r2 == r3) {
        return;
    }

    uint32 eps = ui32Min3(r1, r2, r3); // necessarily non-zero because the call comes from optimizedBorder, which has already tested them

    if (r1 > eps) {
        SetRoot_Features_Rosenfeld_Dist(D, r1, eps, alpha, F);
    }
    if (r2 > eps && r2 != r1) {
        SetRoot_Features_Rosenfeld_Dist(D, r2, eps, alpha, F);
    }
    if (r3 > eps && r3 != r2 && r3 != r1) {
        SetRoot_Features_Rosenfeld_Dist(D, r3, eps, alpha, F);
    }
}
#endif // FAST && FEATURES && !PARMERGE && !ARSP


#if FAST && PARMERGE && ARSP
// Under the lock of root only, makes root point to eps if D still maps root to
// itself. Returns true when the link was written, false otherwise.
// ----------------------------------------------------------------------------------------------------------------
static bool SetRoot_Parallel_Arsp_Rosenfeld_Dist(uint32 ** D, uint32 root, uint32 eps, int shift, RegionStats ** F)
// ----------------------------------------------------------------------------------------------------------------
{
    // QM: for the version with features, the accumulation has to be done at the
    // end, once the global transitive closure is complete; otherwise features
    // can be lost when propagating to an epsilon that is not a root.
    assert(root != 0 && eps != 0);

    uint32_t mask = (1 << shift) - 1;

    uint32_t r1 = root >> shift;
    uint32_t r0 = root & mask;

    // @QM
    // A priori there is no need to take the lock on eps here,
    // because it is not a root
    pthread_spin_lock(&F[r1][r0].lock);
    if (D[r1][r0] != root) {
        // root was re-linked by someone else in the meantime
        pthread_spin_unlock(&F[r1][r0].lock);
        return false;
    }

    D[r1][r0] = eps;

    pthread_spin_unlock(&F[r1][r0].lock);
    return true;
}
#endif // FAST && PARMERGE && ARSP


#if FAST && PARMERGE && ARSP
// Returns the result of the SetRoot_Parallel_Arsp_Rosenfeld_Dist call it ends
// up making, or true when rd and rl turn out to be equal.
// ------------------------------------------------------------------------------------------------------------------------------
static inline bool FindSmallerAncestor_Link(uint32 ** D, uint32_t rl, uint32_t el, uint32_t rd, uint32_t shift, RegionStats ** F)
// ------------------------------------------------------------------------------------------------------------------------------
{
    // A priori makes rd (a root) point to rl (not a root),
    // but the element rd points to has to be smaller than rd.
    // So we "climb" towards the root of rl until reaching an element smaller than rd.
    // If the root of rl is reached and it is still larger than rd,
    // rl is made to point to rd instead.
    bool ok;
    uint32_t el1, el0;
    uint32_t mask = (1 << shift) - 1;

    while (rl < el && rl > rd) {
        el = rl;
        el1 = rl >> shift;
        el0 = rl & mask;
        rl = D[el1][el0];
    }
    if (rd != rl) {
        if (rl == el && rl > rd) {
            // The order got reversed: rl is made to point to rd
            ok = SetRoot_Parallel_Arsp_Rosenfeld_Dist(D, rl, rd, shift, F);
        }
        else {
            // rd is made to point to rl
            ok = SetRoot_Parallel_Arsp_Rosenfeld_Dist(D, rd, rl, shift, F);
        }
    }
    else {
        ok = true;
    }

    return ok;
}


// FAST && PARMERGE && ARSP
// Merges the classes of ed and el by walking both chains in D at the same time
// and linking through FindSmallerAncestor_Link; the whole attempt is repeated
// until the link call reports success. T is not used by this variant: el is
// looked up in D directly.
// -----------------------------------------------------------------------------------------------------------------------
static void vuse2_Parallel_Arsp_Rosenfeld_Dist(uint32 ed, uint32 el, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// -----------------------------------------------------------------------------------------------------------------------
{
    assert(ed != 0 && el != 0);

    uint32_t shift = alpha;
    uint32_t mask = (1 << shift) - 1;

    uint32_t rd = ed;
    uint32_t rl = el;

    uint32_t ed1;
    uint32_t el1;
    uint32_t ed0;
    uint32_t el0;

    bool ok;

    // Merge ed - el
    do {
        // Advance both chains one step at a time; stop as soon as one of them
        // reads a value that is not smaller than the label it was read from.
        do {
            ed = rd;
            el = rl;
            ed1 = rd >> shift;
            el1 = rl >> shift;
            ed0 = rd & mask;
            el0 = rl & mask;
            rd = D[ed1][ed0];
            rl = D[el1][el0];
        } while (rl < el && rd < ed);

        assert(rl != 0 && rd != 0);

        if (rd != rl) {
            if (rd == ed) {
                ok = FindSmallerAncestor_Link(D, rl, el, rd, shift, F);
            }
            else {
                assert(rl == el);
                ok = FindSmallerAncestor_Link(D, rd, ed, rl, shift, F);
            }
        }
        else {
            ok = true;
        }
    } while (!ok);
}


// FAST && PARMERGE && ARSP
// Three-label version: first merges ed1 with ed2 exactly as
// vuse2_Parallel_Arsp_Rosenfeld_Dist does, then merges the smaller of the two
// resulting values with el3. T is not used by this variant.
// -------------------------------------------------------------------------------------------------------------------------------------
static void vuse3_Parallel_Arsp_Rosenfeld_Dist(uint32 ed1, uint32 ed2, uint32 el3, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// -------------------------------------------------------------------------------------------------------------------------------------
{
    assert(ed1 != 0 && ed2 != 0 && el3 != 0);

    uint32_t shift = alpha;
    uint32_t mask = (1 << shift) - 1;

    uint32_t r1 = ed1;
    uint32_t r2 = ed2;
    uint32_t r3 = el3;

    uint32_t e11;
    uint32_t e21;
    uint32_t e31;

    uint32_t e10;
    uint32_t e20;
    uint32_t e30;

    uint32_t r0;
    uint32_t ed0;
    uint32_t e00;
    uint32_t e01;

    // Deliberately left uninitialized so that valgrind reports an error if the
    // bool is read before being assigned
    bool ok;

    // Merge ed1 - ed2
    do {
        do {
            ed1 = r1;
            ed2 = r2;
            e11 = r1 >> shift;
            e21 = r2 >> shift;
            e10 = r1 & mask;
            e20 = r2 & mask;
            r1 = D[e11][e10];
            r2 = D[e21][e20];
        } while (r1 < ed1 && r2 < ed2);

        assert(r1 != 0 && r2 != 0);

        if (r1 != r2) {
            if (r1 == ed1) {
                ok = FindSmallerAncestor_Link(D, r2, ed2, r1, shift, F);
            }
            else {
                assert(r2 == ed2);
                ok = FindSmallerAncestor_Link(D, r1, ed1, r2, shift, F);
            }
        }
        else {
            ok = true;
        }
    } while (!ok);

    // Merge r0 = min(r1, r2) with r3
    if (r1 < r2) {
        r0 = r1;
        ed0 = r1;
        e00 = e10;
        e01 = e11;
    }
    else {
        r0 = r2;
        ed0 = r2;
        e00 = e20;
        e01 = e21;
    }

    // r0 is already a root: jump straight to the link attempt inside the loop
    // below, skipping the first walk through D. The regular loop body only runs
    // if that attempt (or a later one) fails.
    goto r0_is_root;

    do {
        do {
            ed0 = r0;
            el3 = r3;
            e01 = r0 >> shift;
            e31 = r3 >> shift;
            e00 = r0 & mask;
            e30 = r3 & mask;
            r0 = D[e01][e00];
            r3 = D[e31][e30];
        } while (r0 < ed0 && r3 < el3);

        assert(r0 != 0 && r3 != 0);

        if (r0 != r3) {
            if (r0 == ed0) {
r0_is_root:
                ok = FindSmallerAncestor_Link(D, r3, el3, r0, shift, F);
            }
            else {
                assert(r3 == el3);
                ok = FindSmallerAncestor_Link(D, r0, ed0, r3, shift, F);
            }
        }
        else {
            ok = true;
        }
    } while (!ok);
}
#endif // FAST && PARMERGE && ARSP


#if FAST && PARMERGE && !ARSP
// Valid for FEATURES and !FEATURES
// Merges the class of the remote label ed with the class of the local label el
// (translated through T first). The roots are looked up again and the link is
// retried for as long as SetRoot_Parallel_FNF returns false.
// ---------------------------------------------------------------------------------------------------------------------------
static void vuse2_Parallel_Rosenfeld_Dist(uint32 ed, uint32 el, uint32 * T, uint32 ** D, int alpha, RegionStats ** F)
// ---------------------------------------------------------------------------------------------------------------------------
{
    bool ok;
    assert(ed != 0 && el != 0);
    uint32 rl = T[el]; // because the first access is local
    assert(rl != 0);

    uint32 rd;

    do {
        rd = FindRoot_Dist(D, ed, alpha); // no lock
        rl = FindRoot_Dist(D, rl, alpha);

        assert(rd != 0 && rl != 0);

        if (rd == rl) {
            return; // avoids the backdoor (both labels already share the same root)
        }

        // necessarily non-zero because the call comes from optimizedBorder,
        // which has already tested them
        if (rd < rl) {
            // Features or No Features depending on config
            ok = SetRoot_Parallel_FNF(D, rl, rd, alpha, F);
        }
        else {
            ok = SetRoot_Parallel_FNF(D, rd, rl, alpha, F);
        }
    } while (!ok);
}


//
FAST && PARMERGE && !ARSP // ----------------------------------------------------------------------------------------------------------------------------------------- static void vuse3_Parallel_Rosenfeld_Dist(uint32 ed1, uint32 ed2, uint32 el3, uint32 * T, uint32 ** D, int alpha, RegionStats ** F) // ----------------------------------------------------------------------------------------------------------------------------------------- { bool ok1, ok2, ok3; assert(ed1 != 0 && ed2 != 0 && el3 != 0); uint32 r1; uint32 r2; uint32 r3 = T[el3]; // local - distant assert(r3 != 0); do { r1 = FindRoot_Dist(D, ed1, alpha); r2 = FindRoot_Dist(D, ed2, alpha); r3 = FindRoot_Dist(D, r3, alpha); assert(r1 != 0 && r2 != 0 && r3 != 0); if (r1 == r2 && r2 == r3) { return; } uint32 eps = ui32Min3(r1, r2, r3); // forcement positifs car appel depuis optimizedBorder qui a fait un test ok1 = true; ok2 = true; ok3 = true; if (r1 > eps) { ok1 = SetRoot_Parallel_FNF(D, r1, eps, alpha, F); } if (r2 > eps && r2 != r1) { ok2 = SetRoot_Parallel_FNF(D, r2, eps, alpha, F); } if (r3 > eps && r3 != r2 && r3 != r1) { ok3 = SetRoot_Parallel_FNF(D, r3, eps, alpha, F); } } while (!(ok1 && ok2 && ok3)); } #endif // FAST && PARMERGE && !ARSP #if FAST // ------------------------------------------------------------------------------------------------------------------------ static void optimizedBorder_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F) // ------------------------------------------------------------------------------------------------------------------------ { uint32 a, b, c, x; x = E[i][j]; if (x) { b = E[i - 1][j]; if (b) { vuse2_Rosenfeld(b, x, T, D, alpha, F); // dist, local } else { c = E[i - 1][j + 1]; if (c) { a = E[i - 1][j - 1]; if (a) { vuse3_Rosenfeld(a, c, x, T, D, alpha, F); // dist, local } else { vuse2_Rosenfeld(c, x, T, D, alpha, F); // dist, local } } else { a = E[i - 1][j - 1]; if (a) { vuse2_Rosenfeld(a, x, T, D, alpha, F); // 
dist, local } } } } } // FAST // ---------------------------------------------------------------------------------------------------------------------------- static void optimizedBorderLeft_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F) // ---------------------------------------------------------------------------------------------------------------------------- { uint32 x = E[i][j]; if (x) { uint32 b = E[i - 1][j]; if (b) { vuse2_Rosenfeld(b, x, T, D, alpha, F); // dist, local } else { uint32 c = E[i - 1][j + 1]; if (c) { vuse2_Rosenfeld(c, x, T, D, alpha, F); // dist, local } } } } // FAST // ----------------------------------------------------------------------------------------------------------------------------- static void optimizedBorderRight_Rosenfeld_Dist(uint32 ** E, int i, int j, uint32 * T, uint32 ** D, int alpha, RegionStats ** F) // ----------------------------------------------------------------------------------------------------------------------------- { // copie de optimizedBorder_Rosenfeld // test d'existance de ex en local local uint32 b = E[i - 1][j]; uint32 x = E[i][j]; if (x) { if (b) { vuse2_Rosenfeld(b, x, T, D, alpha, F); // dist, local } else { uint32 a = E[i - 1][j - 1]; if (a) { vuse2_Rosenfeld(a, x, T, D, alpha, F); // dist, local } } } } // FAST // ------------------------------------------------------------------------------------------------------------------------------------------- static void borderMerging_Fast_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha, RegionStats ** F) // ------------------------------------------------------------------------------------------------------------------------------------------- { // Prologue optimizedBorderLeft_Rosenfeld_Dist(E, i, 0, T, D, alpha, F); // Boucle principale for (int j = 1; j < width - 1; j++) { optimizedBorder_Rosenfeld_Dist(E, i, j, T, D, alpha, F); } // Epilogue 
optimizedBorderRight_Rosenfeld_Dist(E, i, width - 1, T, D, alpha, F); } #endif // FAST #if SLOW // ------------------------------------------------------------------------------------------------------------------------------------------- static void borderMerging_Slow_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha, RegionStats ** F) // ------------------------------------------------------------------------------------------------------------------------------------------- { int j = 0; uint32 eps; uint32 e1, e2, e3, ex; uint32 r1, r2, r3, rx; // -------------- // -- prologue -- // -------------- MCA_VERBOSE3(printf("[%s] i = %d\n", __func__, i)); ex = E[i][j]; if (ex) { MCA_VERBOSE3(printf("[%s] j = %d\n", __func__, j)); e2 = E[i - 1][j]; e3 = E[i - 1][j + 1]; if (e2 || e3) { // test pour eviter acces distant r2 = e2 ? FindRoot_Dist(D, e2, alpha) : 0; r3 = e3 ? FindRoot_Dist(D, e3, alpha) : 0; rx = T[ex]; rx = FindRoot_Dist(D, rx, alpha); eps = ui32MinNonNul3(r2, r3, rx); MCA_VERBOSE3(printf("\n")); MCA_VERBOSE3(printf("e2 = %5d -> r2 = %5d\n", e2, r2)); MCA_VERBOSE3(printf("e3 = %5d -> r3 = %5d\n", e3, r3)); MCA_VERBOSE3(printf("ex = %5d -> rx = %5d\n", ex, rx)); MCA_VERBOSE3(printf("eps = %5d\n", eps)); // Quick-Union if (r2 > eps) { SetRoot_Rosenfeld(D, r2, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", r2, eps)); } // Pour le cas où r2 == r3, il ne faut pas ajouter deux fois les features //if (r3 > 0) { // r3 = FindRoot_Dist(D, r3, alpha); //} //if (r3 > eps) { if (r3 > eps && r3 != r2) { SetRoot_Rosenfeld(D, r3, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", r3, eps)); } //rx = FindRoot_Dist(D, rx, alpha); //if (rx > eps) { if (rx > eps && rx != r3 && rx != r2) { SetRoot_Rosenfeld(D, rx, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", rx, eps)); } MCA_VERBOSE3(printf("---------------------------\n")); } } // ----------------------- // -- boucle principale -- // ----------------------- for (j = 
0 + 1; j < width - 1; j++) { ex = E[i][j]; if (ex) { MCA_VERBOSE3(printf("[%s] j = %d\n", __func__, j)); e1 = E[i - 1][j - 1]; e2 = E[i - 1][j]; e3 = E[i - 1][j + 1]; if (e1 || e2 || e3) { // test pour eviter un acces distant r1 = e1 ? FindRoot_Dist(D, e1, alpha) : 0; r2 = e2 ? FindRoot_Dist(D, e2, alpha) : 0; r3 = e3 ? FindRoot_Dist(D, e3, alpha) : 0; rx = T[ex]; rx = FindRoot_Dist(D, rx, alpha); eps = ui32MinNonNul4(r1, r2, r3, rx); MCA_VERBOSE3(printf("\n")); MCA_VERBOSE3(printf("e1 = %5d -> r1 = %5d\n", e1, r1)); MCA_VERBOSE3(printf("e2 = %5d -> r2 = %5d\n", e2, r2)); MCA_VERBOSE3(printf("e3 = %5d -> r3 = %5d\n", e3, r3)); MCA_VERBOSE3(printf("ex = %5d -> rx = %5d\n", ex, rx)); MCA_VERBOSE3(printf("eps = %5d\n", eps)); // Quick-Union if (r1 > eps) { SetRoot_Rosenfeld(D, r1, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", r1, eps)); } //if (r2 > 0) { // r2 = FindRoot_Dist(D, r2, alpha); //} if (r2 > eps && r2 != r1) { //if (r2 > eps) { SetRoot_Rosenfeld(D, r2, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", r2, eps)); } //if (r3 > 0) { // r3 = FindRoot_Dist(D, r3, alpha); //} if (r3 > eps && r3 != r2 && r3 != r1) { //if (r3 > eps) { SetRoot_Rosenfeld(D, r3, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", r3, eps)); } //rx = FindRoot_Dist(D, rx, alpha); if (rx > eps && rx != r3 && rx != r2 && rx != r1) { //if (rx > eps) { SetRoot_Rosenfeld(D, rx, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", rx, eps)); } MCA_VERBOSE3(puts("---------------------------\n")); } } } // -------------- // -- epilogue -- // -------------- j = width - 1; ex = E[i][j]; if (ex) { MCA_VERBOSE3(printf("[%s] j = %d\n", __func__, j)); e1 = E[i - 1][j - 1]; e2 = E[i - 1][j]; if (e1 || e2) { // test pour eviter acces distant r1 = e1 ? FindRoot_Dist(D, e1, alpha) : 0; r2 = e2 ? 
FindRoot_Dist(D, e2, alpha) : 0; rx = T[ex]; rx = FindRoot_Dist(D, rx, alpha); eps = ui32MinNonNul3(r1, r2, rx); MCA_VERBOSE3(printf("\n")); MCA_VERBOSE3(printf("e1 = %5d -> r1 = %5d\n", e1, r1)); MCA_VERBOSE3(printf("e2 = %5d -> r2 = %5d\n", e2, r2)); MCA_VERBOSE3(printf("ex = %5d -> rx = %5d\n", ex, rx)); MCA_VERBOSE3(printf("eps = %5d\n", eps)); // Quick-Union if (r1 > eps) { SetRoot_Rosenfeld(D, r1, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", r1, eps)); } //if (r2 > 0) { // r2 = FindRoot_Dist(D, r2, alpha); //} if (r2 > eps && r2 != r1) { //if (r2 > eps) { SetRoot_Rosenfeld(D, r2, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", r2, eps)); } //rx = FindRoot_Dist(D, rx, alpha); if (rx > eps && rx != r2 && rx != r1) { //if (rx > eps) { SetRoot_Rosenfeld(D, rx, eps, alpha, F); MCA_VERBOSE3(printf("D[%5d] <- %d\n", rx, eps)); } MCA_VERBOSE3(printf("---------------------------\n")); } } } #endif // SLOW // -------------------------------------------------------------------------------------------------------------------------------------- static void borderMerging_Rosenfeld_Dist(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ** D, int alpha, RegionStats ** F) // -------------------------------------------------------------------------------------------------------------------------------------- { #if FAST borderMerging_Fast_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F); #endif // FAST #if SLOW borderMerging_Slow_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F); #endif // SLOW } // ---------------------------------------------------------------------------------------------------- static uint32 line0Labeling_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne) // ---------------------------------------------------------------------------------------------------- { int j; uint8 x; uint32 e4; uint32 r4; // prologue : j = 0 x = X[i][0]; if (x) { E[i][0] = ++ne; } else { E[i][0] = 0; } // boucle et epilogue j = 
[1..width-1] for (j = 1; j <= width - 1; j++) { x = X[i][j]; if (x) { e4 = E[i][j - 1]; if (e4 == 0) { E[i][j] = ++ne; } else { E[i][j] = e4; } } else { E[i][j] = 0; } } return ne; } #if SLOW // -------------------------------------------------------------------------------------------------------- static uint32 lineLabeling_Slow_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne) // -------------------------------------------------------------------------------------------------------- { // version lineLabeling_Rosenfeld_UF_QU_8C avec Quick-Union int j; uint8 x; uint32 e; uint32 e1, e2, e3, e4; uint32 r1, r2, r3, r4; // -------------- // -- prologue -- // -------------- j = 0; x = X[i][j]; if (x) { e2 = E[i - 1][j]; e3 = E[i - 1][j + 1]; // nouvel element if (e2 == 0 && e3 == 0) { e = ++ne; E[i][j] = e; } else { // etiquettes identiques if (e2 == e3) { e = e2; E[i][j] = e; } else { // cas general r2 = (e2 == 0) ? 0 : FindRoot(T, e2); r3 = (e3 == 0) ? 0 : FindRoot(T, e3); e = ui32MinNonNul2(r2, r3); // Quick-Union if (r2 > e) { T[r2] = e; } if (r3 > e) { T[r3] = e; } E[i][j] = e; } } } else { E[i][j] = 0; } // x // ----------------------- // -- boucle principale -- // ----------------------- for (j = 0 + 1; j < width - 1; j++) { x = X[i][j]; if (x) { e1 = E[i - 1][j - 1]; e2 = E[i - 1][j]; e3 = E[i - 1][j + 1]; e4 = E[i][j - 1]; // nouvel element if (e1 == 0 && e2 == 0 && e3 == 0 && e4 == 0) { e = ++ne; E[i][j] = e; } else { // etiquettes identiques if (e1 == e2 && e1 == e3 && e1 == e4) { e = e1; E[i][j] = e; } else { // cas general r1 = (e1 == 0) ? 0 : FindRoot(T, e1); r2 = (e2 == 0) ? 0 : FindRoot(T, e2); r3 = (e3 == 0) ? 0 : FindRoot(T, e3); r4 = (e4 == 0) ? 
0 : FindRoot(T, e4); e = ui32MinNonNul4(r1, r2, r3, r4); // Quick-Union if (r1 > e) { T[r1] = e; } if (r2 > e) { T[r2] = e; } if (r3 > e) { T[r3] = e; } if (r4 > e) { T[r4] = e; } E[i][j] = e; } } } else { E[i][j] = 0; } // x } // j // -------------- // -- epilogue -- // -------------- j = width - 1; x = X[i][j]; if (x) { e1 = E[i - 1][j - 1]; e2 = E[i - 1][j]; e4 = E[i][j - 1]; // nouvel element if (e1 == 0 && e2 == 0 && e4 == 0) { e = ++ne; E[i][j] = e; } else { // etiquettes identiques if (e1 == e2 && e1 == e4) { e = e1; E[i][j] = e; } else { // cas general r1 = (e1 == 0) ? 0 : FindRoot(T, e1); r2 = (e2 == 0) ? 0 : FindRoot(T, e2); r4 = (e4 == 0) ? 0 : FindRoot(T, e4); e = ui32MinNonNul3(r1, r2, r4); // Quick-Union if (r1 > e) { T[r1] = e; } if (r2 > e) { T[r2] = e; } if (r4 > e) { T[r4] = e; } E[i][j] = e; } } } else { E[i][j] = 0; } // x return ne; } #endif // SLOW #if FAST // --------------------------------------------------------------------------------------------- static uint32 optimizedAccessLeft_DT_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ne) // --------------------------------------------------------------------------------------------- { // Decision Tree 8-connexe avec Quick-Union uint32 b, c, e; b = E[i - 1][j]; if (b) { e = use1_QU_Rosenfeld(b, T); } else { c = E[i - 1][j + 1]; if (c) { e = use1_QU_Rosenfeld(c, T); } else { e = ++ne; } } E[i][j] = e; return ne; } // FAST // ---------------------------------------------------------------------------------------------- static uint32 optimizedAccessRight_DT_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ne) // ---------------------------------------------------------------------------------------------- { // Decision Tree 8-connexe avec Quick-Union uint32 a, b, d, e; b = E[i - 1][j]; if (b) { e = use1_QU_Rosenfeld(b, T); } else { a = E[i - 1][j - 1]; if (a) { e = use1_QU_Rosenfeld(a, T); } else { d = E[i][j - 1]; if (d) { e = use1_QU_Rosenfeld(d, T); } else { e = ++ne; } } } 
E[i][j] = e; return ne; } // FAST // ----------------------------------------------------------------------------------------- static uint32 optimizedAccess_DT_Rosenfeld(uint32 ** E, int i, int j, uint32 * T, uint32 ne) // ----------------------------------------------------------------------------------------- { // Decision Tree 8-connexe avec Quick-Union uint32 a, b, c, d, e; b = E[i - 1][j]; if (b) { e = use1_QU_Rosenfeld(b, T); } else { c = E[i - 1][j + 1]; if (c) { a = E[i - 1][j - 1]; if (a) { e = use2_QU_Rosenfeld(a, c, T); } else { d = E[i][j - 1]; if (d) { e = use2_QU_Rosenfeld(c, d, T); } else { e = use1_QU_Rosenfeld(c, T); } } } else { a = E[i - 1][j - 1]; if (a) { e = use1_QU_Rosenfeld(a, T); } else { d = E[i][j - 1]; if (d) { e = use1_QU_Rosenfeld(d, T); } else { e = ++ne; } } } } E[i][j] = e; return ne; } // FAST // -------------------------------------------------------------------------------------------------------- static uint32 lineLabeling_Fast_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne) // -------------------------------------------------------------------------------------------------------- { uint8 x; // avec DT et QU // Left Border x = X[i][0]; if (x) { ne = optimizedAccessLeft_DT_Rosenfeld(E, i, 0, T, ne); } else { E[i][0] = 0; } // Middle for (int j = 1; j < width - 1; j++) { uint8 x = X[i][j]; if (x) { ne = optimizedAccess_DT_Rosenfeld(E, i, j, T, ne); } else { E[i][j] = 0; } } // Right Border x = X[i][width - 1]; if (x) { ne = optimizedAccessRight_DT_Rosenfeld(E, i, width - 1, T, ne); } else { E[i][width - 1] = 0; } return ne; } #endif // FAST // --------------------------------------------------------------------------------------------------- static uint32 lineLabeling_Rosenfeld(uint8 ** X, int i, int width, uint32 ** E, uint32 * T, uint32 ne) // --------------------------------------------------------------------------------------------------- { #if SLOW return lineLabeling_Slow_Rosenfeld(X, i, width, 
E, T, ne); #elif FAST return lineLabeling_Fast_Rosenfeld(X, i, width, E, T, ne); #endif } // ----------------------------------------------------------------------- static uint32 countTable_Range_Rosenfeld(uint32 * T, uint32 e0, uint32 e1) // ----------------------------------------------------------------------- { uint32 e; uint32 nr = 0; // nombre de racines = de composantes connexes for (e = e0; e <= e1; e++) { if (e == T[e]) { nr += 1; } } return nr; } // ------------------------------------------------------------------------------------------ static void solveTable_Range_Rosenfeld(uint32 * T, uint32 e0, uint32 e1, RegionStats * Stats) // ------------------------------------------------------------------------------------------ { uint32 e, r; for (e = e0; e <= e1; e++) { r = T[T[e]]; assert(r != 0); if (r < e) { T[e] = r; // racine de la classe d'equivalence #if FEATURES && !(PARMERGE && ARSP) RegionStats_Accumulate_Stats1_From_Index(Stats, r, e); #endif } } } // -------------------------------------------- static void MCA_Label_Rosenfeld_PAR1(MCA * mca) // -------------------------------------------- { if (mca->p == 0) { MCA_VERBOSE2(printf("*** %s ***\n", __func__)); } int i0 = mca->i0; int i1 = mca->i1; int width = mca->width; uint32 e0 = mca->e0; uint32 e1 = mca->e1; uint32 ne_prev = mca->ne_prev; uint32 ne = e0 - 1; uint32 nr = 0; // local memory zones uint8 ** X = mca->X; uint32 ** E = mca->E; uint32 * T = mca->T; RegionStats * stats = mca->stats; CLOCK_THREAD_START_STEP(mca->p, 0); set_ui32vector_j(T, e0, ne_prev); #if FEATURES zero_RegionStatsVector(stats, e0, ne_prev); #endif if (mca->p == 0) { MCA_VERBOSE3(display_ui8matrix_positive(X, i0, i1, 0, width - 1, 5, "Xp"); printf("\n")); } // ---------------------------- // // -- Etiquetage d'une bande -- // // ---------------------------- // ne = line0Labeling_Rosenfeld(X, i0, width, E, T, ne); #if FEATURES lineFeaturesComputation(E, i0, width, stats); #endif for (int i = i0 + 1; i <= i1; i++) { ne = 
lineLabeling_Rosenfeld(X, i, width, E, T, ne); // Slow or Fast #if FEATURES lineFeaturesComputation(E, i, width, stats); #endif } mca->ne = ne; //plus grande etiquette de l'intervalle [e0..e1] if (mca->p == 0) { MCA_VERBOSE3(printf("ne = %d\n", ne)); MCA_VERBOSE3(display_ui32matrix_positive(E, i0, i1, 0, width - 1, 5, "Ep"); printf("\n")); MCA_VERBOSE3(display_ui32vector_number(T, e0, ne, "%5d", "Tp_avant")); } // ------------------------------------------------------ // // -- Fermeture transitive sans pack de chaque table T -- // // ------------------------------------------------------ // solveTable_Range_Rosenfeld(T, e0, ne, stats); if (mca->p == 0) { MCA_VERBOSE3(nr = countTable_Range_Rosenfeld(T, e0, ne); printf("p = %d : e = [%d..%d] -> ne = %d -> nr = %d\n", mca->p, e0, ne, (ne - e0 + 1), nr)); MCA_VERBOSE3(display_ui32vector_number(T, e0, ne, "%5d", "Tp_apres")); } CLOCK_THREAD_END_STEP(mca->p, 0); } #if PARMERGE // ----------------------------------------------------- static void MCA_Label_Rosenfeld_PAR2(MCA * mca) // ----------------------------------------------------- { int p = mca->p; int nb_level = mca->nb_level; if (mca->p == 0) { MCA_VERBOSE2(printf("*** %s ***\n", __func__)); } // ------------------------------ // -- parallel border merging -- // ------------------------------ // local variables int i = mca->i0; int width = mca->width; int alpha = mca->alpha; uint32 e0 = mca->e0; uint32 e1 = mca->ne; // local memory zones uint8 ** X = mca->X; uint32 ** E = mca->E; uint32 * T = mca->T; uint32 ** D = mca->D; RegionStats ** F = mca->F; CLOCK_THREAD_START_STEP(p, 1); if (p != 0) { // thread 0 never has any merge to do borderMerging_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F); // (i) et (i-1) } pthread_barrier_wait(&main_barrier); CLOCK_THREAD_END_STEP(p, 1); // --------------------------------- // -- parallel transitive closure -- // --------------------------------- CLOCK_THREAD_START_STEP(p, 2); for (uint32 e = e0; e <= e1; e++) { uint32 r = T[e]; 
// acces local if (r < e) { r = FindRoot_Dist(D, e, alpha); // acces distant T[e] = r; } MCA_VERBOSE3(printf("p%d : T[%d] <- %d\n", p, e, r)); } CLOCK_THREAD_END_STEP(p, 2); #if FEATURES && ARSP pthread_barrier_wait(&main_barrier); #endif // To avoid uninitialized accesses CLOCK_THREAD_START_STEP(p, 3); // With FEATURES and ARSP, STEP 3 is the Features propagation #if FEATURES && ARSP Propagate_Features(e0, e1, T, F, mca->alpha); #endif CLOCK_THREAD_END_STEP(p, 3); } #endif // PARMERGE #if !PARMERGE // -------------------------------------------- static void MCA_Label_Rosenfeld_PYR2(MCA * mca) // -------------------------------------------- { // input int p = mca->p; int nb_level = mca->nb_level; if (mca->p == 0) { MCA_VERBOSE2(printf("*** %s ***\n", __func__)); } // ------------------------------ // -- pyramidal border merging -- // ------------------------------ // local variables int i = mca->i0; int width = mca->width; int alpha = mca->alpha; uint32 e0 = mca->e0; uint32 e1 = mca->ne; // local memory zones uint8 ** X = mca->X; uint32 ** E = mca->E; uint32 * T = mca->T; uint32 ** D = mca->D; RegionStats ** F = mca->F; CLOCK_THREAD_START_STEP(p, 1); #if PYR_BARRIERS // Version optimisée qui fait faire un break aux processeurs qui n'ont plus // à faire de merge. 
// Implique de pré-calculer le nombre de threads à chaque barrière if (p != 0) { // thread 0 never has any merge to do int been_active = 0; for (int level = 0; level < nb_level; level++) { if ((p + (1 << level)) % (1 << (level + 1)) == 0) { borderMerging_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F); // (i) et (i-1) been_active = 1; } else if (been_active) { break; } pthread_barrier_wait(&mca->barriers[level]); } } pthread_barrier_wait(&main_barrier); #else for (int level = 1; level <= nb_level; level++) { if ((p + (1 << (level - 1))) % (1 << level) == 0) { // thread actif borderMerging_Rosenfeld_Dist(X, i, width, E, T, D, alpha, F); // (i) et (i-1) } pthread_barrier_wait(&main_barrier); } #endif CLOCK_THREAD_END_STEP(p, 1); // --------------------------------- // -- parallel transitive closure -- // --------------------------------- CLOCK_THREAD_START_STEP(p, 2); for (uint32 e = e0; e <= e1; e++) { uint32 r = T[e]; // acces local if (r < e) { r = FindRoot_Dist(D, e, alpha); // acces distant T[e] = r; } MCA_VERBOSE3(printf("p%d : T[%d] <- %d\n", p, e, r)); } CLOCK_THREAD_END_STEP(p, 2); } #endif // !PARMERGE // ------------------------------------- void MCA_Label_Rosenfeld_PAR3(MCA * mca) // ------------------------------------- { // input if (mca->p == 0) { MCA_VERBOSE2(printf("*** %s ***\n", __func__)); } int i0 = mca->i0; int i1 = mca->i1; int j0 = 0; int j1 = mca->width - 1; uint32 ** E = mca->E; uint32 * T = mca->T; CLOCK_THREAD_START_STEP(mca->p, 4); for (int i = i0; i <= i1; i++) { for (int j = j0; j <= j1; j++) { uint32 e = E[i][j]; if (e != 0) { E[i][j] = T[e]; } } } CLOCK_THREAD_END_STEP(mca->p, 4); } // ====================================================================== #if TARGET_OS == GIETVM __attribute__((constructor)) void * MCA_Label_Rosenfeld(void * arg) #else void * MCA_Label_Rosenfeld(void * arg) #endif // ====================================================================== { MCA * mca = (MCA *) arg; #if TARGET_OS == GIETVM unsigned int x, 
y, lpid; giet_proc_xyp(&x, &y, &lpid); // Mettre à jour mca->p en fonction de x, y, lpid // pour que les allocations faites par le main soient locales, // i.e. mca->p = (x * Y_SIZE + y) * NB_PROCS_MAX + lpid; // We have : // mca->p = 4 pour (x = 0, y = 1, lpid = 0) // mca->p = 5 pour (x = 0, y = 1, lpid = 1) MCA_VERBOSE3(printf("mca->p = %d pour (x = %d, y = %d, lpid = %d)\n", mca->p, x, y, lpid)); #endif CLOCK_THREAD_START(mca->p); int num_runs = mca->nr; // We always perform one more run than the num_runs // value, so as to know "ne", i.e. the number of // elements to reset in the T and F tables (labels and stats) // After this first extra run, clock times are not accumulated // and thus are lost. // Note: the CLOCK_THREAD_START will still include this first run, // and in case of multiple runs, only averaged times should be // considered. for (int run = 0; run < num_runs + 1; run++) { CLOCK_THREAD_COMPUTE_START(mca->p); MCA_Scatter_ImageX(mca); pthread_barrier_wait(&main_barrier); MCA_Label_Rosenfeld_PAR1(mca); pthread_barrier_wait(&main_barrier); #if PARMERGE MCA_Label_Rosenfeld_PAR2(mca); #else MCA_Label_Rosenfeld_PYR2(mca); #endif pthread_barrier_wait(&main_barrier); MCA_Label_Rosenfeld_PAR3(mca); pthread_barrier_wait(&main_barrier); MCA_Gather_ImageL(mca); pthread_barrier_wait(&main_barrier); CLOCK_THREAD_COMPUTE_END(mca->p); if (run == 0) { // Mise à jour du ne_prev par chaque thread mca->ne_prev = mca->ne; mca->ne = 0; } else { // Accumulation du temps COMPUTE et de toutes les STEP if (mca->p == 0) { CLOCK_ACCUMULATE; } assert(mca->ne == mca->ne_prev); // Reinitialisation de "ne" s'il ne s'agit pas du dernier run if (run != num_runs) { mca->ne = 0; } } pthread_barrier_wait(&main_barrier); } #if FEATURES if (display_features) { if (mca->p == 0) { int i = 1; MCA_VERBOSE1(printf("[STATS]\n")); for (int p = 0; p < mca->np; p++) { MCA * mca_par = mca->mca->mcas[p]; uint32 e0 = mca_par->e0; uint32 ne = mca_par->ne - mca_par->e0; // number of elements uint32 * T 
= mca_par->T; RegionStats * stats = mca_par->stats; MCA_VERBOSE1(RegionStats_DisplayStats_Sparse(T, e0, e0 + ne, stats, NULL, &i)); } MCA_VERBOSE1(printf("[/STATS]\n")); } } #endif CLOCK_THREAD_END(mca->p); #if TARGET_OS == GIETVM if (mca->p != 0) { exit(0); } #endif return NULL; } // Local Variables: // tab-width: 4 // c-basic-offset: 4 // c-file-offsets:((innamespace . 0)(inline-open . 0)) // indent-tabs-mode: nil // End: // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4