Changeset 652 for trunk/user/sort


Ignore:
Timestamp:
Nov 14, 2019, 3:56:51 PM (5 years ago)
Author:
alain
Message:

Introduce the three placement modes in the "transpose", "convol", and "fft" applications.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/user/sort/sort.c

    r637 r652  
    6969#define INSTRUMENTATION     1               // register computation times on file
    7070
    71 ///////////////////////////////////////////////////////////////////////////////////
    72 //            Arguments for the sort() function
    73 ///////////////////////////////////////////////////////////////////////////////////
    74 
    75 typedef struct
    76 {
    77     unsigned int        tid;                // continuous thread index
    78     unsigned int        threads;            // total number of threads
    79     pthread_barrier_t * parent_barrier;     // pointer on termination barrier
    80 }
    81 sort_args_t;
    82 
    8371////////////////////////////////////////////////////////////////////////////////////
    8472//            Sort specific global variables
     
    8876int                 array1[ARRAY_LENGTH];   
    8977
     78unsigned int        threads;                // total number of working threads
     79
    9080pthread_barrier_t   barrier;                 // synchronisation variables
    9181
     
    9383//             Global variables required by pthread_parallel_create()
    9484/////////////////////////////////////////////////////////////////////////////////////
    95 
    96 // 2D arrays of input arguments for the <sort> threads
    97 // These arrays are initialised by the application main thread
    98 
    99 sort_args_t       sort_args[CLUSTERS_MAX][CORES_MAX];  // sort function arguments
    100 sort_args_t     * sort_ptrs[CLUSTERS_MAX][CORES_MAX];  // pointers on arguments
    101 
    102 // 1D array of barriers to allow the <sort> threads to signal termination
    103 // this array is initialised by the pthread_parallel_create() function
    104  
    105 pthread_barrier_t parent_barriers[CLUSTERS_MAX];       // termination barrier
    10685
    10786
     
    174153}  // end merge()
    175154
    176 //////////////////////////////
    177 void sort( sort_args_t * ptr )
     155///////////////////////////////////////////////
     156void sort( pthread_parallel_work_args_t * ptr )
    178157{
    179158    unsigned int        i;
     
    183162    // get arguments
    184163    unsigned int        tid            = ptr->tid;
    185     unsigned int        threads        = ptr->threads;
    186     pthread_barrier_t * parent_barrier = ptr->parent_barrier;
     164    pthread_barrier_t * parent_barrier = ptr->barrier;
    187165
    188166    unsigned int        items      = ARRAY_LENGTH / threads;
     
    190168
    191169#if DEBUG_SORT
    192 printf("\n[sort] start : ptr %x / tid %d / threads %d / barrier %x\n",
     170printf("\n[sort] start : ptr %x / tid %d / threads %d / parent_barrier %x\n",
    193171ptr, tid, threads, parent_barrier );
    194172#endif
     
    249227    }  // end for stages
    250228
    251     // sort thread signal completion to main thread
     229    // sort thread signals completion to pthread_parallel_create()
    252230    pthread_barrier_wait( parent_barrier );
    253231
     
    269247    unsigned int           y_size;             // number of columns
    270248    unsigned int           ncores;             // number of cores per cluster
    271     unsigned int           total_threads;      // total number of threads
    272     unsigned int           x;                  // X coordinate for a sort thread
    273     unsigned int           y;                  // Y coordinate for a sort thread
    274     unsigned int           cxy;                // cluster identifier for a sort thead
    275     unsigned int           lid;                // core local index for a thread
    276     unsigned int           tid;                // sort thread continuous index
    277249    pthread_barrierattr_t  barrier_attr;       // barrier attributes (used for DQT)
    278250    unsigned int           n;                  // index in array to sort
     
    285257    get_cycle( &start_cycle );
    286258 
    287     // compute number of threads (one thread per core)
     259    // compute number of working threads (one thread per core)
    288260    get_config( &x_size , &y_size , &ncores );
    289     total_threads = x_size * y_size * ncores;
     261    threads = x_size * y_size * ncores;
    290262
    291263    // compute covering DQT size and level
     
    294266
    295267    // checks number of threads
    296     if ( (total_threads != 1)   && (total_threads != 2)   && (total_threads != 4)   &&
    297          (total_threads != 8)   && (total_threads != 16 ) && (total_threads != 32)  &&
    298          (total_threads != 64)  && (total_threads != 128) && (total_threads != 256) &&
    299          (total_threads != 512) && (total_threads != 1024) )
     268    if ( (threads != 1)   && (threads != 2)   && (threads != 4)   &&
     269         (threads != 8)   && (threads != 16 ) && (threads != 32)  &&
     270         (threads != 64)  && (threads != 128) && (threads != 256) &&
     271         (threads != 512) && (threads != 1024) )
    300272    {
    301273        printf("\n[sort] ERROR : number of cores must be power of 2\n");
     
    304276
    305277    // check array size
    306     if ( ARRAY_LENGTH % total_threads)
     278    if ( ARRAY_LENGTH % threads)
    307279    {
    308280        printf("\n[sort] ERROR : array size must be multiple of number of threads\n");
     
    311283
    312284    printf("\n[sort] main starts / %d threads / %d items / pid %x / cycle %d\n",
    313     total_threads, ARRAY_LENGTH, getpid(), (unsigned int)start_cycle );
     285    threads, ARRAY_LENGTH, getpid(), (unsigned int)start_cycle );
    314286
    315287    // initialize barrier
     
    319291        barrier_attr.y_size   = y_size;
    320292        barrier_attr.nthreads = ncores;
    321         error = pthread_barrier_init( &barrier, &barrier_attr , total_threads );
     293        error = pthread_barrier_init( &barrier, &barrier_attr , threads );
    322294    }
    323295    else // use SIMPLE_BARRIER
    324296    {
    325         error = pthread_barrier_init( &barrier, NULL , total_threads );
     297        error = pthread_barrier_init( &barrier, NULL , threads );
    326298    }
    327299
     
    352324#endif
    353325
    354     // build array of arguments for the <sort> threads
    355     for (x = 0 ; x < x_size ; x++)
    356     {
    357         for (y = 0 ; y < y_size ; y++)
    358         {
    359             // compute cluster identifier
    360             cxy = HAL_CXY_FROM_XY( x , y );
    361 
    362             for ( lid = 0 ; lid < ncores ; lid++ )
    363             {
    364                 // compute thread continuous index
    365                 tid = (((x * y_size) + y) * ncores) + lid;
    366 
    367                 // initialize 2D array of arguments
    368                 sort_args[cxy][lid].tid            = tid;
    369                 sort_args[cxy][lid].threads        = total_threads;
    370                 sort_args[cxy][lid].parent_barrier = &parent_barriers[cxy];
    371 
    372                 // initialize 2D array of pointers
    373                 sort_ptrs[cxy][lid] = &sort_args[cxy][lid];
    374             }
    375         }
    376     }
    377 
    378326    ///////////////////////////
    379327    get_cycle( &seq_end_cycle );
     
    386334    // create and execute the working threads
    387335    if( pthread_parallel_create( root_level,
    388                                  &sort,
    389                                  &sort_ptrs[0][0],
    390                                  &parent_barriers[0] ) )
     336                                 &sort ) )
    391337    {
    392338        printf("\n[sort] ERROR : cannot create threads\n");
     
    412358#if CHECK_RESULT
    413359    int    success = 1;
    414     int *  res_array = ( (total_threads ==   2) ||
    415                          (total_threads ==   8) ||
    416                          (total_threads ==  32) ||
    417                          (total_threads == 128) ||
    418                          (total_threads == 512) ) ? array1 : array0;
     360    int *  res_array = ( (threads ==   2) ||
     361                         (threads ==   8) ||
     362                         (threads ==  32) ||
     363                         (threads == 128) ||
     364                         (threads == 512) ) ? array1 : array0;
    419365
    420366    for( n=0 ; n<(ARRAY_LENGTH-2) ; n++ )
Note: See TracChangeset for help on using the changeset viewer.