Changeset 635 for trunk/user/sort/sort.c


Ignore:
Timestamp:
Jun 26, 2019, 11:42:37 AM (5 years ago)
Author:
alain
Message:

This version is a major evolution: The physical memory allocators,
defined in the kmem.c, ppm.c, and kcm.c files have been modified
to support remote accesses. The RPCs that were previously used
to allocate physical memory in a remote cluster have been removed.
This has been done to cure a dead-lock in case of concurrent page-faults.

This version 2.2 has been tested on a (4 clusters / 2 cores per cluster)
TSAR architecture, for both the "sort" and the "fft" applications.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/user/sort/sort.c

    r629 r635  
     1/*
     2 * sort.c - Parallel sort
     3 *
     4 * Author     Cesar Fuguet Tortolero (2013)
     5 *            Alain Greiner (2019)
     6 *
     7 * Copyright (c) UPMC Sorbonne Universites
     8 *
     9 * This is free software; you can redistribute it and/or modify it
     10 * under the terms of the GNU General Public License as published by
     11 * the Free Software Foundation; version 2.0 of the License.
     12 *
     13 * It is distributed in the hope that it will be useful, but
     14 * WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16 * General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
     20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
     21 */
     22
    123///////////////////////////////////////////////////////////////////////////////
    2 // File   :  sort.c
    3 // Date   :  November 2013
    4 // Author :  Cesar Fuguet Tortolero <cesar.fuguet-tortolero@lip6.fr>
    5 ///////////////////////////////////////////////////////////////////////////////
    6 // This multi-threaded application implement a multi-stage sort application.
    7 // The various stages are separated by synchronisation barriers.
     24// This multi-threaded application implements a multi-stage sort.
     25// It has been written by Cesar Fuguet Tortolero in 2013.
     26// It has been ported on ALMOS-MKH by Alain Greiner in 2019.
     27//
    828// There is one thread per physical core.
    929// Computation is organised as a binary tree:
     
    1535//       Number_of_stages = number of barriers = log2(Number_of_threads)
    1636//
     37// The various stages are separated by synchronisation barriers, and the
     38// main thread uses the join syscall to check that all threads completed
     39// before printing the computation time (sequential & parallel).
     40// These results can be - optionally - registered in an instrumentation file.
     41//
    1742// Constraints :
    1843// - It supports up to 1024 cores: x_size, y_size, and ncores must be
     
    2954#include <hal_macros.h>
    3055
    31 #define ARRAY_LENGTH        4096       // number of items
     56#define ARRAY_LENGTH        128        // number of items
    3257#define MAX_THREADS         1024       // 16 * 16 * 4
    3358
    3459#define USE_DQT_BARRIER     1          // use DQT barrier if non zero
    3560#define DISPLAY_ARRAY       0          // display items values before and after
    36 #define DEBUG_MAIN          1          // trace main function
    37 #define DEBUG_SORT          1          // trace sort function
    38 #define INTERACTIVE_MODE    0          // activate idbg() during instrumentation
     61#define DEBUG_MAIN          0          // trace main function
     62#define DEBUG_SORT          0          // trace sort function
    3963#define CHECK_RESULT        0          // for debug
    4064#define INSTRUMENTATION     1          // register computation times on file
    41 #define IDBG                0          // activate interactive debug in main
    4265
    4366/////////////////////////////////////////////////////////////
     
    6285pthread_barrier_t   barrier;                 // synchronisation variables
    6386
    64 pthread_attr_t      attr[MAX_THREADS];       // thread attributes (one per thread)
    65 args_t              arg[MAX_THREADS];        // sort function arguments (one per thread)
     87pthread_t           trdid[MAX_THREADS];      // kernel identifiers
     88pthread_attr_t      attr[MAX_THREADS];       // thread attributes
     89args_t              arg[MAX_THREADS];        // sort function arguments
    6690
    6791////////////////////////////////////
     
    277301    unsigned int           lid;                // core local index for a thread
    278302    unsigned int           n;                  // index in array to sort
    279     pthread_t              trdid;              // kernel allocated thread index (unused)
    280303    pthread_barrierattr_t  barrier_attr;       // barrier attributes
    281304
     
    353376    // launch other threads to execute sort() function
    354377    // on cores other than the core running the main thread
    355     for ( x=0 ; x<x_size ; x++ )
    356     {
    357         for ( y=0 ; y<y_size ; y++ )
    358         {
    359             for ( lid=0 ; lid<ncores ; lid++ )
     378    for ( x = 0 ; x < x_size ; x++ )
     379    {
     380        for ( y = 0 ; y < y_size ; y++ )
     381        {
     382            for ( lid = 0 ; lid < ncores ; lid++ )
    360383            {
     384                // compute thread user index (continuous index)
    361385                thread_uid = (((x * y_size) + y) * ncores) + lid;
    362386
    363                 // set sort arguments for all threads
     387                // set arguments for all threads
    364388                arg[thread_uid].threads      = total_threads;
    365389                arg[thread_uid].thread_uid   = thread_uid;
     
    367391
    368392                // set thread attributes for all threads
    369                 attr[thread_uid].attributes = PT_ATTR_DETACH          |
    370                                               PT_ATTR_CLUSTER_DEFINED |
    371                                               PT_ATTR_CORE_DEFINED;
     393                attr[thread_uid].attributes = PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED;
    372394                attr[thread_uid].cxy        = HAL_CXY_FROM_XY( x , y );
    373395                attr[thread_uid].lid        = lid;
     
    375397                if( thread_uid != main_uid )
    376398                {
    377                     if ( pthread_create( &trdid,              // not used because no join
     399                    if ( pthread_create( &trdid[thread_uid],  // buffer for kernel identifier
    378400                                         &attr[thread_uid],   // thread attributes
    379401                                         &sort,               // entry function
     
    383405                        exit( 0 );
    384406                    }
    385                     else
    386                     {
    387 #if DEBUG_MAIN
     407
     408#if (DEBUG_MAIN & 1)
    388409printf("\n[sort] main created thread %x \n", thread_uid );
    389410#endif
    390                     }
    391411                }
    392412            }
     
    402422#endif
    403423
    404 #if INTERACTIVE_MODE
    405 idbg();
    406 #endif
    407    
    408424    // the main thread also runs the sort() function
    409425    sort( &arg[main_uid] );
    410426
     427    // wait for the other threads to complete
     428    for ( x = 0 ; x < x_size ; x++ )
     429    {
     430        for ( y = 0 ; y < y_size ; y++ )
     431        {
     432            for ( lid = 0 ; lid < ncores ; lid++ )
     433            {
     434                // compute thread continuous index
     435                thread_uid = (((x * y_size) + y) * ncores) + lid;
     436
     437                if( thread_uid != main_uid )
     438                {
     439                    if( pthread_join( trdid[thread_uid] , NULL ) )
     440                    {
     441                        printf("\n[fft error] in main thread %d joining thread %d\n",
     442                        main_uid , thread_uid );
     443                        exit( 0 );
     444                    }
     445                   
     446#if (DEBUG_MAIN & 1)
     447printf("\n[fft] main thread %d joined thread %d\n", main_uid, thread_uid );
     448#endif
     449
     450                }
     451            }
     452        }
     453    }
     454
    411455    ////////////////////////////
    412456    get_cycle( &para_end_cycle );
     
    417461    // destroy barrier
    418462    pthread_barrier_destroy( &barrier );
    419 
    420 #if INTERACTIVE_MODE
    421 idbg();
    422 #endif
    423463
    424464#if CHECK_RESULT
Note: See TracChangeset for help on using the changeset viewer.