Changeset 629 for trunk/softs


Ignore:
Timestamp:
Feb 12, 2014, 9:51:23 AM (11 years ago)
Author:
alain
Message:
  • Update the giet_tsar to support the vci_iopic component in the tsar_generic_leti platform.
  • Modify the soft_transpose_giet application to make the graphic display on the frame buffer optional and to introduce a systematic auto-check.
Location:
trunk/softs
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • trunk/softs/giet_tsar/giet.S

    r622 r629  
    169169    /* It depends on both the cluster_xy & local_id,  */
    170170    /* and we must use the physical address extension */
    171     mfc0    $10,    $15,    1       /* $10 <= proc_id */
    172     andi    $10,    $10,    0x3FF   /* at most 1024 processors */
     171    mfc0    $10,    $15,    1        /* $10 <= proc_id                                */
     172    andi    $10,    $10,    0x3FF    /* at most 1024 processors                        */
    173173    li      $11,    NB_PROCS_MAX
    174174    divu    $10,    $11
    175     mflo    $12                      /* $12 <= cluster_xy */
    176     mfhi    $13                      /* $13 <= local_id */
    177 
    178     li      $7,     0b011110000000   /* $7 <= PRIO offset */
    179     sll     $8,     $13,    2        /* $8 <= local_id*4 */
    180     addu    $9,     $7,     $8       /* $9 <= PRIO offset + local_id*4 */
    181     la      $27,    seg_xcu_base   
    182     addu    $26,    $9,     $27      /* $26 <= seg_icu_base + PRIO offset + local_id*4 */
    183 
    184     /* XCU[cluster_xy] access to get PRIO register value */
    185     mtc2    $12,    $24              /* set PADDR extension */
    186     lw      $14,    ($26)            /* $14 <= PRIO register value */
    187     mtc2    $0,     $24              /* reset PADDR extension */
    188 
    189     /* test PTI, then HWI, then WTI */
    190     andi    $27,    $14,    0x1      /* test bit T in PRIO register */
    191     bne     $27,    $0,     _int_PTI /* branch to PTI handler */
    192     andi    $27,    $14,    0x2      /* test bit W in PRIO register */
    193     bne     $27,    $0,     _int_HWI /* branch to HWI handler */
    194     andi    $27,    $14,    0x4      /* test bit W in PRIO register */
    195     bne     $27,    $0,     _int_WTI /* branch to IPI handler */
     175    mflo    $12                      /* $12 <= cluster_xy                              */
     176    mfhi    $13                      /* $13 <= local_id                                */
     177    la      $14,    seg_xcu_base     /* $14 <= seg_xcu_base                            */
     178
     179    li      $7,     0b011110000000   /* $7 <= PRIO offset                              */
     180    sll     $8,     $13,    2        /* $8 <= local_id*4                              */
     181    addu    $9,     $7,     $8       /* $9 <= PRIO offset + local_id*4                 */
     182    addu    $26,    $9,     $14      /* $26 <= seg_xcu_base + PRIO offset + local_id*4 */
     183
     184    /* XCU[cluster_xy] access to get PRIO register value                               */
     185    mtc2    $12,    $24              /* set PADDR extension                            */
     186    lw      $15,    ($26)            /* $15 <= PRIO register value                    */
     187    mtc2    $0,     $24              /* reset PADDR extension                          */
     188
     189    /* test PTI, then HWI, then WTI                                                    */
     190    andi    $27,    $15,    0x1      /* test bit T in PRIO register                    */
     191    bne     $27,    $0,     _int_PTI /* branch to PTI handler                          */
     192    andi    $27,    $15,    0x2      /* test bit H in PRIO register                    */
     193    bne     $27,    $0,     _int_HWI /* branch to HWI handler                          */
     194    andi    $27,    $15,    0x4      /* test bit W in PRIO register                    */
     195    bne     $27,    $0,     _int_WTI /* branch to WTI handler                          */
    196196   
    197197    /* exit interrupt handler: restore registers */
    198198_int_restore:
    199199    .set noat
    200     lw      $1,     4*4($29)         /* restore $1 */
     200    lw      $1,     4*4($29)
    201201    .set at
    202     lw      $2,     4*5($29)         /* restore $2 */
    203     lw      $3,     4*6($29)         /* restore $3 */
    204     lw      $4,     4*7($29)         /* restore $4 */
    205     lw      $5,     4*8($29)         /* restore $5 */
    206     lw      $6,     4*9($29)         /* restore $6 */
    207     lw      $7,     4*10($29)        /* restore $7 */
    208     lw      $8,     4*11($29)        /* restore $8 */
    209     lw      $9,     4*12($29)        /* restore $9 */
    210     lw      $10,    4*13($29)        /* restore $10 */
    211     lw      $11,    4*14($29)        /* restore $11 */
    212     lw      $12,    4*15($29)        /* restore $12 */
    213     lw      $13,    4*16($29)        /* restore $13 */
    214     lw      $14,    4*17($29)        /* restore $14 */
    215     lw      $15,    4*18($29)        /* restore $15 */
    216     lw      $24,    4*19($29)        /* restore $24 */
    217     lw      $25,    4*20($29)        /* restore $25 */
    218     lw      $31,    4*21($29)        /* restore $31 */
     202    lw      $2,     4*5($29)
     203    lw      $3,     4*6($29)
     204    lw      $4,     4*7($29)
     205    lw      $5,     4*8($29)
     206    lw      $6,     4*9($29)
     207    lw      $7,     4*10($29)
     208    lw      $8,     4*11($29)
     209    lw      $9,     4*12($29)
     210    lw      $10,    4*13($29)
     211    lw      $11,    4*14($29)
     212    lw      $12,    4*15($29)
     213    lw      $13,    4*16($29)
     214    lw      $14,    4*17($29)
     215    lw      $15,    4*18($29)
     216    lw      $24,    4*19($29)
     217    lw      $25,    4*20($29)
     218    lw      $31,    4*21($29)
    219219    lw      $27,    4*22($29)        /* get EPC */
    220220    addiu   $29,    $29,    23*4     /* restore SP */
     
    222222    eret                             /* exit GIET */
    223223
     224    /* The PTI handler gets the PTI index, */
     225    /* acknowledges the PTI register,      */
     226    /* and calls the corresponding ISR.    */
    224227_int_PTI:
    225     srl     $26,    $14,    6        /* $26 <= (PRIO>>6  = PTI index) */
    226     j       _int_call_isr
    227     nop
    228 
    229 _int_HWI:
    230     srl     $26,    $14,    14       /* $26 <= (PRIO>>14 = HWI index) */
    231     j       _int_call_isr
    232     nop
    233 
    234 _int_WTI:
    235     srl     $26,    $14,    22       /* $26 <= (PRIO>>22 = WTI index) */
    236     j       _int_call_isr
    237     nop
    238    
    239     /* Call the relevant ISR */
    240 _int_call_isr:
    241     andi    $26,    $26,    0x7C     /* $26 <= interrupt_index * 4 */
     228    srl     $26,    $15,    6        /* $26 <= PRIO >> 6             */
     229    andi    $26,    $26,    0x7C     /* $26 <= PTI_INDEX * 4         */
     230    addi    $27,    $14,    0x180    /* $27 <= &PTI_ACK[0]           */
     231    add     $27,    $27,    $26      /* $27 <= &PTI_ACK[PTI_INDEX]   */
     232    lw      $0,     ($27)            /* acknowledge XICU PTI         */
    242233    la      $27,    _interrupt_vector
    243234    addu    $26,    $26,    $27
    244     lw      $26,    ($26)            /* read ISR address */
    245     jalr    $26                      /* call ISR */
    246     nop
    247     j       _int_restore
    248     nop
    249 
     235    lw      $26,    ($26)            /* read ISR address             */
     236    jalr    $26                      /* call ISR                     */
     237    nop
     238    j       _int_restore             /* return from INT handler      */
     239    nop
     240
     241    /* The HWI handler gets the HWI index */
     242    /* and calls the corresponding ISR.   */
     243_int_HWI:
     244    srl     $26,    $15,    14       /* $26 <= PRIO >> 14            */
     245    andi    $26,    $26,    0x7C     /* $26 <= HWI_INDEX * 4         */
     246    la      $27,    _interrupt_vector
     247    addu    $26,    $26,    $27      /* $26 <= &ISR[HWI_INDEX]       */
     248    lw      $26,    ($26)            /* read ISR address             */
     249    jalr    $26                      /* call ISR                     */
     250    nop
     251    j       _int_restore             /* return from INT handler      */
     252    nop
     253
     254    /* The WTI handler gets the WTI index, */
     255    /* acknowledges the WTI register,      */
     256    /* and calls the corresponding ISR.    */
     257_int_WTI:
     258    srl     $26,    $15,    22       /* $26 <= PRIO >> 22            */
     259    andi    $26,    $26,    0x7C     /* $26 <= WTI_INDEX * 4         */
     260    add     $27,    $14,    $26      /* $27 <= &WTI_REG[WTI_INDEX]   */
     261    lw      $0,     ($27)            /* acknowledge XICU WTI         */
     262    la      $27,    _interrupt_vector
     263    addu    $26,    $26,    $27      /* $26 <= &ISR[WTI_INDEX]       */
     264    lw      $26,    ($26)            /* read ISR address             */
     265    jalr    $26                      /* call ISR                     */
     266    nop
     267    j       _int_restore             /* return from INT handler */
     268    nop
     269   
    250270/* The default ISR is called when no specific ISR has been installed */
    251271/* in the interrupt vector. It simply displays a message on TTY0     */
  • trunk/softs/giet_tsar/reset.S

    r622 r629  
    1010*
    1111* As we don't want to use the virtual memory, the physical address is
    12 * equal to  the virtual address (identity mapping) and all processors use
    13 * the physical memory bank in cluster 0. Both the reset base address and
    14 * the kernel base address can be redefined to use a physical memory bank
    15 * smaller than 4 Gbytes.
     12* equal to  the virtual address (identity mapping) and all processors stacks
     13* and code segments are allocated in the physical memory bank in cluster 0.
     14*
     15* Both the reset base address and the kernel base address must be redefined
     16* to use a physical memory bank smaller than 2 Gbytes.
    1617*
    1718* There is one XCU and one MMC per cluster.
    18 * All other peripherals (including the boot ROM) are located in cluster 0.
    19 * Only two HWI interrupts are supported:
    20 * - IRQ_IN[0]      IOC
    21 * - IRQ_IN[12]     MMC
     19*
     20* There is one IOPIC component in cluster_io.
     21*
     22* There are two sets of peripherals:
     23*
     24* 1) A block device and a single channel TTY controller are available
     25*    in cluster(0,0).
     26*
     27* 2) Other peripherals (including another block device, a multi-channel TTY
     28*    controller, a frame buffer) are located in cluster_io.
     29*    For these external peripherals, hardware interrupts (HWI) are translated
     30*    to software interrupts (WTI) by an IOPIC component, which is programmed
     31*    to route all WTI to processor 0 in cluster (0,0).
    2232*
    2333* The boot sequence is the following:
     
    2535*   - Each processor initializes the CP0 EBASE register
    2636*       - Only processor 0 initializes the Interrupt vector.
     37*       - Only processor 0 initializes the IOPIC component.
    2738*   - Each processor initializes its private XCU mask.
    2839*       - Each processor initializes the Status Register (SR)
     
    3748        .extern seg_stack_base
    3849        .extern seg_xcu_base
     50        .extern seg_pic_base
    3951    .extern seg_kcode_base
    4052        .extern _interrupt_vector
    4153        .extern _ioc_isr
    4254        .extern _mmc_isr
     55    .extern _tty_isr
    4356    .extern main
    4457
     
    6275    la      $27,    seg_stack_base
    6376    addi    $26,    $10,    1               /* $26 <= (proc_id + 1)           */
    64     sll     $26,    $26,    16          /* $26 <= (proc_id + 1) * 64K     */
     77    sll     $26,    $26,    14          /* $26 <= (proc_id + 1) * 16K     */
    6578    addu    $29,    $27,    $26             /* $29 <= seg_stack_base(proc_id) */
    6679
     
    6982    mtc0    $26,    CP0_EBASE           /* CP0_EBASE <= seg_kcode_base */
    7083
    71 /* only proc (0,0,0) initializes interrupt vector */
     84/* only proc (0,0,0) initializes interrupt vector for IOC, TTY, MMC     */
    7285    bne     $10,    $0,    reset_xcu
    7386    nop
    7487
    75     la      $26,    _interrupt_vector   /* interrupt vector address */
     88    la      $26,    _interrupt_vector   /* interrupt vector address                */
     89    la      $27,    _mmc_isr
     90    sw      $27,    32($26)             /* interrupt_vector[8] <= _mmc_isr         */
    7691    la      $27,    _ioc_isr
    77     sw      $27,     0($26)             /* interrupt_vector[0] <= _isr_ioc */
    78     la      $27,    _mmc_isr
    79     sw      $27,     48($26)            /* interrupt_vector[12] <= _isr_mmc */
    80    
     92    sw      $27,    36($26)             /* interrupt_vector[9] <= _ioc_isr         */
     93    la      $27,    _tty_isr
     94    sw      $27,    40($26)             /* interrupt_vector[10] <= _tty_isr        */
     95
     96/* only proc (0,0,0) initializes IOPIC : IOPIC_ADDRESS[i] <= &XICU[0].WTI_REG[i]   */
     97
     98    li      $20,    X_SIZE
     99    addi    $20,    $20,    -1
     100    sll     $20,    $20,    4
     101    li      $21,    Y_SIZE
     102    add     $22,    $20,    $21         /* $22 <= cluster(X_SIZE-1, Y_SIZE)        */
     103
     104    mtc2    $22,    CP2_PADDR_EXT       /* CP2_PADDR_EXT <= cluster_io             */
     105
     106    li      $24,    16                  /* $24  iteration (de)counter              */
     107    la      $27,    seg_xcu_base        /* $27 <= &(XICU[0].WTI_REG[0])            */
     108    la      $26,    seg_pic_base        /* $26 <= &IOPIC_ADDRESS[0]                */
     109
     110reset_loop:
     111    sw      $27,    0($26)              /* IOPIC_ADDRESS[i] <= &XICU[0].WTI_REG[i] */
     112    addi    $24,    $24,    -1          /* decrement iteration index               */
     113    addi    $27,    $27,     4          /* $27 <= &(XICU[0].WTI_REG[i++])          */
     114    addi    $26,    $26,     16         /* $26 <= &IOPIC_ADDRESS[i++]              */
     115    bne     $24,    $0, reset_loop
     116    nop
     117
     118    mtc2    $0,     CP2_PADDR_EXT       /* CP2_PADDR_EXT <= zero                   */
     119   
    81120reset_xcu:
    82121
    83 /* only proc (x,y,0) receive IRQs and initialise its private XCU mask */
     122/* only proc (x,y,0) receive IRQs and initialise HWI and WTI XICU masks */
    84123    bne     $11,    $0,     reset_end
    85124    nop
    86125    la      $26,    seg_xcu_base
    87     li      $27,    0b010010000000      /* offset for MSK_HWI_ENABLE & lpid == 0 */
    88     addu    $24,    $26,    $27         /* $24 <= &MASK  */
    89     li      $25,    0x00001001              /* IOC: IRQ[0] / MEMC: IRQ[12] */
    90     sw      $25,    0($24)              /* set MASK */
     126    li      $27,    0b010010000000      /* offset for MSK_HWI_ENABLE[lpid == 0]    */
     127    addu    $24,    $26,    $27         /* $24 <= &HWI_MASK                        */
     128    li      $25,    0x0700                      /* TTY:HWI[10]  IOC:HWI[9]  MEMC:HWI[8]    */
     129    sw      $25,    0($24)              /* set HWI mask                            */
     130
     131    li      $27,    0b011010000000      /* offset for MSK_WTI_ENABLE[lpid == 0]    */
     132    addu    $24,    $26,    $27         /* $24 <= &WTI_MASK                        */
     133    li      $25,    0xFFFFFFFF          /* all WTI enabled                         */
     134    sw      $25,    0($24)              /* set WTI mask                            */
    91135
    92136reset_end:
  • trunk/softs/giet_tsar/stdio.c

    r626 r629  
    44// Date : janvier 2014
    55//
    6 // This file define varions functions that can be used by applications to access
     6// This file defines various functions that can be used by applications to access
    77// peripherals, for the TSAR multi-processors multi_clusters architecture.
    88// There is NO separation between application code and system code, as the
     
    2222// - NB_PROCS_MAX    : max number of processor per cluster
    2323// - NB_TTY_CHANNELS : max number of TTY channels
     24// - USE_EXT_IO      : use external peripherals if not zero
    2425//
    2526// The following base addresses must be defined in the ldscript
     
    3132#include "stdio.h"
    3233
     34#if !defined(NB_PROCS_MAX)
     35#error: you must define NB_PROCS_MAX in the hard_config.h file
     36#endif
     37
     38#if !defined(USE_EXT_IO)
     39#error: you must define USE_EXT_IO in the hard_config.h file
     40#endif
     41
     42#if !defined(X_SIZE)
     43#error: you must define X_SIZE in the hard_config.h file
     44#endif
     45
     46#if !defined(Y_SIZE)
     47#error: you must define Y_SIZE in the hard_config.h file
     48#endif
     49
     50#if !defined(X_WIDTH)
     51#error: you must define X_WIDTH in the hard_config.h file
     52#endif
     53
     54#if (X_WIDTH != 4)
     55#error: The X_WIDTH parameter must be equal to 4
     56#endif
     57
     58#if !defined(Y_WIDTH)
     59#error: you must define X_WIDTH in the hard_config.h file
     60#endif
     61
     62#if (X_WIDTH != 4)
     63#error: The Y_WIDTH parameter must be equal to 4
     64#endif
     65
     66#if !defined(NB_TTY_CHANNELS)
     67#error: you must define NB_TTY_CHANNELS in the hard_config.h file
     68#endif
     69
     70
     71
     72
    3373#define NB_LOCKS      256
    3474#define NB_BARRIERS   16
     
    74114
    75115////////////////////////////////////////////////////////////////////////////////////////
    76 // Taken from MutekH.
     116// Memcopy taken from MutekH.
    77117////////////////////////////////////////////////////////////////////////////////////////
    78118in_drivers void* _memcpy( void*        _dst,
     
    100140    return _dst;
    101141}
    102 
     142////////////////////////////////////////////////////////////////////////////////////////
     143// Memcopy using extended addresses
     144////////////////////////////////////////////////////////////////////////////////////////
     145in_drivers void  _extended_memcpy( unsigned int dst_cluster,
     146                                   unsigned int dst_address,
     147                                   unsigned int src_cluster,
     148                                   unsigned int src_address,
     149                                   unsigned int length )
     150{
     151    if ( (dst_address & 0x3) || (src_address & 0x3) || (length & 0x3) )
     152    {
     153        _tty_get_lock( 0 );
     154        _tty_puts( "ERROR in _extended_memcpy()" );
     155        _tty_release_lock( 0 );
     156        _exit();
     157    }
     158
     159    unsigned int i;
     160    unsigned int word;
     161
     162    for ( i = 0 ; i < length ; i = i+4 )
     163    {
     164        word = _word_extended_read( src_cluster, (src_address + i) );
     165        _word_extended_write( dst_cluster, (dst_address + i), word );
     166    }
     167}
    103168////////////////////////////////////////////////////////////////////////////////////////
    104169// Access CP0 and returns processor ident
     
    179244}
    180245
    181 ///////////////////////////////////////////////////////////////////////////////////////
    182 // Exit (suicide) after printing message on  a TTY terminal.
     246////////////////////////////////////////////////////////////////////////////
     247// This function makes a physical read access to a 32 bits word in memory,
     248// after a temporary paddr extension.
     249////////////////////////////////////////////////////////////////////////////
     250in_drivers unsigned int _word_extended_read( unsigned int  cluster,
     251                                             unsigned int  address )
     252{
     253    unsigned int value;
     254    asm volatile(
     255            "li      $3,        0xFFFFFFFE    \n"
     256            "mfc0    $2,        $12           \n"
     257            "and     $3,        $2, $3        \n"
     258            "mtc0    $3,        $12           \n"     /* IRQ disabled     */
     259
     260            "mtc2    %2,        $24           \n"     /* PADDR_EXT <= msb */   
     261            "lw      %0,        0(%1)         \n"     /* value <= *paddr  */
     262            "mtc2    $0,        $24           \n"     /* PADDR_EXT <= 0   */
     263
     264            "li      $3,        0x00000001    \n"
     265            "mfc0    $2,        $12           \n"
     266            "or      $3,        $3, $2        \n"
     267            "mtc0    $3,        $12           \n"     /* IRQ enabled      */
     268            : "=r" (value)
     269            : "r" (address), "r" (cluster)
     270            : "$2", "$3" );
     271    return value;
     272}
     273////////////////////////////////////////////////////////////////////////////
     274// This function makes a physical read access to a single byte in memory,
     275// after a temporary paddr extension.
     276////////////////////////////////////////////////////////////////////////////
     277in_drivers unsigned char _byte_extended_read( unsigned int  cluster,
     278                                              unsigned int  address )
     279{
     280    unsigned int value;
     281    asm volatile(
     282            "li      $3,        0xFFFFFFFE    \n"
     283            "mfc0    $2,        $12           \n"
     284            "and     $3,        $2, $3        \n"
     285            "mtc0    $3,        $12           \n"     /* IRQ disabled     */
     286
     287            "mtc2    %2,        $24           \n"     /* PADDR_EXT <= msb */   
     288            "lb      %0,        0(%1)         \n"     /* value <= *paddr  */
     289            "mtc2    $0,        $24           \n"     /* PADDR_EXT <= 0   */
     290
     291            "li      $3,        0x00000001    \n"
     292            "mfc0    $2,        $12           \n"
     293            "or      $3,        $3, $2        \n"
     294            "mtc0    $3,        $12           \n"     /* IRQ enabled      */
     295            : "=r" (value)
     296            : "r" (address), "r" (cluster)
     297            : "$2", "$3" );
     298    return (unsigned char)value;
     299}
     300////////////////////////////////////////////////////////////////////////////
     301// This function makes a physical write access to a 32 bits word in memory,
     302// after a temporary paddr extension.
     303////////////////////////////////////////////////////////////////////////////
     304in_drivers void _word_extended_write( unsigned int  cluster,
     305                                      unsigned int  address,
     306                                      unsigned int  word )
     307{
     308    asm volatile(
     309            "li      $3,        0xFFFFFFFE    \n"
     310            "mfc0    $2,        $12           \n"
     311            "and     $3,        $2, $3        \n"
     312            "mtc0    $3,        $12           \n"     /* IRQ disabled     */
     313
     314            "mtc2    %2,        $24           \n"     /* PADDR_EXT <= msb */   
     315            "sw      %0,        0(%1)         \n"     /* *paddr <= value  */
     316            "mtc2    $0,        $24           \n"     /* PADDR_EXT <= 0   */   
     317
     318            "li      $3,        0x00000001    \n"
     319            "mfc0    $2,        $12           \n"
     320            "or      $3,        $2, $3        \n"
     321            "mtc0    $3,        $12           \n"     /* IRQ enabled      */
     322            :
     323            : "r" (word), "r" (address), "r" (cluster)
     324            : "$2", "$3");
     325}
     326////////////////////////////////////////////////////////////////////////////
     327// This function makes a physical write access to a single byte in memory,
     328// after a temporary paddr extension.
     329////////////////////////////////////////////////////////////////////////////
     330in_drivers void _byte_extended_write( unsigned int  cluster,
     331                                      unsigned int  address,
     332                                      unsigned char byte )
     333{
     334    asm volatile(
     335            "li      $3,        0xFFFFFFFE    \n"
     336            "mfc0    $2,        $12           \n"
     337            "and     $3,        $2, $3        \n"
     338            "mtc0    $3,        $12           \n"     /* IRQ disabled     */
     339
     340            "mtc2    %2,        $24           \n"     /* PADDR_EXT <= msb */   
     341            "sb      %0,        0(%1)         \n"     /* *paddr <= value  */
     342            "mtc2    $0,        $24           \n"     /* PADDR_EXT <= 0   */   
     343
     344            "li      $3,        0x00000001    \n"
     345            "mfc0    $2,        $12           \n"
     346            "or      $3,        $2, $3        \n"
     347            "mtc0    $3,        $12           \n"     /* IRQ enabled      */
     348            :
     349            : "r" (byte), "r" (address), "r" (cluster)
     350            : "$2", "$3");
     351}
     352
     353///////////////////////////////////////////////////////////////////////////////////////
     354// Exit (suicide) after printing message on TTY0
    183355///////////////////////////////////////////////////////////////////////////////////////
    184356in_drivers void _exit()
     
    189361    unsigned int y       = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1);
    190362
    191     _tty_printf("\n\n!!!  Exit  Processor (%d,%d,%d)  !!!\n", x, y, l );
     363    _tty_get_lock( 0 );
     364    _tty_puts("\n !!! exit proc[");
     365    _tty_putd( x );
     366    _tty_puts(",");
     367    _tty_putd( y );
     368    _tty_puts(",");
     369    _tty_putd( l );
     370    _tty_puts("]  !!!\n");
     371    _tty_release_lock( 0 );
    192372
    193373    while(1) asm volatile("nop");   // infinite loop...
     
    227407///////////////////////////////////////////////////////////////////////////////////////
    228408//  The total number of TTY terminals is defined by NB_TTY_CHANNELS.
    229 //  1. If there is only one terminal, it is supposed to be shared, and used by
    230 //     all processors: a lock must be taken before display.
    231 //  2. If there is several terminals, and the number of processors is smaller
    232 //     than the number of terminals, there is one terminal per processor, but
    233 //     the TTY index is not equal to the proc_id, due to cluster indexing policy:
    234 //     - proc_id = cluster_xy * NB_PROCS_MAX + local_id (with cluster_xy = x << Y_WIDTH + y)
    235 //     - tty_id  = cluster_id * NB_PROCS_MAX + local_id (with cluster_id = x * Y_SIZE + y)
    236 //  3. If the computed tty_id is larger than NB_TTY_CHANNELS, an error is returned.
     409//  - If there is only one terminal, it is supposed to be shared, and used by
     410//    all processors: a lock must be taken before display.
     411//  - If there are several terminals, and the number of processors is smaller
     412//    than the number of terminals, there is one terminal per processor, but
     413//    the TTY index is not equal to the proc_id, due to cluster indexing policy:
     414//    proc_id = cluster_xy * NB_PROCS_MAX + local_id (with cluster_xy = x << Y_WIDTH + y)
     415//    tty_id  = cluster_id * NB_PROCS_MAX + local_id (with cluster_id = x * Y_SIZE + y)
     416//  - If the computed tty_id is larger than NB_TTY_CHANNELS, an error is returned.
     417///////////////////////////////////////////////////////////////////////////////////////
     418//  If USE_EXT_IO is set, we use the TTY controller implemented in cluster_io
     419//  (x = X_SIZE-1 / y = Y_SIZE), which requires an extended address access.
     420//  If USE_EXT_IO is not set, we use the single channel TTY controller in cluster (0,0).
     421///////////////////////////////////////////////////////////////////////////////////////
     422
    237423///////////////////////////////////////////////////////////////////////////////////////
    238424// Write one or several characters directly from a fixed length user buffer
    239425// to the TTY_WRITE register of the TTY controller.
     426// The channel index must be checked by the calling function.
    240427// This is a non blocking call : it tests the TTY_STATUS register.
    241428// If the TTY_STATUS_WRITE bit is set, the transfer stops and the function
     
    246433                           unsigned int    channel )
    247434{
    248     char*           tty_address;
    249     unsigned int    base                = (unsigned int)&seg_tty_base;
    250     unsigned int    nwritten    = 0;
    251     int i;
    252 
    253     tty_address = (char*)(base + channel*TTY_SPAN*4);
     435    unsigned int    base       = (unsigned int)&seg_tty_base + channel*TTY_SPAN*4;
     436    unsigned int    nwritten   = 0;
     437    unsigned int    cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     438    unsigned int    status;
     439    unsigned int        i;
    254440
    255441    for ( i=0 ; i < length ; i++ )
    256442    {
    257         if((tty_address[TTY_STATUS*4] & 0x2) == 0x2)  break;
    258         else
    259         {
    260             tty_address[TTY_WRITE*4] = buffer[i]; // write character
    261             nwritten++;
     443        if( USE_EXT_IO )    // extended addressing to reach cluster_io
     444        {
     445            status = _word_extended_read( cluster_io, base + TTY_STATUS*4 );
     446            if ( (status & 0x2) == 0x2 ) break;
     447            else
     448            {
     449                _byte_extended_write( cluster_io, base + TTY_WRITE*4 , buffer[i] );
     450                nwritten++;
     451            }
     452        }
     453        else                // direct addressing to cluster(0,0)
     454        {
     455            char* tty = (char*)base;
     456            if ( (tty[TTY_STATUS*4] & 0x2) == 0x2 )  break;
     457            else
     458            {
     459                tty[TTY_WRITE*4] = buffer[i]; // write character
     460                nwritten++;
     461            }
    262462        }
    263463    }
     
    265465    return nwritten;
    266466}
     467
    267468///////////////////////////////////////////////////////////////////////////////////////
    268469// Fetch one character directly from the TTY_READ register of the TTY controller,
    269470// and writes this character to the user buffer.
     471// The channel index must be checked by the calling function.
    270472// This is a non blocking call : it returns 0 if the register is empty,
    271473// and returns 1 if the register is full.
     
    274476                          unsigned int   channel )
    275477{
    276     char*           tty_address;
    277     unsigned int    base                = (unsigned int)&seg_tty_base;
    278 
    279     tty_address = (char*)(base + channel*TTY_SPAN*4);
    280 
    281     if((tty_address[TTY_STATUS*4] & 0x1) == 0x1)
    282     {
    283         buffer[0] = tty_address[TTY_READ*4];
    284         return 1;
     478    unsigned int    base       = (unsigned int)&seg_tty_base + channel*TTY_SPAN*4;
     479    unsigned int    cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     480    unsigned int    status;
     481
     482    if( USE_EXT_IO )
     483    {
     484        status = _word_extended_read( cluster_io, base + TTY_STATUS*4 );
     485        if ( (status & 0x1) == 0x1 )
     486        {
     487            buffer[0] = (char)_word_extended_read( cluster_io, base + TTY_READ*4 );
     488            return 1;
     489        }
     490        else
     491        {
     492            return 0;
     493        }
    285494    }
    286495    else
    287496    {
    288         return 0;
    289     }
    290 }
     497        char* tty = (char*)base;
     498
     499        if((tty[TTY_STATUS*4] & 0x1) == 0x1)
     500        {
     501            buffer[0] = tty[TTY_READ*4];
     502            return 1;
     503        }
     504        else
     505        {
     506            return 0;
     507        }
     508    }
     509}
     510
    291511//////////////////////////////////////////////////////////////////////////////
    292512// This function displays a string on TTY0.
     
    356576in_drivers void _tty_get_lock( unsigned int channel )
    357577{
    358     unsigned int* tty_address = (unsigned int *) &seg_tty_base;
    359     while ( tty_address[channel * TTY_SPAN + TTY_CONFIG] ) asm volatile("nop");
     578    if ( USE_EXT_IO )  // extended addressing to cluster_io
     579    {
     580        unsigned int    cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     581        unsigned int    address    = (unsigned int)&seg_tty_base
     582                                     + ((TTY_CONFIG + channel*TTY_SPAN)*4);
     583        while ( _word_extended_read( cluster_io, address ) ) asm volatile("nop");
     584    }
     585    else               // direct addressing to cluster(0,0)
     586    {
     587        unsigned int* tty = (unsigned int *) &seg_tty_base;
     588        while ( tty[channel * TTY_SPAN + TTY_CONFIG] ) asm volatile("nop");
     589    }
    360590}
    361591
     
    366596in_drivers void _tty_release_lock( unsigned int channel )
    367597{
    368     unsigned int* tty_address = (unsigned int *) &seg_tty_base;
    369     tty_address[channel * TTY_SPAN + TTY_CONFIG] = 0;
     598    if ( USE_EXT_IO )  // extended addressing to cluster_io
     599    {
     600        unsigned int    cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     601        unsigned int    address    = (unsigned int)&seg_tty_base
     602                                     + ((TTY_CONFIG + channel*TTY_SPAN)*4);
     603        _word_extended_write( cluster_io, address, 0 );
     604    }
     605    else               // direct addressing to cluster(0,0)
     606    {
     607        unsigned int* tty_address = (unsigned int *) &seg_tty_base;
     608        tty_address[channel * TTY_SPAN + TTY_CONFIG] = 0;
     609    }
    370610}
    371611
     
    383623    unsigned int y;
    384624
    385     // compute TTY terminal index
    386     if ( NB_TTY_CHANNELS == 1 )
    387     {
    388         channel = 0;
    389     }
    390     else
    391     {
    392         l           = (proc_id % NB_PROCS_MAX);
    393         x           = (proc_id / NB_PROCS_MAX) >> Y_WIDTH;
    394         y           = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1);
    395         channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l;
    396         if (channel >= NB_TTY_CHANNELS )
    397         {
    398             _tty_get_lock( 0 );
    399             _tty_puts( "ERROR in _tty_getc()\n" );
    400             _tty_release_lock( 0 );
    401             _exit();
    402         }
     625    // check TTY channel
     626    l           = (proc_id % NB_PROCS_MAX);
     627    x           = (proc_id / NB_PROCS_MAX) >> Y_WIDTH;
     628    y           = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1);
     629    channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l;
     630    if (channel >= NB_TTY_CHANNELS )
     631    {
     632        _tty_get_lock( 0 );
     633        _tty_puts( "ERROR in _tty_getc(): TTY index too large\n" );
     634        _tty_release_lock( 0 );
     635        _exit();
    403636    }
    404637
     
    433666    unsigned int  i;
    434667    unsigned int  channel;
    435     unsigned int  l;
    436668    unsigned int  x;
    437669    unsigned int  y;
    438 
    439     // compute TTY terminal index
    440     if ( NB_TTY_CHANNELS == 1 )
    441     {
    442         channel = 0;
    443     }
    444     else
    445     {
    446         l           = (proc_id % NB_PROCS_MAX);
    447         x           = (proc_id / NB_PROCS_MAX) >> Y_WIDTH;
    448         y           = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1);
    449         channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l;
    450         if (channel >= NB_TTY_CHANNELS )   
    451         {
    452             _tty_get_lock( 0 );
    453             _tty_puts( "ERROR in _tty_getw()\n" );
    454             _tty_release_lock( 0 );
    455             _exit();
    456         }
     670    unsigned int  l;
     671
     672    // check TTY channel
     673    l           = (proc_id % NB_PROCS_MAX);
     674    x           = (proc_id / NB_PROCS_MAX) >> Y_WIDTH;
     675    y           = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1);
     676    channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l;
     677    if (channel >= NB_TTY_CHANNELS )
     678    {
     679        _tty_get_lock( 0 );
     680        _tty_puts( "ERROR in _tty_getw(): TTY index too large\n" );
     681        _tty_release_lock( 0 );
     682        _exit();
    457683    }
    458684
     
    519745
    520746    unsigned int channel;
    521     unsigned int l;
    522747    unsigned int x;
    523748    unsigned int y;
    524749    unsigned int proc_id = _procid();
    525750
    526     // compute TTY channel
    527     if ( NB_TTY_CHANNELS == 1 )
     751    // compute TTY channel :
     752    // if the number of TTY channels is smaller
     753    // than the number of clusters, use TTY_0_0
     754    // else, TTY channel <= cluster index
     755    if ( NB_TTY_CHANNELS < (X_SIZE * Y_SIZE) )
    528756    {
    529757        channel = 0;
     
    531759    else
    532760    {
    533         l           = (proc_id % NB_PROCS_MAX);
    534761        x           = (proc_id / NB_PROCS_MAX) >> Y_WIDTH;
    535762        y           = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1);
    536         channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l;
    537         if (channel >= NB_TTY_CHANNELS )
    538         {
    539             _tty_get_lock( 0 );
    540             _tty_puts("ERROR in _tty_printf() for proc[" );
    541             _tty_putd( x );
    542             _tty_puts(",");
    543             _tty_putd( y );
    544             _tty_puts(",");
    545             _tty_putd( l );
    546             _tty_puts("] / TTY channel too large = ");
    547             _tty_putd( channel );
    548             _tty_puts("\n");
    549             _tty_release_lock( 0 );
    550             _exit();
    551         }
     763        channel     = (x * Y_SIZE + y);
    552764    }
    553765
     
    640852//////////////////////////////////////////////////////////////////////////////////////
    641853//  These functions are the ISRs that must be executed when an IRQ is activated
    642 //  by the TTY: _tty_isr_X is associated to channel [X].
    643 //  It save the character in the communication buffer _tty_get_buf[X],
    644 //  and set the set/reset variable _tty_get_full[X].
     854//  by the TTY: _tty_isr_XX is associated to TTY channel [XX].
     855//  It saves the character in the communication buffer _tty_get_buf[XX],
     856//  and sets the set/reset variable _tty_get_full[XX].
    645857//  A character is lost if the buffer is full when the ISR is executed.
    646858//////////////////////////////////////////////////////////////////////////////////////
    647859in_drivers void _tty_isr_indexed(size_t index)
    648860{
    649     char*   base = (char*)&seg_tty_base;
    650     char*   tty_address = (char*)(base + index*TTY_SPAN*4);
    651 
    652     _tty_get_buf[index]  = tty_address[TTY_READ*4];     // save character and reset IRQ
    653     _tty_get_full[index] = 1;                       // signals character available
    654 }
     861    if ( USE_EXT_IO )   // extended addressing to TTY in cluster_io
     862    {
     863        unsigned int  cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     864        unsigned int  base    = (unsigned int)&seg_tty_base +
     865                                ((index*TTY_SPAN + TTY_READ)*4);
     866
     867        _tty_get_buf[index] = (char)_word_extended_read( cluster, base );
     868    }
     869    else                // direct addressing to TTY in cluster(0,0)
     870    {
     871        char* tty = (char*)&seg_tty_base + index*TTY_SPAN*4;
     872
     873        _tty_get_buf[index] = tty[TTY_READ*4];  // save character and reset IRQ
     874    }
     875    _tty_get_full[index] = 1;               // signals character available
     876}
     877
     878in_drivers void _tty_isr()    { _tty_isr_indexed(0); }
    655879
    656880in_drivers void _tty_isr_00() { _tty_isr_indexed(0); }
     
    689913
    690914//////////////////////////////////////////////////////////////////////////////////////////
    691 //  I/O BLOCK_DEVICE
    692 // The three functions below use the three variables _ioc_lock _ioc_done,
     915//   BLOCK_DEVICE (IOC)
     916//////////////////////////////////////////////////////////////////////////////////////////
     917// The functions below use the three variables _ioc_lock _ioc_done,
    693918// and _ioc_status for synchronisation.
    694919// - As the IOC component can be used by several programs running in parallel,
     
    705930// reset the _ioc_done variable to zero, and releases the _ioc_lock variable.
    706931///////////////////////////////////////////////////////////////////////////////////////
     932//  If USE_EXT_IO is set, we use the IOC controller implemented in cluster_io
     933//  (x = X_SIZE-1 / y = Y_SIZE), which requires an extended address access.
     934//  If USE_EXT_IO is not set, we use the IOC controller in cluster (0,0).
     935///////////////////////////////////////////////////////////////////////////////////////
     936
     937///////////////////////////////////////////////////////////////////////////////////////
    707938// This blocking function is used by the _ioc_read() and _ioc_write() functions
    708939// to get _ioc_lock using LL/SC.
     
    720951                  ::"r"(plock):"$2","$3");
    721952}
     953
    722954//////////////////////////////////////////////////////////////////////////////////////
    723955// Transfer data from a memory buffer to the block_device.
     
    725957// - buffer : base address of the memory buffer
    726958// - count  : number of blocks to be transfered
    727 // The source buffer must be in user address space.
     959// - ext    : cluster index for the memory buffer
    728960///////////////////////////////////////////////////////////////////////////////////////
    729961in_drivers void _ioc_write( size_t   lba,
     
    732964                            size_t   ext )
    733965{
    734     volatile unsigned int*      ioc_address = (unsigned int*)&seg_ioc_base;
    735 
    736966    // get the lock
    737967    _ioc_get_lock();
    738968
    739     // block_device configuration
    740     ioc_address[BLOCK_DEVICE_BUFFER]     = (unsigned int)buffer;
    741     ioc_address[BLOCK_DEVICE_BUFFER_EXT] = ext;
    742     ioc_address[BLOCK_DEVICE_COUNT]      = count;
    743     ioc_address[BLOCK_DEVICE_LBA]        = lba;
    744     ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1;
    745     ioc_address[BLOCK_DEVICE_OP]         = BLOCK_DEVICE_WRITE;
    746 }
     969    if ( USE_EXT_IO )   // extended addressing to cluster_io
     970    {
     971        unsigned int    cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     972        unsigned int    base    = (unsigned int)&seg_ioc_base;
     973
     974        _word_extended_write( cluster, base + BLOCK_DEVICE_BUFFER*4,     (unsigned int)buffer );
     975        _word_extended_write( cluster, base + BLOCK_DEVICE_BUFFER_EXT*4, ext );
     976        _word_extended_write( cluster, base + BLOCK_DEVICE_COUNT*4,      count );
     977        _word_extended_write( cluster, base + BLOCK_DEVICE_LBA*4,        lba );
     978        _word_extended_write( cluster, base + BLOCK_DEVICE_IRQ_ENABLE*4, 1 );
     979        _word_extended_write( cluster, base + BLOCK_DEVICE_OP*4,         BLOCK_DEVICE_WRITE );
     980    }
     981    else                // direct addressing to cluster(0,0)
     982    {
     983        unsigned int* ioc = (unsigned int*)&seg_ioc_base;
     984
     985        ioc[BLOCK_DEVICE_BUFFER]     = (unsigned int)buffer;
     986        ioc[BLOCK_DEVICE_BUFFER_EXT] = ext;
     987        ioc[BLOCK_DEVICE_COUNT]      = count;
     988        ioc[BLOCK_DEVICE_LBA]        = lba;
     989        ioc[BLOCK_DEVICE_IRQ_ENABLE] = 1;
     990        ioc[BLOCK_DEVICE_OP]         = BLOCK_DEVICE_WRITE;
     991    }
     992}
     993
    747994///////////////////////////////////////////////////////////////////////////////////////
    748995// Transfer data from a file on the block device to a memory buffer.
     
    750997// - buffer : base address of the memory buffer
    751998// - count  : number of blocks to be transfered
    752 // The destination buffer must be in user address space.
    753 // All cache lines corresponding to the the target buffer must be invalidated
    754 // for cache coherence.
     999// - ext    : cluster index for the memory buffer
    7551000///////////////////////////////////////////////////////////////////////////////////////
    7561001in_drivers void _ioc_read( size_t   lba,
     
    7591004                           size_t   ext )
    7601005{
    761     volatile unsigned int*      ioc_address = (unsigned int*)&seg_ioc_base;
    762 
    7631006    // get the lock
    7641007    _ioc_get_lock();
    7651008
    766     // block_device configuration
    767     ioc_address[BLOCK_DEVICE_BUFFER]     = (unsigned int)buffer;
    768     ioc_address[BLOCK_DEVICE_BUFFER_EXT] = ext;
    769     ioc_address[BLOCK_DEVICE_COUNT]      = count;
    770     ioc_address[BLOCK_DEVICE_LBA]        = lba;
    771     ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1;
    772     ioc_address[BLOCK_DEVICE_OP]         = BLOCK_DEVICE_READ;
    773 }
     1009    if ( USE_EXT_IO )   // extended addressing to cluster_io
     1010    {
     1011        unsigned int    cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     1012        unsigned int    base    = (unsigned int)&seg_ioc_base;
     1013
     1014        _word_extended_write( cluster, base + BLOCK_DEVICE_BUFFER*4,     (unsigned int)buffer );
     1015        _word_extended_write( cluster, base + BLOCK_DEVICE_BUFFER_EXT*4, ext );
     1016        _word_extended_write( cluster, base + BLOCK_DEVICE_COUNT*4,      count );
     1017        _word_extended_write( cluster, base + BLOCK_DEVICE_LBA*4,        lba );
     1018        _word_extended_write( cluster, base + BLOCK_DEVICE_IRQ_ENABLE*4, 1 );
     1019        _word_extended_write( cluster, base + BLOCK_DEVICE_OP*4,         BLOCK_DEVICE_READ );
     1020    }
     1021    else                // direct addressing to cluster(0,0)
     1022    {
     1023        unsigned int* ioc = (unsigned int*)&seg_ioc_base;
     1024
     1025        ioc[BLOCK_DEVICE_BUFFER]     = (unsigned int)buffer;
     1026        ioc[BLOCK_DEVICE_BUFFER_EXT] = ext;
     1027        ioc[BLOCK_DEVICE_COUNT]      = count;
     1028        ioc[BLOCK_DEVICE_LBA]        = lba;
     1029        ioc[BLOCK_DEVICE_IRQ_ENABLE] = 1;
     1030        ioc[BLOCK_DEVICE_OP]         = BLOCK_DEVICE_READ;
     1031    }
     1032}
     1033
    7741034///////////////////////////////////////////////////////////////////////////////////////
    7751035// This blocking function checks completion of an I/O transfer and reports errors.
     
    7951055    }
    7961056}
     1057
    7971058//////////////////////////////////////////////////////////////////////////////////////
    7981059//  This ISR must be executed when an IRQ is activated by IOC to signal completion.
     
    8031064in_drivers void _ioc_isr()
    8041065{
    805     int* ioc_address = (int*)&seg_ioc_base;
     1066    if ( USE_EXT_IO )  // extended addressing to cluster_io
     1067    {
     1068        unsigned int    cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;
     1069        unsigned int    base    = (unsigned int)&seg_ioc_base;
     1070
     1071        _ioc_status = _word_extended_read( cluster, base + BLOCK_DEVICE_STATUS*4 );
     1072    }
     1073    else               // direct addressing to cluster(0,0)
     1074    {
     1075        unsigned int* ioc = (unsigned int*)&seg_ioc_base;
    8061076   
    807     _ioc_status = ioc_address[BLOCK_DEVICE_STATUS];     // save status & reset IRQ
    808     _ioc_done   = 1;                                                // signals completion
     1077        _ioc_status = ioc[BLOCK_DEVICE_STATUS]; // save status & reset IRQ
     1078    }
     1079    _ioc_done   = 1;       // signals completion
    8091080}
    8101081
     
    8251096//////////////////////////////////////////////////////////////////////////////////////
    8261097//  FRAME_BUFFER
     1098//////////////////////////////////////////////////////////////////////////////////////
    8271099// The _fb_sync_write & _fb_sync_read functions use a memcpy strategy to implement
    8281100// the transfer between a data buffer and the frame buffer.
    8291101// They are blocking until completion of the transfer.
    8301102//////////////////////////////////////////////////////////////////////////////////////
     1103
     1104//////////////////////////////////////////////////////////////////////////////////////
    8311105//  _fb_sync_write()
    8321106// Transfer data from an user buffer to the frame_buffer device with a memcpy.
    833 // - offset     : offset (in bytes) in the frame buffer
     1107// - offset : offset (in bytes) in the frame buffer
    8341108// - buffer : base address of the memory buffer
    8351109// - length : number of bytes to be transfered
    836 //////////////////////////////////////////////////////////////////////////////////////
    837 in_drivers void _fb_sync_write( size_t  offset,
    838                                 void*   buffer,
    839                                 size_t  length,
    840                                 size_t  ext )
    841 {
    842     volatile char*  fb = (char*)(void*)&seg_fbf_base + offset;
    843     char*       ub = buffer;
    844 
    845     _memcpy( (void*)fb, (void*)ub, length );
    846 }
     1110// - ext    : cluster_xy for the user buffer
     1111//////////////////////////////////////////////////////////////////////////////////////
     1112in_drivers void _fb_sync_write( unsigned int  offset,
     1113                                unsigned int  buffer,
     1114                                unsigned int  length,
     1115                                unsigned int  ext )
     1116{
     1117    unsigned int  src_address = buffer;
     1118    unsigned int  src_cluster = ext;
     1119    unsigned int  dst_address = (unsigned int)&seg_fbf_base + offset;
     1120    unsigned int  dst_cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;  // cluster_xy for I/O
     1121
     1122    _extended_memcpy( dst_cluster,
     1123                      dst_address,
     1124                      src_cluster,
     1125                      src_address,
     1126                      length );
     1127}
     1128
    8471129///////////////////////////////////////////////////////////////////////////////////////
    8481130//  _fb_sync_read()
    8491131// Transfer data from the frame_buffer device to an user buffer with a memcpy.
    850 // - offset     : offset (in bytes) in the frame buffer
     1132// - offset : offset (in bytes) in the frame buffer
    8511133// - buffer : base address of the memory buffer
    8521134// - length : number of bytes to be transfered
    853 //////////////////////////////////////////////////////////////////////////////////////
    854 in_drivers void  _fb_sync_read( size_t  offset,
    855                                 void*   buffer,
    856                                 size_t  length,
    857                                 size_t  ext )
    858 {
    859     volatile char*  fb = (char*)(void*)&seg_fbf_base + offset;
    860     char*       ub = buffer;
    861 
    862     _memcpy( (void*)ub, (void*)fb, length );
     1135// - ext    : cluster_xy for the user buffer
     1136//////////////////////////////////////////////////////////////////////////////////////
     1137in_drivers void  _fb_sync_read( unsigned int  offset,
     1138                                unsigned int  buffer,
     1139                                unsigned int  length,
     1140                                unsigned int  ext )
     1141{
     1142    unsigned int  dst_address = buffer;
     1143    unsigned int  dst_cluster = ext;
     1144    unsigned int  src_address = (unsigned int)&seg_fbf_base + offset;
     1145    unsigned int  src_cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE;  // cluster_xy for I/O
     1146
     1147    _extended_memcpy( dst_cluster,
     1148                      dst_address,
     1149                      src_cluster,
     1150                      src_address,
     1151                      length );
    8631152}
    8641153
     
    8791168    _spin_lock[index] = 0;
    8801169}
     1170
    8811171///////////////////////////////////////////////////////////////////////////////////////
    8821172// Try to take a software spin-lock.
     
    9541244                  ::"r"(pinit),"r"(pcount),"r"(plock),"r"(value):"$2","$3");
    9551245}
     1246
    9561247//////////////////////////////////////////////////////////////////////////////////////
    9571248// This blocking function uses a busy_wait technique (on the barrier_lock value),
  • trunk/softs/giet_tsar/stdio.h

    r626 r629  
    5959void*           _memcpy( void* dst, const void* src, size_t size );
    6060
     61void            _extended_memcpy( unsigned int dst_cluster,
     62                                  unsigned int dst_address,
     63                                  unsigned int src_cluster,
     64                                  unsigned int src_address,
     65                                  unsigned int length );
    6166unsigned int    _procid();
    6267unsigned int    _proctime();
     
    8590void            _tty_getw( unsigned int* buffer );
    8691void            _tty_printf( char* format, ... );
     92void            _tty_isr();
    8793
    8894void            _ioc_get_lock();
     
    94100void            _mmc_isr();
    95101
    96 void            _fb_sync_write( size_t offset, void* buffer, size_t length, size_t ext );
    97 void            _fb_sync_read( size_t offset, void* buffer, size_t length, size_t ext );
     102void            _fb_sync_write( unsigned int offset,
     103                                unsigned int buffer,
     104                                unsigned int length,
     105                                unsigned int ext );
     106void            _fb_sync_read(  unsigned int offset,
     107                                unsigned int buffer,
     108                                unsigned int length,
     109                                unsigned int ext );
    98110
    99111void            _release_lock( size_t lock_index );
     
    103115void            _barrier_wait(size_t index);
    104116
     117unsigned char   _byte_extended_read(  unsigned int   cluster,
     118                                      unsigned int   address );
     119unsigned int    _word_extended_read(  unsigned int   cluster,
     120                                      unsigned int   address );
     121void            _word_extended_write( unsigned int   cluster,
     122                                      unsigned int   address,
     123                                      unsigned int   word );
     124void            _byte_extended_write( unsigned int   cluster,
     125                                      unsigned int   address,
     126                                      unsigned char  byte );
    105127#endif
    106128
  • trunk/softs/soft_hello_giet/hard_config.h

    r623 r629  
    1212#define  Y_WIDTH             4
    1313
    14 #define  NB_PROCS_MAX        2
     14#define  NB_PROCS_MAX        4
     15
     16#define  USE_EXT_IO          1
    1517
    1618#define  NB_DMA_CHANNELS     0
    17 #define  NB_TTY_CHANNELS     (NB_PROCS_MAX * X_SIZE * Y_SIZE)
    1819#define  NB_HBA_CHANNELS     0
    1920#define  NB_NIC_CHANNELS     0
    2021#define  NB_CMA_CHANNELS     0
    2122
     23#define  NB_TTY_CHANNELS     4
    2224
    2325#endif //_HARD_CONFIG_H
  • trunk/softs/soft_hello_giet/ldscript

    r623 r629  
    2323
    2424seg_xcu_base    = 0xF0000000;       /* controler XCU */
    25 seg_tty_base    = 0xF2000000;       /* controler TTY */
     25seg_tty_base    = 0xF4000000;       /* controler TTY */
    2626seg_fbf_base    = 0xF3000000;       /* controler FBF */
    27 seg_ioc_base    = 0xF4000000;       /* controler IOC */
    28 seg_mmc_base    = 0xFF000000;       /* config    MMC */
     27seg_ioc_base    = 0xF2000000;       /* controler IOC */
     28seg_nic_base    = 0xF7000000;       /* controler NIC */
     29seg_cma_base    = 0xF8000000;       /* controler CMA */
     30seg_pic_base    = 0xF9000000;       /* controler PIC */
     31seg_mmc_base    = 0xE0000000;       /* config    MMC */
    2932
    3033
  • trunk/softs/soft_sort_giet/hard_config.h

    r626 r629  
    1414#define  NB_PROCS_MAX        4
    1515
     16#define  USE_EXT_IO          1
     17
    1618#define  NB_DMA_CHANNELS     0
    17 #define  NB_TTY_CHANNELS     (NB_PROCS_MAX * X_SIZE * Y_SIZE)
    1819#define  NB_HBA_CHANNELS     0
    1920#define  NB_NIC_CHANNELS     0
    2021#define  NB_CMA_CHANNELS     0
    2122
     23#define  NB_TTY_CHANNELS     4
    2224
    2325#endif //_HARD_CONFIG_H
  • trunk/softs/soft_sort_giet/ldscript

    r626 r629  
    11/**********************************************************
    22        File : ldscript
    3         Author : Cesar Fuguet
     3        Author : Alain Greiner
    44        Date : January 2014
    55**********************************************************/
     
    2323
    2424seg_xcu_base    = 0xF0000000;       /* controler XCU */
    25 seg_tty_base    = 0xF2000000;       /* controler TTY */
     25seg_tty_base    = 0xF4000000;       /* controler TTY */
    2626seg_fbf_base    = 0xF3000000;       /* controler FBF */
    27 seg_ioc_base    = 0xF4000000;       /* controler IOC */
    28 seg_mmc_base    = 0xFF000000;       /* config    MMC */
     27seg_ioc_base    = 0xF2000000;       /* controler IOC */
     28seg_nic_base    = 0xF7000000;       /* controler NIC */
     29seg_cma_base    = 0xF8000000;       /* controler CMA */
     30seg_pic_base    = 0xF9000000;       /* controler PIC */
     31seg_mmc_base    = 0xE0000000;       /* config    MMC */
    2932
    3033
  • trunk/softs/soft_sort_giet/main.c

    r626 r629  
    5757    /* Hello World */
    5858
    59     task0_printf("\n[ PROC %d\t] Starting SORT application\n", proc_id);
    60 
    61     task0_printf("[ PROC %d\t] MESH %d x %d x %d processors\n",
    62                  proc_id, X_SIZE, Y_SIZE, NB_PROCS_MAX);
    63 
    64     /**************************************************************************/
    65     /* Barriers Inititialitatin */
     59    task0_printf("\n[ PROC_%d_%d_%d ] Starting SORT application\n",x,y,lid);
     60
     61    task0_printf("[ PROC_%d_%d_%d ] MESH %d x %d x %d processors\n",
     62                 x,y,lid, X_SIZE, Y_SIZE, NB_PROCS_MAX);
     63
     64    /**************************************************************************/
     65    /* Barriers Initialisation */
    6666
    6767    if (thread_id == 0)
     
    6969        for (i = 0; i < __builtin_ctz(total_procs); i++)
    7070        {
    71             printf("[ PROC %d\t] Initializing barrier %d with %d\n",
    72                 proc_id, i, total_procs >> i);
     71            printf("[ PROC_%d_%d_%d ] Initializing barrier %d with %d\n",
     72                x,y,lid, i, total_procs >> i);
    7373
    7474            _barrier_init(i, total_procs >> i);
    7575        }
    76 
     76        printf("\n");
    7777        asm volatile ("sync");
    7878        init_ok = 1;
     
    8080
    8181    /**************************************************************************/
    82     /* Array Inititialitatin */
     82    /* Array Initialisation */
    8383
    8484    for (i = IPP * thread_id; i < IPP * (thread_id + 1); i++)
     
    9393    /* Parallel sorting of array pieces */
    9494
    95     printf("[ PROC %d\t] Stage 0: Processor Sorting...\n\r", proc_id);
     95    printf("[ PROC_%d_%d_%d ] Stage 0: Starting...\n\r", x,y,lid);
    9696    bubbleSort(array0, IPP, IPP * thread_id);
    97     printf("[ PROC %d\t] Stage 0: Finishing...\n\r", proc_id);
     97    printf("[ PROC_%d_%d_%d ] Stage 0: Finishing...\n\r", x,y,lid);
    9898
    9999    for (i = 0; i < __builtin_ctz(total_procs); i++)
     
    104104        if((thread_id % (2 << i)) != 0) _exit();
    105105
    106         printf("[ PROC %d\t] Stage %d: Starting...\n\r", proc_id, i+1);
     106        printf("[ PROC_%d_%d_%d ] Stage %d: Starting...\n\r", x,y,lid, i+1);
    107107
    108108        if((i % 2) == 0)
     
    124124                );
    125125
    126         printf("[ PROC %d\t] Stage %d: Finishing...\n\r", proc_id, i+1);
     126        printf("[ PROC_%d_%d_%d ] Stage %d: Finishing...\n\r", x,y,lid, i+1);
    127127    }
    128128
     
    147147        if (success)
    148148        {
    149             printf("[ PROC %d\t] Success!!\n\r", proc_id);
     149            printf("[ PROC_%d_%d_%d ] Success!!\n\r", x,y,lid);
    150150        }
    151151        else
  • trunk/softs/soft_transpose_giet/ldscript

    r623 r629  
    1010peripherals are not present in the architecture */
    1111
    12 seg_reset_base  = 0x10000000;       /* le code de boot */
     12seg_reset_base  = 0x00000000;       /* boot code */
    1313
    14 seg_kcode_base  = 0x00001000;       /* le code du système */
    15 seg_kdata_base  = 0x00010000;       /* les donnees du système */
    16 seg_kunc_base   = 0x00020000;       /* les données non cachées du système */
     14seg_kcode_base  = 0x00010000;       /* kernel code */
     15seg_kdata_base  = 0x00020000;       /* kernel cacheable data */
     16seg_kunc_base   = 0x00030000;       /* kernel uncacheable data */
    1717
    18 seg_code_base   = 0x00030000;       /* le code utilisateur */
    19 seg_data_base   = 0x00040000;       /* les données utilisateur */
     18seg_code_base   = 0x00040000;       /* application code */
     19seg_data_base   = 0x00050000;       /* application data */
    2020
    21 seg_heap_base   = 0x00100000;       /* le tas utilisateur */
    22 seg_stack_base  = 0x00400000;       /* la pile utilisateur */
     21seg_heap_base   = 0x00100000;       /* heaps for application tasks */
     22seg_stack_base  = 0x00300000;       /* stacks */
    2323
    24 seg_xcu_base    = 0xF0000000;       /* controleur XCU */
    25 seg_dma_base    = 0xF1000000;       /* controleur DMA */
    26 seg_tty_base    = 0xF2000000;       /* controleur TTY */
    27 seg_fbf_base    = 0xF3000000;       /* controleur FBF */
    28 seg_ioc_base    = 0xF4000000;       /* controleur IOC */
     24seg_xcu_base    = 0xF0000000;       /* controller XCU */
     25seg_tty_base    = 0xF4000000;       /* controller TTY */
     26seg_fbf_base    = 0xF3000000;       /* controller FBF */
     27seg_ioc_base    = 0xF2000000;       /* controller IOC */
     28seg_nic_base    = 0xF7000000;       /* controller NIC */
     29seg_cma_base    = 0xF8000000;       /* controller CMA */
     30seg_pic_base    = 0xF9000000;       /* controller PIC */
     31seg_mmc_base    = 0xE0000000;       /* config    MMC */
    2932
    3033
  • trunk/softs/soft_transpose_giet/main.c

    r248 r629  
    11
     2#include "hard_config.h"
    23#include "stdio.h"
    34#include "limits.h"
    45#include "../giet_tsar/block_device.h"
    56
    6 #define NL              512
    7 #define NP              512
    8 #define NB_IMAGES       1
    9 #define NB_CLUSTER_MAX  256
    10 
    11 #define PRINTF(...)      ({ if (proc_id == 0) { tty_printf(__VA_ARGS__); } })
    12 
    13 //#define DISPLAY_ONLY
    14 
    15 ///////////////////////////////////////////
     7#define NL              128
     8#define NP              128
     9#define NB_IMAGES       5
     10
     11#define PRINTF(...)      ({ if (lpid == 0) { _tty_printf(__VA_ARGS__); } })
     12
     13#define DISPLAY_OK
     14
    1615// tricks to read parameters from ldscript
    17 ///////////////////////////////////////////
    18 
    19 struct plaf;
    20 
    21 extern struct plouf seg_ioc_base;
     16extern struct plaf seg_ioc_base;
    2217extern struct plaf seg_heap_base;
    23 extern struct plaf NB_PROCS;
    24 extern struct plaf NB_CLUSTERS;
     18
     19// global variables stored in seg_data (cluster 0)
     20
     21// instrumentation counters for each processor
     22unsigned int LOAD_START[256][4];
     23unsigned int LOAD_END  [256][4];
     24unsigned int TRSP_START[256][4];
     25unsigned int TRSP_END  [256][4];
     26unsigned int DISP_START[256][4];
     27unsigned int DISP_END  [256][4];
     28
     29// checksum variables
     30unsigned check_line_before[NL];
     31unsigned check_line_after[NL];
    2532
    2633/////////////
    27 void main(){
    28    unsigned int frame = 0;
    29    unsigned int date  = 0;
    30 
    31    unsigned int c; // cluster index for loops
    32    unsigned int l; // line index for loops
    33    unsigned int p; // pixel index for loops
    34 
    35    unsigned int proc_id       = procid();                      // processor id
    36    unsigned int nlocal_procs  = (unsigned int) &NB_PROCS;      // number of processors per cluster
    37    unsigned int nclusters     = (unsigned int) &NB_CLUSTERS;   // number of clusters
    38    unsigned int local_id      = proc_id % nlocal_procs;        // local processor id
    39    unsigned int cluster_id    = proc_id / nlocal_procs;        // cluster id
    40    unsigned int base          = (unsigned int) &seg_heap_base; // base address for shared buffers
    41    unsigned int increment     = 0x80000000 / nclusters * 2;    // cluster increment
    42    unsigned int nglobal_procs = nclusters * nlocal_procs;      // number of tasks
    43    unsigned int npixels       = NP * NL;                       // number of pixel per frame
    44    
    45    unsigned int * ioc_address = (unsigned int *) &seg_ioc_base;
    46    unsigned int block_size    = ioc_address[BLOCK_DEVICE_BLOCK_SIZE];
    47    unsigned int nblocks       = npixels / block_size;   // number of blocks per frame
    48 
    49    PRINTF("\n *** Entering main at cycle %d ***\n\n", proctime());
     34void main()
     35{
     36    unsigned int image = 0;
     37
     38    unsigned int l;                                             // line index for loops
     39    unsigned int p;                                             // pixel index for loops
     40
     41    unsigned int * ioc_address = (unsigned int *) &seg_ioc_base;
     42    unsigned int block_size    = ioc_address[BLOCK_DEVICE_BLOCK_SIZE];
     43
     44    unsigned int proc_id     = _procid();                       // processor id
     45    unsigned int nclusters   = X_SIZE*Y_SIZE;                   // number of clusters
     46    unsigned int lpid        = proc_id % NB_PROCS_MAX;          // local processor id
     47    unsigned int cluster_xy  = proc_id / NB_PROCS_MAX;          // cluster index (8 bits format)
     48    unsigned int x           = cluster_xy >> Y_WIDTH;           // x coordinate
     49    unsigned int y           = cluster_xy & ((1<<Y_WIDTH)-1);   // y coordinate
     50    unsigned int ntasks      = nclusters * NB_PROCS_MAX;        // number of tasks
     51    unsigned int npixels     = NP * NL;                         // number of pixel per image
     52    unsigned int nblocks     = npixels / block_size;            // number of blocks per image
     53
     54    // task_id is a "continuous" index for the task running on processor (x,y,lpid)
     55    unsigned int task_id = (((x * Y_SIZE) + y) * NB_PROCS_MAX) + lpid;
     56
     57    // cluster_id is a "continuous" index for cluster(x,y)
     58    unsigned int cluster_id  = (x * Y_SIZE) + y;               
     59
     60    PRINTF("\n *** Proc 0 in cluster [%d,%d] enters main at cycle %d ***\n\n",
     61           x, y, _proctime());
    5062
    5163   //  parameters checking
    52    if ((nlocal_procs != 1) && (nlocal_procs != 2) && (nlocal_procs != 4)){
    53       PRINTF("NB_PROCS must be 1, 2 or 4\n");
    54       exit(1);
     64   if ((NB_PROCS_MAX != 1) && (NB_PROCS_MAX != 2) && (NB_PROCS_MAX != 4))
     65   {
     66      PRINTF("NB_PROCS_MAX must be 1, 2 or 4\n");
     67      _exit();
    5568   }
    5669   if ((nclusters != 1) && (nclusters != 2) && (nclusters != 4) && (nclusters != 8) &&
    5770         (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters != 128) &&
    58          (nclusters != 256)){
     71         (nclusters != 256))
     72   {
    5973      PRINTF("NB_CLUSTERS must be a power of 1 between 1 and 256\n");
    60       exit(1);
     74      _exit();
    6175   }
    62    if (nglobal_procs > 1024){
    63       PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 1024\n");
    64       exit(1);
    65    }
    66    if (proc_id >= nglobal_procs){
    67       PRINTF("processor id %d larger than NB_CLUSTERS*NB_PROCS\n", proc_id);
    68       exit(1);
    69    }
    70 
    71    // Arrays of pointers on the shared, distributed buffers containing the frames
    72    // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
    73    unsigned char * A[NB_CLUSTER_MAX];
    74    unsigned char * B[NB_CLUSTER_MAX];
    75 
    76    // Arrays of pointers on the instrumentation arrays
    77    // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
    78    // each pointer points on the base adress of an array of NPROCS unsigned int
    79    unsigned int * LOAD_START[NB_CLUSTER_MAX];
    80    unsigned int * LOAD_END[NB_CLUSTER_MAX];
    81    unsigned int * TRSP_START[NB_CLUSTER_MAX];
    82    unsigned int * TRSP_END[NB_CLUSTER_MAX];
    83    unsigned int * DISP_START[NB_CLUSTER_MAX];
    84    unsigned int * DISP_END[NB_CLUSTER_MAX];
    85 
    86    // shared buffers address definition
    87    // from the seg_heap_base and increment depending on the cluster index
    88    // These arrays of pointers are identical and replicated in the stack of each task
    89    for (c = 0; c < nclusters; c++){
    90       A[c]          = (unsigned char *) (base                                  + increment * c);
    91       B[c]          = (unsigned char *) (base +     npixels                    + increment * c);
    92       LOAD_START[c] = (unsigned int *)  (base + 2 * npixels                    + increment * c);
    93       LOAD_END[c]   = (unsigned int *)  (base + 2 * npixels +     nlocal_procs + increment * c);
    94       TRSP_START[c] = (unsigned int *)  (base + 2 * npixels + 2 * nlocal_procs + increment * c);
    95       TRSP_END[c]   = (unsigned int *)  (base + 2 * npixels + 3 * nlocal_procs + increment * c);
    96       DISP_START[c] = (unsigned int *)  (base + 2 * npixels + 4 * nlocal_procs + increment * c);
    97       DISP_END[c]   = (unsigned int *)  (base + 2 * npixels + 5 * nlocal_procs + increment * c);
    98    }
     76
     77   // pointers on the distributed buffers containing the images,
     78   // allocated in the heap segment: each buffer contains 256 Kbytes
     79   unsigned char* buf_in  = (unsigned char*)&seg_heap_base;
     80   unsigned char* buf_out = buf_in + 0x00100000;
    9981
    10082   PRINTF("NB_CLUSTERS     = %d\n", nclusters);
    101    PRINTF("NB_LOCAL_PROCS  = %d\n", nlocal_procs);
    102    PRINTF("NB_GLOBAL_PROCS = %d\n", nglobal_procs);
     83   PRINTF("NB_LOCAL_PROCS  = %d\n", NB_PROCS_MAX);
     84   PRINTF("NB_TASKS        = %d\n", ntasks);
    10385   PRINTF("NB_PIXELS       = %d\n", npixels);
    10486   PRINTF("BLOCK_SIZE      = %d\n", block_size);
    10587   PRINTF("NB_BLOCKS       = %d\n\n", nblocks);
    10688
    107 
    108    PRINTF("*** Starting barrier init at cycle %d ***\n", proctime());
     89   PRINTF("*** Proc 0 in cluster [%d,%d] starts barrier init at cycle %d\n",
     90          x, y, _proctime());
    10991
    11092   //  barriers initialization
    111    barrier_init(0, nglobal_procs);
    112    barrier_init(1, nglobal_procs);
    113    barrier_init(2, nglobal_procs);
    114 
    115    PRINTF("*** Completing barrier init at cycle %d ***\n", proctime());
    116 
    117    // Main loop (on frames)
    118    while (frame < NB_IMAGES){
    119       // pseudo parallel load from disk to A[c] buffer : nblocks/nclusters blocks
    120       // only task running on processor with (local_id == 0) does it
    121 
    122       if (local_id == 0){
    123          int p;
    124 
    125          date = proctime();
    126          PRINTF("\n*** Starting load for frame %d at cycle %d\n", frame, date);
     93   _barrier_init(0, ntasks);
     94   _barrier_init(1, ntasks);
     95   _barrier_init(2, ntasks);
     96   _barrier_init(3, ntasks);
     97
     98   PRINTF("*** Proc 0 in cluster [%d,%d] completes barrier init at cycle %d\n",
     99          x, y, _proctime());
     100
     101   // Main loop (on images)
     102   while (image < NB_IMAGES)
     103   {
     104      // pseudo parallel load from disk to buf_in buffer : nblocks/nclusters blocks
     105      // only task running on processor with (lpid == 0) does it
     106
     107      LOAD_START[cluster_id][lpid] = _proctime();
     108
     109      if (lpid == 0)
     110      {
     111         _ioc_read( ((image * nblocks) + ((nblocks * cluster_id) / nclusters)),
     112                    buf_in,
     113                    (nblocks / nclusters),
     114                    cluster_xy );
     115
     116         PRINTF("\n*** Proc 0 in cluster [%d,%d] starts load for image %d at cycle %d\n",
     117                x, y, image, _proctime() );
     118
     119         _ioc_completed();
     120
     121         PRINTF("*** Proc 0 in cluster [%d,%d] completes load for image %d at cycle %d\n",
     122                x, y, image, _proctime() );
     123      }
     124
     125      LOAD_END[cluster_id][lpid] = _proctime();
     126
     127      _barrier_wait(0);
     128
     129      // parallel transpose from buf_in to buf_out buffers
     130      // each processor makes the transposition for (NL/ntasks) lines
     131      // (p,l) are the pixel coordinates in the source image
     132
     133      PRINTF("\n*** proc 0 in cluster [%d,%d] starts transpose for image %d at cycle %d\n",
     134             x, y, image, _proctime());
     135
     136      TRSP_START[cluster_id][lpid] = _proctime();
     137
     138      unsigned int nlt   = NL / ntasks;                // number of lines per processor
     139      unsigned int first = task_id * nlt;              // first line index
     140      unsigned int last  = first + nlt;                // last line index
     141      unsigned int nlines_clusters = NL / nclusters;   // number of lines per cluster
     142      unsigned int npix_clusters   = NP / nclusters;   // number of pixels per cluster
     143
     144      unsigned int src_cluster;
     145      unsigned int src_index;
     146      unsigned int dst_cluster;
     147      unsigned int dst_index;
     148
     149      unsigned int word;
     150
     151      for (l = first; l < last; l++)
     152      {
     153         PRINTF("    - processing line %d\n", l);
     154
     155         check_line_before[l] = 0;
    127156         
    128          for (p = 0; p < nlocal_procs; p++){
    129             LOAD_START[cluster_id][p] = date;
    130          }
    131          if (ioc_read(frame * nblocks + nblocks * cluster_id / nclusters, A[cluster_id], nblocks / nclusters)){
    132             PRINTF("echec ioc_read\n");
    133             exit();
    134          }
    135          if (ioc_completed()){
    136             PRINTF("echec ioc_completed\n");
    137             exit();
    138          }
    139 
    140          date = proctime();
    141          PRINTF("*** Completing load for frame %d at cycle %d\n", frame, date);
    142          for (p = 0; p < nlocal_procs; p++){
    143             LOAD_END[cluster_id][p] = date;
     157         // in each iteration we read one word and write four bytes
     158         for (p = 0 ; p < NP ; p = p+4)
     159         {
     160            // read one word, with extended address from local buffer
     161            src_cluster = cluster_xy;
     162            src_index   = (l % nlines_clusters) * NP + p;
     163            word = _word_extended_read( src_cluster,
     164                                        (unsigned int)&buf_in[src_index] );
     165
     166            unsigned char byte0 = (unsigned char)( word      & 0x000000FF);
     167            unsigned char byte1 = (unsigned char)((word>>8)  & 0x000000FF);
     168            unsigned char byte2 = (unsigned char)((word>>16) & 0x000000FF);
     169            unsigned char byte3 = (unsigned char)((word>>24) & 0x000000FF);
     170
     171            // compute checksum
     172            check_line_before[l] = check_line_before[l] + byte0 + byte1 + byte2 + byte3;
     173
     174            // write four bytes with extended address to four remote buffers
     175            dst_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     176                           ((p / npix_clusters) % Y_SIZE);
     177            dst_index   = (p % npix_clusters) * NL + l;
     178            _byte_extended_write( dst_cluster,
     179                                  (unsigned int)&buf_out[dst_index],
     180                                  byte0 );
     181
     182            dst_cluster = ((((p+1) / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     183                           (((p+1) / npix_clusters) % Y_SIZE);
     184            dst_index   = ((p+1) % npix_clusters) * NL + l;
     185            _byte_extended_write( dst_cluster,
     186                                  (unsigned int)&buf_out[dst_index],
     187                                  byte1 );
     188
     189            dst_cluster = ((((p+2) / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     190                           (((p+2) / npix_clusters) % Y_SIZE);
     191            dst_index   = ((p+2) % npix_clusters) * NL + l;
     192            _byte_extended_write( dst_cluster,
     193                                  (unsigned int)&buf_out[dst_index],
     194                                  byte2 );
     195
     196            dst_cluster = ((((p+3) / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     197                           (((p+3) / npix_clusters) % Y_SIZE);
     198            dst_index   = ((p+3) % npix_clusters) * NL + l;
     199            _byte_extended_write( dst_cluster,
     200                                  (unsigned int)&buf_out[dst_index],
     201                                  byte3 );
    144202         }
    145203      }
    146204
    147       barrier_wait(0);
    148 
    149       // parallel transpose from A to B buffers
    150       // each processor makes the transposition for (NL/nglobal_procs) lines
    151       // (p,l) are the (x,y) pixel coordinates in the source frame
    152 
    153 #ifndef DISPLAY_ONLY
    154       date = proctime();
    155       PRINTF("\n*** Starting transpose for frame %d at cycle %d\n", frame, date);
    156       TRSP_START[cluster_id][local_id] = date;
    157 
    158       unsigned int nlt   = NL / nglobal_procs; // Nombre de ligne à traiter par processeur
    159       unsigned int first = proc_id * nlt;      // Index de la premiÚre ligne à traiter pour le proc courant (celui qui exécute le code)
    160       unsigned int last  = first + nlt;        // Index de la derniÚre ligne
    161       unsigned int nlines_clusters = NL / nclusters; // Nombre de lignes à traiter par cluster
    162       unsigned int npix_clusters   = NP / nclusters; // Nombre de pixels par ligne à traiter par cluster
    163 
    164       for (l = first; l < last; l++){
    165          PRINTF("    - processing line %d\n", l);
    166          for (p = 0; p < NP; p++){
    167             unsigned int source_index   = (l % nlines_clusters) * NP + p;
    168             unsigned int dest_cluster   = p / npix_clusters;
    169             unsigned int dest_index     = (p % npix_clusters) * NL + l;
    170             B[dest_cluster][dest_index] = A[cluster_id][source_index];
     205      PRINTF("*** proc 0 in cluster [%d,%d] complete transpose for image %d at cycle %d\n",
     206             x, y, image, _proctime() );
     207
     208      TRSP_END[cluster_id][lpid] = _proctime();
     209
     210      _barrier_wait(1);
     211
     212      // optional parallel display from local buf_out to frame buffer
     213
     214#ifdef DISPLAY_OK
     215
     216      PRINTF("\n*** proc 0 in cluster [%d,%d] starts display for image %d at cycle %d\n",
     217             x, y, image, _proctime() );
     218
     219      DISP_START[cluster_id][lpid] = _proctime();
     220
     221      unsigned int npxt = npixels / ntasks;   // number of pixels per task
     222      unsigned int buffer = (unsigned int)buf_out + npxt*lpid;
     223
     224      _fb_sync_write( npxt * task_id, buffer, npxt, cluster_xy );
     225
     226      PRINTF("*** Proc 0 in cluster [%d,%d] completes display for image %d at cycle %d\n",
     227             x, y, image, _proctime() );
     228
     229      DISP_END[cluster_id][lpid] = _proctime();
     230
     231      _barrier_wait(2);
     232
     233#endif
     234
     235      // Instrumentation and checksum (done by processor 0 in cluster 0)
     236      if (proc_id == 0)
     237      {
     238         PRINTF("\n*** Proc [0,0,0] starts checks for image %d at cycle %d\n\n",
     239                  image, _proctime() );
     240
     241         unsigned int success = 1;
     242
     243         for ( l = 0 ; l < NL ; l++ )
     244         {
     245            check_line_after[l] = 0;
     246
     247            for ( p = 0 ; p < NP ; p++ )
     248            {
     249               // read one byte in remote buffer
     250               src_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) +
     251                             ((p / npix_clusters) % Y_SIZE);
     252               src_index   = (p % npix_clusters) * NL + l;
     253
     254               unsigned char byte = _byte_extended_read( src_cluster,
     255                                                         (unsigned int)&buf_out[src_index] );
     256
     257               check_line_after[l] = check_line_after[l] + byte;
     258            }
     259
     260            PRINTF(" - l = %d / before = %d / after = %d \n",
     261                   l, check_line_before[l], check_line_after[l] );
     262
     263            if ( check_line_before[l] != check_line_after[l] ) success = 0;
    171264         }
    172       }
    173 
    174       date = proctime();
    175       PRINTF("*** Completing transpose for frame %d at cycle %d\n", frame, date);
    176       TRSP_END[cluster_id][local_id] = date;
    177       barrier_wait(1);
    178 #endif
    179 
    180       // parallel display from B[c] to frame buffer
    181       // each processor uses its private dma to display NL*NP/nglobal_procs pixels
    182 
    183       date = proctime();
    184       PRINTF("\n*** Starting display for frame %d at cycle %d\n", frame, date);
    185       DISP_START[cluster_id][local_id] = date;
    186 
    187       unsigned int npxt = npixels / nglobal_procs;   // number of pixels per proc
    188 
    189 #ifndef DISPLAY_ONLY
    190       if (fb_write(npxt * proc_id, B[cluster_id] + npxt * local_id, npxt)){
    191          PRINTF("[%d]: echec fb_sync_write\n", proc_id);
    192          exit();
    193       }
    194 #else
    195       if (fb_write(npxt * proc_id, A[cluster_id] + npxt * local_id, npxt)){
    196          PRINTF("[%d]: echec fb_sync_write\n", proc_id);
    197          exit();
    198       }
    199 #endif
    200 
    201       if (fb_completed()){
    202          PRINTF("[%d]: echec fb_completed\n", proc_id);
    203          exit();
    204       }
    205 
    206       date = proctime();
    207       PRINTF("*** Completing display for frame %d at cycle %d\n", frame, date);
    208       DISP_END[cluster_id][local_id] = date;
    209 
    210       barrier_wait(2);
    211 
    212       // Instrumentation (done by processor 0 in cluster 0)
    213       if (local_id == 0){
    214          date = proctime();
    215          PRINTF("\n*** Starting Instrumentation for frame %d at cycle %d\n\n", frame, date);
     265
     266         if ( success ) PRINTF("\n*** proc [0,0,0] : CHECKSUM OK \n\n");
     267         else           PRINTF("\n*** proc [0,0,0] : CHECKSUM KO \n\n");
    216268
    217269         int cc, pp;
     
    229281         unsigned int max_disp_ended = 0;
    230282
    231          for (cc = 0; cc < nclusters; cc++){
    232             for (pp = 0; pp < nlocal_procs; pp++){
    233                if (LOAD_START[cc][pp] < min_load_start){
    234                   min_load_start = LOAD_START[cc][pp];
    235                }
    236                if (LOAD_START[cc][pp] > max_load_start){
    237                   max_load_start = LOAD_START[cc][pp];
    238                }
    239                if (LOAD_END[cc][pp] < min_load_ended){
    240                   min_load_ended = LOAD_END[cc][pp];
    241                }
    242                if (LOAD_END[cc][pp] > max_load_ended){
    243                   max_load_ended = LOAD_END[cc][pp];
    244                }
    245 
    246                if (TRSP_START[cc][pp] < min_trsp_start){
    247                   min_trsp_start = TRSP_START[cc][pp];
    248                }
    249                if (TRSP_START[cc][pp] > max_trsp_start){
    250                   max_trsp_start = TRSP_START[cc][pp];
    251                }
    252                if (TRSP_END[cc][pp] < min_trsp_ended){
    253                   min_trsp_ended = TRSP_END[cc][pp];
    254                }
    255                if (TRSP_END[cc][pp] > max_trsp_ended){
    256                   max_trsp_ended = TRSP_END[cc][pp];
    257                }
    258 
    259                if (DISP_START[cc][pp] < min_disp_start){
    260                   min_disp_start = DISP_START[cc][pp];
    261                }
    262                if (DISP_START[cc][pp] > max_disp_start){
    263                   max_disp_start = DISP_START[cc][pp];
    264                }
    265                if (DISP_END[cc][pp] < min_disp_ended){
    266                   min_disp_ended = DISP_END[cc][pp];
    267                }
    268                if (DISP_END[cc][pp] > max_disp_ended){
    269                   max_disp_ended = DISP_END[cc][pp];
    270                }
     283         for (cc = 0; cc < nclusters; cc++)
     284         {
     285            for (pp = 0; pp < NB_PROCS_MAX; pp++)
     286            {
     287               if (LOAD_START[cc][pp] < min_load_start)  min_load_start = LOAD_START[cc][pp];
     288               if (LOAD_START[cc][pp] > max_load_start)  max_load_start = LOAD_START[cc][pp];
     289               if (LOAD_END[cc][pp]   < min_load_ended)  min_load_ended = LOAD_END[cc][pp];
     290               if (LOAD_END[cc][pp]   > max_load_ended)  max_load_ended = LOAD_END[cc][pp];
     291               if (TRSP_START[cc][pp] < min_trsp_start)  min_trsp_start = TRSP_START[cc][pp];
     292               if (TRSP_START[cc][pp] > max_trsp_start)  max_trsp_start = TRSP_START[cc][pp];
     293               if (TRSP_END[cc][pp]   < min_trsp_ended)  min_trsp_ended = TRSP_END[cc][pp];
     294               if (TRSP_END[cc][pp]   > max_trsp_ended)  max_trsp_ended = TRSP_END[cc][pp];
     295               if (DISP_START[cc][pp] < min_disp_start)  min_disp_start = DISP_START[cc][pp];
     296               if (DISP_START[cc][pp] > max_disp_start)  max_disp_start = DISP_START[cc][pp];
     297               if (DISP_END[cc][pp]   < min_disp_ended)  min_disp_ended = DISP_END[cc][pp];
     298               if (DISP_END[cc][pp]   > max_disp_ended)  max_disp_ended = DISP_END[cc][pp];
    271299            }
    272300         }
    273301
    274302         PRINTF(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n",
    275                min_load_start, max_load_start, (min_load_start+max_load_start)/2, max_load_start-min_load_start);
     303               min_load_start, max_load_start, (min_load_start+max_load_start)/2,
     304               max_load_start-min_load_start);
     305
    276306         PRINTF(" - LOAD_END   : min = %d / max = %d / med = %d / delta = %d\n",
    277                min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, max_load_ended-min_load_ended);
     307               min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2,
     308               max_load_ended-min_load_ended);
    278309
    279310         PRINTF(" - TRSP_START : min = %d / max = %d / med = %d / delta = %d\n",
    280                min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, max_trsp_start-min_trsp_start);
     311               min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2,
     312               max_trsp_start-min_trsp_start);
     313
    281314         PRINTF(" - TRSP_END   : min = %d / max = %d / med = %d / delta = %d\n",
    282                min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, max_trsp_ended-min_trsp_ended);
     315               min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2,
     316               max_trsp_ended-min_trsp_ended);
    283317
    284318         PRINTF(" - DISP_START : min = %d / max = %d / med = %d / delta = %d\n",
    285                min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, max_disp_start-min_disp_start);
     319               min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2,
     320               max_disp_start-min_disp_start);
     321
    286322         PRINTF(" - DISP_END   : min = %d / max = %d / med = %d / delta = %d\n",
    287                min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, max_disp_ended-min_disp_ended);
    288 
    289          PRINTF(" - BARRIER TRSP/DISP = %d\n", min_disp_start - max_trsp_ended);
     323               min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2,
     324               max_disp_ended-min_disp_ended);
    290325      }
    291       frame++;
    292 
    293    } // end while frame     
    294 
    295    PRINTF("*** End of main ***\n");
    296 
    297    while(1);
     326
     327      image++;
     328
     329      _barrier_wait( 3 );
     330   } // end while image     
     331
     332
     333   _exit();
     334
    298335} // end main()
    299336
Note: See TracChangeset for help on using the changeset viewer.