source: soft/giet_vm/giet_boot/boot.c @ 618

Last change on this file since 618 was 615, checked in by bellefin, 10 years ago

Introduce mmc distributed lock
The locks are distributed in the kernel heaps (one lock in each cluster) and there is a global table in the kernel data segment which contains the addresses of all the locks.
The _mmc_boot_mode variable is defined in boot.c and kernel_init.c and defines which kind of lock is used.
The distributed locks are initialized inside the kernel_init() function.

File size: 75.9 KB
RevLine 
[527]1///////////////////////////////////////////////////////////////////////////////////
[258]2// File     : boot.c
3// Date     : 01/11/2013
4// Author   : alain greiner
5// Copyright (c) UPMC-LIP6
[527]6///////////////////////////////////////////////////////////////////////////////////
[493]7// The boot.c file contains the bootloader for the GIET-VM static OS. 
[258]8//
[493]9// This code has been written for the MIPS32 processor.
// The virtual addresses are on 32 bits and use the (unsigned int) type. The
// physical addresses can have up to 40 bits, and use type (unsigned long long).
// It natively supports clusterized shared memory multi-processor architectures,
[493]13// where each processor is identified by a composite index [x,y,p],
[258]14// and where there is one physical memory bank per cluster.
15//
[493]16// The boot.elf file is stored on disk and is loaded into memory by proc[0,0,0],
17// executing the generic preloader (stored in ROM). The boot-loader code itself
18// is executed in parallel by all proc[x,y,0], and performs the following tasks:
19// - load into memory various binary files, from a FAT32 file system.
20// - build the various page tables (one page table per vspace).
// - initialize the schedulers (one scheduler per processor).
22// - initialize the external peripherals.
[258]23//
24// 1) The binary files to be loaded are:
25//    - the "map.bin" file contains the hardware architecture description and the
//      mapping directives. It must be stored in the seg_boot_mapping segment
[321]27//      (at address SEG_BOOT_MAPPING_BASE defined in hard_config.h file).
[493]28//    - the "kernel.elf" file contains the kernel binary code and data.
[258]29//    - the various "application.elf" files.
30//
[493]31// 2) The "map.bin" file contains the C binary structure defining:
//    - the hardware architecture: number of clusters, number of processors,
33//      size of the memory segments, and peripherals in each cluster.
34//    - The structure of the various multi-threaded software applications:
35//      number of tasks, communication channels.
[513]36//    - The mapping: placement of virtual segments (vseg) in the physical
//      segments (pseg), placement of software tasks on the processors.
[258]38//
// 3) The GIET-VM uses the paged virtual memory to provide two services:
//    - classical memory protection, when several independent applications compiled
41//      in different virtual spaces are executing on the same hardware platform.
[412]42//    - data placement in NUMA architectures, to control the placement
43//      of the software objects (vsegs) on the physical memory banks (psegs).
[258]44//
[527]45//    The max number of vspaces (GIET_NB_VSPACE_MAX) is a configuration parameter.
//    For each application, the tasks are statically allocated on processors.
//    The page tables are statically built in the boot phase, and they do not
//    change during execution.
49//    The GIET_VM uses both small pages (4 Kbytes), and big pages (2 Mbytes).
[258]50//
[527]51//    Each page table (one page table per virtual space) is monolithic, and
52//    contains one PT1 (8 Kbytes) and a variable number of PT2s (4 Kbytes each).
53//    For each vspace, the number of PT2s is defined by the size of the PTAB vseg
54//    in the mapping.
55//    The PT1 is indexed by the ix1 field (11 bits) of the VPN. An entry is 32 bits.
//    A PT2 is indexed by the ix2 field (9 bits) of the VPN. An entry is 64 bits.
[412]57//    The first word contains the flags, the second word contains the PPN.
[493]58//    The page tables are distributed/replicated in all clusters.
[527]59///////////////////////////////////////////////////////////////////////////////////
[263]60// Implementation Notes:
61//
[527]62// 1) The cluster_id variable is a linear index in the mapping_info array.
//    The cluster_xy variable is the topological index = (x << Y_WIDTH) + y
[412]64//
[493]65// 2) We set the _tty0_boot_mode variable to force the _printf() function to use
66//    the tty0_spin_lock for exclusive access to TTY0.
[527]67///////////////////////////////////////////////////////////////////////////////////
[258]68
[263]69#include <giet_config.h>
[464]70#include <hard_config.h>
[436]71#include <mapping_info.h>
[464]72#include <kernel_malloc.h>
[258]73#include <memspace.h>
74#include <tty_driver.h>
75#include <xcu_driver.h>
[347]76#include <bdv_driver.h>
[460]77#include <hba_driver.h>
[527]78#include <sdc_driver.h>
[258]79#include <cma_driver.h>
80#include <nic_driver.h>
[299]81#include <iob_driver.h>
[295]82#include <pic_driver.h>
[258]83#include <mwr_driver.h>
[527]84#include <dma_driver.h>
[258]85#include <ctx_handler.h>
86#include <irq_handler.h>
87#include <vmem.h>
[412]88#include <pmem.h>
[258]89#include <utils.h>
[460]90#include <tty0.h>
[493]91#include <kernel_locks.h>
92#include <kernel_barriers.h>
[258]93#include <elf-types.h>
94#include <fat32.h>
95#include <mips32_registers.h>
96#include <stdarg.h>
97
[263]98#if !defined(X_SIZE)
[359]99# error: The X_SIZE value must be defined in the 'hard_config.h' file !
[258]100#endif
101
[263]102#if !defined(Y_SIZE)
[359]103# error: The Y_SIZE value must be defined in the 'hard_config.h' file !
[263]104#endif
105
106#if !defined(X_WIDTH)
[359]107# error: The X_WIDTH value must be defined in the 'hard_config.h' file !
[263]108#endif
109
110#if !defined(Y_WIDTH)
[359]111# error: The Y_WIDTH value must be defined in the 'hard_config.h' file !
[263]112#endif
113
[321]114#if !defined(SEG_BOOT_MAPPING_BASE)
[359]115# error: The SEG_BOOT_MAPPING_BASE value must be defined in the hard_config.h file !
[321]116#endif
117
[359]118#if !defined(NB_PROCS_MAX)
119# error: The NB_PROCS_MAX value must be defined in the 'hard_config.h' file !
[321]120#endif
121
[359]122#if !defined(GIET_NB_VSPACE_MAX)
123# error: The GIET_NB_VSPACE_MAX value must be defined in the 'giet_config.h' file !
[321]124#endif
125
[359]126#if !defined(GIET_ELF_BUFFER_SIZE)
127# error: The GIET_ELF_BUFFER_SIZE value must be defined in the giet_config.h file !
[258]128#endif
129
130////////////////////////////////////////////////////////////////////////////
131//      Global variables for boot code
132////////////////////////////////////////////////////////////////////////////
133
[412]134// Temporaty buffer used to load one complete .elf file 
[493]135__attribute__((section(".kdata")))
[590]136unsigned char  _boot_elf_buffer[GIET_ELF_BUFFER_SIZE] __attribute__((aligned(64)));
[258]137
[412]138// Physical memory allocators array (one per cluster)
[493]139__attribute__((section(".kdata")))
[527]140pmem_alloc_t  boot_pmem_alloc[X_SIZE][Y_SIZE];
[258]141
[412]142// Schedulers virtual base addresses array (one per processor)
[493]143__attribute__((section(".kdata")))
144static_scheduler_t* _schedulers[X_SIZE][Y_SIZE][NB_PROCS_MAX];
[258]145
[527]146// Page tables virtual base addresses (one per vspace and per cluster)
[493]147__attribute__((section(".kdata")))
148unsigned int        _ptabs_vaddr[GIET_NB_VSPACE_MAX][X_SIZE][Y_SIZE];
[258]149
[412]150// Page tables physical base addresses (one per vspace and per cluster)
[493]151__attribute__((section(".kdata")))
152paddr_t             _ptabs_paddr[GIET_NB_VSPACE_MAX][X_SIZE][Y_SIZE];
[258]153
[412]154// Page tables pt2 allocators (one per vspace and per cluster)
[493]155__attribute__((section(".kdata")))
156unsigned int        _ptabs_next_pt2[GIET_NB_VSPACE_MAX][X_SIZE][Y_SIZE];
[263]157
[412]158// Page tables max_pt2  (same value for all page tables)
[493]159__attribute__((section(".kdata")))
160unsigned int        _ptabs_max_pt2;
[412]161
[493]162// boot code uses a spin lock to protect TTY0
163__attribute__((section(".kdata")))
164unsigned int        _tty0_boot_mode = 1;
[490]165
[615]166// boot code does not use distributed locks to protect MMC
167__attribute__((section(".kdata")))
168unsigned int        _mmc_boot_mode = 1;
169
[578]170// boot code does not uses a lock to protect HBA command allocator
[493]171__attribute__((section(".kdata")))
[578]172unsigned int        _hba_boot_mode = 1;
173
174__attribute__((section(".kdata")))
[493]175spin_lock_t         _ptabs_spin_lock[GIET_NB_VSPACE_MAX][X_SIZE][Y_SIZE];
[490]176
[493]177// barrier used by boot code for parallel execution
178__attribute__((section(".kdata")))
179simple_barrier_t    _barrier_all_clusters;
[490]180
[527]181//////////////////////////////////////////////////////////////////////////////
182//        Extern variables
183//////////////////////////////////////////////////////////////////////////////
184
[493]185// this variable is defined in the tty0.c file
186extern spin_lock_t  _tty0_spin_lock;
[464]187
[527]188extern void boot_entry();
189
[258]190//////////////////////////////////////////////////////////////////////////////
[412]191// This function registers a new PTE1 in the page table defined
192// by the vspace_id argument, and the (x,y) coordinates.
193// It updates only the first level PT1.
[493]194// As each vseg is mapped by a different processor, the PT1 entry cannot
195// be concurrently accessed, and we don't need to take any lock.
[258]196//////////////////////////////////////////////////////////////////////////////
[412]197void boot_add_pte1( unsigned int vspace_id,
198                    unsigned int x,
199                    unsigned int y,
200                    unsigned int vpn,        // 20 bits right-justified
201                    unsigned int flags,      // 10 bits left-justified
202                    unsigned int ppn )       // 28 bits right-justified
[258]203{
[412]204    // compute index in PT1
205    unsigned int    ix1 = vpn >> 9;         // 11 bits for ix1
[258]206
[412]207    // get page table physical base address
[493]208    paddr_t  pt1_pbase = _ptabs_paddr[vspace_id][x][y];
[412]209
210    if ( pt1_pbase == 0 )
[258]211    {
[493]212        _printf("\n[BOOT ERROR] in boot_add_pte1() : no PTAB in cluster[%d,%d]"
213                    " containing processors\n", x , y );
[258]214        _exit();
215    }
216
[412]217    // compute pte1 : 2 bits V T / 8 bits flags / 3 bits RSVD / 19 bits bppi
218    unsigned int    pte1 = PTE_V |
219                           (flags & 0x3FC00000) |
220                           ((ppn>>9) & 0x0007FFFF);
[258]221
[412]222    // write pte1 in PT1
223    _physical_write( pt1_pbase + 4*ix1, pte1 );
224
[493]225    asm volatile ("sync");
[412]226
227}   // end boot_add_pte1()
228
[258]229//////////////////////////////////////////////////////////////////////////////
[412]230// This function registers a new PTE2 in the page table defined
[347]231// by the vspace_id argument, and the (x,y) coordinates.
[412]232// It updates both the first level PT1 and the second level PT2.
[258]233// As the set of PT2s is implemented as a fixed size array (no dynamic
234// allocation), this function checks a possible overflow of the PT2 array.
[493]235// As a given entry in PT1 can be shared by several vsegs, mapped by
236// different processors, we need to take the lock protecting PTAB[v][x]y].
[258]237//////////////////////////////////////////////////////////////////////////////
[412]238void boot_add_pte2( unsigned int vspace_id,
239                    unsigned int x,
240                    unsigned int y,
241                    unsigned int vpn,        // 20 bits right-justified
242                    unsigned int flags,      // 10 bits left-justified
243                    unsigned int ppn )       // 28 bits right-justified
[258]244{
245    unsigned int ix1;
246    unsigned int ix2;
[347]247    paddr_t      pt2_pbase;     // PT2 physical base address
[412]248    paddr_t      pte2_paddr;    // PTE2 physical address
[258]249    unsigned int pt2_id;        // PT2 index
250    unsigned int ptd;           // PTD : entry in PT1
251
[412]252    ix1 = vpn >> 9;             // 11 bits for ix1
253    ix2 = vpn & 0x1FF;          //  9 bits for ix2
[258]254
[493]255    // get page table physical base address
[347]256    paddr_t      pt1_pbase = _ptabs_paddr[vspace_id][x][y];
[258]257
[412]258    if ( pt1_pbase == 0 )
[258]259    {
[493]260        _printf("\n[BOOT ERROR] in boot_add_pte2() : no PTAB for vspace %d "
261                "in cluster[%d,%d]\n", vspace_id , x , y );
[258]262        _exit();
263    }
264
[493]265    // get lock protecting PTAB[vspace_id][x][y]
266    _spin_lock_acquire( &_ptabs_spin_lock[vspace_id][x][y] );
267
[258]268    // get ptd in PT1
[493]269    ptd = _physical_read( pt1_pbase + 4 * ix1 );
[258]270
[347]271    if ((ptd & PTE_V) == 0)    // undefined PTD: compute PT2 base address,
[258]272                               // and set a new PTD in PT1
273    {
[493]274        // get a new pt2_id
[347]275        pt2_id = _ptabs_next_pt2[vspace_id][x][y];
[493]276        _ptabs_next_pt2[vspace_id][x][y] = pt2_id + 1;
277
278        // check overflow
[412]279        if (pt2_id == _ptabs_max_pt2) 
[258]280        {
[493]281            _printf("\n[BOOT ERROR] in boot_add_pte2() : PTAB[%d,%d,%d]"
282                    " contains not enough PT2s\n", vspace_id, x, y );
[258]283            _exit();
284        }
[347]285
286        pt2_pbase = pt1_pbase + PT1_SIZE + PT2_SIZE * pt2_id;
287        ptd = PTE_V | PTE_T | (unsigned int) (pt2_pbase >> 12);
[493]288
289        // set PTD into PT1
[412]290        _physical_write( pt1_pbase + 4*ix1, ptd);
[258]291    }
292    else                       // valid PTD: compute PT2 base address
293    {
294        pt2_pbase = ((paddr_t)(ptd & 0x0FFFFFFF)) << 12;
295    }
296
297    // set PTE in PT2 : flags & PPN in two 32 bits words
[412]298    pte2_paddr  = pt2_pbase + 8 * ix2;
[493]299    _physical_write(pte2_paddr     , (PTE_V | flags) );
300    _physical_write(pte2_paddr + 4 , ppn );
[258]301
[493]302    // release lock protecting PTAB[vspace_id][x][y]
303    _spin_lock_release( &_ptabs_spin_lock[vspace_id][x][y] );
304
305    asm volatile ("sync");
306
[412]307}   // end boot_add_pte2()
[258]308
[412]309////////////////////////////////////////////////////////////////////////////////////
[258]310// Align the value of paddr or vaddr to the required alignement,
311// defined by alignPow2 == L2(alignement).
[412]312////////////////////////////////////////////////////////////////////////////////////
[493]313paddr_t paddr_align_to( paddr_t paddr, unsigned int alignPow2 ) 
[258]314{
315    paddr_t mask = (1 << alignPow2) - 1;
316    return ((paddr + mask) & ~mask);
317}
318
// Rounds the virtual address vaddr up to the required alignment,
// defined by alignPow2 == log2(alignment).
// An already aligned address is returned unchanged.
// alignPow2 must be smaller than 32 (width of a virtual address).
unsigned int vaddr_align_to( unsigned int vaddr, unsigned int alignPow2 ) 
{
    // unsigned constant: (1 << 31) on a signed int is an overflow
    unsigned int mask = (1U << alignPow2) - 1;
    return ((vaddr + mask) & ~mask);
}
324
[412]325/////////////////////////////////////////////////////////////////////////////////////
326// This function map a vseg identified by the vseg pointer.
327//
[493]328// A given vseg can be mapped in a Big Physical Pages (BPP: 2 Mbytes) or in a
[412]329// Small Physical Pages (SPP: 4 Kbytes), depending on the "big" attribute of vseg,
330// with the following rules:
331// - SPP : There is only one vseg in a small physical page, but a single vseg
332//   can cover several contiguous small physical pages.
333// - BPP : It can exist several vsegs in a single big physical page, and a single
334//   vseg can cover several contiguous big physical pages.
335//
[513]336// 1) First step: it computes various vseg attributes and checks
337//    alignment constraints.
[412]338//
[493]339// 2) Second step: it allocates the required number of contiguous physical pages,
[412]340//    computes the physical base address (if the vseg is not identity mapping),
341//    and register it in the vseg pbase field.
[493]342//    Only the vsegs used by the boot code and the peripheral vsegs
343//    can be identity mapping. The first big physical page in cluster[0,0]
344//    is reserved for the boot vsegs.
[412]345//
[513]346// 3) Third step (only for vseg that have the VSEG_TYPE_PTAB): the M page tables
[493]347//    associated to the M vspaces must be packed in the same vseg.
348//    We divide this vseg in M sub-segments, and compute the vbase and pbase
349//    addresses for M page tables, and register these addresses in the _ptabs_paddr
[412]350//    and _ptabs_vaddr arrays.
351// 
352/////////////////////////////////////////////////////////////////////////////////////
[427]353void boot_vseg_map( mapping_vseg_t* vseg,
354                    unsigned int    vspace_id )
[258]355{
[412]356    mapping_header_t*   header  = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
357    mapping_cluster_t*  cluster = _get_cluster_base(header);
358    mapping_pseg_t*     pseg    = _get_pseg_base(header);
[258]359
[513]360    //////////// First step : compute vseg attributes
361
[412]362    // compute destination cluster pointer & coordinates
363    pseg    = pseg + vseg->psegid;
364    cluster = cluster + pseg->clusterid;
365    unsigned int        x_dest     = cluster->x;
366    unsigned int        y_dest     = cluster->y;
[258]367
[412]368    // compute the "big" vseg attribute
369    unsigned int        big = vseg->big;
[258]370
[513]371    // all vsegs must be aligned on 4Kbytes
372    if ( vseg->vbase & 0x00000FFF ) 
373    {
374        _printf("\n[BOOT ERROR] vseg %s not aligned : vbase = %x\n", 
375                vseg->name, vseg->vbase );
376        _exit();
377    }
378
[412]379    // compute the "is_ram" vseg attribute
380    unsigned int        is_ram;
381    if ( pseg->type == PSEG_TYPE_RAM )  is_ram = 1;
382    else                                is_ram = 0;
[258]383
[412]384    // compute the "is_ptab" attribute
385    unsigned int        is_ptab;
[513]386    if ( vseg->type == VSEG_TYPE_PTAB ) is_ptab = 1;
387    else                                is_ptab = 0;
[258]388
[427]389    // compute actual vspace index
390    unsigned int vsid;
391    if ( vspace_id == 0xFFFFFFFF ) vsid = 0;
392    else                           vsid = vspace_id;
393
[412]394    //////////// Second step : compute ppn and npages 
395    //////////// - if identity mapping :  ppn <= vpn
396    //////////// - if vseg is periph   :  ppn <= pseg.base >> 12
397    //////////// - if vseg is ram      :  ppn <= physical memory allocator
[258]398
[493]399    unsigned int ppn;          // first physical page index (28 bits = |x|y|bppi|sppi|)
400    unsigned int vpn;          // first virtual page index  (20 bits = |ix1|ix2|)
401    unsigned int vpn_max;      // last  virtual page index  (20 bits = |ix1|ix2|)
[258]402
[412]403    vpn     = vseg->vbase >> 12;
404    vpn_max = (vseg->vbase + vseg->length - 1) >> 12;
[258]405
[412]406    // compute npages
407    unsigned int npages;       // number of required (big or small) pages
408    if ( big == 0 ) npages  = vpn_max - vpn + 1;            // number of small pages
409    else            npages  = (vpn_max>>9) - (vpn>>9) + 1;  // number of big pages
410
411    // compute ppn
412    if ( vseg->ident )           // identity mapping
[258]413    {
[412]414        ppn = vpn;
[258]415    }
[412]416    else                         // not identity mapping
[258]417    {
[412]418        if ( is_ram )            // RAM : physical memory allocation required
[258]419        {
[412]420            // compute pointer on physical memory allocator in dest cluster
421            pmem_alloc_t*     palloc = &boot_pmem_alloc[x_dest][y_dest];
[258]422
[412]423            if ( big == 0 )             // SPP : small physical pages
424            {
425                // allocate contiguous small physical pages
426                ppn = _get_small_ppn( palloc, npages );
427            }
428            else                            // BPP : big physical pages
429            {
430 
431                // one big page can be shared by several vsegs
432                // we must chek if BPP already allocated
433                if ( is_ptab )   // It cannot be mapped
434                {
435                    ppn = _get_big_ppn( palloc, npages ); 
436                }
437                else             // It can be mapped
438                {
439                    unsigned int ix1   = vpn >> 9;   // 11 bits
[427]440                    paddr_t      paddr = _ptabs_paddr[vsid][x_dest][y_dest] + (ix1<<2);
[412]441                    unsigned int pte1  = _physical_read( paddr );
[493]442
[412]443                    if ( (pte1 & PTE_V) == 0 )     // BPP not allocated yet
444                    {
445                        // allocate contiguous big physical pages
[433]446                        ppn = _get_big_ppn( palloc, npages );
[412]447                    }
448                    else                           // BPP already allocated
449                    {
[433]450                        // test if new vseg has the same mode bits than
451                        // the other vsegs in the same big page
452                        unsigned int pte1_mode = 0;
453                        if (pte1 & PTE_C) pte1_mode |= C_MODE_MASK;
454                        if (pte1 & PTE_X) pte1_mode |= X_MODE_MASK;
455                        if (pte1 & PTE_W) pte1_mode |= W_MODE_MASK;
456                        if (pte1 & PTE_U) pte1_mode |= U_MODE_MASK;
[493]457                        if (vseg->mode != pte1_mode) 
458                        {
459                            _printf("\n[BOOT ERROR] in boot_vseg_map() : "
460                                    "vseg %s has different flags than another vseg "
461                                    "in the same BPP\n", vseg->name );
[433]462                            _exit();
463                        }
[412]464                        ppn = ((pte1 << 9) & 0x0FFFFE00);
465                    }
466                }
467                ppn = ppn | (vpn & 0x1FF);
468            }
[258]469        }
[412]470        else                    // PERI : no memory allocation required
[258]471        {
[412]472            ppn = pseg->base >> 12;
[258]473        }
474    }
475
[412]476    // update vseg.pbase field and update vsegs chaining
477    vseg->pbase     = ((paddr_t)ppn) << 12;
[493]478    vseg->mapped    = 1;
[258]479
[412]480
481    //////////// Third step : (only if the vseg is a page table)
482    //////////// - compute the physical & virtual base address for each vspace
483    ////////////   by dividing the vseg in several sub-segments.
484    //////////// - register it in _ptabs_vaddr & _ptabs_paddr arrays,
[427]485    ////////////   and initialize next_pt2 allocators.
486    //////////// - reset all entries in first level page tables
[412]487   
488    if ( is_ptab )
[258]489    {
[412]490        unsigned int   vs;        // vspace index
491        unsigned int   nspaces;   // number of vspaces
492        unsigned int   nsp;       // number of small pages for one PTAB
493        unsigned int   offset;    // address offset for current PTAB
[258]494
[412]495        nspaces = header->vspaces;
496        offset  = 0;
[258]497
[412]498        // each PTAB must be aligned on a 8 Kbytes boundary
[427]499        nsp = ( vseg->length >> 12 ) / nspaces;
[412]500        if ( (nsp & 0x1) == 0x1 ) nsp = nsp - 1;
[258]501
[412]502        // compute max_pt2
503        _ptabs_max_pt2 = ((nsp<<12) - PT1_SIZE) / PT2_SIZE;
[433]504
[412]505        for ( vs = 0 ; vs < nspaces ; vs++ )
[258]506        {
[433]507            _ptabs_vaddr   [vs][x_dest][y_dest] = (vpn + offset) << 12;
[412]508            _ptabs_paddr   [vs][x_dest][y_dest] = ((paddr_t)(ppn + offset)) << 12;
509            _ptabs_next_pt2[vs][x_dest][y_dest] = 0;
[427]510            offset += nsp;
[433]511
[427]512            // reset all entries in PT1 (8 Kbytes)
513            _physical_memset( _ptabs_paddr[vs][x_dest][y_dest], PT1_SIZE, 0 );
[258]514        }
515    }
516
[493]517    asm volatile ("sync");
518
[412]519#if BOOT_DEBUG_PT
[493]520if ( big )
521_printf("\n[BOOT] vseg %s : cluster[%d,%d] / "
522       "vbase = %x / length = %x / BIG    / npages = %d / pbase = %l\n",
523       vseg->name, x_dest, y_dest, vseg->vbase, vseg->length, npages, vseg-> pbase );
524else
525_printf("\n[BOOT] vseg %s : cluster[%d,%d] / "
526        "vbase = %x / length = %x / SMALL / npages = %d / pbase = %l\n",
527       vseg->name, x_dest, y_dest, vseg->vbase, vseg->length, npages, vseg-> pbase );
[412]528#endif
529
530} // end boot_vseg_map()
531
532/////////////////////////////////////////////////////////////////////////////////////
[493]533// For the vseg defined by the vseg pointer, this function register PTEs
[412]534// in one or several page tables.
[436]535// It is a global vseg (kernel vseg) if (vspace_id == 0xFFFFFFFF).
[412]536// The number of involved PTABs depends on the "local" and "global" attributes:
537//  - PTEs are replicated in all vspaces for a global vseg.
[493]538//  - PTEs are replicated in all clusters containing procs for a non local vseg.
[412]539/////////////////////////////////////////////////////////////////////////////////////
[427]540void boot_vseg_pte( mapping_vseg_t*  vseg,
541                    unsigned int     vspace_id )
[412]542{
543    // compute the "global" vseg attribute and actual vspace index
544    unsigned int        global;
545    unsigned int        vsid;   
546    if ( vspace_id == 0xFFFFFFFF )
[258]547    {
[412]548        global = 1;
549        vsid   = 0;
[258]550    }
[412]551    else
[258]552    {
[412]553        global = 0;
554        vsid   = vspace_id;
[258]555    }
556
[412]557    // compute the "local" and "big" attributes
558    unsigned int        local  = vseg->local;
559    unsigned int        big    = vseg->big;
[258]560
[412]561    // compute vseg flags
[493]562    // The three flags (Local, Remote and Dirty) are set to 1
563    // to avoid hardware update for these flags, because GIET_VM
564    // does use these flags.
[412]565    unsigned int flags = 0;
566    if (vseg->mode & C_MODE_MASK) flags |= PTE_C;
567    if (vseg->mode & X_MODE_MASK) flags |= PTE_X;
568    if (vseg->mode & W_MODE_MASK) flags |= PTE_W;
569    if (vseg->mode & U_MODE_MASK) flags |= PTE_U;
570    if ( global )                 flags |= PTE_G;
571                                  flags |= PTE_L;
572                                  flags |= PTE_R;
573                                  flags |= PTE_D;
[258]574
[412]575    // compute VPN, PPN and number of pages (big or small)
576    unsigned int vpn     = vseg->vbase >> 12;
577    unsigned int vpn_max = (vseg->vbase + vseg->length - 1) >> 12;
578    unsigned int ppn     = (unsigned int)(vseg->pbase >> 12);
579    unsigned int npages;
580    if ( big == 0 ) npages  = vpn_max - vpn + 1;           
581    else            npages  = (vpn_max>>9) - (vpn>>9) + 1; 
582
[493]583    // compute destination cluster coordinates, for local vsegs
584    mapping_header_t*   header       = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
585    mapping_cluster_t*  cluster      = _get_cluster_base(header);
586    mapping_pseg_t*     pseg         = _get_pseg_base(header);
587    mapping_pseg_t*     pseg_dest    = &pseg[vseg->psegid];
588    mapping_cluster_t*  cluster_dest = &cluster[pseg_dest->clusterid];
589    unsigned int        x_dest       = cluster_dest->x;
590    unsigned int        y_dest       = cluster_dest->y;
[412]591
[493]592    unsigned int p;           // iterator for physical page index
593    unsigned int x;           // iterator for cluster x coordinate 
594    unsigned int y;           // iterator for cluster y coordinate 
595    unsigned int v;           // iterator for vspace index
[412]596
597    // loop on PTEs
598    for ( p = 0 ; p < npages ; p++ )
599    { 
600        if  ( (local != 0) && (global == 0) )         // one cluster  / one vspace
[258]601        {
[412]602            if ( big )   // big pages => PTE1s
603            {
604                boot_add_pte1( vsid,
605                               x_dest,
606                               y_dest,
607                               vpn + (p<<9),
608                               flags, 
609                               ppn + (p<<9) );
610            }
611            else         // small pages => PTE2s
612            {
613                boot_add_pte2( vsid,
614                               x_dest,
615                               y_dest,
616                               vpn + p,     
617                               flags, 
618                               ppn + p );
619            }
[258]620        }
[412]621        else if ( (local == 0) && (global == 0) )     // all clusters / one vspace
[258]622        {
[412]623            for ( x = 0 ; x < X_SIZE ; x++ )
[258]624            {
[412]625                for ( y = 0 ; y < Y_SIZE ; y++ )
626                {
[493]627                    if ( cluster[(x * Y_SIZE) + y].procs )
[412]628                    {
[493]629                        if ( big )   // big pages => PTE1s
630                        {
631                            boot_add_pte1( vsid,
632                                           x,
633                                           y,
634                                           vpn + (p<<9),
635                                           flags, 
636                                           ppn + (p<<9) );
637                        }
638                        else         // small pages => PTE2s
639                        {
640                            boot_add_pte2( vsid,
641                                           x,
642                                           y,
643                                           vpn + p,
644                                           flags, 
645                                           ppn + p );
646                        }
[412]647                    }
648                }
[258]649            }
[412]650        }
651        else if ( (local != 0) && (global != 0) )     // one cluster  / all vspaces
652        {
653            for ( v = 0 ; v < header->vspaces ; v++ )
[258]654            {
[412]655                if ( big )   // big pages => PTE1s
656                {
657                    boot_add_pte1( v,
658                                   x_dest,
659                                   y_dest,
660                                   vpn + (p<<9),
661                                   flags, 
662                                   ppn + (p<<9) );
663                }
664                else         // small pages = PTE2s
665                { 
666                    boot_add_pte2( v,
667                                   x_dest,
668                                   y_dest,
669                                   vpn + p,
670                                   flags, 
671                                   ppn + p );
672                }
[258]673            }
[412]674        }
675        else if ( (local == 0) && (global != 0) )     // all clusters / all vspaces
676        {
677            for ( x = 0 ; x < X_SIZE ; x++ )
[258]678            {
[412]679                for ( y = 0 ; y < Y_SIZE ; y++ )
680                {
[493]681                    if ( cluster[(x * Y_SIZE) + y].procs )
[412]682                    {
[493]683                        for ( v = 0 ; v < header->vspaces ; v++ )
[412]684                        {
[493]685                            if ( big )  // big pages => PTE1s
686                            {
687                                boot_add_pte1( v,
688                                               x,
689                                               y,
690                                               vpn + (p<<9),
691                                               flags, 
692                                               ppn + (p<<9) );
693                            }
694                            else        // small pages -> PTE2s
695                            {
696                                boot_add_pte2( v,
697                                               x,
698                                               y,
699                                               vpn + p,
700                                               flags, 
701                                               ppn + p );
702                            }
[412]703                        }
704                    }
705                }
[258]706            }
707        }
[412]708    }  // end for pages
[493]709
710    asm volatile ("sync");
711
[427]712}  // end boot_vseg_pte()
[258]713
[493]714
[412]715///////////////////////////////////////////////////////////////////////////////
[493]716// This function is executed by  processor[x][y][0] in each cluster
717// containing at least one processor.
718// It initialises all page table for all global or private vsegs
719// mapped in cluster[x][y], as specified in the mapping.
[412]720// In each cluster all page tables for the different vspaces must be
721// packed in one vseg occupying one single BPP (Big Physical Page).
[490]722//
[412]723// For each vseg, the mapping is done in two steps:
[436]724// 1) mapping : the boot_vseg_map() function allocates contiguous BPPs
[412]725//    or SPPs (if the vseg is not associated to a peripheral), and register
726//    the physical base address in the vseg pbase field. It initialises the
[493]727//    _ptabs_vaddr[] and _ptabs_paddr[] arrays if the vseg is a PTAB.
[412]728//
[436]729// 2) page table initialisation : the boot_vseg_pte() function initialise
[412]730//    the PTEs (both PTE1 and PTE2) in one or several page tables:
731//    - PTEs are replicated in all vspaces for a global vseg.
732//    - PTEs are replicated in all clusters for a non local vseg.
733//
734// We must handle vsegs in the following order
[493]735//   1) global vseg containing PTAB mapped in cluster[x][y],
736//   2) global vsegs occupying more than one BPP mapped in cluster[x][y],
737//   3) others global vsegs mapped in cluster[x][y],
738//   4) all private vsegs in all user spaces mapped in cluster[x][y].
[412]739///////////////////////////////////////////////////////////////////////////////
[493]740void boot_ptab_init( unsigned int cx,
741                     unsigned int cy ) 
[258]742{
[412]743    mapping_header_t*   header = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
744    mapping_vspace_t*   vspace = _get_vspace_base(header);
745    mapping_vseg_t*     vseg   = _get_vseg_base(header);
[490]746    mapping_cluster_t*  cluster ;
747    mapping_pseg_t*     pseg    ;
[258]748
749    unsigned int vspace_id;
750    unsigned int vseg_id;
751
[490]752    unsigned int procid     = _get_procid();
753    unsigned int lpid       = procid & ((1<<P_WIDTH)-1);
754
[493]755    if( lpid )
[490]756    {
[493]757        _printf("\n[BOOT ERROR] in boot_ptab_init() : "
758                "P[%d][%d][%d] should not execute it\n", cx, cy, lpid );
[490]759        _exit();
760    } 
761
[493]762    if ( header->vspaces == 0 )
[258]763    {
[493]764        _printf("\n[BOOT ERROR] in boot_ptab_init() : "
765                "mapping %s contains no vspace\n", header->name );
[258]766        _exit();
767    }
768
[493]769    ///////// Phase 1 : global vseg containing the PTAB (two barriers required)
[412]770
[513]771    // get PTAB global vseg in cluster(cx,cy)
[493]772    unsigned int found = 0;
[412]773    for (vseg_id = 0; vseg_id < header->globals; vseg_id++) 
774    {
[490]775        pseg    = _get_pseg_base(header) + vseg[vseg_id].psegid;
776        cluster = _get_cluster_base(header) + pseg->clusterid;
[513]777        if ( (vseg[vseg_id].type == VSEG_TYPE_PTAB) && 
[493]778             (cluster->x == cx) && (cluster->y == cy) )
[412]779        {
[493]780            found = 1;
781            break;
[412]782        }
783    }
[493]784    if ( found == 0 )
[258]785    {
[493]786        _printf("\n[BOOT ERROR] in boot_ptab_init() : "
787                "cluster[%d][%d] contains no PTAB vseg\n", cx , cy );
788        _exit();
[258]789    }
790
[493]791    boot_vseg_map( &vseg[vseg_id], 0xFFFFFFFF );
[490]792
[493]793    //////////////////////////////////////////////
794    _simple_barrier_wait( &_barrier_all_clusters );
795    //////////////////////////////////////////////
[412]796
[493]797    boot_vseg_pte( &vseg[vseg_id], 0xFFFFFFFF );
[412]798
[493]799    //////////////////////////////////////////////
800    _simple_barrier_wait( &_barrier_all_clusters );
801    //////////////////////////////////////////////
802
803    ///////// Phase 2 : global vsegs occupying more than one BPP
804
[258]805    for (vseg_id = 0; vseg_id < header->globals; vseg_id++) 
806    {
[490]807        pseg    = _get_pseg_base(header) + vseg[vseg_id].psegid;
808        cluster = _get_cluster_base(header) + pseg->clusterid;
[513]809        if ( (vseg[vseg_id].length > 0x200000) &&
[490]810             (vseg[vseg_id].mapped == 0) &&
[493]811             (cluster->x == cx) && (cluster->y == cy) )
[412]812        {
[427]813            boot_vseg_map( &vseg[vseg_id], 0xFFFFFFFF );
814            boot_vseg_pte( &vseg[vseg_id], 0xFFFFFFFF );
[412]815        }
[258]816    }
817
[493]818    ///////// Phase 3 : all others global vsegs
[347]819
[412]820    for (vseg_id = 0; vseg_id < header->globals; vseg_id++) 
[493]821    { 
[490]822        pseg    = _get_pseg_base(header) + vseg[vseg_id].psegid;
823        cluster = _get_cluster_base(header) + pseg->clusterid;
[493]824        if ( (vseg[vseg_id].mapped == 0) && 
825             (cluster->x == cx) && (cluster->y == cy) )
[412]826        {
[427]827            boot_vseg_map( &vseg[vseg_id], 0xFFFFFFFF );
828            boot_vseg_pte( &vseg[vseg_id], 0xFFFFFFFF );
[412]829        }
830    }
831
[493]832    ///////// Phase 4 : all private vsegs
[412]833
[258]834    for (vspace_id = 0; vspace_id < header->vspaces; vspace_id++) 
835    {
836        for (vseg_id = vspace[vspace_id].vseg_offset;
837             vseg_id < (vspace[vspace_id].vseg_offset + vspace[vspace_id].vsegs);
838             vseg_id++) 
839        {
[490]840            pseg    = _get_pseg_base(header) + vseg[vseg_id].psegid;
841            cluster = _get_cluster_base(header) + pseg->clusterid;
[493]842            if ( (cluster->x == cx) && (cluster->y == cy) )
[490]843            {
844                boot_vseg_map( &vseg[vseg_id], vspace_id );
845                boot_vseg_pte( &vseg[vseg_id], vspace_id );
846            }
[258]847        }
848    }
849
[493]850    //////////////////////////////////////////////
851    _simple_barrier_wait( &_barrier_all_clusters );
852    //////////////////////////////////////////////
[258]853
[493]854} // end boot_ptab_init()
[258]855
[493]856////////////////////////////////////////////////////////////////////////////////
857// This function should be executed by P[0][0][0] only. It complete the
858// page table initialisation, taking care of all global vsegs that are
859// not mapped in a cluster containing a processor, and have not been
860// handled by the boot_ptab_init(x,y) function.
861// An example of such vsegs are the external peripherals in TSAR_LETI platform.
862////////////////////////////////////////////////////////////////////////////////
863void boot_ptab_extend()
[258]864{
865
[493]866    mapping_header_t*   header = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
867    mapping_vseg_t*     vseg   = _get_vseg_base(header);
[258]868
[493]869    unsigned int vseg_id;
870
871    for (vseg_id = 0; vseg_id < header->globals; vseg_id++) 
[258]872    {
[493]873        if ( vseg[vseg_id].mapped == 0 ) 
[258]874        {
[493]875            boot_vseg_map( &vseg[vseg_id], 0xFFFFFFFF );
876            boot_vseg_pte( &vseg[vseg_id], 0xFFFFFFFF );
[452]877        }
878    }
[493]879}  // end boot_ptab_extend()
[258]880
881///////////////////////////////////////////////////////////////////////////////
882// This function returns in the vbase and length buffers the virtual base
883// address and the length of the  segment allocated to the schedulers array
884// in the cluster defined by the clusterid argument.
885///////////////////////////////////////////////////////////////////////////////
886void boot_get_sched_vaddr( unsigned int  cluster_id,
887                           unsigned int* vbase, 
888                           unsigned int* length )
889{
[321]890    mapping_header_t* header = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
[258]891    mapping_vseg_t*   vseg   = _get_vseg_base(header);
892    mapping_pseg_t*   pseg   = _get_pseg_base(header);
893
894    unsigned int vseg_id;
895    unsigned int found = 0;
896
897    for ( vseg_id = 0 ; (vseg_id < header->vsegs) && (found == 0) ; vseg_id++ )
898    {
[513]899        if ( (vseg[vseg_id].type == VSEG_TYPE_SCHED) && 
[263]900             (pseg[vseg[vseg_id].psegid].clusterid == cluster_id ) )
[258]901        {
902            *vbase  = vseg[vseg_id].vbase;
[513]903            *length = vseg[vseg_id].length;
[258]904            found = 1;
905        }
906    }
907    if ( found == 0 )
908    {
[263]909        mapping_cluster_t* cluster = _get_cluster_base(header);
[513]910        _printf("\n[BOOT ERROR] No vseg of type SCHED in cluster [%d,%d]\n",
[493]911                cluster[cluster_id].x, cluster[cluster_id].y );
[258]912        _exit();
913    }
914} // end boot_get_sched_vaddr()
915
#if BOOT_DEBUG_SCHED
/////////////////////////////////////////////////////////////////////////////
// This debug function should be executed by one single processor.
// For each processor of each cluster, it scans the 32 slots of the HWI,
// WTI and PTI interrupt vectors (in this order) registered in the processor
// scheduler, and displays each entry whose ISR type (16 LSB bits) is not
// ISR_DEFAULT. The channel index is taken from the 16 MSB bits of the entry.
/////////////////////////////////////////////////////////////////////////////
void boot_sched_irq_display()
{
    mapping_header_t*    header  = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
    mapping_cluster_t*   cluster = _get_cluster_base(header);

    static_scheduler_t*  sched; 

    unsigned int         x;
    unsigned int         y;
    unsigned int         p;
    unsigned int         index;
    unsigned int         value;
    unsigned int         isr;
    unsigned int         chan;

    for ( x = 0 ; x < X_SIZE ; x++ )
    {
        for ( y = 0 ; y < Y_SIZE ; y++ )
        {
            unsigned int nprocs = cluster[(x * Y_SIZE) + y].procs;

            for ( p = 0 ; p < nprocs ; p++ )
            {
                sched = _schedulers[x][y][p];

                _printf("\n[BOOT] interrupt vectors for proc[%d,%d,%d]\n",
                        x , y , p );

                // hardware interrupts
                for ( index = 0 ; index < 32 ; index++ )
                {
                    value = sched->hwi_vector[index];
                    isr   = value & 0xFFFF;
                    chan  = value >> 16;
                    if ( isr != ISR_DEFAULT )
                    {
                        _printf(" - HWI : index = %d / type = %s / channel = %d\n",
                                index , _isr_type_str[isr] , chan );
                    }
                }

                // write-triggered interrupts
                for ( index = 0 ; index < 32 ; index++ )
                {
                    value = sched->wti_vector[index];
                    isr   = value & 0xFFFF;
                    chan  = value >> 16;
                    if ( isr != ISR_DEFAULT )
                    {
                        _printf(" - WTI : index = %d / type = %s / channel = %d\n",
                                index , _isr_type_str[isr] , chan );
                    }
                }

                // timer interrupts
                for ( index = 0 ; index < 32 ; index++ )
                {
                    value = sched->pti_vector[index];
                    isr   = value & 0xFFFF;
                    chan  = value >> 16;
                    if ( isr != ISR_DEFAULT )
                    {
                        _printf(" - PTI : index = %d / type = %s / channel = %d\n",
                                index , _isr_type_str[isr] , chan );
                    }
                }
            }
        }
    } 
}  // end boot_sched_irq_display()
#endif
983
984
[258]985////////////////////////////////////////////////////////////////////////////////////
[493]986// This function is executed in parallel by all processors P[x][y][0].
987// It initialises all schedulers in cluster [x][y]. The MMU must be activated.
988// It is split in two phases separated by a synchronisation barrier.
[527]989// - In Step 1, it initialises the _schedulers[x][y][l] pointers array, the
990//              idle_task context, the  HWI / PTI / WTI interrupt vectors,
991//              and the CU HWI / PTI / WTI masks.
[321]992// - In Step 2, it scan all tasks in all vspaces to complete the tasks contexts,
[493]993//              initialisation as specified in the mapping_info data structure,
994//              and set the CP0_SCHED register.
[258]995////////////////////////////////////////////////////////////////////////////////////
[493]996void boot_scheduler_init( unsigned int x, 
997                          unsigned int y )
[258]998{
[493]999    mapping_header_t*    header  = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
1000    mapping_cluster_t*   cluster = _get_cluster_base(header);
1001    mapping_vspace_t*    vspace  = _get_vspace_base(header);
[513]1002    mapping_vseg_t*      vseg    = _get_vseg_base(header);
[493]1003    mapping_task_t*      task    = _get_task_base(header);
1004    mapping_periph_t*    periph  = _get_periph_base(header);
1005    mapping_irq_t*       irq     = _get_irq_base(header);
[258]1006
[493]1007    unsigned int         periph_id; 
1008    unsigned int         irq_id;
1009    unsigned int         vspace_id;
[513]1010    unsigned int         vseg_id;
[493]1011    unsigned int         task_id; 
[258]1012
[493]1013    unsigned int         sched_vbase;          // schedulers array vbase address
1014    unsigned int         sched_length;         // schedulers array length
1015    static_scheduler_t*  psched;               // pointer on processor scheduler
[321]1016
[527]1017    unsigned int cluster_id = (x * Y_SIZE) + y;
1018    unsigned int cluster_xy = (x << Y_WIDTH) + y; 
[493]1019    unsigned int nprocs = cluster[cluster_id].procs;
1020    unsigned int lpid;                       
1021   
[527]1022    if ( nprocs > 8 )
1023    {
1024        _printf("\n[BOOT ERROR] cluster[%d,%d] contains more than 8 procs\n", x, y );
1025        _exit();
1026    }
[258]1027
[527]1028    ////////////////////////////////////////////////////////////////////////////////
1029    // Step 1 : - initialize the schedulers[] array of pointers,
1030    //          - initialize the "tasks" and "current variables.
1031    //          - initialise the idle task context.
1032    //          - initialize the HWI, PTI and WTI interrupt vectors.
1033    //          - initialize the XCU masks for HWI / WTI / PTI interrupts.
1034    //
1035    // The general policy for interrupts routing is the following:         
1036    //          - the local HWI are statically allocatedted to local processors.
1037    //          - the nprocs first PTI are allocated for TICK (one per processor).
1038    //          - we allocate 4 WTI per processor: the first one is for WAKUP,
1039    //            the 3 others WTI are used for external interrupts (from PIC),
1040    //            and are dynamically allocated by kernel on demand.
1041    ///////////////////////////////////////////////////////////////////////////////
1042
[493]1043    // get scheduler array virtual base address in cluster[x,y]
1044    boot_get_sched_vaddr( cluster_id, &sched_vbase, &sched_length );
[321]1045
[493]1046    if ( sched_length < (nprocs<<13) ) // 8 Kbytes per scheduler
1047    {
[527]1048        _printf("\n[BOOT ERROR] Sched segment too small in cluster[%d,%d]\n",
1049                x, y );
[493]1050        _exit();
1051    }
[321]1052
[493]1053    // loop on local processors
1054    for ( lpid = 0 ; lpid < nprocs ; lpid++ )
[258]1055    {
[493]1056        // get scheduler pointer and initialise the schedulers pointers array
1057        psched = (static_scheduler_t*)(sched_vbase + (lpid<<13));
1058        _schedulers[x][y][lpid] = psched;
[258]1059
[493]1060        // initialise the "tasks" and "current" variables default values
1061        psched->tasks   = 0;
1062        psched->current = IDLE_TASK_INDEX;
[258]1063
[527]1064        // set default values for HWI / PTI / SWI vectors (valid bit = 0)
[493]1065        unsigned int slot;
1066        for (slot = 0; slot < 32; slot++)
[258]1067        {
[493]1068            psched->hwi_vector[slot] = 0;
1069            psched->pti_vector[slot] = 0;
1070            psched->wti_vector[slot] = 0;
[258]1071        }
[493]1072
1073        // initializes the idle_task context:
1074        // - the SR slot is 0xFF03 because this task run in kernel mode.
1075        // - it uses the page table of vspace[0]
1076        // - it uses the kernel TTY terminal
1077        // - slots containing addresses (SP,RA,EPC) are initialised by kernel_init()
1078
1079        psched->context[IDLE_TASK_INDEX][CTX_CR_ID]   = 0;
1080        psched->context[IDLE_TASK_INDEX][CTX_SR_ID]   = 0xFF03;
1081        psched->context[IDLE_TASK_INDEX][CTX_PTPR_ID] = _ptabs_paddr[0][x][y]>>13;
1082        psched->context[IDLE_TASK_INDEX][CTX_PTAB_ID] = _ptabs_vaddr[0][x][y];
1083        psched->context[IDLE_TASK_INDEX][CTX_TTY_ID]  = 0;
1084        psched->context[IDLE_TASK_INDEX][CTX_LTID_ID] = IDLE_TASK_INDEX;
1085        psched->context[IDLE_TASK_INDEX][CTX_VSID_ID] = 0;
1086        psched->context[IDLE_TASK_INDEX][CTX_RUN_ID]  = 1;
1087    }
1088
[527]1089    // HWI / PTI / WTI masks (up to 8 local processors)
1090    unsigned int hwi_mask[8] = {0,0,0,0,0,0,0,0};
1091    unsigned int pti_mask[8] = {0,0,0,0,0,0,0,0};
1092    unsigned int wti_mask[8] = {0,0,0,0,0,0,0,0};
1093
1094    // scan local peripherals to get and check local XCU
[493]1095    mapping_periph_t*  xcu = NULL;
[527]1096    unsigned int       min = cluster[cluster_id].periph_offset ;
1097    unsigned int       max = min + cluster[cluster_id].periphs ;
[493]1098
[527]1099    for ( periph_id = min ; periph_id < max ; periph_id++ )
[493]1100    {
1101        if( periph[periph_id].type == PERIPH_TYPE_XCU ) 
[258]1102        {
[493]1103            xcu = &periph[periph_id];
[258]1104
[527]1105            // check nb_hwi_in
1106            if ( xcu->arg0 < xcu->irqs )
[295]1107            {
[538]1108                _printf("\n[BOOT ERROR] Not enough HWI inputs for XCU[%d,%d]"
1109                        " : nb_hwi = %d / nb_irqs = %d\n",
1110                         x , y , xcu->arg0 , xcu->irqs );
[295]1111                _exit();
1112            }
[527]1113            // check nb_pti_in
1114            if ( xcu->arg2 < nprocs )
1115            {
1116                _printf("\n[BOOT ERROR] Not enough PTI inputs for XCU[%d,%d]\n",
1117                         x, y );
1118                _exit();
1119            }
1120            // check nb_wti_in
1121            if ( xcu->arg1 < (4 * nprocs) )
1122            {
1123                _printf("\n[BOOT ERROR] Not enough WTI inputs for XCU[%d,%d]\n",
1124                        x, y );
1125                _exit();
1126            }
1127            // check nb_irq_out
[538]1128            if ( xcu->channels < (nprocs * header->irq_per_proc) )
[527]1129            {
1130                _printf("\n[BOOT ERROR] Not enough outputs for XCU[%d,%d]\n",
1131                        x, y );
1132                _exit();
1133            }
[493]1134        }
1135    } 
[263]1136
[493]1137    if ( xcu == NULL )
1138    {         
1139        _printf("\n[BOOT ERROR] missing XCU in cluster[%d,%d]\n", x , y );
1140        _exit();
1141    }
[321]1142
[527]1143    // HWI interrupt vector definition
1144    // scan HWI connected to local XCU
[493]1145    // for round-robin allocation to local processors
1146    lpid = 0;
1147    for ( irq_id = xcu->irq_offset ;
1148          irq_id < xcu->irq_offset + xcu->irqs ;
1149          irq_id++ )
1150    {
1151        unsigned int type    = irq[irq_id].srctype;
1152        unsigned int srcid   = irq[irq_id].srcid;
1153        unsigned int isr     = irq[irq_id].isr & 0xFFFF;
1154        unsigned int channel = irq[irq_id].channel << 16;
[321]1155
[493]1156        if ( (type != IRQ_TYPE_HWI) || (srcid > 31) )
1157        {
1158            _printf("\n[BOOT ERROR] Bad IRQ in cluster[%d,%d]\n", x, y );
1159            _exit();
1160        }
[295]1161
[527]1162        // register entry in HWI interrupt vector
1163        _schedulers[x][y][lpid]->hwi_vector[srcid] = isr | channel;
[295]1164
[527]1165        // update XCU HWI mask for P[x,y,lpid]
1166        hwi_mask[lpid] = hwi_mask[lpid] | (1<<srcid);
1167
[493]1168        lpid = (lpid + 1) % nprocs; 
1169    } // end for irqs
[412]1170
[527]1171    // PTI interrupt vector definition
1172    // one PTI for TICK per processor
1173    for ( lpid = 0 ; lpid < nprocs ; lpid++ )
1174    {
1175        // register entry in PTI interrupt vector
1176        _schedulers[x][y][lpid]->pti_vector[lpid] = ISR_TICK;
1177
1178        // update XCU PTI mask for P[x,y,lpid]
1179        pti_mask[lpid] = pti_mask[lpid] | (1<<lpid);
1180    }
1181
1182    // WTI interrupt vector definition
1183    // 4 WTI per processor, first for WAKUP
1184    for ( lpid = 0 ; lpid < nprocs ; lpid++ )
1185    {
1186        // register WAKUP ISR in WTI interrupt vector
[552]1187        _schedulers[x][y][lpid]->wti_vector[lpid] = ISR_WAKUP;
[527]1188
1189        // update XCU WTI mask for P[x,y,lpid] (4 entries per proc)
1190        wti_mask[lpid] = wti_mask[lpid] | (0x1<<(lpid                 ));
1191        wti_mask[lpid] = wti_mask[lpid] | (0x1<<(lpid + NB_PROCS_MAX  ));
1192        wti_mask[lpid] = wti_mask[lpid] | (0x1<<(lpid + 2*NB_PROCS_MAX));
1193        wti_mask[lpid] = wti_mask[lpid] | (0x1<<(lpid + 3*NB_PROCS_MAX));
1194    }
1195
1196    // set the XCU masks for HWI / WTI / PTI interrupts
1197    for ( lpid = 0 ; lpid < nprocs ; lpid++ )
1198    {
1199        unsigned int channel = lpid * IRQ_PER_PROCESSOR; 
1200
1201        _xcu_set_mask( cluster_xy, channel, hwi_mask[lpid], IRQ_TYPE_HWI ); 
1202        _xcu_set_mask( cluster_xy, channel, wti_mask[lpid], IRQ_TYPE_WTI );
1203        _xcu_set_mask( cluster_xy, channel, pti_mask[lpid], IRQ_TYPE_PTI );
1204    }
1205
[493]1206    //////////////////////////////////////////////
1207    _simple_barrier_wait( &_barrier_all_clusters );
1208    //////////////////////////////////////////////
[412]1209
[527]1210#if BOOT_DEBUG_SCHED
1211if ( cluster_xy == 0 ) boot_sched_irq_display();
1212_simple_barrier_wait( &_barrier_all_clusters );
1213#endif
1214
1215    ///////////////////////////////////////////////////////////////////////////////
[493]1216    // Step 2 : Initialise the tasks context. The context of task placed
1217    //          on  processor P must be stored in the scheduler of P.
1218    //          This require two nested loops: loop on the tasks, and loop
1219    //          on the local processors. We complete the scheduler when the
1220    //          required placement fit one local processor.
[527]1221    ///////////////////////////////////////////////////////////////////////////////
[412]1222
[493]1223    for (vspace_id = 0; vspace_id < header->vspaces; vspace_id++) 
1224    {
1225        // We must set the PTPR depending on the vspace, because the start_vector
1226        // and the stack address are defined in virtual space.
1227        _set_mmu_ptpr( (unsigned int)(_ptabs_paddr[vspace_id][x][y] >> 13) );
[258]1228
[493]1229        // loop on the tasks in vspace (task_id is the global index in mapping)
1230        for (task_id = vspace[vspace_id].task_offset;
1231             task_id < (vspace[vspace_id].task_offset + vspace[vspace_id].tasks);
1232             task_id++) 
1233        {
1234            // get the required task placement coordinates [x,y,p]
1235            unsigned int req_x      = cluster[task[task_id].clusterid].x;
1236            unsigned int req_y      = cluster[task[task_id].clusterid].y;
1237            unsigned int req_p      = task[task_id].proclocid;                 
[258]1238
[493]1239            // ctx_sr : value required before an eret instruction
1240            unsigned int ctx_sr = 0x2000FF13;
[321]1241
[493]1242            // ctx_ptpr : page table physical base address (shifted by 13 bit)
1243            unsigned int ctx_ptpr = (_ptabs_paddr[vspace_id][req_x][req_y] >> 13);
[258]1244
[493]1245            // ctx_ptab : page_table virtual base address
1246            unsigned int ctx_ptab = _ptabs_vaddr[vspace_id][req_x][req_y];
[412]1247
[493]1248            // ctx_epc : Get the virtual address of the memory location containing
[527]1249            // the task entry point : the start_vector is stored by GCC in the
1250            // seg_data segment, and we must wait the .elf loading to get
1251            // the entry point value...
[513]1252            vseg_id = vspace[vspace_id].start_vseg_id;     
1253            unsigned int ctx_epc = vseg[vseg_id].vbase + (task[task_id].startid)*4;
[258]1254
[513]1255            // ctx_sp :  Get the vseg containing the stack
1256            vseg_id = task[task_id].stack_vseg_id;
1257            unsigned int ctx_sp = vseg[vseg_id].vbase + vseg[vseg_id].length;
[493]1258
1259            // get vspace thread index
1260            unsigned int thread_id = task[task_id].trdid;
1261
1262            // loop on the local processors
1263            for ( lpid = 0 ; lpid < nprocs ; lpid++ )
[258]1264            {
[493]1265                if ( (x == req_x) && (y == req_y) && (req_p == lpid) )   // fit
[295]1266                {
[493]1267                    // pointer on selected scheduler
1268                    psched = _schedulers[x][y][lpid];
[258]1269
[493]1270                    // get local task index in scheduler
1271                    unsigned int ltid = psched->tasks;
[258]1272
[493]1273                    // update the "tasks" and "current" fields in scheduler:
1274                    psched->tasks   = ltid + 1;
1275                    psched->current = 0;
[258]1276
[493]1277                    // initializes the task context
1278                    psched->context[ltid][CTX_CR_ID]     = 0;
1279                    psched->context[ltid][CTX_SR_ID]     = ctx_sr;
1280                    psched->context[ltid][CTX_SP_ID]     = ctx_sp;
1281                    psched->context[ltid][CTX_EPC_ID]    = ctx_epc;
1282                    psched->context[ltid][CTX_PTPR_ID]   = ctx_ptpr;
1283                    psched->context[ltid][CTX_PTAB_ID]   = ctx_ptab;
1284                    psched->context[ltid][CTX_LTID_ID]   = ltid;
1285                    psched->context[ltid][CTX_GTID_ID]   = task_id;
1286                    psched->context[ltid][CTX_TRDID_ID]  = thread_id;
1287                    psched->context[ltid][CTX_VSID_ID]   = vspace_id;
1288                    psched->context[ltid][CTX_RUN_ID]    = 1;
[321]1289
[493]1290                    psched->context[ltid][CTX_TTY_ID]    = 0xFFFFFFFF;
1291                    psched->context[ltid][CTX_CMA_FB_ID] = 0xFFFFFFFF;
1292                    psched->context[ltid][CTX_CMA_RX_ID] = 0xFFFFFFFF;
1293                    psched->context[ltid][CTX_CMA_TX_ID] = 0xFFFFFFFF;
1294                    psched->context[ltid][CTX_NIC_RX_ID] = 0xFFFFFFFF;
1295                    psched->context[ltid][CTX_NIC_TX_ID] = 0xFFFFFFFF;
1296                    psched->context[ltid][CTX_TIM_ID]    = 0xFFFFFFFF;
1297                    psched->context[ltid][CTX_HBA_ID]    = 0xFFFFFFFF;
1298
1299#if BOOT_DEBUG_SCHED
1300_printf("\nTask %s in vspace %s allocated to P[%d,%d,%d]\n"
1301        " - ctx[LTID]  = %d\n"
1302        " - ctx[SR]    = %x\n"
1303        " - ctx[SP]    = %x\n"
1304        " - ctx[EPC]   = %x\n"
1305        " - ctx[PTPR]  = %x\n"
1306        " - ctx[PTAB]  = %x\n"
1307        " - ctx[VSID]  = %d\n"
1308        " - ctx[TRDID] = %d\n",
1309        task[task_id].name,
1310        vspace[vspace_id].name,
1311        x, y, lpid,
1312        psched->context[ltid][CTX_LTID_ID],
1313        psched->context[ltid][CTX_SR_ID],
1314        psched->context[ltid][CTX_SP_ID],
1315        psched->context[ltid][CTX_EPC_ID],
1316        psched->context[ltid][CTX_PTPR_ID],
1317        psched->context[ltid][CTX_PTAB_ID],
1318        psched->context[ltid][CTX_VSID_ID],
1319        psched->context[ltid][CTX_TRDID_ID] );
1320#endif
1321                } // end if FIT
1322            } // end for loop on local procs
1323        } // end loop on tasks
1324    } // end loop on vspaces
1325} // end boot_scheduler_init()
1326
1327
1328
[258]1329//////////////////////////////////////////////////////////////////////////////////
1330// This function loads the map.bin file from block device.
1331//////////////////////////////////////////////////////////////////////////////////
1332void boot_mapping_init()
1333{
[590]1334    // load map.bin file into buffer
1335    if ( _fat_load_no_cache( "map.bin",
1336                             SEG_BOOT_MAPPING_BASE,
1337                             SEG_BOOT_MAPPING_SIZE ) )
[258]1338    {
[493]1339        _printf("\n[BOOT ERROR] : map.bin file not found \n");
[258]1340        _exit();
1341    }
1342
[477]1343    // check mapping signature, number of clusters, number of vspaces 
1344    mapping_header_t * header = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
1345    if ( (header->signature != IN_MAPPING_SIGNATURE) ||
1346         (header->x_size    != X_SIZE)               || 
1347         (header->y_size    != Y_SIZE)               ||
1348         (header->vspaces   > GIET_NB_VSPACE_MAX)    )
1349    {
[590]1350        _printf("\n[BOOT ERROR] Illegal mapping : signature = %x\n", header->signature );
[524]1351        _exit();
1352    }
[477]1353
1354#if BOOT_DEBUG_MAPPING
1355unsigned int  line;
1356unsigned int* pointer = (unsigned int*)SEG_BOOT_MAPPING_BASE;
[493]1357_printf("\n[BOOT] First block of mapping\n");
[477]1358for ( line = 0 ; line < 8 ; line++ )
1359{
[590]1360    _printf(" | %X | %X | %X | %X | %X | %X | %X | %X |\n",
[493]1361            *(pointer + 0),
1362            *(pointer + 1),
1363            *(pointer + 2),
1364            *(pointer + 3),
1365            *(pointer + 4),
1366            *(pointer + 5),
1367            *(pointer + 6),
1368            *(pointer + 7) );
1369
[477]1370    pointer = pointer + 8;
1371}
1372#endif
1373
[258]1374} // end boot_mapping_init()
1375
1376
[557]1377///////////////////////////////////////////////////
1378void boot_dma_copy( unsigned int        cluster_xy,     
1379                    unsigned long long  dst_paddr,
1380                    unsigned long long  src_paddr, 
1381                    unsigned int        size )   
1382{
1383    // size must be multiple of 64 bytes
1384    if ( size & 0x3F ) size = (size & (~0x3F)) + 0x40;
1385
1386    unsigned int mode = MODE_DMA_NO_IRQ;
1387
1388    unsigned int src     = 0;
1389    unsigned int src_lsb = (unsigned int)src_paddr;
1390    unsigned int src_msb = (unsigned int)(src_paddr>>32);
1391   
1392    unsigned int dst     = 1;
1393    unsigned int dst_lsb = (unsigned int)dst_paddr;
1394    unsigned int dst_msb = (unsigned int)(dst_paddr>>32);
1395
1396    // initializes src channel
1397    _mwr_set_channel_register( cluster_xy , src , MWR_CHANNEL_MODE       , mode );
1398    _mwr_set_channel_register( cluster_xy , src , MWR_CHANNEL_SIZE       , size );
1399    _mwr_set_channel_register( cluster_xy , src , MWR_CHANNEL_BUFFER_LSB , src_lsb );
1400    _mwr_set_channel_register( cluster_xy , src , MWR_CHANNEL_BUFFER_MSB , src_msb );
1401    _mwr_set_channel_register( cluster_xy , src , MWR_CHANNEL_RUNNING    , 1 );
1402
1403    // initializes dst channel
1404    _mwr_set_channel_register( cluster_xy , dst , MWR_CHANNEL_MODE       , mode );
1405    _mwr_set_channel_register( cluster_xy , dst , MWR_CHANNEL_SIZE       , size );
1406    _mwr_set_channel_register( cluster_xy , dst , MWR_CHANNEL_BUFFER_LSB , dst_lsb );
1407    _mwr_set_channel_register( cluster_xy , dst , MWR_CHANNEL_BUFFER_MSB , dst_msb );
1408    _mwr_set_channel_register( cluster_xy , dst , MWR_CHANNEL_RUNNING    , 1 );
1409
1410    // start CPY coprocessor (write non-zero value into config register)
1411    _mwr_set_coproc_register( cluster_xy, 0 , 1 );
1412
1413    // poll dst channel status register to detect completion
1414    unsigned int status;
1415    do
1416    {
1417        status = _mwr_get_channel_register( cluster_xy , dst , MWR_CHANNEL_STATUS );
1418    } while ( status == MWR_CHANNEL_BUSY );
1419
1420    if ( status )
1421    {
1422        _printf("\n[BOOT ERROR] in boot_dma_copy()\n");
1423        _exit();
1424    } 
1425 
1426    // stop CPY coprocessor and DMA channels
1427    _mwr_set_channel_register( cluster_xy , src , MWR_CHANNEL_RUNNING    , 0 );
1428    _mwr_set_channel_register( cluster_xy , dst , MWR_CHANNEL_RUNNING    , 0 );
1429    _mwr_set_coproc_register ( cluster_xy , 0 , 0 );
1430
1431}  // end boot_dma_copy()
1432
[527]1433//////////////////////////////////////////////////////////////////////////////////
1434// This function load all loadable segments contained in the .elf file identified
[347]1435// by the "pathname" argument. Some loadable segments can be copied in several
1436// clusters: same virtual address but different physical addresses. 
1437// - It open the file.
[527]1438// - It loads the complete file in the dedicated _boot_elf_buffer.
[359]1439// - It copies each loadable segments  at the virtual address defined in
1440//   the .elf file, making several copies if the target vseg is not local.
[347]1441// - It closes the file.
[527]1442// This function is supposed to be executed by all processors[x,y,0].
1443//
1444// Note: We must use physical addresses to reach the destination buffers that
1445// can be located in remote clusters. We use either a _physical_memcpy(),
1446// or a _dma_physical_copy() if DMA is available.
1447//////////////////////////////////////////////////////////////////////////////////
[347]1448void load_one_elf_file( unsigned int is_kernel,     // kernel file if non zero
[258]1449                        char*        pathname,
[347]1450                        unsigned int vspace_id )    // to scan the proper vspace
[258]1451{
[347]1452    mapping_header_t  * header  = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
1453    mapping_vspace_t  * vspace  = _get_vspace_base(header);
1454    mapping_vseg_t    * vseg    = _get_vseg_base(header);
1455
[527]1456    unsigned int procid = _get_procid();
1457    unsigned int cxy    = procid >> P_WIDTH;
1458    unsigned int x      = cxy >> Y_WIDTH;
1459    unsigned int y      = cxy & ((1<<Y_WIDTH)-1);
1460    unsigned int p      = procid & ((1<<P_WIDTH)-1);
[258]1461
1462#if BOOT_DEBUG_ELF
[557]1463_printf("\n[DEBUG BOOT_ELF] load_one_elf_file() : P[%d,%d,%d] enters for %s\n",
[527]1464        x , y , p , pathname );
[258]1465#endif
1466
[527]1467    Elf32_Ehdr* elf_header_ptr = NULL;  //  avoid a warning
[258]1468
[590]1469    // only P[0,0,0] load file
[527]1470    if ( (cxy == 0) && (p == 0) )
[258]1471    {
[590]1472        if ( _fat_load_no_cache( pathname,
1473                                 (unsigned int)_boot_elf_buffer,
1474                                 GIET_ELF_BUFFER_SIZE ) )
[527]1475        {
[590]1476            _printf("\n[BOOT ERROR] in load_one_elf_file() : %s\n", pathname );
[527]1477            _exit();
1478        }
[258]1479
[527]1480        // Check ELF Magic Number in ELF header
1481        Elf32_Ehdr* ptr = (Elf32_Ehdr*)_boot_elf_buffer;
1482
1483        if ( (ptr->e_ident[EI_MAG0] != ELFMAG0) ||
1484             (ptr->e_ident[EI_MAG1] != ELFMAG1) ||
1485             (ptr->e_ident[EI_MAG2] != ELFMAG2) ||
1486             (ptr->e_ident[EI_MAG3] != ELFMAG3) )
1487        {
[557]1488            _printf("\n[BOOT ERROR] load_one_elf_file() : %s not ELF format\n",
[527]1489                    pathname );
1490            _exit();
1491        }
1492
1493#if BOOT_DEBUG_ELF
[557]1494_printf("\n[DEBUG BOOT_ELF] load_one_elf_file() : P[%d,%d,%d] load %s at cycle %d\n", 
1495        x , y , p , pathname , _get_proctime() );
[527]1496#endif
1497
1498    } // end if P[0,0,0]
1499
1500    //////////////////////////////////////////////
1501    _simple_barrier_wait( &_barrier_all_clusters );
1502    //////////////////////////////////////////////
1503
1504    // Each processor P[x,y,0] copy replicated segments in cluster[x,y]
1505    elf_header_ptr = (Elf32_Ehdr*)_boot_elf_buffer;
1506
[258]1507    // get program header table pointer
[527]1508    unsigned int offset = elf_header_ptr->e_phoff;
1509    if( offset == 0 )
[258]1510    {
[493]1511        _printf("\n[BOOT ERROR] load_one_elf_file() : file %s "
1512                "does not contain loadable segment\n", pathname );
[258]1513        _exit();
1514    }
1515
[527]1516    Elf32_Phdr* elf_pht_ptr = (Elf32_Phdr*)(_boot_elf_buffer + offset);
1517
[258]1518    // get number of segments
1519    unsigned int nsegments   = elf_header_ptr->e_phnum;
1520
[527]1521    // First loop on loadable segments in the .elf file
1522    unsigned int seg_id;
[258]1523    for (seg_id = 0 ; seg_id < nsegments ; seg_id++)
1524    {
1525        if(elf_pht_ptr[seg_id].p_type == PT_LOAD)
1526        {
1527            // Get segment attributes
1528            unsigned int seg_vaddr  = elf_pht_ptr[seg_id].p_vaddr;
1529            unsigned int seg_offset = elf_pht_ptr[seg_id].p_offset;
1530            unsigned int seg_filesz = elf_pht_ptr[seg_id].p_filesz;
1531            unsigned int seg_memsz  = elf_pht_ptr[seg_id].p_memsz;
1532
[527]1533            if( seg_memsz != seg_filesz )
[258]1534            {
[527]1535                _printf("\n[BOOT ERROR] load_one_elf_file() : segment at vaddr = %x\n"
1536                        " in file %s has memsize = %x / filesize = %x \n"
1537                        " check that all global variables are in data segment\n", 
1538                        seg_vaddr, pathname , seg_memsz , seg_filesz );
[258]1539                _exit();
1540            }
1541
[527]1542            unsigned int src_vaddr = (unsigned int)_boot_elf_buffer + seg_offset;
[258]1543
[347]1544            // search all vsegs matching the virtual address
1545            unsigned int vseg_first;
1546            unsigned int vseg_last;
1547            unsigned int vseg_id;
1548            unsigned int found = 0;
1549            if ( is_kernel )
1550            {
1551                vseg_first = 0;
1552                vseg_last  = header->globals;
1553            }
1554            else
1555            {
1556                vseg_first = vspace[vspace_id].vseg_offset;
1557                vseg_last  = vseg_first + vspace[vspace_id].vsegs;
1558            }
1559
[527]1560            // Second loop on vsegs in the mapping
[347]1561            for ( vseg_id = vseg_first ; vseg_id < vseg_last ; vseg_id++ )
1562            {
1563                if ( seg_vaddr == vseg[vseg_id].vbase )  // matching
1564                {
1565                    found = 1;
1566
[527]1567                    // get destination buffer physical address, size, coordinates
[347]1568                    paddr_t      seg_paddr  = vseg[vseg_id].pbase;
[513]1569                    unsigned int seg_size   = vseg[vseg_id].length;
[557]1570                    unsigned int cluster_xy = (unsigned int)(seg_paddr>>32);
1571                    unsigned int cx         = cluster_xy >> Y_WIDTH;
1572                    unsigned int cy         = cluster_xy & ((1<<Y_WIDTH)-1);
[527]1573
[347]1574                    // check vseg size
1575                    if ( seg_size < seg_filesz )
1576                    {
[493]1577                        _printf("\n[BOOT ERROR] in load_one_elf_file() : vseg %s "
[590]1578                                "is too small for segment %x\n"
1579                                "  file = %s / vseg_size = %x / seg_file_size = %x\n",
1580                                vseg[vseg_id].name , seg_vaddr , pathname,
1581                                seg_size , seg_filesz );
[347]1582                        _exit();
1583                    }
[258]1584
[527]1585                    // P[x,y,0] copy the segment from boot buffer in cluster[0,0]
[557]1586                    // to destination buffer in cluster[x,y], using DMA if available
[527]1587                    if ( (cx == x) && (cy == y) )
[347]1588                    {
[557]1589                        if( USE_MWR_CPY )
[527]1590                        {
[557]1591                            boot_dma_copy( cluster_xy,  // DMA in cluster[x,y]       
1592                                           seg_paddr,
1593                                           (paddr_t)src_vaddr, 
1594                                           seg_filesz );   
1595#if BOOT_DEBUG_ELF
1596_printf("\n[DEBUG BOOT_ELF] load_one_elf_file() : DMA[%d,%d] copy segment %d :\n"
1597        "  vaddr = %x / size = %x / paddr = %l\n",
1598        x , y , seg_id , seg_vaddr , seg_memsz , seg_paddr );
1599#endif
[527]1600                        }
1601                        else
1602                        {
1603                            _physical_memcpy( seg_paddr,            // dest paddr
1604                                              (paddr_t)src_vaddr,   // source paddr
1605                                              seg_filesz );         // size
1606#if BOOT_DEBUG_ELF
[557]1607_printf("\n[DEBUG BOOT_ELF] load_one_elf_file() : P[%d,%d,%d] copy segment %d :\n"
[527]1608        "  vaddr = %x / size = %x / paddr = %l\n",
1609        x , y , p , seg_id , seg_vaddr , seg_memsz , seg_paddr );
1610#endif
[557]1611                        }
[347]1612                    }
1613                }
[527]1614            }  // end for vsegs
[347]1615
1616            // check at least one matching vseg
1617            if ( found == 0 )
[258]1618            {
[493]1619                _printf("\n[BOOT ERROR] in load_one_elf_file() : vseg for loadable "
1620                        "segment %x in file %s not found "
1621                        "check consistency between the .py and .ld files\n",
1622                        seg_vaddr, pathname );
[347]1623                _exit();
[258]1624            }
1625        }
[347]1626    }  // end for loadable segments
[258]1627
[527]1628    //////////////////////////////////////////////
1629    _simple_barrier_wait( &_barrier_all_clusters );
1630    //////////////////////////////////////////////
[258]1631
[590]1632    // only P[0,0,0] signals completion
[527]1633    if ( (cxy == 0) && (p == 0) )
1634    {
1635        _printf("\n[BOOT] File %s loaded at cycle %d\n", 
1636                pathname , _get_proctime() );
1637    }
1638
[258]1639} // end load_one_elf_file()
1640
1641
[347]1642/////i////////////////////////////////////////////////////////////////////////////////
[258]1643// This function uses the map.bin data structure to load the "kernel.elf" file
[347]1644// as well as the various "application.elf" files into memory.
1645// - The "preloader.elf" file is not loaded, because it has been burned in the ROM.
1646// - The "boot.elf" file is not loaded, because it has been loaded by the preloader.
[513]1647// This function scans all vsegs defined in the map.bin data structure to collect
[347]1648// all .elf files pathnames, and calls the load_one_elf_file() for each .elf file.
1649// As the code can be replicated in several vsegs, the same code can be copied
1650// in one or several clusters by the load_one_elf_file() function.
1651//////////////////////////////////////////////////////////////////////////////////////
[258]1652void boot_elf_load()
1653{
[321]1654    mapping_header_t* header = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
[258]1655    mapping_vspace_t* vspace = _get_vspace_base( header );
[513]1656    mapping_vseg_t*   vseg   = _get_vseg_base( header );
1657
[258]1658    unsigned int      vspace_id;
[513]1659    unsigned int      vseg_id;
[258]1660    unsigned int      found;
1661
[513]1662    // Scan all global vsegs to find the pathname to the kernel.elf file
[258]1663    found = 0;
[513]1664    for( vseg_id = 0 ; vseg_id < header->globals ; vseg_id++ )
[258]1665    {
[513]1666        if(vseg[vseg_id].type == VSEG_TYPE_ELF) 
[258]1667        {   
1668            found = 1;
1669            break;
1670        }
1671    }
1672
1673    // We need one kernel.elf file
1674    if (found == 0)
1675    {
[493]1676        _printf("\n[BOOT ERROR] boot_elf_load() : kernel.elf file not found\n");
[258]1677        _exit();
1678    }
1679
[347]1680    // Load the kernel
1681    load_one_elf_file( 1,                           // kernel file
[513]1682                       vseg[vseg_id].binpath,       // file pathname
[258]1683                       0 );                         // vspace 0
1684
[513]1685    // loop on the vspaces, scanning all vsegs in the vspace,
[258]1686    // to find the pathname of the .elf file associated to the vspace.
1687    for( vspace_id = 0 ; vspace_id < header->vspaces ; vspace_id++ )
1688    {
[513]1689        // loop on the private vsegs
[258]1690        unsigned int found = 0;
[513]1691        for (vseg_id = vspace[vspace_id].vseg_offset;
1692             vseg_id < (vspace[vspace_id].vseg_offset + vspace[vspace_id].vsegs);
1693             vseg_id++) 
[258]1694        {
[513]1695            if(vseg[vseg_id].type == VSEG_TYPE_ELF) 
[258]1696            {   
1697                found = 1;
1698                break;
1699            }
1700        }
1701
1702        // We want one .elf file per vspace
1703        if (found == 0)
1704        {
[493]1705            _printf("\n[BOOT ERROR] boot_elf_load() : "
1706                    ".elf file not found for vspace %s\n", vspace[vspace_id].name );
[258]1707            _exit();
1708        }
1709
[347]1710        load_one_elf_file( 0,                          // not a kernel file
[513]1711                           vseg[vseg_id].binpath,      // file pathname
[347]1712                           vspace_id );                // vspace index
[258]1713
1714    }  // end for vspaces
1715
1716} // end boot_elf_load()
1717
1718
[527]1719/////////////////////////////////////////////////////////////////////////////////
[493]1720// This function is executed in parallel by all processors[x][y][0].
[527]1721// It initialises the physical memory allocator in each cluster containing
1722// a RAM pseg.
1723/////////////////////////////////////////////////////////////////////////////////
[493]1724void boot_pmem_init( unsigned int cx,
1725                     unsigned int cy ) 
[412]1726{
1727    mapping_header_t*  header     = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
1728    mapping_cluster_t* cluster    = _get_cluster_base(header);
1729    mapping_pseg_t*    pseg       = _get_pseg_base(header);
1730
1731    unsigned int pseg_id;
[490]1732    unsigned int procid     = _get_procid();
1733    unsigned int lpid       = procid & ((1<<P_WIDTH)-1);
[493]1734
1735    if( lpid )
[490]1736    {
[493]1737        _printf("\n[BOOT ERROR] boot_pmem_init() : "
1738        "P[%d][%d][%d] should not execute it\n", cx, cy, lpid );
[490]1739        _exit();
[493]1740    }   
[412]1741
[493]1742    // scan the psegs in local cluster to find  pseg of type RAM
1743    unsigned int found      = 0;
1744    unsigned int cluster_id = cx * Y_SIZE + cy;
1745    unsigned int pseg_min   = cluster[cluster_id].pseg_offset;
1746    unsigned int pseg_max   = pseg_min + cluster[cluster_id].psegs;
[490]1747    for ( pseg_id = pseg_min ; pseg_id < pseg_max ; pseg_id++ )
[412]1748    {
[490]1749        if ( pseg[pseg_id].type == PSEG_TYPE_RAM )
[412]1750        {
[490]1751            unsigned int base = (unsigned int)pseg[pseg_id].base;
1752            unsigned int size = (unsigned int)pseg[pseg_id].length;
[493]1753            _pmem_alloc_init( cx, cy, base, size );
1754            found = 1;
[412]1755
1756#if BOOT_DEBUG_PT
[493]1757_printf("\n[BOOT] pmem allocator initialised in cluster[%d][%d]"
1758        " : base = %x / size = %x\n", cx , cy , base , size );
[412]1759#endif
[490]1760            break;
[412]1761        }
1762    }
[493]1763
1764    if ( found == 0 )
1765    {
1766        _printf("\n[BOOT ERROR] boot_pmem_init() : no RAM in cluster[%d][%d]\n",
[527]1767                cx , cy );
[493]1768        _exit();
1769    }   
[412]1770} // end boot_pmem_init()
1771 
1772/////////////////////////////////////////////////////////////////////////
[258]1773// This function is the entry point of the boot code for all processors.
1774/////////////////////////////////////////////////////////////////////////
[347]1775void boot_init() 
[258]1776{
[493]1777
[295]1778    unsigned int       gpid       = _get_procid();
[493]1779    unsigned int       cx         = gpid >> (Y_WIDTH + P_WIDTH);
1780    unsigned int       cy         = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
1781    unsigned int       lpid       = gpid & ((1 << P_WIDTH) -1);
[490]1782
[527]1783    //////////////////////////////////////////////////////////
[493]1784    // Phase ONE : only P[0][0][0] execute it
[527]1785    //////////////////////////////////////////////////////////
[493]1786    if ( gpid == 0 )   
[258]1787    {
[552]1788        unsigned int cid;  // index for loop on clusters
[258]1789
[493]1790        // initialises the TTY0 spin lock
1791        _spin_lock_init( &_tty0_spin_lock );
1792
1793        _printf("\n[BOOT] P[0,0,0] starts at cycle %d\n", _get_proctime() );
1794
[552]1795        // initialises the IOC peripheral
1796        if      ( USE_IOC_BDV != 0 ) _bdv_init();
1797        else if ( USE_IOC_HBA != 0 ) _hba_init();
1798        else if ( USE_IOC_SDC != 0 ) _sdc_init();
1799        else if ( USE_IOC_RDK == 0 )
1800        {
1801            _printf("\n[BOOT ERROR] boot_init() : no IOC peripheral\n");
1802            _exit();
1803        }
1804
[460]1805        // initialises the FAT
[590]1806        _fat_init( 0 );          // don't use Inode-Tree, Fat-Cache, etc.
[460]1807
[493]1808        _printf("\n[BOOT] FAT initialised at cycle %d\n", _get_proctime() );
1809
1810        // Load the map.bin file into memory
[258]1811        boot_mapping_init();
1812
[524]1813        mapping_header_t*  header     = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
1814        mapping_cluster_t* cluster    = _get_cluster_base(header);
1815
[493]1816        _printf("\n[BOOT] Mapping %s loaded at cycle %d\n",
1817                header->name , _get_proctime() );
[258]1818
[493]1819        // initialises the barrier for all clusters containing processors
1820        unsigned int nclusters = 0;
1821        for ( cid = 0 ; cid < X_SIZE*Y_SIZE ; cid++ )
1822        {
1823            if ( cluster[cid].procs ) nclusters++ ;
1824        } 
[490]1825
[493]1826        _simple_barrier_init( &_barrier_all_clusters , nclusters );
1827
1828        // wake up all processors P[x][y][0]
1829        for ( cid = 1 ; cid < X_SIZE*Y_SIZE ; cid++ ) 
[490]1830        {
[493]1831            unsigned int x          = cluster[cid].x;
1832            unsigned int y          = cluster[cid].y;
1833            unsigned int cluster_xy = (x << Y_WIDTH) + y;
[490]1834
[493]1835            if ( cluster[cid].procs ) 
1836            {
1837                unsigned long long paddr = (((unsigned long long)cluster_xy)<<32) +
[527]1838                                           SEG_XCU_BASE+XCU_REG( XCU_WTI_REG , 0 );
[493]1839
1840                _physical_write( paddr , (unsigned int)boot_entry );
1841            }
[490]1842        }
[412]1843
[527]1844        _printf("\n[BOOT] Processors P[x,y,0] start at cycle %d\n",
1845                _get_proctime() );
[490]1846    }
[412]1847
[527]1848    /////////////////////////////////////////////////////////////////
[493]1849    // Phase TWO : All processors P[x][y][0] execute it in parallel
[527]1850    /////////////////////////////////////////////////////////////////
[493]1851    if( lpid == 0 )
[490]1852    {
[493]1853        // Initializes physical memory allocator in cluster[cx][cy]
1854        boot_pmem_init( cx , cy );
[412]1855
[493]1856        // Build page table in cluster[cx][cy]
1857        boot_ptab_init( cx , cy );
[258]1858
[493]1859        //////////////////////////////////////////////
1860        _simple_barrier_wait( &_barrier_all_clusters );
1861        //////////////////////////////////////////////
[258]1862
[493]1863        // P[0][0][0] complete page tables with vsegs
1864        // mapped in clusters without processors
1865        if ( gpid == 0 )   
1866        {
1867            // complete page tables initialisation
1868            boot_ptab_extend();
[258]1869
[493]1870            _printf("\n[BOOT] Physical memory allocators and page tables"
1871                    " initialized at cycle %d\n", _get_proctime() );
1872        }
[258]1873
[493]1874        //////////////////////////////////////////////
1875        _simple_barrier_wait( &_barrier_all_clusters );
1876        //////////////////////////////////////////////
1877
1878        // All processors P[x,y,0] activate MMU (using local PTAB)
1879        _set_mmu_ptpr( (unsigned int)(_ptabs_paddr[0][cx][cy]>>13) );
1880        _set_mmu_mode( 0xF );
[258]1881       
[493]1882        // Each processor P[x,y,0] initialises all schedulers in cluster[x,y]
1883        boot_scheduler_init( cx , cy );
[258]1884
[493]1885        // Each processor P[x][y][0] initialises its CP0_SCHED register
1886        _set_sched( (unsigned int)_schedulers[cx][cy][0] );
[258]1887
[493]1888        //////////////////////////////////////////////
1889        _simple_barrier_wait( &_barrier_all_clusters );
1890        //////////////////////////////////////////////
[527]1891
[493]1892        if ( gpid == 0 ) 
1893        {
[527]1894            _printf("\n[BOOT] Schedulers initialised at cycle %d\n", 
1895                    _get_proctime() );
1896        }
[258]1897
[552]1898        // All processor P[x,y,0] contributes to load .elf files into clusters.
[527]1899        boot_elf_load();
[258]1900
[527]1901        //////////////////////////////////////////////
1902        _simple_barrier_wait( &_barrier_all_clusters );
1903        //////////////////////////////////////////////
1904       
[552]1905        // Each processor P[x][y][0] wake up other processors in same cluster
[524]1906        mapping_header_t*  header     = (mapping_header_t *)SEG_BOOT_MAPPING_BASE;
1907        mapping_cluster_t* cluster    = _get_cluster_base(header);
1908        unsigned int       cluster_xy = (cx << Y_WIDTH) + cy;
1909        unsigned int       cluster_id = (cx * Y_SIZE) + cy;
[493]1910        unsigned int p;
1911        for ( p = 1 ; p < cluster[cluster_id].procs ; p++ )
1912        {
1913            _xcu_send_wti( cluster_xy , p , (unsigned int)boot_entry );
1914        }
[258]1915
[527]1916        // only P[0][0][0] makes display
1917        if ( gpid == 0 )
1918        {   
1919            _printf("\n[BOOT] All processors start at cycle %d\n",
1920                    _get_proctime() );
1921        }
[493]1922    }
[552]1923    // All other processors activate MMU (using local PTAB)
[493]1924    if ( lpid != 0 )
[258]1925    {
[493]1926        _set_mmu_ptpr( (unsigned int)(_ptabs_paddr[0][cx][cy]>>13) );
[258]1927        _set_mmu_mode( 0xF );
1928    }
1929
[493]1930    // All processors set CP0_SCHED register
1931    _set_sched( (unsigned int)_schedulers[cx][cy][lpid] );
1932
1933    // All processors reset BEV bit in SR to use GIET_VM exception handler
[427]1934    _set_sr( 0 );
1935
[527]1936    // Each proocessor get kernel entry virtual address
1937    unsigned int kernel_entry = (unsigned int)&kernel_init_vbase;
1938
1939#if BOOT_DEBUG_ELF
[552]1940_printf("\n[DEBUG BOOT_ELF] P[%d,%d,%d] exit boot & jump to %x at cycle %d\n",
[527]1941        cx, cy, lpid, kernel_entry , _get_proctime() );
1942#endif
1943
[493]1944    // All processors jump to kernel_init
[258]1945    asm volatile( "jr   %0" ::"r"(kernel_entry) );
1946
1947} // end boot_init()
1948
1949
1950// Local Variables:
1951// tab-width: 4
1952// c-basic-offset: 4
1953// c-file-offsets:((innamespace . 0)(inline-open . 0))
1954// indent-tabs-mode: nil
1955// End:
1956// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4
1957
Note: See TracBrowser for help on using the repository browser.