source: soft/giet_vm/giet_kernel/kernel_init.c @ 483

Last change on this file since 483 was 478, checked in by alain, 10 years ago

1) The chained buffer structure has been modified to have one single buffer descriptor per cache line (64 bytes),
in order to simplify the software cache coherence between L2 and L3 caches when the IO bridge is used.
A new buffer_descriptor_t structure has been defined, and the fbf_chbuf_t and nic_chbuf_t structures have been adapted.
2) The NIC related system call handlers _sys_nic_start() and _sys_nic_move() have been modified to support a distributed
kernel chbuf (one 4 Kbytes buffer per cluster), in order to support the one Gbit Ethernet NIC controller throughput.

  • the _sys_nic_start() function initialises the distributed chbuf, using the distributed heap.
  • the _sys_nic_move() function transfers one 4 KBytes container from the local kernel chbuf to a local user buffer.

This approach has been validated on the "classif" application: no packet loss with 16 clusters for average packet
length = 600 bytes and inter-packet gap = 300 cycles.

  • Property svn:executable set to *
File size: 14.1 KB
RevLine 
[258]1///////////////////////////////////////////////////////////////////////////////////
2// File     : kernel_init.c
3// Date     : 26/05/2012
4// Authors  : alain greiner & mohamed karaoui
5// Copyright (c) UPMC-LIP6
6////////////////////////////////////////////////////////////////////////////////////
[440]7// This kernel_init.c file is part of the GIET-VM nano-kernel.
[258]8////////////////////////////////////////////////////////////////////////////////////
9
10#include <giet_config.h>
[440]11#include <hard_config.h>
[258]12#include <utils.h>
[459]13#include <tty0.h>
[467]14#include <kernel_malloc.h>
[258]15#include <fat32.h>
16#include <xcu_driver.h>
[459]17#include <ioc_driver.h>
[478]18#include <mmc_driver.h>
[258]19#include <ctx_handler.h>
20#include <irq_handler.h>
21#include <mapping_info.h>
22#include <mips32_registers.h>
23
[322]24#if !defined(X_SIZE)
25# error: You must define X_SIZE in the hard_config.h file
26#endif
27
28#if !defined(Y_SIZE)
29# error: You must define Y_SIZE in the hard_config.h file
30#endif
31
32#if !defined(Y_WIDTH)
33# error: You must define Y_WIDTH in the hard_config.h file
34#endif
35
36#if !defined(Y_WIDTH)
37# error: You must define Y_WIDTH in the hard_config.h file
38#endif
39
40#if !defined(NB_PROCS_MAX)
41# error: You must define NB_PROCS_MAX in the hard_config.h file
42#endif
43
44#if !defined(NB_TOTAL_PROCS)
45# error: You must define NB_TOTAL_PROCS in the hard_config.h file
46#endif
47
48#if !defined(USE_XCU)
49# error: You must define USE_XCU in the hard_config.h file
50#endif
51
52#if !defined(IDLE_TASK_INDEX)
[391]53# error: You must define IDLE_TASK_INDEX in the ctx_handler.h file
[322]54#endif
55
56#if !defined(GIET_TICK_VALUE)
57# error: You must define GIET_TICK_VALUE in the giet_config.h file
58#endif
59
60#if !defined(GIET_NB_VSPACE_MAX)
61# error: You must define GIET_NB_VSPACE_MAX in the giet_config.h file
62#endif
63
[467]64#if !defined(NB_TTY_CHANNELS)
65# error: You must define NB_TTY_CHANNELS in the hard_config.h file
66#endif
67
68#if (NB_TTY_CHANNELS < 1)
69# error: NB_TTY_CHANNELS cannot be smaller than 1
70#endif
71
72
73
[258]74///////////////////////////////////////////////////////////////////////////////////
[467]75// Ditributed kernel heap descriptors array (for dynamic memory allocation)
76///////////////////////////////////////////////////////////////////////////////////
77
78kernel_heap_t  kernel_heap[X_SIZE][Y_SIZE];
79
80///////////////////////////////////////////////////////////////////////////////////
[410]81// FAT internal representation for kernel code
82///////////////////////////////////////////////////////////////////////////////////
83
[467]84fat32_fs_t     fat      __attribute__((aligned(512)));
[410]85
86///////////////////////////////////////////////////////////////////////////////////
[258]87// array of pointers on the page tables (virtual addresses)
88///////////////////////////////////////////////////////////////////////////////////
89
[345]90volatile unsigned int _ptabs_vaddr[GIET_NB_VSPACE_MAX];    // virtual addresses
91volatile unsigned int _ptabs_ptprs[GIET_NB_VSPACE_MAX];    // physical addresses >> 13
[258]92
93///////////////////////////////////////////////////////////////////////////////////
[467]94// Array of pointers on the schedulers (physical addresses)
[258]95///////////////////////////////////////////////////////////////////////////////////
96
[467]97volatile static_scheduler_t*    _schedulers[X_SIZE][Y_SIZE][NB_PROCS_MAX]; 
[258]98
99////////////////////////////////////////////////////////////////////////////////////
[391]100// Synchonisation barrier before jumping to user code
[258]101////////////////////////////////////////////////////////////////////////////////////
102
[428]103volatile unsigned int kernel_init_barrier = 0;
[294]104
[467]105////////////////////////////////////////////////////////////////////////////////////
106// Global variables for TTY/kernel communications
107////////////////////////////////////////////////////////////////////////////////////
108
109unsigned int   _tty_rx_buf[NB_TTY_CHANNELS];
110unsigned int   _tty_rx_full[NB_TTY_CHANNELS]; 
111
[478]112////////////////////////////////////////////////////////////////////////////////////
113// Distributed locks protecting TTY terminals       
114////////////////////////////////////////////////////////////////////////////////////
[467]115
[478]116sbt_lock_t     _tty_tx_lock[NB_TTY_CHANNELS]  __attribute__((aligned(64)));
[467]117
[310]118///////////////////////////////////////////////////////////////////////////////////
[467]119// This kernel_init() function completes the kernel initialisation in 7 steps:
120// All processors execute this code, but this is done sequencially.
121// - step 0 : Initialise fat, heap descriptors, and tty locks
122// - step 1 : Initialise scheduler pointers array
123// - step 2 : Initialise PTAB pointers arrays
124// - step 3 : Initialise private XCU masks
125// - step 4 :
126// - step 5 :
127// - step 6 :
128///////////////////////////////////////////////////////////////////////////////////
[310]129__attribute__((section (".kinit"))) void kernel_init() 
[258]130{
[428]131    // gpid : hardware processor index (fixed format: X_WIDTH|Y_WIDTH|P_WIDTH)
[467]132    // p    : local processor id in a cluster ( p < NB_PROCS_MAX)
133    // cpid : continuous processor index = (((x * Y_SIZE + y) * NB_PROCS_MAX) + p
[428]134
135    unsigned int gpid       = _get_procid();
136    unsigned int cluster_xy = gpid >> P_WIDTH;
137    unsigned int x          = cluster_xy >> Y_WIDTH & ((1<<X_WIDTH)-1);
[294]138    unsigned int y          = cluster_xy & ((1<<Y_WIDTH)-1);
[440]139    unsigned int p          = gpid & ((1<<P_WIDTH)-1);
140    unsigned int cpid       = ((( x * Y_SIZE) + y) * NB_PROCS_MAX) + p;
[258]141
[428]142    // This initialisation is done sequencially by each processor
143    while( cpid != kernel_init_barrier ) asm volatile ( "nop" );
144
[478]145    // Step 0 : P[0,0,0] initialises various complex structures
146    //          - kernel FAT
147    //          - distributed kernel heaps
148    //          - distributed locks protecting TTY channels
149    //          - distributed locks protecting MMC components
[467]150    if ( gpid == 0 )
151    {
152        _heap_init();
153       
154#if GIET_DEBUG_INIT
155_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] completes kernel HEAP init\n", x, y, p );
156#endif
157        unsigned int channel;
158        for ( channel = 0 ; channel < NB_TTY_CHANNELS ; channel++ )
159        {
160            _sbt_lock_init( &_tty_tx_lock[channel] );
[478]161
162#if GIET_DEBUG_INIT
163_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] completes TTY[%d] lock init\n", 
164               x , y , p , channel );
165#endif
[467]166        }
[459]167
[478]168/*
169        unsigned int cx, cy;
170        for ( cx = 0 ; cx < X_SIZE ; cx++ )
171        {
172            for ( cy = 0 ; cy < X_SIZE ; cy++ )
173            {
174                _sbt_lock_init( &_mmc_lock[cx][cy] );
175
[467]176#if GIET_DEBUG_INIT
[478]177_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] completes MMC[%d][%d] lock init\n",
178               x , y , p , cx , cy );
[467]179#endif
[478]180            }
181        }
182*/
[467]183        _fat_init( IOC_BOOT_MODE ); 
184
185#if GIET_DEBUG_INIT
186_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] completes kernel FAT init\n", x, y, p );
187#endif
188
189    }
190
[440]191    // Step 1 : each processor get its scheduler virtual address from CP0_SCHED register
192    //          and contributes to _schedulers[] array initialisation
[258]193
194    static_scheduler_t* psched     = (static_scheduler_t*)_get_sched();
195    unsigned int        tasks      = psched->tasks;
196
[440]197    _schedulers[x][y][p] = psched;
[258]198
199#if GIET_DEBUG_INIT
[467]200_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] initialises SCHED array\n"
201               " - scheduler vbase = %x\n"
202               " - tasks           = %d\n",
203               x, y, p, (unsigned int)psched, tasks );
[258]204#endif
205
[294]206    // step 2 : each processor that is allocated at least one task loops
207    //          on all allocated tasks:
208    //          - contributes to _ptabs_vaddr[] & _ptabs_ptprs[] initialisation.
209    //          - set CTX_RA slot  with the kernel _ctx_eret() virtual address.
210    //          - set CTX_EPC slot that must contain the task entry point,
211    //            and contain only at this point the virtual address of the memory
[440]212    //            location containing this entry point.
[258]213
214    unsigned int ltid;
215
216    for (ltid = 0; ltid < tasks; ltid++) 
217    {
[440]218        unsigned int vsid = _get_task_slot( x, y, p, ltid , CTX_VSID_ID ); 
219        unsigned int ptab = _get_task_slot( x, y, p, ltid , CTX_PTAB_ID ); 
220        unsigned int ptpr = _get_task_slot( x, y, p, ltid , CTX_PTPR_ID ); 
[258]221
[294]222        // initialize PTABS arrays
[258]223        _ptabs_vaddr[vsid] = ptab;
224        _ptabs_ptprs[vsid] = ptpr;
225
[294]226#if GIET_DEBUG_INIT
[467]227_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] initialises PTABS arrays\n"
[294]228        " - ptabs_vaddr[%d] = %x / ptpr_paddr[%d] = %l\n",
[440]229        x, y, p, 
[294]230        vsid, ptab, vsid, ((unsigned long long)ptpr)<<13 );
231#endif
232
233        // set the ptpr to use the task page table
234        asm volatile( "mtc2    %0,   $0   \n"
235                      : : "r" (ptpr) );
236
237        // compute ctx_ra
[258]238        unsigned int ctx_ra = (unsigned int)(&_ctx_eret);
[440]239        _set_task_slot( x, y, p, ltid, CTX_RA_ID, ctx_ra );
[258]240
[294]241        // compute ctx_epc
[440]242        unsigned int* ptr = (unsigned int*)_get_task_slot( x, y, p, ltid, CTX_EPC_ID );
243        _set_task_slot( x, y, p, ltid, CTX_EPC_ID, *ptr );
[258]244
245#if GIET_DEBUG_INIT
[467]246_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] updates context for task %d\n"
247               " - ctx_epc   = %x\n"
248               " - ctx_ra    = %x\n",
249               x, y, p, ltid,
250               _get_task_slot( x, y, p, ltid, CTX_EPC_ID ),
251               _get_task_slot( x, y, p, ltid, CTX_RA_ID ) );
[258]252#endif
253
[294]254    }  // end for tasks
[258]255
[449]256    // step 3 : compute and set XCU masks
[258]257
[263]258    unsigned int isr_switch_index = 0xFFFFFFFF;
[258]259    unsigned int hwi_mask = 0;
260    unsigned int pti_mask = 0;
[294]261    unsigned int wti_mask = 0;
262    unsigned int irq_id;            // IN_IRQ index
263    unsigned int entry;             // interrupt vector entry
[258]264
265    for (irq_id = 0; irq_id < 32; irq_id++) 
266    {
[294]267        entry = psched->hwi_vector[irq_id];
268        if ( entry & 0x80000000 ) hwi_mask = hwi_mask | (1<<irq_id);
269        if ( (entry & 0x0000FFFF) == ISR_TICK ) isr_switch_index = irq_id;
[258]270
[294]271        entry = psched->pti_vector[irq_id];
272        if ( entry & 0x80000000 ) pti_mask = pti_mask | (1<<irq_id);
273        if ( (entry & 0x0000FFFF) == ISR_TICK ) isr_switch_index = irq_id;
274
275        entry = psched->wti_vector[irq_id];
276        if ( entry & 0x80000000 ) wti_mask = wti_mask | (1<<irq_id);
277        if ( (entry & 0x0000FFFF) == ISR_TICK ) isr_switch_index = irq_id;
[258]278    }
279
280#if GIET_DEBUG_INIT
[467]281_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] sets XCU masks\n"
282               " - XCU HWI_MASK = %x\n"
283               " - XCU WTI_MASK = %x\n"
284               " - XCU PTI_MASK = %x\n",
285               x, y, p, hwi_mask, wti_mask, pti_mask );
[258]286#endif
287
[440]288    unsigned int channel = p * IRQ_PER_PROCESSOR; 
[258]289
[294]290    _xcu_set_mask( cluster_xy, channel, hwi_mask, IRQ_TYPE_HWI ); 
291    _xcu_set_mask( cluster_xy, channel, wti_mask, IRQ_TYPE_WTI );
292    _xcu_set_mask( cluster_xy, channel, pti_mask, IRQ_TYPE_PTI );
[258]293
[449]294    // step 4 : start TICK timer if at least one task
[258]295    if (tasks > 0) 
296    {
[294]297        // one ISR_TICK must be defined for each proc
[263]298        if (isr_switch_index == 0xFFFFFFFF) 
[258]299        {
[467]300            _nolock_printf("\n[GIET ERROR] ISR_TICK not found for processor[%d,%d,%d]\n",
301                           x, y, p );
[258]302            _exit();
303        }
304
[294]305        // start system timer
306        _xcu_timer_start( cluster_xy, isr_switch_index, GIET_TICK_VALUE ); 
[258]307
[294]308    }
309
[258]310#if GIET_DEBUG_INIT
[467]311_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] starts TICK timer\n",
312               x, y, p );
[258]313#endif
314
[449]315    // step 5 : each processor updates the idle_task context:
[391]316    //          (CTX_SP, CTX_RA, CTX_EPC).
317    //          The 4 Kbytes idle stack is implemented in the scheduler.
[258]318    //          The PTPR register, the CTX_PTPR and CTX_PTAB slots
319    //          have been initialised in boot code.
320
[391]321    unsigned int pstack = ((unsigned int)psched) + 0x2000;
[258]322
[440]323    _set_task_slot( x, y, p, IDLE_TASK_INDEX, CTX_SP_ID,  pstack);
324    _set_task_slot( x, y, p, IDLE_TASK_INDEX, CTX_RA_ID,  (unsigned int) &_ctx_eret);
325    _set_task_slot( x, y, p, IDLE_TASK_INDEX, CTX_EPC_ID, (unsigned int) &_idle_task);
[258]326
327#if GIET_DEBUG_INIT
[467]328_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] initializes IDLE task\n"
329               " - stack_base = %x\n"
330               " - stack_size = 0x1000\n",
331               x, y, p, pstack - 0x1000 );
[258]332#endif
333
[449]334    // step 6 : when all processors reach the synchronisation barrier,
[294]335    //          each processor set registers SP, SR, PTPR, EPC,
[258]336    //          with the values corresponding to the first allocated task,
[294]337    //          or to the idle_task if there is no task allocated,
338    //          and jump to user code
[258]339
340    if (tasks == 0) 
341    {
342        ltid = IDLE_TASK_INDEX;
343
[467]344        _nolock_printf("\n[GIET WARNING] No task allocated to processor[%d,%d,%d]\n",
345                       x, y, p );
[258]346    }
[294]347    else
348    {
349        ltid = 0;
350    }
[258]351
[440]352    unsigned int sp_value   = _get_task_slot( x, y, p, ltid, CTX_SP_ID);
353    unsigned int sr_value   = _get_task_slot( x, y, p, ltid, CTX_SR_ID);
354    unsigned int ptpr_value = _get_task_slot( x, y, p, ltid, CTX_PTPR_ID);
355    unsigned int epc_value  = _get_task_slot( x, y, p, ltid, CTX_EPC_ID);
[258]356
[330]357#if GIET_DEBUG_INIT
[467]358_nolock_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] reach barrier at cycle %d\n",
359               x, y, p, _get_proctime() );
[330]360#endif
[258]361
[310]362    // increment barrier counter
[428]363    kernel_init_barrier++;
[310]364
365    // busy waiting until all processors synchronized
[428]366    while ( kernel_init_barrier != NB_TOTAL_PROCS );
[310]367
[391]368#if GIET_DEBUG_INIT
[467]369_printf("\n[DEBUG KERNEL_INIT] P[%d,%d,%d] initializes registers at cycle %d\n"
[391]370        " - sp   = %x\n"
371        " - sr   = %x\n"
372        " - ptpr = %x\n"
373        " - epc  = %x\n",
[440]374        x, y, p, _get_proctime(),
[391]375        sp_value, sr_value, ptpr_value, epc_value );
376#endif
377
[294]378    // set registers and jump to user code
379    asm volatile ( "move  $29,  %0                  \n"   /* SP <= ctx[CTX_SP_ID] */
380                   "mtc0  %1,   $12                 \n"   /* SR <= ctx[CTX_SR_ID] */
381                   "mtc2  %2,   $0                  \n"   /* PTPR <= ctx[CTX_PTPR] */
382                   "mtc0  %3,   $14                 \n"   /* EPC <= ctx[CTX_EPC]  */
383                   "eret                            \n"   /* jump to user code  */
384                   "nop                             \n"
385                   : 
386                   : "r"(sp_value), "r"(sr_value), "r"(ptpr_value), "r"(epc_value)
[345]387                   : "$29", "memory" );
[294]388
[310]389} // end kernel_init()
[258]390
391
392// Local Variables:
393// tab-width: 4
394// c-basic-offset: 4
395// c-file-offsets:((innamespace . 0)(inline-open . 0))
396// indent-tabs-mode: nil
397// End:
398// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4
399
Note: See TracBrowser for help on using the repository browser.