///////////////////////////////////////////////////////////////////////////////////
// File     : kernel_init.c
// Date     : 26/05/2012
// Authors  : alain greiner & mohamed karaoui
// Copyright (c) UPMC-LIP6
////////////////////////////////////////////////////////////////////////////////////
// The kernel_init.c file is part of the GIET-VM nano-kernel.
//
// This nano-kernel has been written for the MIPS32 processor.
// The virtual addresses are on 32 bits and use the (unsigned int) type, but the
// physical addresses can have up to 40 bits, and use the (unsigned long long) type.
// It natively supports clusterised shared memory multi-processor architectures,
// where each processor is identified by a composite index (cluster_xy, local_id),
// and where there is one physical memory bank per cluster.
//
// This file contains the _kernel_init() function, that performs the second
// phase of system initialisation. The three significant actions are:
// 1) processor 0 performs peripherals and system FAT initialisation.
// 2) processor 0 wakes up all other processors with an IPI.
// 3) all processors running in parallel perform register initialisation,
//    from their private scheduler, and jump to user code.
//////////////////////////////////////////////////////////////////////////////////// #include // kernel libraries #include #include //for peripheral initialisation #include #include #include #include #include #include #include #include #include #include #include #include #include #include /////////////////////////////////////////////////////////////////////////////////// // array of pointers on the page tables (virtual addresses) /////////////////////////////////////////////////////////////////////////////////// __attribute__((section (".kdata"))) unsigned int _ptabs_vaddr[GIET_NB_VSPACE_MAX]; // virtual addresses __attribute__((section (".kdata"))) unsigned int _ptabs_ptprs[GIET_NB_VSPACE_MAX]; // physical addresses >> 13 /////////////////////////////////////////////////////////////////////////////////// // array of pointers on the schedulers (physical addresses) /////////////////////////////////////////////////////////////////////////////////// __attribute__((section (".kdata"))) static_scheduler_t* _schedulers[NB_PROCS_MAX<<(X_WIDTH+Y_WIDTH)]; // virtual addresses //////////////////////////////////////////////////////////////////////////////////// // staks for the "idle" tasks (512 bytes for each processor) //////////////////////////////////////////////////////////////////////////////////// __attribute__((section (".kdata"))) unsigned int _idle_stack[X_SIZE * Y_SIZE * NB_PROCS_MAX * 128 ]; //////////////////////////////////////////////////////////////////////////////////// // Synchonisation Barrier before jumping to user code //////////////////////////////////////////////////////////////////////////////////// __attribute__((section (".kdata"))) unsigned int _init_barrier = 0; //////////////////////////////////////////////////////////////////////////////////// // This function is the entry point in kernel for all processors. 
// It is executed in parallel by all processors, and completes the system
// initialisation that has been started by processor 0 in the boot_init() function.
//
// This kernel code makes the following assumptions, regarding the work done
// by the boot code:
//
// 1) The page tables associated to the various vspaces have been built
//    in physical memory, and can be used by the kernel code.
//
// 2) All schedulers (this includes all task contexts) have been initialised.
//    Both the virtual and the physical base addresses of the page tables
//    are available in the CTX_PTAB and CTX_PTPR slots.
//
// 3) The CP0_SCHED register of each processor contains a pointer on its
//    private scheduler (virtual address).
//
// 4) The CP2_PTPR register of each processor contains a pointer on
//    the vspace_0 page table (physical address>>13).
//
// 5) For all processors, the MMU is activated (CP2_MODE contains 0xF).
//
// This code must be loaded in .kinit section, in order to control seg_kinit_base,
// as this address is used by the boot code to jump into kernel code.
////////////////////////////////////////////////////////////////////////////////////
// Each processor performs the following actions:
// 1/ contributes to the _schedulers[] array initialisation.
// 2/ contributes to the _ptabs_vaddr[] and _ptabs_ptprs[] arrays initialisation.
// 3/ completes the task context initialisation for each allocated task.
// 4/ computes and sets the ICU mask for its private ICU channel.
// 5/ initialises its private TICK timer (if tasks > 0).
// 6/ initialises the "idle" task context in its private scheduler.
// 7/ initialises SP, SR, PTPR, EPC registers and jumps to user code with an eret.
////////////////////////////////////////////////////////////////////////////////////
// kernel_parallel_init()
//
// Kernel entry point, placed in the ".kinit" section. Takes no argument.
// The function ends with an "eret" instruction that transfers control to the
// address loaded in EPC, so it is not expected to return to its caller.
//
// Visible work, in order:
//  - registers the scheduler pointer of this processor in _schedulers[];
//  - for each allocated task: records the page table of the task vspace in
//    _ptabs_vaddr[] / _ptabs_ptprs[], sets CTX_RA, and resolves CTX_EPC;
//  - builds the hwi / pti / wti interrupt masks and starts the TICK timer
//    when at least one task is allocated;
//  - initialises the SP, RA and EPC slots of the idle task context;
//  - increments _init_barrier, waits for TOTAL_PROCS, then loads SP, SR,
//    PTPR and EPC and executes eret.
//
// NOTE(review): this copy of the function is damaged. In four places the text
// located between a '<' and a later '>' is missing (see the notes below).
// The damaged statements are kept exactly as found; restore them from the
// upstream file rather than from guesswork.
////////////////////////////////////////////////////////////////////////////////////
__attribute__((section (".kinit"))) void kernel_parallel_init()
{
    // decompose the global processor index into cluster and (x,y) coordinates
    unsigned int global_pid = _get_procid();
    unsigned int cluster_xy = global_pid / NB_PROCS_MAX;
    unsigned int x          = cluster_xy >> Y_WIDTH;

    // NOTE(review): truncated statement. The end of the y computation and the
    // declarations of lpid, psched and tasks (all used below) are not visible.
    unsigned int y          = cluster_xy & ((1<tasks;

    // publish the scheduler pointer of this processor
    _schedulers[global_pid] = psched;

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d]\n"
        " - scheduler vbase = %x\n"
        " - tasks = %d\n",
        x, y, lpid, (unsigned int)psched, tasks );
#endif

    // step 2 : each processor that is allocated at least one task loops
    //          on all allocated tasks:
    //          - contributes to _ptabs_vaddr[] & _ptabs_ptprs[] initialisation.
    //          - sets the CTX_RA slot with the kernel _ctx_eret() virtual address.
    //          - sets the CTX_EPC slot, which must contain the task entry point,
    //            and contains only, at this point, the virtual address of the
    //            memory location containing this entry point. We must switch
    //            the PTPR to use the page table corresponding to the task.

    unsigned int ltid;

    for (ltid = 0; ltid < tasks; ltid++)
    {
        unsigned int vsid = _get_task_slot( global_pid, ltid , CTX_VSID_ID );
        unsigned int ptab = _get_task_slot( global_pid, ltid , CTX_PTAB_ID );
        unsigned int ptpr = _get_task_slot( global_pid, ltid , CTX_PTPR_ID );

        // initialize PTABS arrays (entry selected by the vspace id of the task)
        _ptabs_vaddr[vsid] = ptab;
        _ptabs_ptprs[vsid] = ptpr;

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] contributes to PTABS arrays\n"
        " - ptabs_vaddr[%d] = %x / ptpr_paddr[%d] = %l\n",
        x, y, lpid, vsid, ptab, vsid, ((unsigned long long)ptpr)<<13 );
#endif

        // set the ptpr to use the task page table: this must be done before
        // the CTX_EPC dereference below, which reads through a virtual address
        // of the task
        asm volatile( "mtc2 %0, $0 \n"
                      : : "r" (ptpr) );

        // compute ctx_ra
        unsigned int ctx_ra = (unsigned int)(&_ctx_eret);
        _set_task_slot( global_pid, ltid, CTX_RA_ID, ctx_ra );

        // compute ctx_epc: the slot currently holds the address of the word
        // containing the entry point; replace it by the word itself
        unsigned int* ptr = (unsigned int*)_get_task_slot( global_pid, ltid, CTX_EPC_ID );
        _set_task_slot( global_pid, ltid, CTX_EPC_ID, *ptr );

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] update context for task %d\n"
        " - ctx_epc = %x\n"
        " - ctx_ra = %x\n",
        x, y, lpid, ltid,
        _get_task_slot( global_pid, ltid, CTX_EPC_ID ),
        _get_task_slot( global_pid, ltid, CTX_RA_ID ) );
#endif

    }  // end for tasks

    // step 4 : compute and set ICU or XCU masks

    // 0xFFFFFFFF means "no ISR_TICK found yet" (tested before starting the timer)
    unsigned int isr_switch_index = 0xFFFFFFFF;
    unsigned int hwi_mask = 0;
    unsigned int pti_mask = 0;
    unsigned int wti_mask = 0;
    unsigned int irq_id;            // IN_IRQ index
    unsigned int entry;             // interrupt vector entry

    for (irq_id = 0; irq_id < 32; irq_id++)
    {
        // an entry with bit 31 set contributes to the corresponding mask
        // (presumably a "valid" flag — confirm against the scheduler definition)
        entry = psched->hwi_vector[irq_id];

        // NOTE(review): the three statements below are truncated. The end of
        // each mask update, the reads of the pti / wti vectors, the code that
        // assigns isr_switch_index, the end of the loop, the code that applies
        // the masks, and the beginning of the "tasks > 0" test are not visible.
        if ( entry & 0x80000000 ) hwi_mask = hwi_mask | (1<pti_vector[irq_id];
        if ( entry & 0x80000000 ) pti_mask = pti_mask | (1<wti_vector[irq_id];
        if ( entry & 0x80000000 ) wti_mask = wti_mask | (1< 0)
    {
        // one ISR_TICK must be defined for each proc
        if (isr_switch_index == 0xFFFFFFFF)
        {
            _printf("\n[GIET ERROR] ISR_TICK not found for processor[%d,%d,%d]\n",
                    x, y, lpid );
            _exit();
        }

        // start system timer
#if USE_XICU
        _xcu_timer_start( cluster_xy, isr_switch_index, GIET_TICK_VALUE );
#else
        _timer_start( cluster_xy, isr_switch_index, GIET_TICK_VALUE );
#endif
    }

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] start TICK timer\n",
        x, y, lpid );
#endif

    // step 6 : each processor updates the idle_task context:
    //          (only CTX_SP, CTX_RA, CTX_EPC).
    //          The stack size is 512 bytes, reserved in seg_kdata.
    //          The PTPR register, the CTX_PTPR and CTX_PTAB slots
    //          have been initialised in boot code.

    // p is the linear processor index in the _idle_stack array; the initial SP
    // is the byte address located (p + 1) * 512 bytes after the array base,
    // i.e. the upper bound of the slot reserved for processor p
    unsigned int p = ((x * Y_SIZE) + y) * NB_PROCS_MAX + lpid;
    unsigned int stack = (unsigned int)_idle_stack + ((p + 1)<<9);

    _set_task_slot( global_pid, IDLE_TASK_INDEX, CTX_SP_ID, stack);
    _set_task_slot( global_pid, IDLE_TASK_INDEX, CTX_RA_ID, (unsigned int) &_ctx_eret);
    _set_task_slot( global_pid, IDLE_TASK_INDEX, CTX_EPC_ID, (unsigned int) &_idle_task);

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] initialize IDLE task\n",
        x, y, lpid );
#endif

    // step 7 : when all processors reach the synchronisation barrier,
    //          each processor sets the registers SP, SR, PTPR, EPC,
    //          with the values corresponding to the first allocated task,
    //          or to the idle_task if there is no task allocated,
    //          and jumps to user code

    if (tasks == 0)
    {
        ltid = IDLE_TASK_INDEX;

        _printf("\n[GIET WARNING] No task allocated to processor[%d,%d,%d]\n",
                x, y, lpid );
    }
    else
    {
        ltid = 0;
    }

    // read the four register values from the selected context before the barrier
    unsigned int sp_value   = _get_task_slot(global_pid, ltid, CTX_SP_ID);
    unsigned int sr_value   = _get_task_slot(global_pid, ltid, CTX_SR_ID);
    unsigned int ptpr_value = _get_task_slot(global_pid, ltid, CTX_PTPR_ID);
    unsigned int epc_value  = _get_task_slot(global_pid, ltid, CTX_EPC_ID);

#if GIET_DEBUG_INIT
_printf("\n[GIET DEBUG INIT] Processor[%d,%d,%d] reach barrier at cycle %d\n"
        " - sp = %x\n"
        " - sr = %x\n"
        " - ptpr = %x\n"
        " - epc = %x\n",
        x, y, lpid, _get_proctime(),
        sp_value, sr_value, ptpr_value, epc_value );
#endif

    unsigned int*  pcount = &_init_barrier;
    unsigned int   nprocs = TOTAL_PROCS;
    unsigned int   count;

    // increment barrier counter with atomic LL/SC
    // NOTE(review): the asm statement declares no "memory" clobber and uses a
    // named (non-local) label; both deserve a check if this function can be
    // inlined or if the compiler reorders memory accesses around it.
    asm volatile ( "_init_barrier_loop: \n"
                   "ll %0, 0(%1) \n"                  /* count <= *pcount  */
                   "addi $3, %0, 1 \n"                /* $3 <= count + 1   */
                   "sc $3, 0(%1) \n"                  /* *pcount <= $3     */
                   "beqz $3, _init_barrier_loop \n"   /* retry if failure  */
                   "nop \n"
                   : "=&r"(count)
                   : "r"(pcount)
                   : "$3" );

    // busy waiting until all processors synchronized
    // NOTE(review): *pcount is read through a non-volatile pointer to a
    // non-volatile variable, so nothing tells the compiler that the value can
    // change between iterations — confirm the generated code reloads it.
    while ( *pcount != nprocs ) asm volatile ("nop");

    _printf("\n[GIET] Processor[%d,%d,%d] jumps to user code at cycle %d\n",
            x, y, lpid, _get_proctime() );

    // set registers and jump to user code
    asm volatile ( "move $29, %0 \n"    /* SP <= ctx[CTX_SP_ID]   */
                   "mtc0 %1, $12 \n"    /* SR <= ctx[CTX_SR_ID]   */
                   "mtc2 %2, $0 \n"     /* PTPR <= ctx[CTX_PTPR]  */
                   "mtc0 %3, $14 \n"    /* EPC <= ctx[CTX_EPC]    */
                   "eret \n"            /* jump to user code      */
                   "nop \n"
                   :
                   : "r"(sp_value), "r"(sr_value), "r"(ptpr_value), "r"(epc_value)
                   : "$29" );

} // end kernel_parallel_init()

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4