/****************************************************************************
 * This file contains the ALMOS-MKH boot-loader for the TSAR architecture.  *
 *                                                                          *
 * It supports clusterised shared memory multi-processor architectures,     *
 * where each processor is identified by a composite index [cxy,lid],       *
 * with one physical memory bank per cluster.                               *
 *                                                                          *
 * The 'boot.elf' file (containing the boot-loader binary code) is stored   *
 * on disk and is loaded into memory by bscpu (whose index is [0,0]),       *
 * executing the generic preloader.                                         *
 *                                                                          *
 * 1) The boot-loader first phase is executed by bscpu only, while          *
 *    all other cores are waiting in the preloader.                         *
 *    It does the following tasks:                                          *
 *      - load into the memory bank of cluster (0,0) the 'arch_info.bin'    *
 *        file (containing the hardware architecture description) and the   *
 *        'kernel.elf' file, at temporary locations,                        *    
 *      - initializes the 'boot_info_t' structure in cluster(0,0)           *
 *        (there is 1 'boot_info_t' per cluster), which contains both       *
 *        global and cluster specific information that will be used for     *
 *        kernel initialisation.                                            *
 *      - activate CP0s in all other clusters, using IPIs.                  *
 *      - wait completion reports from CP0s on a global barrier.            * 
 *                                                                          *
 * 2) The boot-loader second phase is then executed in parallel by all      *
 *    CP0s (other than bscpu). Each CP0 performs the following tasks:       *
 *      - copies into the memory bank of the local cluster the 'boot.elf'   *
 *        and the 'arch_info.bin' files (at the same addresses as in the    *
 *        memory bank of cluster(0,0)), and the kernel image (at address    *
 *        0x0),                                                             *
 *      - initializes the 'boot_info_t' structure of the local cluster,     *
 *      - activate all other cores in the same cluster (CPi).               *
 *      - wait local CPi completion reports on a local barrier.             *
 *      - report completion to bscpu on the global barrier.                 *
 *                                                                          *
 * 3) The boot-loader third phase is executed in parallel by all cores.     *
 *    After passing the global barrier the bscpu:                           *
 *      - activates the CPi of cluster(0,0),                                *
 *      - blocks on the local barrier waiting for all local CPi to report   *
 *        completion on the local barrier,                                  *
 *      - moves the local kernel image from the temporary location to the   *
 *        address 0x0, (erasing the preloader code).                        *
 *                                                                          *
 * 4) All cores have finished the boot phase, they jump to the kern_init()  *
 *    function (maybe not at the same time).                                *
 ****************************************************************************/

#include <elf-types.h>

#include <almos_config.h>
#include <boot_config.h>

#include <arch_info.h>
#include <boot_info.h>

#include <hal_types.h>

#include <boot_utils.h>
#include <boot_fat32.h>
#include <boot_bdv_driver.h>
#include <boot_hba_driver.h>
#include <boot_tty_driver.h>

/****************************************************************************
 *                                 Macros.                                  *
 ****************************************************************************/

/*
 * Page alignment helpers (this assumes that PPM_PAGE_SIZE is a power of 2).
 * The alignment mask is ~(PPM_PAGE_SIZE - 1): the subtraction must be done
 * before the complement, because the unary '~' operator binds tighter than
 * the binary '-' operator.
 */
#define PAGE_ROUND_DOWN(x)  ((x) & ~((PPM_PAGE_SIZE) - 1))
#define PAGE_ROUND_UP(x)    (((x) + (PPM_PAGE_SIZE) - 1) &   \
                            ~((PPM_PAGE_SIZE) - 1))

/****************************************************************************
 *                             Global variables.                            *
 ****************************************************************************/

// Synchronization variables.
volatile boot_barrier_t global_barrier;     /* Barrier passed to boot_barrier()
                                               by bscpu and by the other CP0s
                                               (always accessed through
                                               BOOT_CORE_CXY).              */

volatile uint32_t   global_count;           /* Expected count given to
                                               boot_barrier(): set by bscpu
                                               to the value returned by
                                               boot_wake_cp0().             */

volatile uint32_t   local_barrier;          /* Counter reset by a CP0 before
                                               waking its local CPi cores;
                                               each CPi increments it once,
                                               and the CP0 polls it until it
                                               reaches (cores_nr - 1).      */

volatile uint32_t   boot_cluster_ready;     /* Reset by bscpu before waking
                                               the other CP0s, and set to 1
                                               once the kernel image has been
                                               moved to its final location;
                                               polled by the CPi cores of the
                                               boot cluster.                */

// Kernel image memory layout, registered by boot_kernel_load().
uint32_t ktext_base;                        /* ktext segment base address.  */
uint32_t ktext_end;                         /* ktext segment end address
                                               (base + size in memory).     */
uint32_t kdata_base;                        /* kdata segment base address.  */
uint32_t kdata_end;                         /* kdata segment end address
                                               (base + size in memory).     */

uint32_t kernel_entry;                      /* Kernel entry point (e_entry
                                               field of 'kernel.elf').      */

// Extern symbols.
extern void boot_entry();                   /* Boot entry code (defined
                                               outside this file): its
                                               address is written to the XCU
                                               to wake up a core, and it
                                               calls boot_loader().         */

/****************************************************************************
 *                           Internal functions.                            *
 ****************************************************************************/

/****************************************************************************
 * This function returns a printable name for a device type. It is used by
 * the debug messages of this file.
 * @ dev_type   : device type (one of the DEV_TYPE_* values).
 * @ returns the device type name, or "UNDEFINED" for any other value.
 ****************************************************************************/
char * device_type_str( uint32_t dev_type )
{
    // association between a device type and its printable name,
    // scanned in order until the first match
    static const struct
    {
        uint32_t   type;
        char     * name;
    }
    names[] =
    {
        { DEV_TYPE_RAM     , "RAM"     },
        { DEV_TYPE_DMA     , "DMA"     },
        { DEV_TYPE_FBF     , "FBF"     },
        { DEV_TYPE_IOB     , "IOB"     },
        { DEV_TYPE_IOC_BDV , "IOC_BDV" },
        { DEV_TYPE_IOC_HBA , "IOC_HBA" },
        { DEV_TYPE_IOC_SDC , "IOC_SDC" },
        { DEV_TYPE_IOC_SPI , "IOC_SPI" },
        { DEV_TYPE_IOC_RDK , "IOC_RDK" },
        { DEV_TYPE_MMC     , "MMC"     },
        { DEV_TYPE_MWR_CPY , "MWR_CPY" },
        { DEV_TYPE_MWR_GCD , "MWR_GCD" },
        { DEV_TYPE_MWR_DCT , "MWR_DCT" },
        { DEV_TYPE_NIC     , "NIC"     },
        { DEV_TYPE_ROM     , "ROM"     },
        { DEV_TYPE_SIM     , "SIM"     },
        { DEV_TYPE_TIM     , "TIM"     },
        { DEV_TYPE_TTY     , "TTY"     },
        { DEV_TYPE_XCU     , "XCU"     },
        { DEV_TYPE_PIC     , "PIC"     },
        { DEV_TYPE_CMA     , "CMA"     },
    };

    uint32_t idx;

    for( idx = 0 ; idx < (sizeof(names) / sizeof(names[0])) ; idx++ )
    {
        if( names[idx].type == dev_type ) return names[idx].name;
    }

    return "UNDEFINED";
}

/****************************************************************************
 * This function loads the arch_info.bin file into the boot cluster memory,
 * at address ARCHINFO_BASE, and checks the signature found in its header.
 * It calls boot_exit() if the load fails, or if the signature is not the
 * expected ARCHINFO_SIGNATURE value.
 ****************************************************************************/
static void boot_archinfo_load()
{
    archinfo_header_t * archinfo;

    // transfer the file from disk to memory
    if (boot_fat32_load(ARCHINFO_PATHNAME, ARCHINFO_BASE, ARCHINFO_MAX_SIZE) != 0)
    {
        boot_printf("\n[BOOT ERROR]: boot_archinfo_load(): "
                    "<%s> file not found\n",
                    ARCHINFO_PATHNAME);
        boot_exit();
    }

    // the header is the first structure of the loaded file
    archinfo = (archinfo_header_t*)ARCHINFO_BASE;

    if (archinfo->signature != ARCHINFO_SIGNATURE)
    {
        boot_printf("\n[BOOT_ERROR]: boot_archinfo_load(): "
                    "<%s> file signature should be %x\n",
                    ARCHINFO_PATHNAME, ARCHINFO_SIGNATURE);
        boot_exit();
    }

#if DEBUG_BOOT_INFO
boot_printf("\n[BOOT] file %s loaded at %l\n",
            ARCHINFO_PATHNAME , ARCHINFO_BASE );
#endif

} // boot_archinfo_load()

/****************************************************************************
 * This function loads the 'kernel.elf' file into the boot cluster memory   *
 * bank, analyzes it then places the kernel image at the temporary physical *
 * memory address KERN_IMG_TMP_BASE since other processors are still        *
 * executing the preloader code (which means that the kernel image cannot   *
 * be placed now at its final memory location starting at address 0x0.      *
 ****************************************************************************/
static void boot_kernel_load()
{
    Elf32_Ehdr* elf_header;         /* Pointer on 'kernel.elf' header.      */
    Elf32_Phdr* program_header;     /* Pointer on 'kernel.elf' program 
                                       header.                              */
    uint32_t    phdr_offset;        /* Program header offset in 
                                       'kernel.elf' file.                   */
    uint32_t    segments_nb;        /* Total number of segments in 
                                       'kernel.elf' file.                   */

    uint32_t    seg_src_addr;       /* Segment address in 'kernel.elf' 
                                       file (source).                       */
    uint32_t    seg_paddr;          /* Physical address at which the 
                                       first byte of the segment resides 
                                       in memory.                           */
    uint32_t    seg_offset;         /* Offset from the beginning of 
                                       'kernel.elf' file to the segment's
                                       first byte.                          */ 
    uint32_t    seg_filesz;         /* Segment's number of bytes in 
                                       'kernel.elf' file.                   */ 
    uint32_t    seg_memsz;          /* Segment's number of bytes in the 
                                       memory image.                        */

    uint32_t    seg_id;             /* Iterator for program header scanning
                                       loop.                                */

    /* Loading file into memory. */
    if (boot_fat32_load(KERNEL_PATHNAME, KERN_BASE, KERN_MAX_SIZE))
    {
        boot_printf("\n[BOOT ERROR]: boot_kernel_load(): "
                    "<%s> file not found\n",
                    KERNEL_PATHNAME);
        boot_exit();
    }

    /* 
     * Initializing pointer to header which is the first element of the 
     * .elf file. 
     */
    elf_header = (Elf32_Ehdr*)KERN_BASE;

    /* Signature problem, abort program !!! */
    if ((elf_header->e_ident[EI_MAG0] != ELFMAG0)   ||
        (elf_header->e_ident[EI_MAG1] != ELFMAG1)   ||
        (elf_header->e_ident[EI_MAG2] != ELFMAG2)   ||
        (elf_header->e_ident[EI_MAG3] != ELFMAG3))
    {
        boot_printf("\n[BOOT_ERROR]: boot_kernel_load(): "
                    "<%s> is not an ELF file\n",
                    KERNEL_PATHNAME);
        boot_exit();
    }

    /* Getting the program header table offset and the number of segments. */
    phdr_offset     = elf_header->e_phoff;
    segments_nb     = elf_header->e_phnum;

    /* Getting the program header table pointer. */
    program_header  = (Elf32_Phdr*)(KERN_BASE + phdr_offset);

    /* Looking for loadable segments. */
    for (seg_id = 0; seg_id < segments_nb; seg_id++) 
    {
        // Found one:
        if (program_header[seg_id].p_type == PT_LOAD)
        {
            // Getting its attributes.
            seg_paddr    = program_header[seg_id].p_paddr;   
            seg_offset   = program_header[seg_id].p_offset;
            seg_filesz   = program_header[seg_id].p_filesz;
            seg_memsz    = program_header[seg_id].p_memsz;

            // Load it to its appropriate physical memory address.
            seg_src_addr = (uint32_t)KERN_BASE + seg_offset;
            boot_memcpy((void*)(KERN_IMG_TMP_BASE + seg_paddr), 
                        (void*)seg_src_addr, 
                               seg_filesz);

            // Fill remaining memory with zero if (filesz < memsz).
            boot_memset((void*)(KERN_IMG_TMP_BASE + seg_paddr + seg_filesz),
                                0,
                                seg_memsz - seg_filesz);
            
            /* 
             * Note: we suppose that the 'kernel.elf' file contains only 2 
             * loadable segments ktext + kdata and that the main 
             * difference between these two is the WRITE permission: ktext 
             * contains read-only instructions and read_only data,
             * while kdata contains writable data.
             */

            // Get ktext segment base and end addresses.
            if ((program_header[seg_id].p_flags & PF_W) == 0)
            {
                ktext_base = seg_paddr;
                ktext_end  = seg_paddr + seg_memsz;
            }

            // Get kdata segment base and end addresses.
            else
            {
                kdata_base = seg_paddr;
                kdata_end  = seg_paddr + seg_memsz;
            }
        }
    }

    // Get the entry point for kernel code.
    kernel_entry = (uint32_t)elf_header->e_entry;

} // boot_kernel_load()

/****************************************************************************
 * This function initializes the local 'boot_info_t' structure.             *
 * @ boot_info  : pointer to local boot_info_t structure                    *
 * @ cxy        : cluster identifier                                        *
 ****************************************************************************/
static void boot_info_init( boot_info_t * boot_info,
                            cxy_t         cxy )
{
    archinfo_header_t  * header;        
    archinfo_core_t    * core_base;     
    archinfo_cluster_t * cluster_base; 
    archinfo_device_t  * device_base;
    archinfo_irq_t     * irq_base;  

    archinfo_cluster_t * cluster; 
    archinfo_core_t    * core;
    uint32_t             core_id; 
    archinfo_device_t  * device;
    uint32_t             device_id;
    archinfo_irq_t     * irq; 
    uint32_t             irq_id;
 
    boot_device_t      * boot_dev; 

    // get pointer on ARCHINFO header 
    header = (archinfo_header_t*)ARCHINFO_BASE;

    // Initialize global platform parameters
    boot_info->x_size       = header->x_size;
    boot_info->y_size       = header->y_size;
    boot_info->x_width      = header->x_width;
    boot_info->y_width      = header->y_width;
    boot_info->paddr_width  = header->paddr_width;
    boot_info->io_cxy       = header->io_cxy;

    // Initialize kernel segments
    boot_info->kernel_code_start = ktext_base;
    boot_info->kernel_code_end   = ktext_end;
    boot_info->kernel_data_start = kdata_base;
    boot_info->kernel_data_end   = kdata_end;

    // Initialize specific cluster parameter
    core_base    = archinfo_get_core_base   (header);
    cluster_base = archinfo_get_cluster_base(header);
    device_base  = archinfo_get_device_base (header);
    irq_base     = archinfo_get_irq_base    (header);

    // lopp on the clusters to find local cluster descriptor
    for (cluster =  cluster_base;
         cluster < &cluster_base[header->x_size * header->y_size];
         cluster++)
    {
        if (cluster->cxy != cxy) continue;

        boot_info->cxy          = cluster->cxy;
        boot_info->cores_nr     = cluster->cores;
        boot_info->devices_nr   = cluster->devices;

#if DEBUG_BOOT_INFO
boot_printf("\n[BOOT] build boot_info for cluster %x : %d cores / %d devices\n",
            cluster->cxy , cluster->cores , cluster->devices );
#endif
        // Initialize array of core descriptors
        for (core = &core_base[cluster->core_offset], core_id = 0;
             core < &core_base[cluster->core_offset + cluster->cores];
             core++, core_id++)
        {
            boot_info->core[core_id].gid = (gid_t)core->gid;
            boot_info->core[core_id].lid = (lid_t)core->lid; 
            boot_info->core[core_id].cxy = (cxy_t)core->cxy;

#if DEBUG_BOOT_INFO
boot_printf("  - core %x : cxy = %x / lid = %d\n", 
            core->gid , core->cxy , core->lid );
#endif

        }

        // Initialize array of device descriptors
        for (device = &device_base[cluster->device_offset], device_id = 0;
             device < &device_base[cluster->device_offset + cluster->devices];
             device++, device_id++)
        {
            boot_dev = &boot_info->dev[device_id];

            boot_dev->type       =         device->type;
            boot_dev->base       = (xptr_t)device->base;
            boot_dev->size       =         device->size;
            boot_dev->channels   =         device->channels;
            boot_dev->param0     =         device->arg0;    
            boot_dev->param1     =         device->arg1;    
            boot_dev->param2     =         device->arg2;    
            boot_dev->param3     =         device->arg3;    
            boot_dev->irqs       =         device->irqs;    

#if DEBUG_BOOT_INFO
boot_printf("  - device %s : base = %l / size = %d / channels = %d / irqs = %d\n",
            device_type_str( device->type ) , device->base , device->size ,
            device->channels , device->irqs );   
#endif

            // Initialize information about physical memory in cluster
            if (device->type == DEV_TYPE_RAM)
            {
                // Compute total number of physical memory pages in cluster
                boot_info->pages_nr = device->size >> CONFIG_PPM_PAGE_SHIFT;

                // Get the last address allocated for the kernel segments
                uint32_t end = (ktext_end < kdata_end) ? kdata_end : ktext_end;

                // Computing the number of pages allocated for the kernel.
                if( (end & CONFIG_PPM_PAGE_MASK) == 0 )
                {
                    boot_info->pages_offset = end >> CONFIG_PPM_PAGE_SHIFT;
                }
                else
                {
                    boot_info->pages_offset = (end >> CONFIG_PPM_PAGE_SHIFT) + 1;
                }
            }
            
            // Initialize array of irq descriptors for XCU 
            if (device->type == DEV_TYPE_XCU) 
            {
                for (irq_id = 0; irq_id < CONFIG_MAX_HWIS_PER_ICU; irq_id++)
                {
                    boot_dev->irq[irq_id].valid  = 0;
                }

                for (irq = &irq_base[device->irq_offset];
                     irq < &irq_base[device->irq_offset + device->irqs];
                     irq++)
                {
                    boot_dev->irq[irq->port].valid    = 1;
                    boot_dev->irq[irq->port].dev_type = irq->dev_type;
                    boot_dev->irq[irq->port].channel  = irq->channel;
                    boot_dev->irq[irq->port].is_rx    = irq->is_rx;

#if DEBUG_BOOT_INFO
boot_printf("    . irq_port = %d / source = %s / channel = %d / is_rx = %d\n",
            irq->port , device_type_str( irq->dev_type ) , irq->channel , irq->is_rx );
#endif

                }
            }

            // Initialize array of irq descriptors for PIC
            if (device->type == DEV_TYPE_PIC) 
            {
                for (irq_id = 0; irq_id < CONFIG_MAX_IRQS_PER_PIC; irq_id++)
                {
                    boot_dev->irq[irq_id].valid  = 0;
                }

                for (irq = &irq_base[device->irq_offset];
                     irq < &irq_base[device->irq_offset + device->irqs];
                     irq++)
                {
                    boot_dev->irq[irq->port].valid    = 1;
                    boot_dev->irq[irq->port].dev_type = irq->dev_type;
                    boot_dev->irq[irq->port].channel  = irq->channel;
                    boot_dev->irq[irq->port].is_rx    = irq->is_rx;

#if DEBUG_BOOT_INFO
boot_printf("    . irq_port = %d / source = %s / channel = %d / is_rx = %d\n",
            irq->port , device_type_str( irq->dev_type ) , irq->channel , irq->is_rx );
#endif

                }
            }
        }

#if DEBUG_BOOT_INFO
boot_printf("  - ram : number of pages = %x / first free page = %x\n",
            boot_info->pages_nr , boot_info->pages_offset );
#endif

    }
} // boot_info_init()

/****************************************************************************
 * This function is executed by all cores in order to check their local
 * boot_info_t structure: the core descriptor selected by <lid> must contain
 * the hardware identifier returned by boot_get_procid(), and the cluster
 * identifier registered in the boot_info_t structure itself.
 * It displays the mismatching values and calls boot_exit() otherwise.
 * @ boot_info  : pointer to local 'boot_info_t' structure to be checked.
 * @ lid        : core local identifier, index the core descriptor table.
 ****************************************************************************/
static void boot_check_core( boot_info_t * boot_info, 
                             lid_t         lid)
{
    // hardware identifier of the calling core
    gid_t         hw_gid = (gid_t)boot_get_procid();

    // BOOT_INFO descriptor supposed to describe the calling core
    boot_core_t * desc   = &boot_info->core[lid];

    // nothing to report when both identifiers match
    if ( (desc->gid == hw_gid) && (desc->cxy == boot_info->cxy) ) return;

    boot_printf("\n[BOOT ERROR] in boot_check_core() :\n"
                " - boot_info cxy = %x\n"
                " - boot_info lid = %d\n"
                " - boot_info gid = %x\n"
                " - actual    gid = %x\n",
                desc->cxy , desc->lid , desc->gid , hw_gid );
    boot_exit();

} // boot_check_core()

/****************************************************************************
 * This function is called by the bscpu to activate all other CP0s.
 * For each cluster described in ARCHINFO (but the boot cluster, and the
 * clusters without core or without device), it writes the boot_entry
 * address at the base address of each XCU device found in the cluster.
 * It returns the number of such writes (one per XCU device found), that
 * is used by the caller as the number of activated CP0s.
 ****************************************************************************/
static uint32_t boot_wake_cp0()
{
    archinfo_header_t  * header       = (archinfo_header_t*)ARCHINFO_BASE;
    archinfo_cluster_t * cluster_base = archinfo_get_cluster_base(header);
    archinfo_device_t  * device_base  = archinfo_get_device_base (header);

    archinfo_cluster_t * cluster_end  = &cluster_base[header->x_size * header->y_size];
    archinfo_cluster_t * cluster;       // iterator on clusters
    archinfo_device_t  * device;        // iterator on devices of one cluster
    archinfo_device_t  * device_end;

    uint32_t             woken = 0;     // number of wake-up requests sent

    for (cluster = cluster_base; cluster < cluster_end; cluster++)
    {
        // the boot cluster is already running, and a cluster without
        // core (no CP0) or without device (no XCU) cannot be activated
        if ((cluster->cxy == BOOT_CORE_CXY) ||
            (cluster->cores == 0)           ||
            (cluster->devices == 0))
        {
            continue;
        }

        // look for the XCU of this cluster, and write the boot_entry
        // address to it, in order to wake up the CP0
        device     = &device_base[cluster->device_offset];
        device_end = &device_base[cluster->device_offset + cluster->devices];

        while (device < device_end)
        {
            if (device->type == DEV_TYPE_XCU)
            {
                boot_remote_sw((xptr_t)device->base, (uint32_t)boot_entry);
                woken++;
            }
            device++;
        }
    }

    return woken;

} // boot_wake_cp0()

/****************************************************************************
 * This function is called by all CP0 to activate all local CPi cores.
 * For each XCU device registered in the local 'boot_info_t' structure, it
 * writes the boot_entry address at address (XCU base + 4 * lid), for each
 * core but the core 0.
 * @ boot_info  : pointer to local 'boot_info_t' structure, used to find
 *                the XCU device and the number of cores in the cluster.
 ****************************************************************************/
static void boot_wake_local_cores(boot_info_t* boot_info)
{
    boot_device_t * dev     = &boot_info->dev[0];
    boot_device_t * dev_end = &boot_info->dev[boot_info->devices_nr];
    unsigned int    lid;            // local index of the core to wake up

    for ( ; dev < dev_end; dev++)
    {
        // only the XCU can be used to wake up a core
        if (dev->type != DEV_TYPE_XCU) continue;

        // one 32 bits word per core, core 0 is the caller
        lid = 1;
        while (lid < boot_info->cores_nr)
        {
            boot_remote_sw((xptr_t) (dev->base + (lid << 2)),
                           (uint32_t)boot_entry);
            lid++;
        }
    }
} // boot_wake_local_cores()

/****************************************************************************
 *                               API functions.                             *
 ****************************************************************************/

/****************************************************************************
 * This main function of the boot-loader is called by the boot_entry()
 * function, and executed by all cores.
 * The arguments values are computed by the boot_entry code.
 * Three cases are handled:
 * - bscpu (lid == 0 in cluster BOOT_CORE_CXY) loads the files from disk,
 *   builds its boot_info_t, wakes the other CP0s, waits for them, wakes
 *   and waits for its local CPi, then moves the kernel image to its final
 *   location.
 * - the other CP0s (lid == 0) copy the boot code, the arch_info and the
 *   kernel image in their cluster, build their boot_info_t, wake and wait
 *   for their local CPi, then report to the global barrier.
 * - the CPi (lid != 0) report to the local barrier and check their core
 *   descriptor.
 * All cores finally jump to the kernel entry point.
 * @ lid    : core local identifier in its cluster,
 * @ cxy    : cluster identifier,
 ****************************************************************************/
void boot_loader( lid_t lid, 
                  cxy_t cxy )
{
    boot_info_t * boot_info;   // Pointer on local boot_info_t structure
    uint32_t     local_count;  // Number of cores expected in local barrier

    if (lid == 0) 
    {
        /**************************************
         * PHASE ONE: only bscpu executes it. *
         **************************************/
        if (cxy == BOOT_CORE_CXY)
        {
            boot_printf("\n[BOOT] Starting on core[%d] in cluster %x at cycle %d\n",
                        lid, cxy, boot_get_proctime());

            // Initialize IOC driver 
            if      (USE_IOC_BDV) boot_bdv_init();
            else if (USE_IOC_HBA) boot_hba_init();
            /*
            else if (USE_IOC_SDC) boot_sdc_init();
            else if (USE_IOC_SPI) boot_spi_init();
            */
            else if (!USE_IOC_RDK)
            {
                boot_printf("\n[BOOT ERROR] boot_loader(): "
                            "No IOC driver\n"
                           );
                boot_exit();
            }

            // Initialize simplified version of FAT32. 
            boot_fat32_init();

            // Load the arch_info.bin file into memory.
            boot_archinfo_load();

            // Load the 'kernel.elf' file into memory.
            boot_kernel_load();

            // Get local 'boot_info_t' structure base address.
            // It is the first structure in the .kdata segment. 
            // The kernel image is still at its temporary location.
            boot_info = (boot_info_t*)(KERN_IMG_TMP_BASE + kdata_base);

            // Signature problem, abort program !!!
            if (boot_info->signature != BOOT_INFO_SIGNATURE)
            {
                boot_printf("\n[BOOT] boot_loader(): "
                            "boot_info signature should be %x\n",
                            BOOT_INFO_SIGNATURE);
                boot_exit();
            }

            // Initialize local 'boot_info_t' structure.
            boot_printf("\n[BOOT] boot_loader(): "
                        "Initializing the local boot_info_t structure "
                        "at cycle %d\n",
                        boot_get_proctime());

            boot_info_init(boot_info, cxy);

            // Check core information.
            boot_check_core(boot_info, lid);

            // Set the barrier.
            boot_cluster_ready = 0;

            // Activate other CP0s
            boot_printf("\n[BOOT] boot_loader(): "
                        "Waking other CP0 up at cycle %d\n",
                        boot_get_proctime());

            global_count = boot_wake_cp0();

            // Wait until all CP0s ready to enter kernel.
            boot_printf("\n[BOOT] boot_loader(): "
                        "Waiting for other %d CP0 at cycle %d\n",
                        global_count, boot_get_proctime());

            boot_barrier(XPTR(BOOT_CORE_CXY, &global_barrier), 
                         global_count);

            // activate other local cores of the boot cluster.
            local_barrier = 0;
            boot_printf("\n[BOOT] boot_loader(): "
                        "Waking other CPi up at cycle %d\n",
                        boot_get_proctime());

            boot_wake_local_cores(boot_info);

            // Wait until all other local cores are ready 
            boot_printf("\n[BOOT] boot_loader(): "
                        "Waiting for other %d CPi at cycle %d\n",
                        boot_info->cores_nr - 1, boot_get_proctime());

            // Each local CPi increments local_barrier once.
            local_count = boot_info->cores_nr - 1;
            while (local_barrier != local_count);

            // Move the local kernel image at address 0x0 (erase preloader code).
            // The trailing space in the second literal keeps the words
            // "preloader" and "at" separated in the displayed message.
            boot_printf("\n[BOOT] boot_loader(): "
                        "Moving the kernel image and erasing the preloader "
                        "at cycle %d\n",
                        boot_get_proctime());

            // ktext segment.
            boot_memcpy((void*)ktext_base,
                        (void*)(KERN_IMG_TMP_BASE + ktext_base),
                        ktext_end - ktext_base);

            // kdata segment.
            boot_memcpy((void*)kdata_base,
                        (void*)(KERN_IMG_TMP_BASE + kdata_base),
                        kdata_end - kdata_base);
            
            // release the local CPi cores polling boot_cluster_ready.
            boot_cluster_ready = 1;
            boot_printf("\n[BOOT] boot_loader(): "
                        "Everything is set, jumping to the kernel "
                        "at cycle %d\n",
                        boot_get_proctime());
        }
        /****************************************************
         * PHASE TWO: all CP0s other than bscpu execute it. *
         ****************************************************/
        else
        {
            /* 
             * Note: at this point, we cannot access the global variables of
             * this boot code since all the address extension registers for 
             * DATA are pointing to their local cluster in order to have 
             * access to the local stack and execute this C code. 
             * However, all the address extension registers for INSTRUCTIONS 
             * are still pointing to the boot cluster, thus we can access 
             * and call functions defined in the boot code, for example 
             * boot_remote_memcpy().
             */

            // Copy the boot-loader binary code 'boot.elf' into the local memory
            boot_remote_memcpy(XPTR(cxy,           BOOT_BASE),
                               XPTR(BOOT_CORE_CXY, BOOT_BASE),
                               (unsigned int)BOOT_MAX_SIZE);

            /*
             * Note: from now on, it is safe to refer to the boot code global variables 
             * such as the base address and size of the kernel segments.
             */

            // switch to the INSTRUCTION local memory space,  
            // to avoid contention at the boot cluster.
            asm volatile("mtc2  %0, $25" :: "r"(cxy));

            // Copy the 'arch_info.bin' file into the local memory.
            boot_remote_memcpy(XPTR(cxy,           ARCHINFO_BASE),
                               XPTR(BOOT_CORE_CXY, ARCHINFO_BASE),
                               (unsigned int)ARCHINFO_MAX_SIZE);

            // Copy the kernel image into local memory at address 0x0.
            // ktext segment.
            boot_remote_memcpy(XPTR(cxy, ktext_base),
                               XPTR(BOOT_CORE_CXY, KERN_IMG_TMP_BASE + ktext_base),
                               ktext_end - ktext_base);
            // kdata segment.
            boot_remote_memcpy(XPTR(cxy, kdata_base),
                               XPTR(BOOT_CORE_CXY, KERN_IMG_TMP_BASE + kdata_base),
                               kdata_end - kdata_base);

            // Get local 'boot_info_t' structure base address.
            // This is the first structure in the kdata segment.
            boot_info = (boot_info_t*)kdata_base;

            // Initialize local boot_info_t structure.
            boot_info_init(boot_info, cxy);

            // Check core information.
            boot_check_core(boot_info, lid);

            // Activate all other local CPi cores in this cluster.
            local_barrier = 0;
            boot_wake_local_cores(boot_info);

            // Waiting until all other local cores ready
            local_count = boot_info->cores_nr - 1;
            while (local_barrier != local_count);

            // All cores in this cluster are ready to enter kernel.
            // NOTE(review): global_count is presumably read here from the
            // local copy of the boot code made above, while bscpu writes it
            // only after boot_wake_cp0() returns - confirm that the value
            // seen here is the intended one.
            boot_barrier(XPTR(BOOT_CORE_CXY, &global_barrier), 
                         global_count);
        }
    }
    else
    {
        /***************************************************************
         * PHASE THREE: all non CP0 cores in all clusters execute it.  *
         **************************************************************/

        if (cxy == BOOT_CORE_CXY)  // boot cluster only
        {
            // Report to the local CP0 that CPi is ready
            boot_atomic_add((int32_t*)&local_barrier, 1);

            // wait completion of kernel image move in  boot cluster
            while (boot_cluster_ready != 1);

            // Check core information
            // (the boot_info_t structure is now at its final location).
            boot_info = (boot_info_t*)kdata_base;
            boot_check_core(boot_info, lid);
        }
        else                      // other clusters
        {
            // Switch to the INSTRUCTIONS local memory space 
            // to avoid contention at the boot cluster.
            asm volatile("mtc2  %0, $25" :: "r"(cxy));

            // Report to the local CP0 that CPi is ready
            boot_atomic_add((int32_t*)&local_barrier, 1);

            // Check core information
            boot_info = (boot_info_t*)kdata_base;
            boot_check_core(boot_info, lid);
        }
    }

    // Jump to the kernel code. 
    asm volatile("jr   %0" :: "r"(kernel_entry));

} // boot_loader()
