/*
 * kernel_init.c - kernel parallel initialization
 * 
 * Authors :  Alain Greiner  (2016)
 *
 * Copyright (c) Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <almos_config.h>
#include <errno.h>
#include <hal_types.h>
#include <hal_special.h>
#include <hal_context.h>
#include <remote_barrier.h>
#include <core.h>
#include <list.h>
#include <thread.h>
#include <scheduler.h>
#include <kmem.h>
#include <cluster.h>
#include <devfs.h>
#include <sysfs.h>
#include <string.h>
#include <memcpy.h>
#include <ppm.h>
#include <page.h>
#include <device.h>
#include <boot_info.h>
#include <dqdt.h>
#include <dev_icu.h>
#include <dev_mmc.h>
#include <dev_mwr.h>
#include <dev_iox.h>
#include <dev_ioc.h>
#include <dev_pic.h>
#include <printk.h>
#include <vfs.h>


// Value written by core[0] in the local_sync_init variable, once the shared resources
// of the cluster are initialised, to release the other cores of the same cluster.
#define KERNEL_INIT_SYNCRO  0xA5A5B5B5

///////////////////////////////////////////////////////////////////////////////////////////
// All these global variables are replicated in all clusters.
// They are initialised by the kernel_init() function, or by the functions it calls.
///////////////////////////////////////////////////////////////////////////////////////////

// This variable defines the local cluster manager.
cluster_t            cluster_manager    CACHELINE_ALIGNED; 

// These two variables define the kernel process descriptor,
// and the associated kernel thread descriptor.
process_t            process_zero       CACHELINE_ALIGNED;            
thread_t             thread_zero        CACHELINE_ALIGNED;

// This variable contains the extended pointers on the device descriptors.
// It is filled by the devices_init() function.
devices_directory_t  devices_dir        CACHELINE_ALIGNED;

// This variable contains the input IRQ indexes for the PIC and ICU devices.
// It is filled by the devices_init() function.
devices_input_irq_t  devices_input_irq   CACHELINE_ALIGNED;

// This variable synchronizes the cores of one cluster during kernel_init():
// it is set to KERNEL_INIT_SYNCRO by core[0], and polled by the other cores.
volatile uint32_t    local_sync_init = 0;  

// This variable defines the local cluster identifier.
// It is copied from the cxy field of the boot_info_t structure.
cxy_t                local_cxy;


///////////////////////////////////////////////////////////////////////////////////////////
// This function displays the ALMOS-MKH banner, followed by the kernel version string
// defined by the CONFIG_ALMOS_VERSION configuration parameter.
// It takes no argument and returns nothing: it only calls printk().
///////////////////////////////////////////////////////////////////////////////////////////
static void print_boot_banner( void )
{
    // ASCII-art logo : one printk() per row
    printk("\n           ____        ___        ___       ___    _______    ________         ___       ___   ___     ___   \n");
    printk("          /    \\      |   |      |   \\     /   |  /  ___  \\  /  ____  |       |   \\     /   | |   |   /  / \n");
    printk("         /  __  \\      | |        |   \\___/   |  |  /   \\  | | /    |_/        |   \\___/   |   | |   /  /  \n");
    printk("        /  /  \\  \\     | |        |  _     _  |  | |     | | | |______   ___   |  _     _  |   | |__/  /     \n");
    printk("       /  /____\\  \\    | |        | | \\   / | |  | |     | | \\______  \\ |___|  | | \\   / | |   |  __  <  \n");
    printk("      /   ______   \\   | |     _  | |  \\_/  | |  | |     | |  _     | |        | |  \\_/  | |   | |  \\  \\  \n");
    printk("     /   /      \\   \\  | |____/ | | |       | |  |  \\___/  | | \\____/ |        | |       | |   | |   \\  \\\n");
    printk("    /_____/    \\_____\\|_________/|___|     |___|  \\_______/  |________/       |___|     |___| |___|   \\__\\\n");

    // system name and version
    printk("\n\n\t\t\t\t Multi-Kernel Advanced Locality Management Operating System\n");
    printk("\t\t\t\t   %s \n\n\n", CONFIG_ALMOS_VERSION );
}


///////////////////////////////////////////////////////////////////////////////////////////
// This static function allocates memory for all devices descriptors associated 
// to peripherals contained in the local cluster: 
// - the internal (replicated) devices are placed in the local cluster.
// - the external devices are pseudo-randomly distributed on all clusters. 
// It initialises these device descriptors as specified by the boot_info_t structure,
// including the dynamic linking with the driver for the specified implementation.
// Finally, all copies of the devices directory are initialised.
// TODO check that cluster IO contains a PIC and IOB [AG]
///////////////////////////////////////////////////////////////////////////////////////////
// @ info    : pointer on the local boot-info structure.
///////////////////////////////////////////////////////////////////////////////////////////
static void devices_init( boot_info_t * info )
{
    boot_device_t * dev_tbl;         // pointer on array of devices in boot_info
	uint32_t        dev_nr;          // actual number of devices in this cluster
	xptr_t          xp_dev;          // extended pointer on device descriptor
	xptr_t          xp_dev_bis;      // extended pointer on second device descriptor
	xptr_t          base;            // remote pointer on segment base
	uint32_t        size;            // channel size (bytes)
    uint32_t        type;            // peripheral type
    uint32_t        func;            // device functionnal index
    uint32_t        impl;            // device implementation index
	uint32_t        i;               // device index in dev_tbl
	uint32_t        x;               // X cluster coordinate
	uint32_t        y;               // Y cluster coordinate
	uint32_t        channels;        // number of channels
	uint32_t        chl;             // channel index
	uint32_t        p0;              // device parameter 0
	uint32_t        p1;              // device parameter 1
	uint32_t        p2;              // device parameter 2
	uint32_t        p3;              // device parameter 3
    bool_t          is_local;        // true for internal peripherals

    // get number of peripherals from boot_info
	dev_nr	    = info->devices_nr;
    dev_tbl     = info->dev;

    // loop on all peripherals in cluster
	for( i = 0 ; i < dev_nr ; i++ )
	{
        size     = dev_tbl[i].size;
        base     = dev_tbl[i].base;
        type     = dev_tbl[i].type;
        channels = dev_tbl[i].channels;
        p0       = dev_tbl[i].param0;
        p1       = dev_tbl[i].param1;
        p2       = dev_tbl[i].param2;
        p3       = dev_tbl[i].param3;

        func     = FUNC_FROM_TYPE( type );
        impl     = IMPL_FROM_TYPE( type );

        // do nothing for RAM and ROM functional types
        if( (type == DEV_FUNC_RAM) || (type == DEV_FUNC_ROM) ) continue;

        // loop on channels in peripheral
        for( chl = 0 ; chl < channels ; chl++ )
        { 
            // target cluster is local for internal (replicated) peripherals
            if( (func == DEV_FUNC_ICU) || 
                (func == DEV_FUNC_MMC) ||
                (func == DEV_FUNC_MWR) )   is_local = true;
            else                           is_local = false;

            // allocate memory and initialize device descriptor
		    xp_dev = device_alloc( info , is_local );

            if( xp_dev == XPTR_NULL ) hal_core_sleep();
            
            device_init( xp_dev ,
                         func ,
                         impl,
                         chl,
                         false,                    // TX
                         base + size*chl,
                         size );

            // allocate memory and initialise another device descriptor if NIC, 
            // ALMOS-MKH uses two separate devices descriptor for NIC_RX and NIC_TX
            if( func == DEV_FUNC_NIC )
            {
                xp_dev_bis = device_alloc( info , is_local );

                if( xp_dev_bis == XPTR_NULL ) hal_core_sleep();

                device_init( xp_dev_bis ,
                             func ,
                             impl,
                             chl,
                             true,                // RX
                             (base + size*chl),
                             size );
             }

            // TODO ??? AG
		    // devfs_register( dev );

            // make device type specific initialisation
            // the number of parameters depends on the device type
            if     ( func == DEV_FUNC_ICU ) dev_icu_init( xp_dev , p0 , p1 , p2 );     
            else if( func == DEV_FUNC_MMC ) dev_mmc_init( xp_dev );
// TODO     else if( func == DEV_FUNC_MWR ) dev_mwr_init( xp_dev , p0 , p1 , p2 , p3 );
            else if( func == DEV_FUNC_IOB ) dev_iox_init( xp_dev );
            else if( func == DEV_FUNC_IOC ) dev_ioc_init( xp_dev );
            else if( func == DEV_FUNC_TXT ) dev_txt_init( xp_dev );
            else if( func == DEV_FUNC_PIC ) dev_pic_init( xp_dev , p0 );
            else if( func == DEV_FUNC_NIC ) dev_nic_init( xp_dev );
            else                            hal_core_sleep();

           // initialize the replicated devices_dir[x][y] structures
           // defining the extended pointers on all devices descriptors 
           xptr_t * ptr_dev;    
           xptr_t * ptr_dev_bis;    

           if( func == DEV_FUNC_ICU ) ptr_dev     = &devices_dir.icu[local_cxy];
           if( func == DEV_FUNC_MMC ) ptr_dev     = &devices_dir.mmc[local_cxy];
           if( func == DEV_FUNC_MWR ) ptr_dev     = &devices_dir.mwr[local_cxy];

           if( func == DEV_FUNC_TXT ) ptr_dev     = &devices_dir.txt[chl];
           if( func == DEV_FUNC_IOB ) ptr_dev     = &devices_dir.iob;
           if( func == DEV_FUNC_IOC ) ptr_dev     = &devices_dir.ioc;
           if( func == DEV_FUNC_PIC ) ptr_dev     = &devices_dir.pic;
           if( func == DEV_FUNC_NIC ) ptr_dev     = &devices_dir.nic_tx[chl];
           if( func == DEV_FUNC_NIC ) ptr_dev_bis = &devices_dir.nic_rx[chl];
  
            for( x = 0 ; x < info->x_size ; x++ )
            {
                for( y = 0 ; y < info->y_size ; y++ )
                {
                    cxy_t  cxy = (x<<info->y_width) + y;
                     
                    hal_remote_swd( XPTR( cxy , ptr_dev ) , xp_dev );

                    if( func == DEV_FUNC_NIC )
                    {
                       hal_remote_swd( XPTR( cxy , ptr_dev_bis ) , xp_dev_bis );
                    }
                }
            }
           
		    kinit_dmsg("[INFO] %s created device %s / channel %d / in cluster %x\n",
                       __FUNCTION__ , device_func_str[func] , chl , dev_cxy );

        } // end loop on channels

        // initialize the replicated devices_irq[x][y] structures
        // defining how peripherals are connected to PIC or ICU components
        uint32_t   id;
        uint8_t    valid;
        uint32_t   dev_type;
        uint8_t    channel;
        uint8_t    is_rx;
        uint32_t * ptr_irq; 

        // default initiialization for devices_irq structure
        
        // only external peripherals can be connected to PIC
        if( func == DEV_FUNC_PIC )  
        {
            // loop on PIC inputs
            for( id = 0 ; id < CONFIG_MAX_IRQS_PER_PIC ; id++ )
            {
                valid     = dev_tbl[i].irq[id].valid;
                dev_type  = dev_tbl[i].irq[id].dev_type;
                channel   = dev_tbl[i].irq[id].channel;
                is_rx     = dev_tbl[i].irq[id].is_rx;

                // only valid IRQs are registered in the devices_input_irq structure
                // ptr_irq is a local pointer on the entry to be set in devices_irq
                if( valid )
                {
                    uint16_t dev_func = FUNC_FROM_TYPE( dev_type );
                    if( dev_func == DEV_FUNC_TXT )   
                        ptr_irq = &devices_input_irq.txt[channel];
                    if( dev_func == DEV_FUNC_IOC )                   
                        ptr_irq = &devices_input_irq.ioc; 
                    if( (dev_func == DEV_FUNC_NIC) && (is_rx == 0) ) 
                        ptr_irq = &devices_input_irq.nic_tx[channel]; 
                    if( (dev_func == DEV_FUNC_NIC) && (is_rx != 0) ) 
                        ptr_irq = &devices_input_irq.nic_rx[channel]; 
                   
                    // all copies of devices_irq must be updated in all clusters
                    for( x = 0 ; x < info->x_size ; x++ )
                    {
                        for( y = 0 ; y < info->y_size ; y++ )
                        {
                            cxy_t  cxy = (x<<info->y_width) + y;
                            hal_remote_sw( XPTR( cxy , ptr_irq ) , id );
                        }	
                    }
                }
            } // end loop on PIC inputs
        } // end PIC

        // only internal peripherals can be connected to ICU
        if( func == DEV_FUNC_ICU )  
        {
            // loop on ICU inputs
            for( id = 0 ; id < CONFIG_MAX_HWIS_PER_ICU ; id++ )
            {
                valid    = dev_tbl[i].irq[id].valid;
                dev_type = dev_tbl[i].irq[id].dev_type;
                channel  = dev_tbl[i].irq[id].channel;

                // only valid IRQs are registered in the devices_input_irq structure
                // ptr_irq is a local pointer on the entry to be set in devices_irq
                if( valid )
                {
                    uint16_t dev_func = FUNC_FROM_TYPE( dev_type );
                    if( dev_func == DEV_FUNC_MMC )  
                        ptr_irq = &devices_input_irq.mmc[local_cxy];
                    if( dev_func == DEV_FUNC_MWR )  
                        ptr_irq = &devices_input_irq.mwr[local_cxy]; 
                   
                    // all copies of devices_irq must be updated in all clusters
                    for( x = 0 ; x < info->x_size ; x++ )
                    {
                        for( y = 0 ; y < info->y_size ; y++ )
                        {
                            cxy_t  cxy = (x<<info->y_width) + y;
                            hal_remote_sw( XPTR( cxy , ptr_irq ) , id );
                        }
                    }
                }
            }  // end loop on ICU inputs
        }  // end ICU
	} // end loop on peripherals
}  // end devices_init()



///////////////////////////////////////////////////////////////////////////////////////////
// This function is the entry point for the kernel initialisation.
// It is executed by all cores in all clusters, but only core[0] in each cluster
// initializes the cluster manager, and the other local shared resources
// (process_zero, thread_zero, devices descriptors).
// To comply with the multi-kernels paradigm, it uses only informations contained
// in the local boot_info_t structure, that has been set by the bootloader.
// The other cores wait until core[0] has set the local_sync_init variable.
// Then all cores :
// - create their private idle thread, and register it in their scheduler,
// - synchronize : first all core[0] of all clusters, then all cores of each cluster,
// - load the idle thread context.
// The calling core calls hal_core_sleep() after displaying a message when it is
// not correctly registered in boot_info, or when an initialisation step fails.
///////////////////////////////////////////////////////////////////////////////////////////
// @ info    : pointer on the local boot-info structure.
///////////////////////////////////////////////////////////////////////////////////////////
void kernel_init( boot_info_t * info )
{
    uint32_t     core_lid;      // running core local index
    cxy_t        core_cxy;      // running core cluster identifier
    gid_t        core_gid;      // running core hardware identifier
    cluster_t  * cluster;       // pointer on local cluster manager
    core_t     * core;          // pointer on running core descriptor
    thread_t   * thread_idle;   // pointer on thread_idle

    uint32_t     i;
    bool_t       found;
    error_t      error;

    // initialise global cluster identifier
    local_cxy = info->cxy;

    // each core get its global index from hardware register
    core_gid = hal_get_gid();

    // Each core makes an associative search in boot_info
    // to get its (cxy,lid) composite index
    found    = false;
    core_cxy = 0;
    core_lid = 0;
    for( i = 0 ; i < info->cores_nr ; i++ )
    {
        if( core_gid == info->core[i].gid )
        {
            core_lid = info->core[i].lid;
            core_cxy = info->core[i].cxy;
            found    = true;
            break;
        }
    }

    if( found == false )
    {
        printk("PANIC in %s : Core %d not registered in cluster %x\n",
               __FUNCTION__ , core_gid , local_cxy );
        hal_core_sleep();
    }

    if( core_cxy != local_cxy )
    {
        printk("PANIC in %s : Core %d has wrong cxy in cluster %x\n",
               __FUNCTION__ , core_gid , local_cxy );
        hal_core_sleep();
    }

    // from this point, only core[0] initialises local resources
    if( core_lid == 0 )
    {
        // initialize local cluster manager (cores and memory allocators) :
        // as for thread_kernel_create() below, a non zero value is a failure
        error = cluster_init( info );
        if( error )
        {
            printk("PANIC in %s : Failed to initialize cluster manager in cluster %x\n",
                   __FUNCTION__ , local_cxy );
            hal_core_sleep();
        }

        // initialize process_zero descriptor
        process_zero_init( info );

        // initialize thread_zero descriptor
        memset( &thread_zero , 0 , sizeof(thread_t) );
        thread_zero.type     = THREAD_KERNEL;
        thread_zero.process  = &process_zero;
        hal_set_current_thread( &thread_zero );

        // initialise local devices descriptors
        devices_init( info );

        // TODO initialize devFS and sysFS
        // devfs_root_init();
        // sysfs_root_init();

        // TODO say precisely what is done here [AG]
        // hal_arch_init( info );

        // TODO ??? [AG]
        // clusters_sysfs_register();

        // initialize virtual file system
        // vfs_init();

        // TODO ??? [AG]
        // sysconf_init();

        // activate other cores in same cluster :
        // presumably hal_wbflush() drains the pending writes, so it is called
        // before the flag is set (all shared structures must be written first),
        // and after (to make the flag itself visible) - TODO confirm
        hal_wbflush();
        local_sync_init = KERNEL_INIT_SYNCRO;
        hal_wbflush();
    }
    else   // other cores
    {
        // other cores wait synchro from core[0],
        // polling the flag every 1000 cycles to reduce the memory traffic
        while( local_sync_init != KERNEL_INIT_SYNCRO )
        {
            uint32_t deadline = hal_time_stamp() + 1000;
            while( hal_time_stamp() < deadline )  asm volatile ("nop");
        }

        // other cores only register thread_zero as their current thread :
        // this shared descriptor has been initialised by core[0], that is already
        // running on it, and it must not be reset by another core
        hal_set_current_thread( &thread_zero );
    }

    // each core get pointer on local cluster manager and on core descriptor
    cluster = LOCAL_CLUSTER;
    core    = &cluster->core_tbl[core_lid];

    // each core creates its private idle thread descriptor
    error = thread_kernel_create( &thread_idle,
                                  THREAD_IDLE,
                                  &thread_idle_func,
                                  NULL,
                                  core_lid );
    if( error )
    {
        printk("ERROR in %s: failed to create idle thread for core %d in cluster %x\n",
               __FUNCTION__ , core_lid , core_cxy );
        hal_core_sleep();
    }

    // each core register thread_idle in scheduler
    core->scheduler.idle = thread_idle;

    // each core register thread pointer in core hardware register
    hal_set_current_thread( thread_idle );

    // one argument per conversion : function name, thread pointer, core gid, cycle
    kinit_dmsg("INFO %s  Created thread idle %x for core %d at cycle %d ]\n",
               __FUNCTION__ , thread_idle , hal_get_gid() , hal_time_stamp() );

    // global synchro for all core[0] in all clusters, on the barrier of the I/O cluster
    // NOTE(review): in the I/O cluster, this barrier is the same cluster->barrier
    // variable as the one used below for the local synchro, so the other cores of
    // the I/O cluster can reach it while the global synchro is in progress - confirm
    // that remote_barrier() supports this, or use two separate barriers.
    if( core_lid == 0 )
    {
        remote_barrier( XPTR( info->io_cxy , &cluster->barrier ) ,
                        (cluster->x_size * cluster->y_size) );
    }

    // local synchro for all cores in local cluster
    remote_barrier( XPTR( local_cxy , &cluster->barrier ) ,
                    cluster->cores_nr );

    if( core_lid == 0 )
    {
        kinit_dmsg("INFO %s completed for cluster %x at cycle %d\n",
                   __FUNCTION__ , local_cxy , hal_time_stamp() );

        // the banner is displayed once, by core[0] of the I/O cluster
        if( local_cxy == info->io_cxy ) print_boot_banner();
    }

    // load idle thread context in calling core
    hal_cpu_context_load( thread_idle );

} // end kernel_init()



