//////////////////////////////////////////////////////////////////////////////////
// File     : hba_driver.c
// Date     : 23/11/2013
// Author   : alain greiner
// Copyright (c) UPMC-LIP6
///////////////////////////////////////////////////////////////////////////////////
// Implementation notes:
// All accesses to HBA registers are done by the two
// _hba_set_register() and _hba_get_register() low-level functions,
// that are handling virtual / physical extended addressing.
///////////////////////////////////////////////////////////////////////////////////

#include <giet_config.h>
#include <hard_config.h>
#include <hba_driver.h>
#include <xcu_driver.h>
#include <mmc_driver.h>
#include <kernel_locks.h>
#include <utils.h>
#include <tty0.h>
#include <ctx_handler.h>
#include <irq_handler.h>
#include <vmem.h>

///////////////////////////////////////////////////////////////////////////////////
//               Global variables
///////////////////////////////////////////////////////////////////////////////////

// Global index of the waiting task, for each entry in the command list.
// Written by _hba_access() in descheduling mode as (procid<<16) + ltid,
// and decoded by _hba_isr() to re-activate the task.
__attribute__((section(".kdata")))
unsigned int _hba_gtid[32];

// Status of the command, for each entry in the command list.
// Written by _hba_isr() with the value read from the HBA_PXIS register,
// and read back by _hba_access() when the descheduled task resumes.
__attribute__((section(".kdata")))
unsigned int _hba_status[32];

// Command list : up to 32 command descriptors (64 bytes aligned).
// The ctba / ctbau fields of each descriptor are set by _hba_init().
__attribute__((section(".kdata")))
hba_cmd_desc_t  _hba_cmd_list[32] __attribute__((aligned(0x40)));   

// Command tables array : one command table per entry in the command list
// (64 bytes aligned).
__attribute__((section(".kdata")))
hba_cmd_table_t _hba_cmd_table[32] __attribute__((aligned(0x40))); 

// Command list write index : next slot to register a command.
// Allocated with _atomic_increment() in _hba_access(); only the 5 LSB bits
// are used to index the command list.
__attribute__((section(".kdata")))
unsigned int     _hba_cmd_ptw;

// Command list read index : next slot to poll for a completed command.
// Incremented by _hba_isr() for each completed command; only the 5 LSB bits
// are used to index the command list.
__attribute__((section(".kdata")))
unsigned int     _hba_cmd_ptr;

//////////////////////////////////////////////////////////////////////////////
// Low-level read accessor: returns the current value of the HBA register
// selected by (index). The index is a 32-bit word offset from SEG_IOC_BASE,
// and the access itself is delegated to _io_extended_read().
//////////////////////////////////////////////////////////////////////////////
unsigned int _hba_get_register( unsigned int index )
{
    unsigned int* hba_regs = (unsigned int*)SEG_IOC_BASE;

    return _io_extended_read( &hba_regs[index] );
}

//////////////////////////////////////////////////////////////////////////////
// Low-level write accessor: stores (value) into the HBA register selected
// by (index). The index is a 32-bit word offset from SEG_IOC_BASE, and the
// access itself is delegated to _io_extended_write().
//////////////////////////////////////////////////////////////////////////////
void _hba_set_register( unsigned int index,
                        unsigned int value )
{
    unsigned int* hba_regs = (unsigned int*)SEG_IOC_BASE;

    _io_extended_write( &hba_regs[index], value );
}

///////////////////////////////////////////////////////////////////////////////
//      Extern functions
///////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////
// This function registers a command in both the command list and the
// command table, sets the matching bit in the HBA_PXCI register to launch
// the transfer, and waits for the command completion.
// Arguments:
// - use_irq   : 0 => poll HBA_PXCI until the command bit is cleared.
//               non-zero => deschedule the calling task, that is
//               re-activated by _hba_isr() when the command completes.
// - to_mem    : non-zero for a transfer towards the memory buffer
//               (descriptor flag[0] = 0x00), zero for a transfer from the
//               memory buffer (descriptor flag[0] = 0x40).
// - lba       : 32 bits logical block address, written in the command
//               table header.
// - buf_paddr : physical address of the memory buffer (64 bytes aligned).
// - count     : number of 512 bytes blocks to transfer.
// Return value:
// - 0 if success.
// - 0xFFFFFFFF (i.e. -1) if the buffer is not 64 bytes aligned.
// - the PXIS value if its bit 30 (0x40000000) is set after the transfer.
///////////////////////////////////////////////////////////////////////////////
unsigned int _hba_access( unsigned int       use_irq,
                          unsigned int       to_mem,
                          unsigned int       lba,  
                          unsigned long long buf_paddr,
                          unsigned int       count )   
{
    // decode the (x,y,p) coordinates of the calling processor
    unsigned int procid  = _get_procid();
    unsigned int x       = procid >> (Y_WIDTH + P_WIDTH);
    unsigned int y       = (procid >> P_WIDTH) & ((1<<Y_WIDTH) - 1);
    unsigned int p       = procid & ((1<<P_WIDTH)-1);

#if GIET_DEBUG_IOC_DRIVER
if (_get_proctime() > GIET_DEBUG_IOC_DRIVER)
_printf("\n[DEBUG HBA] _hba_access() : P[%d,%d,%d] enters at cycle %d\n"
        "  use_irq = %d / to_mem = %d / lba = %x / paddr = %l / count = %d\n",
        x , y , p , _get_proctime() , use_irq , to_mem , lba , buf_paddr, count );
#endif

    unsigned int       pxci;              // HBA_PXCI register value
    unsigned int       ptw;               // command list write pointer
    unsigned int       pxis;              // HBA_PXIS register value
    unsigned int       slot_mask;         // PXCI bit of the allocated slot
    hba_cmd_desc_t*    cmd_desc;          // command descriptor pointer   
    hba_cmd_table_t*   cmd_table;         // command table pointer

    // check buffer alignment
    if( buf_paddr & 0x3F )
    {
        _printf("\n[HBA ERROR] in _hba_access() : buffer not 64 bytes aligned\n");
        return (unsigned int)-1;
    }

    // get one entry in Command List
    // atomic increment on the _hba_cmd_ptw allocator
    // only the 5 LSB bits are used to index the Command List
    ptw = _atomic_increment( &_hba_cmd_ptw , 1 ) & 0x1F;

    // unsigned constant: (1<<31) on a signed int is undefined behaviour
    slot_mask = 1U << ptw;

    // blocked until allocated entry in Command List is empty
    do
    {
        // get PXCI register
        pxci = _hba_get_register( HBA_PXCI );
    } 
    while ( pxci & slot_mask );

    // compute pointers on command descriptor and command table    
    cmd_desc  = &_hba_cmd_list[ptw];
    cmd_table = &_hba_cmd_table[ptw];

    // set buffer descriptor in command table (address LSB / MSB, bytes count)
    cmd_table->buffer.dba  = (unsigned int)(buf_paddr);
    cmd_table->buffer.dbau = (unsigned int)(buf_paddr >> 32);
    cmd_table->buffer.dbc  = count * 512;

    // initialize command table header (lba is 32 bits : upper bytes are zero)
    cmd_table->header.lba0 = (char)lba;
    cmd_table->header.lba1 = (char)(lba>>8);
    cmd_table->header.lba2 = (char)(lba>>16);
    cmd_table->header.lba3 = (char)(lba>>24);
    cmd_table->header.lba4 = 0;
    cmd_table->header.lba5 = 0;

    // initialise command descriptor : one single buffer, transfer direction
    cmd_desc->prdtl[0] = 1;
    cmd_desc->prdtl[1] = 0;
    if( to_mem ) cmd_desc->flag[0] = 0x00;
    else         cmd_desc->flag[0] = 0x40;     

#if USE_IOB    // software L2/L3 cache coherence

    // compute physical addresses
    unsigned long long cmd_desc_paddr;    // command descriptor physical address
    unsigned long long cmd_table_paddr;   // command table header physical address
    unsigned int       flags;             // unused

    if ( _get_mmu_mode() & 0x4 )
    {
        cmd_desc_paddr  = _v2p_translate( (unsigned int)cmd_desc  , &flags );
        cmd_table_paddr = _v2p_translate( (unsigned int)cmd_table , &flags );
    }
    else
    {
        cmd_desc_paddr  = (unsigned int)cmd_desc;
        cmd_table_paddr = (unsigned int)cmd_table;
    }

    // update external memory for command table 
    _mmc_sync( cmd_table_paddr & (~0x3F) , sizeof(hba_cmd_table_t) );

    // update external memory for command descriptor
    _mmc_sync( cmd_desc_paddr & (~0x3F) , sizeof(hba_cmd_desc_t) );

    // inval or synchronize memory buffer
    if ( to_mem )  _mmc_inval( buf_paddr, count<<9 );
    else           _mmc_sync( buf_paddr, count<<9 );

#endif     // end software L2/L3 cache coherence

    /////////////////////////////////////////////////////////////////////
    // In synchronous mode, we poll the PXCI register until completion
    /////////////////////////////////////////////////////////////////////
    if ( use_irq == 0 ) 
    {
        // disable IRQs in PXIE register before starting the transfer,
        // so that _hba_isr() cannot consume the PXIS status of a command
        // that is handled by polling
        _hba_set_register( HBA_PXIE , 0 );

        // start HBA transfer
        _hba_set_register( HBA_PXCI, slot_mask );

#if GIET_DEBUG_IOC_DRIVER
if (_get_proctime() > GIET_DEBUG_IOC_DRIVER)
_printf("\n[DEBUG HBA] _hba_access() : P[%d,%d,%d] get slot %d in Cmd List "
        " at cycle %d / polling\n",
        x , y , p , ptw , _get_proctime() );
#endif

        // poll PXCI[ptw] until command completed by HBA
        do
        {
            pxci = _hba_get_register( HBA_PXCI );

#if GIET_DEBUG_IOC_DRIVER
if (_get_proctime() > GIET_DEBUG_IOC_DRIVER)
_printf("\n[DEBUG HBA] _hba_access() : P[%d,%d,%d] wait on HBA_PXCI / pxci = %x\n",
        x , y , p , pxci );
#endif
        }
        while( pxci & slot_mask ); 
             
        // get PXIS register
        pxis = _hba_get_register( HBA_PXIS );

        // reset PXIS register
        _hba_set_register( HBA_PXIS , 0 );
    }

    /////////////////////////////////////////////////////////////////
    // in descheduling mode, we deschedule the task
    // and use an interrupt to reschedule the task.
    // We need a critical section, because we must reset the RUN bit
    // before launching the transfer, and we don't want to be 
    // descheduled between these two operations. 
    /////////////////////////////////////////////////////////////////
    else
    {

#if GIET_DEBUG_IOC_DRIVER
if (_get_proctime() > GIET_DEBUG_IOC_DRIVER)
_printf("\n[DEBUG HBA] _hba_access() : P[%d,%d,%d] get slot %d in Cmd List "
        "at cycle %d / descheduling\n",
        x , y , p , ptw , _get_proctime() );
#endif
        unsigned int save_sr;
        unsigned int ltid = _get_current_task_id();

        // activates HBA interrupts 
        _hba_set_register( HBA_PXIE , 0x00000001 ); 

        // set _hba_gtid[ptw] : used by _hba_isr() to identify the waiting task
        _hba_gtid[ptw] = (procid<<16) + ltid;

        // enters critical section
        _it_disable( &save_sr ); 

        // reset runnable 
        _set_task_slot( x, y, p, ltid, CTX_RUN_ID, 0 );  

        // start HBA transfer
        _hba_set_register( HBA_PXCI, slot_mask );

        // deschedule task
        _ctx_switch();                      

#if GIET_DEBUG_IOC_DRIVER
if (_get_proctime() > GIET_DEBUG_IOC_DRIVER)
_printf("\n[DEBUG HBA] _hba_access() : task %d on P[%d,%d,%d] resume at cycle %d\n",
        ltid , x , y , p , _get_proctime() );
#endif

        // restore SR
        _it_restore( &save_sr );

        // get command status, as saved by _hba_isr()
        pxis = _hba_status[ptw];
    }    

#if GIET_DEBUG_IOC_DRIVER
if (_get_proctime() > GIET_DEBUG_IOC_DRIVER)
_printf("\n[DEBUG HBA] _hba_access() : P[%d,%d,%d] exit at cycle %d\n",
        x , y , p , _get_proctime() );
#endif

    // bit 30 of PXIS signals a failed transfer : return the raw status
    if ( pxis & 0x40000000 ) return pxis;
    else                     return 0;

} // end _hba_access()


////////////////////////
unsigned int _hba_init()
{
    unsigned int       cmd_list_vaddr;
    unsigned int       cmd_table_vaddr;
    unsigned long long cmd_list_paddr;
    unsigned long long cmd_table_paddr;
    unsigned int       flags;            // unused

    // compute Command list & command table physical addresses
    cmd_list_vaddr  = (unsigned int)(&_hba_cmd_list[0]);
    cmd_table_vaddr = (unsigned int)(&_hba_cmd_table[0]);
    if ( _get_mmu_mode() & 0x4 )
    {
        cmd_list_paddr  = _v2p_translate( cmd_list_vaddr  , &flags );
        cmd_table_paddr = _v2p_translate( cmd_table_vaddr , &flags );
    }
    else
    {
        cmd_list_paddr  = (unsigned long long)cmd_list_vaddr;
        cmd_table_paddr = (unsigned long long)cmd_table_vaddr;
    }

    // initialise Command List pointers
    _hba_cmd_ptw = 0;
    _hba_cmd_ptr = 0;

    // initialise Command Descriptors in Command List
    unsigned int         c;      
    unsigned long long   paddr;
    for( c=0 ; c<32 ; c++ )
    {
        paddr = cmd_table_paddr + c * sizeof(hba_cmd_table_t);
        _hba_cmd_list[c].ctba  = (unsigned int)(paddr);
        _hba_cmd_list[c].ctbau = (unsigned int)(paddr>>32);
    }

    // initialise HBA registers 
    _hba_set_register( HBA_PXCLB  , (unsigned int)(cmd_list_paddr) );
    _hba_set_register( HBA_PXCLBU , (unsigned int)(cmd_list_paddr>>32) );
    _hba_set_register( HBA_PXIE   , 0 );
    _hba_set_register( HBA_PXIS   , 0 );
    _hba_set_register( HBA_PXCI   , 0 );
    _hba_set_register( HBA_PXCMD  , 1 );

    return 0;
}


///////////////////////////////////////////////////////////////////////////////
// Interrupt service routine for the HBA controller.
// It scans the command list from _hba_cmd_ptr up to (but not including)
// _hba_cmd_ptw, and stops at the first command whose HBA_PXCI bit is still
// set. For each completed command, it:
// - increments _hba_cmd_ptr,
// - saves the HBA_PXIS value in _hba_status[] and resets HBA_PXIS,
// - sets the CTX_RUN_ID slot of the task registered in _hba_gtid[] to 1,
// - sends a WTI to the processor owning that task.
// None of the three arguments is used by the function body.
// NOTE(review): HBA_PXIS is read and reset once per completed command, so
// when several commands complete in the same interrupt, only the first one
// gets the hardware status - confirm against the HBA specification.
///////////////////////////////////////////////////////////////////////////////
void _hba_isr( unsigned int irq_type,   // HWI / WTI
               unsigned int irq_id,     // index returned by ICU
               unsigned int channel )   // unused 
{
    // get HBA_PXCI containing commands status
    unsigned int pxci = _hba_get_register( HBA_PXCI );

    // we must handle all completed commands 
    // active commands are between  (_hba_cmd_ptr) and (_hba_cmd_ptw-1) 
    unsigned int current;
    for ( current = _hba_cmd_ptr ; current != _hba_cmd_ptw ; current++ )
    {
        // only the 5 LSB bits are used to index the Command List
        unsigned int ptr = current & 0x1F;
        
        // unsigned constant: (1<<31) on a signed int is undefined behaviour
        if ( (pxci & (1U<<ptr)) == 0 )    // command completed
        {
            // increment the 32 bits variable _hba_cmd_ptr
            _hba_cmd_ptr = (_hba_cmd_ptr + 1);

            // save PXIS register
            _hba_status[ptr] = _hba_get_register( HBA_PXIS );

            // reset PXIS register
            _hba_set_register( HBA_PXIS , 0 );
 
            // identify waiting task : _hba_gtid[ptr] is (procid<<16) + ltid
            unsigned int remote_procid  = _hba_gtid[ptr]>>16;
            unsigned int ltid           = _hba_gtid[ptr] & 0xFFFF;
            unsigned int remote_cluster = remote_procid >> P_WIDTH;
            unsigned int remote_x       = remote_cluster >> Y_WIDTH;
            unsigned int remote_y       = remote_cluster & ((1<<Y_WIDTH)-1);
            unsigned int remote_p       = remote_procid & ((1<<P_WIDTH)-1);
 
            // re-activates waiting task
            _set_task_slot( remote_x,
                            remote_y,
                            remote_p,
                            ltid,
                            CTX_RUN_ID,
                            1 );

            // send a WAKEUP WTI to processor running the waiting task 
            _xcu_send_wti( remote_cluster , 
                           remote_p , 
                           0 );          // don't force context switch

#if GIET_DEBUG_IOC_DRIVER  
if (_get_proctime() > GIET_DEBUG_IOC_DRIVER)
_printf("\n[DEBUG HBA] _hba_isr() : command %d completed at cycle %d\n"
        "  resume task %d running on P[%d,%d,%d] / status = %x\n",
        ptr , _get_proctime() ,
        ltid , remote_x , remote_y , remote_p , _hba_status[ptr] );
#endif
        }
        else                         // command non completed
        {
            // commands are handled in order : stop at the first pending one
            break;
        }
    }
} // end _hba_isr()

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

