/****************************************************************************************
File : drivers.c
Written by Alain Greiner & Nicolas Pouillon
Date : december 2010

Basic drivers used by the GIET, that is running
on the MIPS32 processor architecture.

The supported peripherals are:
- the SoClib pibus_multi_tty
- the SocLib pibus_timer
- the SocLib pibus_dma
- The SoCLib pibus_icu
- The SoCLib pibus_gcd
- The SoCLib pibus_frame_buffer
- The SoCLib pibus_block_device

The following global parameters must be defined in the ldscript.
- NB_CLUSTERS : number of clusters 
- NB_PROCS : number of processor per cluster
- NB_TASKS : max number of tasks per processor
- NB_LOCKS : max number of supported spin_locks
- NB_TIMERS : max number of timers per processor

The following base addresses must be defined in the ldscript
- seg_icu_base
- seg_timer_base
- seg_tty_base
- seg_gcd_base
- seg_dma_base
- seg_locks_base
- seg_fb_base
- seg_ioc_base
****************************************************************************************/

#include "drivers.h"
#include "icu.h"
#include "block_device.h"
#include "dma.h"

// Incomplete (never defined) type used only to declare the linker symbols below:
// the drivers never read these objects, they only take their ADDRESS and cast it
// to an integer or to a pointer.
struct plouf;

//////////////////////////////////////////////////////////////
// Various pieces of information that must be defined in the ldscript.
// - seg_xxx_base : the address of the symbol is the base address
//                  of the corresponding peripheral segment.
// - NB_xxx       : the address of the symbol is the VALUE of the
//                  parameter (e.g. (unsigned int)&NB_PROCS).
//////////////////////////////////////////////////////////////
extern struct plouf seg_icu_base;
extern struct plouf seg_timer_base;
extern struct plouf seg_tty_base;
extern struct plouf seg_gcd_base;
extern struct plouf seg_dma_base;
extern struct plouf seg_locks_base;
extern struct plouf seg_fb_base;
extern struct plouf seg_ioc_base;

extern struct plouf NB_CLUSTERS;
extern struct plouf NB_PROCS;
extern struct plouf NB_TASKS;
extern struct plouf NB_TIMERS;
extern struct plouf NB_LOCKS;

// Placement attributes: in_drivers puts a function in the ".drivers" section,
// in_unckdata puts a variable in the ".unckdata" section.
#define in_drivers __attribute__((section (".drivers")))
#define in_unckdata __attribute__((section (".unckdata")))

////////////////////////////////////////////////////////////////////////////////////////
//  Global uncachable variables for synchronization between drivers and ISRs.
//  They are all placed in the .unckdata section. The arrays have 256 entries,
//  and every explicitly initialised flag starts at zero.
////////////////////////////////////////////////////////////////////////////////////////

// DMA : status and busy flags
in_unckdata int volatile    _dma_status[256];
in_unckdata int volatile    _dma_busy[256]      = { 0 };

// IOC (block device) : lock, completion flag and status
in_unckdata int volatile    _ioc_lock           = 0;
in_unckdata int volatile    _ioc_done           = 0;
in_unckdata int volatile    _ioc_status;

// TTY input : kernel character buffers and their "full" flags
in_unckdata char volatile   _tty_get_buf[256];
in_unckdata int volatile    _tty_get_full[256]  = { 0 };

// TTY output : kernel character buffers and their "full" flags
in_unckdata char volatile   _tty_put_buf[256];
in_unckdata int volatile    _tty_put_full[256]  = { 0 };

////////////////////////////////////////////////////////////////////////////////////////
//  Global uncachable variables for inter-task barriers
//  (16 barriers, all fields initialised to zero)
////////////////////////////////////////////////////////////////////////////////////////

in_unckdata int volatile    _barrier_initial_value[16] = { 0 };
in_unckdata int volatile    _barrier_count[16]         = { 0 };
in_unckdata int volatile    _barrier_lock[16]          = { 0 };

////////////////////////////////////////////////////////////////////////////////////////
//  Global uncachable variables for spin_locks using LL/SC instructions
//  (256 locks, all initially free, i.e. equal to zero)
////////////////////////////////////////////////////////////////////////////////////////

in_unckdata int volatile    _spin_lock[256] = { 0 };

////////////////////////////////////////////////////////////////////////////////////////
//  memcpy()
// GCC requires this function. Taken from MutekH.
// Copies size bytes from _src to _dst and returns _dst.
// When both pointers are 4-byte aligned the bulk of the copy is done word by word;
// the remaining bytes (or the whole buffer when unaligned) are copied byte by byte.
////////////////////////////////////////////////////////////////////////////////////////
__attribute((used))
in_drivers static void *memcpy(void *_dst, const void *_src, unsigned int size)
{
    unsigned int *wdst = _dst;
    const unsigned int *wsrc = _src;

    // word copy, only possible when neither address has one of its two low bits set
    if ( (((unsigned int)wdst | (unsigned int)wsrc) & 3) == 0 )
    {
        for ( ; size > 3 ; size -= 4 )
        {
            *wdst++ = *wsrc++;
        }
    }

    // byte copy of whatever is left
    unsigned char *bdst = (unsigned char*)wdst;
    const unsigned char *bsrc = (const unsigned char*)wsrc;

    for ( ; size != 0 ; size-- )
    {
        *bdst++ = *bsrc++;
    }
    return _dst;
}

////////////////////////////////////////////////////////////////////////////////////////
//  _procid()
// Reads CP0 register $15 (select 1) and returns its 10 low-order bits as the
// processor identifier, hence no more than 1024 processors.
////////////////////////////////////////////////////////////////////////////////////////
in_drivers unsigned int _procid()
{
    unsigned int cp0_value;

    asm volatile( "mfc0 %0, $15, 1": "=r"(cp0_value) );

    // keep the 10 low-order bits only
    return cp0_value & 0x3FF;
}
////////////////////////////////////////////////////////////////////////////////////////
//  _segment_increment()
// Returns the address increment used to reach the per-processor instance of a
// peripheral (TTY, TIMER, ICU, DMA) in multiprocessor architectures.
// The processor identifier is obtained with _procid(), and the increment is:
// - increment         = cluster_id*cluster_increment + local_id*local_increment
// - cluster_id        = procid / NB_PROCS
// - local_id          = procid % NB_PROCS
// - cluster_increment = 4G / NB_CLUSTERS, computed as (2G / NB_CLUSTERS) * 2
//                       so that every intermediate value fits in 32 bits
// The local_increment argument is the size in bytes reserved for one processor.
////////////////////////////////////////////////////////////////////////////////////////
in_drivers unsigned int _segment_increment(unsigned int local_increment)
{
    const unsigned int procs_per_cluster = (unsigned int)&NB_PROCS;
    const unsigned int cluster_count     = (unsigned int)&NB_CLUSTERS;
    const unsigned int cluster_increment = (0x80000000/cluster_count)*2;
    const unsigned int proc              = _procid();

    const unsigned int cluster_id        = proc / procs_per_cluster;
    const unsigned int local_id          = proc % procs_per_cluster;

    return cluster_id*cluster_increment + local_id*local_increment;
}
////////////////////////////////////////////////////////////////////////////////////////
//  _proctime()
// Reads CP0 register $9 and returns its value as the processor time.
////////////////////////////////////////////////////////////////////////////////////////
in_drivers unsigned int _proctime()
{
    unsigned int now;

    asm volatile( "mfc0 %0, $9": "=r"(now) );

    return now;
}
////////////////////////////////////////////////////////////////////////////////////////
//  _procnumber()
// Returns the number of processors controlled by the GIET,
// that is NB_PROCS * NB_CLUSTERS.
////////////////////////////////////////////////////////////////////////////////////////
in_drivers unsigned int _procnumber()
{
    unsigned int procs_per_cluster = (unsigned int)&NB_PROCS;
    unsigned int cluster_count     = (unsigned int)&NB_CLUSTERS;

    return procs_per_cluster * cluster_count;
}
////////////////////////////////////////////////////////////////////////////////////////
//  _it_mask()
// Access CP0 and mask IRQs: clears bit 0 (IE, global interrupt enable) of the
// Status register (CP0 register $12) with a read-modify-write sequence.
// The other bits of the register are left unchanged.
//
// The register value is carried from the mfc0 to the mtc0 through a C variable,
// with an output constraint on the read and an input constraint on the write.
// Declaring it as an output of every instruction (as was done previously) does
// not guarantee that the instructions share a register, and the previous code
// was setting bit 0, which enables the interrupts instead of masking them.
////////////////////////////////////////////////////////////////////////////////////////
in_drivers void _it_mask()
{
    unsigned int status;

    asm volatile("mfc0  %0, $12"    : "=r" (status) );
    status &= ~0x1u;                                    // IE <= 0
    asm volatile("mtc0  %0, $12"    : : "r" (status) );
}
////////////////////////////////////////////////////////////////////////////////////////
//  _it_enable()
// Access CP0 and enable IRQs: sets bit 0 (IE, global interrupt enable) of the
// Status register (CP0 register $12) with a read-modify-write sequence.
// The other bits of the register are left unchanged.
//
// The register value is carried from the mfc0 to the mtc0 through a C variable,
// with an output constraint on the read and an input constraint on the write.
// The previous code subtracted 1 from the register: this clears IE when it is
// set, and corrupts the whole register when IE is already clear.
////////////////////////////////////////////////////////////////////////////////////////
in_drivers void _it_enable()
{
    unsigned int status;

    asm volatile("mfc0  %0, $12"    : "=r" (status) );
    status |= 0x1u;                                     // IE <= 1
    asm volatile("mtc0  %0, $12"    : : "r" (status) );
}
//////////////////////////////////////////////////////////////////////
//  _dcache_buf_invalidate()
// Invalidate all cache lines corresponding to a memory buffer.
// This is used by the block_device driver.
// - buffer : base address of the memory buffer (any alignment)
// - size   : length of the buffer in bytes (nothing is done when 0)
// Every line overlapped by [buffer, buffer+size) is visited, including
// the first and the last one when the buffer is not line-aligned.
/////////////////////////////////////////////////////////////////////////
in_drivers void _dcache_buf_invalidate(const void * buffer, size_t size)
{
    size_t config;
    size_t line_size;
    size_t line;
    size_t last_line;

    if ( size == 0 ) return;

    // retrieve dcache line size from config register (bits 12:10)
    asm volatile("mfc0 %0, $16, 1" : "=r" (config));
    line_size = 2 << ((config>>10) & 0x7);

    // line_size is a power of two: align both ends of the buffer on a line boundary
    line      = (size_t)buffer & ~(line_size - 1);
    last_line = ((size_t)buffer + size - 1) & ~(line_size - 1);

    // invalidate each line (cache operation 0x11); the loop exits on equality
    // so that it also terminates for a buffer ending at the top of the address space.
    // The "memory" clobber prevents the compiler from reusing values that were
    // loaded from the buffer before the invalidation.
    for ( ; ; line += line_size )
    {
        asm volatile(" cache %0, %1"
                :
                :"i" (0x11), "R" (*(const char*)line)
                :"memory");
        if ( line == last_line ) break;
    }
}

/////////////////////////////////////////////////////////////////////////
//  _itoa_dec()
// Convert a 32 bits unsigned int to a string of 10 decimal characters.
// The number is right-justified in buf[0..9]; leading positions are
// filled with spaces (0x20). No terminating NUL character is written.
/////////////////////////////////////////////////////////////////////////
in_drivers void _itoa_dec(unsigned val, char* buf)
{
    const char  digits[] = "0123456789";
    int         pos      = 9;

    // the units digit is always written, so that 0 is printed as "0"
    buf[pos] = digits[val % 10];
    val /= 10;

    // remaining positions: next digit while some value is left, space otherwise
    for( pos = 8 ; pos >= 0 ; pos-- )
    {
        if( val != 0 ) buf[pos] = digits[val % 10];
        else           buf[pos] = 0x20;
        val /= 10;
    }
}
//////////////////////////////////////////////////////////////////////////
//  _itoa_hex()
// Convert a 32 bits int to a string of 8 hexadecimal characters.
// The value is handled as an unsigned 32 bits pattern (a negative value is
// printed as its two's complement representation, e.g. -1 => "FFFFFFFF").
// The most significant digit is written in buf[0], the least significant
// one in buf[7]. No terminating NUL character is written.
///////////////////////////////////////////////////////////////////////////
in_drivers void _itoa_hex(int val, char* buf)
{
    // all 16 digits are required: the table is indexed by a 4 bits nibble
    const char      HexaTab[] = "0123456789ABCDEF";
    unsigned int    bits      = (unsigned int)val;  // unsigned: no negative index
    unsigned int    i;

    for( i=0 ; i<8 ; i++ )
    {
        buf[7-i] = HexaTab[bits & 0xF];
        bits >>= 4;
    }
}
///////////////////////////////////////////////////////////////////////////////////////
// MULTI_TIMER component
// Each processor can handle up to NB_TIMERS independent timers.
// The base address of one timer is computed as
//         seg_timer_base + _segment_increment(NB_TIMERS*TIMER_SPAN*4)
//                        + timer_index*TIMER_SPAN*4
///////////////////////////////////////////////////////////////////////////////////////
//  _timer_write()
// Write a 32 bits word in a memory mapped register of the MULTI_TIMER.
// - timer_index    : index of the timer (must be smaller than NB_TIMERS)
// - register_index : index of the register (must be smaller than TIMER_SPAN)
// - value          : word to be written
// Returns 0 on success, -1 if one of the two indexes is too large.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _timer_write(size_t timer_index, size_t register_index, int value)
{
    size_t          nb_timers   = (size_t)&NB_TIMERS;
    unsigned int    seg_base    = (unsigned int)&seg_timer_base;
    unsigned int    proc_offset = _segment_increment(nb_timers*TIMER_SPAN*4);
    int*            regs;

    // check both indexes
    if( (timer_index >= nb_timers) || (register_index >= TIMER_SPAN) ) return -1;

    regs = (int*)(seg_base + proc_offset + timer_index*TIMER_SPAN*4);
    regs[register_index] = value;                   // write word
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _timer_read()
// Read a 32 bits word in a memory mapped register of the MULTI_TIMER.
// - timer_index    : index of the timer (must be smaller than NB_TIMERS)
// - register_index : index of the register (must be smaller than TIMER_SPAN)
// - buffer         : address where the word is stored
// Returns 0 on success, -1 if one of the two indexes is too large
// (the buffer is not modified in that case).
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _timer_read(size_t timer_index, size_t register_index, int* buffer)
{
    int*                timer_address;
    size_t              ntimers     = (size_t)&NB_TIMERS;
    unsigned int        base        = (unsigned int)&seg_timer_base;
    unsigned int        increment   = _segment_increment(ntimers*TIMER_SPAN*4);

    // arguments checking (done once: the duplicated tests have been removed)
    if( timer_index >= ntimers )        return -1;
    if( register_index >= TIMER_SPAN )  return -1;

    timer_address = (int*)(base + increment + timer_index*TIMER_SPAN*4);
    *buffer = timer_address[register_index];        // read word
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  MULTI_TTY COMPONENT
// The total number of TTYs is equal to NB_CLUSTERS * NB_PROCS * NB_TASKS.
// - tty_address = seg_tty_base + _segment_increment(NB_TASKS*TTY_SPAN*4)
//                              + task_id*TTY_SPAN*4
// - tty_index   = proc_id*NB_TASKS + task_id
///////////////////////////////////////////////////////////////////////////////////////
//  _tty_write()
// Write one or several characters directly from a fixed length user buffer
// to the TTY_WRITE register of the TTY controller.
// It doesn't use the TTY_PUT_IRQ interrupt and the associated kernel buffer.
// This is a non blocking call : it tests the TTY_STATUS register.
// If the TTY_STATUS_WRITE bit (0x2) is set, the transfer stops and the function
// returns the number of characters that have been actually written.
// It returns -1 in case of error (proc_id or task index too large).
//
// The proc_id is checked BEFORE it is used to index _current_task_array, and the
// TTY registers are accessed through a volatile pointer, so that the status
// register is really read again before each character.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _tty_write(char* buffer, int length)
{
    volatile char*  tty_address;
    size_t          ntasks      = (size_t)&NB_TASKS;
    size_t          nprocs      = (size_t)&NB_PROCS;
    size_t          nclusters   = (size_t)&NB_CLUSTERS;
    unsigned int    base        = (unsigned int)&seg_tty_base;
    unsigned int    increment   = _segment_increment(ntasks*TTY_SPAN*4);
    size_t          pid         = _procid();
    int             nwritten    = 0;
    size_t          tid;
    int             i;

    // the processor index must be valid before any access to the task array
    if( pid >= nprocs*nclusters )       return -1;

    if( ntasks == 0 )  tid = 0;
    else               tid = _current_task_array[pid];

    // NOTE(review): when NB_TASKS is 0 this test always fails (0 >= 0), as it
    // did before; confirm whether a configuration without tasks is expected.
    if( tid >= ntasks )                 return -1;

    tty_address = (volatile char*)(base + increment + tid*TTY_SPAN*4);

    for ( i=0 ; i < length ; i++ )
    {
        // stop as soon as the controller reports TTY_STATUS_WRITE
        if((tty_address[TTY_STATUS*4] & 0x2) == 0x2)  break;

        tty_address[TTY_WRITE*4] = buffer[i];       // write character
        nwritten++;
    }
    return nwritten;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _tty_read()
// Fetch one character directly from the TTY_READ register of the TTY controler,
// and writes this character to the user buffer.
// It doesn't use the TTY_GET_IRQ interrupt and the associated kernel buffer.
// This is a non blocking call : it returns 0 if the register is empty,
// and returns 1 if the register is full.
// It returns -1 in case of error (proc_id or task_id too large or length != 1)
// The length argument is not used in this implementation, and has been
// introduced for future implementations.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _tty_read(char* buffer, int length)
{
    char*   		tty_address;
    size_t  		ntasks 		= (size_t)&NB_TASKS;
    size_t  		nprocs 		= (size_t)&NB_PROCS;
    size_t  		nclusters	= (size_t)&NB_CLUSTERS;
    unsigned int    base		= (unsigned int)&seg_tty_base;
    unsigned int	increment 	= _segment_increment(ntasks*TTY_SPAN*4);
    size_t  		pid 		= _procid();
    size_t		    tid;

    if( pid > 7 )   tid = 0;
    else            tid	= _current_task_array[pid];

    if( length != 1)      		    return -1;
    if( pid >= nprocs*nclusters )   return -1;
    if( tid >= ntasks )   		    return -1;
    
    tty_address = (char*)(base + increment + tid*TTY_SPAN*4);

    if((tty_address[TTY_STATUS*4] & 0x1) == 0x1)
    {
        buffer[0] = tty_address[TTY_READ*4];
        return 1;
    }
    else
    {
        return 0;
    }
}
///////////////////////////////////////////////////////////////////////////////////////
//  _tty_read_irq()
// As it uses the TTY_GET_IRQ interrupt and the associated kernel buffer,
// that has been written by the ISR, this function does not access the TTY registers.
// It fetches one single character from the _tty_get_buf[tty_index] kernel buffer,
// writes this character to the user buffer, and resets the _tty_get_full[tty_index]
// flag.
// This is a non blocking call : it returns 0 if the kernel buffer is empty,
// and returns 1 if a character has been copied.
// It returns -1 in case of error (proc_id or task_id too large, length != 1,
// or tty_index outside the kernel buffers).
// The length argument is only checked, and has been introduced for future
// implementations.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _tty_read_irq(char* buffer, int length)
{
    int     pid         = _procid();
    int     ntasks      = (int)&NB_TASKS;
    int     nprocs      = (int)&NB_PROCS;
    int     nclusters   = (int)&NB_CLUSTERS;
    int     nbuffers    = (int)(sizeof(_tty_get_full) / sizeof(_tty_get_full[0]));
    int     tty_index;
    int     tid;

    // the task array is only read for the processors 0 to 7
    if( pid > 7 )   tid = 0;
    else            tid = _current_task_array[pid];

    if( length != 1)                    return -1;
    if( pid >= nprocs*nclusters )       return -1;
    if( tid >= ntasks )                 return -1;

    // NB_CLUSTERS * NB_PROCS * NB_TASKS can be larger than the number of kernel
    // buffers: never access _tty_get_buf / _tty_get_full outside their bounds
    tty_index = pid*ntasks + tid;
    if( (tty_index < 0) || (tty_index >= nbuffers) )    return -1;

    if( _tty_get_full[tty_index] == 0 ) return 0;

    *buffer = _tty_get_buf[tty_index];
    _tty_get_full[tty_index] = 0;
    return 1;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _exit()
// Exit (suicide) after printing a message on a TTY terminal.
// The message contains the processor identifier, displayed as "0x" followed by
// three hexadecimal digits (0-9, A-F). The message is sent with the non blocking
// _tty_write() function, whose return value is ignored.
// This function never returns: it ends in an infinite loop.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int  _exit()
{
    char        buf[] = "\n\n!!!  Exit  Processor          !!!\n";
    const char  hex[] = "0123456789ABCDEF";
    int         pid   = _procid();

    // patch the identifier into the blank area of the message;
    // a table is required, as adding 0x30 is only valid for the digits 0 to 9
    buf[24] = '0';
    buf[25] = 'x';
    buf[26] = hex[(pid>>8) & 0xF];
    buf[27] = hex[(pid>>4) & 0xF];
    buf[28] = hex[pid & 0xF];
    _tty_write(buf, 36);            // 36 characters, without the terminating NUL

    while(1) asm volatile("nop");   // infinite loop...
}

///////////////////////////////////////////////////////////////////////////////////////
//  _icu_write()
// Write a 32 bits word in a memory mapped register of the ICU peripheral.
// The ICU base address depends on the processor ID:
//         seg_icu_base + _segment_increment(ICU_SPAN*4)
// Returns 0 on success, -1 if register_index is not smaller than ICU_SPAN.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _icu_write(size_t register_index, int value)
{
    unsigned int    seg_base    = (int)&seg_icu_base;
    unsigned int    proc_offset = _segment_increment(ICU_SPAN*4);
    int*            regs        = (int*)(seg_base + proc_offset);

    if( register_index >= ICU_SPAN )
    {
        return -1;
    }

    regs[register_index] = value;   // write word
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _icu_read()
// Read a 32 bits word in a memory mapped register of the ICU peripheral,
// and store it at address buffer.
// The ICU base address depends on the processor ID:
//         seg_icu_base + _segment_increment(ICU_SPAN*4)
// Returns 0 on success, -1 if register_index is not smaller than ICU_SPAN.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _icu_read(size_t register_index, int* buffer)
{
    unsigned int    seg_base    = (int)&seg_icu_base;
    unsigned int    proc_offset = _segment_increment(ICU_SPAN*4);
    int*            regs        = (int*)(seg_base + proc_offset);

    if( register_index >= ICU_SPAN )
    {
        return -1;
    }

    *buffer = regs[register_index]; // read word
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _gcd_write()
// Write a 32 bits word in a memory mapped register of the GCD coprocessor.
// The four registers (index 0 to 3) are located at address seg_gcd_base.
// Returns 0 on success, -1 if register_index is larger than 3.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _gcd_write(size_t register_index, int value)
{
    int*    regs = (int*)&seg_gcd_base;

    if( register_index >= 4 )
    {
        return -1;
    }

    regs[register_index] = value;                   // write word
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _gcd_read()
// Read a 32 bits word in a memory mapped register of the GCD coprocessor,
// and store it at address buffer.
// The four registers (index 0 to 3) are located at address seg_gcd_base.
// Returns 0 on success, -1 if register_index is larger than 3.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _gcd_read(size_t register_index, int* buffer)
{
    int*    regs = (int*)&seg_gcd_base;

    if( register_index >= 4 )
    {
        return -1;
    }

    *buffer = regs[register_index];                 // read word
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _locks_write()
// Release a software spin-lock, by writing 0 in _spin_lock[index].
// Returns 0 on success, -1 if index is not smaller than NB_LOCKS.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _locks_write(size_t index)
{
    int     nb_locks = (int)&NB_LOCKS;

    if( index < nb_locks )
    {
        _spin_lock[index] = 0;
        return 0;
    }
    return -1;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _locks_read()
// Try to take a software spin-lock.
// This is a blocking call, as there is a busy-waiting loop,
// until the lock is granted to the requester.
// There is an internal delay between two successive lock reads, to avoid bus
// saturation. The delay counter is ((_proctime() + _procid()) & 0xF) << 4,
// that is a value between 0 and 240 which depends on the date and on the processor.
// Returns 0 when the lock has been taken, -1 if index is not smaller than NB_LOCKS.
//
// Implementation notes:
// - the delay loop iterates while the counter is strictly positive (it also
//   terminates when the delay is 0); the previous "beqz" test left the loop
//   immediately, so no delay was actually inserted.
// - local numeric labels are used, so that the asm block can be duplicated
//   or inlined by the compiler without creating duplicated symbols.
// - the "memory" clobber tells the compiler that the lock word is modified,
//   and prevents it from moving memory accesses across the lock acquisition.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _locks_read(size_t index)
{
    int     max = (int)&NB_LOCKS;
    if( index >= max ) return -1;

    register int    delay = ( (_proctime() + _procid() ) & 0xF) << 4;
    register int*   plock = (int*)&_spin_lock[index];

    asm volatile ("1:                       \n"
                  "ll   $2,    0(%0)        \n"     // $2 <= _spin_lock[index]
                  "bnez $2,    2f           \n"     // delay if lock already taken
                  "li   $3,    1            \n"     // prepare argument for sc
                  "sc   $3,    0(%0)        \n"     // try to set the lock
                  "bnez $3,    4f           \n"     // exit if atomic
                  "2:                       \n"
                  "move $4,    %1           \n"     // $4 <= delay
                  "3:                       \n"
                  "addi $4,    $4,    -1    \n"     // $4 <= $4 - 1
                  "bgtz $4,    3b           \n"     // loop until end of delay
                  "j           1b           \n"     // retry
                  "4:                       \n"
                  ::"r"(plock),"r"(delay):"$2","$3","$4","memory");
    return 0;
}
//////////////////////////////////////////////////////////////////////////////////////////
//  I/O BLOCK_DEVICE
// The functions below use the three variables _ioc_lock, _ioc_done,
// and _ioc_status for synchronisation.
// - As the IOC component can be used by several programs running in parallel,
// the _ioc_lock variable guarantees exclusive access to the device.
// The _ioc_read() and _ioc_write() functions use atomic LL/SC to get the lock,
// and set _ioc_lock to a non zero value.
// The _ioc_write() and _ioc_read() functions are blocking, polling the _ioc_lock
// variable until the device is available.
// - When the transfer is completed, the ISR routine activated by the IOC IRQ
// sets the _ioc_done variable to a non-zero value. Possible address errors detected
// by the IOC peripheral are reported by the ISR in the _ioc_status variable.
// The _ioc_completed() function is polling the _ioc_done variable, waiting for
// transfer completion. When the completion is signaled, the _ioc_completed() function
// resets the _ioc_done variable to zero, and releases the _ioc_lock variable.
//
// In a multi-tasks environment, this polling policy must be replaced by a
// descheduling policy for the requesting process.
///////////////////////////////////////////////////////////////////////////////////////
//  _ioc_get_lock()
// This blocking function is used by the _ioc_read() and _ioc_write() functions
// to get _ioc_lock using LL/SC: it loops until _ioc_lock is read as 0 and the
// value 1 has been atomically written into it.
//
// Implementation notes:
// - a local numeric label is used, so that the asm block can be duplicated
//   or inlined by the compiler without creating duplicated symbols.
// - the "memory" clobber tells the compiler that _ioc_lock is modified, and
//   prevents it from moving memory accesses across the lock acquisition.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers void _ioc_get_lock()
{
    register unsigned int*  plock = (unsigned int*)&_ioc_lock;

    asm volatile ("1:                       \n"
                  "ll   $2,    0(%0)        \n"     // $2 <= _ioc_lock
                  "bnez $2,    1b           \n"     // retry if busy
                  "li   $3,    1            \n"     // prepare argument for sc
                  "sc   $3,    0(%0)        \n"     // try to set _ioc_lock
                  "beqz $3,    1b           \n"     // retry if not atomic
                  ::"r"(plock):"$2","$3","memory");
}
//////////////////////////////////////////////////////////////////////////////////////
//  _ioc_write()
// Transfer data from a memory buffer to a file on the block_device.
// - lba    : first block index on the disk
// - buffer : base address of the memory buffer
// - count  : number of blocks to be transferred
// The source buffer must be in user address space. This is NOT checked here:
// the test that [buffer, buffer + block_size*count) lies below 0x80000000
// is currently disabled.
// The function takes _ioc_lock (blocking), programs the device and returns 0
// without waiting for the completion of the transfer: see _ioc_completed().
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _ioc_write(size_t lba, void* buffer, size_t count)
{
    volatile unsigned int*  regs = (unsigned int*)&seg_ioc_base;

    // exclusive access to the device
    _ioc_get_lock();

    // transfer description
    regs[BLOCK_DEVICE_BUFFER]     = (int)buffer;
    regs[BLOCK_DEVICE_COUNT]      = count;
    regs[BLOCK_DEVICE_LBA]        = lba;

    // interrupt enable, then operation code, which is written last
    regs[BLOCK_DEVICE_IRQ_ENABLE] = 1;
    regs[BLOCK_DEVICE_OP]         = BLOCK_DEVICE_WRITE;

    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _ioc_read()
// Transfer data from a file on the block device to a memory buffer.
// - lba    : first block index on the disk
// - buffer : base address of the memory buffer
// - count  : number of blocks to be transferred
// The destination buffer must be in user address space. This is NOT checked here:
// the test that [buffer, buffer + block_size*count) lies below 0x80000000
// is currently disabled.
// All cache lines corresponding to the target buffer must be invalidated
// for cache coherence.
// NOTE(review): this function does not invalidate the cache itself; presumably
// this is done elsewhere with _dcache_buf_invalidate() - to be confirmed.
// The function takes _ioc_lock (blocking), programs the device and returns 0
// without waiting for the completion of the transfer: see _ioc_completed().
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _ioc_read(size_t lba, void* buffer, size_t count)
{
    volatile unsigned int*  regs = (unsigned int*)&seg_ioc_base;

    // exclusive access to the device
    _ioc_get_lock();

    // transfer description
    regs[BLOCK_DEVICE_BUFFER]     = (int)buffer;
    regs[BLOCK_DEVICE_COUNT]      = count;
    regs[BLOCK_DEVICE_LBA]        = lba;

    // interrupt enable, then operation code, which is written last
    regs[BLOCK_DEVICE_IRQ_ENABLE] = 1;
    regs[BLOCK_DEVICE_OP]         = BLOCK_DEVICE_READ;

    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _ioc_completed()
// This blocking function checks completion of an I/O transfer and reports errors.
// It polls _ioc_done until it becomes non-zero, then resets _ioc_done
// and releases _ioc_lock.
// It returns 0 if the transfer is successfully completed.
// It returns -1 if an error has been reported in _ioc_status.
//
// The status is copied into a local variable BEFORE the lock is released:
// once _ioc_lock is 0, another processor can start a new transfer, and
// _ioc_status could be overwritten before it is tested.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _ioc_completed()
{
    int     status;

    // waiting for completion
    while (_ioc_done == 0) { asm volatile("nop"); }

    // get the status of this transfer while the device is still owned
    status = _ioc_status;

    // reset synchronisation variables
    _ioc_done = 0;
    _ioc_lock = 0;

    // return errors
    if((status != BLOCK_DEVICE_READ_SUCCESS) &&
            (status != BLOCK_DEVICE_WRITE_SUCCESS))     return -1;
    else                                                return 0;
}

//////////////////////////////////////////////////////////////////////////////////////
//  FRAME_BUFFER
// The _fb_sync_write & _fb_sync_read functions use a memcpy strategy to implement
// the transfer between a data buffer (user space) and the frame buffer (kernel space).
// They are blocking until completion of the transfer.
//////////////////////////////////////////////////////////////////////////////////////
//  _fb_sync_write()
// Transfer data from a user buffer to the frame_buffer device, byte by byte.
// - offset : offset (in bytes) in the frame buffer
// - buffer : base address of the memory buffer
// - length : number of bytes to be transferred
// The check that the buffer is in user space (below 0x80000000) is currently
// disabled. The function always returns 0.
//////////////////////////////////////////////////////////////////////////////////////
in_drivers int  _fb_sync_write(size_t offset, void* buffer, size_t length)
{
    volatile char*  dst = (char*)(void*)&seg_fb_base + offset;
    char*           src = buffer;
    size_t          n;

    // memory copy
    for( n = 0 ; n < length ; n++ )
    {
        dst[n] = src[n];
    }
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _fb_sync_read()
// Copy bytes from the frame_buffer device into a memory buffer, one byte at a time.
// - offset : offset (in bytes) in the frame buffer
// - buffer : base address of the destination memory buffer
// - length : number of bytes to be transferred
// Always returns 0 (the user space check on the buffer is currently disabled).
//////////////////////////////////////////////////////////////////////////////////////
in_drivers int  _fb_sync_read(size_t offset, void* buffer, size_t length)
{
    volatile char*  src       = (char*)(void*)&seg_fb_base + offset;
    char*           dst       = buffer;
    size_t          remaining = length;

    // buffer must be in user space (check currently disabled)
//  if( ( (size_t)buffer + length ) >= 0x80000000 ) return -1;
//  if( ( (size_t)buffer          ) >= 0x80000000 ) return -1;

    // byte per byte copy, in increasing address order
    while (remaining != 0)
    {
        *dst = *src;
        dst++;
        src++;
        remaining--;
    }
    return 0;
}
//////////////////////////////////////////////////////////////////////////////////////
// The _fb_write() and _fb_read() functions use the MULTI_DMA
// coprocessor to transfer data between the user buffer and the frame buffer.
// The _fb_completed() function uses a polling policy to test
// the global variables _dma_busy[i] and detect the transfer completion.
// As each processor can have its private DMA, there are up to 256 _dma_busy[i]
// set/reset variables that are indexed by the proc_id.
// The _dma_busy variable is reset by the ISR associated to the DMA IRQ.
///////////////////////////////////////////////////////////////////////////////////////
//  _fb_write()
// Transfer data from a user buffer to the frame_buffer device using DMA.
// - offset : offset (in bytes) in the frame buffer
// - buffer : base address of the memory buffer
// - length : number of bytes to be transferred
// The function waits until _dma_busy[pid] is 0, sets it to 1, programs the DMA
// registers and returns 0 without waiting for the end of the transfer
// (completion is tested by _fb_completed()).
//////////////////////////////////////////////////////////////////////////////////////
in_drivers int  _fb_write(size_t offset, void* buffer, size_t length)
{
    // Device registers are accessed through a volatile pointer, so that the
    // compiler can neither reorder nor merge the configuration writes.
    volatile int*   dma_address;
    unsigned int    base        = (unsigned int)&seg_dma_base;
    // presumably the offset of the DMA channel owned by the calling processor
    // (depends on _segment_increment(), defined elsewhere) -- TODO confirm
    unsigned int    increment   = _segment_increment(DMA_SPAN*4);
    char*           fb          = (char*)&seg_fb_base + offset;
    unsigned int    delay       = (_proctime() & 0xF) << 4;
    unsigned int    pid         = _procid();
    unsigned int    i;


    // checking buffer boundaries (bytes) -- currently disabled
//  if( ( (size_t)buffer + length ) >= 0x80000000 ) return -1;
//  if( ( (size_t)buffer          ) >= 0x80000000 ) return -1;

    // waiting until DMA device is available
    while (_dma_busy[pid] != 0)
    {
        for( i=0 ; i<delay ; i++)   // busy waiting
        {                           // with a pseudo random
            asm volatile("nop");    // delay between bus accesses
        }
    }
    _dma_busy[pid] = 1;

    dma_address = (volatile int*)(base + increment);

    // DMA configuration: source and destination are written before the length
    // (presumably writing DMA_LEN starts the transfer -- confirm with the
    // pibus_dma specification)
    dma_address[DMA_IRQ_DISABLE] = 0;
    dma_address[DMA_SRC]         = (int)buffer;
    dma_address[DMA_DST]         = (int)fb;
    dma_address[DMA_LEN]         = (int)length;
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _fb_read()
// Transfer data from the frame_buffer device to a user buffer using DMA.
// - offset : offset (in bytes) in the frame buffer
// - buffer : base address of the memory buffer
// - length : number of bytes to be transferred
// The function waits until _dma_busy[pid] is 0, sets it to 1, programs the DMA
// registers and returns 0 without waiting for the end of the transfer
// (completion is tested by _fb_completed()).
//////////////////////////////////////////////////////////////////////////////////////
in_drivers int  _fb_read(size_t offset, void* buffer, size_t length)
{
    // Device registers are accessed through a volatile pointer, so that the
    // compiler can neither reorder nor merge the configuration writes.
    volatile int*   dma_address;
    unsigned int    base        = (unsigned int)&seg_dma_base;
    // presumably the offset of the DMA channel owned by the calling processor
    // (depends on _segment_increment(), defined elsewhere) -- TODO confirm
    unsigned int    increment   = _segment_increment(DMA_SPAN*4);
    char*           fb          = (char*)&seg_fb_base + offset;
    unsigned int    delay       = (_proctime() & 0xF) << 4;
    unsigned int    pid         = _procid();
    unsigned int    i;

    // checking buffer boundaries (bytes) -- currently disabled
//  if( ( (size_t)buffer + length ) >= 0x80000000 ) return -1;
//  if( ( (size_t)buffer          ) >= 0x80000000 ) return -1;

    // waiting until DMA device is available
    while (_dma_busy[pid] != 0)
    {
        for( i=0 ; i<delay ; i++)   // busy waiting
        {                           // with a pseudo random
            asm volatile("nop");    // delay between bus accesses
        }
    }
    _dma_busy[pid] = 1;

    dma_address = (volatile int*)(base + increment);

    // DMA configuration: source and destination are written before the length
    // (presumably writing DMA_LEN starts the transfer -- confirm with the
    // pibus_dma specification)
    dma_address[DMA_IRQ_DISABLE] = 0;
    dma_address[DMA_SRC]         = (int)fb;
    dma_address[DMA_DST]         = (int)buffer;
    dma_address[DMA_LEN]         = (int)length;
    return 0;
}
///////////////////////////////////////////////////////////////////////////////////////
//  _fb_completed()
// This blocking function checks completion of a DMA transfer to or from the
// frame buffer. It polls _dma_busy[pid] (executing a nop between two reads)
// until the flag is cleared.
// It returns 0 if _dma_status[pid] is DMA_SUCCESS.
// Otherwise it returns the value of _dma_status[pid].
// NOTE(review): earlier documentation announced -1 on error; callers should
// test for a non-zero value.
///////////////////////////////////////////////////////////////////////////////////////
in_drivers int _fb_completed()
{
    const unsigned int  proc = _procid();

    // spin until the busy flag of this processor is cleared
    for (;;)
    {
        if (_dma_busy[proc] == 0) break;
        asm volatile("nop");
    }

    // report the transfer status
    if (_dma_status[proc] != DMA_SUCCESS)
    {
        return _dma_status[proc];
    }
    return 0;
}
//////////////////////////////////////////////////////////////////////////////////////
// _barrier_init()
// This function makes a cooperative initialisation of the barrier:
// - barrier_initial_value[index] <= value
// - barrier_count[index]         <= value
// - barrier_lock[index]          <= 0
// All tasks try to initialize the barrier, but the initialisation
// is done by only one task, using LL/SC instructions: a task that reads a
// non-zero barrier_initial_value[index] leaves the barrier untouched.
// This cooperative initialisation is questionable,
// because the barrier can only be initialised once...
// It returns 1 if index is larger than 7, and 0 otherwise.
// NOTE(review): _barrier_wait() accepts indexes up to 15; check the size of
// the _barrier_* arrays to decide which of the two limits is the right one.
//////////////////////////////////////////////////////////////////////////////////////
in_drivers int _barrier_init(unsigned int index, unsigned int value)
{
    register int* pinit;
    register int* pcount;
    register int* plock;

    // validate the index before forming any address from it
    if ( index > 7 )    return 1;

    pinit  = (int*)&_barrier_initial_value[index];
    pcount = (int*)&_barrier_count[index];
    plock  = (int*)&_barrier_lock[index];

    // parallel initialisation using atomic instructions LL/SC
    // - local numeric labels avoid duplicate symbols if this code is
    //   inlined or cloned by the compiler
    // - the "memory" clobber tells the compiler that the three barrier
    //   variables are modified by the asm statement
    asm volatile ("1:                               \n"
                  "ll   $2,     0(%0)               \n" // read barrier_initial_value
                  "bnez $2,     2f                  \n" // already initialised : done
                  "move $3,     %3                  \n"
                  "sc   $3,     0(%0)               \n" // try to write barrier_initial_value
                  "beqz $3,     1b                  \n" // SC failed : retry
                  "move $3,     %3                  \n"
                  "sw   $3,     0(%1)               \n" // barrier_count <= barrier_initial_value
                  "move $3,     $0                  \n"
                  "sw   $3,     0(%2)               \n" // barrier_lock <= 0
                  "2:                               \n"
                  ::"r"(pinit),"r"(pcount),"r"(plock),"r"(value):"$2","$3","memory");
    return 0 ;
}
//////////////////////////////////////////////////////////////////////////////////////
// 	_barrier_wait()
// This blocking function uses a busy-wait technique (on the barrier_lock value),
// because the GIET does not support dynamic scheduling/descheduling of tasks.
// The barrier state is actually defined by two variables:
// _barrier_count[index] defines the number of participants that are still expected
// _barrier_lock[index] defines the boolean variable whose value is polled
// The last participant reloads _barrier_count[index] from
// _barrier_initial_value[index], then toggles _barrier_lock[index] to release
// the barrier.
// It returns 1 if the barrier index is larger than 15, and 0 otherwise.
// NOTE(review): _barrier_init() rejects indexes larger than 7; check the size
// of the _barrier_* arrays to decide which of the two limits is the right one.
//////////////////////////////////////////////////////////////////////////////////////
in_drivers int _barrier_wait(unsigned int index)
{
    register int*   pcount;
    register int    count;
    int             lock;

    // validate the index before any access to the barrier arrays
    if ( index > 15 )   return 1;

    pcount = (int*)&_barrier_count[index];

    // the current lock value must be sampled before the decrement:
    // it is the value the other participants compare against
    lock   = _barrier_lock[index];

    // parallel decrement _barrier_count[index] using atomic instructions LL/SC
    // input : pointer on _barrier_count[index]
    // output : count = _barrier_count[index] (before decrementation)
    // - local numeric label avoids duplicate symbols if this code is
    //   inlined or cloned by the compiler
    // - the "memory" clobber prevents the compiler from moving memory
    //   accesses across the atomic decrement
    asm volatile ("1:                               \n"
                  "ll   %0,     0(%1)               \n"
                  "addi $3,     %0,     -1          \n"
                  "sc   $3,     0(%1)               \n"
                  "beqz $3,     1b                  \n" // SC failed : retry
                  :"=&r"(count)
                  :"r"(pcount)
                  :"$2","$3","memory");

    // the last task re-initializes the barrier_count variable
    // and the barrier_lock variable, waking up all other waiting tasks

    if ( count == 1 )    // last task
    {
        _barrier_count[index] = _barrier_initial_value[index];
        // the count must be restored before the lock is toggled
        asm volatile( "sync" ::: "memory" );
        _barrier_lock[index]   = (lock == 0) ? 1 : 0;
        return 0 ;
    }
    else        // other tasks
    {
        while ( lock == _barrier_lock[index] )  { } // busy waiting
        return 0 ;
    }
}
//////////////////////////////////////////////////////////////////////////////////////


// Local Variables:
// tab-width: 4;
// c-basic-offset: 4;
// c-file-offsets:((innamespace . 0)(inline-open . 0));
// indent-tabs-mode: nil;
// End:
//
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

