///////////////////////////////////////////////////////////////////////////////////
// File     : drivers.c
// Date     : 01/04/2012
// Author   : alain greiner
// Copyright (c) UPMC-LIP6
///////////////////////////////////////////////////////////////////////////////////
// The drivers.c and drivers.h files are part of the GIET nano kernel.
// They contain the drivers for the peripherals available in the SoCLib library:
// - vci_multi_tty
// - vci_multi_timer
// - vci_multi_dma
// - vci_multi_icu
// - vci_gcd
// - vci_frame_buffer
// - vci_block_device
//
// The following global parameters must be defined in the giet_config.h file:
// - NB_CLUSTERS  : number of clusters 
// - NB_PROCS     : number of PROCS per cluster 
// - NB_TIMERS    : number of TIMERS per cluster
// - NB_DMAS      : number of DMA channels
// - NB_TTYS      : number of TTY terminals
// - NB_TIMERS    : number of TIMERS per cluster
// - CLUSTER_SPAN : address increment between clusters
//
// The following base addresses must be defined in the sys.ld file:
// - seg_icu_base
// - seg_timer_base
// - seg_tty_base
// - seg_gcd_base
// - seg_dma_base
// - seg_fb_base
// - seg_ioc_base
///////////////////////////////////////////////////////////////////////////////////

#include <vm_handler.h>
#include <sys_handler.h>
#include <giet_config.h>
#include <drivers.h>
#include <common.h>
#include <hwr_mapping.h>
#include <mips32_registers.h>
#include <ctx_handler.h>

// Compile-time checks on the configuration parameters used by this file.
// First, every parameter must be defined; the build stops with an explicit
// message as soon as one of them is missing.
#if !defined(NB_PROCS) 
# error: You must define NB_PROCS in 'giet_config.h' file!
#endif
#if !defined(NB_CLUSTERS) 
# error: You must define NB_CLUSTERS in 'giet_config.h' file!
#endif
#if !defined(CLUSTER_SPAN) 
# error: You must define CLUSTER_SPAN in 'giet_config.h' file!
#endif
#if !defined(NB_TTYS)
# error: You must define NB_TTYS in 'giet_config.h' file!
#endif
#if !defined(NB_DMAS)
# error: You must define NB_DMAS in 'giet_config.h' file!
#endif
#if !defined(NB_TIMERS)
# error: You must define NB_TIMERS in 'giet_config.h' file!
#endif

// Then, the values themselves must be in the supported ranges.

// at least one terminal is required
#if (NB_TTYS < 1)
# error: NB_TTYS cannot be smaller than 1!
#endif

// at least one timer per processor in a cluster
#if (NB_TIMERS < NB_PROCS)
# error: NB_TIMERS must be larger or equal to NB_PROCS!
#endif

// no more than 8 processors per cluster
#if (NB_PROCS > 8)
# error: NB_PROCS cannot be larger than 8!
#endif

// at least one DMA channel
#if (NB_DMAS < 1)
# error: NB_DMAS cannot be 0!
#endif


/////////////////////////////////////////////////////////////////////////////
// 	Global (uncachable) variables
/////////////////////////////////////////////////////////////////////////////

// Places a variable in the ".unckdata" linker section.
#define in_unckdata __attribute__((section (".unckdata")))

// DMA channel state, one entry per channel. In this file the entries are
// indexed by the processor index: _dma_busy[] is set by _fb_write() and
// _fb_read() and polled by _fb_completed(), which then checks _dma_status[].
// No code in this file clears _dma_busy[] or writes _dma_status[]; the frame
// buffer driver notes say this is done by the DMA ISR.
// ([0 ... N-1] is the GNU C range designator: every entry starts at 0.)
in_unckdata volatile unsigned int  _dma_status[NB_DMAS];
in_unckdata volatile unsigned char _dma_busy[NB_DMAS] = { [0 ... NB_DMAS-1] = 0 };

// Block device state.
// _ioc_status / _ioc_done : only read and reset in this file (see
//                           _ioc_completed()); per the driver notes they are
//                           written by the IOC ISR.
// _ioc_lock               : taken by _ioc_get_lock(), released by
//                           _ioc_completed().
// _ioc_iommu_ix1          : IOMMU PT1 index used for the IOC buffer; never
//                           modified in this file.
// _ioc_iommu_npages       : number of pages mapped by _ioc_access(), to be
//                           unmapped by _ioc_completed().
in_unckdata volatile unsigned char _ioc_status       = 0;
in_unckdata volatile unsigned char _ioc_done         = 0;
in_unckdata unsigned int		   _ioc_lock         = 0;
in_unckdata unsigned int		   _ioc_iommu_ix1    = 0;
in_unckdata unsigned int		   _ioc_iommu_npages = 0;

// TTY state, one entry per terminal: last received character and its
// "full" flag, consumed by _tty_read_irq(). Per the driver notes the
// producer is the TTY ISR (not in this file).
// _tty_put_lock is not referenced by the code visible in this file.
in_unckdata volatile unsigned char _tty_get_buf[NB_TTYS];
in_unckdata volatile unsigned char _tty_get_full[NB_TTYS] = { [0 ... NB_TTYS-1] = 0 };
in_unckdata unsigned int           _tty_put_lock;

//////////////////////////////////////////////////////////////////////////////
// 	VciMultiTimer driver
//////////////////////////////////////////////////////////////////////////////
// There is one MULTI-TIMER component per cluster.
// The number of timers per cluster must be larger than or equal to the number
// of processors (NB_TIMERS >= NB_PROCS), because each processor uses a private
// timer for context switch.
// The total number of timers is NB_CLUSTERS * NB_TIMERS
// The global timer index = cluster_id*NB_TIMERS + timer_id
//////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////////
// _timer_write()
//
// Write a 32-bit word in a memory mapped register of a timer device,
// identified by the cluster index and the local timer index. 
// The register is accessed through a volatile pointer, so that the compiler
// can neither remove nor reorder the device access.
// - cluster_index  : must be smaller than NB_CLUSTERS
// - timer_index    : must be smaller than NB_TIMERS
// - register_index : must be smaller than TIMER_SPAN
// - value          : word to be written
// Returns 0 if success, 1 if one of the three indexes is out of range.
//
// NOTE(review): the cluster and timer offsets are added to an
// (unsigned int*), so both are scaled by sizeof(unsigned int). This is only
// right if CLUSTER_SPAN is expressed in words -- to be confirmed against
// giet_config.h.
//////////////////////////////////////////////////////////////////////////////
unsigned int _timer_write( unsigned int cluster_index,
                           unsigned int timer_index,
                           unsigned int register_index, 
                           unsigned int value )
{
    volatile unsigned int*  timer_address;

    // parameters checking 
    if ( register_index >= TIMER_SPAN )     return 1;
    if ( cluster_index >= NB_CLUSTERS )     return 1;
    if ( timer_index >= NB_TIMERS )         return 1;

    // base of the register bank of the selected timer
    timer_address = (volatile unsigned int*)&seg_timer_base + 
                    ( cluster_index * CLUSTER_SPAN )  +
                    ( timer_index * TIMER_SPAN );

    timer_address[register_index] = value; // write word

    return 0;
}

//////////////////////////////////////////////////////////////////////////////
// _timer_read()
//
// Read a 32-bit word in a memory mapped register of a timer device,
// identified by the cluster index and the local timer index. 
// The register is accessed through a volatile pointer, so that the compiler
// can neither remove nor reorder the device access.
// - cluster_index  : must be smaller than NB_CLUSTERS
// - timer_index    : must be smaller than NB_TIMERS
// - register_index : must be smaller than TIMER_SPAN
// - buffer         : destination of the word read from the register
// Returns 0 if success, 1 if one of the three indexes is out of range
// (nothing is written to the buffer in that case).
//
// NOTE(review): the cluster and timer offsets are added to an
// (unsigned int*), so both are scaled by sizeof(unsigned int). This is only
// right if CLUSTER_SPAN is expressed in words -- to be confirmed against
// giet_config.h.
//////////////////////////////////////////////////////////////////////////////
unsigned int _timer_read(unsigned int cluster_index,
                         unsigned int timer_index, 
                         unsigned int register_index, 
                         unsigned int *buffer)
{
    volatile unsigned int *timer_address;

    // parameters checking
    if ( register_index >= TIMER_SPAN )     return 1;
    if ( cluster_index >= NB_CLUSTERS )     return 1;
    if ( timer_index >= NB_TIMERS )         return 1;

    // base of the register bank of the selected timer
    timer_address = (volatile unsigned int*)&seg_timer_base + 
                    ( cluster_index * CLUSTER_SPAN )  +
                    ( timer_index * TIMER_SPAN );

    *buffer = timer_address[register_index]; // read word 

    return 0;
}

/////////////////////////////////////////////////////////////////////////////////
// 	VciMultiTty driver
/////////////////////////////////////////////////////////////////////////////////
// The total number of TTYs is defined by the configuration parameter NB_TTYS.
// The system terminal is TTY[0].
// The TTYs are allocated to applications by the GIET in the boot phase.
// The number of TTYs allocated to each application, and used by each
// task can be defined in the mapping_info data structure.
// For each user task, the tty_id is stored in the context of the task (slot 34),
// and must be explicitly defined in the boot code.
// The TTY address is always computed as : seg_tty_base + tty_id*TTY_SPAN
///////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////////
// _tty_write()
//
// Sends up to (length) characters from the user buffer to the TTY_WRITE
// register of the terminal owned by the calling task. The terminal index
// is found in the CTX_TTY_ID slot of the current task context.
// Neither the TTY_PUT_IRQ interrupt nor a kernel buffer is involved.
// The call never blocks: before each character, the TTY_STATUS register is
// tested, and the transfer is abandoned as soon as bit 1 (0x2) is set.
// Returns the number of characters actually written.
//////////////////////////////////////////////////////////////////////////////
unsigned int _tty_write( const char		*buffer, 
                         unsigned int	length)
{
    // terminal allocated to the task running on this processor
    unsigned int            cpu   = _procid();
    unsigned int            task  = _scheduler[cpu].current;
    unsigned int            term  = _scheduler[cpu].context[task][CTX_TTY_ID];

    // register bank of this terminal
    volatile unsigned int  *regs  = (unsigned int*)&seg_tty_base + term*TTY_SPAN;

    unsigned int            count = 0;

    while ( count < length )
    {
        // the terminal cannot take one more character: give up here
        if ( (regs[TTY_STATUS] & 0x2) != 0 ) break;

        regs[TTY_WRITE] = (unsigned int)buffer[count];
        count++;
    }

    return count;
}

//////////////////////////////////////////////////////////////////////////////
// _tty_read_irq()
//
// This non-blocking function uses the TTY_GET_IRQ[tty_id] interrupt and 
// the associated kernel buffer, that has been written by the ISR.
// It fetches one single character from the _tty_get_buf[tty_id] kernel
// buffer, writes this character to the user buffer, and resets the
// _tty_get_full[tty_id] flag.
// - buffer : destination of the character
// - length : size of the user buffer in bytes; when it is 0 nothing is
//            stored and the pending character (if any) is left in place.
// Returns 1 if one character has been copied to the user buffer,
// 0 if the kernel buffer is empty or if length is 0.
//////////////////////////////////////////////////////////////////////////////
unsigned int _tty_read_irq( char			*buffer, 
                            unsigned int	length)
{
    unsigned int proc_id;
    unsigned int task_id;
    unsigned int tty_id;

    // a zero-length buffer cannot receive the character
    if ( length == 0 ) return 0;

    // terminal allocated to the task running on this processor
    proc_id = _procid();
    task_id = _scheduler[proc_id].current;
    tty_id  = _scheduler[proc_id].context[task_id][CTX_TTY_ID];

    // nothing received since the last read
    if ( _tty_get_full[tty_id] == 0 ) return 0;

    // consume the character, then mark the kernel buffer as empty
    *buffer = _tty_get_buf[tty_id];
    _tty_get_full[tty_id] = 0;

    return 1;
}

////////////////////////////////////////////////////////////////////////////////
// _tty_read()
//
// This non-blocking function fetches one character directly from the TTY_READ 
// register of the TTY controler, and writes this character to the user buffer.
// It doesn't use the TTY_GET_IRQ interrupt and the associated kernel buffer.
// - buffer : destination of the character
// - length : size of the user buffer in bytes; when it is 0 the device is
//            not accessed and nothing is stored.
// Returns 1 if one character has been copied to the user buffer,
// 0 if bit 0 of TTY_STATUS is not set (no character) or if length is 0.
////////////////////////////////////////////////////////////////////////////////
unsigned int _tty_read( char			*buffer, 
                        unsigned int	length)
{
    volatile unsigned int *tty_address;

    unsigned int proc_id;
    unsigned int task_id;
    unsigned int tty_id;

    // a zero-length buffer cannot receive the character: leave it in the device
    if ( length == 0 ) return 0;

    // terminal allocated to the task running on this processor
    proc_id = _procid();
    task_id = _scheduler[proc_id].current;
    tty_id  = _scheduler[proc_id].context[task_id][CTX_TTY_ID];

    tty_address = (unsigned int*)&seg_tty_base + tty_id*TTY_SPAN;

    // no character available
    if ((tty_address[TTY_STATUS] & 0x1) != 0x1) return 0;

    *buffer = (char)tty_address[TTY_READ];
    return 1;
}

////////////////////////////////////////////////////////////////////////////////
// 	VciMultiIcu driver
////////////////////////////////////////////////////////////////////////////////
// There is in principle one MULTI-ICU component per cluster, and the
// number of independent ICUs is equal to NB_PROCS, because there is 
// one ICU per processor.
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// _icu_write()
//
// Write a 32-bit word in a memory mapped register of the MULTI_ICU device,
// identified by the cluster index, and a processor local index.
// The register is accessed through a volatile pointer, so that the compiler
// can neither remove nor reorder the device access.
// - cluster_index  : must be smaller than NB_CLUSTERS
// - proc_index     : must be smaller than NB_PROCS
// - register_index : must be smaller than ICU_SPAN
// - value          : word to be written
// Returns 0 if success, 1 if one of the three indexes is out of range.
//
// NOTE(review): the cluster and processor offsets are added to an
// (unsigned int*), so both are scaled by sizeof(unsigned int). This is only
// right if CLUSTER_SPAN is expressed in words -- to be confirmed against
// giet_config.h.
////////////////////////////////////////////////////////////////////////////////
unsigned int _icu_write( unsigned int cluster_index,
                         unsigned int proc_index,
                         unsigned int register_index, 
                         unsigned int value )
{
    volatile unsigned int *icu_address;

    // parameters checking 
    if ( register_index >= ICU_SPAN )       return 1;
    if ( cluster_index >= NB_CLUSTERS )     return 1;
    if ( proc_index >= NB_PROCS )           return 1;

    // base of the register bank of the selected ICU
    icu_address = (volatile unsigned int*)&seg_icu_base + 
                  ( cluster_index * CLUSTER_SPAN )  +
                  ( proc_index * ICU_SPAN );

    icu_address[register_index] = value;   // write word 
    return 0;
}

////////////////////////////////////////////////////////////////////////////////
// _icu_read()
//
// Read a 32-bit word in a memory mapped register of the MULTI_ICU device,
// identified by the cluster index and a processor local index.
// The register is accessed through a volatile pointer, so that the compiler
// can neither remove nor reorder the device access.
// - cluster_index  : must be smaller than NB_CLUSTERS
// - proc_index     : must be smaller than NB_PROCS
// - register_index : must be smaller than ICU_SPAN
// - buffer         : destination of the word read from the register
// Returns 0 if success, 1 if one of the three indexes is out of range
// (nothing is written to the buffer in that case).
//
// NOTE(review): the cluster and processor offsets are added to an
// (unsigned int*), so both are scaled by sizeof(unsigned int). This is only
// right if CLUSTER_SPAN is expressed in words -- to be confirmed against
// giet_config.h.
////////////////////////////////////////////////////////////////////////////////
unsigned int _icu_read(  unsigned int cluster_index,
                         unsigned int proc_index,
                         unsigned int register_index, 
                         unsigned int* buffer )
{
    volatile unsigned int *icu_address;

    // parameters checking 
    if ( register_index >= ICU_SPAN )       return 1;
    if ( cluster_index >= NB_CLUSTERS )     return 1;
    if ( proc_index >= NB_PROCS )           return 1;

    // base of the register bank of the selected ICU
    icu_address = (volatile unsigned int*)&seg_icu_base + 
                  ( cluster_index * CLUSTER_SPAN )  +
                  ( proc_index * ICU_SPAN );

    *buffer = icu_address[register_index]; // read word 
    return 0;
}

////////////////////////////////////////////////////////////////////////////////
// 	VciGcd driver
////////////////////////////////////////////////////////////////////////////////
// The Greatest Common Divisor (GCD) unit is a -very- simple hardware coprocessor
// performing the computation of the GCD of two 32 bits integers.
// It has no DMA capability.
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// _gcd_write()
//
// Stores one 32-bit word into the memory mapped register of the GCD
// coprocessor selected by register_index.
// Returns 0 if success, 1 if register_index is not smaller than GCD_END
// (no register is written in that case).
////////////////////////////////////////////////////////////////////////////////
unsigned int _gcd_write( unsigned int register_index, 
                         unsigned int value)
{
    if ( register_index < GCD_END )
    {
        volatile unsigned int *regs = (unsigned int*)&seg_gcd_base;

        regs[register_index] = value;
        return 0;
    }

    // register index out of range
    return 1;
}

////////////////////////////////////////////////////////////////////////////////
// _gcd_read()
//
// Loads one 32-bit word from the memory mapped register of the GCD
// coprocessor selected by register_index, and stores it in *buffer.
// Returns 0 if success, 1 if register_index is not smaller than GCD_END
// (the buffer is left untouched in that case).
////////////////////////////////////////////////////////////////////////////////
unsigned int _gcd_read( unsigned int register_index, 
                        unsigned int *buffer)
{
    if ( register_index < GCD_END )
    {
        volatile unsigned int *regs = (unsigned int*)&seg_gcd_base;

        *buffer = regs[register_index];
        return 0;
    }

    // register index out of range
    return 1;
}

////////////////////////////////////////////////////////////////////////////////
// VciBlockDevice driver
////////////////////////////////////////////////////////////////////////////////
// The VciBlockDevice is a single channel external storage controller.
//
// The IOMMU can be activated or not:
// 
// 1) When the IOMMU is used, a fixed size 2Mbytes vseg is allocated to 
// the IOC peripheral, in the I/O virtual space, and the user buffer is
// dynamically remapped in the IOMMU page table. The corresponding entry 
// in the IOMMU PT1 is defined by the kernel _ioc_iommu_ix1 variable.
// The number of PT2 entries to be unmapped is dynamically computed and stored
// in the kernel _ioc_iommu_npages variable. It cannot be larger than 512.
// The user buffer is unmapped by the _ioc_completed() function when 
// the transfer is completed.
//
// 2) If the IOMMU is not used, we check that the user buffer is mapped to a
// contiguous physical buffer (this is generally true because the user space
// page tables are statically constructed to use contiguous physical memory).
//
// Finally, the memory buffer must fulfill the following conditions:
// - The user buffer must be word aligned, 
// - The user buffer must be mapped in user address space, 
// - The user buffer must be writable in case of (to_mem) access,
// - The total number of physical pages occupied by the user buffer cannot
//   be larger than 512 pages if the IOMMU is activated,
// - All physical pages occupied by the user buffer must be contiguous
//   if the IOMMU is not activated.
// An error code is returned if these conditions are not verified.
//
// As the IOC component can be used by several programs running in parallel,
// the _ioc_lock variable guarantees exclusive access to the device.  The
// _ioc_read() and _ioc_write() functions use atomic LL/SC to get the lock
// and set _ioc_lock to a non zero value.  The _ioc_write() and _ioc_read()
// functions are blocking, polling the _ioc_lock variable until the device is
// available.
// When the transfer is completed, the ISR routine activated by the IOC IRQ
// sets the _ioc_done variable to a non-zero value. Possible address errors
// detected by the IOC peripheral are reported by the ISR in the _ioc_status
// variable.
// The _ioc_completed() function is polling the _ioc_done variable, waiting for
// transfer completion. When the completion is signaled, the _ioc_completed()
// function resets the _ioc_done variable to zero, and releases the _ioc_lock
// variable.
//
// In a multi-processing environment, this polling policy should be replaced by
// a descheduling policy for the requesting process.
///////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////
// _ioc_get_lock()
//
// This blocking helper is used by '_ioc_read()' and '_ioc_write()' functions
// to get _ioc_lock using atomic LL/SC.
// When the lock is already taken, or when the SC fails, the processor waits
// for a pseudo-random number of loop iterations (derived once from
// _proctime(), between 0 and 240) before trying again.
//
// Implementation notes:
// - The back-off loop tests the counter BEFORE decrementing it, so that a
//   null delay retries at once instead of spinning.
// - Delay slots are filled explicitly inside a noreorder region, so the
//   sequence does not depend on the assembler mode in effect.
// - Numeric local labels are used, so the code can be expanded several
//   times in the same file without duplicate symbols.
// - The "memory" clobber prevents the compiler from moving memory accesses
//   across the lock acquisition.
///////////////////////////////////////////////////////////////////////////////
static inline void _ioc_get_lock()
{
    register unsigned int delay = (_proctime() & 0xF) << 4;
    register unsigned int *plock = (unsigned int*)&_ioc_lock;

    asm volatile (
            ".set push               \n"
            ".set noreorder          \n"
            "1:                      \n"
            "ll    $2,   0(%0)       \n" /* $2 <= _ioc_lock current value */
            "bnez  $2,   2f          \n" /* back off if _ioc_lock already taken */
            "li    $3,   1           \n" /* (delay slot) $3 <= argument for sc */
            "sc    $3,   0(%0)       \n" /* try to set _ioc_lock */
            "bnez  $3,   4f          \n" /* exit if atomic */
            "nop                     \n" /* (delay slot) */
            "2:                      \n"
            "move  $4,   %1          \n" /* $4 <= delay */
            "3:                      \n"
            "beqz  $4,   1b          \n" /* delay elapsed : retry ll */
            "nop                     \n" /* (delay slot) */
            "b     3b                \n" /* keep waiting */
            "addiu $4,   $4,   -1    \n" /* (delay slot) $4 <= $4 - 1 */
            "4:                      \n"
            ".set pop                \n"
            :
            :"r"(plock), "r"(delay)
            :"$2", "$3", "$4", "memory");
}

///////////////////////////////////////////////////////////////////////////////
//  _ioc_access()
// This function transfer data between a memory buffer and the block device.
// The buffer lentgth is (count*block_size) bytes.
//
// Arguments are:
// - to_mem     : from external storage to memory when non 0
// - lba        : first block index on the external storage.
// - user_vaddr : virtual base address of the memory buffer.
// - count      : number of blocks to be transfered.
// Returns 0 if success, > 0 if error.
///////////////////////////////////////////////////////////////////////////////
unsigned int _ioc_access( unsigned int  to_mem,
                          unsigned int 	lba,
                          unsigned int  user_vaddr,
                          unsigned int 	count )
{
    unsigned int		user_vpn_min;	// first virtuel page index in user space
    unsigned int		user_vpn_max;	// last virtual page index in user space
    unsigned int		vpn;			// current virtual page index in user space
    unsigned int		ppn;			// physical page number
    unsigned int		flags;			// page protection flags
    unsigned int		ix2;			// page index in IOMMU PT1 page table
    unsigned int		addr;			// buffer address for IOC peripheral
    unsigned int		user_ptp;		// page table pointer in user space
    unsigned int		ko;				// bool returned by _v2p_translate()
    unsigned int		ppn_first;		// first physical page number for user buffer
    unsigned int		ltid;			// current task local index
    static_scheduler_t*	psched;			// pointer on the current task scheduler
        
    // check buffer alignment
    if ( (unsigned int)user_vaddr & 0x3 ) return 1;

    unsigned int*	ioc_address = (unsigned int*)&seg_ioc_base;
    unsigned int	block_size   = ioc_address[BLOCK_DEVICE_BLOCK_SIZE];
    unsigned int	length       = count*block_size;

    // get user space page table virtual address
    psched   = &_scheduler[_procid()];
    ltid     = psched->current;
    user_ptp = psched->context[ltid][CTX_PTAB_ID];
    
    user_vpn_min = user_vaddr >> 12;
    user_vpn_max = (user_vaddr + length - 1) >> 12;
    ix2          = 0;

    // loop on all virtual pages covering the user buffer
    for ( vpn = user_vpn_min ; vpn <= user_vpn_max ; vpn++ )
    {
        // get ppn and flags for each vpn
        ko = _v2p_translate( (page_table_t*)user_ptp,
                             vpn,
                             &ppn,
                             &flags );

        // check access rights
        if ( ko )								  return 2;		// unmapped
        if ( (flags & PTE_U) == 0 )				  return 3;		// not in user space
        if ( ( (flags & PTE_W) == 0 ) && to_mem ) return 4;		// not writable

        // save first ppn value
        if ( ix2 == 0 ) ppn_first = ppn;

        if ( GIET_IOMMU_ACTIVE )    // the user buffer must be remapped in the I/0 space
        {
            // check buffer length < 2 Mbytes
            if ( ix2 > 511 ) return 2;

            // map the physical page in IOMMU page table
            _iommu_add_pte2( _ioc_iommu_ix1,	// PT1 index
                             ix2,				// PT2 index
					         ppn,				// Physical page number	
                             flags );			// Protection flags
        }
        else			// no IOMMU : check that physical pages are contiguous
        {
            if ( (ppn - ppn_first) != ix2 )	      return 5;		// split physical buffer  
        }
        
        // increment page index
        ix2++;
    } // end for vpn

    // register the number of pages to be unmapped
    _ioc_iommu_npages = (user_vpn_max - user_vpn_min) + 1;

    // invalidate data cache in case of memory write
    if ( to_mem ) _dcache_buf_invalidate( (void*)user_vaddr, length );

    // compute buffer base address for IOC depending on IOMMU activation
    if ( GIET_IOMMU_ACTIVE ) addr = (_ioc_iommu_ix1) << 21 | (user_vaddr & 0xFFF);
    else                     addr = (ppn_first << 12) | (user_vaddr & 0xFFF);

    // get the lock on ioc device 
    _ioc_get_lock();

    // peripheral configuration  
    ioc_address[BLOCK_DEVICE_BUFFER]     = addr;
    ioc_address[BLOCK_DEVICE_COUNT]      = count;
    ioc_address[BLOCK_DEVICE_LBA]        = lba;
    if ( to_mem == 0 ) ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_WRITE;
    else               ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_READ;

    return 0;
}

/////////////////////////////////////////////////////////////////////////////////
// _ioc_completed()
//
// This function checks completion of an I/O transfer and reports errors. 
// As it is a blocking call, the processor is stalled: it polls the
// _ioc_done variable until it becomes non zero.
// If the IOMMU is activated, the _ioc_iommu_npages pages mapped in the I/O
// virtual space are unmapped, and the corresponding IOB TLB entries are
// invalidated.
// The _ioc_done flag is cleared BEFORE the _ioc_lock is released: once the
// lock is free another processor can start a transfer, and its completion
// flag must not be erased by this function.
// Returns 0 if _ioc_status is BLOCK_DEVICE_READ_SUCCESS or
// BLOCK_DEVICE_WRITE_SUCCESS, 1 otherwise.
/////////////////////////////////////////////////////////////////////////////////
unsigned int _ioc_completed()
{
    unsigned int	ret;
    unsigned int	ix2;

    // busy waiting
    while (_ioc_done == 0)
        asm volatile("nop");

    // unmap the buffer from IOMMU page table if IOMMU is activated
    if ( GIET_IOMMU_ACTIVE )
    {
        // device register: volatile, one write is required per page
        volatile unsigned int* iob_address = (unsigned int*)&seg_iob_base;

        for ( ix2 = 0 ; ix2 < _ioc_iommu_npages ; ix2++ )
        {
            // unmap the page in IOMMU page table
            _iommu_inval_pte2( _ioc_iommu_ix1,	// PT1 index 
                              ix2 );			// PT2 index

            // clear IOMMU TLB
            iob_address[IOB_INVAL_PTE] = (_ioc_iommu_ix1 << 21) | (ix2) << 12; 
        }
    }

    // test IOC status 
    if ((_ioc_status != BLOCK_DEVICE_READ_SUCCESS)
            && (_ioc_status != BLOCK_DEVICE_WRITE_SUCCESS)) ret = 1;	// error
    else                                                    ret = 0;	// success

    // reset synchronization variables: completion flag first, lock last.
    // Both stores are volatile accesses, so the compiler keeps this order.
    _ioc_done = 0;
    *(volatile unsigned int*)&_ioc_lock = 0;

    return ret;
}

///////////////////////////////////////////////////////////////////////////////
// _ioc_read()
// Starts a transfer from the block device to a memory buffer in user space,
// by calling _ioc_access() with a non-zero (to_mem) argument.
// - lba    : first block index on the block device
// - buffer : base address of the memory buffer (must be word aligned)
// - count  : number of blocks to be transferred.
// Returns the value returned by _ioc_access(): 0 if success, > 0 if error.
///////////////////////////////////////////////////////////////////////////////
unsigned int _ioc_read( unsigned int 	lba, 
                        void*		    buffer, 
                        unsigned int	count )
{
    unsigned int to_mem = 1;                        // device to memory
    unsigned int vaddr  = (unsigned int)buffer;     // buffer virtual address

    return _ioc_access( to_mem, lba, vaddr, count );
}

///////////////////////////////////////////////////////////////////////////////
// _ioc_write()
// Starts a transfer from a memory buffer in user space to the block device,
// by calling _ioc_access() with a null (to_mem) argument.
// - lba    : first block index on the block device
// - buffer : base address of the memory buffer (must be word aligned)
// - count  : number of blocks to be transferred.
// Returns the value returned by _ioc_access(): 0 if success, > 0 if error.
///////////////////////////////////////////////////////////////////////////////
unsigned int _ioc_write( unsigned int 	lba, 
                         const void*	buffer, 
                         unsigned int	count )
{
    unsigned int to_mem = 0;                        // memory to device
    unsigned int vaddr  = (unsigned int)buffer;     // buffer virtual address

    return _ioc_access( to_mem, lba, vaddr, count );
}

//////////////////////////////////////////////////////////////////////////////////
// 	VciFrameBuffer driver
//////////////////////////////////////////////////////////////////////////////////
// The '_fb_sync_write' and '_fb_sync_read' functions use a memcpy strategy to
// implement the transfer between a data buffer (user space) and the frame
// buffer (kernel space). They are blocking until completion of the transfer.
// The '_fb_write()', '_fb_read()' and '_fb_completed()' functions use the DMA
// coprocessor to transfer data between the user buffer and the frame buffer.
// These  functions use a polling policy to test the global variables _dma_busy[i] 
// and detect the transfer completion.  
// There are NB_PROCS DMA channels, that are indexed by the proc_id.
// The _dma_busy[i] synchronisation variables (one per channel) are set by the OS, 
// and reset by the ISR.
//////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////////////
// _fb_sync_write()
// Transfer data from a memory buffer to the frame_buffer device using 
// a memcpy. The source memory buffer must be in user address space, i.e.
// entirely below address 0x80000000.
// - offset : offset (in bytes) in the frame buffer.
// - buffer : base address of the memory buffer.
// - length : number of bytes to be transferred.
// The range check is done on (length) against the room left below
// 0x80000000, so that a huge length cannot wrap around the 32-bit address
// space and make a kernel range look like a user range.
// Returns 0 if success, 1 if the buffer is not entirely in user space.
// NOTE(review): (offset + length) is not checked against the frame buffer
// size, which is not known in this file.
//////////////////////////////////////////////////////////////////////////////////
unsigned int _fb_sync_write( unsigned int	offset, 
                             const void* 	buffer, 
                             unsigned int 	length )
{
    volatile unsigned char *fb_address;
    unsigned int            user_base = (unsigned int)buffer;

    /* buffer must be in user space (overflow-safe form of
     * "user_base + length >= 0x80000000") */
    if ((user_base >= 0x80000000)
            || (length >= (0x80000000 - user_base)))
        return 1;

    fb_address = (unsigned char*)&seg_fb_base + offset;

    /* buffer copy */
    memcpy((void*)fb_address, (void*)buffer, length);

    return 0;
}

//////////////////////////////////////////////////////////////////////////////////
// _fb_sync_read()
// Transfer data from the frame_buffer device to a memory buffer using
// a memcpy. The destination memory buffer must be in user address space,
// i.e. entirely below address 0x80000000.
// - offset : offset (in bytes) in the frame buffer.
// - buffer : base address of the memory buffer (it is written, although the
//            prototype declares it const).
// - length : number of bytes to be transferred.
// The range check is done on (length) against the room left below
// 0x80000000, so that a huge length cannot wrap around the 32-bit address
// space and make a kernel range look like a user range.
// Returns 0 if success, 1 if the buffer is not entirely in user space.
// NOTE(review): (offset + length) is not checked against the frame buffer
// size, which is not known in this file.
//////////////////////////////////////////////////////////////////////////////////
unsigned int _fb_sync_read( unsigned int 	offset, 
                            const void*		buffer, 
                            unsigned int 	length )
{
    volatile unsigned char *fb_address;
    unsigned int            user_base = (unsigned int)buffer;

    /* buffer must be in user space (overflow-safe form of
     * "user_base + length >= 0x80000000") */
    if ((user_base >= 0x80000000)
            || (length >= (0x80000000 - user_base)))
        return 1;

    fb_address = (unsigned char*)&seg_fb_base + offset;

    /* buffer copy */
    memcpy((void*)buffer, (void*)fb_address, length);

    return 0;
}

//////////////////////////////////////////////////////////////////////////////////
// _fb_write()
// Transfer data from an memory buffer to the frame_buffer device using a DMA.
// The source memory buffer must be in user address space.
// - offset : offset (in bytes) in the frame buffer.
// - buffer : base address of the memory buffer.
// - length : number of bytes to be transfered.
// Returns 0 if success, > 0 if error.
//////////////////////////////////////////////////////////////////////////////////
unsigned int _fb_write( unsigned int 	offset, 
                        const void*	buffer, 
                        unsigned int 	length )
{
    volatile unsigned char *fb_address;
    volatile unsigned int *dma;

    unsigned int proc_id;
    unsigned int delay;
    unsigned int i;

    /* buffer must be in user space */
    if (((unsigned int)buffer >= 0x80000000)
            || (((unsigned int)buffer + length ) >= 0x80000000 ))
        return 1;

    proc_id = _procid();
    fb_address = (unsigned char*)&seg_fb_base + offset;
    dma = (unsigned int*)&seg_dma_base + (proc_id * DMA_SPAN);

    /* waiting until DMA device is available */
    while (_dma_busy[proc_id] != 0)
    {
        /* if the lock failed, busy wait with a pseudo random delay between bus
         * accesses */
        delay = (_proctime() & 0xF) << 4;
        for (i = 0; i < delay; i++)
            asm volatile("nop");
    }
    _dma_busy[proc_id] = 1;

    /* DMA configuration for write transfer */
    dma[DMA_IRQ_DISABLE] = 0;
    dma[DMA_SRC] = (unsigned int)buffer;
    dma[DMA_DST] = (unsigned int)fb_address;
    dma[DMA_LEN] = (unsigned int)length;
    return 0;
}

//////////////////////////////////////////////////////////////////////////////////
// _fb_read()
// Transfer data from the frame_buffer device to an memory buffer using a DMA.
// The destination memory buffer must be in user address space.
// - offset : offset (in bytes) in the frame buffer.
// - buffer : base address of the memory buffer.
// - length : number of bytes to be transfered.
// All cache lines corresponding to the the target buffer are invalidated
// for cache coherence.
// Returns 0 if success, > 0 if error.
//////////////////////////////////////////////////////////////////////////////////
unsigned int _fb_read( unsigned int 	offset, 
                       const void*	buffer, 
                       unsigned int 	length )
{
    volatile unsigned char *fb_address;
    volatile unsigned int *dma;

    unsigned int proc_id;
    unsigned int delay;
    unsigned int i;

    /* buffer must be in user space */
    if (((unsigned int)buffer >= 0x80000000)
            || (((unsigned int)buffer + length ) >= 0x80000000 ))
        return 1;

    proc_id = _procid();
    fb_address = (unsigned char*)&seg_fb_base + offset;
    dma = (unsigned int*)&seg_dma_base + (proc_id * DMA_SPAN);

    /* waiting until DMA device is available */
    while (_dma_busy[proc_id] != 0)
    {
        /* if the lock failed, busy wait with a pseudo random delay between bus
         * accesses */
        delay = (_proctime() & 0xF) << 4;
        for (i = 0; i < delay; i++)
            asm volatile("nop");
    }
    _dma_busy[proc_id] = 1;

    /* DMA configuration for write transfer */
    dma[DMA_IRQ_DISABLE] = 0;
    dma[DMA_SRC] = (unsigned int)fb_address;
    dma[DMA_DST] = (unsigned int)buffer;
    dma[DMA_LEN] = (unsigned int)length;

    /* invalidation of data cache */
    _dcache_buf_invalidate(buffer, length);

    return 0;
}

//////////////////////////////////////////////////////////////////////////////////
// _fb_completed()
// This function checks completion of a DMA transfer to or fom the frame buffer.
// As it is a blocking call, the processor is stalled until the next interrupt.
// Returns 0 if success, > 0 if error.
//////////////////////////////////////////////////////////////////////////////////
unsigned int _fb_completed()
{
    unsigned int proc_id;

    proc_id = _procid();

    while (_dma_busy[proc_id] != 0)
        asm volatile("nop");

    if (_dma_status[proc_id] != 0)
        return 1;

    return 0;
}

