source: trunk/softs/giet_tsar/drivers.c @ 160

Last change on this file since 160 was 158, checked in by alain, 14 years ago

Introducing the three sub-directories in the softs directory:

  • giet_tsar
  • soft_filter_giet
  • soft_transpose_giet
File size: 40.1 KB
Line 
1/****************************************************************************************
2File : drivers.c
3Written by Alain Greiner & Nicolas Pouillon
4Date : december 2010
5
6Basic drivers used by the GIET, that is running
7on the MIPS32 processor architecture.
8
9The supported peripherals are:
10- the SoClib pibus_multi_tty
11- the SocLib pibus_timer
12- the SocLib pibus_dma
13- The SoCLib pibus_icu
14- The SoCLib pibus_gcd
15- The SoCLib pibus_frame_buffer
16- The SoCLib pibus_block_device
17
18The following global parameters must be defined in the ldscript.
19- NB_CLUSTERS : number of clusters
20- NB_PROCS : number of processor per cluster
21- NB_TASKS : max number of tasks per processor
22- NB_LOCKS : max number of supported spin_locks
23- NB_TIMERS : max number of timers per processor
24
25The following base addresses must be defined in the ldscript
26- seg_icu_base
27- seg_timer_base
28- seg_tty_base
29- seg_gcd_base
30- seg_dma_base
31- seg_locks_base
32- seg_fb_base
33- seg_ioc_base
34****************************************************************************************/
35
36#include "drivers.h"
37#include "icu.h"
38#include "block_device.h"
39#include "dma.h"
40
// Opaque type used to declare the linker-script symbols below.
// These symbols carry their value in their ADDRESS : the code only ever
// uses them through the address-of operator, e.g. (unsigned int)&NB_PROCS.
struct plouf;

//////////////////////////////////////////////////////////////
// various informations that must be defined in ldscript
//////////////////////////////////////////////////////////////

// global parameters
extern struct plouf NB_CLUSTERS;
extern struct plouf NB_PROCS;
extern struct plouf NB_TASKS;
extern struct plouf NB_TIMERS;
extern struct plouf NB_LOCKS;

// segment base addresses
extern struct plouf seg_icu_base;
extern struct plouf seg_timer_base;
extern struct plouf seg_tty_base;
extern struct plouf seg_gcd_base;
extern struct plouf seg_dma_base;
extern struct plouf seg_locks_base;
extern struct plouf seg_fb_base;
extern struct plouf seg_ioc_base;

// section placement : driver code goes to ".drivers",
// shared synchronization variables go to ".unckdata"
#define in_drivers __attribute__((section (".drivers")))
#define in_unckdata __attribute__((section (".unckdata")))
63
64////////////////////////////////////////////////////////////////////////////////////////
65//  Global uncachable variables for synchronization between drivers and ISRs
66////////////////////////////////////////////////////////////////////////////////////////
67
68in_unckdata int volatile    _dma_status[256];
69in_unckdata int volatile    _dma_busy[256]   =   { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
70                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
71                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
72                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
73                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
74                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
75                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
76                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
77                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
78                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
81                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
82                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
83                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
84                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
85
86in_unckdata int volatile    _ioc_lock    = 0;
87in_unckdata int volatile    _ioc_done    = 0;
88in_unckdata int volatile    _ioc_status;
89
90in_unckdata char volatile   _tty_get_buf[256];
91in_unckdata int volatile    _tty_get_full[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
92                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
93                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
94                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
95                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
96                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
97                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
98                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
99                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
100                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
101                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
102                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
103                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
104                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
105                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
106                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
107
108in_unckdata char volatile   _tty_put_buf[256];
109in_unckdata int volatile    _tty_put_full[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
110                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
111                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
112                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
113                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
114                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
115                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
116                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
117                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
118                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
119                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
120                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
121                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
122                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
123                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
124                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
125
126////////////////////////////////////////////////////////////////////////////////////////
127//  Global uncachable variables for inter-task barriers
128////////////////////////////////////////////////////////////////////////////////////////
129
130in_unckdata int volatile    _barrier_initial_value[8] = { 0,0,0,0,0,0,0,0 };
131in_unckdata int volatile    _barrier_count[8]         = { 0,0,0,0,0,0,0,0 };
132
133////////////////////////////////////////////////////////////////////////////////////////
134//  Global uncachable variables for spin_locks using LL/C instructions
135////////////////////////////////////////////////////////////////////////////////////////
136
137in_unckdata int volatile    _spin_lock[256] =    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
138                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
139                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
140                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
141                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
142                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
143                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
144                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
145                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
146                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
147                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
148                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
149                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
150                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
151                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
152                                                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
153
154////////////////////////////////////////////////////////////////////////////////////////
155//  mempcy()
156// GCC requires this function. Taken from MutekH.
157////////////////////////////////////////////////////////////////////////////////////////
158__attribute((used))
159in_drivers static void *memcpy(void *_dst, const void *_src, unsigned int size)
160{
161    unsigned int *dst = _dst;
162    const unsigned int *src = _src;
163    if ( ! ((unsigned int)dst & 3) && ! ((unsigned int)src & 3) )
164        while (size > 3) {
165            *dst++ = *src++;
166            size -= 4;
167        }
168
169    unsigned char *cdst = (unsigned char*)dst;
170    unsigned char *csrc = (unsigned char*)src;
171
172    while (size--) {
173        *cdst++ = *csrc++;
174    }
175    return _dst;
176}
177
178////////////////////////////////////////////////////////////////////////////////////////
179//  _procid()
180// Access CP0 and returns processor ident
181////////////////////////////////////////////////////////////////////////////////////////
182in_drivers unsigned int _procid()
183{
184    unsigned int ret;
185    asm volatile( "mfc0 %0, $15, 1": "=r"(ret) );
186    return (ret & 0xFF);
187}
188////////////////////////////////////////////////////////////////////////////////////////
189//  _segment_increment()
190// Access CP0 to get the procid, and returns the address increment to access
191// various peripherals (TTY, TIMER, ICU, DMA), in case of multiprocessors architectures.
192// It uses the NB_PROCS and NB_CLUSTERS parameters to compute this increment:
193// - increment  = cluster_id*cluster_increment + local_id*local_increment
194// - cluster_id = procid / NB_PROCS 
195// - local_id   = procid % NB_PROCS
196// - cluster_increment = 4G / NB_CLUSTERS
197////////////////////////////////////////////////////////////////////////////////////////
198in_drivers unsigned int _segment_increment(unsigned int local_increment)
199{
200    unsigned int        nprocs                  = (unsigned int)&NB_PROCS;
201    unsigned int        nclusters               = (unsigned int)&NB_CLUSTERS;
202    unsigned int        cluster_increment       = (0x80000000/nclusters)*2;
203    unsigned int        pid                     = _procid();
204    return (pid / nprocs)*cluster_increment + (pid % nprocs)*local_increment;
205}
206////////////////////////////////////////////////////////////////////////////////////////
207//  _proctime()
208// Access CP0 and returns processor time
209////////////////////////////////////////////////////////////////////////////////////////
210in_drivers unsigned int _proctime()
211{
212    unsigned int ret;
213    asm volatile( "mfc0 %0, $9": "=r"(ret) );
214    return ret;
215}
216////////////////////////////////////////////////////////////////////////////////////////
217//  _procnumber()
218// Returns the number of processsors controled by the GIET
219////////////////////////////////////////////////////////////////////////////////////////
220in_drivers unsigned int _procnumber()
221{
222    return (unsigned int)&NB_PROCS * (unsigned int)&NB_CLUSTERS;
223}
224////////////////////////////////////////////////////////////////////////////////////////
225//  _it_mask()
226// Access CP0 and mask IRQs
227////////////////////////////////////////////////////////////////////////////////////////
228in_drivers void _it_mask()
229{
230    int tmp;
231    asm volatile("mfc0  %0, $12"    : "=r" (tmp) );
232    asm volatile("ori   %0, %0, 1"  : "=r" (tmp) );
233    asm volatile("mtc0  %0, $12"    : "=r" (tmp) );
234}
235////////////////////////////////////////////////////////////////////////////////////////
236//  _it_enable()
237// Access CP0 and enable IRQs
238////////////////////////////////////////////////////////////////////////////////////////
239in_drivers void _it_enable()
240{
241    int tmp;
242    asm volatile("mfc0  %0, $12"    : "=r" (tmp) );
243    asm volatile("addi  %0, %0, -1" : "=r" (tmp) );
244    asm volatile("mtc0  %0, $12"    : "=r" (tmp) );
245}
246//////////////////////////////////////////////////////////////////////
247//  _dcache_buf_invalidate()
248// Invalidate all cache lines corresponding to a memory buffer.
249// This is used by the block_device driver.
250/////////////////////////////////////////////////////////////////////////
251in_drivers void _dcache_buf_invalidate(const void * buffer, size_t size)
252{
253    size_t i;
254    size_t dcache_line_size;
255
256    // retrieve dcache line size from config register (bits 12:10)
257    asm volatile("mfc0 %0, $16, 1" : "=r" (dcache_line_size));
258
259    dcache_line_size = 2 << ((dcache_line_size>>10) & 0x7);
260
261    // iterate on lines to invalidate each one of them
262    for ( i=0; i<size; i+=dcache_line_size )
263        asm volatile(" cache %0, %1"
264                :
265                :"i" (0x11), "R" (*((char*)buffer+i)));
266}
267
268/////////////////////////////////////////////////////////////////////////
269//  _itoa_dec()
270// convert a 32 bits unsigned int to a string of 10 decimal characters.
271/////////////////////////////////////////////////////////////////////////
272in_drivers void _itoa_dec(unsigned val, char* buf)
273{
274    const char  DecTab[] = "0123456789";
275    unsigned int i;
276    for( i=0 ; i<10 ; i++ )
277    {
278        if( (val!=0) || (i==0) ) buf[9-i] = DecTab[val % 10];
279        else                     buf[9-i] = 0x20;
280        val /= 10;
281    }
282}
283//////////////////////////////////////////////////////////////////////////
284//  _itoa_hex()
285// convert a 32 bits unsigned int to a string of 8 hexadecimal characters.
286///////////////////////////////////////////////////////////////////////////
287in_drivers void _itoa_hex(int val, char* buf)
288{
289    const char  HexaTab[] = "0123456789ABCD";
290    unsigned int i;
291    for( i=0 ; i<8 ; i++ )
292    {
293        buf[7-i] = HexaTab[val % 16];
294        val /= 16;
295    }
296}
297///////////////////////////////////////////////////////////////////////////////////////
298// MULTI_TIMER component
299// Each processor can handle up to NB_TIMERS independant timers.
300// The segment base address is defined as
301//         seg_timer_base + segment_increment(NB_TIMERS*16) + index*16
302///////////////////////////////////////////////////////////////////////////////////////
303//  _timer_write()
304// Write a 32 bits word in a memory mapped register of the MULTI_TIMER
305///////////////////////////////////////////////////////////////////////////////////////
306in_drivers int _timer_write(size_t timer_index, size_t register_index, int value)
307{
308    int*                timer_address;
309    size_t              ntimers         = (size_t)&NB_TIMERS;
310    unsigned int        base            = (unsigned int)&seg_timer_base;
311    unsigned int        increment       = _segment_increment(ntimers*TIMER_SPAN*4); 
312
313    if( timer_index >= ntimers)         return -1;
314    if( register_index >= TIMER_SPAN )  return -1;
315
316    timer_address = (int*)(base + increment + timer_index*TIMER_SPAN*4);
317    timer_address[register_index] = value;          // write word
318    return 0;
319}
320///////////////////////////////////////////////////////////////////////////////////////
321//  _timer_read()
322// Read a 32 bits word in a memory mapped register of the MULTI_TIMER
323///////////////////////////////////////////////////////////////////////////////////////
324in_drivers int _timer_read(size_t timer_index, size_t register_index, int* buffer)
325{
326    int*                timer_address;
327    size_t              ntimers         = (size_t)&NB_TIMERS;
328    unsigned int        base            = (unsigned int)&seg_timer_base;
329    unsigned int        increment       = _segment_increment(ntimers*TIMER_SPAN*4); 
330
331    if( timer_index >= ntimers)         return -1;
332    if( register_index >= TIMER_SPAN )  return -1;
333
334    if( timer_index >= ntimers) return -1;
335    if( register_index >= TIMER_SPAN ) return -1;
336
337    timer_address = (int*)(base + increment + timer_index*TIMER_SPAN*4);
338    *buffer = timer_address[register_index];        // read word
339    return 0;
340}
341///////////////////////////////////////////////////////////////////////////////////////
342//  MULTI_TTY COMPONENT
343// The total number of TTYs is equal to NB_CLUSTERS * NB_PROCS * NB_TASKS.
344// - tty_address = seg_tty_base + _segment_increment(NB_TASKS*16) + task_id*16
345// - tty_index   = proc_id*NB_TASKS + task_id
346///////////////////////////////////////////////////////////////////////////////////////
347//  _tty_write()
348// Write one or several characters directly from a fixed length user buffer
349// to the TTY_WRITE register of the TTY controler.
350// It doesn't use the TTY_PUT_IRQ interrupt and the associated kernel buffer.
351// This is a non blocking call : it test the TTY_STATUS register.
352// If the TTY_STATUS_WRITE bit is set, the transfer stops and the function
353// returns  the number of characters that have been actually written.
354// It returns -1 in case of error (proc_id or task index too large)
355///////////////////////////////////////////////////////////////////////////////////////
356in_drivers int _tty_write(char* buffer, int length)
357{
358    char*               tty_address;
359    size_t              ntasks          = (size_t)&NB_TASKS;
360    size_t              nprocs          = (size_t)&NB_PROCS;
361    size_t              nclusters       = (size_t)&NB_CLUSTERS;
362    unsigned int        base            = (unsigned int)&seg_tty_base;
363    unsigned int        increment       = _segment_increment(ntasks*TTY_SPAN*4);
364    size_t              pid             = _procid();
365    size_t              tid             = _current_task_array[pid];
366    int                 nwritten        = 0;
367    int                 i;
368
369    if( tid >= ntasks )                 return -1;
370    if( pid >= nprocs*nclusters )       return -1;
371
372    tty_address = (char*)(base + increment + tid*TTY_SPAN*4);
373
374    for ( i=0 ; i < length ; i++ )
375    {
376        if((tty_address[TTY_STATUS*4] & 0x2) == 0x2)  break;
377        else
378        {
379            tty_address[TTY_WRITE*4] = buffer[i]; // write character
380            nwritten++;
381        }
382    }
383    return nwritten;
384}
385///////////////////////////////////////////////////////////////////////////////////////
386//  _tty_read()
387// Fetch one character directly from the TTY_READ register of the TTY controler,
388// and writes this character to the user buffer.
389// It doesn't use the TTY_GET_IRQ interrupt and the associated kernel buffer.
390// This is a non blocking call : it returns 0 if the register is empty,
391// and returns 1 if the register is full.
392// It returns -1 in case of error (proc_id or task_id too large or length != 1)
393// The length argument is not used in this implementation, and has been
394// introduced for future implementations.
395///////////////////////////////////////////////////////////////////////////////////////
396in_drivers int _tty_read(char* buffer, int length)
397{
398    char*               tty_address;
399    size_t              ntasks          = (size_t)&NB_TASKS;
400    size_t              nprocs          = (size_t)&NB_PROCS;
401    size_t              nclusters       = (size_t)&NB_CLUSTERS;
402    unsigned int        base            = (unsigned int)&seg_tty_base;
403    unsigned int        increment       = _segment_increment(ntasks*TTY_SPAN*4);
404    size_t              pid             = _procid();
405    size_t              tid             = _current_task_array[pid];
406
407    if( length != 1)                    return -1;
408    if( pid >= nprocs*nclusters )       return -1;
409    if( tid >= ntasks )                 return -1;
410   
411    tty_address = (char*)(base + increment + tid*TTY_SPAN*4);
412
413    if((tty_address[TTY_STATUS*4] & 0x1) == 0x1)
414    {
415        buffer[0] = tty_address[TTY_READ*4];
416        return 1;
417    }
418    else
419    {
420        return 0;
421    }
422}
423///////////////////////////////////////////////////////////////////////////////////////
424//  _tty_read_irq()
425// iAS it uses the TTY_GET_IRQ interrupt and the associated kernel buffer,
426// that has been written by the ISR, this function does not access the TTY registers.
427// It fetch one single character from the _tty_get_buf[tty_index] kernel buffer, writes
428// this character to the user buffer, and reset the _tty_get_full[tty_index] buffer.
429// This is a non blocking call : it returns 0 if the kernel buffer is empty,
430// and returns 1 if the buffer is full.
431// It returns -1 in case of error (proc_id or task_id too large, or length != 1)
432// The length argument is not used in this implementation, and has been
433// introduced for future implementations.
434///////////////////////////////////////////////////////////////////////////////////////
435in_drivers int _tty_read_irq(char* buffer, int length)
436{
437    int     pid         = _procid();
438    int     tid         = _current_task_array[pid];
439    int     ntasks      = (int)&NB_TASKS;
440    int     nprocs      = (int)&NB_PROCS;
441    int     nclusters   = (int)&NB_CLUSTERS;
442    int     tty_index;
443
444    if( length != 1)                    return -1;
445    if( pid >= nprocs*nclusters )       return -1;
446    if( tid >= ntasks )                 return -1;
447
448    tty_index = pid*ntasks + tid;
449    if( _tty_get_full[tty_index] == 0 ) return 0;
450
451    *buffer = _tty_get_buf[tty_index];
452    _tty_get_full[tty_index] = 0;
453    return 1;
454}
455///////////////////////////////////////////////////////////////////////////////////////
456//  _exit()
457// Exit (suicide) after printing message on  a TTY terminal.
458///////////////////////////////////////////////////////////////////////////////////////
459in_drivers int  _exit()
460{
461    char buf[] = "\n\n!!!  Exit  Processor          !!!\n";
462    int pid = _procid();
463
464    buf[24] = '0';
465    buf[25] = 'x';
466    buf[26] = (char)((pid>>8) & 0xF) + 0x30;
467    buf[27] = (char)((pid>>4) & 0xF) + 0x30;
468    buf[28] = (char)(pid & 0xF)      + 0x30;
469    _tty_write(buf, 36);
470
471    while(1) asm volatile("nop");   // infinite loop...
472}
473
474///////////////////////////////////////////////////////////////////////////////////////
475//  _icu_write()
476// Write a 32 bits word in a memory mapped register of the ICU peripheral
477// The base address is defined by the processor ID
478///////////////////////////////////////////////////////////////////////////////////////
479in_drivers int _icu_write(size_t register_index, int value)
480{
481    int*                icu_address;
482    unsigned int        base = (int)&seg_icu_base;
483    unsigned int        increment = _segment_increment(ICU_SPAN*4);
484
485    if( register_index >= ICU_SPAN ) return -1;
486
487    icu_address = (int*)(base + increment);
488    icu_address[register_index] = value;   // write word
489    return 0;
490}
491///////////////////////////////////////////////////////////////////////////////////////
492//  _icu_read()
493// Read a 32 bits word in a memory mapped register of the ICU peripheral
494// The ICU base address is defined by the processor ID
495///////////////////////////////////////////////////////////////////////////////////////
496in_drivers int _icu_read(size_t register_index, int* buffer)
497{
498    int*                icu_address;
499    unsigned int        base = (int)&seg_icu_base;
500    unsigned int        increment = _segment_increment(ICU_SPAN*4);
501
502    if( register_index >= ICU_SPAN ) return -1;
503
504    icu_address = (int*)(base + increment);
505    *buffer = icu_address[register_index];      // read word
506    return 0;
507}
508///////////////////////////////////////////////////////////////////////////////////////
509//  _gcd_write()
510// Write a 32 bits word in a memory mapped register of the GCD coprocessor
511///////////////////////////////////////////////////////////////////////////////////////
512in_drivers int _gcd_write(size_t register_index, int value)
513{
514    int*    gcd_address;
515    if( register_index >= 4 ) return -1;
516
517    gcd_address = (int*)&seg_gcd_base;
518    gcd_address[register_index] = value;            // write word
519    return 0;
520}
521///////////////////////////////////////////////////////////////////////////////////////
522//  _gcd_read()
523// Read a 32 bits word in a memory mapped register of the GCD coprocessor
524///////////////////////////////////////////////////////////////////////////////////////
525in_drivers int _gcd_read(size_t register_index, int* buffer)
526{
527    int*    gcd_address;
528    if( register_index >= 4 ) return -1;
529
530    gcd_address = (int*)&seg_gcd_base;
531    *buffer = gcd_address[register_index];          // read word
532    return 0;
533}
534///////////////////////////////////////////////////////////////////////////////////////
535//  _locks_write()
536// Release a software spin-lock
537///////////////////////////////////////////////////////////////////////////////////////
538in_drivers int _locks_write(size_t index)
539
540{
541    int     max = (int)&NB_LOCKS;
542    if( index >= max ) return -1;
543
544    _spin_lock[index] = 0;
545    return 0;
546}
547///////////////////////////////////////////////////////////////////////////////////////
548//  _locks_read()
549// Try to take a software spin-lock.
550// This is a blocking call, as there is a busy-waiting loop,
551// until the lock is granted to the requester.
552// There is an internal delay of about 100 cycles between
553// two successive lock read, to avoid bus saturation.
554///////////////////////////////////////////////////////////////////////////////////////
555in_drivers int _locks_read(size_t index)
556{
557    int     max = (int)&NB_LOCKS;
558    if( index >= max ) return -1;
559
560    register int        delay = ( (_proctime() + _procid() ) & 0xF) << 4;
561    register int*       plock = (int*)&_spin_lock[index];                       
562
563    asm volatile ("_locks_llsc:                 \n"
564                  "ll   $2,    0(%0)            \n"     // $2 <= _locks_lock
565                  "bnez $2,    _locks_delay     \n"     // random delay if busy
566                  "li   $3,    1                \n"     // prepare argument for sc 
567                  "sc   $3,    0(%0)            \n"     // try to set _locks_busy
568                  "bnez $3,    _locks_ok        \n"     // exit if atomic
569                  "_locks_delay:                \n"
570                  "move $4,    %1               \n"     // $4 <= delay
571                  "_locks_loop:                 \n"
572                  "addi $4,    $4,    -1        \n"     // $4 <= $4 - 1
573                  "beqz $4,    _locks_loop      \n"     // test end delay
574                  "j           _locks_llsc      \n"     // retry
575                  "_locks_ok:                   \n"
576                  ::"r"(plock),"r"(delay):"$2","$3","$4");
577    return 0;
578}
579//////////////////////////////////////////////////////////////////////////////////////////
580//  I/O BLOCK_DEVICE
581// The three functions below use the three variables _ioc_lock _ioc_done,
582// and _ioc_status for synchronisation.
583// - As the IOC component can be used by several programs running in parallel,
584// the _ioc_lock variable guarantees exclusive access to the device.
585// The _ioc_read() and _ioc_write() functions use atomic LL/SC to get the lock,
586// and set _ioc_lock to a non zero value.
587// The _ioc_write() and _ioc_read() functions are blocking, polling the _ioc_lock
588// variable until the device is available.
589// - When the transfer is completed, the ISR routine activated by the IOC IRQ
590// sets the _ioc_done variable to a non-zero value. Possible address errors detected
591// by the IOC peripheral are reported by the ISR in the _ioc_status variable.
592// The _ioc_completed() function is polling the _ioc_done variable, waiting for
593// transfer completion. When the completion is signaled, the _ioc_completed() function
594// resets the _ioc_done variable to zero, and releases the _ioc_lock variable.
595//
596// In a multi-tasks environment, this polling policy must be replaced by a
597// descheduling policy for the requesting process.
598///////////////////////////////////////////////////////////////////////////////////////
599//  _ioc_get_lock()
600// This blocking function is used by the _ioc_read() and _ioc_write() functions
601// to get _ioc_lock using LL/SC.
602///////////////////////////////////////////////////////////////////////////////////////
603in_drivers void _ioc_get_lock()
604{
605    register unsigned int       delay = (_proctime() & 0xF) << 4;
606    register unsigned int*      plock = (unsigned int*)&_ioc_lock;                     
607
608    asm volatile ("_ioc_llsc:                           \n"
609                  "ll   $2,    0(%0)                \n" // $2 <= _ioc_lock
610                  "bnez $2,    _ioc_delay           \n" // random delay if busy
611                  "li   $3,    1                            \n" // prepare argument for sc 
612                  "sc   $3,    0(%0)                \n" // try to set _ioc_busy
613                  "bnez $3,    _ioc_ok              \n" // exit if atomic
614                  "_ioc_delay:                              \n"
615                  "move $4,    %1                           \n" // $4 <= delay
616                  "_ioc_loop:                               \n"
617                  "addi $4,    $4,    -1            \n" // $4 <= $4 - 1
618                  "beqz $4,    _ioc_loop            \n" // test end delay
619                  "j           _ioc_llsc        \n"     // retry
620                  "_ioc_ok:                                 \n"
621                  ::"r"(plock),"r"(delay):"$2","$3","$4");
622}
623//////////////////////////////////////////////////////////////////////////////////////
624//  _ioc_write()
625// Transfer data from a memory buffer to a file on the block_device.
626// - lba    : first block index on the disk
627// - buffer : base address of the memory buffer
628// - count  : number of blocks to be transfered
629// The source buffer must be in user address space.
630///////////////////////////////////////////////////////////////////////////////////////
631in_drivers int _ioc_write(size_t lba, void* buffer, size_t count)
632{
633    volatile unsigned int*      ioc_address = (unsigned int*)&seg_ioc_base;
634
635    // buffer must be in user space
636//  size_t block_size = ioc_address[BLOCK_DEVICE_BLOCK_SIZE];
637//  if( ( (size_t)buffer + block_size*count ) >= 0x80000000 ) return -1;
638//  if( ( (size_t)buffer                    ) >= 0x80000000 ) return -1;
639
640    // get the lock
641    _ioc_get_lock();
642
643    // block_device configuration
644    ioc_address[BLOCK_DEVICE_BUFFER] = (int)buffer;
645    ioc_address[BLOCK_DEVICE_COUNT] = count;
646    ioc_address[BLOCK_DEVICE_LBA] = lba;
647    ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1;
648    ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_WRITE;
649    return 0;
650}
651///////////////////////////////////////////////////////////////////////////////////////
652//  _ioc_read()
653// Transfer data from a file on the block device to a memory buffer.
654// - lba    : first block index on the disk
655// - buffer : base address of the memory buffer
656// - count  : number of blocks to be transfered
657// The destination buffer must be in user address space.
658// All cache lines corresponding to the the target buffer must be invalidated
659// for cache coherence.
660///////////////////////////////////////////////////////////////////////////////////////
661in_drivers int _ioc_read(size_t lba, void* buffer, size_t count)
662{
663    volatile unsigned int*      ioc_address = (unsigned int*)&seg_ioc_base;
664
665    // buffer must be in user space
666//  size_t block_size = ioc_address[BLOCK_DEVICE_BLOCK_SIZE];
667//  if( ( (size_t)buffer + block_size*count ) >= 0x80000000 ) return -1;
668//  if( ( (size_t)buffer                    ) >= 0x80000000 ) return -1;
669
670    // get the lock
671    _ioc_get_lock();
672
673    // block_device configuration
674    ioc_address[BLOCK_DEVICE_BUFFER] = (int)buffer;
675    ioc_address[BLOCK_DEVICE_COUNT] = count;
676    ioc_address[BLOCK_DEVICE_LBA] = lba;
677    ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1;
678    ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_READ;
679
680    return 0;
681}
682///////////////////////////////////////////////////////////////////////////////////////
683//  _ioc_completed()
684// This blocking function cheks completion of an I/O transfer and reports errors.
685// It returns 0 if the transfer is successfully completed.
686// It returns -1 if an error has been reported.
687///////////////////////////////////////////////////////////////////////////////////////
688in_drivers int _ioc_completed()
689{
690    // waiting for completion
691    while (_ioc_done == 0) { asm volatile("nop"); }
692   
693    // reset synchronisation variables
694    _ioc_done = 0;
695    _ioc_lock = 0;
696
697    // return errors
698    if((_ioc_status != BLOCK_DEVICE_READ_SUCCESS) &&
699            (_ioc_status != BLOCK_DEVICE_WRITE_SUCCESS))    return -1;
700    else                                                                    return 0;
701}
702
703//////////////////////////////////////////////////////////////////////////////////////
704//  FRAME_BUFFER
705// The _fb_sync_write & _fb_sync_read functions use a memcpy strategy to implement
706// the transfer between a data buffer (user space) and the frame buffer (kernel space).
707// They are blocking until completion of the transfer.
708//////////////////////////////////////////////////////////////////////////////////////
709//  _fb_sync_write()
710// Transfer data from an user buffer to the frame_buffer device with a memcpy.
711// - offset     : offset (in bytes) in the frame buffer
712// - buffer : base address of the memory buffer
713// - length : number of bytes to be transfered
714//////////////////////////////////////////////////////////////////////////////////////
715in_drivers int  _fb_sync_write(size_t offset, void* buffer, size_t length)
716{
717    volatile char*  fb = (char*)(void*)&seg_fb_base + offset;
718    char*       ub = buffer;
719    size_t      i;
720
721    // buffer must be in user space
722//  if( ( (size_t)buffer + length ) >= 0x80000000 ) return -1;
723//  if( ( (size_t)buffer          ) >= 0x80000000 ) return -1;
724
725    // memory copy
726    for(i=0 ; i<length ; i++) fb[i] = ub[i];
727    return 0;
728}
729///////////////////////////////////////////////////////////////////////////////////////
730//  _fb_sync_read()
731// Transfer data from the frame_buffer device to an user buffer with a memcpy.
732// - offset     : offset (in bytes) in the frame buffer
733// - buffer : base address of the memory buffer
734// - length : number of bytes to be transfered
735//////////////////////////////////////////////////////////////////////////////////////
736in_drivers int  _fb_sync_read(size_t offset, void* buffer, size_t length)
737{
738    volatile char*  fb = (char*)(void*)&seg_fb_base + offset;
739    char*       ub = buffer;
740    size_t      i;
741
742    // buffer must be in user space
743//  if( ( (size_t)buffer + length ) >= 0x80000000 ) return -1;
744//  if( ( (size_t)buffer          ) >= 0x80000000 ) return -1;
745
746    // memory copy
747    for(i=0 ; i<length ; i++) ub[i] = fb[i];
748    return 0;
749}
750//////////////////////////////////////////////////////////////////////////////////////
751// The _fb_write() and _fb_read() functions use the MULTI_DMA
752// coprocessor to transfer data between the user buffer and the frame buffer.
753// The _fb_completed() function, use a polling policy to test
754// the global variables _dma_busy[i] and detect the transfer completion.
755// As each processor can have it's private DMA, there is up to 256 _dma_busy[i]
756// set/reset variables that are indexed by the proc_id.
757// The _dma_busy variable is reset by the ISR associated to the DMA IRQ.
758///////////////////////////////////////////////////////////////////////////////////////
759//  _fb_write()
760// Transfer data from an user buffer to the frame_buffer device using DMA.
761// - offset : offset (in bytes) in the frame buffer
762// - buffer : base address of the memory buffer
763// - length : number of bytes to be transfered
764//////////////////////////////////////////////////////////////////////////////////////
765in_drivers int  _fb_write(size_t offset, void* buffer, size_t length)
766{
767    int*                dma_address;
768    unsigned int        base            = (unsigned int)&seg_dma_base;
769    unsigned int        increment       = _segment_increment(DMA_SPAN*4);
770    char*               fb              = (char*)&seg_fb_base + offset;
771    unsigned int        delay           = (_proctime() & 0xF) << 4;
772    unsigned int        pid             = _procid();
773    unsigned int        i;
774
775
776    // checking buffer boundaries (bytes)
777//  if( ( (size_t)buffer + length ) >= 0x80000000 ) return -1;
778//  if( ( (size_t)buffer          ) >= 0x80000000 ) return -1;
779
780    // waiting until DMA device is available
781    while (_dma_busy[pid] != 0)
782    {
783        for( i=0 ; i<delay ; i++)   // busy waiting
784        {                           // with a pseudo random
785            asm volatile("nop");    // delay between bus accesses
786        }
787    }
788    _dma_busy[pid] = 1;
789
790    dma_address = (int*)(base + increment);
791 
792    // DMA configuration
793    dma_address[DMA_IRQ_DISABLE] = 0;
794    dma_address[DMA_SRC]        = (int)buffer;
795    dma_address[DMA_DST]        = (int)fb;
796    dma_address[DMA_LEN]        = (int)length;
797    return 0;
798}
799///////////////////////////////////////////////////////////////////////////////////////
800//  _fb_read()
801// Transfer data from the frame_buffer device to an user buffer using DMA.
802// - offset     : offset (in bytes) in the frame buffer
803// - buffer : base address of the memory buffer
804// - length : number of bytes to be transfered
805//////////////////////////////////////////////////////////////////////////////////////
806in_drivers int  _fb_read(size_t offset, void* buffer, size_t length)
807{
808    int*                dma_address;
809    unsigned int        base            = (unsigned int)&seg_dma_base; 
810    unsigned int        increment       = _segment_increment(DMA_SPAN*4);
811    char*               fb              = (char*)&seg_fb_base + offset;
812    unsigned int        delay           = (_proctime() & 0xF) << 4;
813    unsigned int        pid             = _procid();
814    unsigned int        i;
815
816    // checking buffer boundaries (bytes)
817//  if( ( (size_t)buffer + length ) >= 0x80000000 ) return -1;
818//  if( ( (size_t)buffer          ) >= 0x80000000 ) return -1;
819
820    // waiting until DMA device is available
821    while (_dma_busy[pid] != 0)
822    {
823        for( i=0 ; i<delay ; i++)   // busy waiting
824        {                           // with a pseudo random
825            asm volatile("nop");    // delay between bus accesses
826        }
827    }
828    _dma_busy[pid] = 1;
829
830    dma_address = (int*)(base + increment);
831
832    // DMA configuration
833    dma_address[DMA_IRQ_DISABLE] = 0;
834    dma_address[DMA_SRC]        = (int)fb;
835    dma_address[DMA_DST]        = (int)buffer;
836    dma_address[DMA_LEN]        = (int)length;
837    return 0;
838}
839///////////////////////////////////////////////////////////////////////////////////////
840//  _fb_completed()
841// This blocking function cheks completion of a DMA transfer to or fom the frame buffer.
842// The MIPS32 wait instruction stall the processor until the next interrupt.
843// It returns 0 if the transfer is successfully completed
844// It returns -1 if an error has been reported.
845///////////////////////////////////////////////////////////////////////////////////////
846in_drivers int _fb_completed()
847{
848    unsigned int        pid = _procid();
849
850    while (_dma_busy[pid] != 0)
851    {
852        asm volatile("nop");
853    }
854    if(_dma_status[pid] == DMA_SUCCESS)  return 0;
855    else                                 return _dma_status[pid];
856}
857//////////////////////////////////////////////////////////////////////////////////////
858// _barrier_init()
859// This function makes a cooperative initialisation of the barrier:
860// Several tasks can try to initialize the barrier, but the initialisation
861// is done by only one task, using LL/SC instructions.
862//////////////////////////////////////////////////////////////////////////////////////
863in_drivers int _barrier_init(unsigned int index, unsigned int value)
864{
865
866    register int* pinit         = (int*)&_barrier_initial_value[index];
867    register int* pcount        = (int*)&_barrier_count[index];
868
869    if ( index > 7 )    return 1;
870
871    // parallel initialisation using atomic instructions LL/SC
872    asm volatile ("_barrier_init_test:                  \n"
873                  "ll   $2,     0(%0)                   \n"     // read initial value
874                  "bnez $2,     _barrier_init_done      \n"
875                  "move $3,     %2                      \n"
876                  "sc   $3,     0(%0)                   \n"     // try to write initial value
877                  "beqz $3,     _barrier_init_test      \n"
878                  "move $3,     %2                      \n"
879                  "sw   $3,     0(%1)                   \n"     // write count
880                  "_barrier_init_done:                  \n"
881                  ::"r"(pinit),"r"(pcount),"r"(value):"$2","$3");
882    return 0 ;
883}
884//////////////////////////////////////////////////////////////////////////////////////
885//      _barrier_wait()
886// This blocking function uses a busy_wait technics (on the counter value),
887// because the GIET does not support dynamic scheduling/descheduling of tasks.
888// In the busy waiting state, each task uses a pseudo-random delay between
889// two successive read of the barrier counter in order to avoid bus saturation.
890// the average delay is about 1000 cycles.
891// There is at most 8 independant barriers, and an error is returned
892// if the barrier index is larger than 7.
893//////////////////////////////////////////////////////////////////////////////////////
894in_drivers int _barrier_wait(unsigned int index)
895{
896    register int*       pcount          = (int*)&_barrier_count[index];         
897    register int        maxcount        = _barrier_initial_value[index]; 
898    register int        count;
899
900    if ( index > 7 )    return 1;
901
902    // parallel decrement barrier counter using atomic instructions LL/SC
903    // input : pointer on the barrier counter
904    // output : counter value
905    asm volatile ("_barrier_decrement:                          \n"
906                  "ll   %0,     0(%1)                           \n"
907                  "addi $3,     %0,     -1                      \n"
908                  "sc   $3,     0(%1)                           \n"
909                  "beqz $3,     _barrier_decrement              \n"
910                  :"=r"(count):"r"(pcount):"$2","$3");
911
912    // the last task re-initializes the barrier counter
913    // to the max value, waking up all other waiting tasks
914
915    if ( count == 1 )    // last task
916    {
917        *pcount = maxcount;
918        return 0;
919    }
920    else                // other tasks
921    {
922        while ( *pcount != maxcount )   { }     // busy waiting
923        return 0 ;
924    }
925} 
926//////////////////////////////////////////////////////////////////////////////////////
927
928
929// Local Variables:
930// tab-width: 4;
931// c-basic-offset: 4;
932// c-file-offsets:((innamespace . 0)(inline-open . 0));
933// indent-tabs-mode: nil;
934// End:
935//
936// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4
937
Note: See TracBrowser for help on using the repository browser.