- Timestamp:
- Feb 12, 2014, 9:51:23 AM (11 years ago)
- Location:
- trunk/softs
- Files:
-
- 11 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/softs/giet_tsar/giet.S
r622 r629 169 169 /* It depends on both the cluster_xy & local_id, */ 170 170 /* and we must use the physical address extension */ 171 mfc0 $10, $15, 1 /* $10 <= proc_id*/172 andi $10, $10, 0x3FF /* at most 1024 processors*/171 mfc0 $10, $15, 1 /* $10 <= proc_id */ 172 andi $10, $10, 0x3FF /* at most 1024 processors */ 173 173 li $11, NB_PROCS_MAX 174 174 divu $10, $11 175 mflo $12 /* $12 <= cluster_xy */176 mfhi $13 /* $13 <= local_id */177 178 li $7, 0b011110000000 /* $7 <= PRIO offset */ 179 sll $8, $13, 2 /* $8 <= local_id*4*/180 addu $9, $7, $8 /* $9 <= PRIO offset + local_id*4*/181 la $27, seg_xcu_base182 addu $26, $9, $ 27/* $26 <= seg_icu_base + PRIO offset + local_id*4 */183 184 /* XCU[cluster_xy] access to get PRIO register value */185 mtc2 $12, $24 /* set PADDR extension */186 lw $1 4, ($26) /* $14 <= PRIO register value*/187 mtc2 $0, $24 /* reset PADDR extension */188 189 /* test PTI, then HWI, then WTI */190 andi $27, $1 4, 0x1 /* test bit T in PRIO register*/191 bne $27, $0, _int_PTI /* branch to PTI handler */192 andi $27, $1 4, 0x2 /* test bit W in PRIO register*/193 bne $27, $0, _int_HWI /* branch to HWI handler */194 andi $27, $1 4, 0x4 /* test bit W in PRIO register*/195 bne $27, $0, _int_WTI /* branch to IPI handler*/175 mflo $12 /* $12 <= cluster_xy */ 176 mfhi $13 /* $13 <= local_id */ 177 la $14, seg_xcu_base /* $14 <= seg_xcu_base */ 178 179 li $7, 0b011110000000 /* $7 <= PRIO offset */ 180 sll $8, $13, 2 /* $8 <= local_id*4 */ 181 addu $9, $7, $8 /* $9 <= PRIO offset + local_id*4 */ 182 addu $26, $9, $14 /* $26 <= seg_icu_base + PRIO offset + local_id*4 */ 183 184 /* XCU[cluster_xy] access to get PRIO register value */ 185 mtc2 $12, $24 /* set PADDR extension */ 186 lw $15, ($26) /* $15 <= PRIO register value */ 187 mtc2 $0, $24 /* reset PADDR extension */ 188 189 /* test PTI, then HWI, then WTI */ 190 andi $27, $15, 0x1 /* test bit T in PRIO register */ 191 bne $27, $0, _int_PTI /* branch to PTI handler */ 192 andi $27, $15, 0x2 /* test 
bit W in PRIO register */ 193 bne $27, $0, _int_HWI /* branch to HWI handler */ 194 andi $27, $15, 0x4 /* test bit W in PRIO register */ 195 bne $27, $0, _int_WTI /* branch to WTI handler */ 196 196 197 197 /* exit interrupt handler: restore registers */ 198 198 _int_restore: 199 199 .set noat 200 lw $1, 4*4($29) /* restore $1 */200 lw $1, 4*4($29) 201 201 .set at 202 lw $2, 4*5($29) /* restore $2 */203 lw $3, 4*6($29) /* restore $3 */204 lw $4, 4*7($29) /* restore $4 */205 lw $5, 4*8($29) /* restore $5 */206 lw $6, 4*9($29) /* restore $6 */207 lw $7, 4*10($29) /* restore $7 */208 lw $8, 4*11($29) /* restore $8 */209 lw $9, 4*12($29) /* restore $9 */210 lw $10, 4*13($29) /* restore $10 */211 lw $11, 4*14($29) /* restore $11 */212 lw $12, 4*15($29) /* restore $12 */213 lw $13, 4*16($29) /* restore $13 */214 lw $14, 4*17($29) /* restore $14 */215 lw $15, 4*18($29) /* restore $15 */216 lw $24, 4*19($29) /* restore $24 */217 lw $25, 4*20($29) /* restore $25 */218 lw $31, 4*21($29) /* restore $31 */202 lw $2, 4*5($29) 203 lw $3, 4*6($29) 204 lw $4, 4*7($29) 205 lw $5, 4*8($29) 206 lw $6, 4*9($29) 207 lw $7, 4*10($29) 208 lw $8, 4*11($29) 209 lw $9, 4*12($29) 210 lw $10, 4*13($29) 211 lw $11, 4*14($29) 212 lw $12, 4*15($29) 213 lw $13, 4*16($29) 214 lw $14, 4*17($29) 215 lw $15, 4*18($29) 216 lw $24, 4*19($29) 217 lw $25, 4*20($29) 218 lw $31, 4*21($29) 219 219 lw $27, 4*22($29) /* get EPC */ 220 220 addiu $29, $29, 23*4 /* restore SP */ … … 222 222 eret /* exit GIET */ 223 223 224 /* The PTI handler get PTI index, */ 225 /* acknowledge the PTI register */ 226 /* and call the corresponding ISR */ 224 227 _int_PTI: 225 srl $26, $14, 6 /* $26 <= (PRIO>>6 = PTI index) */ 226 j _int_call_isr 227 nop 228 229 _int_HWI: 230 srl $26, $14, 14 /* $26 <= (PRIO>>14 = HWI index) */ 231 j _int_call_isr 232 nop 233 234 _int_WTI: 235 srl $26, $14, 22 /* $26 <= (PRIO>>22 = WTI index) */ 236 j _int_call_isr 237 nop 238 239 /* Call the relevant ISR */ 240 _int_call_isr: 241 andi $26, $26, 
0x7C /* $26 <= interrupt_index * 4 */ 228 srl $26, $15, 6 /* $26 <= PRIO >> 6 */ 229 andi $26, $26, 0x7C /* $26 <= PTI_INDEX * 4 */ 230 addi $27, $14, 0x180 /* $27 <= &PTI_ACK[0] */ 231 add $27, $27, $26 /* $27 <= &PTI_ACK[PTI_INDEX] */ 232 lw $0, ($27) /* acknowledge XICU PTI */ 242 233 la $27, _interrupt_vector 243 234 addu $26, $26, $27 244 lw $26, ($26) /* read ISR address */ 245 jalr $26 /* call ISR */ 246 nop 247 j _int_restore 248 nop 249 235 lw $26, ($26) /* read ISR address */ 236 jalr $26 /* call ISR */ 237 nop 238 j _int_restore /* return from INT handler */ 239 nop 240 241 /* The HWI handler get HWI index */ 242 /* and call the corresponding ISR */ 243 _int_HWI: 244 srl $26, $15, 14 /* $26 <= PRIO >> 14 */ 245 andi $26, $26, 0x7C /* $26 <= HWI_INDEX * 4 */ 246 la $27, _interrupt_vector 247 addu $26, $26, $27 /* $26 <= &ISR[HWI_INDEX */ 248 lw $26, ($26) /* read ISR address */ 249 jalr $26 /* call ISR */ 250 nop 251 j _int_restore /* return from INT handler */ 252 nop 253 254 /* The WTI handler get WTI index, */ 255 /* acknowledge the WTI register */ 256 /* and call the corresponding ISR */ 257 _int_WTI: 258 srl $26, $15, 22 /* $26 <= PRIO >> 22 */ 259 andi $26, $26, 0x7C /* $26 <= WTI_INDEX * 4 */ 260 add $27, $14, $26 /* $27 <= &WTI_REG[WTI_INDEX] */ 261 lw $0, ($27) /* acknowledge XICU WTI */ 262 la $27, _interrupt_vector 263 addu $26, $26, $27 /* $26 <= &ISR[WTI_INDEX] */ 264 lw $26, ($26) /* read ISR address */ 265 jalr $26 /* call ISR */ 266 nop 267 j _int_restore /* return from INT handler */ 268 nop 269 250 270 /* The default ISR is called when no specific ISR has been installed */ 251 271 /* in the interrupt vector. It simply displays a message on TTY0 */ -
trunk/softs/giet_tsar/reset.S
r622 r629 10 10 * 11 11 * As we don't want to use the virtual memory, the physical address is 12 * equal to the virtual address (identity mapping) and all processors use 13 * the physical memory bank in cluster 0. Both the reset base address and 14 * the kernel base address can be redefined to use a physical memory bank 15 * smaller than 4 Gbytes. 12 * equal to the virtual address (identity mapping) and all processors stacks 13 * and code segments are allocated in the physical memory bank in cluster 0. 14 * 15 * Both the reset base address and the kernel base address must be redefined 16 * to use a physical memory bank smaller than 2 Gbytes. 16 17 * 17 18 * There is one XCU and one MMC per cluster. 18 * All other peripherals (including the boot ROM) are located in cluster 0. 19 * Only two HWI interrupts are supported: 20 * - IRQ_IN[0] IOC 21 * - IRQ_IN[12] MMC 19 * 20 * There is one IOPIC component in cluster_io. 21 * 22 * There are two sets of peripherals: 23 * 24 * 1) A block device and a single channel TTY controller are available 25 * in cluster(0,0). 26 * 27 * 2) Other peripherals (including another Blockdevice, a multi-channels TTY 28 * controller, a Frame buffer) are located in cluster_io. 29 * For those external peripherals, hardware interrupts (HWI) are translated 30 * to software interrupts (WTI) by an IOPIC component, that is programmed 31 * to route all SWI to processor 0 in cluster (0,0). 22 32 * 23 33 * The boot sequence is the following: … … 25 35 * - Each processor initializes the CP0 EBASE register 26 36 * - Only processor 0 initializes the Interrupt vector. 37 * - Only processor 0 initializes the IOPIC component. 27 38 * - Each processor initializes its private XCU mask. 
28 39 * - Each processor initializes the Status Register (SR) … … 37 48 .extern seg_stack_base 38 49 .extern seg_xcu_base 50 .extern seg_pic_base 39 51 .extern seg_kcode_base 40 52 .extern _interrupt_vector 41 53 .extern _ioc_isr 42 54 .extern _mmc_isr 55 .extern _tty_isr 43 56 .extern main 44 57 … … 62 75 la $27, seg_stack_base 63 76 addi $26, $10, 1 /* $26 <= (proc_id + 1) */ 64 sll $26, $26, 1 6 /* $26 <= (proc_id + 1) * 64K */77 sll $26, $26, 14 /* $26 <= (proc_id + 1) * 16K */ 65 78 addu $29, $27, $26 /* $29 <= seg_stack_base(proc_id) */ 66 79 … … 69 82 mtc0 $26, CP0_EBASE /* CP0_EBASE <= seg_kcode_base */ 70 83 71 /* only proc (0,0,0) initializes interrupt vector */84 /* only proc (0,0,0) initializes interrupt vector for IOC, TTY, MMC */ 72 85 bne $10, $0, reset_xcu 73 86 nop 74 87 75 la $26, _interrupt_vector /* interrupt vector address */ 88 la $26, _interrupt_vector /* interrupt vector address */ 89 la $27, _mmc_isr 90 sw $27, 32($26) /* interrupt_vector[8] <= _mmc_isr */ 76 91 la $27, _ioc_isr 77 sw $27, 0($26) /* interrupt_vector[0] <= _isr_ioc */ 78 la $27, _mmc_isr 79 sw $27, 48($26) /* interrupt_vector[12] <= _isr_mmc */ 80 92 sw $27, 36($26) /* interrupt_vector[9] <= _ioc_isr */ 93 la $27, _tty_isr 94 sw $27, 40($26) /* interrupt_vector[10] <= _tty_isr */ 95 96 /* only proc (0,0,0) initializes IOPIC : IOPIC_ADDRESS[i] <= &XICU[0].WTI_REG[i] */ 97 98 li $20, X_SIZE 99 addi $20, $20, -1 100 sll $20, $20, 4 101 li $21, Y_SIZE 102 add $22, $20, $21 /* $22 <= cluster(X_SIZE-1, Y_SIZE) */ 103 104 mtc2 $22, CP2_PADDR_EXT /* CP2_PADDR_EXT <= cluster_io */ 105 106 li $24, 16 /* $24 iteration (de)counter */ 107 la $27, seg_xcu_base /* $27 <= &(XICU[0].WTI_REG[0]) */ 108 la $26, seg_pic_base /* $26 <= &IOPIC_ADDRESS[0] */ 109 110 reset_loop: 111 sw $27, 0($26) /* IOPIC_ADDRESS[i] <= &XICU[0].WTI_REG[i] */ 112 addi $24, $24, -1 /* decrement iteration index */ 113 addi $27, $27, 4 /* $27 <= &(XICU[0].WTI_REG[i++] */ 114 addi $26, $26, 16 /* $26 <= 
&IOPIC_ADDRESS[i++] */ 115 bne $24, $0, reset_loop 116 nop 117 118 mtc2 $0, CP2_PADDR_EXT /* CP2_PADDR_EXT <= zero */ 119 81 120 reset_xcu: 82 121 83 /* only proc (x,y,0) receive IRQs and initialise its private XCU mask*/122 /* only proc (x,y,0) receive IRQs and initialise HWI and WTI XICU masks */ 84 123 bne $11, $0, reset_end 85 124 nop 86 125 la $26, seg_xcu_base 87 li $27, 0b010010000000 /* offset for MSK_HWI_ENABLE & lpid == 0 */ 88 addu $24, $26, $27 /* $24 <= &MASK */ 89 li $25, 0x00001001 /* IOC: IRQ[0] / MEMC: IRQ[12] */ 90 sw $25, 0($24) /* set MASK */ 126 li $27, 0b010010000000 /* offset for MSK_HWI_ENABLE[lpid == 0] */ 127 addu $24, $26, $27 /* $24 <= &HWI_MASK */ 128 li $25, 0x0700 /* TTY:HWI[10] IOC:HWI[9] MEMC:HWI[8] */ 129 sw $25, 0($24) /* set HWI mask */ 130 131 li $27, 0b011010000000 /* offset for MSK_WTI_ENABLE[lpid == 0] */ 132 addu $24, $26, $27 /* $24 <= $WTI_MASK */ 133 li $25, 0xFFFFFFFF /* all WTI enabled */ 134 sw $25, 0($24) /* set WTI mask */ 91 135 92 136 reset_end: -
trunk/softs/giet_tsar/stdio.c
r626 r629 4 4 // Date : janvier 2014 5 5 // 6 // This file define varions functions that can be used by applications to access6 // This file defines various functions that can be used by applications to access 7 7 // peripherals, for the TSAR multi-processors multi_clusters architecture. 8 8 // There is NO separation between application code and system code, as the … … 22 22 // - NB_PROCS_MAX : max number of processor per cluster 23 23 // - NB_TTY_CHANNELS : max number of TTY channels 24 // - USE_EXT_IO : use external peripherals if not zero 24 25 // 25 26 // The follobing base addresses must be defined in the ldscript … … 31 32 #include "stdio.h" 32 33 34 #if !defined(NB_PROCS_MAX) 35 #error: you must define NB_PROCS_MAX in the hard_config.h file 36 #endif 37 38 #if !defined(USE_EXT_IO) 39 #error: you must define USE_EXT_IO in the hard_config.h file 40 #endif 41 42 #if !defined(X_SIZE) 43 #error: you must define X_SIZE in the hard_config.h file 44 #endif 45 46 #if !defined(Y_SIZE) 47 #error: you must define Y_SIZE in the hard_config.h file 48 #endif 49 50 #if !defined(X_WIDTH) 51 #error: you must define X_WIDTH in the hard_config.h file 52 #endif 53 54 #if (X_WIDTH != 4) 55 #error: The X_WIDTH parameter must be equal to 4 56 #endif 57 58 #if !defined(Y_WIDTH) 59 #error: you must define X_WIDTH in the hard_config.h file 60 #endif 61 62 #if (X_WIDTH != 4) 63 #error: The Y_WIDTH parameter must be equal to 4 64 #endif 65 66 #if !defined(NB_TTY_CHANNELS) 67 #error: you must define NB_TTY_CHANNELS in the hard_config.h file 68 #endif 69 70 71 72 33 73 #define NB_LOCKS 256 34 74 #define NB_BARRIERS 16 … … 74 114 75 115 //////////////////////////////////////////////////////////////////////////////////////// 76 // Taken from MutekH.116 // Memcopy taken from MutekH. 
77 117 //////////////////////////////////////////////////////////////////////////////////////// 78 118 in_drivers void* _memcpy( void* _dst, … … 100 140 return _dst; 101 141 } 102 142 //////////////////////////////////////////////////////////////////////////////////////// 143 // Memcopy using extended addresses 144 //////////////////////////////////////////////////////////////////////////////////////// 145 in_drivers void _extended_memcpy( unsigned int dst_cluster, 146 unsigned int dst_address, 147 unsigned int src_cluster, 148 unsigned int src_address, 149 unsigned int length ) 150 { 151 if ( (dst_address & 0x3) || (src_address & 0x3) || (length & 0x3) ) 152 { 153 _tty_get_lock( 0 ); 154 _tty_puts( "ERROR in _extended_memcpy()" ); 155 _tty_release_lock( 0 ); 156 _exit(); 157 } 158 159 unsigned int i; 160 unsigned int word; 161 162 for ( i = 0 ; i < length ; i = i+4 ) 163 { 164 word = _word_extended_read( src_cluster, (src_address + i) ); 165 _word_extended_write( dst_cluster, (dst_address + i), word ); 166 } 167 } 103 168 //////////////////////////////////////////////////////////////////////////////////////// 104 169 // Access CP0 and returns processor ident … … 179 244 } 180 245 181 /////////////////////////////////////////////////////////////////////////////////////// 182 // Exit (suicide) after printing message on a TTY terminal. 246 //////////////////////////////////////////////////////////////////////////// 247 // This function makes a physical read access to a 32 bits word in memory, 248 // after a temporary paddr extension. 
249 //////////////////////////////////////////////////////////////////////////// 250 in_drivers unsigned int _word_extended_read( unsigned int cluster, 251 unsigned int address ) 252 { 253 unsigned int value; 254 asm volatile( 255 "li $3, 0xFFFFFFFE \n" 256 "mfc0 $2, $12 \n" 257 "and $3, $2, $3 \n" 258 "mtc0 $3, $12 \n" /* IRQ disabled */ 259 260 "mtc2 %2, $24 \n" /* PADDR_EXT <= msb */ 261 "lw %0, 0(%1) \n" /* value <= *paddr */ 262 "mtc2 $0, $24 \n" /* PADDR_EXT <= 0 */ 263 264 "li $3, 0x00000001 \n" 265 "mfc0 $2, $12 \n" 266 "or $3, $3, $2 \n" 267 "mtc0 $3, $12 \n" /* IRQ enabled */ 268 : "=r" (value) 269 : "r" (address), "r" (cluster) 270 : "$2", "$3" ); 271 return value; 272 } 273 //////////////////////////////////////////////////////////////////////////// 274 // This function makes a physical read access to a single byte in memory, 275 // after a temporary paddr extension. 276 //////////////////////////////////////////////////////////////////////////// 277 in_drivers unsigned char _byte_extended_read( unsigned int cluster, 278 unsigned int address ) 279 { 280 unsigned int value; 281 asm volatile( 282 "li $3, 0xFFFFFFFE \n" 283 "mfc0 $2, $12 \n" 284 "and $3, $2, $3 \n" 285 "mtc0 $3, $12 \n" /* IRQ disabled */ 286 287 "mtc2 %2, $24 \n" /* PADDR_EXT <= msb */ 288 "lb %0, 0(%1) \n" /* value <= *paddr */ 289 "mtc2 $0, $24 \n" /* PADDR_EXT <= 0 */ 290 291 "li $3, 0x00000001 \n" 292 "mfc0 $2, $12 \n" 293 "or $3, $3, $2 \n" 294 "mtc0 $3, $12 \n" /* IRQ enabled */ 295 : "=r" (value) 296 : "r" (address), "r" (cluster) 297 : "$2", "$3" ); 298 return (unsigned char)value; 299 } 300 //////////////////////////////////////////////////////////////////////////// 301 // This function makes a physical write access to a 32 bits word in memory, 302 // after a temporary DTLB address extension. 
303 //////////////////////////////////////////////////////////////////////////// 304 in_drivers void _word_extended_write( unsigned int cluster, 305 unsigned int address, 306 unsigned int word ) 307 { 308 asm volatile( 309 "li $3, 0xFFFFFFFE \n" 310 "mfc0 $2, $12 \n" 311 "and $3, $2, $3 \n" 312 "mtc0 $3, $12 \n" /* IRQ disabled */ 313 314 "mtc2 %2, $24 \n" /* PADDR_EXT <= msb */ 315 "sw %0, 0(%1) \n" /* *paddr <= value */ 316 "mtc2 $0, $24 \n" /* PADDR_EXT <= 0 */ 317 318 "li $3, 0x00000001 \n" 319 "mfc0 $2, $12 \n" 320 "or $3, $2, $3 \n" 321 "mtc0 $3, $12 \n" /* IRQ enabled */ 322 : 323 : "r" (word), "r" (address), "r" (cluster) 324 : "$2", "$3"); 325 } 326 //////////////////////////////////////////////////////////////////////////// 327 // This function makes a physical write access to single byte in memory, 328 // after a temporary DTLB de-activation and address extension. 329 //////////////////////////////////////////////////////////////////////////// 330 in_drivers void _byte_extended_write( unsigned int cluster, 331 unsigned int address, 332 unsigned char byte ) 333 { 334 asm volatile( 335 "li $3, 0xFFFFFFFE \n" 336 "mfc0 $2, $12 \n" 337 "and $3, $2, $3 \n" 338 "mtc0 $3, $12 \n" /* IRQ disabled */ 339 340 "mtc2 %2, $24 \n" /* PADDR_EXT <= msb */ 341 "sb %0, 0(%1) \n" /* *paddr <= value */ 342 "mtc2 $0, $24 \n" /* PADDR_EXT <= 0 */ 343 344 "li $3, 0x00000001 \n" 345 "mfc0 $2, $12 \n" 346 "or $3, $2, $3 \n" 347 "mtc0 $3, $12 \n" /* IRQ enabled */ 348 : 349 : "r" (byte), "r" (address), "r" (cluster) 350 : "$2", "$3"); 351 } 352 353 /////////////////////////////////////////////////////////////////////////////////////// 354 // Exit (suicide) after printing message on TTY0 183 355 /////////////////////////////////////////////////////////////////////////////////////// 184 356 in_drivers void _exit() … … 189 361 unsigned int y = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1); 190 362 191 _tty_printf("\n\n!!! 
Exit Processor (%d,%d,%d) !!!\n", x, y, l ); 363 _tty_get_lock( 0 ); 364 _tty_puts("\n !!! exit proc["); 365 _tty_putd( x ); 366 _tty_puts(","); 367 _tty_putd( y ); 368 _tty_puts(","); 369 _tty_putd( l ); 370 _tty_puts("] !!!\n"); 371 _tty_release_lock( 0 ); 192 372 193 373 while(1) asm volatile("nop"); // infinite loop... … … 227 407 /////////////////////////////////////////////////////////////////////////////////////// 228 408 // The total number of TTY terminals is defined by NB_TTY_CHANNELS. 229 // 1. If there is only one terminal, it is supposed to be shared, and used by 230 // all processors: a lock must be taken before display. 231 // 2. If there is several terminals, and the number of processors is smaller 232 // than the number of terminals, there is one terminal per processor, but 233 // the TTY index is not equal to the proc_id, due to cluster indexing policy: 234 // - proc_id = cluster_xy * NB_PROCS_MAX + local_id (with cluster_xy = x << Y_WIDTH + y) 235 // - tty_id = cluster_id * NB_PROCS_MAX + local_id (with cluster_id = x * Y_SIZE + y) 236 // 3. If the computed tty_id is larger than NB_TTY_CHANNELS, an error is returned. 409 // - If there is only one terminal, it is supposed to be shared, and used by 410 // all processors: a lock must be taken before display. 411 // - If there is several terminals, and the number of processors is smaller 412 // than the number of terminals, there is one terminal per processor, but 413 // the TTY index is not equal to the proc_id, due to cluster indexing policy: 414 // proc_id = cluster_xy * NB_PROCS_MAX + local_id (with cluster_xy = x << Y_WIDTH + y) 415 // tty_id = cluster_id * NB_PROCS_MAX + local_id (with cluster_id = x * Y_SIZE + y) 416 // - If the computed tty_id is larger than NB_TTY_CHANNELS, an error is returned. 
417 /////////////////////////////////////////////////////////////////////////////////////// 418 // If USE_EXT_IO is set, we use the TTY controller implemented in cluster_io 419 // (x = X_SIZE-1 / y = Y_SIZE), which requires an extended address access. 420 // If USE_EXT_IO is not set, we use the single channel TTY controller in cluster (0,0). 421 /////////////////////////////////////////////////////////////////////////////////////// 422 237 423 /////////////////////////////////////////////////////////////////////////////////////// 238 424 // Write one or several characters directly from a fixed length user buffer 239 425 // to the TTY_WRITE register of the TTY controler. 426 // The channel index must be checked by the calling function. 240 427 // This is a non blocking call : it test the TTY_STATUS register. 241 428 // If the TTY_STATUS_WRITE bit is set, the transfer stops and the function … … 246 433 unsigned int channel ) 247 434 { 248 char* tty_address; 249 unsigned int base = (unsigned int)&seg_tty_base; 250 unsigned int nwritten = 0; 251 int i; 252 253 tty_address = (char*)(base + channel*TTY_SPAN*4); 435 unsigned int base = (unsigned int)&seg_tty_base + channel*TTY_SPAN*4; 436 unsigned int nwritten = 0; 437 unsigned int cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 438 unsigned int status; 439 unsigned int i; 254 440 255 441 for ( i=0 ; i < length ; i++ ) 256 442 { 257 if((tty_address[TTY_STATUS*4] & 0x2) == 0x2) break; 258 else 259 { 260 tty_address[TTY_WRITE*4] = buffer[i]; // write character 261 nwritten++; 443 if( USE_EXT_IO ) // extended addressing to reach cluster_io 444 { 445 status = _word_extended_read( cluster_io, base + TTY_STATUS*4 ); 446 if ( (status & 0x2) == 0x2 ) break; 447 else 448 { 449 _byte_extended_write( cluster_io, base + TTY_WRITE*4 , buffer[i] ); 450 nwritten++; 451 } 452 } 453 else // direct addressing to cluster(0,0) 454 { 455 char* tty = (char*)base; 456 if ( (tty[TTY_STATUS*4] & 0x2) == 0x2 ) break; 457 else 458 { 459 tty[TTY_WRITE*4] 
= buffer[i]; // write character 460 nwritten++; 461 } 262 462 } 263 463 } … … 265 465 return nwritten; 266 466 } 467 267 468 /////////////////////////////////////////////////////////////////////////////////////// 268 469 // Fetch one character directly from the TTY_READ register of the TTY controler, 269 470 // and writes this character to the user buffer. 471 // The channel index must be checked by the calling function. 270 472 // This is a non blocking call : it returns 0 if the register is empty, 271 473 // and returns 1 if the register is full. … … 274 476 unsigned int channel ) 275 477 { 276 char* tty_address; 277 unsigned int base = (unsigned int)&seg_tty_base; 278 279 tty_address = (char*)(base + channel*TTY_SPAN*4); 280 281 if((tty_address[TTY_STATUS*4] & 0x1) == 0x1) 282 { 283 buffer[0] = tty_address[TTY_READ*4]; 284 return 1; 478 unsigned int base = (unsigned int)&seg_tty_base + channel*TTY_SPAN*4; 479 unsigned int cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 480 unsigned int status; 481 482 if( USE_EXT_IO ) 483 { 484 status = _word_extended_read( cluster_io, base + TTY_STATUS*4 ); 485 if ( (status & 0x1) == 0x1 ) 486 { 487 buffer[0] = (char)_word_extended_read( cluster_io, base + TTY_READ*4 ); 488 return 1; 489 } 490 else 491 { 492 return 0; 493 } 285 494 } 286 495 else 287 496 { 288 return 0; 289 } 290 } 497 char* tty = (char*)base; 498 499 if((tty[TTY_STATUS*4] & 0x1) == 0x1) 500 { 501 buffer[0] = tty[TTY_READ*4]; 502 return 1; 503 } 504 else 505 { 506 return 0; 507 } 508 } 509 } 510 291 511 ////////////////////////////////////////////////////////////////////////////// 292 512 // This function displays a string on TTY0. 
… … 356 576 in_drivers void _tty_get_lock( unsigned int channel ) 357 577 { 358 unsigned int* tty_address = (unsigned int *) &seg_tty_base; 359 while ( tty_address[channel * TTY_SPAN + TTY_CONFIG] ) asm volatile("nop"); 578 if ( USE_EXT_IO ) // extended addressing to cluster_io 579 { 580 unsigned int cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 581 unsigned int address = (unsigned int)&seg_tty_base 582 + ((TTY_CONFIG + channel*TTY_SPAN)*4); 583 while ( _word_extended_read( cluster_io, address ) ) asm volatile("nop"); 584 } 585 else // direct addressing to cluster(0,0) 586 { 587 unsigned int* tty = (unsigned int *) &seg_tty_base; 588 while ( tty[channel * TTY_SPAN + TTY_CONFIG] ) asm volatile("nop"); 589 } 360 590 } 361 591 … … 366 596 in_drivers void _tty_release_lock( unsigned int channel ) 367 597 { 368 unsigned int* tty_address = (unsigned int *) &seg_tty_base; 369 tty_address[channel * TTY_SPAN + TTY_CONFIG] = 0; 598 if ( USE_EXT_IO ) // extended addressing to cluster_io 599 { 600 unsigned int cluster_io = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 601 unsigned int address = (unsigned int)&seg_tty_base 602 + ((TTY_CONFIG + channel*TTY_SPAN)*4); 603 _word_extended_write( cluster_io, address, 0 ); 604 } 605 else // direct addressing to cluster(0,0) 606 { 607 unsigned int* tty_address = (unsigned int *) &seg_tty_base; 608 tty_address[channel * TTY_SPAN + TTY_CONFIG] = 0; 609 } 370 610 } 371 611 … … 383 623 unsigned int y; 384 624 385 // compute TTY terminal index 386 if ( NB_TTY_CHANNELS == 1 ) 387 { 388 channel = 0; 389 } 390 else 391 { 392 l = (proc_id % NB_PROCS_MAX); 393 x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; 394 y = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1); 395 channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l; 396 if (channel >= NB_TTY_CHANNELS ) 397 { 398 _tty_get_lock( 0 ); 399 _tty_puts( "ERROR in _tty_getc()\n" ); 400 _tty_release_lock( 0 ); 401 _exit(); 402 } 625 // check TTY channel 626 l = (proc_id % NB_PROCS_MAX); 627 x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; 
628 y = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1); 629 channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l; 630 if (channel >= NB_TTY_CHANNELS ) 631 { 632 _tty_get_lock( 0 ); 633 _tty_puts( "ERROR in _tty_getc(): TTY index too large\n" ); 634 _tty_release_lock( 0 ); 635 _exit(); 403 636 } 404 637 … … 433 666 unsigned int i; 434 667 unsigned int channel; 435 unsigned int l;436 668 unsigned int x; 437 669 unsigned int y; 438 439 // compute TTY terminal index 440 if ( NB_TTY_CHANNELS == 1 ) 441 { 442 channel = 0; 443 } 444 else 445 { 446 l = (proc_id % NB_PROCS_MAX); 447 x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; 448 y = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1); 449 channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l; 450 if (channel >= NB_TTY_CHANNELS ) 451 { 452 _tty_get_lock( 0 ); 453 _tty_puts( "ERROR in _tty_getw()\n" ); 454 _tty_release_lock( 0 ); 455 _exit(); 456 } 670 unsigned int l; 671 672 // check TTY channel 673 l = (proc_id % NB_PROCS_MAX); 674 x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; 675 y = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1); 676 channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l; 677 if (channel >= NB_TTY_CHANNELS ) 678 { 679 _tty_get_lock( 0 ); 680 _tty_puts( "ERROR in _tty_getw(): TTY index too large\n" ); 681 _tty_release_lock( 0 ); 682 _exit(); 457 683 } 458 684 … … 519 745 520 746 unsigned int channel; 521 unsigned int l;522 747 unsigned int x; 523 748 unsigned int y; 524 749 unsigned int proc_id = _procid(); 525 750 526 // compute TTY channel 527 if ( NB_TTY_CHANNELS == 1 ) 751 // compute TTY channel : 752 // if the number of TTY channels is smaller 753 // than the number of clusters, use TTY_0_0 754 // else, TTY channel <= cluster index 755 if ( NB_TTY_CHANNELS < (X_SIZE * Y_SIZE) ) 528 756 { 529 757 channel = 0; … … 531 759 else 532 760 { 533 l = (proc_id % NB_PROCS_MAX);534 761 x = (proc_id / NB_PROCS_MAX) >> Y_WIDTH; 535 762 y = (proc_id / NB_PROCS_MAX) & ((1<<Y_WIDTH) - 1); 536 channel = (x * Y_SIZE + y) * NB_PROCS_MAX + l; 537 if (channel 
>= NB_TTY_CHANNELS ) 538 { 539 _tty_get_lock( 0 ); 540 _tty_puts("ERROR in _tty_printf() for proc[" ); 541 _tty_putd( x ); 542 _tty_puts(","); 543 _tty_putd( y ); 544 _tty_puts(","); 545 _tty_putd( l ); 546 _tty_puts("] / TTY channel too large = "); 547 _tty_putd( channel ); 548 _tty_puts("\n"); 549 _tty_release_lock( 0 ); 550 _exit(); 551 } 763 channel = (x * Y_SIZE + y); 552 764 } 553 765 … … 640 852 ////////////////////////////////////////////////////////////////////////////////////// 641 853 // These functions are the ISRs that must be executed when an IRQ is activated 642 // by the TTY: _tty_isr_X is associated to channel [X].643 // It save the character in the communication buffer _tty_get_buf[X ],644 // and set the set/reset variable _tty_get_full[X ].854 // by the TTY: _tty_isr_XX is associated to TTY channel [XX]. 855 // It save the character in the communication buffer _tty_get_buf[XX], 856 // and set the set/reset variable _tty_get_full[XX]. 645 857 // A character is lost if the buffer is full when the ISR is executed. 
646 858 ////////////////////////////////////////////////////////////////////////////////////// 647 859 in_drivers void _tty_isr_indexed(size_t index) 648 860 { 649 char* base = (char*)&seg_tty_base; 650 char* tty_address = (char*)(base + index*TTY_SPAN*4); 651 652 _tty_get_buf[index] = tty_address[TTY_READ*4]; // save character and reset IRQ 653 _tty_get_full[index] = 1; // signals character available 654 } 861 if ( USE_EXT_IO ) // extended addressing to TTY in cluster_io 862 { 863 unsigned int cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 864 unsigned int base = (unsigned int)&seg_tty_base + 865 ((index*TTY_SPAN + TTY_READ)*4); 866 867 _tty_get_buf[index] = (char)_word_extended_read( cluster, base ); 868 } 869 else // direct addressing to TTY in cluster(0,0) 870 { 871 char* tty = (char*)&seg_tty_base + index*TTY_SPAN*4; 872 873 _tty_get_buf[index] = tty[TTY_READ*4]; // save character and reset IRQ 874 } 875 _tty_get_full[index] = 1; // signals character available 876 } 877 878 in_drivers void _tty_isr() { _tty_isr_indexed(0); } 655 879 656 880 in_drivers void _tty_isr_00() { _tty_isr_indexed(0); } … … 689 913 690 914 ////////////////////////////////////////////////////////////////////////////////////////// 691 // I/O BLOCK_DEVICE 692 // The three functions below use the three variables _ioc_lock _ioc_done, 915 // BLOCK_DEVICE (IOC) 916 ////////////////////////////////////////////////////////////////////////////////////////// 917 // The functions below use the three variables _ioc_lock _ioc_done, 693 918 // and _ioc_status for synchronisation. 694 919 // - As the IOC component can be used by several programs running in parallel, … … 705 930 // reset the _ioc_done variable to zero, and releases the _ioc_lock variable. 706 931 /////////////////////////////////////////////////////////////////////////////////////// 932 // If USE_EXT_IO is set, we use the IOC controler implemented in cluster_io 933 // (x = X_SIZE-1 / y = Y_SIZE), which requires and extended address access. 
934 // If USE_EXT_IO is not set, we use the IOC controller in cluster (0,0). 935 /////////////////////////////////////////////////////////////////////////////////////// 936 937 /////////////////////////////////////////////////////////////////////////////////////// 707 938 // This blocking function is used by the _ioc_read() and _ioc_write() functions 708 939 // to get _ioc_lock using LL/SC. … … 720 951 ::"r"(plock):"$2","$3"); 721 952 } 953 722 954 ////////////////////////////////////////////////////////////////////////////////////// 723 955 // Transfer data from a memory buffer to the block_device. … … 725 957 // - buffer : base address of the memory buffer 726 958 // - count : number of blocks to be transfered 727 // The source buffer must be in user address space.959 // - ext : cluster index for the memory buffer 728 960 /////////////////////////////////////////////////////////////////////////////////////// 729 961 in_drivers void _ioc_write( size_t lba, … … 732 964 size_t ext ) 733 965 { 734 volatile unsigned int* ioc_address = (unsigned int*)&seg_ioc_base;735 736 966 // get the lock 737 967 _ioc_get_lock(); 738 968 739 // block_device configuration 740 ioc_address[BLOCK_DEVICE_BUFFER] = (unsigned int)buffer; 741 ioc_address[BLOCK_DEVICE_BUFFER_EXT] = ext; 742 ioc_address[BLOCK_DEVICE_COUNT] = count; 743 ioc_address[BLOCK_DEVICE_LBA] = lba; 744 ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1; 745 ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_WRITE; 746 } 969 if ( USE_EXT_IO ) // extended addressing to cluster_io 970 { 971 unsigned int cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 972 unsigned int base = (unsigned int)&seg_ioc_base; 973 974 _word_extended_write( cluster, base + BLOCK_DEVICE_BUFFER*4, (unsigned int)buffer ); 975 _word_extended_write( cluster, base + BLOCK_DEVICE_BUFFER_EXT*4, ext ); 976 _word_extended_write( cluster, base + BLOCK_DEVICE_COUNT*4, count ); 977 _word_extended_write( cluster, base + BLOCK_DEVICE_LBA*4, lba ); 978 _word_extended_write( cluster, base 
+ BLOCK_DEVICE_IRQ_ENABLE*4, 1 ); 979 _word_extended_write( cluster, base + BLOCK_DEVICE_OP*4, BLOCK_DEVICE_WRITE ); 980 } 981 else // direct addressing to cluster(0,0) 982 { 983 unsigned int* ioc = (unsigned int*)&seg_ioc_base; 984 985 ioc[BLOCK_DEVICE_BUFFER] = (unsigned int)buffer; 986 ioc[BLOCK_DEVICE_BUFFER_EXT] = ext; 987 ioc[BLOCK_DEVICE_COUNT] = count; 988 ioc[BLOCK_DEVICE_LBA] = lba; 989 ioc[BLOCK_DEVICE_IRQ_ENABLE] = 1; 990 ioc[BLOCK_DEVICE_OP] = BLOCK_DEVICE_WRITE; 991 } 992 } 993 747 994 /////////////////////////////////////////////////////////////////////////////////////// 748 995 // Transfer data from a file on the block device to a memory buffer. … … 750 997 // - buffer : base address of the memory buffer 751 998 // - count : number of blocks to be transfered 752 // The destination buffer must be in user address space. 753 // All cache lines corresponding to the the target buffer must be invalidated 754 // for cache coherence. 999 // - ext : cluster index for the memory buffer 755 1000 /////////////////////////////////////////////////////////////////////////////////////// 756 1001 in_drivers void _ioc_read( size_t lba, … … 759 1004 size_t ext ) 760 1005 { 761 volatile unsigned int* ioc_address = (unsigned int*)&seg_ioc_base;762 763 1006 // get the lock 764 1007 _ioc_get_lock(); 765 1008 766 // block_device configuration 767 ioc_address[BLOCK_DEVICE_BUFFER] = (unsigned int)buffer; 768 ioc_address[BLOCK_DEVICE_BUFFER_EXT] = ext; 769 ioc_address[BLOCK_DEVICE_COUNT] = count; 770 ioc_address[BLOCK_DEVICE_LBA] = lba; 771 ioc_address[BLOCK_DEVICE_IRQ_ENABLE] = 1; 772 ioc_address[BLOCK_DEVICE_OP] = BLOCK_DEVICE_READ; 773 } 1009 if ( USE_EXT_IO ) // extended addressing to cluster_io 1010 { 1011 unsigned int cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 1012 unsigned int base = (unsigned int)&seg_ioc_base; 1013 1014 _word_extended_write( cluster, base + BLOCK_DEVICE_BUFFER*4, (unsigned int)buffer ); 1015 _word_extended_write( cluster, base + 
BLOCK_DEVICE_BUFFER_EXT*4, ext ); 1016 _word_extended_write( cluster, base + BLOCK_DEVICE_COUNT*4, count ); 1017 _word_extended_write( cluster, base + BLOCK_DEVICE_LBA*4, lba ); 1018 _word_extended_write( cluster, base + BLOCK_DEVICE_IRQ_ENABLE*4, 1 ); 1019 _word_extended_write( cluster, base + BLOCK_DEVICE_OP*4, BLOCK_DEVICE_READ ); 1020 } 1021 else // direct addressing to cluster(0,0) 1022 { 1023 unsigned int* ioc = (unsigned int*)&seg_ioc_base; 1024 1025 ioc[BLOCK_DEVICE_BUFFER] = (unsigned int)buffer; 1026 ioc[BLOCK_DEVICE_BUFFER_EXT] = ext; 1027 ioc[BLOCK_DEVICE_COUNT] = count; 1028 ioc[BLOCK_DEVICE_LBA] = lba; 1029 ioc[BLOCK_DEVICE_IRQ_ENABLE] = 1; 1030 ioc[BLOCK_DEVICE_OP] = BLOCK_DEVICE_READ; 1031 } 1032 } 1033 774 1034 /////////////////////////////////////////////////////////////////////////////////////// 775 1035 // This blocking function cheks completion of an I/O transfer and reports errors. … … 795 1055 } 796 1056 } 1057 797 1058 ////////////////////////////////////////////////////////////////////////////////////// 798 1059 // This ISR must be executed when an IRQ is activated by IOC to signal completion. 
… … 803 1064 in_drivers void _ioc_isr() 804 1065 { 805 int* ioc_address = (int*)&seg_ioc_base; 1066 if ( USE_EXT_IO ) // extended addressing to cluster_io 1067 { 1068 unsigned int cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; 1069 unsigned int base = (unsigned int)&seg_ioc_base; 1070 1071 _ioc_status = _word_extended_read( cluster, base + BLOCK_DEVICE_STATUS*4 ); 1072 } 1073 else // direct addressing to cluster(0,0) 1074 { 1075 unsigned int* ioc = (unsigned int*)&seg_ioc_base; 806 1076 807 _ioc_status = ioc_address[BLOCK_DEVICE_STATUS]; // save status & reset IRQ 808 _ioc_done = 1; // signals completion 1077 _ioc_status = ioc[BLOCK_DEVICE_STATUS]; // save status & reset IRQ 1078 } 1079 _ioc_done = 1; // signals completion 809 1080 } 810 1081 … … 825 1096 ////////////////////////////////////////////////////////////////////////////////////// 826 1097 // FRAME_BUFFER 1098 ////////////////////////////////////////////////////////////////////////////////////// 827 1099 // The _fb_sync_write & _fb_sync_read functions use a memcpy strategy to implement 828 1100 // the transfer between a data buffer and the frame buffer. 829 1101 // They are blocking until completion of the transfer. 830 1102 ////////////////////////////////////////////////////////////////////////////////////// 1103 1104 ////////////////////////////////////////////////////////////////////////////////////// 831 1105 // _fb_sync_write() 832 1106 // Transfer data from an user buffer to the frame_buffer device with a memcpy. 
833 // - offset 1107 // - offset : offset (in bytes) in the frame buffer 834 1108 // - buffer : base address of the memory buffer 835 1109 // - length : number of bytes to be transfered 836 ////////////////////////////////////////////////////////////////////////////////////// 837 in_drivers void _fb_sync_write( size_t offset, 838 void* buffer, 839 size_t length, 840 size_t ext ) 841 { 842 volatile char* fb = (char*)(void*)&seg_fbf_base + offset; 843 char* ub = buffer; 844 845 _memcpy( (void*)fb, (void*)ub, length ); 846 } 1110 // - ext : cluster_xy for the user buffer 1111 ////////////////////////////////////////////////////////////////////////////////////// 1112 in_drivers void _fb_sync_write( unsigned int offset, 1113 unsigned int buffer, 1114 unsigned int length, 1115 unsigned int ext ) 1116 { 1117 unsigned int src_address = buffer; 1118 unsigned int src_cluster = ext; 1119 unsigned int dst_address = (unsigned int)&seg_fbf_base + offset; 1120 unsigned int dst_cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; // cluster_xy for I/O 1121 1122 _extended_memcpy( dst_cluster, 1123 dst_address, 1124 src_cluster, 1125 src_address, 1126 length ); 1127 } 1128 847 1129 /////////////////////////////////////////////////////////////////////////////////////// 848 1130 // _fb_sync_read() 849 1131 // Transfer data from the frame_buffer device to an user buffer with a memcpy. 
850 // - offset 1132 // - offset : offset (in bytes) in the frame buffer 851 1133 // - buffer : base address of the memory buffer 852 1134 // - length : number of bytes to be transfered 853 ////////////////////////////////////////////////////////////////////////////////////// 854 in_drivers void _fb_sync_read( size_t offset, 855 void* buffer, 856 size_t length, 857 size_t ext ) 858 { 859 volatile char* fb = (char*)(void*)&seg_fbf_base + offset; 860 char* ub = buffer; 861 862 _memcpy( (void*)ub, (void*)fb, length ); 1135 // - ext : cluster_xy for the user buffer 1136 ////////////////////////////////////////////////////////////////////////////////////// 1137 in_drivers void _fb_sync_read( unsigned int offset, 1138 unsigned int buffer, 1139 unsigned int length, 1140 unsigned int ext ) 1141 { 1142 unsigned int dst_address = buffer; 1143 unsigned int dst_cluster = ext; 1144 unsigned int src_address = (unsigned int)&seg_fbf_base + offset; 1145 unsigned int src_cluster = ((X_SIZE-1)<<Y_WIDTH) + Y_SIZE; // cluster_xy for I/O 1146 1147 _extended_memcpy( dst_cluster, 1148 dst_address, 1149 src_cluster, 1150 src_address, 1151 length ); 863 1152 } 864 1153 … … 879 1168 _spin_lock[index] = 0; 880 1169 } 1170 881 1171 /////////////////////////////////////////////////////////////////////////////////////// 882 1172 // Try to take a software spin-lock. … … 954 1244 ::"r"(pinit),"r"(pcount),"r"(plock),"r"(value):"$2","$3"); 955 1245 } 1246 956 1247 ////////////////////////////////////////////////////////////////////////////////////// 957 1248 // This blocking function uses a busy_wait technics (on the barrier_lock value), -
trunk/softs/giet_tsar/stdio.h
r626 r629 59 59 void* _memcpy( void* dst, const void* src, size_t size ); 60 60 61 void _extended_memcpy( unsigned int dst_cluster, 62 unsigned int dst_address, 63 unsigned int src_cluster, 64 unsigned int src_address, 65 unsigned int length ); 61 66 unsigned int _procid(); 62 67 unsigned int _proctime(); … … 85 90 void _tty_getw( unsigned int* buffer ); 86 91 void _tty_printf( char* format, ... ); 92 void _tty_isr(); 87 93 88 94 void _ioc_get_lock(); … … 94 100 void _mmc_isr(); 95 101 96 void _fb_sync_write( size_t offset, void* buffer, size_t length, size_t ext ); 97 void _fb_sync_read( size_t offset, void* buffer, size_t length, size_t ext ); 102 void _fb_sync_write( unsigned int offset, 103 unsigned int buffer, 104 unsigned int length, 105 unsigned int ext ); 106 void _fb_sync_read( unsigned int offset, 107 unsigned int buffer, 108 unsigned int length, 109 unsigned int ext ); 98 110 99 111 void _release_lock( size_t lock_index ); … … 103 115 void _barrier_wait(size_t index); 104 116 117 unsigned char _byte_extended_read( unsigned int cluster, 118 unsigned int address ); 119 unsigned int _word_extended_read( unsigned int cluster, 120 unsigned int address ); 121 void _word_extended_write( unsigned int cluster, 122 unsigned int address, 123 unsigned int word ); 124 void _byte_extended_write( unsigned int cluster, 125 unsigned int address, 126 unsigned char byte ); 105 127 #endif 106 128 -
trunk/softs/soft_hello_giet/hard_config.h
r623 r629 12 12 #define Y_WIDTH 4 13 13 14 #define NB_PROCS_MAX 2 14 #define NB_PROCS_MAX 4 15 16 #define USE_EXT_IO 1 15 17 16 18 #define NB_DMA_CHANNELS 0 17 #define NB_TTY_CHANNELS (NB_PROCS_MAX * X_SIZE * Y_SIZE)18 19 #define NB_HBA_CHANNELS 0 19 20 #define NB_NIC_CHANNELS 0 20 21 #define NB_CMA_CHANNELS 0 21 22 23 #define NB_TTY_CHANNELS 4 22 24 23 25 #endif //_HARD_CONFIG_H -
trunk/softs/soft_hello_giet/ldscript
r623 r629 23 23 24 24 seg_xcu_base = 0xF0000000; /* controler XCU */ 25 seg_tty_base = 0xF 2000000; /* controler TTY */25 seg_tty_base = 0xF4000000; /* controler TTY */ 26 26 seg_fbf_base = 0xF3000000; /* controler FBF */ 27 seg_ioc_base = 0xF4000000; /* controler IOC */ 28 seg_mmc_base = 0xFF000000; /* config MMC */ 27 seg_ioc_base = 0xF2000000; /* controler IOC */ 28 seg_nic_base = 0xF7000000; /* controler NIC */ 29 seg_cma_base = 0xF8000000; /* controler CMA */ 30 seg_pic_base = 0xF9000000; /* controler PIC */ 31 seg_mmc_base = 0xE0000000; /* config MMC */ 29 32 30 33 -
trunk/softs/soft_sort_giet/hard_config.h
r626 r629 14 14 #define NB_PROCS_MAX 4 15 15 16 #define USE_EXT_IO 1 17 16 18 #define NB_DMA_CHANNELS 0 17 #define NB_TTY_CHANNELS (NB_PROCS_MAX * X_SIZE * Y_SIZE)18 19 #define NB_HBA_CHANNELS 0 19 20 #define NB_NIC_CHANNELS 0 20 21 #define NB_CMA_CHANNELS 0 21 22 23 #define NB_TTY_CHANNELS 4 22 24 23 25 #endif //_HARD_CONFIG_H -
trunk/softs/soft_sort_giet/ldscript
r626 r629 1 1 /********************************************************** 2 2 File : ldscript 3 Author : Cesar Fuguet3 Author : Alain Greiner 4 4 Date : January 2014 5 5 **********************************************************/ … … 23 23 24 24 seg_xcu_base = 0xF0000000; /* controler XCU */ 25 seg_tty_base = 0xF 2000000; /* controler TTY */25 seg_tty_base = 0xF4000000; /* controler TTY */ 26 26 seg_fbf_base = 0xF3000000; /* controler FBF */ 27 seg_ioc_base = 0xF4000000; /* controler IOC */ 28 seg_mmc_base = 0xFF000000; /* config MMC */ 27 seg_ioc_base = 0xF2000000; /* controler IOC */ 28 seg_nic_base = 0xF7000000; /* controler NIC */ 29 seg_cma_base = 0xF8000000; /* controler CMA */ 30 seg_pic_base = 0xF9000000; /* controler PIC */ 31 seg_mmc_base = 0xE0000000; /* config MMC */ 29 32 30 33 -
trunk/softs/soft_sort_giet/main.c
r626 r629 57 57 /* Hello World */ 58 58 59 task0_printf("\n[ PROC %d\t] Starting SORT application\n", proc_id);60 61 task0_printf("[ PROC %d\t] MESH %d x %d x %d processors\n",62 proc_id, X_SIZE, Y_SIZE, NB_PROCS_MAX);63 64 /**************************************************************************/ 65 /* Barriers Initi tialitatin */59 task0_printf("\n[ PROC_%d_%d_%d ] Starting SORT application\n",x,y,lid); 60 61 task0_printf("[ PROC_%d_%d_%d ] MESH %d x %d x %d processors\n", 62 x,y,lid, X_SIZE, Y_SIZE, NB_PROCS_MAX); 63 64 /**************************************************************************/ 65 /* Barriers Initialisation */ 66 66 67 67 if (thread_id == 0) … … 69 69 for (i = 0; i < __builtin_ctz(total_procs); i++) 70 70 { 71 printf("[ PROC %d\t] Initializing barrier %d with %d\n",72 proc_id, i, total_procs >> i);71 printf("[ PROC_%d_%d_%d ] Initializing barrier %d with %d\n", 72 x,y,lid, i, total_procs >> i); 73 73 74 74 _barrier_init(i, total_procs >> i); 75 75 } 76 76 printf("\n"); 77 77 asm volatile ("sync"); 78 78 init_ok = 1; … … 80 80 81 81 /**************************************************************************/ 82 /* Array Initi tialitatin */82 /* Array Initialisation */ 83 83 84 84 for (i = IPP * thread_id; i < IPP * (thread_id + 1); i++) … … 93 93 /* Parallel sorting of array pieces */ 94 94 95 printf("[ PROC %d\t] Stage 0: Processor Sorting...\n\r", proc_id);95 printf("[ PROC_%d_%d_%d ] Stage 0: Starting...\n\r", x,y,lid); 96 96 bubbleSort(array0, IPP, IPP * thread_id); 97 printf("[ PROC %d\t] Stage 0: Finishing...\n\r", proc_id);97 printf("[ PROC_%d_%d_%d ] Stage 0: Finishing...\n\r", x,y,lid); 98 98 99 99 for (i = 0; i < __builtin_ctz(total_procs); i++) … … 104 104 if((thread_id % (2 << i)) != 0) _exit(); 105 105 106 printf("[ PROC %d\t] Stage %d: Starting...\n\r", proc_id, i+1);106 printf("[ PROC_%d_%d_%d ] Stage %d: Starting...\n\r", x,y,lid, i+1); 107 107 108 108 if((i % 2) == 0) … … 124 124 ); 125 125 126 printf("[ PROC %d\t] Stage %d: 
Finishing...\n\r", proc_id, i+1);126 printf("[ PROC_%d_%d_%d ] Stage %d: Finishing...\n\r", x,y,lid, i+1); 127 127 } 128 128 … … 147 147 if (success) 148 148 { 149 printf("[ PROC %d\t] Success!!\n\r", proc_id);149 printf("[ PROC_%d_%d_%d ] Success!!\n\r", x,y,lid); 150 150 } 151 151 else -
trunk/softs/soft_transpose_giet/ldscript
r623 r629 10 10 peripherals are not present in the architecture */ 11 11 12 seg_reset_base = 0x 10000000; /* le code de boot*/12 seg_reset_base = 0x00000000; /* boot code */ 13 13 14 seg_kcode_base = 0x000 01000; /* le code du système */15 seg_kdata_base = 0x000 10000; /* les donnees du système*/16 seg_kunc_base = 0x000 20000; /* les données non cachées du système*/14 seg_kcode_base = 0x00010000; /* kernel code */ 15 seg_kdata_base = 0x00020000; /* kernel cacheable data */ 16 seg_kunc_base = 0x00030000; /* kernel uncacheable data */ 17 17 18 seg_code_base = 0x000 30000; /* le code utilisateur*/19 seg_data_base = 0x000 40000; /* les données utilisateur*/18 seg_code_base = 0x00040000; /* application code */ 19 seg_data_base = 0x00050000; /* application data */ 20 20 21 seg_heap_base = 0x00100000; /* le tas utilisateur*/22 seg_stack_base = 0x00 400000; /* la pile utilisateur*/21 seg_heap_base = 0x00100000; /* heaps for application tasks */ 22 seg_stack_base = 0x00300000; /* stacks */ 23 23 24 seg_xcu_base = 0xF0000000; /* controleur XCU */ 25 seg_dma_base = 0xF1000000; /* controleur DMA */ 26 seg_tty_base = 0xF2000000; /* controleur TTY */ 27 seg_fbf_base = 0xF3000000; /* controleur FBF */ 28 seg_ioc_base = 0xF4000000; /* controleur IOC */ 24 seg_xcu_base = 0xF0000000; /* controler XCU */ 25 seg_tty_base = 0xF4000000; /* controler TTY */ 26 seg_fbf_base = 0xF3000000; /* controler FBF */ 27 seg_ioc_base = 0xF2000000; /* controler IOC */ 28 seg_nic_base = 0xF7000000; /* controler NIC */ 29 seg_cma_base = 0xF8000000; /* controler CMA */ 30 seg_pic_base = 0xF9000000; /* controler PIC */ 31 seg_mmc_base = 0xE0000000; /* config MMC */ 29 32 30 33 -
trunk/softs/soft_transpose_giet/main.c
r248 r629 1 1 2 #include "hard_config.h" 2 3 #include "stdio.h" 3 4 #include "limits.h" 4 5 #include "../giet_tsar/block_device.h" 5 6 6 #define NL 512 7 #define NP 512 8 #define NB_IMAGES 1 9 #define NB_CLUSTER_MAX 256 10 11 #define PRINTF(...) ({ if (proc_id == 0) { tty_printf(__VA_ARGS__); } }) 12 13 //#define DISPLAY_ONLY 14 15 /////////////////////////////////////////// 7 #define NL 128 8 #define NP 128 9 #define NB_IMAGES 5 10 11 #define PRINTF(...) ({ if (lpid == 0) { _tty_printf(__VA_ARGS__); } }) 12 13 #define DISPLAY_OK 14 16 15 // tricks to read parameters from ldscript 17 /////////////////////////////////////////// 18 19 struct plaf; 20 21 extern struct plouf seg_ioc_base; 16 extern struct plaf seg_ioc_base; 22 17 extern struct plaf seg_heap_base; 23 extern struct plaf NB_PROCS; 24 extern struct plaf NB_CLUSTERS; 18 19 // global variables stored in seg_data (cluster 0) 20 21 // instrumentation counters for each processor 22 unsigned int LOAD_START[256][4]; 23 unsigned int LOAD_END [256][4]; 24 unsigned int TRSP_START[256][4]; 25 unsigned int TRSP_END [256][4]; 26 unsigned int DISP_START[256][4]; 27 unsigned int DISP_END [256][4]; 28 29 // checksum variables 30 unsigned check_line_before[NL]; 31 unsigned check_line_after[NL]; 25 32 26 33 ///////////// 27 void main(){ 28 unsigned int frame = 0; 29 unsigned int date = 0; 30 31 unsigned int c; // cluster index for loops 32 unsigned int l; // line index for loops 33 unsigned int p; // pixel index for loops 34 35 unsigned int proc_id = procid(); // processor id 36 unsigned int nlocal_procs = (unsigned int) &NB_PROCS; // number of processors per cluster 37 unsigned int nclusters = (unsigned int) &NB_CLUSTERS; // number of clusters 38 unsigned int local_id = proc_id % nlocal_procs; // local processor id 39 unsigned int cluster_id = proc_id / nlocal_procs; // cluster id 40 unsigned int base = (unsigned int) &seg_heap_base; // base address for shared buffers 41 unsigned int increment = 0x80000000 / nclusters * 2; 
// cluster increment 42 unsigned int nglobal_procs = nclusters * nlocal_procs; // number of tasks 43 unsigned int npixels = NP * NL; // number of pixel per frame 44 45 unsigned int * ioc_address = (unsigned int *) &seg_ioc_base; 46 unsigned int block_size = ioc_address[BLOCK_DEVICE_BLOCK_SIZE]; 47 unsigned int nblocks = npixels / block_size; // number of blocks per frame 48 49 PRINTF("\n *** Entering main at cycle %d ***\n\n", proctime()); 34 void main() 35 { 36 unsigned int image = 0; 37 38 unsigned int l; // line index for loops 39 unsigned int p; // pixel index for loops 40 41 unsigned int * ioc_address = (unsigned int *) &seg_ioc_base; 42 unsigned int block_size = ioc_address[BLOCK_DEVICE_BLOCK_SIZE]; 43 44 unsigned int proc_id = _procid(); // processor id 45 unsigned int nclusters = X_SIZE*Y_SIZE; // number of clusters 46 unsigned int lpid = proc_id % NB_PROCS_MAX; // local processor id 47 unsigned int cluster_xy = proc_id / NB_PROCS_MAX; // cluster index (8 bits format) 48 unsigned int x = cluster_xy >> Y_WIDTH; // x coordinate 49 unsigned int y = cluster_xy & ((1<<Y_WIDTH)-1); // y coordinate 50 unsigned int ntasks = nclusters * NB_PROCS_MAX; // number of tasks 51 unsigned int npixels = NP * NL; // number of pixel per image 52 unsigned int nblocks = npixels / block_size; // number of blocks per image 53 54 // task_id is a "continuous" index for the the task running on processor (x,y,lpid) 55 unsigned int task_id = (((x * Y_SIZE) + y) * NB_PROCS_MAX) + lpid; 56 57 // cluster_id is a "continuous" index for cluster(x,y) 58 unsigned int cluster_id = (x * Y_SIZE) + y; 59 60 PRINTF("\n *** Proc 0 in cluster [%d,%d] enters main at cycle %d ***\n\n", 61 x, y, _proctime()); 50 62 51 63 // parameters checking 52 if ((nlocal_procs != 1) && (nlocal_procs != 2) && (nlocal_procs != 4)){ 53 PRINTF("NB_PROCS must be 1, 2 or 4\n"); 54 exit(1); 64 if ((NB_PROCS_MAX != 1) && (NB_PROCS_MAX != 2) && (NB_PROCS_MAX != 4)) 65 { 66 PRINTF("NB_PROCS_MAX must be 1, 2 or 4\n"); 67 
_exit(); 55 68 } 56 69 if ((nclusters != 1) && (nclusters != 2) && (nclusters != 4) && (nclusters != 8) && 57 70 (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters != 128) && 58 (nclusters != 256)){ 71 (nclusters != 256)) 72 { 59 73 PRINTF("NB_CLUSTERS must be a power of 1 between 1 and 256\n"); 60 exit(1);74 _exit(); 61 75 } 62 if (nglobal_procs > 1024){ 63 PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 1024\n"); 64 exit(1); 65 } 66 if (proc_id >= nglobal_procs){ 67 PRINTF("processor id %d larger than NB_CLUSTERS*NB_PROCS\n", proc_id); 68 exit(1); 69 } 70 71 // Arrays of pointers on the shared, distributed buffers containing the frames 72 // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters) 73 unsigned char * A[NB_CLUSTER_MAX]; 74 unsigned char * B[NB_CLUSTER_MAX]; 75 76 // Arrays of pointers on the instrumentation arrays 77 // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters) 78 // each pointer points on the base adress of an array of NPROCS unsigned int 79 unsigned int * LOAD_START[NB_CLUSTER_MAX]; 80 unsigned int * LOAD_END[NB_CLUSTER_MAX]; 81 unsigned int * TRSP_START[NB_CLUSTER_MAX]; 82 unsigned int * TRSP_END[NB_CLUSTER_MAX]; 83 unsigned int * DISP_START[NB_CLUSTER_MAX]; 84 unsigned int * DISP_END[NB_CLUSTER_MAX]; 85 86 // shared buffers address definition 87 // from the seg_heap_base and increment depending on the cluster index 88 // These arrays of pointers are identical and replicated in the stack of each task 89 for (c = 0; c < nclusters; c++){ 90 A[c] = (unsigned char *) (base + increment * c); 91 B[c] = (unsigned char *) (base + npixels + increment * c); 92 LOAD_START[c] = (unsigned int *) (base + 2 * npixels + increment * c); 93 LOAD_END[c] = (unsigned int *) (base + 2 * npixels + nlocal_procs + increment * c); 94 TRSP_START[c] = (unsigned int *) (base + 2 * npixels + 2 * nlocal_procs + increment * c); 95 TRSP_END[c] = (unsigned int *) (base + 2 * 
npixels + 3 * nlocal_procs + increment * c); 96 DISP_START[c] = (unsigned int *) (base + 2 * npixels + 4 * nlocal_procs + increment * c); 97 DISP_END[c] = (unsigned int *) (base + 2 * npixels + 5 * nlocal_procs + increment * c); 98 } 76 77 // pointers on the distributed buffers containing the images, 78 // allocated in the heap segment: each buffer contains 256 Kbytes 79 unsigned char* buf_in = (unsigned char*)&seg_heap_base; 80 unsigned char* buf_out = buf_in + 0x00100000; 99 81 100 82 PRINTF("NB_CLUSTERS = %d\n", nclusters); 101 PRINTF("NB_LOCAL_PROCS = %d\n", nlocal_procs);102 PRINTF("NB_ GLOBAL_PROCS = %d\n", nglobal_procs);83 PRINTF("NB_LOCAL_PROCS = %d\n", NB_PROCS_MAX); 84 PRINTF("NB_TASKS = %d\n", ntasks); 103 85 PRINTF("NB_PIXELS = %d\n", npixels); 104 86 PRINTF("BLOCK_SIZE = %d\n", block_size); 105 87 PRINTF("NB_BLOCKS = %d\n\n", nblocks); 106 88 107 108 PRINTF("*** Starting barrier init at cycle %d ***\n",proctime());89 PRINTF("*** Proc 0 in cluster [%d,%d] starts barrier init at cycle %d\n", 90 x, y, _proctime()); 109 91 110 92 // barriers initialization 111 barrier_init(0, nglobal_procs); 112 barrier_init(1, nglobal_procs); 113 barrier_init(2, nglobal_procs); 114 115 PRINTF("*** Completing barrier init at cycle %d ***\n", proctime()); 116 117 // Main loop (on frames) 118 while (frame < NB_IMAGES){ 119 // pseudo parallel load from disk to A[c] buffer : nblocks/nclusters blocks 120 // only task running on processor with (local_id == 0) does it 121 122 if (local_id == 0){ 123 int p; 124 125 date = proctime(); 126 PRINTF("\n*** Starting load for frame %d at cycle %d\n", frame, date); 93 _barrier_init(0, ntasks); 94 _barrier_init(1, ntasks); 95 _barrier_init(2, ntasks); 96 _barrier_init(3, ntasks); 97 98 PRINTF("*** Proc 0 in cluster [%d,%d] completes barrier init at cycle %d\n", 99 x, y, _proctime()); 100 101 // Main loop (on images) 102 while (image < NB_IMAGES) 103 { 104 // pseudo parallel load from disk to buf_in buffer : nblocks/nclusters blocks 105 // 
only task running on processor with (lpid == 0) does it 106 107 LOAD_START[cluster_id][lpid] = _proctime(); 108 109 if (lpid == 0) 110 { 111 _ioc_read( ((image * nblocks) + ((nblocks * cluster_id) / nclusters)), 112 buf_in, 113 (nblocks / nclusters), 114 cluster_xy ); 115 116 PRINTF("\n*** Proc 0 in cluster [%d,%d] starts load for image %d at cycle %d\n", 117 x, y, image, _proctime() ); 118 119 _ioc_completed(); 120 121 PRINTF("*** Proc 0 in cluster [%d,%d] completes load for image %d at cycle %d\n", 122 x, y, image, _proctime() ); 123 } 124 125 LOAD_END[cluster_id][lpid] = _proctime(); 126 127 _barrier_wait(0); 128 129 // parallel transpose from buf_in to buf_out buffers 130 // each processor makes the transposition for (NL/ntasks) lines 131 // (p,l) are the pixel coordinates in the source image 132 133 PRINTF("\n*** proc 0 in cluster [%d,%d] starts transpose for image %d at cycle %d\n", 134 x, y, image, _proctime()); 135 136 TRSP_START[cluster_id][lpid] = _proctime(); 137 138 unsigned int nlt = NL / ntasks; // number of lines per processor 139 unsigned int first = task_id * nlt; // first line index 140 unsigned int last = first + nlt; // last line index 141 unsigned int nlines_clusters = NL / nclusters; // number of lines per cluster 142 unsigned int npix_clusters = NP / nclusters; // number of pixels per cluster 143 144 unsigned int src_cluster; 145 unsigned int src_index; 146 unsigned int dst_cluster; 147 unsigned int dst_index; 148 149 unsigned int word; 150 151 for (l = first; l < last; l++) 152 { 153 PRINTF(" - processing line %d\n", l); 154 155 check_line_before[l] = 0; 127 156 128 for (p = 0; p < nlocal_procs; p++){ 129 LOAD_START[cluster_id][p] = date; 130 } 131 if (ioc_read(frame * nblocks + nblocks * cluster_id / nclusters, A[cluster_id], nblocks / nclusters)){ 132 PRINTF("echec ioc_read\n"); 133 exit(); 134 } 135 if (ioc_completed()){ 136 PRINTF("echec ioc_completed\n"); 137 exit(); 138 } 139 140 date = proctime(); 141 PRINTF("*** Completing load for 
frame %d at cycle %d\n", frame, date); 142 for (p = 0; p < nlocal_procs; p++){ 143 LOAD_END[cluster_id][p] = date; 157 // in each iteration we read one word an write four bytes 158 for (p = 0 ; p < NP ; p = p+4) 159 { 160 // read one word, with extended address from local buffer 161 src_cluster = cluster_xy; 162 src_index = (l % nlines_clusters) * NP + p; 163 word = _word_extended_read( src_cluster, 164 (unsigned int)&buf_in[src_index] ); 165 166 unsigned char byte0 = (unsigned char)( word & 0x000000FF); 167 unsigned char byte1 = (unsigned char)((word>>8) & 0x000000FF); 168 unsigned char byte2 = (unsigned char)((word>>16) & 0x000000FF); 169 unsigned char byte3 = (unsigned char)((word>>24) & 0x000000FF); 170 171 // compute checksum 172 check_line_before[l] = check_line_before[l] + byte0 + byte1 + byte2 + byte3; 173 174 // write four bytes with extended address to four remote buffers 175 dst_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) + 176 ((p / npix_clusters) % Y_SIZE); 177 dst_index = (p % npix_clusters) * NL + l; 178 _byte_extended_write( dst_cluster, 179 (unsigned int)&buf_out[dst_index], 180 byte0 ); 181 182 dst_cluster = ((((p+1) / npix_clusters) / Y_SIZE) << Y_WIDTH) + 183 (((p+1) / npix_clusters) % Y_SIZE); 184 dst_index = ((p+1) % npix_clusters) * NL + l; 185 _byte_extended_write( dst_cluster, 186 (unsigned int)&buf_out[dst_index], 187 byte1 ); 188 189 dst_cluster = ((((p+2) / npix_clusters) / Y_SIZE) << Y_WIDTH) + 190 (((p+2) / npix_clusters) % Y_SIZE); 191 dst_index = ((p+2) % npix_clusters) * NL + l; 192 _byte_extended_write( dst_cluster, 193 (unsigned int)&buf_out[dst_index], 194 byte2 ); 195 196 dst_cluster = ((((p+3) / npix_clusters) / Y_SIZE) << Y_WIDTH) + 197 (((p+3) / npix_clusters) % Y_SIZE); 198 dst_index = ((p+3) % npix_clusters) * NL + l; 199 _byte_extended_write( dst_cluster, 200 (unsigned int)&buf_out[dst_index], 201 byte3 ); 144 202 } 145 203 } 146 204 147 barrier_wait(0); 148 149 // parallel transpose from A to B buffers 150 // 
each processor makes the transposition for (NL/nglobal_procs) lines 151 // (p,l) are the (x,y) pixel coordinates in the source frame 152 153 #ifndef DISPLAY_ONLY 154 date = proctime(); 155 PRINTF("\n*** Starting transpose for frame %d at cycle %d\n", frame, date); 156 TRSP_START[cluster_id][local_id] = date; 157 158 unsigned int nlt = NL / nglobal_procs; // Nombre de ligne à traiter par processeur 159 unsigned int first = proc_id * nlt; // Index de la première ligne à traiter pour le proc courant (celui qui exécute le code) 160 unsigned int last = first + nlt; // Index de la dernière ligne 161 unsigned int nlines_clusters = NL / nclusters; // Nombre de lignes à traiter par cluster 162 unsigned int npix_clusters = NP / nclusters; // Nombre de pixels par ligne à traiter par cluster 163 164 for (l = first; l < last; l++){ 165 PRINTF(" - processing line %d\n", l); 166 for (p = 0; p < NP; p++){ 167 unsigned int source_index = (l % nlines_clusters) * NP + p; 168 unsigned int dest_cluster = p / npix_clusters; 169 unsigned int dest_index = (p % npix_clusters) * NL + l; 170 B[dest_cluster][dest_index] = A[cluster_id][source_index]; 205 PRINTF("*** proc 0 in cluster [%d,%d] complete transpose for image %d at cycle %d\n", 206 x, y, image, _proctime() ); 207 208 TRSP_END[cluster_id][lpid] = _proctime(); 209 210 _barrier_wait(1); 211 212 // optional parallel display from local buf_out to frame buffer 213 214 #ifdef DISPLAY_OK 215 216 PRINTF("\n*** proc 0 in cluster [%d,%d] starts display for image %d at cycle %d\n", 217 x, y, image, _proctime() ); 218 219 DISP_START[cluster_id][lpid] = _proctime(); 220 221 unsigned int npxt = npixels / ntasks; // number of pixels per task 222 unsigned int buffer = (unsigned int)buf_out + npxt*lpid; 223 224 _fb_sync_write( npxt * task_id, buffer, npxt, cluster_xy ); 225 226 PRINTF("*** Proc 0 in cluster [%d,%d] completes display for image %d at cycle %d\n", 227 x, y, image, _proctime() ); 228 229 DISP_END[cluster_id][lpid] = _proctime(); 230 231 
_barrier_wait(2); 232 233 #endif 234 235 // Instrumentation and checksum (done by processor 0 in cluster 0) 236 if (proc_id == 0) 237 { 238 PRINTF("\n*** Proc [0,0,0] starts checks for image %d at cycle %d\n\n", 239 image, _proctime() ); 240 241 unsigned int success = 1; 242 243 for ( l = 0 ; l < NL ; l++ ) 244 { 245 check_line_after[l] = 0; 246 247 for ( p = 0 ; p < NP ; p++ ) 248 { 249 // read one byte in remote buffer 250 src_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) + 251 ((p / npix_clusters) % Y_SIZE); 252 src_index = (p % npix_clusters) * NL + l; 253 254 unsigned char byte = _byte_extended_read( src_cluster, 255 (unsigned int)&buf_out[src_index] ); 256 257 check_line_after[l] = check_line_after[l] + byte; 258 } 259 260 PRINTF(" - l = %d / before = %d / after = %d \n", 261 l, check_line_before[l], check_line_after[l] ); 262 263 if ( check_line_before[l] != check_line_after[l] ) success = 0; 171 264 } 172 } 173 174 date = proctime(); 175 PRINTF("*** Completing transpose for frame %d at cycle %d\n", frame, date); 176 TRSP_END[cluster_id][local_id] = date; 177 barrier_wait(1); 178 #endif 179 180 // parallel display from B[c] to frame buffer 181 // each processor uses its private dma to display NL*NP/nglobal_procs pixels 182 183 date = proctime(); 184 PRINTF("\n*** Starting display for frame %d at cycle %d\n", frame, date); 185 DISP_START[cluster_id][local_id] = date; 186 187 unsigned int npxt = npixels / nglobal_procs; // number of pixels per proc 188 189 #ifndef DISPLAY_ONLY 190 if (fb_write(npxt * proc_id, B[cluster_id] + npxt * local_id, npxt)){ 191 PRINTF("[%d]: echec fb_sync_write\n", proc_id); 192 exit(); 193 } 194 #else 195 if (fb_write(npxt * proc_id, A[cluster_id] + npxt * local_id, npxt)){ 196 PRINTF("[%d]: echec fb_sync_write\n", proc_id); 197 exit(); 198 } 199 #endif 200 201 if (fb_completed()){ 202 PRINTF("[%d]: echec fb_completed\n", proc_id); 203 exit(); 204 } 205 206 date = proctime(); 207 PRINTF("*** Completing display for frame %d at 
cycle %d\n", frame, date); 208 DISP_END[cluster_id][local_id] = date; 209 210 barrier_wait(2); 211 212 // Instrumentation (done by processor 0 in cluster 0) 213 if (local_id == 0){ 214 date = proctime(); 215 PRINTF("\n*** Starting Instrumentation for frame %d at cycle %d\n\n", frame, date); 265 266 if ( success ) PRINTF("\n*** proc [0,0,0] : CHECKSUM OK \n\n"); 267 else PRINTF("\n*** proc [0,0,0] : CHECKSUM KO \n\n"); 216 268 217 269 int cc, pp; … … 229 281 unsigned int max_disp_ended = 0; 230 282 231 for (cc = 0; cc < nclusters; cc++){ 232 for (pp = 0; pp < nlocal_procs; pp++){ 233 if (LOAD_START[cc][pp] < min_load_start){ 234 min_load_start = LOAD_START[cc][pp]; 235 } 236 if (LOAD_START[cc][pp] > max_load_start){ 237 max_load_start = LOAD_START[cc][pp]; 238 } 239 if (LOAD_END[cc][pp] < min_load_ended){ 240 min_load_ended = LOAD_END[cc][pp]; 241 } 242 if (LOAD_END[cc][pp] > max_load_ended){ 243 max_load_ended = LOAD_END[cc][pp]; 244 } 245 246 if (TRSP_START[cc][pp] < min_trsp_start){ 247 min_trsp_start = TRSP_START[cc][pp]; 248 } 249 if (TRSP_START[cc][pp] > max_trsp_start){ 250 max_trsp_start = TRSP_START[cc][pp]; 251 } 252 if (TRSP_END[cc][pp] < min_trsp_ended){ 253 min_trsp_ended = TRSP_END[cc][pp]; 254 } 255 if (TRSP_END[cc][pp] > max_trsp_ended){ 256 max_trsp_ended = TRSP_END[cc][pp]; 257 } 258 259 if (DISP_START[cc][pp] < min_disp_start){ 260 min_disp_start = DISP_START[cc][pp]; 261 } 262 if (DISP_START[cc][pp] > max_disp_start){ 263 max_disp_start = DISP_START[cc][pp]; 264 } 265 if (DISP_END[cc][pp] < min_disp_ended){ 266 min_disp_ended = DISP_END[cc][pp]; 267 } 268 if (DISP_END[cc][pp] > max_disp_ended){ 269 max_disp_ended = DISP_END[cc][pp]; 270 } 283 for (cc = 0; cc < nclusters; cc++) 284 { 285 for (pp = 0; pp < NB_PROCS_MAX; pp++) 286 { 287 if (LOAD_START[cc][pp] < min_load_start) min_load_start = LOAD_START[cc][pp]; 288 if (LOAD_START[cc][pp] > max_load_start) max_load_start = LOAD_START[cc][pp]; 289 if (LOAD_END[cc][pp] < min_load_ended) 
min_load_ended = LOAD_END[cc][pp]; 290 if (LOAD_END[cc][pp] > max_load_ended) max_load_ended = LOAD_END[cc][pp]; 291 if (TRSP_START[cc][pp] < min_trsp_start) min_trsp_start = TRSP_START[cc][pp]; 292 if (TRSP_START[cc][pp] > max_trsp_start) max_trsp_start = TRSP_START[cc][pp]; 293 if (TRSP_END[cc][pp] < min_trsp_ended) min_trsp_ended = TRSP_END[cc][pp]; 294 if (TRSP_END[cc][pp] > max_trsp_ended) max_trsp_ended = TRSP_END[cc][pp]; 295 if (DISP_START[cc][pp] < min_disp_start) min_disp_start = DISP_START[cc][pp]; 296 if (DISP_START[cc][pp] > max_disp_start) max_disp_start = DISP_START[cc][pp]; 297 if (DISP_END[cc][pp] < min_disp_ended) min_disp_ended = DISP_END[cc][pp]; 298 if (DISP_END[cc][pp] > max_disp_ended) max_disp_ended = DISP_END[cc][pp]; 271 299 } 272 300 } 273 301 274 302 PRINTF(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n", 275 min_load_start, max_load_start, (min_load_start+max_load_start)/2, max_load_start-min_load_start); 303 min_load_start, max_load_start, (min_load_start+max_load_start)/2, 304 max_load_start-min_load_start); 305 276 306 PRINTF(" - LOAD_END : min = %d / max = %d / med = %d / delta = %d\n", 277 min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, max_load_ended-min_load_ended); 307 min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, 308 max_load_ended-min_load_ended); 278 309 279 310 PRINTF(" - TRSP_START : min = %d / max = %d / med = %d / delta = %d\n", 280 min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, max_trsp_start-min_trsp_start); 311 min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, 312 max_trsp_start-min_trsp_start); 313 281 314 PRINTF(" - TRSP_END : min = %d / max = %d / med = %d / delta = %d\n", 282 min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, max_trsp_ended-min_trsp_ended); 315 min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, 316 max_trsp_ended-min_trsp_ended); 283 317 284 318 PRINTF(" - DISP_START : min = 
%d / max = %d / med = %d / delta = %d\n", 285 min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, max_disp_start-min_disp_start); 319 min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, 320 max_disp_start-min_disp_start); 321 286 322 PRINTF(" - DISP_END : min = %d / max = %d / med = %d / delta = %d\n", 287 min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, max_disp_ended-min_disp_ended); 288 289 PRINTF(" - BARRIER TRSP/DISP = %d\n", min_disp_start - max_trsp_ended); 323 min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, 324 max_disp_ended-min_disp_ended); 290 325 } 291 frame++; 292 293 } // end while frame 294 295 PRINTF("*** End of main ***\n"); 296 297 while(1); 326 327 image++; 328 329 _barrier_wait( 3 ); 330 } // end while image 331 332 333 _exit(); 334 298 335 } // end main() 299 336
Note: See TracChangeset
for help on using the changeset viewer.