- Timestamp:
- Jul 18, 2019, 2:06:55 PM (5 years ago)
- Location:
- trunk
- Files:
-
- 69 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/boot/tsar_mips32/boot.c
r624 r637 954 954 #if DEBUG_BOOT_MULTI 955 955 boot_printf("\n[BOOT] core[%x,%d] jump to kernel_init = %x at cycle %d\n", 956 cxy, lid, __FUNCTION__,kernel_entry, boot_get_proctime() ); 956 cxy, lid, kernel_entry, boot_get_proctime() ); 957 957 #endif 958 958 -
trunk/hal/generic/hal_uspace.h
r626 r637 31 31 // 32 32 // When moving data between user space and kernel space, the user address is always 33 // a virtual address, but the kernel address can be a physical address, on 32 bits 34 // architectures, and require MMU dynamic activation/deactivation. 33 // a virtual address, but the kernel address is an extended pointer. 35 34 // For sake of portability, user/kernel data transfers must use the following API. 36 35 ////////////////////////////////////////////////////////////////////////////////////////// … … 41 40 * that can be located in any cluster. 42 41 ***************************************************************************************** 43 * @ k_cxy : cluster identifier for kernel destination buffer. 44 * @ k_dst : local pointer on kernel destination buffer. 45 * @ u_src : source buffer address in user space. 42 * @ k_dst_xp : extended pointer on kernel destination buffer. 43 * @ u_src_ptr : source buffer address in user space. 46 44 * @ size : size (number of bytes). 47 45 ****************************************************************************************/ 48 extern void hal_copy_from_uspace( cxy_t k_cxy, 49 void * k_dst, 50 void * u_src, 46 extern void hal_copy_from_uspace( xptr_t k_dst_xp, 47 void * u_src_ptr, 51 48 uint32_t size ); 52 49 … … 55 52 * to a data buffer in the user space. 56 53 ***************************************************************************************** 57 * @ k_cxy : cluster identifier for kernel source buffer. 58 * @ k_src : local pointer on kernel source buffer. 59 * @ u_dst : destination buffer address in user space. 54 * @ u_dst_ptr : destination buffer address in user space. 55 * @ k_src_xp : extended pointer on kernel source buffer. 60 56 * @ size : size (number of bytes). 
61 57 ****************************************************************************************/ 62 extern void hal_copy_to_uspace( cxy_t k_cxy, 63 void * k_src, 64 void * u_dst, 58 extern void hal_copy_to_uspace( void * u_dst_ptr, 59 xptr_t k_src_xp, 65 60 uint32_t size ); 66 61 … … 69 64 * The transfer stops after the first encountered NUL character, and no more than 70 65 * <max_size> characters are actually copied to target buffer. 71 * If the kernel uses physical addresses, it activates the MMU to access the user buffer.72 66 ***************************************************************************************** 73 * @ u_dst : destination buffer address in user space.74 * @ k_src : source address in kernelspace.67 * @ k_dst_xp : extended pointer on kernel destination buffer. 68 * @ u_src_ptr : source address in user space. 75 69 * @ max_size : max number of characters to be copied. 76 70 ****************************************************************************************/ 77 extern void hal_strcpy_from_uspace( char * k_dst,78 char * u_src ,71 extern void hal_strcpy_from_uspace( xptr_t k_dst_xp, 72 char * u_src_ptr, 79 73 uint32_t max_size ); 80 74 … … 83 77 * The transfer stops after the first encountered NUL character, and no more than 84 78 * <max_size> characters are actually copied to target buffer. 85 * If the kernel uses physical addresses, it activates the MMU to access the user buffer.86 79 ***************************************************************************************** 87 * @ u_dst 88 * @ k_src : source address in kernel space.80 * @ u_dst_ptr : destination buffer address in user space. 81 * @ k_src_xp : extended pointer on kernel source buffer. 89 82 * @ max_size : max number of characters to be copied. 
90 83 ****************************************************************************************/ 91 extern void hal_strcpy_to_uspace( char * u_dst ,92 char * k_src,84 extern void hal_strcpy_to_uspace( char * u_dst_ptr, 85 xptr_t k_src_xp, 93 86 uint32_t max_size ); 94 87 95 88 /***************************************************************************************** 96 89 * This function computes the length of a string in user space. 97 * If the kernel uses physical addresses, it activates the MMU to access the user buffer.98 90 ***************************************************************************************** 99 91 * @ string : string in user space. -
trunk/hal/tsar_mips32/core/hal_gpt.c
r635 r637 133 133 /////////////////////////////////////////////////////////////////////////////////////// 134 134 135 #define GPT_LOCK_WATCHDOG 100000 135 #define GPT_LOCK_WATCHDOG 1000000 136 136 137 137 ///////////////////////////////////// -
trunk/hal/tsar_mips32/core/hal_uspace.c
r626 r637 32 32 /////////////////////////////////////////////////////////////////////////////////////// 33 33 // This function moves <size> bytes from a source buffer in user virtual space, 34 // defined by the <u_src > argument, to a destination kernel buffer, defined by the35 // <k_ cxy> and <k_dst> arguments.36 // It works in a critical section, as it modifies brieflytwo CP2 registers:34 // defined by the <u_src_ptr> argument, to a destination kernel buffer, defined by the 35 // <k_dst_xp> argument. 36 // It works in a critical section, as it modifies two CP2 registers: 37 37 // It activates briefly the DATA_MMU by writing into the CP2_MODE register to access the 38 38 // user buffer, and modifies the CP2_DATA_EXT register to access the kernel buffer. … … 41 41 // If the buffers are not aligned, it moves all data byte per byte. 42 42 /////////////////////////////////////////////////////////////////////////////////////// 43 // @ k_cxy : cluster of destination kernel buffer 44 // @ k_dst : pointer on destination kernel buffer 45 // @ u_src : pointer on source user buffer 43 // @ k_dst_xp : extended pointer on destination kernel buffer 44 // @ u_src_ptr : pointer on source user buffer 46 45 // @ size : number of bytes to move 47 46 /////////////////////////////////////////////////////////////////////////////////////// 48 void hal_copy_from_uspace( cxy_t k_cxy, 49 void * k_dst, 50 void * u_src, 47 void hal_copy_from_uspace( xptr_t k_dst_xp, 48 void * u_src_ptr, 51 49 uint32_t size ) 52 50 { 53 51 uint32_t save_sr; 54 uint32_t words; // number of words (if buffers aligned) 55 uint32_t src = (uint32_t)u_src; 56 uint32_t dst = (uint32_t)k_dst; 52 uint32_t words; // number of words (if buffers aligned) 53 uint32_t src = (uint32_t)u_src_ptr; 54 uint32_t dst = (uint32_t)GET_PTR( k_dst_xp ); 55 uint32_t cxy = (uint32_t)GET_CXY( k_dst_xp ); 56 57 57 58 58 #if DEBUG_HAL_USPACE … … 61 61 if( cycle > DEBUG_HAL_USPACE ) 62 62 printk("\n[%s] thread[%x,%x] enter / %d bytes / 
u_buf(%x,%x) -> k_buf(%x,%x) / cycle %d\n", 63 __FUNCTION__, this->process->pid, this->trdid, size, local_cxy, u_src, k_cxy, k_dst, cycle );63 __FUNCTION__, this->process->pid, this->trdid, size, local_cxy, src, cxy, dst, cycle ); 64 64 #endif 65 65 … … 80 80 "ori $13, $12, 0x4 \n" /* $13 <= MMU_MODE with DTLB */ 81 81 82 /* save old MMU_DATA_EXT and set k_cxy in it*/82 /* save old MMU_DATA_EXT and set cxy in it */ 83 83 "mfc2 $16, $24 \n" /* $16 <= old MMU_DATA_EXT */ 84 "mtc2 %4, $24 \n" /* MMU_DATA_EXT <= k_cxy*/84 "mtc2 %4, $24 \n" /* MMU_DATA_EXT <= cxy */ 85 85 86 86 /* transfer one word per iteration in first loop if aligned */ … … 118 118 ".set reorder \n" 119 119 : 120 : "r"(src) , "r"(dst) , "r"(words) , "r"(size) , "r"( k_cxy)120 : "r"(src) , "r"(dst) , "r"(words) , "r"(size) , "r"(cxy) 121 121 : "$8","$9","$10","$11","$12","$13","$14","$15","$16","memory" ); 122 122 … … 128 128 if( cycle > DEBUG_HAL_USPACE ) 129 129 printk("\n[%s] thread[%x,%x] moved %d bytes / u_buf(%x,%x) -> k_buf(%x,%x) / cycle %d\n", 130 __FUNCTION__, this->process->pid, this->trdid, size, local_cxy, u_src, k_cxy, k_dst, cycle );130 __FUNCTION__, this->process->pid, this->trdid, size, local_cxy, src, cxy, dst, cycle ); 131 131 #endif 132 132 … … 135 135 /////////////////////////////////////////////////////////////////////////////////////// 136 136 // This function moves <size> bytes from a source kernel buffer, defined by the 137 // <k_ cxy> and <k_src> arguments, to a destination buffer in user virtual space,138 // defined by the <u_dst> argument.139 // It works in a critical section, as it modifies brieflytwo CP2 registers:137 // <k_src_xp> argument, to a destination buffer in user virtual space, defined by 138 // the <u_dst_ptr> argument. 
139 // It works in a critical section, as it modifies two CP2 registers: 140 140 // It activates briefly the DATA_MMU by writing into the CP2_MODE register to access the 141 141 // user buffer, and modifies the CP2_DATA_EXT register to access the kernel buffer. … … 144 144 // If the buffers are not aligned, it moves all data byte per byte. 145 145 /////////////////////////////////////////////////////////////////////////////////////// 146 // @ k_cxy : cluster of destination kernel buffer 147 // @ k_dst : pointer on destination kernel buffer 148 // @ u_src : pointer on source user buffer 149 // @ size : number of bytes to move 150 /////////////////////////////////////////////////////////////////////////////////////// 151 void hal_copy_to_uspace( cxy_t k_cxy, 152 void * k_src, 153 void * u_dst, 146 // @ u_dst_ptr : pointer on destination user buffer 147 // @ k_src_xp : extended pointer on source kernel buffer 148 // @ size : number of bytes to move 149 /////////////////////////////////////////////////////////////////////////////////////// 150 void hal_copy_to_uspace( void * u_dst_ptr, 151 xptr_t k_src_xp, 154 152 uint32_t size ) 155 153 { 156 154 uint32_t save_sr; 157 uint32_t words; // number of words (if buffers aligned) 158 uint32_t src = (uint32_t)k_src; 159 uint32_t dst = (uint32_t)u_dst; 155 uint32_t words; // number of words (if buffers aligned) 156 uint32_t dst = (uint32_t)u_dst_ptr; 157 uint32_t src = (uint32_t)GET_PTR( k_src_xp ); 158 uint32_t cxy = (uint32_t)GET_CXY( k_src_xp ); 160 159 161 160 #if DEBUG_HAL_USPACE … … 164 163 if( cycle > DEBUG_HAL_USPACE ) 165 164 printk("\n[%s] thread[%x,%x] enter / %d bytes / k_buf(%x,%x) -> u_buf(%x,%x) / cycle %d\n", 166 __FUNCTION__, this->process->pid, this->trdid, size, k_cxy, k_src, local_cxy, u_dst, cycle );165 __FUNCTION__, this->process->pid, this->trdid, size, cxy, src, local_cxy, dst, cycle ); 167 166 #endif 168 167 … … 183 182 "ori $13, $12, 0x4 \n" /* $13 <= MMU_MODE with DTLB */ 184 183 185 /* save old 
MMU_DATA_EXT and set k_cxy in it*/184 /* save old MMU_DATA_EXT and set cxy in it */ 186 185 "mfc2 $16, $24 \n" /* $16 <= old MMU_DATA_EXT */ 187 "mtc2 %4, $24 \n" /* MMU_DATA_EXT <= k_cxy*/186 "mtc2 %4, $24 \n" /* MMU_DATA_EXT <= cxy */ 188 187 189 188 /* transfer one word per iteration in first loop if aligned */ … … 221 220 ".set reorder \n" 222 221 : 223 : "r"(src) , "r"(dst) , "r"(words) , "r"(size) , "r"( k_cxy)222 : "r"(src) , "r"(dst) , "r"(words) , "r"(size) , "r"(cxy) 224 223 : "$8","$9","$10","$11","$12","$13","$14","$15","$16","memory" ); 225 224 … … 231 230 if( cycle > DEBUG_HAL_USPACE ) 232 231 printk("\n[%s] thread[%x,%x] moved %d bytes / k_buf(%x,%x) -> u_buf(%x,%x) / cycle %d\n", 233 __FUNCTION__, this->process->pid, this->trdid, size, k_cxy, k_src, local_cxy, u_dst, cycle );232 __FUNCTION__, this->process->pid, this->trdid, size, cxy, src, local_cxy, dst, cycle ); 234 233 #endif 235 234 236 235 } // end hal_copy_to_uspace() 237 236 238 ////////////////////////////////////////////// 239 void hal_strcpy_from_uspace( char * k_dst,240 char * u_src ,237 ///////////////////////////////////////////////// 238 void hal_strcpy_from_uspace( xptr_t k_dst_xp, 239 char * u_src_ptr, 241 240 uint32_t size ) 242 241 { 243 242 uint32_t save_sr; 244 uint32_t src = (uint32_t)u_src; 245 uint32_t dst = (uint32_t)k_dst; 243 uint32_t src = (uint32_t)u_src_ptr; 244 uint32_t dst = (uint32_t)GET_PTR( k_dst_xp ); 245 uint32_t cxy = (uint32_t)GET_CXY( k_dst_xp ); 246 246 247 247 hal_disable_irq( &save_sr ); 248 248 249 249 // loop on characters while ( (character != NUL) and (count < size ) ) 250 250 251 asm volatile( 251 252 ".set noreorder \n" 253 254 /* save old MMU_DATA_EXT and set cxy in it */ 255 "mfc2 $16, $24 \n" /* $16 <= old MMU_DATA_EXT */ 256 "mtc2 %3, $24 \n" /* MMU_DATA_EXT <= cxy */ 257 252 258 "move $11, %0 \n" /* $11 <= count == size */ 253 259 "move $12, %1 \n" /* $12 <= u_src */ 254 260 "move $13, %2 \n" /* $13 <= k_dst */ 255 "mfc2 $15, $1 \n" /* $15 <= 
mode DTLB and ITLB off */ 256 "ori $14, $15, 0x4 \n" /* $14 <= mode DTLB on */ 261 "mfc2 $15, $1 \n" /* $15 <= MMU_MODE */ 262 "ori $14, $15, 0x4 \n" /* $14 <= MMU_MODE / DTLB ON */ 263 257 264 "1: \n" 258 265 "mtc2 $14, $1 \n" /* MMU_MODE <= DTLB ON */ 259 266 "lb $10, 0($12) \n" /* read char from user space */ 260 "mtc2 $15, $1 \n" /* restore DTLB and ITLB off*/267 "mtc2 $15, $1 \n" /* MMU_MODE <= DTLB OFF */ 261 268 "sb $10, 0($13) \n" /* store char to kernel space */ 262 269 "beq $10, $0, 2f \n" /* exit if char = 0 */ … … 268 275 "2: \n" 269 276 "nop \n" 277 278 /* restore old MMU_DATA_EXT register */ 279 "mtc2 $16, $24 \n" /* MMU_DATA_EXT <= $16 */ 280 270 281 ".set reorder \n" 271 282 : 272 : "r"(size) ,"r"(src),"r"(dst)273 : "$10","$11","$12","$13","$14","$15" );283 : "r"(size) , "r"(src) , "r"(dst) , "r"(cxy) 284 : "$10","$11","$12","$13","$14","$15","$16" ); 274 285 275 286 hal_restore_irq( save_sr ); … … 277 288 } // hal_strcpy_from_uspace() 278 289 279 //////////////////////////////////////////// 280 void hal_strcpy_to_uspace( char * u_dst ,281 char * k_src,290 //////////////////////////////////////////////// 291 void hal_strcpy_to_uspace( char * u_dst_ptr, 292 xptr_t k_src_xp, 282 293 uint32_t size ) 283 294 { 284 295 uint32_t save_sr; 285 uint32_t src = (uint32_t)k_src; 286 uint32_t dst = (uint32_t)u_dst; 296 uint32_t dst = (uint32_t)u_dst_ptr; 297 uint32_t src = (uint32_t)GET_PTR( k_src_xp ); 298 uint32_t cxy = (uint32_t)GET_CXY( k_src_xp ); 287 299 288 300 hal_disable_irq( &save_sr ); 289 301 290 302 // loop on characters while ( (character != NUL) and (count < size) ) 303 291 304 asm volatile( 292 305 ".set noreorder \n" 306 307 /* save old MMU_DATA_EXT and set cxy in it */ 308 "mfc2 $16, $24 \n" /* $16 <= old MMU_DATA_EXT */ 309 "mtc2 %3, $24 \n" /* MMU_DATA_EXT <= cxy */ 310 293 311 "move $11, %0 \n" /* $11 <= count == size */ 294 312 "move $12, %1 \n" /* $12 <= k_src */ 295 313 "move $13, %2 \n" /* $13 <= u_dst */ 296 "mfc2 $15, $1 \n" /* $15 <= 
mode DTLB and ITLB off */ 297 "ori $14, $15, 0x4 \n" /* $14 <= mode DTLB on */ 314 "mfc2 $15, $1 \n" /* $15 <= MMU_MODE */ 315 "ori $14, $15, 0x4 \n" /* $14 <= MMU_MODE modified */ 316 298 317 "1: \n" 299 318 "lb $10, 0($12) \n" /* read char from kernel space */ 300 319 "mtc2 $14, $1 \n" /* MMU_MODE <= DTLB ON */ 301 320 "sb $10, 0($13) \n" /* store char to user space */ 302 "mtc2 $15, $1 \n" /* restore DTLB and ITLB off*/321 "mtc2 $15, $1 \n" /* MMU_MODE <= DTLB OFF */ 303 322 "beq $10, $0, 2f \n" /* exit if char == 0 */ 304 323 "addi $11, $11, -1 \n" /* decrement count */ 305 324 "addi $12, $12, 1 \n" /* increment k_src pointer */ 306 "beq $11, $0, 2f \n" /* exit if count == size*/325 "beq $11, $0, 2f \n" /* exit if count == 0 */ 307 326 "addi $13, $13, 1 \n" /* increment u_src pointer */ 308 327 "j 1b \n" /* jump to next iteration */ 309 328 "2: \n" 310 329 "nop \n" 330 331 /* restore old MMU_DATA_EXT register */ 332 "mtc2 $16, $24 \n" /* MMU_DATA_EXT <= $16 */ 333 311 334 ".set reorder \n" 312 335 : 313 : "r"(size) ,"r"(src),"r"(dst)314 : "$10","$11","$12","$13","$14","$15" );336 : "r"(size) , "r"(src) , "r"(dst) , "r"(cxy) 337 : "$10","$11","$12","$13","$14","$15","$16" ); 315 338 316 339 hal_restore_irq( save_sr ); -
trunk/hal/tsar_mips32/core/hal_vmm.c
r635 r637 111 111 printk("\n[%s] thread[%x,%x] registered kcode vseg[%x,%x] in cluster %x\n", 112 112 __FUNCTION__, this->process->pid, this->trdid, info->kcode_base, info->kcode_size, local_cxy ); 113 hal_vmm_display( &process_zero, true );113 hal_vmm_display( XPTR( local_cxy, &process_zero ) , true ); 114 114 #endif 115 115 … … 136 136 printk("\n[%s] thread[%x,%x] enter in cluster %x \n", 137 137 __FUNCTION__, this->process->pid, this->trdid, cxy ); 138 hal_vmm_display( &process_zero , true ); 139 hal_vmm_display( process , true ); 138 hal_vmm_display( XPTR( local_cxy , process ) , true ); 140 139 #endif 141 140 … … 190 189 __FUNCTION__, this->process->pid, this->trdid, 191 190 vseg_type_str(vseg->type) , vseg->min, (vseg->max - vseg->min) ); 192 hal_vmm_display( process, true );191 hal_vmm_display( XPTR( local_cxy , process ) , true ); 193 192 #endif 194 193 -
trunk/kernel/Makefile
r633 r637 175 175 176 176 SYS_OBJS_4 = build/syscalls/sys_get_config.o \ 177 build/syscalls/sys_get_core.o \ 177 build/syscalls/sys_get_core_id.o \ 178 178 build/syscalls/sys_get_cycle.o \ 179 179 build/syscalls/sys_display.o \ … … 187 187 SYS_OBJS_5 = build/syscalls/sys_exit.o \ 188 188 build/syscalls/sys_sync.o \ 189 build/syscalls/sys_fsync.o 189 build/syscalls/sys_fsync.o \ 190 build/syscalls/sys_get_best_core.o \ 191 build/syscalls/sys_get_nb_cores.o 190 192 191 193 VFS_OBJS = build/fs/vfs.o \ -
trunk/kernel/devices/dev_dma.c
r619 r637 2 2 * dev_dma.c - DMA (Interrupt Controler Unit) generic device API implementation. 3 3 * 4 * Authors Alain Greiner (2016,2017,2018 )4 * Authors Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 61 61 error_t error; 62 62 63 lid_t lid = cluster_select_local_core( local_cxy ); 64 63 65 error = thread_kernel_create( &new_thread, 64 66 THREAD_DEV, 65 67 &chdev_server_func, 66 68 dma, 67 cluster_select_local_core());69 lid ); 68 70 if( error ) 69 71 { -
trunk/kernel/devices/dev_ioc.c
r626 r637 67 67 68 68 // select a core to execute the IOC server thread 69 lid_t lid = cluster_select_local_core( ); 69 lid_t lid = cluster_select_local_core( local_cxy ); 70 70 71 71 // bind the IOC IRQ to the selected core -
trunk/kernel/devices/dev_nic.c
r619 r637 2 2 * dev_nic.c - NIC (Network Controler) generic device API implementation. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 58 58 59 59 // select a core to execute the NIC server thread 60 lid_t lid = cluster_select_local_core( );60 lid_t lid = cluster_select_local_core( local_cxy ); 61 61 62 62 // bind the NIC IRQ to the selected core -
trunk/kernel/devices/dev_txt.c
r626 r637 95 95 { 96 96 // select a core to execute the server thread 97 lid_t lid = cluster_select_local_core( );97 lid_t lid = cluster_select_local_core( local_cxy ); 98 98 99 99 // The unique IRQ from cluster 00's MTTY must be bound to a RX chdev … … 131 131 thread_unblock( XPTR( local_cxy , new_thread ) , THREAD_BLOCKED_GLOBAL ); 132 132 } 133 } 133 } // end dev_txt_init() 134 134 135 135 ////////////////////////////////////////////////////////////////////////////////// … … 166 166 // return I/O operation status from calling thread descriptor 167 167 return this->txt_cmd.error; 168 } 168 169 } // end dev_txt_access() 169 170 170 171 ///////////////////////////////////////// … … 173 174 uint32_t count ) 174 175 { 176 error_t error; 175 177 176 178 #if (DEBUG_SYS_WRITE & 1) … … 182 184 uint32_t cycle = (uint32_t)hal_get_cycles(); 183 185 if( DEBUG_DEV_TXT_TX < cycle ) 184 printk("\n[%s] thread[%x,%x] enters / cycle %d\n", 185 __FUNCTION__, this->process->pid, this->trdid, cycle ); 186 #endif 187 188 // get extended pointer on TXT[0] chdev 186 printk("\n[%s] thread[%x,%x] enters for <%s> / cycle %d\n", 187 __FUNCTION__, this->process->pid, this->trdid, buffer, cycle ); 188 #endif 189 190 // If we use MTTY (vci_multi_tty), we do a synchronous write on TXT[0] 191 // If we use TTY (vci_tty_tsar), we do a standard asynchronous write 192 // TODO this is not very clean ... 
[AG] 193 194 // get pointers on chdev 189 195 xptr_t dev_xp = chdev_dir.txt_tx[0]; 190 191 assert( (dev_xp != XPTR_NULL) , __FUNCTION__ , 192 "undefined TXT0 chdev descriptor" ); 193 194 // get TXTO chdev cluster and local pointer 195 cxy_t dev_cxy = GET_CXY( dev_xp ); 196 chdev_t * dev_ptr = (chdev_t *)GET_PTR( dev_xp ); 197 198 // If we use MTTYs (vci_multi_tty), we perform only sync writes 199 // Otherwise, we use vci_tty_tsar so we can use async writes 196 cxy_t dev_cxy = GET_CXY( dev_xp ); 197 chdev_t * dev_ptr = GET_PTR( dev_xp ); 200 198 201 199 if( dev_ptr->impl == IMPL_TXT_MTY ) … … 211 209 args.channel = channel; 212 210 213 // call d river function211 // call directly the driver function 214 212 aux( &args ); 215 213 216 return 0; 217 } 218 214 error = 0; 215 } 219 216 else 220 217 { 221 return dev_txt_access( TXT_WRITE , channel , buffer , count ); 218 // register command in chdev queue for an asynchronous access 219 error = dev_txt_access( TXT_WRITE , channel , buffer , count ); 220 221 if( error ) 222 { 223 printk("\n[ERROR] in %s : cannot write string %s / cycle %d\n", 224 __FUNCTION__, buffer, (uint32_t)hal_get_cycles() ); 225 } 222 226 } 223 227 … … 225 229 cycle = (uint32_t)hal_get_cycles(); 226 230 if( DEBUG_DEV_TXT_TX < cycle ) 227 printk("\n[%s] thread[%x,%x] exit / cycle %d\n",231 printk("\n[%s] thread[%x,%x] exit / cycle %d\n", 228 232 __FUNCTION__, this->process->pid, this->trdid, cycle ); 229 233 #endif … … 233 237 #endif 234 238 235 } 239 return error; 240 241 } // end dev_txt_write() 236 242 237 243 ///////////////////////////////////////// … … 239 245 char * buffer ) 240 246 { 247 error_t error; 241 248 242 249 #if (DEBUG_SYS_READ & 1) … … 252 259 #endif 253 260 254 return dev_txt_access( TXT_READ , channel , buffer , 1 ); 261 // register command in chdev queue for an asynchronous access 262 error = dev_txt_access( TXT_READ , channel , buffer , 1 ); 263 264 if( error ) 265 { 266 printk("\n[ERROR] in %s : cannot get character / cycle 
%d\n", 267 __FUNCTION__, (uint32_t)hal_get_cycles() ); 268 } 255 269 256 270 #if DEBUG_DEV_TXT_RX 257 271 cycle = (uint32_t)hal_get_cycles(); 258 272 if( DEBUG_DEV_TXT_RX < cycle ) 259 printk("\n[%s] thread[%x,%x] exit/ cycle %d\n",260 __FUNCTION__, this->process->pid, this->trdid, cycle );273 printk("\n[%s] thread[%x,%x] get character <%c> / cycle %d\n", 274 __FUNCTION__, this->process->pid, this->trdid, *buffer, cycle ); 261 275 #endif 262 276 … … 265 279 #endif 266 280 267 } 281 return error; 282 283 } // end dev_txt_read() 268 284 269 285 //////////////////////////////////////////////// -
trunk/kernel/devices/dev_txt.h
r626 r637 124 124 * device and the driver specific data structures when required. 125 125 * It creates the associated server thread and allocates a WTI from local ICU. 126 * It must de executed by a local thread.126 * It must be executed by a thread running in cluster containing the chdev descriptor. 127 127 ****************************************************************************************** 128 128 * @ chdev : local pointer on TXT device descriptor. … … 134 134 * by the "channel" argument. The corresponding request is actually registered in the 135 135 * chdev requests queue, and the calling thread is descheduled, blocked until 136 * transfer completion. 137 * It must be called in the client cluster. 136 * transfer completion. It can be called by any thread running in any cluster. 138 137 ****************************************************************************************** 139 138 * @ channel : TXT channel index. … … 148 147 * by the "channel" argument. The corresponding request is actually registered in the 149 148 * chdev requests queue, and the calling thread is descheduled, blocked until 150 * transfer completion. 151 * It must be called in the client cluster. 149 * transfer completion. It can be called by any thread running in any cluster. 152 150 ****************************************************************************************** 153 151 * @ channel : TXT channel index. … … 166 164 * interfering with another possible TXT access to another terminal. 167 165 * As it is used for debug, the command arguments <buffer> and <count> are registerd 168 * in a specific "txt_sy c_args_t" structure passed to the driver "aux" function.166 * in a specific "txt_sync_args_t" structure passed to the driver "aux" function. 169 167 **************************************************************************************** 170 168 * @ buffer : local pointer on source buffer containing the string. -
trunk/kernel/fs/devfs.c
r635 r637 675 675 676 676 // move burst bytes from k_buf to u_buf 677 hal_strcpy_to_uspace( u_buf , k_buf , burst ); 677 hal_strcpy_to_uspace( u_buf, 678 XPTR( local_cxy , k_buf ), 679 burst ); 678 680 679 681 // update loop variables … … 704 706 705 707 // move burst bytes from u_buf to k_buf 706 hal_strcpy_from_uspace( k_buf, u_buf , burst );708 hal_strcpy_from_uspace( XPTR( local_cxy , k_buf ) , u_buf , burst ); 707 709 708 710 // write burst bytes from kernel buffer to TXT device -
trunk/kernel/kern/cluster.c
r635 r637 76 76 77 77 // initialize the cluster_info[][] array 78 for (x = 0; x < CONFIG_MAX_CLUSTERS_X; x++)79 { 80 for (y = 0; y < CONFIG_MAX_CLUSTERS_Y;y++)78 for( x = 0 ; x < CONFIG_MAX_CLUSTERS_X ; x++ ) 79 { 80 for( y = 0; y < CONFIG_MAX_CLUSTERS_Y ; y++ ) 81 81 { 82 82 cluster->cluster_info[x][y] = info->cluster_info[x][y]; … … 95 95 } 96 96 97 // initialize number of cores97 // initialize number of local cores 98 98 cluster->cores_nr = info->cores_nr; 99 99 100 100 } // end cluster_info_init() 101 102 ////////////////////////////////////// 103 void cluster_info_display( cxy_t cxy ) 104 { 105 uint32_t x; 106 uint32_t y; 107 uint32_t ncores; 108 109 cluster_t * cluster = LOCAL_CLUSTER; 110 111 // get x_size & y_size from target cluster 112 uint32_t x_size = hal_remote_l32( XPTR( cxy , &cluster->x_size ) ); 113 uint32_t y_size = hal_remote_l32( XPTR( cxy , &cluster->y_size ) ); 114 115 // get pointers on TXT0 chdev 116 xptr_t txt0_xp = chdev_dir.txt_tx[0]; 117 cxy_t txt0_cxy = GET_CXY( txt0_xp ); 118 chdev_t * txt0_ptr = GET_PTR( txt0_xp ); 119 120 // get extended pointer on remote TXT0 lock 121 xptr_t lock_xp = XPTR( txt0_cxy , &txt0_ptr->wait_lock ); 122 123 // get TXT0 lock 124 remote_busylock_acquire( lock_xp ); 125 126 nolock_printk("\n***** cluster_info in cluster %x / x_size %d / y_size %d\n", 127 cxy, x_size, y_size ); 128 129 for( x = 0 ; x < x_size ; x++ ) 130 { 131 for( y = 0 ; y < y_size ; y++ ) 132 { 133 ncores = (uint32_t)hal_remote_lb( XPTR( cxy , &cluster->cluster_info[x][y] ) ); 134 nolock_printk(" - ncores[%d][%d] = %d\n", x, y, ncores ); 135 } 136 } 137 138 // release TXT0 lock 139 remote_busylock_release( lock_xp ); 140 141 } // end cluster_info_display() 101 142 102 143 ///////////////////////////////////////////////////////// … … 115 156 printk("\n[%s] thread[%x,%x] enters for cluster %x / cycle %d\n", 116 157 __FUNCTION__, this->process->pid, this->trdid, local_cxy , cycle ); 158 #endif 159 160 #if (DEBUG_CLUSTER_INIT & 1) 161 
cluster_info_display( local_cxy ); 117 162 #endif 118 163 … … 243 288 } 244 289 245 //////////////////////////////////////// 246 bool_t cluster_is_undefined( cxy_t cxy ) 247 { 248 uint32_t x_size = LOCAL_CLUSTER->x_size; 249 uint32_t y_size = LOCAL_CLUSTER->y_size; 250 251 uint32_t x = HAL_X_FROM_CXY( cxy ); 252 uint32_t y = HAL_Y_FROM_CXY( cxy ); 253 254 if( x >= x_size ) return true; 255 if( y >= y_size ) return true; 256 257 return false; 258 } 259 260 ////////////////////////////////////// 261 bool_t cluster_is_active ( cxy_t cxy ) 290 ///////////////////////////////////////////// 291 inline bool_t cluster_is_active ( cxy_t cxy ) 262 292 { 263 293 uint32_t x = HAL_X_FROM_CXY( cxy ); … … 271 301 //////////////////////////////////////////////////////////////////////////////////// 272 302 273 /////////////////////////////////////// 274 lid_t cluster_select_local_core( void)275 { 276 uint32_t min = 1000 ;303 ///////////////////////////////////////////// 304 lid_t cluster_select_local_core( cxy_t cxy ) 305 { 306 uint32_t min = 1000000; 277 307 lid_t sel = 0; 278 308 uint32_t nthreads; 279 309 lid_t lid; 280 310 scheduler_t * sched; 281 282 cluster_t * cluster = LOCAL_CLUSTER; 283 284 for( lid = 0 ; lid < cluster->cores_nr ; lid++ ) 285 { 286 sched = &cluster->core_tbl[lid].scheduler; 287 nthreads = sched->u_threads_nr + sched->k_threads_nr; 311 cluster_t * cluster = LOCAL_CLUSTER; 312 uint32_t ncores = hal_remote_l32( XPTR( cxy , &cluster->cores_nr ) ); 313 314 for( lid = 0 ; lid < ncores ; lid++ ) 315 { 316 sched = &cluster->core_tbl[lid].scheduler; 317 318 nthreads = hal_remote_l32( XPTR( cxy , &sched->u_threads_nr ) ) + 319 hal_remote_l32( XPTR( cxy , &sched->k_threads_nr ) ); 288 320 289 321 if( nthreads < min ) … … 700 732 uint32_t pref_nr; // number of owned processes in cluster cxy 701 733 702 assert( (cluster_is_ undefined( cxy ) == false), "illegal cluster index" );734 assert( (cluster_is_active( cxy ) ), "illegal cluster index" ); 703 735 704 736 // get 
extended pointer on root and lock for local process list in cluster -
trunk/kernel/kern/cluster.h
r635 r637 4 4 * authors Ghassan Almaless (2008,2009,2010,2011,2012) 5 5 * Mohamed Lamine Karaoui (2015) 6 * Alain Greiner (2016,2017,2018 )6 * Alain Greiner (2016,2017,2018,2019) 7 7 * 8 8 * Copyright (c) UPMC Sorbonne Universites … … 112 112 uint32_t nb_fbf_channels; /*! number of FBF channels */ 113 113 114 char cluster_info[CONFIG_MAX_CLUSTERS_X][CONFIG_MAX_CLUSTERS_Y]; 114 // number of cores for each cluster in the mesh 115 uint8_t cluster_info[CONFIG_MAX_CLUSTERS_X][CONFIG_MAX_CLUSTERS_Y]; 115 116 116 117 // local parameters … … 162 163 * in the local boot-info structure <info> build by the boot-loader. 163 164 * 1) the cluster_info_init() function is called first, to initialize the structural 164 * constants, and cannot use the TXT0 kernel terminal. 165 * 2) the cluster_manager_init() function initialize various complex structures: 165 * constants, including the cluster_info[x][y] array. 166 * It cannot use the TXT0 kernel terminal. 167 * 2) the cluster_manager_init() function initializes various complex structures: 166 168 * - the local DQDT nodes, 167 169 * - the PPM, KHM, and KCM allocators, … … 169 171 * - the local RPC FIFO, 170 172 * - the process manager. 171 * It does NOT initialise the local device descriptors.172 173 * It can use the TXT0 kernel terminal. 173 174 ****************************************************************************************** … … 178 179 179 180 /****************************************************************************************** 180 * This function checks the validity of a cluster identifier. 181 ****************************************************************************************** 182 * @ cxy : cluster identifier to be checked. 183 * @ returns true if the identified cluster does not exist. 
184 *****************************************************************************************/ 185 bool_t cluster_is_undefined( cxy_t cxy ); 186 187 /****************************************************************************************** 188 * This function uses the local cluster_info[][] array in cluster descriptor, 189 * and returns true when the cluster identified by the <cxy> argument is active. 190 ****************************************************************************************** 191 * @ cxy : cluster identifier. 181 * This debug function displays the current values stored in the cluster_info[][] array 182 * of a remote cluster identified by the <cxy> argument. 183 * It can be called by a thread running in any cluster. 184 ****************************************************************************************** 185 * @ cxy : remote cluster identifier. 186 *****************************************************************************************/ 187 void cluster_info_display( cxy_t cxy ); 188 189 /****************************************************************************************** 190 * This function access the local cluster_info[][] array and returns true when the 191 * cluster identified by the <cxy> argument is active (contains a kernel instance). 192 ****************************************************************************************** 193 * @ cxy : checked cluster identifier. 192 194 * @ return true if cluster contains a kernel instance. 193 195 *****************************************************************************************/ … … 300 302 * This function displays on the kernel terminal TXT0 all user processes registered 301 303 * in the cluster defined by the <cxy> argument. 302 * It can be called by a thread running in any cluster, because is use remote accesses 303 * to scan the xlist of registered processes. 304 * It can be called by a thread running in any cluster. 
304 305 ****************************************************************************************** 305 306 * @ cxy : cluster identifier. … … 310 311 311 312 /****************************************************************************************** 312 * This function uses the local boot_inforeturns the core local index that has the lowest usage in local cluster. 313 *****************************************************************************************/ 314 lid_t cluster_select_local_core( void ); 313 * This function selects the core that has the lowest usage in a - possibly remote - 314 * cluster identified by the <cxy> argument. 315 * It can be called by a thread running in any cluster. 316 ****************************************************************************************** 317 * @ cxy : target cluster identifier. 318 * @ return the selected core local index. 319 *****************************************************************************************/ 320 lid_t cluster_select_local_core( cxy_t cxy ); 315 321 316 322 -
trunk/kernel/kern/do_syscall.c
r626 r637 95 95 96 96 sys_get_config, // 40 97 sys_get_core ,// 4197 sys_get_core_id, // 41 98 98 sys_get_cycle, // 42 99 99 sys_display, // 43 … … 108 108 sys_sync, // 51 109 109 sys_fsync, // 52 110 sys_get_best_core, // 53 111 sys_get_nb_cores, // 54 110 112 }; 111 113 … … 160 162 161 163 case SYS_GET_CONFIG: return "GET_CONFIG"; // 40 162 case SYS_GET_CORE : return "GET_CORE";// 41164 case SYS_GET_CORE_ID: return "GET_CORE_ID"; // 41 163 165 case SYS_GET_CYCLE: return "GET_CYCLE"; // 42 164 166 case SYS_DISPLAY: return "DISPLAY"; // 43 … … 172 174 case SYS_EXIT: return "EXIT"; // 50 173 175 case SYS_SYNC: return "SYNC"; // 51 174 case SYS_FSYNC: return "FSYNc"; // 52 176 case SYS_FSYNC: return "FSYNC"; // 52 177 case SYS_GET_BEST_CORE: return "GET_BEST_CORE"; // 53 178 case SYS_GET_NB_CORES: return "GET_NB_CORES"; // 54 175 179 176 180 default: return "undefined"; -
trunk/kernel/kern/dqdt.c
r632 r637 2 2 * dqdt.c - Distributed Quaternary Decision Tree implementation. 3 3 * 4 * Author : Alain Greiner (2016,2017,2018 )4 * Author : Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 55 55 56 56 // display node content 57 nolock_printk("- level %d / cluster %x : threads = %x / pages = %x / clusters %d / cores %d\n", 58 node.level, GET_CXY( node_xp ), node.threads, node.pages, node.clusters, node.cores ); 57 nolock_printk("- [%d,%x] : threads %x / pages %x / clusters %d / cores %d / parent_cxy %x\n", 58 node.level, GET_CXY( node_xp ), 59 node.threads, node.pages, 60 node.clusters, node.cores, 61 GET_CXY( node.parent ) ); 59 62 60 63 // recursive call on children if node is not terminal … … 116 119 xptr_t parent_xp ) 117 120 { 118 assert( (level < 5) , __FUNCTION__, "illegal DQDT level %d\n", level );121 assert( (level <= 5) , __FUNCTION__, "illegal DQDT level %d\n", level ); 119 122 120 123 uint32_t node_x; // node X coordinate … … 147 150 148 151 #if DEBUG_DQDT_INIT 149 printk("\n[ DBG] %s: cxy(%d,%d) / level %d / mask %x / half %d / ptr %x\n",152 printk("\n[%s] thread[%x,%x] : cxy(%d,%d) / level %d / mask %x / half %d / ptr %x\n", 150 153 __FUNCTION__, node_x, node_y, level, mask, half, node_ptr ); 151 154 #endif … … 336 339 void dqdt_init( void ) 337 340 { 338 // get x_size & y_size from cluster manager339 cluster_t * cluster = &cluster_manager;341 // get x_size & y_size 342 cluster_t * cluster = LOCAL_CLUSTER; 340 343 uint32_t x_size = cluster->x_size; 341 344 uint32_t y_size = cluster->y_size; … … 349 352 uint32_t level_max = bits_log2( size_ext ); 350 353 351 // each CP0register the DQDT root in local cluster manager354 // all CP0s register the DQDT root in local cluster manager 352 355 cluster->dqdt_root_xp = XPTR( 0 , &cluster->dqdt_tbl[level_max] ); 353 356 357 // only CP0 in cluster 0 build the DQDT 358 if( local_cxy == 0 ) 359 { 360 354 361 #if DEBUG_DQDT_INIT 355 if( local_cxy == 0 ) 356 printk("\n[ DBG] 
%s : x_size = %d / y_size = %d / level_max = %d\n",357 __FUNCTION__, x_size, y_size, level_max );362 thread_t * this = CURRENT_THREAD; 363 printk("\n[%s] thread[%x,%x] enters : x_size = %d / y_size = %d / level_max = %d\n", 364 __FUNCTION__, this->process->pid, this->trdid, x_size, y_size, level_max ); 358 365 #endif 359 366 … … 362 369 363 370 #if DEBUG_DQDT_INIT 364 if( local_cxy == 0 ) dqdt_display(); 365 #endif 366 371 dqdt_display(); 372 #endif 373 374 } 367 375 } // end dqdt_init() 368 376 … … 516 524 } 517 525 526 /////////////////////////////////// 527 xptr_t dqdt_get_root( cxy_t cxy, 528 uint32_t level ) 529 { 530 xptr_t node_xp; 531 cxy_t node_cxy; 532 dqdt_node_t * node_ptr; 533 uint32_t current_level; 534 535 assert( (level <= 5) , __FUNCTION__, "illegal DQDT level %d\n", level ); 536 537 #if DEBUG_DQDT_GET_ROOT 538 thread_t * this = CURRENT_THREAD; 539 printk("\n[%s] thread[%x,%x] enters / cxy %x / level %d\n", 540 __FUNCTION__, this->process->pid, this->trdid, cxy, level ); 541 #endif 542 543 // check macro-cluster 544 if( cluster_is_active( cxy ) ) 545 { 546 // initialise node_xp and current_level 547 node_xp = XPTR( cxy , &LOCAL_CLUSTER->dqdt_tbl[0] ); 548 current_level = 0; 549 550 // traverse the quad-tree from bottom to root 551 while( current_level < level ) 552 { 553 node_cxy = GET_CXY( node_xp ); 554 node_ptr = GET_PTR( node_xp ); 555 556 node_xp = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent ) ); 557 current_level++; 558 } 559 } 560 else 561 { 562 node_xp = XPTR_NULL; 563 } 564 565 #if DEBUG_DQDT_GET_ROOT 566 printk("\n[%s] thread[%x,%x] exit / root_xp[%x,%x]\n", 567 __FUNCTION__, this->process->pid, this->trdid, GET_CXY( node_xp ), GET_PTR( node_xp ) ); 568 #endif 569 570 return node_xp; 571 572 } 518 573 519 574 ///////////////////////////////////////////////////////////////////////////////////// … … 584 639 585 640 586 ////////////////////////////////////////// 587 cxy_t dqdt_get_cluster_for_ process( void)641 
/////////////////////////////////////////////////// 642 cxy_t dqdt_get_cluster_for_thread( xptr_t root_xp ) 588 643 { 589 644 // call recursive function 590 cxy_t cxy = dqdt_select_cluster( LOCAL_CLUSTER->dqdt_root_xp , false );591 592 #if DEBUG_DQDT_SELECT_FOR_ PROCESS645 cxy_t cxy = dqdt_select_cluster( root_xp , false ); 646 647 #if DEBUG_DQDT_SELECT_FOR_THREAD 593 648 uint32_t cycle = hal_get_cycles(); 594 649 if( cycle > DEBUG_DQDT_SELECT_FOR_PROCESS ) … … 600 655 } 601 656 602 ///////////////////////////////////////// 603 cxy_t dqdt_get_cluster_for_memory( void)657 /////////////////////////////////////////////////// 658 cxy_t dqdt_get_cluster_for_memory( xptr_t root_xp ) 604 659 { 605 660 // call recursive function 606 cxy_t cxy = dqdt_select_cluster( LOCAL_CLUSTER->dqdt_root_xp , true );661 cxy_t cxy = dqdt_select_cluster( root_xp , true ); 607 662 608 663 #if DEBUG_DQDT_SELECT_FOR_MEMORY -
trunk/kernel/kern/dqdt.h
r632 r637 2 2 * kern/dqdt.h - Distributed Quad Decision Tree 3 3 * 4 * Author : Alain Greiner (2016,2017,2018 )4 * Author : Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 31 31 /**************************************************************************************** 32 32 * This DQDT infrastructure maintains a topological description of ressources usage 33 * in each cluster: number of threads , and number of physical pages allocated.33 * in each cluster: number of threads per core, and number of physical pages allocated. 34 34 * 35 * - If X_SIZE or Y_SIZE are equal to 1, it makes the assumption that the cluster 36 * topology is a one dimensionnal vector, an build the smallest one-dimensionnal 37 * quad-tree covering this one-dimensionnal vector. If the number of clusters 38 * is not a power of 4, the tree is truncated as required. 39 * 40 * TODO : the mapping for the one dimensionnal topology is not implemented yet [AG]. 41 * 42 * - If both Y_SIZE and Y_SIZE are larger than 1, it makes the assumption that 43 * the clusters topology is a 2D mesh. The [X,Y] coordinates of a cluster are 44 * obtained from the CXY identifier using the Rrelevant macros. 45 * X = CXY >> Y_WIDTH / Y = CXY & ((1<<Y_WIDTH)-1) 46 * - If the mesh X_SIZE and Y_SIZE dimensions are not equal, or are not power of 2, 47 * or the mesh contains "holes" reported in the cluster_info[x][y] array, 48 * we build the smallest two dimensionnal quad-tree covering all clusters, 49 * and this tree is truncated as required. 50 * - The mesh size is supposed to contain at most 32 * 32 clusters. 51 * Therefore, it can exist at most 6 DQDT nodes in a given cluster: 52 * . Level 0 nodes exist on all clusters and have no children. 53 * . Level 1 nodes exist when both X and Y coordinates are multiple of 2 54 * . Level 2 nodes exist when both X and Y coordinates are multiple of 4 55 * . Level 3 nodes exist when both X and Y coordinates are multiple of 8 56 * . 
Level 4 nodes exist when both X and Y coordinates are multiple of 16 57 * . Level 5 nodes exist when both X and Y coordinates are multiple of 32 58 * - For nodes other than level 0, the placement is defined as follow: 59 * . The root node is placed in the cluster containing the core executing 60 * the dqdt_init() function. 61 * . An intermediate node (representing a given sub-tree) is placed in one 62 * cluster covered by the subtree, pseudo-randomly selected. 35 * It is organized as a quad-tree, where the leaf cells are the clusters, organised 36 * as a 2D mesh. Each node in the quad-tree (including the root and the leaf cells, 37 * covers a "macro-cluster", that is a square array of clusters where the number 38 * in the macro-cluster is a power of 4, and the macro-cluster side is a power of two. 39 * Each node contains informations on ressources usage (physical memory and cores) 40 * in the covered macro-cluster. 41 * This quad-tree can be truncated, if the physical mesh X_SIZE and Y_SIZE dimensions 42 * are not equal, or are not power of 2, or if the physical mesh contains "holes". 43 * The mesh size is supposed to contain at most 32*32 clusters in this implementation. 44 * . Level 0 nodes exist in all clusters and have no children. 45 * . Level 1 nodes can be placed in any cluster of the covered 2*2 macro-cluster. 46 * . Level 2 nodes can be placed in any cluster of the covered 4*4 macro-cluster. 47 * . Level 3 nodes can be placed in any cluster of the covered 8*8 macro-cluster. 48 * . Level 4 nodes can be placed in any cluster of the covered 16*16 macro-cluster. 49 * . Level 5 nodes can be placed in any cluster of the covered 32*32 macro-cluster. 50 * The root node is placed in the cluster containing the core executing the dqdt_init() 51 * function. Other (non level 0) nodes are placed pseudo-randomly. 63 52 ***************************************************************************************/ 64 53 … … 66 55 * This structure describes a node of the DQDT. 
67 56 * The max number of children is 4, but it can be smaller for some nodes. 68 * Level 0 nodes are the clusters, and have no children. 69 * The root node has no parent. 57 * Level 0 nodes have no children. The root node has no parent. 70 58 ***************************************************************************************/ 71 59 … … 74 62 uint32_t level; /*! node level */ 75 63 uint32_t arity; /*! actual children number in this node */ 76 uint32_t threads; /*! current number of threads in macro-cluster*/77 uint32_t pages; /*! current number of pages in macro-cluster*/64 uint32_t threads; /*! number of threads in macro-cluster */ 65 uint32_t pages; /*! number of allocated pages in macro-cluster */ 78 66 uint32_t cores; /*! number of active cores in macro cluster */ 79 uint32_t clusters; /*! number of active cluster in macro cluster*/67 uint32_t clusters; /*! number of active clusters in macro cluster */ 80 68 xptr_t parent; /*! extended pointer on parent node */ 81 69 xptr_t children[2][2]; /*! extended pointers on children nodes */ … … 87 75 * This function recursively initializes the DQDT structure from informations 88 76 * stored in cluster manager (x_size, y_size and cluster_info[x][y]. 89 * It is executed in all clusters by the local CP0, to compute level_max and register77 * It is called in all clusters by the local CP0, to compute level_max and register 90 78 * the DQDT root node in each cluster manager, but only CPO in cluster 0 build actually 91 79 * the quad-tree covering all active clusters. … … 102 90 ***************************************************************************************/ 103 91 void dqdt_increment_threads( void ); 92 104 93 void dqdt_decrement_threads( void ); 105 94 … … 121 110 122 111 /**************************************************************************************** 123 * This function can be called in any cluster. 
It traverses the DQDT tree 124 * from the root to the bottom, to analyse the computing load and select the cluster 125 * with the lowest number ot threads to place a new process. 112 * This function returns an extended pointer on the dqdt node that is the root of 113 * the sub-tree covering the macro-cluster defined by the <level> argument and 114 * containing the cluster defined by the <cxy> argument. It returns XPTR_NULL if 115 * this macro-cluster is undefined (when the cxy cluster contains no core). 126 116 **************************************************************************************** 117 * @ cxy : cluster identifier. 118 * @ level : level of the sub-tree. 119 * @ returns root_xp if success / return XPTR_NULL if no active core in macro_cluster. 120 ***************************************************************************************/ 121 xptr_t dqdt_get_root( cxy_t cxy, 122 uint32_t level ); 123 124 /**************************************************************************************** 125 * This function can be called in any cluster. It traverses the DQDT tree from the 126 * local root of a macro-cluster, defined by the <root_xp> argument, to the bottom. 127 * It analyses the computing load and selects the cluster containing the lowest number 128 * of threads. 129 **************************************************************************************** 130 * @ root_xp : extended pointer on DQDT node root. 127 131 * @ returns the cluster identifier with the lowest computing load. 128 132 ***************************************************************************************/ 129 cxy_t dqdt_get_cluster_for_ process( void);133 cxy_t dqdt_get_cluster_for_thread( xptr_t root_xp ); 130 134 131 135 /**************************************************************************************** 132 * This function can be called in any cluster. 
It traverses the DQDT tree 133 * from the root to the bottom, to analyse the memory load and select the cluster 134 * with the lowest memory load for dynamic memory allocation with no locality constraint. 136 * This function can be called in any cluster. It traverses the DQDT tree from the 137 * local root of a macro-cluster, defined by the <root_xp> argument, to the bottom. 138 * It analyses the memory load & select the cluster with the lowest number of allocated 139 * physical pages. 135 140 **************************************************************************************** 141 * @ root_xp : extended pointer on DQDT node root. 136 142 * @ returns the cluster identifier with the lowest memory load. 137 143 ***************************************************************************************/ 138 cxy_t dqdt_get_cluster_for_memory( void);144 cxy_t dqdt_get_cluster_for_memory( xptr_t root_xp ); 139 145 140 146 /**************************************************************************************** 141 147 * This function displays on kernel TXT0 the DQDT state for all nodes in the quad-tree. 142 * It traverses the quadtree from root to bottom, and can be called by a thread143 * running in any cluster148 * It traverses the quadtree from the global root to bottom. 149 * It can be called by a thread running in any cluster 144 150 ***************************************************************************************/ 145 151 void dqdt_display( void ); -
trunk/kernel/kern/kernel_init.c
r635 r637 1008 1008 1009 1009 ///////////////////////////////////////////////////////////////////////////////// 1010 // STEP 2 : core[0] initializes the clu ter manager,1011 // including the physical memory allocator .1010 // STEP 2 : core[0] initializes the cluster manager, 1011 // including the physical memory allocators. 1012 1012 ///////////////////////////////////////////////////////////////////////////////// 1013 1013 … … 1102 1102 1103 1103 //////////////////////////////////////////////////////////////////////////////// 1104 // STEP 5 : core[0] initialize sthe distibuted LAPIC descriptor.1105 // core[0] initialize sthe internal chdev descriptors1104 // STEP 5 : core[0] initializes the distributed LAPIC descriptor. 1105 // core[0] initializes the internal chdev descriptors 1106 1106 // core[0] initialize the local external chdev descriptors 1107 1107 //////////////////////////////////////////////////////////////////////////////// -
trunk/kernel/kern/process.c
r635 r637 1909 1909 1910 1910 // select a core in local cluster to execute the main thread 1911 lid = cluster_select_local_core( );1911 lid = cluster_select_local_core( local_cxy ); 1912 1912 1913 1913 // initialize pthread attributes for main thread -
trunk/kernel/kern/rpc.c
r635 r637 1053 1053 1054 1054 // select one core 1055 core_lid = cluster_select_local_core( );1055 core_lid = cluster_select_local_core( local_cxy ); 1056 1056 1057 1057 // call local kernel function -
trunk/kernel/kern/scheduler.h
r564 r637 41 41 { 42 42 busylock_t lock; /*! lock protecting scheduler state */ 43 uint 16_t u_threads_nr; /*! total number of attached user threads */44 uint 16_t k_threads_nr; /*! total number of attached kernel threads */43 uint32_t u_threads_nr; /*! total number of attached user threads */ 44 uint32_t k_threads_nr; /*! total number of attached kernel threads */ 45 45 list_entry_t u_root; /*! root of list of user threads */ 46 46 list_entry_t k_root; /*! root of list of kernel threads */ -
trunk/kernel/kern/thread.c
r635 r637 247 247 else 248 248 { 249 core_lid = cluster_select_local_core( );249 core_lid = cluster_select_local_core( local_cxy ); 250 250 } 251 251 … … 375 375 printk("\n[%s] CPU & FPU contexts created\n", 376 376 __FUNCTION__, thread->trdid ); 377 hal_vmm_display( process, true );377 hal_vmm_display( XPTR( local_cxy , process ) , true ); 378 378 #endif 379 379 … … 418 418 419 419 // select a target core in local cluster 420 core_lid = cluster_select_local_core( );420 core_lid = cluster_select_local_core( local_cxy ); 421 421 422 422 #if (DEBUG_THREAD_USER_FORK & 1) … … 724 724 printk("\n[%s] thread[%x,%x] set CPU context & jump to user code / cycle %d\n", 725 725 __FUNCTION__, process->pid, thread->trdid, cycle ); 726 hal_vmm_display( process, true );726 hal_vmm_display( XPTR( local_cxy , process ) , true ); 727 727 #endif 728 728 … … 1332 1332 // check trdid argument 1333 1333 if( (target_thread_ltid >= CONFIG_THREADS_MAX_PER_CLUSTER) || 1334 cluster_is_ undefined( target_cxy ) )return XPTR_NULL;1334 cluster_is_active( target_cxy ) == false ) return XPTR_NULL; 1335 1335 1336 1336 // get root of list of process descriptors in target cluster -
trunk/kernel/kernel_config.h
r635 r637 68 68 #define DEBUG_ELF_LOAD 0 69 69 70 #define DEBUG_DQDT_GET_ROOT 0 70 71 #define DEBUG_DQDT_INIT 0 72 #define DEBUG_DQDT_SELECT_FOR_THREAD 0 73 #define DEBUG_DQDT_SELECT_FOR_MEMORY 0 74 #define DEBUG_DQDT_UPDATE_PAGES 0 71 75 #define DEBUG_DQDT_UPDATE_THREADS 0 72 #define DEBUG_DQDT_SELECT_FOR_PROCESS 073 #define DEBUG_DQDT_UPDATE_PAGES 074 #define DEBUG_DQDT_SELECT_FOR_MEMORY 075 76 76 77 #define DEBUG_FATFS_ADD_DENTRY 0 … … 170 171 #define DEBUG_RWLOCK_CXY 0 171 172 172 #define DEBUG_SCHED_HANDLE_SIGNALS 2173 #define DEBUG_SCHED_HANDLE_SIGNALS 0 173 174 #define DEBUG_SCHED_YIELD 0 174 175 #define DEBUG_SCHED_RPC_ACTIVATE 0 … … 176 177 #define DEBUG_SEM 0 177 178 178 #define DEBUG_SYSCALLS_ERROR 2179 #define DEBUG_SYSCALLS_ERROR 2 179 180 180 181 #define DEBUG_SYS_BARRIER 0 … … 190 191 #define DEBUG_SYS_GETCWD 0 191 192 #define DEBUG_SYS_GETPID 0 193 #define DEBUG_SYS_GET_BEST_CORE 0 194 #define DEBUG_SYS_GET_CORE_ID 0 195 #define DEBUG_SYS_GET_NB_CORES 0 192 196 #define DEBUG_SYS_ISATTY 0 193 197 #define DEBUG_SYS_IS_FG 0 … … 456 460 457 461 #define CONFIG_INSTRUMENTATION_SYSCALLS 0 458 #define CONFIG_INSTRUMENTATION_PGFAULTS 1459 #define CONFIG_INSTRUMENTATION_FOOTPRINT 1462 #define CONFIG_INSTRUMENTATION_PGFAULTS 0 463 #define CONFIG_INSTRUMENTATION_FOOTPRINT 0 460 464 461 465 -
trunk/kernel/mm/mapper.c
r635 r637 442 442 if ( page_xp == XPTR_NULL ) return -1; 443 443 444 // compute cluster and pointers on page in mapper 445 xptr_t map_xp = ppm_page2base( page_xp ); 446 uint8_t * map_ptr = GET_PTR( map_xp ); 447 cxy_t map_cxy = GET_CXY( map_xp ); 444 // compute extended pointer in kernel mapper 445 xptr_t map_xp = ppm_page2base( page_xp ) + page_offset; 448 446 449 447 #if (DEBUG_MAPPER_MOVE_USER & 1) … … 458 456 if( to_buffer ) 459 457 { 460 hal_copy_to_uspace( map_cxy , map_ptr + page_offset , buf_ptr, page_bytes );458 hal_copy_to_uspace( buf_ptr , map_xp , page_bytes ); 461 459 462 460 #if DEBUG_MAPPER_MOVE_USER & 1 … … 464 462 printk("\n[%s] thread[%x,%x] moved %d bytes / mapper %s (%x,%x) -> user buffer(%x,%x)\n", 465 463 __FUNCTION__, this->process->pid, this->trdid, page_bytes, 466 name, map_cxy, map_ptr + page_offset, local_cxy, buf_ptr );464 name, GET_CXY(map_xp), GET_PTR(map_xp), local_cxy, buf_ptr ); 467 465 #endif 468 466 … … 471 469 { 472 470 ppm_page_do_dirty( page_xp ); 473 hal_copy_from_uspace( map_ cxy , map_ptr + page_offset, buf_ptr , page_bytes );471 hal_copy_from_uspace( map_xp , buf_ptr , page_bytes ); 474 472 475 473 #if DEBUG_MAPPER_MOVE_USER & 1 … … 477 475 printk("\n[%s] thread[%x,%x] moved %d bytes / user buffer(%x,%x) -> mapper %s (%x,%x)\n", 478 476 __FUNCTION__, this->process->pid, this->trdid, page_bytes, 479 local_cxy, buf_ptr, name, map_cxy, map_ptr + page_offset);477 local_cxy, buf_ptr, name, GET_CXY(map_xp), GET_PTR(map_xp) ); 480 478 mapper_display_page( mapper_xp , page_id, 128 ); 481 479 #endif -
trunk/kernel/mm/ppm.c
r636 r637 533 533 page_xp = XPTR( page_cxy , page_ptr ); 534 534 535 536 535 // get local pointer on PPM (same in all clusters) 537 536 ppm_t * ppm = &LOCAL_CLUSTER->ppm; … … 568 567 buddy_index = current_index ^ (1 << current_order); 569 568 buddy_ptr = pages_tbl + buddy_index; 569 570 // get buddy order 571 buddy_order = hal_remote_l32( XPTR( page_cxy , &buddy_ptr->order ) ); 570 572 571 573 // exit loop if buddy not found -
trunk/kernel/syscalls/shared_include/shared_mman.h
r623 r637 51 51 typedef struct mmap_attr_s 52 52 { 53 void * addr; /*! requested virtual address (unused : should be NULL)*/53 void * addr; /*! buffer for allocated vseg base address (return value) */ 54 54 unsigned int length; /*! requested vseg size (bytes) */ 55 55 unsigned int prot; /*! access modes */ -
trunk/kernel/syscalls/shared_include/syscalls_numbers.h
r626 r637 29 29 * It must be kept consistent with the array defined in do_syscall.c 30 30 *****************************************************************************************/ 31 typedef enum { 31 typedef enum 32 { 32 33 SYS_THREAD_EXIT = 0, 33 34 SYS_THREAD_YIELD = 1, … … 75 76 76 77 SYS_GET_CONFIG = 40, 77 SYS_GET_CORE 78 SYS_GET_CORE_ID = 41, 78 79 SYS_GET_CYCLE = 42, 79 80 SYS_DISPLAY = 43, … … 88 89 SYS_SYNC = 51, 89 90 SYS_FSYNC = 52, 91 SYS_GET_BEST_CORE = 53, 92 SYS_GET_NB_CORES = 54, 90 93 91 SYSCALLS_NR = 5 3,94 SYSCALLS_NR = 55, 92 95 93 96 } syscalls_t; -
trunk/kernel/syscalls/sys_barrier.c
r635 r637 33 33 #include <remote_barrier.h> 34 34 35 #if DEBUG_SYS_BARRIER36 35 ////////////////////////////////////////////////////// 37 36 static char * sys_barrier_op_str( uint32_t operation ) … … 42 41 else return "undefined"; 43 42 } 44 #endif45 43 46 44 ////////////////////////////////// … … 74 72 75 73 #if DEBUG_SYSCALLS_ERROR 76 printk("\n[ERROR] in %s : unmapped barrier %x / thread %x / process %x\n",77 __FUNCTION__ , vaddr , this->trdid , process->pid );74 printk("\n[ERROR] in %s for %s : unmapped barrier %x / thread[%x,%x]\n", 75 __FUNCTION__, sys_barrier_op_str(operation), vaddr, process->pid, this->trdid ); 78 76 #endif 79 77 this->errno = error; … … 94 92 95 93 #if DEBUG_SYSCALLS_ERROR 96 printk("\n[ERROR] in %s : unmapped barrier attributes %x / thread %x / process %x\n",97 __FUNCTION__ , attr , this->trdid , process->pid );94 printk("\n[ERROR] in %s for INIT : unmapped barrier attributes %x / thread[%x,%x]\n", 95 __FUNCTION__ , attr , process->pid , this->trdid ); 98 96 #endif 99 97 this->errno = EINVAL; … … 102 100 103 101 // copy barrier attributes into kernel space 104 hal_copy_from_uspace( local_cxy, 105 &k_attr, 106 (void*)attr, 102 hal_copy_from_uspace( XPTR( local_cxy , &k_attr ), 103 (void *)attr, 107 104 sizeof(pthread_barrierattr_t) ); 108 105 … … 111 108 112 109 #if DEBUG_SYSCALLS_ERROR 113 printk("\n[ERROR] in %s : wrong arguments / count %d / x_size %d / y_size %d / nthreads %x\n",110 printk("\n[ERROR] in %s for INIT : count (%d) != x_size (%d) * y_size (%d) * nthreads (%x)\n", 114 111 __FUNCTION__, count, k_attr.x_size, k_attr.y_size, k_attr.nthreads ); 115 112 #endif … … 131 128 132 129 #if DEBUG_SYSCALLS_ERROR 133 printk("\n[ERROR] in %s : cannot create barrier %x / thread %x / process %x\n",134 __FUNCTION__ , vaddr , this->trdid , process->pid );130 printk("\n[ERROR] in %s for INIT : cannot create barrier %x / thread[%x,%x]\n", 131 __FUNCTION__ , vaddr , process->pid , this->trdid ); 135 132 #endif 136 133 this->errno = ENOMEM; … … 
148 145 149 146 #if DEBUG_SYSCALLS_ERROR 150 printk("\n[ERROR] in %s : barrier %x not registered / thread %x / process %x\n",151 __FUNCTION__ , (intptr_t)vaddr , this->trdid , process->pid );147 printk("\n[ERROR] in %s for WAIT : barrier %x not registered / thread[%x,%x]\n", 148 __FUNCTION__ , (intptr_t)vaddr , process->pid, this->trdid ); 152 149 #endif 153 150 this->errno = EINVAL; … … 169 166 170 167 #if DEBUG_SYSCALLS_ERROR 171 printk("\n[ERROR] in %s : barrier %x not registered / thread %x / process %x\n",172 __FUNCTION__ , (intptr_t)vaddr , this->trdid , process->pid );168 printk("\n[ERROR] in %s for DESTROY : barrier %x not registered / thread[%x,%x]\n", 169 __FUNCTION__ , (intptr_t)vaddr , process->pid, this->trdid ); 173 170 #endif 174 171 this->errno = EINVAL; -
trunk/kernel/syscalls/sys_chdir.c
r610 r637 2 2 * sys_chdir.c - kernel function implementing the "chdir" syscall. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018, 2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 75 75 76 76 // copy pathname in kernel space 77 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 77 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 78 pathname, 79 CONFIG_VFS_MAX_PATH_LENGTH ); 78 80 79 81 #if DEBUG_SYS_CHDIR -
trunk/kernel/syscalls/sys_chmod.c
r566 r637 2 2 * sys_chmod.c - Change file access rights. 3 3 * 4 * Author Alain Greiner (2016,2017 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) 2015 UPMC Sorbonne Universites … … 47 47 48 48 #if DEBUG_SYSCALLS_ERROR 49 50 49 printk("\n[ERROR] in %s : pathname too long / thread %x in process %x\n", 50 __FUNCTION__, this->trdid, process->pid ); 51 51 #endif 52 52 this->errno = ENFILE; … … 55 55 56 56 // copy pathname in kernel space 57 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 57 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 58 pathname, 59 CONFIG_VFS_MAX_PATH_LENGTH ); 58 60 59 61 printk("\n[ERROR] in %s : not implemented yet\n", __FUNCTION__ ); -
trunk/kernel/syscalls/sys_display.c
r635 r637 122 122 123 123 // copy string to kernel space 124 hal_strcpy_from_uspace( kbuf , string , 512 ); 124 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 125 string, 126 512 ); 125 127 126 128 // print message on TXT0 kernel terminal … … 136 138 137 139 // check cxy argument 138 if( cluster_is_ undefined( cxy ))140 if( cluster_is_active( cxy ) == false ) 139 141 { 140 142 … … 172 174 173 175 // check cxy argument 174 if( cluster_is_ undefined( cxy ))176 if( cluster_is_active( cxy ) == false ) 175 177 { 176 178 … … 213 215 214 216 // check cxy argument 215 if( cluster_is_ undefined( cxy ))217 if( cluster_is_active( cxy ) == false ) 216 218 { 217 219 … … 323 325 324 326 // copy pathname in kernel space 325 hal_strcpy_from_uspace( kbuf , path , CONFIG_VFS_MAX_PATH_LENGTH ); 327 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 328 path, 329 CONFIG_VFS_MAX_PATH_LENGTH ); 326 330 327 331 // compute root inode for pathname … … 447 451 uint32_t cxy = (uint32_t)arg0; 448 452 449 if( cluster_is_ undefined( cxy ))453 if( cluster_is_active( cxy ) == false ) 450 454 { 451 455 -
trunk/kernel/syscalls/sys_exec.c
r635 r637 89 89 90 90 // copy the array of pointers to kernel buffer 91 hal_copy_from_uspace( local_cxy, 92 k_pointers, 91 hal_copy_from_uspace( XPTR( local_cxy , k_pointers ), 93 92 u_pointers, 94 93 CONFIG_PPM_PAGE_SIZE ); … … 109 108 110 109 // copy the user string to kernel buffer 111 hal_copy_from_uspace( local_cxy, 112 k_buf_ptr, 110 hal_copy_from_uspace( XPTR( local_cxy , k_buf_ptr ), 113 111 k_pointers[index], 114 112 length ); … … 199 197 200 198 // copy pathname in exec_info structure (kernel space) 201 hal_strcpy_from_uspace( exec_info.path , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 199 hal_strcpy_from_uspace( XPTR( local_cxy , exec_info.path ), 200 pathname, 201 CONFIG_VFS_MAX_PATH_LENGTH ); 202 202 203 203 #if DEBUG_SYS_EXEC -
trunk/kernel/syscalls/sys_fork.c
r635 r637 105 105 else // DQDT placement 106 106 { 107 child_cxy = dqdt_get_cluster_for_ process();107 child_cxy = dqdt_get_cluster_for_thread( LOCAL_CLUSTER->dqdt_root_xp ); 108 108 } 109 109 -
trunk/kernel/syscalls/sys_get_config.c
r635 r637 108 108 109 109 // copy to user space 110 hal_copy_to_uspace( local_cxy, &k_x_size, x_size, sizeof(uint32_t) );111 hal_copy_to_uspace( local_cxy, &k_y_size, y_size, sizeof(uint32_t) );112 hal_copy_to_uspace( local_cxy, &k_ncores, ncores, sizeof(uint32_t) );110 hal_copy_to_uspace( x_size, XPTR( local_cxy , &k_x_size ), sizeof(uint32_t) ); 111 hal_copy_to_uspace( y_size, XPTR( local_cxy , &k_y_size ), sizeof(uint32_t) ); 112 hal_copy_to_uspace( ncores, XPTR( local_cxy , &k_ncores ), sizeof(uint32_t) ); 113 113 114 114 hal_fence(); -
trunk/kernel/syscalls/sys_get_cycle.c
r635 r637 45 45 process_t * process = this->process; 46 46 47 #if (DEBUG_SYS_GET_CYCLE || CONFIG_INSTRUMENTATION_SYSCALLS) 48 uint64_t tm_start = hal_get_cycles(); 49 #endif 50 47 51 // check buffer in user space 48 52 error = vmm_get_vseg( process , (intptr_t)cycle , &vseg ); … … 63 67 64 68 // copy to user space 65 hal_copy_to_uspace( local_cxy, &k_cycle, cycle, sizeof(uint64_t) ); 69 hal_copy_to_uspace( cycle, 70 XPTR( local_cxy , &k_cycle ), 71 sizeof(uint64_t) ); 72 73 #if (DEBUG_SYS_GET_CYCLE || CONFIG_INSTRUMENTATION_SYSCALLS) 74 uint64_t tm_end = hal_get_cycles(); 75 #endif 76 77 #if DEBUG_SYS_GET_CYCLE 78 if( DEBUG_SYS_GET_CYCLE < tm_end ) 79 printk("\n[%s] thread[%x,%x] exit / cycle %d\n", 80 __FUNCTION__ , process->pid, this->trdid, (uint32_t)tm_end ); 81 #endif 82 83 #if CONFIG_INSTRUMENTATION_SYSCALLS 84 hal_atomic_add( &syscalls_cumul_cost[SYS_GET_CYCLE] , tm_end - tm_start ); 85 hal_atomic_add( &syscalls_occurences[SYS_GET_CYCLE] , 1 ); 86 #endif 66 87 67 88 return 0; -
trunk/kernel/syscalls/sys_getcwd.c
r610 r637 2 2 * sys_getcwd.c - kernel function implementing the "getcwd" syscall. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 97 97 98 98 // copy kernel buffer to user space 99 hal_strcpy_to_uspace( buffer , first , CONFIG_VFS_MAX_PATH_LENGTH ); 99 hal_strcpy_to_uspace( buffer, 100 XPTR( local_cxy , first ), 101 CONFIG_VFS_MAX_PATH_LENGTH ); 100 102 101 103 hal_fence(); -
trunk/kernel/syscalls/sys_is_fg.c
r635 r637 90 90 91 91 // copy to user space 92 hal_copy_to_uspace( local_cxy, &is_txt_owner, is_fg, sizeof(uint32_t) ); 92 hal_copy_to_uspace( is_fg, 93 XPTR( local_cxy , &is_txt_owner ), 94 sizeof(uint32_t) ); 93 95 94 96 hal_fence(); -
trunk/kernel/syscalls/sys_mkdir.c
r610 r637 60 60 61 61 // copy pathname in kernel space 62 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 62 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 63 pathname, 64 CONFIG_VFS_MAX_PATH_LENGTH ); 63 65 64 66 #if DEBUG_SYS_MKDIR -
trunk/kernel/syscalls/sys_mkfifo.c
r566 r637 2 2 * sys_mkfifo.c - creates a named FIFO file. 3 3 * 4 * Author Alain Greiner (2016,2017 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 33 33 uint32_t mode __attribute__((unused)) ) 34 34 { 35 error_t error;36 35 char kbuf[CONFIG_VFS_MAX_PATH_LENGTH]; 37 36 … … 39 38 process_t * process = this->process; 40 39 40 #if (DEBUG_SYS_MKFIFO || CONFIG_INSTRUMENTATION_SYSCALLS) 41 uint64_t tm_start = hal_get_cycles(); 42 #endif 43 44 #if DEBUG_SYS_MKFIFO 45 if( DEBUG_SYS_MKFIFO < tm_end ) 46 printk("\n[%s] thread[%x,%x] enter for <%s> / cycle %d\n", 47 __FUNCTION__, process->pid, this->trdid, pathname, (uint32_t)tm_end ); 48 #endif 49 41 50 // check fd_array not full 42 51 if( process_fd_array_full() ) 43 52 { 44 printk("\n[ERROR] in %s : file descriptor array full for process %x\n", 45 __FUNCTION__ , process->pid ); 53 54 #if DEBUG_SYSCALLS_ERROR 55 printk("\n[ERROR] in %s : file descriptor array full for process %x\n", 56 __FUNCTION__ , process->pid ); 57 #endif 46 58 this->errno = ENFILE; 47 59 return -1; … … 51 63 if( hal_strlen_from_uspace( pathname ) >= CONFIG_VFS_MAX_PATH_LENGTH ) 52 64 { 53 printk("\n[ERROR] in %s : pathname too long\n", __FUNCTION__ ); 65 66 #if DEBUG_SYSCALLS_ERROR 67 printk("\n[ERROR] in %s : pathname too long\n", __FUNCTION__ ); 68 #endif 54 69 this->errno = ENFILE; 55 70 return -1; … … 57 72 58 73 // copy pathname in kernel space 59 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 74 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 75 pathname, 76 CONFIG_VFS_MAX_PATH_LENGTH ); 60 77 61 78 printk("\n[ERROR] in %s : not implemented yet\n", __FUNCTION__ ); 62 79 return -1; 63 80 64 if( error ) 65 { 66 printk("\n[ERROR] in %s : cannot create named FIFO %s\n", 67 __FUNCTION__ , kbuf ); 68 this->errno = error; 69 return -1; 70 } 81 #if (DEBUG_SYS_MKFIFO || CONFIG_INSTRUMENTATION_SYSCALLS) 82 uint64_t tm_end = hal_get_cycles(); 83 #endif 71 84 72 return 0; 85 
#if DEBUG_SYS_MKFIFO 86 if( DEBUG_SYS_MKFIFO < tm_end ) 87 printk("\n[%s] thread[%x,%x] exit for <%s> / cycle %d\n", 88 __FUNCTION__, process->pid, this->trdid, pathname, (uint32_t)tm_end ); 89 #endif 90 91 #if CONFIG_INSTRUMENTATION_SYSCALLS 92 hal_atomic_add( &syscalls_cumul_cost[SYS_MKFIFO] , tm_end - tm_start ); 93 hal_atomic_add( &syscalls_occurences[SYS_MKFIFO] , 1 ); 94 #endif 73 95 74 96 } // end sys_mkfifo() -
trunk/kernel/syscalls/sys_mmap.c
r635 r637 41 41 { 42 42 vseg_t * vseg; 43 cxy_t vseg_cxy; 44 vseg_type_t vseg_type; 43 cxy_t vseg_cxy; // target cluster for the vseg 44 vseg_type_t vseg_type; // vseg type 45 45 mmap_attr_t k_attr; // attributes copy in kernel space 46 46 xptr_t mapper_xp; 47 error_t error;48 47 reg_t save_sr; // required to enable IRQs 49 48 … … 62 61 63 62 // check user buffer (containing attributes) is mapped 64 error = vmm_get_vseg( process , (intptr_t)attr , &vseg ); 65 66 if( error ) 63 if( vmm_get_vseg( process , (intptr_t)attr , &vseg ) ) 67 64 { 68 65 … … 76 73 77 74 // copy attributes from user space to kernel space 78 hal_copy_from_uspace( local_cxy, 79 &k_attr, 75 hal_copy_from_uspace( XPTR( local_cxy , &k_attr ), 80 76 attr, 81 77 sizeof(mmap_attr_t) ); … … 119 115 120 116 // test mmap type : can be FILE / ANON / REMOTE 117 // to define vseg_type & vseg_cxy 121 118 122 119 /////////////////////////////////////////////////////////// MAP_FILE … … 126 123 #if (DEBUG_SYS_MMAP & 1) 127 124 if ( DEBUG_SYS_MMAP < tm_start ) 128 printk("\n[%s] thread[%x,%x] map file : fdid %d / offset %d / %dbytes\n",125 printk("\n[%s] thread[%x,%x] type file : fdid %d / offset %x / %x bytes\n", 129 126 __FUNCTION__, process->pid, this->trdid, fdid, offset, length ); 130 127 #endif 131 128 132 // FIXME: handle concurent delete of file by another thread closing it129 // FIXME: handle concurent delete of file by another thread 133 130 134 131 if( fdid >= CONFIG_PROCESS_FILE_MAX_NR ) … … 228 225 #if (DEBUG_SYS_MMAP & 1) 229 226 if ( DEBUG_SYS_MMAP < tm_start ) 230 printk("\n[%s] thread[%x,%x] map anon / %dbytes / cluster %x\n",227 printk("\n[%s] thread[%x,%x] type anon / %x bytes / cluster %x\n", 231 228 __FUNCTION__, process->pid, this->trdid, length, vseg_cxy ); 232 229 #endif … … 242 239 #if (DEBUG_SYS_MMAP & 1) 243 240 if ( DEBUG_SYS_MMAP < tm_start ) 244 printk("\n[%s] thread[%x,%x] map remote / %d bytes /cluster %x\n",241 printk("\n[%s] thread[%x,%x] type remote / %x bytes / target cluster 
%x\n", 245 242 __FUNCTION__, process->pid, this->trdid, length, vseg_cxy ); 246 243 #endif 247 244 248 if( cluster_is_ undefined( vseg_cxy ))245 if( cluster_is_active( vseg_cxy ) == false ) 249 246 { 250 247 … … 266 263 process_t * ref_ptr = GET_PTR( ref_xp ); 267 264 268 // create the vseg in reference cluster265 // register vseg in reference VSL 269 266 if( local_cxy == ref_cxy ) 270 267 { … … 306 303 } 307 304 308 // copy vseg base address to user space 309 hal_copy_to_uspace( local_cxy, 310 &vseg->min, 311 &attr->addr, 305 // copy vseg base address to user space mmap_attr_t 306 hal_copy_to_uspace( &attr->addr, 307 XPTR( ref_cxy , &vseg->min ), 312 308 sizeof(intptr_t) ); 313 309 hal_fence(); … … 324 320 #if DEBUG_SYS_MMAP 325 321 if ( DEBUG_SYS_MMAP < tm_end ) 326 printk("\n[%s] thread[%x,%x] exit / %s / cxy %x / base %x / size % d/ cycle %d\n",322 printk("\n[%s] thread[%x,%x] exit / %s / cxy %x / base %x / size %x / cycle %d\n", 327 323 __FUNCTION__, process->pid, this->trdid, 328 324 vseg_type_str(vseg->type), vseg->cxy, vseg->min, length, (uint32_t)tm_end ); -
trunk/kernel/syscalls/sys_open.c
r625 r637 77 77 78 78 // copy pathname in kernel space 79 hal_strcpy_from_uspace( kbuf, pathname , CONFIG_VFS_MAX_PATH_LENGTH );79 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ) , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 80 80 81 81 #if DEBUG_SYS_OPEN -
trunk/kernel/syscalls/sys_opendir.c
r635 r637 85 85 86 86 // copy pathname in kernel space 87 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 87 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 88 pathname, 89 CONFIG_VFS_MAX_PATH_LENGTH ); 88 90 89 91 #if DEBUG_SYS_OPENDIR … … 174 176 175 177 // set ident value in user buffer 176 hal_copy_to_uspace( local_cxy, 177 &ident, 178 dirp, 178 hal_copy_to_uspace( dirp, 179 XPTR( local_cxy , &ident ), 179 180 sizeof(intptr_t) ); 180 181 -
trunk/kernel/syscalls/sys_place_fork.c
r623 r637 40 40 process_t * process = this->process; 41 41 42 #if (DEBUG_SYS_PLACE_FORK || CONFIG_INSTRUMENTATION_SYSCALLS) 43 uint64_t tm_start = hal_get_cycles(); 44 #endif 45 46 #if DEBUG_SYS_PLACE_FORK 47 if( DEBUG_SYS_PLACE_FORK < tm_start ) 48 printk("\n[%s] thread[%x,%x] enter / cxy %x / cycle %d\n", 49 __FUNCTION__, process->pid, this->trdid, cxy, (uint32_t)tm_start ); 50 #endif 51 42 52 // check cxy argument 43 if( cluster_is_ undefined( cxy ))53 if( cluster_is_active( cxy ) == false ) 44 54 { 45 55 … … 56 66 this->fork_cxy = cxy; 57 67 68 #if (DEBUG_SYS_PLACE_FORK || CONFIG_INSTRUMENTATION_SYSCALLS) 69 uint64_t tm_end = hal_get_cycles(); 70 #endif 71 72 #if DEBUG_SYS_PLACE_FORK 73 if( DEBUG_SYS_PLACE_FORK < tm_end ) 74 printk("\n[%s] thread[%x,%x] exit / cycle %d\n", 75 __FUNCTION__ , process->pid, this->trdid, (uint32_t)tm_end ); 76 #endif 77 78 #if CONFIG_INSTRUMENTATION_SYSCALLS 79 hal_atomic_add( &syscalls_cumul_cost[SYS_PLACE_FORK] , tm_end - tm_start ); 80 hal_atomic_add( &syscalls_occurences[SYS_PLACE_FORK] , 1 ); 81 #endif 82 58 83 return 0; 59 84 -
trunk/kernel/syscalls/sys_readdir.c
r635 r637 112 112 113 113 // copy dirent pointer to user buffer 114 hal_copy_to_uspace( local_cxy, 115 &direntp, 116 buffer, 114 hal_copy_to_uspace( buffer, 115 XPTR( local_cxy , &direntp ), 117 116 sizeof(void *) ); 118 117 -
trunk/kernel/syscalls/sys_rename.c
r613 r637 2 2 * sys_rename.c - Rename a file or a directory. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 75 75 76 76 // copy old name and new name in kernel space 77 hal_strcpy_from_uspace( k_old, old , CONFIG_VFS_MAX_PATH_LENGTH );78 hal_strcpy_from_uspace( k_new, new , CONFIG_VFS_MAX_PATH_LENGTH );77 hal_strcpy_from_uspace( XPTR( local_cxy , k_old ) , old , CONFIG_VFS_MAX_PATH_LENGTH ); 78 hal_strcpy_from_uspace( XPTR( local_cxy , k_new ) , new , CONFIG_VFS_MAX_PATH_LENGTH ); 79 79 80 80 #if DEBUG_SYS_RENAME -
trunk/kernel/syscalls/sys_rmdir.c
r604 r637 2 2 * sys_rmdir.c - Remove a directory from file system. 3 3 * 4 * Author Alain Greiner (2016,2017 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) 2015 UPMC Sorbonne Universites … … 42 42 process_t * process = this->process; 43 43 44 #if (DEBUG_SYS_RMDIR || CONFIG_INSTRUMENTATION_SYSCALLS) 45 uint64_t tm_start = hal_get_cycles(); 46 #endif 47 44 48 // check pathname length 45 49 if( hal_strlen_from_uspace( pathname ) >= CONFIG_VFS_MAX_PATH_LENGTH ) … … 54 58 55 59 // copy pathname in kernel space 56 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 60 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 61 pathname, 62 CONFIG_VFS_MAX_PATH_LENGTH ); 57 63 58 64 // get cluster and local pointer on reference process -
trunk/kernel/syscalls/sys_sem.c
r635 r637 58 58 process_t * process = this->process; 59 59 60 #if (DEBUG_SYS_SEM || CONFIG_INSTRUMENTATION_SYSCALLS) 61 uint64_t tm_start = hal_get_cycles(); 62 #endif 63 60 64 #if DEBUG_SYS_SEM 61 uint64_t tm_start;62 uint64_t tm_end;63 tm_start = hal_get_cycles();64 65 if( DEBUG_SYS_SEM < tm_start ) 65 66 printk("\n[DBG] %s : thread %x in process %x enter for %s / cycle %d\n", … … 137 138 138 139 // return value to user 139 hal_copy_to_uspace( local_cxy, 140 &current, 141 current_value, 140 hal_copy_to_uspace( current_value, 141 XPTR( local_cxy , &current ), 142 142 sizeof(uint32_t) ); 143 143 } … … 224 224 hal_fence(); 225 225 226 #if (DEBUG_SYS_SEM || CONFIG_INSTRUMENTATION_SYSCALLS) 227 uint64_t tm_end = hal_get_cycles(); 228 #endif 229 226 230 #if DEBUG_SYS_SEM 227 tm_end = hal_get_cycles();228 231 if( DEBUG_SYS_SEM < tm_end ) 229 232 printk("\n[DBG] %s : thread %x in process %x exit for %s / cost = %d / cycle %d\n", … … 232 235 #endif 233 236 237 #if CONFIG_INSTRUMENTATION_SYSCALLS 238 hal_atomic_add( &syscalls_cumul_cost[SYS_SEM] , tm_end - tm_start ); 239 hal_atomic_add( &syscalls_occurences[SYS_SEM] , 1 ); 240 #endif 241 234 242 return 0; 235 243 -
trunk/kernel/syscalls/sys_stat.c
r635 r637 80 80 81 81 // copy pathname in kernel space 82 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 82 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 83 pathname, 84 CONFIG_VFS_MAX_PATH_LENGTH ); 83 85 84 86 #if DEBUG_SYS_STAT … … 121 123 122 124 // copy k_stat to u_stat 123 hal_copy_to_uspace( local_cxy, 124 &k_stat, 125 u_stat, 125 hal_copy_to_uspace( u_stat, 126 XPTR( local_cxy , &k_stat ), 126 127 sizeof(struct stat) ); 127 128 -
trunk/kernel/syscalls/sys_thread_create.c
r635 r637 66 66 67 67 #if DEBUG_SYS_THREAD_CREATE 68 tm_start = hal_get_cycles();69 68 if( DEBUG_SYS_THREAD_CREATE < tm_start ) 70 69 printk("\n[%s] thread[%x,%x] enter / cycle %d\n", … … 73 72 74 73 // check trdid buffer in user space 75 error = vmm_get_vseg( process , (intptr_t)trdid_ptr , &vseg ); 76 77 if ( error ) 74 if( vmm_get_vseg( process , (intptr_t)trdid_ptr , &vseg ) ) 78 75 { 79 76 … … 89 86 if( user_attr != NULL ) 90 87 { 91 error = vmm_get_vseg( process , (intptr_t)user_attr , &vseg ); 92 93 if( error ) 88 if( vmm_get_vseg( process , (intptr_t)user_attr , &vseg ) ) 94 89 { 95 90 … … 102 97 } 103 98 104 hal_copy_from_uspace( local_cxy, 105 &kern_attr, 99 hal_copy_from_uspace( XPTR( local_cxy , &kern_attr ), 106 100 user_attr, 107 101 sizeof(pthread_attr_t) ); … … 109 103 110 104 // check start_func in user space 111 error = vmm_get_vseg( process , (intptr_t)start_func , &vseg ); 112 113 if( error ) 105 if( vmm_get_vseg( process , (intptr_t)start_func , &vseg ) ) 114 106 { 115 107 … … 125 117 if( start_args != NULL ) 126 118 { 127 error = vmm_get_vseg( process , (intptr_t)start_args , &vseg ); 128 129 if( error ) 119 if( vmm_get_vseg( process , (intptr_t)start_args , &vseg ) ) 130 120 { 131 121 … … 145 135 if( kern_attr.attributes & PT_ATTR_CLUSTER_DEFINED ) 146 136 { 147 if( cluster_is_ undefined( kern_attr.cxy ))137 if( cluster_is_active( kern_attr.cxy ) == false ) 148 138 { 149 139 … … 159 149 else 160 150 { 161 child_cxy = dqdt_get_cluster_for_ process();151 child_cxy = dqdt_get_cluster_for_thread( LOCAL_CLUSTER->dqdt_root_xp ); 162 152 } 163 153 } … … 165 155 { 166 156 kern_attr.attributes = PT_ATTR_DETACH | PT_ATTR_CLUSTER_DEFINED; 167 child_cxy = dqdt_get_cluster_for_process();157 child_cxy = dqdt_get_cluster_for_thread( LOCAL_CLUSTER->dqdt_root_xp ); 168 158 } 169 159 … … 209 199 // returns trdid to user space 210 200 trdid = hal_remote_l32( XPTR( child_cxy , &child_ptr->trdid ) ); 211 hal_copy_to_uspace( local_cxy, 212 &trdid, 213 trdid_ptr, 
201 hal_copy_to_uspace( trdid_ptr, 202 XPTR( local_cxy , &trdid ), 214 203 sizeof(pthread_t) ); 215 204 -
trunk/kernel/syscalls/sys_thread_detach.c
r566 r637 2 2 * sys_thread_detach.c - detach a joinable thread 3 3 * 4 * Authors Alain Greiner (2016,2017 )4 * Authors Alain Greiner (2016,2017,2018,2019) 5 5 * 6 * Copyright (c) 2011,2012UPMC Sorbonne Universites6 * Copyright (c) UPMC Sorbonne Universites 7 7 * 8 8 * This file is part of ALMOS-MKH. … … 48 48 49 49 // check trdid argument 50 if( (target_ltid >= CONFIG_THREADS_MAX_PER_CLUSTER) || cluster_is_undefined( target_cxy ) ) 50 if( (target_ltid >= CONFIG_THREADS_MAX_PER_CLUSTER) || 51 (cluster_is_active( target_cxy ) == false) ) 51 52 { 52 53 printk("\n[ERROR] in %s : illegal trdid argument\n", __FUNCTION__ ); -
trunk/kernel/syscalls/sys_thread_join.c
r633 r637 2 2 * sys_thread_join.c - passive wait on the end of a given thread. 3 3 * 4 * Authors Alain Greiner (2016,2017 )5 * 6 * Copyright (c) 2011,2012UPMC Sorbonne Universites4 * Authors Alain Greiner (2016,2017,2018,2019) 5 * 6 * Copyright (c) UPMC Sorbonne Universites 7 7 * 8 8 * This file is part of ALMOS-MKH. … … 72 72 73 73 // check trdid argument 74 if( (target_ltid >= CONFIG_THREADS_MAX_PER_CLUSTER) || cluster_is_undefined(target_cxy) ) 74 if( (target_ltid >= CONFIG_THREADS_MAX_PER_CLUSTER) || 75 (cluster_is_active(target_cxy) == false) ) 75 76 { 76 77 -
trunk/kernel/syscalls/sys_thread_wakeup.c
r566 r637 1 1 /* 2 * sys_thread_wakeup.c - wakeup all indicated threads2 * sys_thread_wakeup.c - wakeup indicated thread 3 3 * 4 * Author Alain Greiner (2016,2017 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 36 36 process_t * process = this->process; 37 37 38 #if (DEBUG_SYS_THREAD_WAKEUP || CONFIG_INSTRUMENTATION_SYSCALLS) 39 uint64_t tm_start = hal_get_cycles(); 40 #endif 41 38 42 #if DEBUG_SYS_THREAD_WAKEUP 39 uint64_t tm_start;40 uint64_t tm_end;41 tm_start = hal_get_cycles();42 43 if( DEBUG_SYS_THREAD_WAKEUP < tm_start ) 43 printk("\n[ DBG] %s :thread %x in process enter to activate thread %x / cycle %d\n",44 printk("\n[%s] thread %x in process enter to activate thread %x / cycle %d\n", 44 45 __FUNCTION__, this->trdid, process->pid, trdid, (uint32_t)tm_start ); 45 46 #endif … … 50 51 51 52 // check trdid argument 52 if( (target_ltid >= CONFIG_THREADS_MAX_PER_CLUSTER) || cluster_is_undefined( target_cxy ) ) 53 if( (target_ltid >= CONFIG_THREADS_MAX_PER_CLUSTER) || 54 (cluster_is_active( target_cxy ) == false) ) 53 55 { 54 56 … … 78 80 thread_unblock( thread_xp , THREAD_BLOCKED_GLOBAL ); 79 81 82 #if (DEBUG_SYS_THREAD_WAKEUP || CONFIG_INSTRUMENTATION_SYSCALLS) 83 uint64_t tm_end = hal_get_cycles(); 84 #endif 85 86 80 87 #if DEBUG_SYS_THREAD_WAKEUP 81 tm_end = hal_get_cycles();82 88 if( DEBUG_SYS_THREAD_WAKEUP < tm_end ) 83 printk("\n[ DBG] %s :thread %x in process %x exit / thread %x activated / cycle %d\n",89 printk("\n[%s] thread %x in process %x exit / thread %x activated / cycle %d\n", 84 90 __FUNCTION__ , this->trdid, process->pid, trdid, (uint32_t)tm_end ); 91 #endif 92 93 #if CONFIG_INSTRUMENTATION_SYSCALLS 94 hal_atomic_add( &syscalls_cumul_cost[SYS_THREAD_WAKEUP] , tm_end - tm_start ); 95 hal_atomic_add( &syscalls_occurences[SYS_THREAD_WAKEUP] , 1 ); 85 96 #endif 86 97 -
trunk/kernel/syscalls/sys_timeofday.c
r635 r637 50 50 process_t * process = this->process; 51 51 52 #if (DEBUG_SYS_TIMEOFDAY || CONFIG_INSTRUMENTATION_SYSCALLS) 53 uint64_t tm_start = hal_get_cycles(); 54 #endif 55 56 #if DEBUG_SYS_TIMEOFDAY 57 if( DEBUG_SYS_TIMEOFDAY < tm_start ) 58 printk("\n[%s] thread[%x,%x] enter / cycle %d\n", 59 __FUNCTION__, process->pid, this->trdid, (uint32_t)tm_start ); 60 #endif 61 52 62 // check tz (non supported / must be null) 53 63 if( tz ) … … 82 92 83 93 // copy values to user space 84 hal_copy_to_uspace( local_cxy, 85 &k_tv, 86 tv, 94 hal_copy_to_uspace( tv, 95 XPTR( local_cxy , &k_tv ), 87 96 sizeof(struct timeval) ); 88 97 89 98 hal_fence(); 90 99 100 #if (DEBUG_SYS_TIMEOFDAY || CONFIG_INSTRUMENTATION_SYSCALLS) 101 uint64_t tm_end = hal_get_cycles(); 102 #endif 103 104 #if DEBUG_SYS_TIMEOFDAY 105 if( DEBUG_SYS_TIMEOFDAY < tm_end ) 106 printk("\n[%s] thread[%x,%x] exit / cycle %d\n", 107 __FUNCTION__, process->pid, this->trdid, (uint32_t)tm_end ); 108 #endif 109 110 #if CONFIG_INSTRUMENTATION_SYSCALLS 111 hal_atomic_add( &syscalls_cumul_cost[SYS_TIMEOFDAY] , tm_end - tm_start ); 112 hal_atomic_add( &syscalls_occurences[SYS_TIMEOFDAY] , 1 ); 113 #endif 114 91 115 return 0; 92 116 -
trunk/kernel/syscalls/sys_trace.c
r566 r637 2 2 * sys_trace.c - activate / deactivate the context switches trace for a given core 3 3 * 4 * Author Alain Greiner (c) (2016,2017,2018 )4 * Author Alain Greiner (c) (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 40 40 process_t * process = this->process; 41 41 42 #if (DEBUG_SYS_TRACE || CONFIG_INSTRUMENTATION_SYSCALLS) 43 uint64_t tm_start = hal_get_cycles(); 44 #endif 45 42 46 #if DEBUG_SYS_TRACE 43 uint64_t tm_start;44 uint64_t tm_end;45 tm_start = hal_get_cycles();46 47 if( DEBUG_SYS_TRACE < tm_start ) 47 printk("\n[ DBG] %s : thread %d enter / process %x/ cycle = %d\n",48 __FUNCTION__, this , this->process->pid, (uint32_t)tm_start );48 printk("\n[%s] thread[%x,%x] enters / cycle = %d\n", 49 __FUNCTION__, this->process->pid, this->trdid, (uint32_t)tm_start ); 49 50 #endif 50 51 51 52 // check cluster identifier 52 if( cluster_is_ undefined( cxy ))53 if( cluster_is_active( cxy ) == false ) 53 54 { 54 55 … … 85 86 hal_fence(); 86 87 87 #if DEBUG_SYS_TRACE 88 tm_end = hal_get_cycles(); 89 if( DEBUG_SYS_TRACE < tm_end ) 90 printk("\n[DBG] %s : thread %x exit / process %x / cost = %d / cycle %d\n", 91 __FUNCTION__, this, this->process->pid, (uint32_t)(tm_end - tm_start) , (uint32_t)tm_end ); 88 #if (DEBUG_SYS_TRACE || CONFIG_INSTRUMENTATION_SYSCALLS) 89 uint64_t tm_end = hal_get_cycles(); 92 90 #endif 93 91 92 #if DEBUG_SYS_TRACE 93 if( DEBUG_SYS_TRACE < tm_end ) 94 printk("\n[%s] thread[%x,%x] exit / cycle %d\n", 95 __FUNCTION__, this->process->pid, this->trdid, (uint32_t)tm_end ); 96 #endif 97 98 #if CONFIG_INSTRUMENTATION_SYSCALLS 99 hal_atomic_add( &syscalls_cumul_cost[SYS_TRACE] , tm_end - tm_start ); 100 hal_atomic_add( &syscalls_occurences[SYS_TRACE] , 1 ); 101 #endif 94 102 return 0; 95 103 -
trunk/kernel/syscalls/sys_unlink.c
r610 r637 2 2 * sys_unlink.c - unlink a file or directory from VFS 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 60 60 61 61 // copy pathname in kernel space 62 hal_strcpy_from_uspace( kbuf , pathname , CONFIG_VFS_MAX_PATH_LENGTH ); 62 hal_strcpy_from_uspace( XPTR( local_cxy , kbuf ), 63 pathname, 64 CONFIG_VFS_MAX_PATH_LENGTH ); 63 65 64 66 #if DEBUG_SYS_UNLINK -
trunk/kernel/syscalls/sys_wait.c
r635 r637 53 53 pid_t pid = process->pid; 54 54 55 55 56 #if DEBUG_SYS_WAIT 56 uint64_t 57 uint64_t cycle = hal_get_cycles(); 57 58 if( DEBUG_SYS_WAIT < cycle ) 58 59 printk("\n[%s] thread[%x,%x] enter / cycle %d\n", … … 153 154 #endif 154 155 // return child termination state to parent process 155 hal_copy_to_uspace( local_cxy, 156 &child_state, 157 status, 156 hal_copy_to_uspace( status, 157 XPTR( local_cxy , &child_state ), 158 158 sizeof(int) ); 159 159 return child_pid; … … 192 192 193 193 // never executed 194 return -1;194 return 0; 195 195 196 196 } // end sys_wait() -
trunk/kernel/syscalls/syscalls.h
r626 r637 210 210 /****************************************************************************************** 211 211 * [13] This function map physical memory (or a file) in the calling thread virtual space. 212 * The <attr> argument is a pointer on a structure for arguments (see shared_ syscalls.h).212 * The <attr> argument is a pointer on a structure for arguments (see shared_mman.h). 213 213 * The user defined virtual address (MAP_FIXED flag) is not supported. 214 214 * TODO : the access rights checking is not implemented yet [AG] … … 560 560 561 561 /****************************************************************************************** 562 * [41] This function implements the non-standard get_core () syscall.562 * [41] This function implements the non-standard get_core_id() syscall. 563 563 * It returns in <cxy> and <lid> the calling core cluster and local index. 564 564 ****************************************************************************************** … … 567 567 * @ return 0 if success / return -1 if illegal arguments 568 568 *****************************************************************************************/ 569 int sys_get_core ( uint32_t * cxy,570 uint32_t * lid );569 int sys_get_core_id( uint32_t * cxy, 570 uint32_t * lid ); 571 571 572 572 /****************************************************************************************** … … 696 696 int sys_fsync( uint32_t file_id ); 697 697 698 /****************************************************************************************** 699 * [53] This function implements the non-standard "get_best_core" syscall. 700 * It selects, in a macro-cluster specified by the <base_cxy> and <level> arguments, 701 * the core that has the lowest load. 702 * When an active core has been found in the target macro-cluster, it writes into the 703 * <cxy> and <lid> buffers the cluster identifier and the core local index, and return 0. 704 * It returns -1 in case of illegal arguments (level / cxy / lid). 
705 * It returns +1 if there is no active core in specified macro-cluster. 706 ****************************************************************************************** 707 * @ base_cxy : [in] any cluster identifier in macro-cluster. 708 * @ level : [in] macro-cluster level in [1,2,3,4,5]. 709 * @ cxy : [out] selected core cluster identifier. 710 * @ lid : [out] selected core local index in cluster. 711 * @ return 0 if success / -1 if illegal arguments / +1 if no core in macro-clusters. 712 *****************************************************************************************/ 713 int sys_get_best_core( uint32_t base_cxy, 714 uint32_t level, 715 uint32_t * cxy, 716 uint32_t * lid ); 717 718 /****************************************************************************************** 719 * [54] This function implements the non-standard "get_nb_cores" syscall. 720 * It writes in the <ncores> buffer the number of cores in the target cluster <cxy>. 721 ****************************************************************************************** 722 * @ cxy : [in] target cluster identifier. 723 * @ ncores : [out] number of cores / 0 if cluster cxy undefined in architecture. 724 * @ return 0 if success / return -1 if illegal "ncores" arguments. 725 *****************************************************************************************/ 726 int sys_get_nb_cores( uint32_t cxy, 727 uint32_t * ncores ); 728 698 729 #endif // _SYSCALLS_H_ -
trunk/libs/libalmosmkh/almosmkh.c
r626 r637 2 2 * almosmkh.c - User level ALMOS-MKH specific library implementation. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 24 24 #include <almosmkh.h> 25 25 #include <hal_user.h> 26 #include <hal_macros.h> 26 27 #include <hal_shared_types.h> 27 28 #include <syscalls_numbers.h> … … 32 33 #include <mman.h> 33 34 34 #define MALLOC_DEBUG 0 35 #define DEBUG_REMOTE_MALLOC 0 36 #define DEBUG_PTHREAD_PARALLEL 1 35 37 36 ///////////// Non standard system calls ///////////////////////////////// 38 ////////////////////////////////////////////////////////////////////////////////////// 39 ///////////// Non standard system calls /////////////////////////////////////// 40 ////////////////////////////////////////////////////////////////////////////////////// 37 41 38 42 ////////////////////////// … … 63 67 } 64 68 65 ///////////////////////////////// 66 int get_core ( unsigned int * cxy,67 unsigned int * lid )68 { 69 return hal_user_syscall( SYS_GET_CORE ,69 //////////////////////////////////// 70 int get_core_id( unsigned int * cxy, 71 unsigned int * lid ) 72 { 73 return hal_user_syscall( SYS_GET_CORE_ID, 70 74 (reg_t)cxy, 71 75 (reg_t)lid, 0, 0 ); 76 } 77 78 ///////////////////////////////////// 79 int get_nb_cores( unsigned int cxy, 80 unsigned int * ncores ) 81 { 82 return hal_user_syscall( SYS_GET_NB_CORES, 83 (reg_t)cxy, 84 (reg_t)ncores, 0, 0 ); 85 } 86 87 /////////////////////////////////////////// 88 int get_best_core( unsigned int base_cxy, 89 unsigned int level, 90 unsigned int * cxy, 91 unsigned int * lid ) 92 { 93 return hal_user_syscall( SYS_GET_BEST_CORE, 94 (reg_t)base_cxy, 95 (reg_t)level, 96 (reg_t)cxy, 97 (reg_t)lid ); 72 98 } 73 99 … … 250 276 } // end get_string() 251 277 252 253 /////////////// non standard debug functions ////////////////////////// 278 ////////////////////////////////////////////////////////////////////////////////////// 279 
/////////////// non standard debug functions /////////////////////////////////// 280 ////////////////////////////////////////////////////////////////////////////////////// 254 281 255 282 //////////////////////////////////// … … 496 523 497 524 498 /////////////// non standard malloc functions ////////////////////////// 525 ///////////////////////////////////////////////////////////////////////////////////////// 526 /////////////// non standard remote_malloc //////////////////////////////////////// 527 ///////////////////////////////////////////////////////////////////////////////////////// 499 528 500 529 ///////////////////////////////////////////////////////////////////////////////////////// 501 530 // Global variable defining the allocator array (one per cluster) 502 531 // This array (about 16 Kbytes ) will be stored in the data segment 503 // of any application linked with this malloclibray.532 // of any application linked with this libray. 504 533 ///////////////////////////////////////////////////////////////////////////////////////// 505 534 … … 546 575 //////////////////////////////////////////////////////////////////////////////////////////// 547 576 548 #if MALLOC_DEBUG577 #if DEBUG_REMOTE_MALLOC 549 578 static void display_free_array( unsigned int cxy ) 550 579 { … … 594 623 unsigned int iter; // iterator 595 624 596 #if MALLOC_DEBUG 597 printf("\n[MALLOC] %s : enter for store[%x] / size = %x\n", 598 __FUNCTION__, cxy, store_size ); 625 #if DEBUG_REMOTE_MALLOC 626 unsigned int core_cxy; 627 unsigned int core_lid; 628 get_core_id( &core_cxy , &core_lid ); 629 printf("\n[%s] core[%x,%d] enter for store[%x] / size = %x\n", 630 __FUNCTION__, core_cxy, core_lid, cxy, store_size ); 599 631 #endif 600 632 … … 635 667 } 636 668 637 #if MALLOC_DEBUG638 printf("\n[ MALLOC] %s : mmap done for store[%x] / base = %x\n",639 __FUNCTION__, c xy, store_base);669 #if DEBUG_REMOTE_MALLOC 670 printf("\n[%s] core[%x,%d] created vseg %x for store[%x]\n", 671 __FUNCTION__, 
core_cxy, core_lid, store_base, cxy ); 640 672 #endif 641 673 … … 656 688 } 657 689 658 // DEPRECATED: we don't reset the alloc_base array659 // because we don't want to allocate the physical memory660 // when the heap is created [AG]661 // memset( (void *)alloc_base , 0 , alloc_size );662 663 690 // split the store into various sizes blocks, 664 691 // initializes the free[] array and NEXT pointers … … 690 717 691 718 692 #if MALLOC_DEBUG 693 printf("\n[MALLOC] %s : completes store[%x] initialisation\n", 694 __FUNCTION__, cxy ); 695 719 #if DEBUG_REMOTE_MALLOC 720 printf("\n[%s] core[%x,%d] completed store[%x] initialisation\n", 721 __FUNCTION__, core_cxy, core_lid, cxy ); 722 #endif 723 724 #if (DEBUG_REMOTE_MALLOC & 1) 696 725 display_free_array( cxy ); 697 726 #endif … … 762 791 int error; 763 792 764 #if MALLOC_DEBUG 765 printf("\n[MALLOC] %s : enter for size = %x / cxy = %x\n", 766 __FUNCTION__ , size , cxy ); 793 #if DEBUG_REMOTE_MALLOC 794 unsigned int core_cxy; 795 unsigned int core_lid; 796 get_core_id( &core_cxy , &core_lid ); 797 printf("\n[%s] core[%x,%d] enter for size = %x / target_cxy = %x\n", 798 __FUNCTION__ , core_cxy, core_lid, size , cxy ); 767 799 #endif 768 800 … … 828 860 unsigned char * ptr = (unsigned char*)(store[cxy].alloc_base + offset); 829 861 830 // DEPRECATED : we cannot check the alloc[] array,831 // because it has not been initialised by store_init,832 // to avoid physical memory allocation at heap creation [AG]833 // if ( *ptr != 0 )834 // {835 // pthread_mutex_unlock( &store[cxy].mutex );836 // printf("\n[PANIC] in %s : allocate an already allocated block...\n",837 // __FUNCTION__ );838 // return NULL;839 // }840 841 862 // update alloc_array 842 863 *ptr = requested_index; … … 845 866 pthread_mutex_unlock( &store[cxy].mutex ); 846 867 847 #if MALLOC_DEBUG848 printf("\n[ MALLOC] %s :exit / base = %x / size = %x / from store[%x]\n",849 __FUNCTION__, base , size , cxy );868 #if DEBUG_REMOTE_MALLOC 869 printf("\n[%s] core[%x,%d] 
exit / base = %x / size = %x / from store[%x]\n", 870 __FUNCTION__, core_cxy, core_lid, base , size , cxy ); 850 871 #endif 851 872 … … 853 874 854 875 } // end remote_malloc() 855 856 857 876 858 877 ////////////////////////////////////////// … … 920 939 921 940 return new_ptr; 922 } 941 942 } // end remote_realloc() 943 923 944 924 945 ////////////////////////////////////////////////////// … … 991 1012 { 992 1013 993 #if MALLOC_DEBUG1014 #if DEBUG_REMOTE_MALLOC 994 1015 printf("\n[MALLOC] %s : enter for block = %x / cxy = %x\n", 995 1016 __FUNCTION__, ptr, cxy ); … … 1052 1073 pthread_mutex_unlock( &store[cxy].mutex ); 1053 1074 1054 #if MALLOC_DEBUG1075 #if DEBUG_REMOTE_MALLOC 1055 1076 printf("\n[MALLOC] %s : conmpletes for block = %x / cxy = %x\n", 1056 1077 __FUNCTION__, ptr, cxy ); … … 1058 1079 1059 1080 } // end remote_free() 1081 1082 ///////////////////////////////////////////////////////////////////////////////////////// 1083 /////////////// non standard pthread_parallel_create ////////////////////////////// 1084 ///////////////////////////////////////////////////////////////////////////////////////// 1085 1086 #define X_MAX 16 // max number of clusters in a row 1087 #define Y_MAX 16 // max number of clusters in a column 1088 #define CLUSTERS_MAX X_MAX * Y_MAX 1089 #define LEVEL_MAX 5 1090 #define CORES_MAX 4 // max number of cores per cluster 1091 1092 typedef struct build_args_s 1093 { 1094 unsigned char cxy; // this thread cluster identifier 1095 unsigned char level; // this thread level in quad-tree 1096 unsigned char parent_cxy; // parent thread cluster identifier 1097 unsigned char root_level; // quad-tree root level 1098 void * work_func; // pointer on work function pointer 1099 void * work_args_array; // pointer on 2D array of pointers 1100 pthread_barrier_t * parent_barriers_array; // pointer on 1D array of barriers 1101 unsigned int error; // return value : 0 if success 1102 } 1103 build_args_t; 1104 1105 
///////////////////////////////////////////////////////////////////////////////////////// 1106 // Global variables used for inter-thread communications 1107 ///////////////////////////////////////////////////////////////////////////////////////// 1108 1109 pthread_attr_t build_attr [CLUSTERS_MAX][LEVEL_MAX]; // POSIX thread attributes 1110 1111 build_args_t build_args [CLUSTERS_MAX][LEVEL_MAX]; // build function arguments 1112 1113 pthread_barrier_t build_barrier[CLUSTERS_MAX][LEVEL_MAX]; // parent/child synchro 1114 1115 pthread_attr_t work_attr [CLUSTERS_MAX][CORES_MAX]; // POSIX thread attributes 1116 1117 ////////////////////////////////////////////////////////// 1118 static void pthread_recursive_build( build_args_t * args ) 1119 { 1120 unsigned int trdid; // unused (required by pthread_create() 1121 1122 // get arguments 1123 unsigned int cxy = args->cxy; 1124 unsigned int level = args->level; 1125 unsigned int parent_cxy = args->parent_cxy; 1126 unsigned int root_level = args->root_level; 1127 void * work_func = args->work_func; 1128 void * work_args_array = args->work_args_array; 1129 pthread_barrier_t * parent_barriers_array = args->parent_barriers_array; 1130 1131 // set error default value 1132 build_args[cxy][level].error = 0; 1133 1134 /////////////////////////////////////////////////////////// 1135 if( level == 0 ) // children are "work" threads 1136 { 1137 unsigned int lid; // core local index 1138 unsigned int ncores; // number of cores in a cluster 1139 1140 // get number of cores per cluster 1141 get_nb_cores( cxy , &ncores ); 1142 1143 // kill process if no active core in cluster 1144 // TODO this "if" should be replaced by an "assert" [AG] 1145 if( ncores == 0 ) 1146 { 1147 printf("\n[PANIC] in %s : no active core in cluster %x\n", 1148 __FUNCTION__ , cxy ); 1149 1150 // report error to parent 1151 build_args[parent_cxy][level+1].error = 1; 1152 1153 // kill process 1154 exit( EXIT_FAILURE ); 1155 } 1156 1157 // initialize the parent_barrier 
1158 if( pthread_barrier_init( &parent_barriers_array[cxy] , NULL , ncores + 1 ) ) 1159 { 1160 printf("\n[ERROR] in %s : cannot initialise barrier for build thread[%x][%d]\n", 1161 __FUNCTION__ , cxy , level ); 1162 1163 // report error to parent 1164 build_args[parent_cxy][level+1].error = 1; 1165 } 1166 1167 #if DEBUG_PTHREAD_PARALLEL 1168 printf("\n[%s] <build> thread[%x][%d] created barrier / %d children\n", 1169 __FUNCTION__, cxy, level, ncores + 1 ); 1170 #endif 1171 // create (ncores) "work" threads 1172 for ( lid = 0 ; lid < ncores ; lid++ ) 1173 { 1174 // set attributes for thread[cxy][lid] 1175 work_attr[cxy][lid].attributes = PT_ATTR_DETACH | 1176 PT_ATTR_CLUSTER_DEFINED | 1177 PT_ATTR_CORE_DEFINED; 1178 work_attr[cxy][lid].cxy = cxy; 1179 work_attr[cxy][lid].lid = lid; 1180 1181 // compute pointer on thread[cxy][lid] arguments 1182 void * work_args = *((void **)work_args_array + (cxy * CORES_MAX) + lid); 1183 1184 // create thread 1185 if ( pthread_create( &trdid, // unused 1186 &work_attr[cxy][lid], 1187 work_func, 1188 work_args ) ) 1189 { 1190 printf("\n[ERROR] in %s : cannot create work thread[%x,%x]\n", 1191 __FUNCTION__ , cxy , lid ); 1192 1193 // report error to parent 1194 build_args[parent_cxy][level+1].error = 1; 1195 } 1196 1197 #if DEBUG_PTHREAD_PARALLEL 1198 printf("\n[%s] <build> thread[%x][%d] created <work> thread[%x][%d]\n", 1199 __FUNCTION__, cxy, level, cxy, lid ); 1200 #endif 1201 } 1202 1203 // wait on barrier until "work" children threads completed 1204 if( pthread_barrier_wait( &parent_barriers_array[cxy] ) ) 1205 { 1206 printf("\n[ERROR] in %s / first barrier for <build> thread[%x][%d]\n", 1207 __FUNCTION__ , cxy , level ); 1208 1209 // report error to parent 1210 build_args[parent_cxy][level+1].error = 1; 1211 } 1212 1213 #if DEBUG_PTHREAD_PARALLEL 1214 printf("\n[%s] <build> thread[%x][%d] resume after children completion\n", 1215 __FUNCTION__, cxy, level ); 1216 #endif 1217 1218 } // end level == 0 1219 1220 
//////////////////////////////////////////////////////////// 1221 else // children are "build" threads 1222 { 1223 // the 4 children threads can be created in any core of each quarters 1224 // of the parent macro-cluster 1225 1226 unsigned int parent_x; // X coordinate of parent macro-cluster 1227 unsigned int parent_y; // Y coordinate of parent macro-cluster 1228 unsigned int child_x; // X coordinate of child macro-cluster 1229 unsigned int child_y; // Y coordinate of child macro-cluster 1230 unsigned int child_cxy[2][2]; // selected cluster for child thread 1231 unsigned int child_lid[2][2]; // selected core index for child thread 1232 int child_sts[2][2]; // -1 if error / 0 if success / +1 if not found 1233 unsigned int x; // X loop index for children 1234 unsigned int y; // Y loop index for children 1235 1236 unsigned int nb_children = 0; 1237 1238 // get parent macro-cluster mask and half-size from level 1239 unsigned int mask = (1 << level) - 1; 1240 unsigned int half = (level > 0) ? (1 << (level - 1)) : 0; 1241 1242 // get parent macro-cluster coordinates 1243 parent_x = HAL_X_FROM_CXY( cxy ) & ~mask; 1244 parent_y = HAL_Y_FROM_CXY( cxy ) & ~mask; 1245 1246 // get child_cxy and child_lid for up to 4 children threads : 00 / 01 / 10 / 11 1247 for (x = 0 ; x < 2 ; x++) 1248 { 1249 // compute child macro-cluster X coordinate 1250 child_x = (x == 0) ? parent_x : (parent_x + half); 1251 1252 for (y = 0 ; y < 2 ; y++) 1253 { 1254 // compute child macro-cluster Y coordinate 1255 child_y = (y == 0) ? 
parent_y : (parent_y + half); 1256 1257 // select the best core in macro-cluster 1258 child_sts[x][y] = get_best_core( HAL_CXY_FROM_XY( child_x , child_y ), 1259 level-1, 1260 &child_cxy[x][y], 1261 &child_lid[x][y] ); 1262 1263 if( child_sts[x][y] < 0 ) // failure => report error 1264 { 1265 printf("\n[ERROR] in %s : illegal arguments for <build> thread[%x,%x]\n", 1266 __FUNCTION__ , cxy , level ); 1267 1268 // report error to parent 1269 build_args[parent_cxy][level+1].error = 1; 1270 } 1271 else if (child_sts[x][y] > 0 ) // macro-cluster undefined => does nothing 1272 { 1273 } 1274 else // core found 1275 { 1276 nb_children++; 1277 } 1278 } // end for y 1279 } // end for x 1280 1281 // kill process if no active core in cluster 1282 // TODO this "if" should be replaced by an "assert" [AG] 1283 if( nb_children == 0 ) 1284 { 1285 printf("\n[PANIC] in %s : no active core in macro cluster [%x,%d]\n", 1286 __FUNCTION__ , cxy , level ); 1287 1288 // report error to parent 1289 build_args[parent_cxy][level+1].error = 1; 1290 1291 // kill process 1292 exit( EXIT_FAILURE ); 1293 } 1294 1295 // initialize the barrier for (nb_children + 1) 1296 if( pthread_barrier_init( &build_barrier[cxy][level], NULL , nb_children + 1 ) ) 1297 { 1298 printf("\n[error] in %s : cannot initialise barrier for build thread[%x][%d]\n", 1299 __FUNCTION__ , cxy , level ); 1300 1301 // report error to parent 1302 build_args[parent_cxy][level+1].error = 1; 1303 } 1304 1305 #if DEBUG_PTHREAD_PARALLEL 1306 printf("\n[%s] <build> thread[%x][%d] created barrier / %d children\n", 1307 __FUNCTION__, cxy, level, nb_children + 1 ); 1308 #endif 1309 // create 1 to 4 children threads 1310 for (x = 0 ; x < 2 ; x++) 1311 { 1312 for (y = 0 ; y < 2 ; y++) 1313 { 1314 // thread is created only if macro-cluster is active 1315 if( child_sts[x][y] == 0 ) 1316 { 1317 unsigned int tgt_cxy = child_cxy[x][y]; 1318 unsigned int tgt_lid = child_lid[x][y]; 1319 1320 // set child thread attributes 1321 
build_attr[tgt_cxy][level-1].attributes = PT_ATTR_DETACH | 1322 PT_ATTR_CLUSTER_DEFINED | 1323 PT_ATTR_CORE_DEFINED; 1324 build_attr[tgt_cxy][level-1].cxy = tgt_cxy; 1325 build_attr[tgt_cxy][level-1].lid = tgt_lid; 1326 1327 // propagate build function arguments 1328 build_args[tgt_cxy][level-1].cxy = child_cxy[x][y]; 1329 build_args[tgt_cxy][level-1].level = level-1; 1330 build_args[tgt_cxy][level-1].parent_cxy = cxy; 1331 build_args[tgt_cxy][level-1].root_level = root_level; 1332 build_args[tgt_cxy][level-1].work_func = work_func; 1333 build_args[tgt_cxy][level-1].work_args_array = work_args_array; 1334 build_args[tgt_cxy][level-1].parent_barriers_array = parent_barriers_array; 1335 1336 // create thread 1337 if( pthread_create( &trdid, 1338 &build_attr[tgt_cxy][level-1], 1339 &pthread_recursive_build, 1340 &build_args[tgt_cxy][level-1] ) ) 1341 { 1342 printf("\n[ERROR] in %s : cannot create build thread[%x][%d]\n", 1343 __FUNCTION__ , child_cxy , level -1 ); 1344 1345 // report error to parent 1346 build_args[parent_cxy][level+1].error = 1; 1347 } 1348 1349 #if DEBUG_PTHREAD_PARALLEL 1350 printf("\n[%s] <build> thread[%x][%d] created <build> thread[%x][%d] on core[%x,%d]\n", 1351 __FUNCTION__, cxy, level, tgt_cxy, level - 1, tgt_cxy, tgt_lid ); 1352 #endif 1353 } //end if sts[x][y] 1354 } // end for y 1355 } // end for x 1356 1357 // wait on barrier until "build" children threads completed 1358 if( pthread_barrier_wait( &build_barrier[cxy][level] ) ) 1359 { 1360 printf("\n[ERROR] in %s / first barrier for <build> thread[%x][%d]\n", 1361 __FUNCTION__ , cxy , level ); 1362 1363 // report error to parent 1364 build_args[parent_cxy][level+1].error = 1; 1365 } 1366 1367 #if DEBUG_PTHREAD_PARALLEL 1368 printf("\n[%s] <build> thread[%x][%d] resume after children completion\n", 1369 __FUNCTION__, cxy, level ); 1370 #endif 1371 1372 } // end level > 0 1373 1374 // report error to parent when required 1375 if( build_args[cxy][level].error ) 1376 { 1377 
build_args[parent_cxy][level+1].error = 1; 1378 } 1379 1380 // all <build> threads - but the root - 1381 // signal completion to parent thread and exit 1382 if( level < root_level ) 1383 { 1384 if( pthread_barrier_wait( &build_barrier[parent_cxy][level+1] ) ) 1385 { 1386 printf("\n[ERROR] in %s / second barrier for <build> thread[%x][%d]\n", 1387 __FUNCTION__ , cxy , level ); 1388 1389 // report error to parent 1390 build_args[parent_cxy][level+1].error = 1; 1391 } 1392 1393 #if DEBUG_PTHREAD_PARALLEL 1394 printf("\n[%s] <build> thread[%x][%d] exit\n", 1395 __FUNCTION__, cxy , level ); 1396 #endif 1397 // "build" thread exit 1398 pthread_exit( NULL ); 1399 } 1400 } // end pthread_recursive_build() 1401 1402 /////////////////////////////////////////////////////// 1403 int pthread_parallel_create( unsigned int root_level, 1404 void * work_func, 1405 void * work_args_array, 1406 void * parent_barriers_array ) 1407 { 1408 unsigned int root_cxy; 1409 unsigned int root_lid; // unused, but required by get_core_id() 1410 1411 #if DEBUG_PTHREAD_PARALLEL 1412 printf("\n[%s] enter / root_level %d / func %x / args %x / barriers %x\n", 1413 __FUNCTION__, root_level, work_func, work_args_array, parent_barriers_array ); 1414 #endif 1415 1416 // get calling thread cluster 1417 get_core_id( &root_cxy , &root_lid ); 1418 1419 // set the build function arguments for the root <build> thread 1420 build_args[root_cxy][root_level].cxy = root_cxy; 1421 build_args[root_cxy][root_level].level = root_level; 1422 build_args[root_cxy][root_level].root_level = root_level; 1423 build_args[root_cxy][root_level].work_func = work_func; 1424 build_args[root_cxy][root_level].work_args_array = work_args_array; 1425 build_args[root_cxy][root_level].parent_barriers_array = parent_barriers_array; 1426 1427 // call the recursive build function 1428 pthread_recursive_build( &build_args[root_cxy][root_level] ); 1429 1430 // check error 1431 if( build_args[root_cxy][root_level].error ) 1432 { 1433 
printf("\n[error] in %s\n", __FUNCTION__ ); 1434 return -1; 1435 } 1436 1437 return 0; 1438 1439 } // end pthread_parallel_create() 1440 1441 1060 1442 1061 1443 // Local Variables: -
trunk/libs/libalmosmkh/almosmkh.h
r629 r637 2 2 * almosmkh.h - User level ALMOS-MKH specific library definition. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 72 72 73 73 /*************************************************************************************** 74 * This syscall returns the cluster an local index for the calling core. 74 * This syscall returns the cluster identifier and the local index 75 * for the calling core. 75 76 *************************************************************************************** 76 77 * @ cxy : [out] cluster identifier. … … 78 79 * @ return always 0. 79 80 **************************************************************************************/ 80 int get_core( unsigned int * cxy, 81 unsigned int * lid ); 81 int get_core_id( unsigned int * cxy, 82 unsigned int * lid ); 83 84 /*************************************************************************************** 85 * This syscall returns the number of cores in a given cluster. 86 *************************************************************************************** 87 * @ cxy : [in] target cluster identifier. 88 * @ ncores : [out] number of cores in target cluster. 89 * @ return always 0. 90 **************************************************************************************/ 91 int get_nb_cores( unsigned int cxy, 92 unsigned int * ncores ); 93 94 /*************************************************************************************** 95 * This syscall uses the DQDT to search, in a macro-cluster specified by the 96 * <cxy_base> and <level> arguments arguments, the core with the lowest load. 97 * it writes in the <cxy> and <lid> buffers the selected core cluster identifier 98 * and the local core index. 99 *************************************************************************************** 100 * @ cxy_base : [in] any cluster identifier in macro-cluster.in clusters array. 
101 * @ level : [in] macro-cluster level in [1,2,3,4,5]. 102 * @ cxy : [out] selected core cluster identifier. 103 * @ lid : [out] selected core local index. 104 * @ return 0 if success / 1 if no core in macro-cluster / -1 if illegal arguments. 105 **************************************************************************************/ 106 int get_best_core( unsigned int cxy_base, 107 unsigned int level, 108 unsigned int * cxy, 109 unsigned int * lid ); 82 110 83 111 /*************************************************************************************** 84 * This function returns the calling core cycles counter,112 * This function returns the value contained in the calling core cycles counter, 85 113 * taking into account a possible overflow on 32 bits architectures. 86 114 *************************************************************************************** … … 414 442 unsigned int cxy ); 415 443 444 /********* Non standard (ALMOS-MKH specific) pthread_parallel_create() syscall *********/ 445 446 ////////////////////////////////////////////////////////////////////////////////////////// 447 // This system call can be used to parallelize the creation and the termination 448 // of a parallel multi-threaded application. It removes the loop in the main thread that 449 // creates the N working threads (N sequential pthread_create() ). It also removes the 450 // loop that waits completion of these N working threads (N sequential pthread_join() ). 451 // It creates one "work" thread (in detached mode) per core in the target architecture. 452 // Each "work" thread is identified by the [cxy][lid] indexes (cluster / local core). 453 // The pthread_parallel_create() function returns only when all "work" threads completed 454 // (successfully or not). 
455 // 456 // To use this system call, the application code must define the following structures: 457 // - To define the arguments to pass to the <work> function the application must allocate 458 // and initialize a first 2D array, indexed by [cxy] and [lid] indexes, where each slot 459 // contains an application specific structure, and another 2D array, indexed by the same 460 // indexes, containing pointers on these structures. This array of pointers is one 461 // argument of the pthread_parallel_create() function. 462 // - To detect the completion of the <work> threads, the application must allocate a 1D 463 // array, indexed by the cluster index [cxy], where each slot contains a pthread_barrier 464 // descriptor. This barrier is initialised by the pthread_parallel_create() function, 465 // in all clusters containing at least one work thread. This array of barriers is another 466 // argument of the pthread_parallel_create() function. 467 // 468 // Implementation note: 469 // To parallelize the "work" threads creation and termination, the pthread_parallel_create() 470 // function creates a distributed quad-tree (DQT) of "build" threads covering all cores 471 // required to execute the parallel application. 472 // Depending on the hardware topology, this DQT can be truncated, (i.e. some 473 // parent nodes can have fewer than 4 children), if (x_size != y_size), or if one size 474 // is not a power of 2. Each "build" thread is identified by two indexes [cxy][level]. 475 // Each "build" thread makes the following tasks: 476 // 1) It calls the pthread_create() function to create up to 4 children threads, that 477 // are "work" threads when (level == 0), or "build" threads, when (level > 0). 478 // 2) It initializes the barrier (global variable), used to block/unblock 479 // the parent thread until children completion. 480 // 3) It calls the pthread_barrier_wait( self ) to wait until all children threads 481 // completed (successfully or not). 
482 // 4) It calls the pthread_barrier_wait( parent ) to unblock the parent thread. 483 ////////////////////////////////////////////////////////////////////////////////////////// 484 485 /***************************************************************************************** 486 * This blocking function creates N working threads that execute the code defined 487 * by the <work_func> and <work_args> arguments. 488 * The number N of created threads is entirely defined by the <root_level> argument. 489 * This value defines an abstract quad-tree, with a square base : level in [0,1,2,3,4], 490 * side in [1,2,4,8,16], nclusters in [1,4,16,64,256]. This base is called macro_cluster. 491 * A working thread is created on all cores contained in the specified macro-cluster. 492 * The actual number of physical clusters containing cores can be smaller than the number 493 * of clusters covered by the quad tree. The actual number of cores in a cluster can be 494 * less than the max value. 495 * 496 * In the current implementation, all threads execute the same <work_func> function, 497 * on different arguments, that are specified as a 2D array of pointers <work_args>. 498 * This can be modified in a future version, where the <work_func> argument can become 499 * a 2D array of pointers, to have one specific function for each thread. 500 ***************************************************************************************** 501 * @ root_level : [in] DQT root level in [0,1,2,3,4]. 502 * @ work_func : [in] pointer on start function. 503 * @ work_args_array : [in] pointer on a 2D array of pointers. 504 * @ parent_barriers_array : [in] pointer on a 1D array of barriers. 505 * @ return 0 if success / return -1 if failure. 
506 ****************************************************************************************/ 507 int pthread_parallel_create( unsigned int root_level, 508 void * work_func, 509 void * work_args_array, 510 void * parent_barriers_array ); 511 416 512 #endif /* _LIBALMOSMKH_H_ */ 417 513 -
trunk/libs/libpthread/pthread.c
r619 r637 230 230 231 231 //////////////////////////////////////////////////////////////////////////////////////////// 232 // The following functions define another implementation for the POSX barrier 233 // based on a distributed quadtree implemented in user space, and relying 234 // on a busy waiting policy. 235 //////////////////////////////////////////////////////////////////////////////////////////// 236 237 238 //////////////////////////////////////////////////////////////////////////////////////////// 239 // This recursive function initializes the SQT nodes 240 // traversing the SQT from root to bottom 241 //////////////////////////////////////////////////////////////////////////////////////////// 242 static void sqt_barrier_build( pthread_barrier_t * barrier, 232 // The following functions define another implementation for the POSX barrier, based on 233 // a distributed quad tree implemented in user space, but using a busy waiting policy. 234 //////////////////////////////////////////////////////////////////////////////////////////// 235 236 237 //////////////////////////////////////////////////////////////////////////////////////////// 238 // This recursive function initializes the DQT nodes traversing the SQT from root to bottom 239 //////////////////////////////////////////////////////////////////////////////////////////// 240 static void dqt_barrier_build( pthread_barrier_t * barrier, 243 241 unsigned int x, 244 242 unsigned int y, 245 243 unsigned int level, 246 sqt_node_t * parent,244 dqt_node_t * parent, 247 245 unsigned int x_size, 248 246 unsigned int y_size, … … 250 248 { 251 249 // get target node address 252 sqt_node_t * node = barrier->node[x][y][level];250 dqt_node_t * node = barrier->node[x][y][level]; 253 251 254 252 if (level == 0 ) // terminal case … … 266 264 267 265 #if PTHREAD_BARRIER_DEBUG 268 printf("\n[BARRIER] %s : sqt_node[%d][%d][%d] / arity %d / desc %x\n"266 printf("\n[BARRIER] %s : dqt_node[%d][%d][%d] / arity %d / desc %x\n" 
269 267 "parent %x / child0 %x / child1 %x / child2 %x / child3 %x\n", 270 268 __FUNCTION__, x, y, level, node->arity, node, node->parent, … … 312 310 313 311 #if PTHREAD_BARRIER_DEBUG 314 printf("\n[BARRIER] %s : sqt_node[%d][%d][%d] / arity %d / desc %x\n"312 printf("\n[BARRIER] %s : dqt_node[%d][%d][%d] / arity %d / desc %x\n" 315 313 "parent %x / child0 %x / child1 %x / child2 %x / child3 %x\n", 316 314 __FUNCTION__, x, y, level, node->arity, node, node->parent, … … 322 320 { 323 321 if ( (cx[i] < x_size) && (cy[i] < y_size) ) 324 sqt_barrier_build( barrier,322 dqt_barrier_build( barrier, 325 323 cx[i], 326 324 cy[i], … … 332 330 } 333 331 } 334 } // end sqt_barrier_build()332 } // end dqt_barrier_build() 335 333 336 334 //////////////////////////////////////////////////////////////// … … 394 392 ( (l == 4) && ((x&0x0F) == 0) && ((y&0x0F) == 0) ) ) 395 393 { 396 sqt_node_t * node = remote_malloc( sizeof(sqt_node_t) , cxy );394 dqt_node_t * node = remote_malloc( sizeof(dqt_node_t) , cxy ); 397 395 398 396 if( node == NULL ) 399 397 { 400 printf("\n[ERROR] in %s : cannot allocate sqt_node in cluster %x\n",398 printf("\n[ERROR] in %s : cannot allocate dqt_node in cluster %x\n", 401 399 __FUNCTION__ , cxy ); 402 400 return -1; … … 411 409 412 410 // recursively initialize all SQT nodes from root to bottom 413 sqt_barrier_build( barrier,411 dqt_barrier_build( barrier, 414 412 0, 415 413 0, … … 428 426 ////////////////////////////////////////////////////////////////////////////////////////// 429 427 // This recursive function decrements the distributed "count" variables, 430 // traversing the SQT from bottom to root.428 // traversing the DQT from bottom to root. 431 429 // The last arrived thread reset the local node before returning. 
432 430 ////////////////////////////////////////////////////////////////////////////////////////// 433 static void sqt_barrier_decrement( sqt_node_t * node )431 static void dqt_barrier_decrement( dqt_node_t * node ) 434 432 { 435 433 … … 457 455 { 458 456 // decrement the parent node if the current node is not the root 459 if ( node->parent != NULL ) sqt_barrier_decrement( node->parent );457 if ( node->parent != NULL ) dqt_barrier_decrement( node->parent ); 460 458 461 459 #if PTHREAD_BARRIER_DEBUG … … 484 482 return; 485 483 } 486 } // end sqt_barrier_decrement()484 } // end dqt_barrier_decrement() 487 485 488 486 /////////////////////////////////////////////////////// … … 504 502 505 503 // recursively decrement count from bottom to root 506 sqt_barrier_decrement( barrier->node[x][y][0] );504 dqt_barrier_decrement( barrier->node[x][y][0] ); 507 505 508 506 hal_user_fence(); -
trunk/libs/libpthread/pthread.h
r632 r637 2 2 * pthread.h - User level <pthread> library definition. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites -
trunk/libs/mini-libc/stdio.h
r623 r637 2 2 * stdio.h - User level <stdio> library definition. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites -
trunk/libs/mini-libc/stdlib.c
r589 r637 148 148 void * malloc( unsigned int size ) 149 149 { 150 // get cluster identifier 151 unsigned int cxy; 152 unsigned int lid; 153 get_core( &cxy , &lid ); 150 unsigned int cxy; 151 unsigned int lid; 152 153 // get cluster identifier 154 get_core_id( &cxy , &lid ); 154 155 155 156 return remote_malloc( size, cxy ); … … 160 161 unsigned int size ) 161 162 { 162 // get calling core cluster identifier 163 unsigned int cxy; 164 unsigned int lid; 165 get_core( &cxy , &lid ); 163 unsigned int cxy; 164 unsigned int lid; 165 166 // get cluster identifier 167 get_core_id( &cxy , &lid ); 166 168 167 169 return remote_calloc( count , size , cxy ); … … 172 174 unsigned int size ) 173 175 { 174 // get calling core cluster identifier 175 unsigned int cxy; 176 unsigned int lid; 177 get_core( &cxy , &lid ); 176 unsigned int cxy; 177 unsigned int lid; 178 179 // get cluster identifier 180 get_core_id( &cxy , &lid ); 178 181 179 182 return remote_realloc( ptr , size , cxy ); … … 183 186 void free( void * ptr ) 184 187 { 185 // get calling core cluster identifier 186 unsigned int cxy; 187 unsigned int lid; 188 get_core( &cxy , &lid ); 188 unsigned int cxy; 189 unsigned int lid; 190 191 // get cluster identifier 192 get_core_id( &cxy , &lid ); 189 193 190 194 remote_free( ptr , cxy ); -
trunk/params-hard.mk
r636 r637 2 2 3 3 ARCH = /Users/alain/soc/tsar-trunk-svn-2013/platforms/tsar_generic_iob 4 X_SIZE = 14 X_SIZE = 2 5 5 Y_SIZE = 2 6 NB_PROCS = 46 NB_PROCS = 2 7 7 NB_TTYS = 2 8 8 IOC_TYPE = IOC_BDV -
trunk/user/fft/fft.c
r636 r637 22 22 // of N complex points, using the Cooley-Tuckey FFT method. 23 23 // The N data points are seen as a 2D array (rootN rows * rootN columns). 24 // Each thread handle (rootN / nthreads) rows. The N input data points25 // be initialised in three different modes:24 // Each thread handle (rootN / nthreads) rows. 25 // The N input data points can be initialised in three different modes: 26 26 // - CONSTANT : all data points have the same [1,0] value 27 27 // - COSIN : data point n has [cos(n/N) , sin(n/N)] values … … 31 31 // - M : N = 2**M = number of data points / M must be an even number. 32 32 // - T : nthreads = ncores defined by the hardware / must be power of 2. 33 // The number of threads cannot be larger than the number of rows. 33 34 // 34 // This application uses 4 shared data arrays, that are dynamically 35 // allocated an distributed, using the remote_malloc() function, with 36 // one sub-buffer per cluster: 37 // - data[N] contains N input data points, with 2 double per point. 38 // - trans[N] contains N intermediate data points, 2 double per point. 39 // - umain[rootN] contains rootN coefs required for a rootN points FFT. 40 // - twid[N] contains N coefs : exp(2*pi*i*j/N) / i and j in [0,rootN-1]. 41 // For data, trans, twid, each sub-buffer contains (N/nclusters) points. 42 // For umain, each sub-buffer contains (rootN/nclusters) points. 35 // This application uses 3 shared data arrays, that are dynamically 36 // allocated and distributed in clusters, with one sub-buffer per cluster: 37 // - data[N] contains N input data points, 38 // - trans[N] contains N intermediate data points, 39 // - twid[N] contains N coefs : exp(2*pi*i*j/N) / i and j in [0,rootN-1] 40 // Each sub-buffer contains (N/nclusters) entries, with 2 double per entry. 41 // These distributed buffers are allocated and initialised in parallel 42 // by the working threads running on core 0 in each cluster. 43 43 // 44 // There is one thread per core. 
45 // The max number of clusters is defined by (X_MAX * Y_MAX). 46 // The max number of cores per cluster is defined by CORES_MAX. 44 // Each working thread allocates also a private coefs[rootN-1] buffer, 45 // that contains all coefs required for a rootN points FFT. 46 // 47 // There is one working thread per core. 48 // The actual number of cores and cluster in a given hardware architecture 49 // is obtained by the get_config() syscall (x_size, y_size, ncores). 50 // The max number of clusters is bounded by (X_MAX * Y_MAX). 51 // The max number of cores per cluster is bounded by CORES_MAX. 47 52 // 48 53 // Several configuration parameters can be defined below: … … 57 62 // by the main thread in the main() function. 58 63 // - The parallel execution time (parallel_time[i]) is computed by each 59 // thread(i) in the slave() function.64 // working thread(i) in the work() function. 60 65 // - The synchronisation time related to the barriers (sync_time[i]) 61 // is computed by each thread(i) in the slave() function.66 // is computed by each thread(i) in the work() function. 62 67 // The results are displayed on the TXT terminal, and registered on disk. 
63 68 /////////////////////////////////////////////////////////////////////////// … … 87 92 // parameters 88 93 89 #define DEFAULT_M 1 2 // 4096data points90 #define USE_DQT_BARRIER 0// use DDT barrier if non zero94 #define DEFAULT_M 14 // 16384 data points 95 #define USE_DQT_BARRIER 1 // use DDT barrier if non zero 91 96 #define MODE COSIN // DATA array initialisation mode 92 97 #define CHECK 0 93 #define DEBUG_MAIN 0// trace main() function (detailed if odd)94 #define DEBUG_ SLAVE 0 // trace slave() function (detailed if odd)98 #define DEBUG_MAIN 1 // trace main() function (detailed if odd) 99 #define DEBUG_WORK 1 // trace work() function (detailed if odd) 95 100 #define DEBUG_FFT1D 0 // trace FFT1D() function (detailed if odd) 96 101 #define DEBUG_ROW 0 // trace FFTRow() function (detailed if odd) … … 101 106 102 107 ///////////////////////////////////////////////////////////////////////////////////// 103 // structure containing the arguments for the slave() function108 // FFT specific global variables 104 109 ///////////////////////////////////////////////////////////////////////////////////// 105 110 106 typedef struct args_s 107 { 108 unsigned int tid; // thread continuous index 109 unsigned int main_tid; // main thread continuous index 111 // work function arguments 112 typedef struct work_args_s 113 { 114 unsigned int tid; // thread continuous index 115 unsigned int lid; // core local index 116 unsigned int cid; // cluster continuous index 117 pthread_barrier_t * parent_barrier; // parent barrier to signal completion 110 118 } 111 args_t; 112 113 ///////////////////////////////////////////////////////////////////////////////////// 114 // global variables 115 ///////////////////////////////////////////////////////////////////////////////////// 116 117 unsigned int x_size; // number of clusters per row in the mesh 118 unsigned int y_size; // number of clusters per column in the mesh 119 unsigned int ncores; // number of cores per cluster 119 work_args_t; 120 
120 121 unsigned int nthreads; // total number of threads (one thread per core) 121 122 unsigned int nclusters; // total number of clusters … … 129 130 double * data[CLUSTERS_MAX]; // original time-domain data 130 131 double * trans[CLUSTERS_MAX]; // used as auxiliary space for transpose 132 double * twid[CLUSTERS_MAX]; // twiddle factor : exp(-2iPI*k*n/N) 131 133 double * bloup[CLUSTERS_MAX]; // used as auxiliary space for DFT 132 double * umain[CLUSTERS_MAX]; // roots of unity used fo rootN points FFT133 double * twid[CLUSTERS_MAX]; // twiddle factor : exp(-2iPI*k*n/N)134 134 135 135 // instrumentation counters … … 142 142 pthread_barrierattr_t barrier_attr; 143 143 144 // threads identifiers, attributes, and arguments 145 pthread_t trdid[THREADS_MAX]; // kernel threads identifiers 146 pthread_attr_t attr[THREADS_MAX]; // POSIX thread attributes 147 args_t args[THREADS_MAX]; // slave function arguments 148 149 ///////////////////////////////////////////////////////////////////////////////// 144 ///////////////////////////////////////////////////////////////////////////////////// 145 // Global variables required by parallel_pthread_create() 146 ///////////////////////////////////////////////////////////////////////////////////// 147 148 // 2D arrays of input arguments for the <work> threads 149 // These arrays are initialised by the application main thread 150 151 work_args_t work_args[CLUSTERS_MAX][CORES_MAX]; // work function arguments 152 work_args_t * work_ptrs[CLUSTERS_MAX][CORES_MAX]; // pointers on arguments 153 154 // 1D array of barriers to allow the <work> threads to signal termination 155 // this array is initialised in each cluster by the <build[cxy][0]> thread 156 157 pthread_barrier_t parent_barriers[CLUSTERS_MAX]; // termination barrier 158 159 ///////////////////////////////////////////////////////////////////////////////////// 150 160 // functions declaration 151 ///////////////////////////////////////////////////////////////////////////////// 152 
153 void slave(args_t * args );161 ///////////////////////////////////////////////////////////////////////////////////// 162 163 void work( work_args_t * args ); 154 164 155 165 double CheckSum( void ); 156 166 157 void InitX(double ** x , unsigned int mode); 158 159 void InitU(double ** u); 160 161 void InitT(double ** u); 167 void InitD( double ** data , 168 unsigned int mode, 169 unsigned int tid ); 170 171 void InitT( double ** twid, 172 unsigned int tid ); 173 174 void InitU( double * coefs ); 162 175 163 176 unsigned int BitReverse( unsigned int k ); … … 168 181 double * upriv, 169 182 double ** twid, 170 unsigned int MyNum,183 unsigned int tid, 171 184 unsigned int MyFirst, 172 185 unsigned int MyLast ); … … 217 230 int error; 218 231 219 unsigned int main_cxy; // main thread cluster 220 unsigned int main_x; // main thread X coordinate 221 unsigned int main_y; // main thread y coordinate 222 unsigned int main_lid; // main thread local core index 223 unsigned int main_tid; // main thread continuous index 232 unsigned int x_size; // number of clusters per row 233 unsigned int y_size; // number of clusters per column 234 unsigned int ncores; // max number of cores per cluster 224 235 225 236 unsigned int x; // current index for cluster X coordinate 226 237 unsigned int y; // current index for cluster Y coordinate 227 238 unsigned int lid; // current index for core in a cluster 228 unsigned int ci; // continuous cluster index (from x,y) 239 unsigned int tid; // continuous thread index 240 unsigned int cid; // cluster continuous index 229 241 unsigned int cxy; // hardware specific cluster identifier 230 unsigned int tid; // continuous thread index 242 243 char name[64]; // instrumentation file name 244 char path[128]; // instrumentation path name 245 char string[256]; 246 int ret; 231 247 232 248 unsigned long long start_init_cycle; 233 249 unsigned long long end_init_cycle; 234 250 251 #if DEBUG_MAIN 252 unsigned long long debug_cycle; 253 #endif 254 235 255 #if 
CHECK 236 double ck1;// for input/output checking237 double ck3;// for input/output checking256 double ck1; // for input/output checking 257 double ck3; // for input/output checking 238 258 #endif 239 259 … … 241 261 get_cycle( &start_init_cycle ); 242 262 243 // get platform parameters to compute nthreads & nclusters263 // get platform parameters 244 264 if( get_config( &x_size , &y_size , &ncores ) ) 245 265 { … … 269 289 } 270 290 291 // compute nthreads and nclusters 271 292 nthreads = x_size * y_size * ncores; 272 293 nclusters = x_size * y_size; 294 295 // compute covering DQT size an level 296 unsigned int z = (x_size > y_size) ? x_size : y_size; 297 unsigned int root_level = (z == 1) ? 0 : (z == 2) ? 1 : (z == 4) ? 2 : (z == 8) ? 3 : 4; 273 298 274 299 // compute various constants depending on N and T … … 285 310 } 286 311 287 // get main thread coordinates (main_x, main_y, main_lid) 288 get_core( &main_cxy , &main_lid ); 289 main_x = HAL_X_FROM_CXY( main_cxy ); 290 main_y = HAL_Y_FROM_CXY( main_cxy ); 291 main_tid = (((main_x * y_size) + main_y) * ncores) + main_lid; 292 293 printf("\n[fft] starts / core[%x,%d] / %d points / %d thread(s) / PID %x / cycle %d\n", 294 main_cxy, main_lid, N, nthreads, getpid(), (unsigned int)start_init_cycle ); 295 296 // allocate memory for the distributed data[i], trans[i], umain[i], twid[i] buffers 297 // the index (i) is a continuous cluster index 298 unsigned int data_size = (N / nclusters) * 2 * sizeof(double); 299 unsigned int coefs_size = (rootN / nclusters) * 2 * sizeof(double); 300 for (x = 0 ; x < x_size ; x++) 301 { 302 for (y = 0 ; y < y_size ; y++) 303 { 304 ci = x * y_size + y; 305 cxy = HAL_CXY_FROM_XY( x , y ); 306 data[ci] = (double *)remote_malloc( data_size , cxy ); 307 trans[ci] = (double *)remote_malloc( data_size , cxy ); 308 bloup[ci] = (double *)remote_malloc( data_size , cxy ); 309 umain[ci] = (double *)remote_malloc( coefs_size , cxy ); 310 twid[ci] = (double *)remote_malloc( data_size , cxy ); 311 } 
312 printf("\n[fft] starts / %d points / %d thread(s) / PID %x / cycle %d\n", 313 N, nthreads, getpid(), (unsigned int)start_init_cycle ); 314 315 // build instrumentation file name 316 if( USE_DQT_BARRIER ) 317 snprintf( name , 64 , "p_fft_dqt_%d_%d_%d", N , x_size * y_size , ncores ); 318 else 319 snprintf( name , 64 , "p_fft_smp_%d_%d_%d", N , x_size * y_size , ncores ); 320 321 // build pathname 322 snprintf( path , 128 , "/home/%s", name ); 323 324 // open instrumentation file 325 FILE * f = fopen( path , NULL ); 326 if ( f == NULL ) 327 { 328 printf("\n[fft error] cannot open instrumentation file <%s>\n", path ); 329 exit( 0 ); 312 330 } 313 331 314 332 #if DEBUG_MAIN 315 printf("\n[fft] main completes remote_malloc\n"); 316 #endif 317 318 // arrays initialisation 319 InitX( data , MODE ); 320 InitU( umain ); 321 InitT( twid ); 322 323 #if DEBUG_MAIN 324 printf("\n[fft] main completes arrays init\n"); 333 get_cycle( &debug_cycle ); 334 printf("\n[fft] main open file <%s> at cycle %d\n", 335 path, (unsigned int)debug_cycle ); 325 336 #endif 326 337 … … 342 353 #endif 343 354 344 // initialise barrier 355 // initialise barrier synchronizing all <work> threads 345 356 if( USE_DQT_BARRIER ) 346 357 { … … 362 373 363 374 #if DEBUG_MAIN 364 printf("\n[fft] main completes barrier init\n"); 365 #endif 366 367 // launch other threads to execute the slave() function 368 // on cores other than the core running the main thread 375 get_cycle( &debug_cycle ); 376 printf("\n[fft] main completes barrier init at cycle %d\n", 377 (unsigned int)debug_cycle ); 378 #endif 379 380 // build array of arguments for the <work> threads 369 381 for (x = 0 ; x < x_size ; x++) 370 382 { … … 376 388 for ( lid = 0 ; lid < ncores ; lid++ ) 377 389 { 378 // compute thread user index (continuous index) 379 tid = (((x * y_size) + y) * ncores) + lid; 380 381 // set thread attributes 382 attr[tid].attributes = PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED; 383 attr[tid].cxy = cxy; 384 
attr[tid].lid = lid; 385 386 // set slave function argument 387 args[tid].tid = tid; 388 args[tid].main_tid = main_tid; 389 390 // create thread 391 if( tid != main_tid ) 392 { 393 if ( pthread_create( &trdid[tid], // pointer on kernel identifier 394 &attr[tid], // pointer on thread attributes 395 &slave, // pointer on function 396 &args[tid]) ) // pointer on function arguments 397 { 398 printf("\n[fft error] creating thread %x\n", tid ); 399 exit( 0 ); 400 } 401 402 #if (DEBUG_MAIN & 1) 403 unsigned long long debug_cycle; 404 get_cycle( &debug_cycle ); 405 printf("\n[fft] main created thread %d on core[%x,%d] / cycle %d\n", 406 tid, cxy, lid, (unsigned int)debug_cycle ); 407 #endif 408 } 390 // compute cluster continuous index 391 cid = (x * y_size) + y; 392 393 // compute work thread continuous index 394 tid = (cid * ncores) + lid; 395 396 // initialize 2D array of arguments 397 work_args[cxy][lid].tid = tid; 398 work_args[cxy][lid].lid = lid; 399 work_args[cxy][lid].cid = cid; 400 work_args[cxy][lid].parent_barrier = &parent_barriers[cxy]; 401 402 // initialize 2D array of pointers 403 work_ptrs[cxy][lid] = &work_args[cxy][lid]; 409 404 } 410 405 } 411 406 } 412 407 408 // register sequencial time 409 get_cycle( &end_init_cycle ); 410 init_time = (unsigned int)(end_init_cycle - start_init_cycle); 411 413 412 #if DEBUG_MAIN 414 printf("\n[fft] main completes threads creation\n"); 415 #endif 416 417 get_cycle( &end_init_cycle ); 418 419 // register sequencial time 420 init_time = (unsigned int)(end_init_cycle - start_init_cycle); 421 422 // main itself executes the slave() function 423 slave( &args[main_tid] ); 424 425 // wait other threads completion 426 for (x = 0 ; x < x_size ; x++) 427 { 428 for (y = 0 ; y < y_size ; y++) 429 { 430 for ( lid = 0 ; lid < ncores ; lid++ ) 431 { 432 // compute thread continuous index 433 tid = (((x * y_size) + y) * ncores) + lid; 434 435 if( tid != main_tid ) 436 { 437 if( pthread_join( trdid[tid] , NULL ) ) 438 { 439 
printf("\n[fft error] in main thread joining thread %x\n", tid ); 440 exit( 0 ); 441 } 442 443 #if (DEBUG_MAIN & 1) 444 printf("\n[fft] main thread %d joined thread %d\n", main_tid, tid ); 445 #endif 446 447 } 448 } 449 } 450 } 413 printf("\n[fft] main completes <work> threads arguments at cycle %d\n", 414 (unsigned int)end_init_cycle ); 415 #endif 416 417 // create and execute the working threads 418 if( pthread_parallel_create( root_level, 419 &work, 420 &work_ptrs[0][0], 421 &parent_barriers[0] ) ) 422 { 423 printf("\n[fft error] creating threads\n"); 424 exit( 0 ); 425 } 426 427 #if DEBUG_MAIN 428 get_cycle( &debug_cycle ); 429 printf("\n[fft] main resume for instrumentation at cycle %d\n", 430 (unsigned int)debug_cycle) ; 431 #endif 451 432 452 433 #if PRINT_ARRAY … … 463 444 #endif 464 445 465 // instrumentation466 char name[64];467 char path[128];468 char string[256];469 int ret;470 471 // build file name472 if( USE_DQT_BARRIER )473 snprintf( name , 64 , "fft_dqt_%d_%d_%d", N , x_size * y_size , ncores );474 else475 snprintf( name , 64 , "fft_smp_%d_%d_%d", N , x_size * y_size , ncores );476 477 // build pathname478 snprintf( path , 128 , "/home/%s", name );479 480 // open instrumentation file481 FILE * f = fopen( path , NULL );482 if ( f == NULL )483 {484 printf("\n[fft error] cannot open instrumentation file <%s>\n", path );485 exit( 0 );486 }487 printf("\n[fft] file <%s> open\n", path );488 489 446 // display header on terminal, and save to file 490 447 printf("\n----- %s -----\n", name ); … … 497 454 } 498 455 499 // display results for each thread on terminal, and save to file456 // get instrumentation results for each thread 500 457 for (tid = 0 ; tid < nthreads ; tid++) 501 458 { … … 503 460 tid, init_time, parallel_time[tid], sync_time[tid] ); 504 461 505 // display on terminal, and save to instrumentation file 506 printf("%s" , string ); 462 // save to instrumentation file 507 463 fprintf( f , "%s" , string ); 508 464 if( ret < 0 ) 509 465 { 510 466 
printf("\n[fft error] cannot write thread %d to file <%s>\n", tid, path ); 467 printf("%s", string ); 511 468 exit(0); 512 469 } 513 470 } 514 471 515 // display MIN/MAX values on terminal and save to file472 // compute min/max values 516 473 unsigned int min_para = parallel_time[0]; 517 474 unsigned int max_para = parallel_time[0]; … … 527 484 } 528 485 486 // display MIN/MAX values on terminal and save to file 529 487 snprintf( string , 256 , "\n Sequencial Parallel Barrier\n" 530 488 "MIN : %d\t | %d\t | %d\t (cycles)\n" … … 547 505 exit(0); 548 506 } 549 printf("\n[fft] file <%s> closed\n", path ); 507 508 #if DEBUG_MAIN 509 get_cycle( &debug_cycle ); 510 printf("\n[fft] main close file <%s> at cycle %d\n", 511 path, (unsigned int)debug_cycle ); 512 #endif 550 513 551 514 exit( 0 ); … … 553 516 } // end main() 554 517 555 /////////////////////////////////////////////////////////////// 556 // This function is executed in parallel by all threads.557 /////////////////////////////////////////////////////////////// 558 void slave(args_t * args )559 { 560 unsigned int i;561 unsigned int MyNum; // this thread index562 unsigned int MainNum; // main threadindex563 unsigned int MyFirst; // index first row allocated to thread564 unsigned int MyLast; // index last row allocated to thread 565 double * upriv;566 unsigned int c_id;567 unsigned int c_offset;518 ///////////////////////////////////////////////////////////////// 519 // This function is executed in parallel by all <work> threads. 
520 ///////////////////////////////////////////////////////////////// 521 void work( work_args_t * args ) 522 { 523 unsigned int tid; // this thread continuous index 524 unsigned int lid; // core local index 525 unsigned int cid; // cluster continuous index 526 pthread_barrier_t * parent_barrier; // pointer on parent barrier 527 528 unsigned int MyFirst; // index first row allocated to thread 529 unsigned int MyLast; // index last row allocated to thread 530 double * upriv; // private array of FFT coefs 568 531 569 532 unsigned long long parallel_start; … … 572 535 unsigned long long barrier_stop; 573 536 574 MyNum = args->tid; 575 MainNum = args->main_tid; 537 // get thread arguments 538 tid = args->tid; 539 lid = args->lid; 540 cid = args->cid; 541 parent_barrier = args->parent_barrier; 576 542 577 543 get_cycle( &parallel_start ); 578 544 579 #if DEBUG_ SLAVE545 #if DEBUG_WORK 580 546 printf("\n[fft] %s : thread %d enter / cycle %d\n", 581 __FUNCTION__, MyNum, (unsigned int)parallel_start ); 582 #endif 547 __FUNCTION__, tid, (unsigned int)parallel_start ); 548 #endif 549 550 // core 0 allocate memory from the local cluster 551 // for the distributed data[], trans[], twid[] buffers 552 // and for the private upriv[] buffer 553 if( lid == 0 ) 554 { 555 unsigned int data_size = (N / nclusters) * 2 * sizeof(double); 556 unsigned int coefs_size = (rootN - 1) * 2 * sizeof(double); 557 558 data[cid] = (double *)malloc( data_size ); 559 trans[cid] = (double *)malloc( data_size ); 560 twid[cid] = (double *)malloc( data_size ); 561 562 upriv = (double *)malloc( coefs_size ); 563 } 584 564 585 565 // BARRIER … … 586 567 pthread_barrier_wait( &barrier ); 587 568 get_cycle( &barrier_stop ); 588 sync_time[MyNum] += (unsigned int)(barrier_stop - barrier_start); 589 590 #if DEBUG_SLAVE 591 printf("\n[@@@] %s : thread %d exit first barrier / cycle %d\n", 592 __FUNCTION__, MyNum, (unsigned int)barrier_stop ); 593 #endif 594 595 // allocate and initialise local array upriv[] 596 // 
that is a local copy of the rootN coefs defined in umain[] 597 upriv = (double *)malloc(2 * (rootN - 1) * sizeof(double)); 598 for ( i = 0 ; i < (rootN - 1) ; i++) 599 { 600 c_id = i / (rootN / nclusters); 601 c_offset = i % (rootN / nclusters); 602 upriv[2*i] = umain[c_id][2*c_offset]; 603 upriv[2*i+1] = umain[c_id][2*c_offset+1]; 604 } 569 sync_time[tid] += (unsigned int)(barrier_stop - barrier_start); 570 571 #if DEBUG_WORK 572 printf("\n[fft] %s : thread %d exit first barrier / cycle %d\n", 573 __FUNCTION__, tid, (unsigned int)barrier_stop ); 574 #endif 575 576 // all threads initialize data[] local array 577 InitD( data , MODE , tid ); 578 579 // all threads initialize twid[] local array 580 InitT( twid , tid ); 581 582 // all threads initialise private upriv[] array 583 InitU( upriv ); 584 585 // BARRIER 586 get_cycle( &barrier_start ); 587 pthread_barrier_wait( &barrier ); 588 get_cycle( &barrier_stop ); 589 sync_time[tid] += (unsigned int)(barrier_stop - barrier_start); 590 591 #if DEBUG_WORK 592 printf("\n[fft] %s : thread %d exit second barrier / cycle %d\n", 593 __FUNCTION__, tid, (unsigned int)barrier_stop ); 594 #endif 605 595 606 596 // compute first and last rows handled by the thread 607 MyFirst = rootN * MyNum/ nthreads;608 MyLast = rootN * ( MyNum+ 1) / nthreads;597 MyFirst = rootN * tid / nthreads; 598 MyLast = rootN * (tid + 1) / nthreads; 609 599 610 600 // perform forward FFT 611 FFT1D( 1 , data , trans , upriv , twid , MyNum, MyFirst , MyLast );601 FFT1D( 1 , data , trans , upriv , twid , tid , MyFirst , MyLast ); 612 602 613 603 #if CHECK … … 615 605 pthread_barrier_wait( &barrier ); 616 606 get_cycle( &barrier_stop ); 617 sync_time[ MyNum] += (unsigned int)(barrier_stop - barrier_start);618 FFT1D( -1 , data , trans , upriv , twid , MyNum, MyFirst , MyLast );607 sync_time[tid] += (unsigned int)(barrier_stop - barrier_start); 608 FFT1D( -1 , data , trans , upriv , twid , tid , MyFirst , MyLast ); 619 609 #endif 620 610 … … 622 612 623 613 // 
register parallel time 624 parallel_time[MyNum] = (unsigned int)(parallel_stop - parallel_start); 625 626 #if DEBUG_SLAVE 627 printf("\n[fft] %s : thread %x completes fft / p_start %d / p_stop %d\n", 628 __FUNCTION__, MyNum, (unsigned int)parallel_start, (unsigned int)parallel_stop ); 629 int tid; 630 for (tid = 0 ; tid < nthreads ; tid++) 631 { 632 printf("- tid %d : Sequencial %d / Parallel %d / Barrier %d\n", 633 tid , init_time, parallel_time[tid], sync_time[tid] ); 634 } 635 #endif 636 637 // exit only if MyNum != MainNum 638 if( MyNum != MainNum ) pthread_exit( NULL ); 639 640 } // end slave() 614 parallel_time[tid] = (unsigned int)(parallel_stop - parallel_start); 615 616 #if DEBUG_WORK 617 printf("\n[fft] %s : thread %d completes fft / p_start %d / p_stop %d\n", 618 __FUNCTION__, tid, (unsigned int)parallel_start, (unsigned int)parallel_stop ); 619 #endif 620 621 // work thread signals completion to main 622 pthread_barrier_wait( parent_barrier ); 623 624 #if DEBUG_WORK 625 printf("\n[fft] %s : thread %d exit\n", 626 __FUNCTION__, tid ); 627 #endif 628 629 // work thread exit 630 pthread_exit( NULL ); 631 632 } // end work() 641 633 642 634 //////////////////////////////////////////////////////////////////////////////////////// … … 724 716 } 725 717 726 727 //////////////////////////// 728 void InitX(double ** x, 729 unsigned int mode ) 718 ////////////////////////////////////////////////////////////////////////////////////// 719 // Each working thread <tid> contributes to initialize (rootN / nthreads) rows, 720 // in the shared - and distributed - <data> array. 
721 ////////////////////////////////////////////////////////////////////////////////////// 722 void InitD(double ** data, 723 unsigned int mode, 724 unsigned int tid ) 730 725 { 731 726 unsigned int i , j; … … 734 729 unsigned int index; 735 730 736 for ( j = 0 ; j < rootN ; j++ ) // loop on row index 731 // compute row_min and row_max 732 unsigned int row_min = tid * rows_per_thread; 733 unsigned int row_max = row_min + rows_per_thread; 734 735 for ( j = row_min ; j < row_max ; j++ ) // loop on rows 737 736 { 738 for ( i = 0 ; i < rootN ; i++ ) // loop on pointin a row737 for ( i = 0 ; i < rootN ; i++ ) // loop on points in a row 739 738 { 740 739 index = j * rootN + i; … … 745 744 if ( mode == RANDOM ) 746 745 { 747 x[c_id][2*c_offset] = ( (double)rand() ) / 65536;748 x[c_id][2*c_offset+1] = ( (double)rand() ) / 65536;746 data[c_id][2*c_offset] = ( (double)rand() ) / 65536; 747 data[c_id][2*c_offset+1] = ( (double)rand() ) / 65536; 749 748 } 750 749 … … 754 753 { 755 754 double phi = (double)( 2 * PI * index) / N; 756 x[c_id][2*c_offset] = cos( phi );757 x[c_id][2*c_offset+1] = sin( phi );755 data[c_id][2*c_offset] = cos( phi ); 756 data[c_id][2*c_offset+1] = sin( phi ); 758 757 } 759 758 … … 761 760 if ( mode == CONSTANT ) 762 761 { 763 x[c_id][2*c_offset] = 1.0;764 x[c_id][2*c_offset+1] = 0.0;762 data[c_id][2*c_offset] = 1.0; 763 data[c_id][2*c_offset+1] = 0.0; 765 764 } 766 765 } … … 768 767 } 769 768 770 ///////////////////////// 771 void InitU( double ** u ) 772 { 773 unsigned int q; 774 unsigned int j; 775 unsigned int base; 776 unsigned int n1; 777 unsigned int c_id; 778 unsigned int c_offset; 779 double phi; 780 unsigned int stop = 0; 781 782 for (q = 0 ; ((unsigned int)(1 << q) < N) && (stop == 0) ; q++) 783 { 784 n1 = 1 << q; 785 base = n1 - 1; 786 for (j = 0; (j < n1) && (stop == 0) ; j++) 787 { 788 if (base + j > rootN - 1) return; 789 790 c_id = (base + j) / (rootN / nclusters); 791 c_offset = (base + j) % (rootN / nclusters); 792 phi = (double)(2.0 
* PI * j) / (2 * n1); 793 u[c_id][2*c_offset] = cos( phi ); 794 u[c_id][2*c_offset+1] = -sin( phi ); 795 } 796 } 797 } 798 799 ////////////////////////// 800 void InitT( double ** u ) 769 /////////////////////////////////////////////////////////////////////////////////////// 770 // Each working thread <tid> contributes to initialize (rootN / nthreads) rows, 771 // in the shared - and distributed - <twiddle> array. 772 /////////////////////////////////////////////////////////////////////////////////////// 773 void InitT( double ** twid, 774 unsigned int tid ) 801 775 { 802 776 unsigned int i, j; … … 806 780 double phi; 807 781 808 for ( j = 0 ; j < rootN ; j++ ) // loop on row index 782 // compute row_min and row_max 783 unsigned int row_min = tid * rows_per_thread; 784 unsigned int row_max = row_min + rows_per_thread; 785 786 for ( j = row_min ; j < row_max ; j++ ) // loop on rows 809 787 { 810 for ( i = 0 ; i < rootN ; i++ ) // loop on points in a row788 for ( i = 0 ; i < rootN ; i++ ) // loop on points in a row 811 789 { 812 790 index = j * rootN + i; … … 815 793 816 794 phi = (double)(2.0 * PI * i * j) / N; 817 u[c_id][2*c_offset] = cos( phi ); 818 u[c_id][2*c_offset+1] = -sin( phi ); 795 twid[c_id][2*c_offset] = cos( phi ); 796 twid[c_id][2*c_offset+1] = -sin( phi ); 797 } 798 } 799 } 800 801 /////////////////////////////////////////////////////////////////////////////////////// 802 // Each working thread initialize the private <upriv> array / (rootN - 1) entries. 
803 /////////////////////////////////////////////////////////////////////////////////////// 804 void InitU( double * upriv ) 805 { 806 unsigned int q; 807 unsigned int j; 808 unsigned int base; 809 unsigned int n1; 810 double phi; 811 812 for (q = 0 ; ((unsigned int)(1 << q) < N) ; q++) 813 { 814 n1 = 1 << q; // n1 == 2**q 815 base = n1 - 1; 816 for (j = 0; (j < n1) ; j++) 817 { 818 if (base + j > rootN - 1) return; 819 820 phi = (double)(2.0 * PI * j) / (2 * n1); 821 upriv[2*(base+j)] = cos( phi ); 822 upriv[2*(base+j)+1] = -sin( phi ); 819 823 } 820 824 } … … 856 860 double * upriv, // local array containing coefs for rootN FFT 857 861 double ** twid, // distributed arrays containing N twiddle factors 858 unsigned int MyNum,// thread continuous index862 unsigned int tid, // thread continuous index 859 863 unsigned int MyFirst, 860 864 unsigned int MyLast ) … … 868 872 get_cycle( &cycle ); 869 873 printf("\n[fft] %s : thread %d enter / first %d / last %d / cycle %d\n", 870 __FUNCTION__, MyNum, MyFirst, MyLast, (unsigned int)cycle );874 __FUNCTION__, tid, MyFirst, MyLast, (unsigned int)cycle ); 871 875 #endif 872 876 … … 877 881 get_cycle( &cycle ); 878 882 printf("\n[fft] %s : thread %d after first transpose / cycle %d\n", 879 __FUNCTION__, MyNum, (unsigned int)cycle );883 __FUNCTION__, tid, (unsigned int)cycle ); 880 884 if( PRINT_ARRAY ) PrintArray( tmp , N ); 881 885 #endif … … 885 889 pthread_barrier_wait( &barrier ); 886 890 get_cycle( &barrier_stop ); 887 sync_time[ MyNum] = (unsigned int)(barrier_stop - barrier_start);891 sync_time[tid] = (unsigned int)(barrier_stop - barrier_start); 888 892 889 893 #if( DEBUG_FFT1D & 1 ) 890 894 get_cycle( &cycle ); 891 895 printf("\n[fft] %s : thread %d exit barrier after first transpose / cycle %d\n", 892 __FUNCTION__, MyNum, (unsigned int)cycle );896 __FUNCTION__, tid, (unsigned int)cycle ); 893 897 #endif 894 898 … … 902 906 903 907 #if( DEBUG_FFT1D & 1 ) 904 printf("\n[fft] %s : thread %d after first twiddle\n", 
__FUNCTION__, MyNum);908 printf("\n[fft] %s : thread %d after first twiddle\n", __FUNCTION__, tid); 905 909 if( PRINT_ARRAY ) PrintArray( tmp , N ); 906 910 #endif … … 912 916 913 917 #if( DEBUG_FFT1D & 1 ) 914 printf("\n[fft] %s : thread %d exit barrier after first twiddle\n", __FUNCTION__, MyNum);915 #endif 916 917 sync_time[ MyNum] += (unsigned int)(barrier_stop - barrier_start);918 printf("\n[fft] %s : thread %d exit barrier after first twiddle\n", __FUNCTION__, tid); 919 #endif 920 921 sync_time[tid] += (unsigned int)(barrier_stop - barrier_start); 918 922 919 923 // transpose tmp to x … … 921 925 922 926 #if( DEBUG_FFT1D & 1 ) 923 printf("\n[fft] %s : thread %d after second transpose\n", __FUNCTION__, MyNum);927 printf("\n[fft] %s : thread %d after second transpose\n", __FUNCTION__, tid); 924 928 if( PRINT_ARRAY ) PrintArray( x , N ); 925 929 #endif … … 931 935 932 936 #if( DEBUG_FFT1D & 1 ) 933 printf("\n[fft] %s : thread %d exit barrier after second transpose\n", __FUNCTION__, MyNum);934 #endif 935 936 sync_time[ MyNum] += (unsigned int)(barrier_stop - barrier_start);937 printf("\n[fft] %s : thread %d exit barrier after second transpose\n", __FUNCTION__, tid); 938 #endif 939 940 sync_time[tid] += (unsigned int)(barrier_stop - barrier_start); 937 941 938 942 // do FFTs on rows of x and apply the scaling factor … … 944 948 945 949 #if( DEBUG_FFT1D & 1 ) 946 printf("\n[fft] %s : thread %d after FFT on rows\n", __FUNCTION__, MyNum);950 printf("\n[fft] %s : thread %d after FFT on rows\n", __FUNCTION__, tid); 947 951 if( PRINT_ARRAY ) PrintArray( x , N ); 948 952 #endif … … 954 958 955 959 #if( DEBUG_FFT1D & 1 ) 956 printf("\n[fft] %s : thread %d exit barrier after FFT on rows\n", __FUNCTION__, MyNum);957 #endif 958 sync_time[ MyNum] += (unsigned int)(barrier_stop - barrier_start);960 printf("\n[fft] %s : thread %d exit barrier after FFT on rows\n", __FUNCTION__, tid); 961 #endif 962 sync_time[tid] += (unsigned int)(barrier_stop - barrier_start); 959 963 960 964 
// transpose x to tmp … … 962 966 963 967 #if( DEBUG_FFT1D & 1 ) 964 printf("\n[fft] %s : thread %x after third transpose\n", __FUNCTION__, MyNum);968 printf("\n[fft] %s : thread %x after third transpose\n", __FUNCTION__, tid); 965 969 if( PRINT_ARRAY ) PrintArray( x , N ); 966 970 #endif … … 972 976 973 977 #if( DEBUG_FFT1D & 1 ) 974 printf("\n[fft] %s : thread %d exit barrier after third transpose\n", __FUNCTION__, MyNum);975 #endif 976 977 sync_time[ MyNum] += (unsigned int)(barrier_stop - barrier_start);978 sync_time[ MyNum] += (long)(barrier_stop - barrier_start);978 printf("\n[fft] %s : thread %d exit barrier after third transpose\n", __FUNCTION__, tid); 979 #endif 980 981 sync_time[tid] += (unsigned int)(barrier_stop - barrier_start); 982 sync_time[tid] += (long)(barrier_stop - barrier_start); 979 983 980 984 // copy tmp to x … … 982 986 983 987 #if DEBUG_FFT1D 984 printf("\n[fft] %s : thread %d completed\n", __FUNCTION__, MyNum);988 printf("\n[fft] %s : thread %d completed\n", __FUNCTION__, tid); 985 989 if( PRINT_ARRAY ) PrintArray( x , N ); 986 990 #endif -
trunk/user/idbg/idbg.c
r580 r637 20 20 21 21 get_cycle( &cycle ); 22 get_core ( &cxy , &lid );22 get_core_id( &cxy , &lid ); 23 23 24 24 printf( "\n[IDBG] starts on core[%x,%d] / cycle %d\n", -
trunk/user/ksh/ksh.c
r636 r637 1186 1186 char cmd[CMD_MAX_SIZE]; // buffer for one command 1187 1187 1188 / /1. first direct command1188 /* 1. first direct command 1189 1189 if( sem_wait( &semaphore ) ) 1190 1190 { … … 1199 1199 strcpy( cmd , "load bin/user/sort.elf" ); 1200 1200 execute( cmd ); 1201 //1202 1203 1204 1205 / /2. second direct command1201 */ 1202 1203 1204 1205 /* 2. second direct command 1206 1206 if( sem_wait( &semaphore ) ) 1207 1207 { … … 1216 1216 strcpy( cmd , "load bin/user/fft.elf" ); 1217 1217 execute( cmd ); 1218 //1218 */ 1219 1219 1220 1220 … … 1455 1455 // get KSH process pid and core 1456 1456 parent_pid = getpid(); 1457 get_core ( &cxy , &lid );1457 get_core_id( &cxy , &lid ); 1458 1458 1459 1459 #if DEBUG_MAIN -
trunk/user/pgcd/pgcd.c
r626 r637 27 27 28 28 get_cycle( &cycle ); 29 get_core ( &cxy , &lid );29 get_core_id( &cxy , &lid ); 30 30 31 31 printf( "\n[pgcd] starts on core[%x,%d] / cycle %d\n\n", -
trunk/user/sort/sort.c
r636 r637 54 54 #include <hal_macros.h> 55 55 56 #define ARRAY_LENGTH 2048 // number of items 57 #define MAX_THREADS 1024 // 16 * 16 * 4 58 59 #define USE_DQT_BARRIER 1 // use DQT barrier if non zero 60 #define DISPLAY_ARRAY 0 // display items values before and after 61 #define DEBUG_MAIN 0 // trace main function 62 #define DEBUG_SORT 0 // trace sort function 63 #define CHECK_RESULT 0 // for debug 64 #define INSTRUMENTATION 1 // register computation times on file 65 66 ///////////////////////////////////////////////////////////// 67 // argument for the sort() function (one thread per core) 68 ///////////////////////////////////////////////////////////// 56 #define ARRAY_LENGTH 2048 // number of items 57 #define MAX_THREADS 1024 // 16 * 16 * 4 58 59 #define X_MAX 16 // max number of clusters in a row 60 #define Y_MAX 16 // max number of clusters in a column 61 #define CORES_MAX 4 // max number of cores in a cluster 62 #define CLUSTERS_MAX X_MAX * Y_MAX 63 64 #define USE_DQT_BARRIER 1 // use DQT barrier if non zero 65 #define DISPLAY_ARRAY 0 // display items values before and after 66 #define DEBUG_MAIN 0 // trace main function 67 #define DEBUG_SORT 0 // trace sort function 68 #define CHECK_RESULT 0 // for debug 69 #define INSTRUMENTATION 1 // register computation times on file 70 71 /////////////////////////////////////////////////////////////////////////////////// 72 // Arguments for the sort() function 73 /////////////////////////////////////////////////////////////////////////////////// 69 74 70 75 typedef struct 71 76 { 72 unsigned int threads; // total number of threads73 unsigned int thread_uid; // thread user index (0 to threads -1)74 unsigned int main_uid; // main thread user index77 unsigned int tid; // continuous thread index 78 unsigned int threads; // total number of threads 79 pthread_barrier_t * parent_barrier; // pointer on termination barrier 75 80 } 76 args_t;77 78 ////////////////////////////////////////// 79 // Global variables80 
////////////////////////////////////////// 81 sort_args_t; 82 83 //////////////////////////////////////////////////////////////////////////////////// 84 // Sort specific global variables 85 //////////////////////////////////////////////////////////////////////////////////// 81 86 82 87 int array0[ARRAY_LENGTH]; // values to sort … … 85 90 pthread_barrier_t barrier; // synchronisation variables 86 91 87 pthread_t trdid[MAX_THREADS]; // kernel identifiers 88 pthread_attr_t attr[MAX_THREADS]; // thread attributes 89 args_t arg[MAX_THREADS]; // sort function arguments 92 ///////////////////////////////////////////////////////////////////////////////////// 93 // Global variables required by parallel_pthread_create() 94 ///////////////////////////////////////////////////////////////////////////////////// 95 96 // 2D arrays of input arguments for the <sort> threads 97 // These arrays are initialised by the application main thread 98 99 sort_args_t sort_args[CLUSTERS_MAX][CORES_MAX]; // sort function arguments 100 sort_args_t * sort_ptrs[CLUSTERS_MAX][CORES_MAX]; // pointers on arguments 101 102 // 1D array of barriers to allow the <sort> threads to signal termination 103 // this array is initialised by the pthread_parallel_create() function 104 105 pthread_barrier_t parent_barriers[CLUSTERS_MAX]; // termination barrier 106 90 107 91 108 //////////////////////////////////// … … 157 174 } // end merge() 158 175 159 ////////////////////////////// ////////160 static void sort( constargs_t * ptr )176 ////////////////////////////// 177 void sort( sort_args_t * ptr ) 161 178 { 162 unsigned int i; 163 unsigned long long cycle; 164 unsigned int cxy; 165 unsigned int lid; 166 167 int * src_array = NULL; 168 int * dst_array = NULL; 169 170 // get core coordinates an date 171 get_core( &cxy , &lid ); 172 get_cycle( &cycle ); 173 174 unsigned int thread_uid = ptr->thread_uid; 175 unsigned int threads = ptr->threads; 176 unsigned int main_uid = ptr->main_uid; 177 178 #if DISPLAY_ARRAY 
179 unsigned int n; 180 if( thread_uid == main_uid ) 181 { 182 printf("\n*** array before sort\n"); 183 for( n=0; n<ARRAY_LENGTH; n++) printf("array[%d] = %d\n", n , array0[n] ); 184 } 179 unsigned int i; 180 int * src_array = NULL; 181 int * dst_array = NULL; 182 183 // get arguments 184 unsigned int tid = ptr->tid; 185 unsigned int threads = ptr->threads; 186 pthread_barrier_t * parent_barrier = ptr->parent_barrier; 187 188 unsigned int items = ARRAY_LENGTH / threads; 189 unsigned int stages = __builtin_ctz( threads ) + 1; 190 191 #if DEBUG_SORT 192 printf("\n[sort] start : ptr %x / tid %d / threads %d / barrier %x\n", 193 ptr, tid, threads, parent_barrier ); 194 #endif 195 196 bubbleSort( array0, items, items * tid ); 197 198 #if DEBUG_SORT 199 printf("\n[sort] thread[%d] : stage 0 completed\n", tid ); 185 200 #endif 186 201 … … 189 204 190 205 #if DEBUG_SORT 191 if( thread_uid == 0 ) 192 printf("\n[sort] thread[%d] exit barrier 0\n", thread_uid ); 193 #endif 194 195 unsigned int items = ARRAY_LENGTH / threads; 196 unsigned int stages = __builtin_ctz( threads ) + 1; 197 198 #if DEBUG_SORT 199 if( thread_uid == 0 ) 200 printf("\n[sort] thread[%d] : start\n", thread_uid ); 201 #endif 202 203 bubbleSort( array0, items, items * thread_uid ); 204 205 #if DEBUG_SORT 206 if( thread_uid == 0 ) 207 printf("\n[sort] thread[%d] : stage 0 completed\n", thread_uid ); 208 #endif 209 210 ///////////////////////////////// 211 pthread_barrier_wait( &barrier ); 212 213 #if DEBUG_SORT 214 if( thread_uid == 0 ) 215 printf("\n[sort] thread[%d] exit barrier 0\n", thread_uid ); 216 #endif 217 218 #if DISPLAY_ARRAY 219 if( thread_uid == main_uid ) 220 { 221 printf("\n*** array after bubble sort\n"); 222 for( n=0; n<ARRAY_LENGTH; n++) printf("array[%d] = %d\n", n , array0[n] ); 223 } 206 printf("\n[sort] thread[%d] exit barrier 0\n", tid ); 224 207 #endif 225 208 … … 239 222 } 240 223 241 if( (thread_uid & ((1<<i)-1)) == 0 ) 242 { 243 244 #if DEBUG_SORT 245 if( thread_uid == 0 ) 246 
printf("\n[sort] thread[%d] : stage %d start\n", thread_uid , i ); 224 if( (tid & ((1<<i)-1)) == 0 ) 225 { 226 227 #if DEBUG_SORT 228 printf("\n[sort] thread[%d] : stage %d start\n", tid , i ); 247 229 #endif 248 230 merge( src_array, 249 231 dst_array, 250 232 items << (i-1), 251 items * thread_uid, 252 items * (thread_uid + (1 << (i-1))), 253 items * thread_uid ); 254 255 #if DEBUG_SORT 256 if( thread_uid == 0 ) 257 printf("\n[sort] thread[%d] : stage %d completed\n", thread_uid , i ); 233 items * tid, 234 items * (tid + (1 << (i-1))), 235 items * tid ); 236 237 #if DEBUG_SORT 238 printf("\n[sort] thread[%d] : stage %d completed\n", tid , i ); 258 239 #endif 259 240 } … … 263 244 264 245 #if DEBUG_SORT 265 if( thread_uid == 0 ) 266 printf("\n[sort] thread[%d] exit barrier %d\n", thread_uid , i ); 267 #endif 268 269 #if DISPLAY_ARRAY 270 if( thread_uid == main_uid ) 271 { 272 printf("\n*** array after merge %d\n", i ); 273 for( n=0; n<ARRAY_LENGTH; n++) printf("array[%d] = %d\n", n , dst_array[n] ); 274 } 246 printf("\n[sort] thread[%d] exit barrier %d\n", tid , i ); 275 247 #endif 276 248 277 249 } // en for stages 278 250 279 // all threads but the main thread exit 280 if( thread_uid != main_uid ) pthread_exit( NULL ); 251 // sort thread signal completion to main thread 252 pthread_barrier_wait( parent_barrier ); 253 254 #if DEBUG_SORT 255 printf("\n[sort] thread[%d] exit\n", tid ); 256 #endif 257 258 // sort thread exit 259 pthread_exit( NULL ); 281 260 282 261 } // end sort() … … 291 270 unsigned int ncores; // number of cores per cluster 292 271 unsigned int total_threads; // total number of threads 293 unsigned int thread_uid; // user defined thread index 294 unsigned int main_cxy; // cluster identifier for main 295 unsigned int main_x; // X coordinate for main thread 296 unsigned int main_y; // Y coordinate for main thread 297 unsigned int main_lid; // core local index for main thread 298 unsigned int main_uid; // thread user index for main thread 299 
unsigned int x; // X coordinate for a thread 300 unsigned int y; // Y coordinate for a thread 272 unsigned int x; // X coordinate for a sort thread 273 unsigned int y; // Y coordinate for a sort thread 274 unsigned int cxy; // cluster identifier for a sort thead 301 275 unsigned int lid; // core local index for a thread 276 unsigned int tid; // sort thread continuous index 277 pthread_barrierattr_t barrier_attr; // barrier attributes (used for DQT) 302 278 unsigned int n; // index in array to sort 303 pthread_barrierattr_t barrier_attr; // barrier attributes304 279 305 280 unsigned long long start_cycle; … … 314 289 total_threads = x_size * y_size * ncores; 315 290 316 // get core coordinates and user index for the main thread 317 get_core( &main_cxy , & main_lid ); 318 main_x = HAL_X_FROM_CXY( main_cxy ); 319 main_y = HAL_Y_FROM_CXY( main_cxy ); 320 main_uid = (((main_x * y_size) + main_y) * ncores) + main_lid; 291 // compute covering DQT size an level 292 unsigned int z = (x_size > y_size) ? x_size : y_size; 293 unsigned int root_level = (z == 1) ? 0 : (z == 2) ? 1 : (z == 4) ? 2 : (z == 8) ? 
3 : 4; 321 294 322 295 // checks number of threads … … 326 299 (total_threads != 512) && (total_threads != 1024) ) 327 300 { 328 printf("\n[sort error]number of cores must be power of 2\n");301 printf("\n[sort] ERROR : number of cores must be power of 2\n"); 329 302 exit( 0 ); 330 303 } … … 333 306 if ( ARRAY_LENGTH % total_threads) 334 307 { 335 printf("\n[sort error]array size must be multiple of number of threads\n");308 printf("\n[sort] ERROR : array size must be multiple of number of threads\n"); 336 309 exit( 0 ); 337 310 } … … 355 328 if( error ) 356 329 { 357 printf("\n[sort error]cannot initialise barrier\n" );330 printf("\n[sort] ERROR : cannot initialise barrier\n" ); 358 331 exit( 0 ); 359 332 } … … 370 343 } 371 344 345 #if DISPLAY_ARRAY 346 printf("\n*** array before sort\n"); 347 for( n=0; n<ARRAY_LENGTH; n++) printf("array[%d] = %d\n", n , array0[n] ); 348 #endif 349 372 350 #if DEBUG_MAIN 373 351 printf("\n[sort] main completes array init\n"); 374 352 #endif 375 353 376 // launch other threads to execute sort() function 377 // on cores other than the core running the main thread 378 for ( x = 0 ; x < x_size ; x++ ) 379 { 380 for ( y = 0 ; y < y_size ; y++ ) 381 { 354 // build array of arguments for the <sort> threads 355 for (x = 0 ; x < x_size ; x++) 356 { 357 for (y = 0 ; y < y_size ; y++) 358 { 359 // compute cluster identifier 360 cxy = HAL_CXY_FROM_XY( x , y ); 361 382 362 for ( lid = 0 ; lid < ncores ; lid++ ) 383 363 { 384 // compute thread user index (continuous index) 385 thread_uid = (((x * y_size) + y) * ncores) + lid; 386 387 // set arguments for all threads 388 arg[thread_uid].threads = total_threads; 389 arg[thread_uid].thread_uid = thread_uid; 390 arg[thread_uid].main_uid = main_uid; 391 392 // set thread attributes for all threads 393 attr[thread_uid].attributes = PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED; 394 attr[thread_uid].cxy = HAL_CXY_FROM_XY( x , y ); 395 attr[thread_uid].lid = lid; 396 397 if( thread_uid != main_uid ) 
398 { 399 if ( pthread_create( &trdid[thread_uid], // buffer for kernel identifier 400 &attr[thread_uid], // thread attributes 401 &sort, // entry function 402 &arg[thread_uid] ) ) // sort arguments 403 { 404 printf("\n[sort error] main cannot create thread %x \n", thread_uid ); 405 exit( 0 ); 406 } 407 408 #if (DEBUG_MAIN & 1) 409 printf("\n[sort] main created thread %x \n", thread_uid ); 410 #endif 411 } 364 // compute thread continuous index 365 tid = (((x * y_size) + y) * ncores) + lid; 366 367 // initialize 2D array of arguments 368 sort_args[cxy][lid].tid = tid; 369 sort_args[cxy][lid].threads = total_threads; 370 sort_args[cxy][lid].parent_barrier = &parent_barriers[cxy]; 371 372 // initialize 2D array of pointers 373 sort_ptrs[cxy][lid] = &sort_args[cxy][lid]; 412 374 } 413 375 } 414 376 } 415 377 416 378 /////////////////////////// 417 379 get_cycle( &seq_end_cycle ); … … 422 384 #endif 423 385 424 // the main thread run also the sort() function 425 sort( &arg[main_uid] ); 426 427 // wait other threads completion 428 for ( x = 0 ; x < x_size ; x++ ) 429 { 430 for ( y = 0 ; y < y_size ; y++ ) 431 { 432 for ( lid = 0 ; lid < ncores ; lid++ ) 433 { 434 // compute thread continuous index 435 thread_uid = (((x * y_size) + y) * ncores) + lid; 436 437 if( thread_uid != main_uid ) 438 { 439 if( pthread_join( trdid[thread_uid] , NULL ) ) 440 { 441 printf("\n[fft error] in main thread %d joining thread %d\n", 442 main_uid , thread_uid ); 443 exit( 0 ); 444 } 445 446 #if (DEBUG_MAIN & 1) 447 printf("\n[fft] main thread %d joined thread %d\n", main_uid, thread_uid ); 448 #endif 449 450 } 451 } 452 } 386 // create and execute the working threads 387 if( pthread_parallel_create( root_level, 388 &sort, 389 &sort_ptrs[0][0], 390 &parent_barriers[0] ) ) 391 { 392 printf("\n[sort] ERROR : cannot create threads\n"); 393 exit( 0 ); 453 394 } 454 395 … … 456 397 get_cycle( ¶_end_cycle ); 457 398 458 printf("\n[sort] main completes parallel sort at cycle %d\n", 459 (unsigned 
int)para_end_cycle ); 399 #if DEBUG_main 400 printf("\n[sort] main completes parallel sort at cycle %d\n", 401 (unsigned int)para_end_cycle ); 402 #endif 460 403 461 404 // destroy barrier 462 405 pthread_barrier_destroy( &barrier ); 406 407 #if DISPLAY_ARRAY 408 printf("\n*** array after merge %d\n", i ); 409 for( n=0; n<ARRAY_LENGTH; n++) printf("array[%d] = %d\n", n , dst_array[n] ); 410 #endif 463 411 464 412 #if CHECK_RESULT … … 492 440 // build file name 493 441 if( USE_DQT_BARRIER ) 494 snprintf( name , 64 , " sort_dqt_%d_%d_%d", ARRAY_LENGTH, x_size * y_size, ncores );442 snprintf( name , 64 , "p_sort_dqt_%d_%d_%d", ARRAY_LENGTH, x_size * y_size, ncores ); 495 443 else 496 snprintf( name , 64 , " sort_smp_%d_%d_%d", ARRAY_LENGTH, x_size * y_size, ncores );444 snprintf( name , 64 , "p_sort_smp_%d_%d_%d", ARRAY_LENGTH, x_size * y_size, ncores ); 497 445 498 446 // build file pathname … … 515 463 if( stream == NULL ) 516 464 { 517 printf("\n[sort error]cannot open instrumentation file <%s>\n", path );465 printf("\n[sort] ERROR : cannot open instrumentation file <%s>\n", path ); 518 466 exit(0); 519 467 } … … 532 480 if( ret < 0 ) 533 481 { 534 printf("\n[sort error]cannot write to instrumentation file <%s>\n", path );482 printf("\n[sort] ERROR : cannot write to instrumentation file <%s>\n", path ); 535 483 exit(0); 536 484 } … … 548 496 if( ret ) 549 497 { 550 printf("\n[sort error]cannot close instrumentation file <%s>\n", path );498 printf("\n[sort] ERROR : cannot close instrumentation file <%s>\n", path ); 551 499 exit(0); 552 500 }
Note: See TracChangeset
for help on using the changeset viewer.