1 | /* |
---|
2 | * hal_context.c - implementation of Thread Context API for TSAR-MIPS32 |
---|
3 | * |
---|
4 | * Author Alain Greiner (2016,2017,2018,2019) |
---|
5 | * |
---|
6 | * Copyright (c) UPMC Sorbonne Universites |
---|
7 | * |
---|
8 | * This file is part of ALMOS-MKH. |
---|
9 | * |
---|
10 | * ALMOS-MKH is free software; you can redistribute it and/or modify it |
---|
11 | * under the terms of the GNU General Public License as published by |
---|
12 | * the Free Software Foundation; version 2.0 of the License. |
---|
13 | * |
---|
14 | * ALMOS-MKH is distributed in the hope that it will be useful, but |
---|
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
17 | * General Public License for more details. |
---|
18 | * |
---|
19 | * You should have received a copy of the GNU General Public License |
---|
20 | * along with ALMOS-MKH; if not, write to the Free Software Foundation, |
---|
21 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
---|
22 | */ |
---|
23 | |
---|
24 | #include <hal_kernel_types.h> |
---|
25 | #include <hal_switch.h> |
---|
26 | #include <memcpy.h> |
---|
27 | #include <thread.h> |
---|
28 | #include <string.h> |
---|
29 | #include <process.h> |
---|
30 | #include <printk.h> |
---|
31 | #include <vmm.h> |
---|
32 | #include <bits.h> |
---|
33 | #include <core.h> |
---|
34 | #include <cluster.h> |
---|
35 | #include <hal_context.h> |
---|
36 | #include <hal_kentry.h> |
---|
37 | |
---|
/////////////////////////////////////////////////////////////////////////////////////////
// Initial values for the c0_sr slot of a CPU context (TSAR-MIPS32 architecture).
// - SR_SYS_MODE     : loaded by hal_cpu_context_init() for kernel threads.
// - SR_USR_MODE     : loaded by hal_cpu_context_init() for user threads.
// - SR_USR_MODE_FPU : same value as SR_USR_MODE, with bit 29 set
//                     (presumably the CU1 / FPU usable bit - TODO confirm).
/////////////////////////////////////////////////////////////////////////////////////////

#define SR_SYS_MODE       0x0000FF01
#define SR_USR_MODE       0x0000FF13
#define SR_USR_MODE_FPU   (SR_USR_MODE | 0x20000000)
45 | |
---|
/////////////////////////////////////////////////////////////////////////////////////////
// CPU context for the TSAR-MIPS32 architecture.
// One 32 bits slot is defined for each register saved/restored at each context switch:
// - GPR : all, but (zero, k0, k1), plus (hi, lo)
// - CP0 : c0_epc , c0_sr , c0_th
// - CP2 : c2_ptpr , c2_mode
// The numeric suffix of a GPR field name is the index of its slot in the structure.
//
// WARNING : check the two CONFIG_CPU_CTX_SIZE & CONFIG_FPU_CTX_SIZE configuration
//           parameters when modifying this structure.
/////////////////////////////////////////////////////////////////////////////////////////

typedef struct hal_cpu_context_s
{
    // slots 0 to 7
    uint32_t c0_epc;
    uint32_t at_01;
    uint32_t v0_02;
    uint32_t v1_03;
    uint32_t a0_04;
    uint32_t a1_05;
    uint32_t a2_06;
    uint32_t a3_07;

    // slots 8 to 15
    uint32_t t0_08;
    uint32_t t1_09;
    uint32_t t2_10;
    uint32_t t3_11;
    uint32_t t4_12;
    uint32_t t5_13;
    uint32_t t6_14;
    uint32_t t7_15;

    // slots 16 to 23
    uint32_t s0_16;
    uint32_t s1_17;
    uint32_t s2_18;
    uint32_t s3_19;
    uint32_t s4_20;
    uint32_t s5_21;
    uint32_t s6_22;
    uint32_t s7_23;

    // slots 24 to 31
    uint32_t t8_24;
    uint32_t t9_25;
    uint32_t hi_26;
    uint32_t lo_27;
    uint32_t gp_28;
    uint32_t sp_29;
    uint32_t s8_30;
    uint32_t ra_31;

    // slots 32 and 33 : CP2 registers
    uint32_t c2_ptpr;
    uint32_t c2_mode;

    // slots 34 and 35 : CP0 registers
    uint32_t c0_sr;
    uint32_t c0_th;
} hal_cpu_context_t;
102 | |
---|
/////////////////////////////////////////////////////////////////////////////////////////
// FPU context for the TSAR-MIPS32 architecture.
// Slot <i> of the fpu_regs[] array is used by hal_fpu_context_save() and
// hal_fpu_context_restore() to store / load the $f<i> register.
/////////////////////////////////////////////////////////////////////////////////////////

typedef struct hal_fpu_context_s
{
    uint32_t fpu_regs[32];     // one 32 bits slot per FPU register
} hal_fpu_context_t;
112 | |
---|
113 | |
---|
114 | ///////////////////////////////////////////////////////////////////////////////////////// |
---|
115 | // CPU context related functions |
---|
116 | ///////////////////////////////////////////////////////////////////////////////////////// |
---|
117 | |
---|
118 | |
---|
119 | ////////////////////////////////////////////////// |
---|
120 | error_t hal_cpu_context_alloc( thread_t * thread ) |
---|
121 | { |
---|
122 | assert( (sizeof(hal_cpu_context_t) <= CONFIG_CPU_CTX_SIZE) , |
---|
123 | "illegal CPU context size" ); |
---|
124 | |
---|
125 | // allocate memory for cpu_context |
---|
126 | kmem_req_t req; |
---|
127 | req.type = KMEM_KCM; |
---|
128 | req.order = bits_log2( sizeof(hal_cpu_context_t) ); |
---|
129 | req.flags = AF_KERNEL | AF_ZERO; |
---|
130 | |
---|
131 | hal_cpu_context_t * context = kmem_alloc( &req ); |
---|
132 | |
---|
133 | if( context == NULL ) return -1; |
---|
134 | |
---|
135 | // link to thread |
---|
136 | thread->cpu_context = (void *)context; |
---|
137 | return 0; |
---|
138 | |
---|
139 | } // end hal_cpu_context_alloc() |
---|
140 | |
---|
141 | ///////////////////////////////////////////////// |
---|
142 | // The following context slots are initialised |
---|
143 | // GPR : a0_04 / sp_29 / ra_31 |
---|
144 | // CP0 : c0_sr / c0_th / c0_epc |
---|
145 | // CP2 : c2_ptpr / c2_mode |
---|
146 | ///////////////////////////////////////////////// |
---|
147 | void hal_cpu_context_init( thread_t * thread ) |
---|
148 | { |
---|
149 | hal_cpu_context_t * context = (hal_cpu_context_t *)thread->cpu_context; |
---|
150 | |
---|
151 | assert( (context != NULL ), "CPU context not allocated" ); |
---|
152 | |
---|
153 | // compute the PPN for the GPT PT1 |
---|
154 | ppn_t gpt_pt1_ppn = ppm_base2ppn( XPTR( local_cxy , thread->process->vmm.gpt.ptr ) ); |
---|
155 | |
---|
156 | // initialisation depends on thread type |
---|
157 | if( thread->type == THREAD_USER ) |
---|
158 | { |
---|
159 | context->a0_04 = (uint32_t)thread->entry_args; |
---|
160 | context->sp_29 = (uint32_t)thread->user_stack_vseg->max - 8; |
---|
161 | context->ra_31 = (uint32_t)&hal_kentry_eret; |
---|
162 | context->c0_epc = (uint32_t)thread->entry_func; |
---|
163 | context->c0_sr = SR_USR_MODE; |
---|
164 | context->c0_th = (uint32_t)thread; |
---|
165 | context->c2_ptpr = (uint32_t)(gpt_pt1_ppn >> 1); |
---|
166 | context->c2_mode = 0xF; |
---|
167 | } |
---|
168 | else // kernel thread |
---|
169 | { |
---|
170 | context->a0_04 = (uint32_t)thread->entry_args; |
---|
171 | context->sp_29 = (uint32_t)thread->k_stack_base + (uint32_t)thread->k_stack_size - 8; |
---|
172 | context->ra_31 = (uint32_t)thread->entry_func; |
---|
173 | context->c0_sr = SR_SYS_MODE; |
---|
174 | context->c0_th = (uint32_t)thread; |
---|
175 | context->c2_ptpr = (uint32_t)(gpt_pt1_ppn >> 1); |
---|
176 | context->c2_mode = 0x3; |
---|
177 | } |
---|
178 | |
---|
179 | #if DEBUG_HAL_CONTEXT_INIT |
---|
180 | hal_cpu_context_display( XPTR( local_cxy , thread ) ); |
---|
181 | #endif |
---|
182 | |
---|
183 | } // end hal_cpu_context_init() |
---|
184 | |
---|
185 | //////////////////////////////////////////// |
---|
186 | void hal_cpu_context_fork( xptr_t child_xp ) |
---|
187 | { |
---|
188 | cxy_t parent_cxy; // parent thread cluster |
---|
189 | thread_t * parent_ptr; // local pointer on parent thread |
---|
190 | hal_cpu_context_t * parent_context; // local pointer on parent cpu_context |
---|
191 | uint32_t * parent_uzone; // local_pointer on parent uzone (in kernel stack) |
---|
192 | char * parent_ksp; // kernel stack pointer on parent kernel stack |
---|
193 | uint32_t parent_us_base; // parent user stack base value |
---|
194 | |
---|
195 | cxy_t child_cxy; // parent thread cluster |
---|
196 | thread_t * child_ptr; // local pointer on child thread |
---|
197 | hal_cpu_context_t * child_context; // local pointer on child cpu_context |
---|
198 | uint32_t * child_uzone; // local_pointer on child uzone (in kernel stack) |
---|
199 | char * child_ksp; // kernel stack pointer on child kernel stack |
---|
200 | uint32_t child_us_base; // child user stack base value |
---|
201 | |
---|
202 | process_t * child_process; // local pointer on child processs |
---|
203 | void * child_gpt_ptr; // local pointer on child GPT PT1 |
---|
204 | uint32_t child_gpt_ppn; // PPN of child GPT PT1 |
---|
205 | vseg_t * child_us_vseg; // local pointer on child user stack vseg |
---|
206 | |
---|
207 | // allocate a local CPU context in parent kernel stack |
---|
208 | hal_cpu_context_t context; |
---|
209 | |
---|
210 | // get (local) parent thread cluster and local pointer |
---|
211 | parent_cxy = local_cxy; |
---|
212 | parent_ptr = CURRENT_THREAD; |
---|
213 | |
---|
214 | // get (remote) child thread cluster and local pointer |
---|
215 | child_cxy = GET_CXY( child_xp ); |
---|
216 | child_ptr = GET_PTR( child_xp ); |
---|
217 | |
---|
218 | // get local pointer on (local) parent CPU context |
---|
219 | parent_context = parent_ptr->cpu_context; |
---|
220 | |
---|
221 | // get local pointer on (remote) child CPU context |
---|
222 | child_context = hal_remote_lpt( XPTR(child_cxy , &child_ptr->cpu_context) ); |
---|
223 | |
---|
224 | // get local pointer on remote child process |
---|
225 | child_process = hal_remote_lpt( XPTR(child_cxy , &child_ptr->process) ); |
---|
226 | |
---|
227 | // get base and ppn of remote child process GPT PT1 |
---|
228 | child_gpt_ptr = hal_remote_lpt( XPTR(child_cxy , &child_process->vmm.gpt.ptr) ); |
---|
229 | child_gpt_ppn = ppm_base2ppn( XPTR( child_cxy , child_gpt_ptr ) ); |
---|
230 | |
---|
231 | // get local pointer on local parent uzone (in parent kernel stack) |
---|
232 | parent_uzone = parent_ptr->uzone_current; |
---|
233 | |
---|
234 | // compute local pointer on remote child uzone (in child kernel stack) |
---|
235 | child_uzone = (uint32_t *)( (intptr_t)parent_uzone + |
---|
236 | (intptr_t)child_ptr - |
---|
237 | (intptr_t)parent_ptr ); |
---|
238 | |
---|
239 | // update the uzone pointer in child thread descriptor |
---|
240 | hal_remote_spt( XPTR( child_cxy , &child_ptr->uzone_current ) , child_uzone ); |
---|
241 | |
---|
242 | #if DEBUG_HAL_CONTEXT_FORK |
---|
243 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
244 | if( DEBUG_HAL_CONTEXT_FORK < cycle ) |
---|
245 | printk("\n[%s] thread[%x,%x] parent_uzone %x / child_uzone %x / cycle %d\n", |
---|
246 | __FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_uzone, child_uzone, cycle ); |
---|
247 | #endif |
---|
248 | |
---|
249 | // get user stack base for parent thread |
---|
250 | parent_us_base = parent_ptr->user_stack_vseg->min; |
---|
251 | |
---|
252 | // get user stack base for child thread |
---|
253 | child_us_vseg = hal_remote_lpt( XPTR( child_cxy , &child_ptr->user_stack_vseg ) ); |
---|
254 | child_us_base = hal_remote_l32( XPTR( child_cxy , &child_us_vseg->min ) ); |
---|
255 | |
---|
256 | #if DEBUG_HAL_CONTEXT_FORK |
---|
257 | if( DEBUG_HAL_CONTEXT_FORK < cycle ) |
---|
258 | printk("\n[%s] thread[%x,%x] parent_ustack_base %x / child_ustack_base %x\n", |
---|
259 | __FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_us_base, child_us_base ); |
---|
260 | #endif |
---|
261 | |
---|
262 | // get current value of kernel stack pointer in parent kernel stack |
---|
263 | parent_ksp = (char *)hal_get_sp(); |
---|
264 | |
---|
265 | // compute value of kernel stack pointer in child kernel stack |
---|
266 | child_ksp = (char *)((intptr_t)parent_ksp + |
---|
267 | (intptr_t)child_ptr - |
---|
268 | (intptr_t)parent_ptr ); |
---|
269 | |
---|
270 | #if DEBUG_HAL_CONTEXT_FORK |
---|
271 | if( DEBUG_HAL_CONTEXT_FORK < cycle ) |
---|
272 | printk("\n[%s] thread[%x,%x] parent_ksp %x / child_ksp %x\n", |
---|
273 | __FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_ksp, child_ksp ); |
---|
274 | #endif |
---|
275 | |
---|
276 | // compute number of bytes to be copied, depending on current value of parent_ksp |
---|
277 | uint32_t size = (uint32_t)parent_ptr + CONFIG_THREAD_DESC_SIZE - (uint32_t)parent_ksp; |
---|
278 | |
---|
279 | // copy parent kernel stack content to child thread descriptor |
---|
280 | // (this includes the uzone, that is allocated in the kernel stack) |
---|
281 | hal_remote_memcpy( XPTR( child_cxy , child_ksp ), |
---|
282 | XPTR( local_cxy , parent_ksp ), |
---|
283 | size ); |
---|
284 | |
---|
285 | #if DEBUG_HAL_CONTEXT_FORK |
---|
286 | if( DEBUG_HAL_CONTEXT_FORK < cycle ) |
---|
287 | printk("\n[%s] thread[%x,%x] copied kstack from parent (%x) to child (%x)\n", |
---|
288 | __FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, parent_ptr, child_ptr ); |
---|
289 | #endif |
---|
290 | |
---|
291 | // save current values of CPU registers to local copy of CPU context |
---|
292 | hal_do_cpu_save( &context ); |
---|
293 | |
---|
294 | // update three slots in this local CPU context |
---|
295 | context.sp_29 = (uint32_t)child_ksp; |
---|
296 | context.c0_th = (uint32_t)child_ptr; |
---|
297 | context.c2_ptpr = (uint32_t)child_gpt_ppn >> 1; |
---|
298 | |
---|
299 | // From this point, both parent and child execute the following code, |
---|
300 | // but child thread will only execute it after being unblocked by parent thread. |
---|
301 | // They can be distinguished by the (CURRENT_THREAD,local_cxy) values, |
---|
302 | // and we must re-initialise the calling thread pointer from c0_th register |
---|
303 | |
---|
304 | thread_t * this = CURRENT_THREAD; |
---|
305 | |
---|
306 | if( (this == parent_ptr) && (local_cxy == parent_cxy) ) // parent thread |
---|
307 | { |
---|
308 | // parent thread must update four slots in child uzone |
---|
309 | // - UZ_TH : parent and child have different threads descriptors |
---|
310 | // - UZ_SP : parent and child have different user stack base addresses. |
---|
311 | // - UZ_PTPR : parent and child use different Generic Page Tables |
---|
312 | |
---|
313 | // parent thread computes values for child thread |
---|
314 | uint32_t child_sp = parent_uzone[UZ_SP] + child_us_base - parent_us_base; |
---|
315 | uint32_t child_th = (uint32_t)child_ptr; |
---|
316 | uint32_t child_ptpr = (uint32_t)child_gpt_ppn >> 1; |
---|
317 | |
---|
318 | #if DEBUG_HAL_CONTEXT_FORK |
---|
319 | if( DEBUG_HAL_CONTEXT_FORK < cycle ) |
---|
320 | printk("\n[%s] thread[%x,%x] : parent_uz_sp %x / child_uz_sp %x\n", |
---|
321 | __FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, |
---|
322 | parent_uzone[UZ_SP], child_sp ); |
---|
323 | #endif |
---|
324 | |
---|
325 | // parent thread updates the child uzone |
---|
326 | hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_SP] ) , child_sp ); |
---|
327 | hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_TH] ) , child_th ); |
---|
328 | hal_remote_s32( XPTR( child_cxy , &child_uzone[UZ_PTPR] ) , child_ptpr ); |
---|
329 | |
---|
330 | // parent thread copies the local context to remote child context |
---|
331 | hal_remote_memcpy( XPTR( child_cxy , child_context ), |
---|
332 | XPTR( local_cxy , &context ) , |
---|
333 | sizeof( hal_cpu_context_t ) ); |
---|
334 | #if DEBUG_HAL_CONTEXT_FORK |
---|
335 | if( DEBUG_HAL_CONTEXT_FORK < cycle ) |
---|
336 | printk("\n[%s] thread[%x,%x] copied parent CPU context to child CPU context\n", |
---|
337 | __FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid ); |
---|
338 | #endif |
---|
339 | |
---|
340 | // parent thread unblocks child thread |
---|
341 | thread_unblock( XPTR( child_cxy , child_ptr ) , THREAD_BLOCKED_GLOBAL ); |
---|
342 | |
---|
343 | #if DEBUG_HAL_CONTEXT_FORK |
---|
344 | cycle = (uint32_t)hal_get_cycles(); |
---|
345 | trdid_t child_trdid = hal_remote_l32( XPTR( child_cxy , &child_ptr->trdid ) ); |
---|
346 | pid_t child_pid = hal_remote_l32( XPTR( child_cxy , &child_process->pid ) ); |
---|
347 | printk("\n[%s] thread[%x,%x] unblocked child thread[%x,%x] / cycle %d\n", |
---|
348 | __FUNCTION__, parent_ptr->process->pid, parent_ptr->trdid, child_pid, child_trdid, cycle ); |
---|
349 | #endif |
---|
350 | |
---|
351 | } |
---|
352 | |
---|
353 | } // end hal_cpu_context_fork() |
---|
354 | |
---|
355 | ////////////////////////////////////////////// |
---|
356 | void hal_cpu_context_exec( thread_t * thread ) |
---|
357 | { |
---|
358 | // re_initialize CPU context |
---|
359 | hal_cpu_context_init( thread ); |
---|
360 | |
---|
361 | // restore CPU registers ... and jump to user code |
---|
362 | hal_do_cpu_restore( (hal_cpu_context_t *)thread->cpu_context ); |
---|
363 | |
---|
364 | } // end hal_cpu_context_exec() |
---|
365 | |
---|
366 | ///////////////////////////////////////////////// |
---|
367 | void hal_cpu_context_display( xptr_t thread_xp ) |
---|
368 | { |
---|
369 | hal_cpu_context_t * ctx; |
---|
370 | |
---|
371 | // get thread cluster and local pointer |
---|
372 | cxy_t cxy = GET_CXY( thread_xp ); |
---|
373 | thread_t * ptr = GET_PTR( thread_xp ); |
---|
374 | |
---|
375 | // get context pointer |
---|
376 | ctx = (hal_cpu_context_t *)hal_remote_lpt( XPTR( cxy , &ptr->cpu_context ) ); |
---|
377 | |
---|
378 | // get relevant context slots values |
---|
379 | uint32_t sp_29 = hal_remote_l32( XPTR( cxy , &ctx->sp_29 ) ); |
---|
380 | uint32_t ra_31 = hal_remote_l32( XPTR( cxy , &ctx->ra_31 ) ); |
---|
381 | uint32_t c0_sr = hal_remote_l32( XPTR( cxy , &ctx->c0_sr ) ); |
---|
382 | uint32_t c0_epc = hal_remote_l32( XPTR( cxy , &ctx->c0_epc ) ); |
---|
383 | uint32_t c0_th = hal_remote_l32( XPTR( cxy , &ctx->c0_th ) ); |
---|
384 | uint32_t c2_ptpr = hal_remote_l32( XPTR( cxy , &ctx->c2_ptpr ) ); |
---|
385 | uint32_t c2_mode = hal_remote_l32( XPTR( cxy , &ctx->c2_mode ) ); |
---|
386 | |
---|
387 | printk("\n***** CPU context for thread %x in process %x / cycle %d\n" |
---|
388 | " sp_29 = %X ra_31 = %X\n" |
---|
389 | " c0_sr = %X c0_epc = %X c0_th = %X\n" |
---|
390 | " c2_ptpr = %X c2_mode = %X\n", |
---|
391 | ptr, ptr->process->pid, (uint32_t)hal_get_cycles(), |
---|
392 | sp_29 , ra_31, |
---|
393 | c0_sr , c0_epc , c0_th, |
---|
394 | c2_ptpr , c2_mode ); |
---|
395 | |
---|
396 | } // end hal_cpu_context_display() |
---|
397 | |
---|
398 | ///////////////////////////////////////////////// |
---|
399 | void hal_cpu_context_destroy( thread_t * thread ) |
---|
400 | { |
---|
401 | kmem_req_t req; |
---|
402 | |
---|
403 | hal_cpu_context_t * ctx = thread->cpu_context; |
---|
404 | |
---|
405 | // release CPU context if required |
---|
406 | if( ctx != NULL ) |
---|
407 | { |
---|
408 | req.type = KMEM_KCM; |
---|
409 | req.ptr = ctx; |
---|
410 | kmem_free( &req ); |
---|
411 | } |
---|
412 | |
---|
413 | } // end hal_cpu_context_destroy() |
---|
414 | |
---|
415 | |
---|
416 | |
---|
417 | |
---|
418 | |
---|
419 | ////////////////////////////////////////////////// |
---|
420 | error_t hal_fpu_context_alloc( thread_t * thread ) |
---|
421 | { |
---|
422 | assert( (sizeof(hal_fpu_context_t) <= CONFIG_FPU_CTX_SIZE) , |
---|
423 | "illegal CPU context size" ); |
---|
424 | |
---|
425 | // allocate memory for fpu_context |
---|
426 | kmem_req_t req; |
---|
427 | req.type = KMEM_KCM; |
---|
428 | req.flags = AF_KERNEL | AF_ZERO; |
---|
429 | req.order = bits_log2( sizeof(hal_fpu_context_t) ); |
---|
430 | |
---|
431 | hal_fpu_context_t * context = kmem_alloc( &req ); |
---|
432 | |
---|
433 | if( context == NULL ) return -1; |
---|
434 | |
---|
435 | // link to thread |
---|
436 | thread->fpu_context = (void *)context; |
---|
437 | return 0; |
---|
438 | |
---|
439 | } // end hal_fpu_context_alloc() |
---|
440 | |
---|
441 | ////////////////////////////////////////////// |
---|
442 | void hal_fpu_context_init( thread_t * thread ) |
---|
443 | { |
---|
444 | hal_fpu_context_t * context = thread->fpu_context; |
---|
445 | |
---|
446 | assert( (context != NULL) , "fpu context not allocated" ); |
---|
447 | |
---|
448 | memset( context , 0 , sizeof(hal_fpu_context_t) ); |
---|
449 | } |
---|
450 | |
---|
451 | ////////////////////////////////////////// |
---|
452 | void hal_fpu_context_copy( thread_t * dst, |
---|
453 | thread_t * src ) |
---|
454 | { |
---|
455 | assert( (src != NULL) , "src thread pointer is NULL\n"); |
---|
456 | assert( (dst != NULL) , "dst thread pointer is NULL\n"); |
---|
457 | |
---|
458 | // get fpu context pointers |
---|
459 | hal_fpu_context_t * src_context = src->fpu_context; |
---|
460 | hal_fpu_context_t * dst_context = dst->fpu_context; |
---|
461 | |
---|
462 | // copy CPU context from src to dst |
---|
463 | memcpy( dst_context , src_context , sizeof(hal_fpu_context_t) ); |
---|
464 | |
---|
465 | } // end hal_fpu_context_copy() |
---|
466 | |
---|
467 | ///////////////////////////////////////////////// |
---|
468 | void hal_fpu_context_destroy( thread_t * thread ) |
---|
469 | { |
---|
470 | kmem_req_t req; |
---|
471 | |
---|
472 | hal_fpu_context_t * context = thread->fpu_context; |
---|
473 | |
---|
474 | // release FPU context if required |
---|
475 | if( context != NULL ) |
---|
476 | { |
---|
477 | req.type = KMEM_KCM; |
---|
478 | req.ptr = context; |
---|
479 | kmem_free( &req ); |
---|
480 | } |
---|
481 | |
---|
482 | } // end hal_fpu_context_destroy() |
---|
483 | |
---|
484 | ////////////////////////////////////////////// |
---|
485 | void hal_fpu_context_save( xptr_t thread_xp ) |
---|
486 | { |
---|
487 | // allocate a local FPU context in kernel stack |
---|
488 | hal_fpu_context_t src_context; |
---|
489 | |
---|
490 | // get remote child cluster and local pointer |
---|
491 | cxy_t thread_cxy = GET_CXY( thread_xp ); |
---|
492 | thread_t * thread_ptr = GET_PTR( thread_xp ); |
---|
493 | |
---|
494 | asm volatile( |
---|
495 | ".set noreorder \n" |
---|
496 | "swc1 $f0, 0*4(%0) \n" |
---|
497 | "swc1 $f1, 1*4(%0) \n" |
---|
498 | "swc1 $f2, 2*4(%0) \n" |
---|
499 | "swc1 $f3, 3*4(%0) \n" |
---|
500 | "swc1 $f4, 4*4(%0) \n" |
---|
501 | "swc1 $f5, 5*4(%0) \n" |
---|
502 | "swc1 $f6, 6*4(%0) \n" |
---|
503 | "swc1 $f7, 7*4(%0) \n" |
---|
504 | "swc1 $f8, 8*4(%0) \n" |
---|
505 | "swc1 $f9, 9*4(%0) \n" |
---|
506 | "swc1 $f10, 10*4(%0) \n" |
---|
507 | "swc1 $f11, 11*4(%0) \n" |
---|
508 | "swc1 $f12, 12*4(%0) \n" |
---|
509 | "swc1 $f13, 13*4(%0) \n" |
---|
510 | "swc1 $f14, 14*4(%0) \n" |
---|
511 | "swc1 $f15, 15*4(%0) \n" |
---|
512 | "swc1 $f16, 16*4(%0) \n" |
---|
513 | "swc1 $f17, 17*4(%0) \n" |
---|
514 | "swc1 $f18, 18*4(%0) \n" |
---|
515 | "swc1 $f19, 19*4(%0) \n" |
---|
516 | "swc1 $f20, 20*4(%0) \n" |
---|
517 | "swc1 $f21, 21*4(%0) \n" |
---|
518 | "swc1 $f22, 22*4(%0) \n" |
---|
519 | "swc1 $f23, 23*4(%0) \n" |
---|
520 | "swc1 $f24, 24*4(%0) \n" |
---|
521 | "swc1 $f25, 25*4(%0) \n" |
---|
522 | "swc1 $f26, 26*4(%0) \n" |
---|
523 | "swc1 $f27, 27*4(%0) \n" |
---|
524 | "swc1 $f28, 28*4(%0) \n" |
---|
525 | "swc1 $f29, 29*4(%0) \n" |
---|
526 | "swc1 $f30, 30*4(%0) \n" |
---|
527 | "swc1 $f31, 31*4(%0) \n" |
---|
528 | ".set reorder \n" |
---|
529 | : : "r"(&src_context) ); |
---|
530 | |
---|
531 | // get local pointer on target thread FPU context |
---|
532 | void * dst_context = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->fpu_context ) ); |
---|
533 | |
---|
534 | // copy local context to remote child context) |
---|
535 | hal_remote_memcpy( XPTR( thread_cxy , dst_context ), |
---|
536 | XPTR( local_cxy , &src_context ), |
---|
537 | sizeof( hal_fpu_context_t ) ); |
---|
538 | |
---|
539 | } // end hal_fpu_context_save() |
---|
540 | |
---|
541 | ///////////////////////////////////////////////// |
---|
542 | void hal_fpu_context_restore( thread_t * thread ) |
---|
543 | { |
---|
544 | // get pointer on FPU context and cast to uint32_t |
---|
545 | uint32_t ctx = (uint32_t)thread->fpu_context; |
---|
546 | |
---|
547 | asm volatile( |
---|
548 | ".set noreorder \n" |
---|
549 | "lwc1 $f0, 0*4(%0) \n" |
---|
550 | "lwc1 $f1, 1*4(%0) \n" |
---|
551 | "lwc1 $f2, 2*4(%0) \n" |
---|
552 | "lwc1 $f3, 3*4(%0) \n" |
---|
553 | "lwc1 $f4, 4*4(%0) \n" |
---|
554 | "lwc1 $f5, 5*4(%0) \n" |
---|
555 | "lwc1 $f6, 6*4(%0) \n" |
---|
556 | "lwc1 $f7, 7*4(%0) \n" |
---|
557 | "lwc1 $f8, 8*4(%0) \n" |
---|
558 | "lwc1 $f9, 9*4(%0) \n" |
---|
559 | "lwc1 $f10, 10*4(%0) \n" |
---|
560 | "lwc1 $f11, 11*4(%0) \n" |
---|
561 | "lwc1 $f12, 12*4(%0) \n" |
---|
562 | "lwc1 $f13, 13*4(%0) \n" |
---|
563 | "lwc1 $f14, 14*4(%0) \n" |
---|
564 | "lwc1 $f15, 15*4(%0) \n" |
---|
565 | "lwc1 $f16, 16*4(%0) \n" |
---|
566 | "lwc1 $f17, 17*4(%0) \n" |
---|
567 | "lwc1 $f18, 18*4(%0) \n" |
---|
568 | "lwc1 $f19, 19*4(%0) \n" |
---|
569 | "lwc1 $f20, 20*4(%0) \n" |
---|
570 | "lwc1 $f21, 21*4(%0) \n" |
---|
571 | "lwc1 $f22, 22*4(%0) \n" |
---|
572 | "lwc1 $f23, 23*4(%0) \n" |
---|
573 | "lwc1 $f24, 24*4(%0) \n" |
---|
574 | "lwc1 $f25, 25*4(%0) \n" |
---|
575 | "lwc1 $f26, 26*4(%0) \n" |
---|
576 | "lwc1 $f27, 27*4(%0) \n" |
---|
577 | "lwc1 $f28, 28*4(%0) \n" |
---|
578 | "lwc1 $f29, 29*4(%0) \n" |
---|
579 | "lwc1 $f30, 30*4(%0) \n" |
---|
580 | "lwc1 $f31, 31*4(%0) \n" |
---|
581 | ".set reorder \n" |
---|
582 | : : "r"(ctx) ); |
---|
583 | |
---|
584 | } // end hal_cpu_context_restore() |
---|
585 | |
---|
586 | |
---|