source: trunk/kernel/mm/vmm.h @ 669

Last change on this file since 669 was 657, checked in by alain, 5 years ago

Introduce remote_buf.c/.h & socket.c/.h files.
Update dev_nic.c/.h files.

File size: 28.5 KB
Line 
1/*
2 * vmm.h - virtual memory management related operations
3 *
4 * Authors   Ghassan Almaless (2008,2009,2010,2011, 2012)
5 *           Alain Greiner (2016,2017,2018,2019,2020)
6 *
7 * Copyright (c) UPMC Sorbonne Universites
8 *
9 * This file is part of ALMOS-MKH.
10 *
11 * ALMOS-MKH is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; version 2.0 of the License.
14 *
15 * ALMOS-MKH is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#ifndef _VMM_H_
26#define _VMM_H_
27
28#include <hal_kernel_types.h>
29#include <bits.h>
30#include <list.h>
31#include <queuelock.h>
32#include <remote_queuelock.h>
33#include <hal_gpt.h>
34#include <vseg.h>
35#include <page.h>
36
37/****  Forward declarations  ****/
38
39struct process_s;
40struct vseg_s;
41
42/*********************************************************************************************
43 * This structure defines the STACK allocator used by the VMM to dynamically handle
44 * vseg allocation or release requests for an user thread.
45 * This allocator handles a fixed size array of fixed size slots in STACK zone of user space.
46 * The stack size and the number of slots are defined by the CONFIG_VMM_STACK_SIZE, and
47 * CONFIG_VMM_STACK_BASE parameters.
48 * Each slot can contain one user stack vseg. The first 4 Kbytes page in the slot is not
49 * mapped to detect stack overflow.
50 * In this implementation, the slot index is defined by the user thread LTID.
51 * All allocated stacks are registered in a bitmap defining the STACK zone state:
52 * - The allocator checks that the requested slot has not been already allocated, and sets the
53 *   corresponding bit in the bitmap.
54 * - The de-allocator resets the corresponding bit in the bitmap.
55 ********************************************************************************************/
56
57typedef struct stack_mgr_s
58{
59    busylock_t     lock;               /*! lock protecting STACK allocator                  */
60    vpn_t          vpn_base;           /*! first page of STACK zone                         */
61    bitmap_t       bitmap;             /*! bitmap: one bit per slot, set when allocated     */
62}
63stack_mgr_t;
64
65/*********************************************************************************************
66 * This structure defines the MMAP allocator used by the VMM to dynamically handle MMAP vsegs
67 * requested or released by an user process. It must be called in the reference cluster.
68 * - allocation policy :
69 *   This allocator implements the buddy algorithm. All allocated vsegs occupy an integer
70 *   number of pages, that is power of 2, and are aligned (vpn_base is multiple of vpn_size).
71 *   The requested number of pages is rounded if required. The global allocator state is
72 *   completely defined by the free_list_root[] array indexed by the vseg order.
73 *   These free lists are local, but are implemented as xlist because we use the existing
74 *   vseg.xlist to register a free vseg in its free list.
75 * - release policy :
76 *   A released vseg is recursively merged with the "buddy" vseg when it is free, in
77 *   order to build the largest possible aligned free vsegs. The resulting vseg.vpn_size
78 *   field is updated.
79 * Implementation note:
80 * The only significant (and documented) fields in the vsegs registered in the MMAP allocator
81 * free lists are "xlist", "vpn_base", and "vpn_size".
82 ********************************************************************************************/
83
84typedef struct mmap_mgr_s
85{
86    busylock_t     lock;               /*! lock protecting MMAP allocator                   */
87    vpn_t          vpn_base;           /*! first page of MMAP zone                          */
88    vpn_t          vpn_size;           /*! number of pages in MMAP zone                     */
89    xlist_entry_t  free_list_root[CONFIG_VMM_HEAP_MAX_ORDER + 1];  /*! one free-list root per order */
90}
91mmap_mgr_t;
92
93/*********************************************************************************************
94 * This structure defines the Virtual Memory Manager for a given process in a given cluster.
95 * This local VMM implements four main services:
96 * 1) It contains the local copy of vseg list (VSL), only complete in reference.
97 * 2) It contains the local copy of the generic page table (GPT), only complete in reference.
98 * 3) The stack manager dynamically allocates virtual memory space for the STACK vsegs.
99 * 4) The mmap manager dynamically allocates virtual memory for the (FILE/ANON/REMOTE) vsegs.
100 *********************************************************************************************
101 * Implementation notes:
102 * 1. In most clusters, the VSL and GPT are only partial copies of the reference VSL and GPT
103 *    structures, stored in the reference cluster.
104 * 2. The VSL contains only local vsegs, but it is implemented as an xlist, and protected by
105 *    a remote_queuelock, because it can be accessed by a thread running in a remote cluster.
106 *    An example is the vmm_fork_copy() function.
107 * 3. The GPT in the reference cluster can be directly accessed by remote threads to handle
108 *    false page-fault (page is mapped in the reference GPT, but the PTE copy is missing
109 *    in the local GPT). As each PTE can be protected by a specific GPT_LOCKED attribute
110 *    for exclusive access, it is NOT protected by a global lock.
111 ********************************************************************************************/
112
113typedef struct vmm_s
114{
115        remote_queuelock_t vsl_lock;            /*! lock protecting the local VSL               */
116        xlist_entry_t      vsegs_root;          /*! Virtual Segment List root                   */
117        uint32_t           vsegs_nr;            /*! total number of local vsegs                 */
118
119    gpt_t              gpt;                 /*! Generic Page Table descriptor               */
120
121    stack_mgr_t        stack_mgr;           /*! embedded STACK vsegs allocator              */
122
123    mmap_mgr_t         mmap_mgr;            /*! embedded MMAP vsegs allocator               */
124
125        uint32_t           false_pgfault_nr;    /*! false page fault counter (for all threads)  */
126        uint32_t           local_pgfault_nr;    /*! local page fault counter (for all threads)  */
127        uint32_t           global_pgfault_nr;   /*! global page fault counter (for all threads) */
128    uint32_t           false_pgfault_cost;  /*! cumulated cost (for all threads)            */
129    uint32_t           local_pgfault_cost;  /*! cumulated cost (for all threads)            */
130    uint32_t           global_pgfault_cost; /*! cumulated cost (for all threads)            */
131
132    vpn_t              args_vpn_base;       /*! args vseg first page                        */
133    vpn_t              envs_vpn_base;       /*! envs vseg first page                        */
134        vpn_t              code_vpn_base;       /*! code vseg first page                        */
135        vpn_t              data_vpn_base;       /*! data vseg first page                        */
136    vpn_t              heap_vpn_base;       /*! heap zone first page                        */
137
138        intptr_t           entry_point;         /*! main thread entry point                     */
139}
140vmm_t;
141
142/*********************************************************************************************
143 * This function makes only a partial initialisation of the VMM attached to an user
144 * process: It initializes the STACK and MMAP allocators, and the VSL lock.
145 * - The GPT has been previously created, with the hal_gpt_create() function.
146 * - The "kernel" vsegs are previously registered, by the hal_vmm_kernel_update() function.
147 * - The "code" and "data" vsegs are registered by the elf_load_process() function.
148 * - The "stack" vsegs are dynamically registered by the thread_user_create() function.
149 * - The "file", "anon", "remote" vsegs are dynamically registered by the mmap() syscall.
150 *********************************************************************************************
151 * @ process   : pointer on process descriptor
152 * @ return 0 if success / return -1 if failure.
153 ********************************************************************************************/
154error_t vmm_user_init( struct process_s * process );
155
156/*********************************************************************************************
157 * This function re-initialises the VMM attached to an user process to prepare a new
158 * call to the vmm_user_init() function after an exec() syscall.
159 * It removes from the VMM of the process identified by the <process> argument all
160 * user vsegs, by calling the vmm_remove_vseg() function.
161 * - the vsegs are removed from the VSL.
162 * - the corresponding GPT entries are removed from the GPT.
163 * - the physical pages are released to the relevant kmem when they are not shared.
164 * The VSL and the GPT are not modified for the kernel vsegs.
165 *********************************************************************************************
166 * @ process   : pointer on process descriptor.
167 ********************************************************************************************/
168void vmm_user_reset( struct process_s * process );
169
170/*********************************************************************************************
171 * This function is called by the process_make_fork() function. It partially copies
172 * the content of a remote parent process VMM to the local child process VMM:
173 * - The KERNEL vsegs required by the architecture must have been previously
174 *   created in the child VMM, using the hal_vmm_kernel_update() function.
175 * - The DATA, ANON, REMOTE vsegs registered in the parent VSL are registered in the
176 *   child VSL. All valid PTEs in parent GPT are copied to the child GPT.
177 *   The WRITABLE  and COW flags are not modified, as it will be done later for those
178 *   shared pages by the vmm_set_cow() function.
179 * - The CODE vsegs registered in the parent VSL are registered in the child VSL, but the
180 *   GPT entries are not copied in the child GPT, and will be dynamically updated from
181 *   the .elf file when a page fault is reported.
182 * - The FILE vsegs registered in the parent VSL are registered in the child VSL, and all
183 *   valid GPT entries in parent GPT are copied to the child GPT. The COW flag is not set.
184 * - No STACK vseg is copied from  parent VMM to child VMM: the child stack vseg is copied
185 *   later from the cluster containing the user thread requesting the fork().
186 *********************************************************************************************
187 * @ child_process     : local pointer on local child process descriptor.
188 * @ parent_process_xp : extended pointer on remote parent process descriptor.
189 * @ return 0 if success / return -1 if failure.
190 ********************************************************************************************/
191error_t vmm_fork_copy( struct process_s * child_process,
192                       xptr_t             parent_process_xp );
193
194/*********************************************************************************************
195 * This function is called by the process_make_fork() function to update the COW attribute
196 * in the parent process vsegs. It sets the COW flag, and resets the WRITABLE flag of
197 * all GPT entries of the DATA, MMAP, and REMOTE vsegs of the <process> argument.
198 * It must be called by a thread running in the reference cluster, that contains the complete
199 * VSL and GPT (use the rpc_vmm_set_cow_client() when the calling thread client is remote).
200 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
201 * using the list of copies stored in the owner process, and using remote_write accesses to
202 * update the remote GPTs. It atomically increments the pending_fork counter, in all involved
203 * physical page descriptors. It cannot fail, as only mapped entries in GPTs are updated.
204 *********************************************************************************************
205 * @ process   : local pointer on local reference process descriptor.
206 ********************************************************************************************/
207void vmm_set_cow( struct process_s * process );
208
209/*********************************************************************************************
210 * This function modifies the vseg identified by <process> and <base> arguments in all
211 * clusters containing a VSL copy, as defined by <new_base> and <new_size> arguments.
212 * The new vseg, defined by the <new_base> and <new_size> arguments must be included
213 * in the existing vseg. The target vseg size and base fields are modified in the VSL.
214 * This is done in all clusters containing a VMM copy to maintain VMM coherence.
215 * It is called by the sys_munmap() and dev_fbf_resize_window() functions.
216 * It can be called by a thread running in any cluster, as it uses the vmm_resize_vseg() in
217 * the local cluster, and parallel RPC_VMM_RESIZE_VSEG for remote clusters.
218 * It cannot fail, as only vseg registered  in VSL copies are updated.
219 *********************************************************************************************
220 * @ process   : local pointer on process descriptor.
221 * @ base      : current vseg base address in user space.
222 * @ new_base  : new vseg base.
223 * @ new_size  : new vseg size.
224 ********************************************************************************************/
225void vmm_global_resize_vseg( struct process_s * process,
226                             intptr_t           base,
227                             intptr_t           new_base,
228                             intptr_t           new_size );
229
230/*********************************************************************************************
231 * This function removes the vseg identified by the <process> and <base> arguments from
232 * the VSL and remove all associated PTE entries from the GPT.
233 * This is done in all clusters containing a VMM copy to maintain VMM coherence.
234 * It is called by the sys_munmap() and dev_fbf_resize_window() functions.
235 * It can be called by a thread running in any cluster, as it uses the vmm_remove_vseg() in
236 * the local cluster, and parallel RPC_VMM_REMOVE_VSEG for remote clusters.
237 * It cannot fail, as only vseg registered  in VSL copies are deleted.
238 *********************************************************************************************
239 * @ process  : local pointer on process descriptor.
240 * @ base     : vseg base address in user space.
241 ********************************************************************************************/
242void vmm_global_delete_vseg( struct process_s * process,
243                             intptr_t           base );
244
245/*********************************************************************************************
246 * This function modifies one GPT entry identified by the <process> and <vpn> arguments
247 * in all clusters containing a process copy. It maintains coherence in GPT copies,
248 * using remote_write accesses.
249 * It cannot fail, as only mapped PTE2 in GPT copies are updated.
250 *********************************************************************************************
251 * @ process   : local pointer on local process descriptor.
252 * @ vpn       : PTE index.
253 * @ attr      : PTE / attributes.
254 * @ ppn       : PTE / physical page index.
255 ********************************************************************************************/
256void vmm_global_update_pte( struct process_s * process,
257                            vpn_t              vpn,
258                            uint32_t           attr,
259                            ppn_t              ppn );
260
261/*********************************************************************************************
262 * This function deletes, in the local cluster, all vsegs registered in the VSL
263 * of the process identified by the <process> argument. For each vseg:
264 * - it unmaps all vseg PTEs from the GPT (release the physical pages when required).
265 * - it removes the vseg from the local VSL.
266 * - it releases the memory allocated to the local vseg descriptors.
267 * - it releases the memory allocated to the GPT itself.
268 *********************************************************************************************
269 * @ process   : pointer on process descriptor.
270 ********************************************************************************************/
271void vmm_destroy( struct process_s * process );
272
273/*********************************************************************************************
274 * This function scans the list of vsegs registered in the VMM of a given process descriptor
275 * to check if a given virtual region (defined by a base and size) overlap an existing vseg.
276 *********************************************************************************************
277 * @ process  : pointer on process descriptor.
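278 * @ base     : region base (declared as vpn_t: presumably a page number, not a byte address).
279 * @ size     : region size (declared as vpn_t: presumably a number of pages, not bytes).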
280 * @ returns NULL if no conflict / return conflicting vseg pointer if conflict.
281 ********************************************************************************************/
282vseg_t * vmm_check_conflict( struct process_s * process,
283                             vpn_t              base,
284                             vpn_t              size );
285
286/*********************************************************************************************
287 * This function allocates memory for a vseg descriptor, initialises it, and registers it
288 * in the VSL of the local process descriptor.
289 * - For the FILE, ANON, & REMOTE types, it does not use the <base> argument, but uses
290 *   the specific VMM MMAP allocator.
291 * - For the STACK type, it does not use the <base> and <size> arguments, but uses
292 *   the specific VMM STACK allocator.
293 * It checks collision with pre-existing vsegs.
294 * To comply with the "on-demand" paging policy, this function does NOT modify the GPT,
295 * and does not allocate physical memory for vseg data.
296 * It should be called by a local thread (could be a RPC thread if the client thread is not
297 * running in the reference cluster).
298 *********************************************************************************************
299 * @ process     : pointer on local process descriptor.
300 * @ type        : vseg type.
301 * @ base        : vseg base address (or user thread ltid for an user stack vseg).
302 * @ size        : vseg size (bytes).
303 * @ file_offset : offset in file for CODE, DATA, FILE types.
304 * @ file_size   : can be smaller than "size" for DATA type.
305 * @ mapper_xp   : extended pointer on mapper for CODE, DATA, FILE types.
306 * @ cxy         : physical mapping cluster (for non distributed vsegs).
307 * @ returns pointer on vseg if success / returns NULL if no memory, or conflict.
308 ********************************************************************************************/
309vseg_t * vmm_create_vseg( struct process_s * process,
310                          vseg_type_t        type,
311                          intptr_t           base,
312                              uint32_t           size,
313                          uint32_t           file_offset,
314                          uint32_t           file_size,
315                          xptr_t             mapper_xp,
316                          cxy_t              cxy );
317
318/*********************************************************************************************
319 * This function removes from the VMM of a local process descriptor, identified by
320 * the <process> argument, the vseg identified by the <vseg> argument. 
321 * It is called by the vmm_user_reset(), vmm_global_delete_vseg(), vmm_destroy() functions.
322 * It must be called by a local thread, running in the cluster containing the modified VMM.
323 * Use the RPC_VMM_REMOVE_VSEG if required.
324 * It makes a kernel panic if the process is not registered in the local cluster.
325 * For all vseg types, the vseg is detached from local VSL, and all associated PTEs are
326 * unmapped from local GPT. Other actions depend on the vseg type:
327 * Regarding the vseg descriptor release:
328 *   . for ANON and REMOTE, the vseg is not released, but registered in local zombi_list.
329 *   . for STACK the vseg is released to the local stack allocator.
330 *   . for all other types, the vseg descriptor is released to the local kmem.
331 * Regarding the physical pages release:
332 *   . for KERNEL and FILE, the pages are not released to kmem.
333 *   . for CODE and STACK, the pages are released to local kmem.
334 *   . for DATA, ANON and REMOTE, the pages are released to relevant kmem only when
335 *     the local cluster is the reference cluster.
336 * The VSL lock protecting the VSL must be taken by the caller.
337 *********************************************************************************************
338 * @ process  : local pointer on process descriptor.
339 * @ vseg     : local pointer on target vseg.
340 ********************************************************************************************/
341void vmm_remove_vseg( struct process_s * process,
342                      struct vseg_s    * vseg );
343
344/*********************************************************************************************
345 * This function resizes a local vseg identified by the <process> and <vseg> arguments.
346 * Both the "size" and "base" fields are modified in the process VSL. When the new vseg
347 * contains fewer pages than the target vseg, the relevant pages are removed from the GPT.
348 * It is called by the vmm_global_resize_vseg() and dev_fbf_resize_window() functions.
349 * It must be called by a local thread, running in the cluster containing the modified VSL.
350 * Use the RPC_VMM_RESIZE_VSEG if required.
351 * The VSL lock protecting the VSL must be taken by the caller.
352 *********************************************************************************************
353 * @ process   : local pointer on process descriptor
354 * @ vseg      : local pointer on target vseg
355 * @ new_base  : vseg base address
356 * @ new_size  : vseg size (bytes)
357 ********************************************************************************************/
358void vmm_resize_vseg( struct process_s * process,
359                      struct vseg_s    * vseg,
360                      intptr_t           new_base,
361                      intptr_t           new_size );
362
363/*********************************************************************************************
364 * This function checks that a given virtual address <vaddr> in a given <process> is
365 * contained in a registered vseg. It can be called by any thread running in any cluster.
366 * - if the vseg is registered in the local process VSL, it returns the local vseg pointer.
367 * - if the vseg is missing in local VSL, it accesses directly the reference VSL.
368 * - if the vseg is found in reference VSL, it updates the local VSL and returns this pointer.
369 * It returns an error when the vseg is missing in the reference VMM, or when there is
370 * not enough memory for a new vseg descriptor in the calling thread cluster.
371 * For both the local and the reference VSL, it takes the VSL lock before scanning the VSL.
372 *********************************************************************************************
373 * @ process   : [in] pointer on process descriptor.
374 * @ vaddr     : [in] virtual address.
375 * @ vseg      : [out] local pointer on local vseg.
376 * @ returns 0 if success / returns -1 if user error
377 ********************************************************************************************/
378error_t vmm_get_vseg( struct process_s  * process,
379                      intptr_t            vaddr,
380                      vseg_t           ** vseg );           
381
382/*********************************************************************************************
383 * This function is called by the generic exception handler in case of page-fault event,
384 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
385 * It checks the missing VPN and returns an user error if it is not in a registered vseg.
386 * For a legal VPN, there are actually 3 cases:
387 * 1) if the missing VPN belongs to a private vseg (STACK or CODE segment types, non
388 *    replicated in all clusters), it allocates a new physical page, computes the attributes,
389 *    depending on vseg type, and updates directly the local GPT.
390 * 2) if the missing VPN belongs to a public vseg, it can be a false page-fault, when the VPN
391 *    is mapped in the reference GPT, but not in the local GPT. For this false page-fault,
392 *    the local GPT is simply updated from the reference GPT.
393 * 3) if the missing VPN is public, and unmapped in the ref GPT, it is a true page fault.
394 *    The calling thread  allocates a new physical page, computes the attributes, depending
395 *    on vseg type, and updates directly (without RPC) the local GPT and the reference GPT.
396 *    Other GPT copies will be updated on demand.
397 * Concurrent accesses to the GPT(s) are handled, by locking the target PTE before accessing
398 * the local and/or reference GPT(s).
399 *********************************************************************************************
400 * @ process  : local pointer on local process.
401 * @ vpn      : VPN of the missing PTE.
402 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
403 ********************************************************************************************/
404error_t vmm_handle_page_fault( struct process_s * process,
405                               vpn_t              vpn );
406
407/*********************************************************************************************
408 * This function is called by the generic exception handler in case of WRITE violation event,
409 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
410 * It returns a kernel panic if the faulty VPN is not in a registered vseg, or is not mapped.
411 * For a legal mapped vseg there are two cases:
412 * 1) If the missing VPN belongs to a private vseg (STACK), it accesses only the local GPT.
413 *    It accesses the forks counter in the current physical page descriptor.
414 *    If there is a pending fork, it allocates a new physical page from the cluster defined
415 *    by the vseg type, copies the old physical page content to the new physical page,
416 *    and decrements the pending_fork counter in old physical page descriptor.
417 *    Finally, it resets the COW flag and sets the WRITE flag in local GPT.
418 * 2) If the missing VPN is public, it accesses only the reference GPT.
419 *    It accesses the forks counter in the current physical page descriptor.
420 *    If there is a pending fork, it allocates a new physical page from the cluster defined
421 *    by the vseg type, copies the old physical page content to the new physical page,
422 *    and decrements the pending_fork counter in old physical page descriptor.
423 *    Finally it calls the vmm_global_update_pte() function to reset the COW flag and set
424 *    the WRITE flag in all the GPT copies, using a RPC if the reference cluster is remote.
425 * In both cases, concurrent accesses to the GPT are handled by locking the target PTE
426 * before accessing the GPT.
427 *********************************************************************************************
428 * @ process   : pointer on local process descriptor copy.
429 * @ vpn       : VPN of the faulting PTE.
430 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
431 ********************************************************************************************/
432error_t vmm_handle_cow( struct process_s * process,
433                        vpn_t              vpn );
434
435/*********************************************************************************************
436 * This function is called by the vmm_get_pte() function when a page is unmapped.
437 * Depending on the vseg type, defined by the <vseg> argument, it returns the PPN
438 * (Physical Page Number) associated to a missing page defined by the <vpn> argument.
439 * - For the FILE type, it returns directly the physical page from the file mapper.
440 * - For the CODE and DATA types, it allocates a new physical page from the cluster defined
441 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
442 *   and initialize this page from the .elf file mapper.
443 * - For all other types, it allocates a new physical page from the cluster defined
444 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
445 *   but the new page is not initialized.
446 *********************************************************************************************
447 * @ vseg   : local pointer on vseg containing the missing page.
448 * @ vpn    : Virtual Page Number identifying the missing page.
449 * @ ppn    : [out] returned Physical Page Number.
450 * @ returns 0 if success / returns EINVAL or ENOMEM if error.
451 ********************************************************************************************/
452error_t vmm_get_one_ppn( vseg_t * vseg,
453                         vpn_t    vpn,
454                         ppn_t  * ppn );
455
456#endif /* _VMM_H_ */
Note: See TracBrowser for help on using the repository browser.