source: trunk/kernel/mm/vmm.h @ 617

Last change on this file since 617 was 614, checked in by alain, 6 years ago

1) introduce a dev_ioc_sync_write() function in IOC API,

to improve the DEVFS synchronous update.

2) fix a big bug in both the user_dir_create() and user_dir_destroy()

functions: add an extended pointer on the reference client process
in the function's arguments.

File size: 27.6 KB
RevLine 
[1]1/*
2 * vmm.h - virtual memory management related operations
3 *
4 * Authors   Ghassan Almaless (2008,2009,2010,2011, 2012)
5 *           Mohamed Lamine Karaoui (2015)
[437]6 *           Alain Greiner (2016,2017,2018)
[18]7 *
[1]8 * Copyright (c) UPMC Sorbonne Universites
9 *
10 * This file is part of ALMOS-MKH.
11 *
12 * ALMOS-MKH is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; version 2.0 of the License.
15 *
16 * ALMOS-MKH is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
23 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#ifndef _VMM_H_
27#define _VMM_H_
28
[457]29#include <hal_kernel_types.h>
[1]30#include <bits.h>
31#include <list.h>
[567]32#include <queuelock.h>
[1]33#include <hal_gpt.h>
34#include <vseg.h>
35#include <page.h>
36
37/****  Forward declarations  ****/
38
39struct process_s;
[611]40struct vseg_s;
[1]41
42/*********************************************************************************************
[407]43 * This structure defines the STACK allocator used by the VMM to dynamically handle
[611]44 * vseg allocation or release requests for an user thread.
45 * This allocator handles a fixed size array of fixed size slots in STACK zone of user space.
[1]46 * The stack size and the number of slots are defined by the CONFIG_VMM_STACK_SIZE, and
[407]47 * CONFIG_VMM_STACK_BASE parameters.
[611]48 * Each slot can contain one user stack vseg. The first 4 Kbytes page in the slot is not
49 * mapped to detect stack overflow.
[1]50 * The slot index can be computed from the slot base address, and conversely.
51 * All allocation / release operations are registered in the stack manager bitmap, that
[611]52 * completely defines the STACK zone status.
[1]53 ********************************************************************************************/
54
55typedef struct stack_mgr_s
56{
[567]57    busylock_t     lock;               /*! lock protecting STACK allocator                  */
[1]58    vpn_t          vpn_base;           /*! first page of STACK zone                         */
59    bitmap_t       bitmap;             /*! bit vector of allocated stacks                   */
60}
61stack_mgr_t;
62
63/*********************************************************************************************
[407]64 * This structure defines the MMAP allocator used by the VMM to dynamically handle 
[1]65 * MMAP vsegs requested or released by an user process.
[18]66 * This allocator should be only used in the reference cluster.
67 * - allocation policy : all allocated vsegs occupy an integer number of pages that is
[1]68 *   power of 2, and are aligned on a page boundary. The requested number of pages is
[18]69 *   rounded if required. The first_free_vpn variable defines completely the MMAP zone state.
[1]70 *   It is never decremented, as the released vsegs are simply registered in a zombi_list.
[18]71 *   The relevant zombi_list is checked first for each allocation request.
[1]72 * - release policy : a released MMAP vseg is registered in an array of zombi_lists.
73 *   This array is indexed by log2(number of pages), and each entry contains the root of
74 *   a local list of zombi vsegs that have the same size. The physical memory allocated
75 *   for a zombi vseg descriptor is not released, to use the "list" field.
76 *   This physical memory allocated for MMAP vseg descriptors is actually released
77 *   when the VMM is destroyed.
78 ********************************************************************************************/
79
80typedef struct mmap_mgr_s
81{
[567]82    busylock_t     lock;               /*! lock protecting MMAP allocator                   */
[1]83    vpn_t          vpn_base;           /*! first page of MMAP zone                          */
84    vpn_t          vpn_size;           /*! number of pages in MMAP zone                     */
85    vpn_t          first_free_vpn;     /*! first free page in MMAP zone                     */
86    list_entry_t   zombi_list[32];     /*! roots of zombi vsegs lists (one list per size)   */
87}
88mmap_mgr_t;
89
90/*********************************************************************************************
91 * This structure defines the Virtual Memory Manager for a given process in a given cluster.
[585]92 * This local VMM implements four main services:
[567]93 * 1) It contains the local copy of vseg list (VSL), only complete in reference.
94 * 2) It contains the local copy of the generic page table (GPT), only complete in reference.
[408]95 * 3) The stack manager dynamically allocates virtual memory space for the STACK vsegs.
96 * 4) The mmap manager dynamically allocates virtual memory for the (FILE/ANON/REMOTE) vsegs.
97 *********************************************************************************************
98 * Implementation notes:
[585]99 * 1. In most clusters, the VSL and GPT are only partial copies of the reference VSL and GPT
100 *    structures, stored in the reference cluster.
101 * 2. The VSL contains only local vsegs, but it is implemented as an xlist, and protected by
[408]102 *    a remote_rwlock, because it can be accessed by a thread running in a remote cluster.
103 *    An example is the vmm_fork_copy() function.
[585]104 * 3. The GPT in the reference cluster can be directly accessed by remote threads to handle
105 *    false page-fault (page is mapped in the reference GPT, but the PTE copy is missing
106 *    in the local GPT). It is also protected by a remote_rwlock.
[1]107 ********************************************************************************************/
108
109typedef struct vmm_s
110{
[567]111        remote_rwlock_t  vsegs_lock;         /*! lock protecting the local VSL                  */
[585]112        xlist_entry_t    vsegs_root;         /*! Virtual Segment List (complete in reference)   */
[408]113        uint32_t         vsegs_nr;           /*! total number of local vsegs                    */
[1]114
[585]115    remote_rwlock_t  gpt_lock;           /*! lock protecting the local GPT                  */
[408]116    gpt_t            gpt;                /*! Generic Page Table (complete in reference)     */
[1]117
[408]118    stack_mgr_t      stack_mgr;          /*! embedded STACK vsegs allocator                 */
119    mmap_mgr_t       mmap_mgr;           /*! embedded MMAP vsegs allocator                  */
[1]120
[408]121        uint32_t         pgfault_nr;         /*! page fault counter (instrumentation)           */
[1]122
[408]123    vpn_t            kent_vpn_base;      /*! kentry vseg first page                         */
124    vpn_t            args_vpn_base;      /*! args vseg first page                           */
125    vpn_t            envs_vpn_base;      /*! envs zone first page                           */
126    vpn_t            heap_vpn_base;      /*! heap zone first page                           */
127        vpn_t            code_vpn_base;      /*! code zone first page                           */
128        vpn_t            data_vpn_base;      /*! data zone first page                           */
[1]129
[408]130        intptr_t         entry_point;        /*! main thread entry point                        */
[1]131}
132vmm_t;
133
134/*********************************************************************************************
[406]135 * This function initialises the virtual memory manager attached to an user process.
[407]136 * - It initializes the STACK and MMAP allocators.
137 * - It registers the "kentry", "args", "envs" vsegs in the VSL.
[409]138 * - It initializes the generic page table, calling the HAL specific hal_gpt_init() function.
139 * - For TSAR it maps all pages for the "kentry" vseg, that must be identity mapping.
[614]140 *********************************************************************************************
141 * Implementation notes:
[407]142 * - The "code" and "data" vsegs are registered by the elf_load_process() function.
143 * - The "stack" vsegs are dynamically created by the thread_user_create() function.
[409]144 * - The "file", "anon", "remote" vsegs are dynamically created by the mmap() syscall.
[1]145 *********************************************************************************************
146 * @ process   : pointer on process descriptor
[415]147 * @ return 0 if success / return -1 if failure.
[1]148 ********************************************************************************************/
[415]149error_t vmm_init( struct process_s * process );
[1]150
151/*********************************************************************************************
[407]152 * This function displays on TXT0 the list of registered vsegs for a given <process>.
[429]153 * It must be executed by a thread running in reference cluster.
154 * If the <mapping> argument is true, it displays for each vseg all mapped PTEs in GPT.
[23]155 *********************************************************************************************
[407]156 * @ process   : pointer on process descriptor.
157 * @ mapping   : detailed mapping if true.
158 ********************************************************************************************/
159void vmm_display( struct process_s * process,
160                  bool_t             mapping );
161
[610]162/*********************************************************************************************
[433]163 * This function is called by the process_make_fork() function. It partially copies
[408]164 * the content of a remote parent process VMM to the local child process VMM:
165 * - all DATA, MMAP, REMOTE vsegs registered in the parent VSL are registered in the child
166 *   VSL, and all valid GPT entries in parent GPT are copied to the child GPT.
167 *   The WRITABLE flag is reset and the COW flag is set in child GPT.
168 * - all CODE vsegs registered in the parent VSL are registered in the child VSL, but the
169 *   GPT entries are not copied in the child GPT, that will be dynamically updated from
170 *   the .elf file when a page fault is reported.
171 * - all FILE vsegs registered in the parent VSL are registered in the child VSL, and all
172 *   valid GPT entries in parent GPT are copied to the child GPT. The COW flag is not set.
173 * - no STACK vseg is copied from  parent VMM to child VMM, because the child STACK vseg
[469]174 *   must be copied later from the cluster containing the user thread requesting the fork().
[407]175 *********************************************************************************************
[408]176 * @ child_process     : local pointer on local child process descriptor.
177 * @ parent_process_xp : extended pointer on remote parent process descriptor.
[415]178 * @ return 0 if success / return -1 if failure.
[23]179 ********************************************************************************************/
[408]180error_t vmm_fork_copy( struct process_s * child_process,
181                       xptr_t             parent_process_xp );
[23]182
183/*********************************************************************************************
[433]184 * This function is called by the process_make_fork() function executing the fork syscall.
[408]185 * It sets the COW flag, and resets the WRITABLE flag of all GPT entries of the DATA, MMAP,
186 * and REMOTE vsegs of a process identified by the <process> argument.
187 * It must be called by a thread running in the reference cluster, that contains the complete
[433]188 * VSL and GPT (use the rpc_vmm_set_cow_client() when the calling thread client is remote).
[408]189 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
190 * using the list of copies stored in the owner process, and using remote_write accesses to
[433]191 * update the remote GPTs. It atomically increments the pending_fork counter, in all involved
192 * physical page descriptors. It cannot fail, as only mapped entries in GPTs are updated.
[1]193 *********************************************************************************************
[408]194 * @ process   : local pointer on local reference process descriptor.
195 ********************************************************************************************/
196void vmm_set_cow( struct process_s * process );
197
198/*********************************************************************************************
[585]199 * This global function modifies a GPT entry identified by the <process> and <vpn>
[433]200 * arguments in all clusters containing a process copy.
201 * It must be called by a thread running in the reference cluster.
[408]202 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
203 * using the list of copies stored in the owner process, and using remote_write accesses to
204 * update the remote GPTs. It cannot fail, as only mapped entries in GPT copies are updated.
205 *********************************************************************************************
206 * @ process   : local pointer on local process descriptor.
207 * @ vpn       : PTE index.
208 * @ attr      : PTE / attributes.
209 * @ ppn       : PTE / physical page index.
210 ********************************************************************************************/
[433]211void vmm_global_update_pte( struct process_s * process,
212                            vpn_t              vpn,
213                            uint32_t           attr,
214                            ppn_t              ppn );
[408]215
216/*********************************************************************************************
[433]217 * This function deletes, in the local cluster, all vsegs registered in the VSL
218 * of the process identified by the <process> argument. For each vseg:
219 * - it unmaps all vseg PTEs from the GPT (release the physical pages when required).
220 * - it removes the vseg from the local VSL.
221 * - it releases the memory allocated to the local vseg descriptors.
[611]222 * - it releases the memory allocated to the GPT itself.
[408]223 *********************************************************************************************
[23]224 * @ process   : pointer on process descriptor.
[1]225 ********************************************************************************************/
226void vmm_destroy( struct process_s * process );
227
228/*********************************************************************************************
[18]229 * This function scans the list of vsegs registered in the VMM of a given process descriptor
[1]230 * to check if a given virtual region (defined by a base and size) overlaps an existing vseg.
231 *********************************************************************************************
232 * @ process  : pointer on process descriptor.
233 * @ base     : region virtual base address (NOTE: declared as vpn_t - units to be confirmed).
234 * @ size     : region size (bytes) (NOTE: declared as vpn_t - units to be confirmed).
235 * @ returns NULL if no conflict / return conflicting vseg pointer if conflict.
236 ********************************************************************************************/
237vseg_t * vmm_check_conflict( struct process_s * process,
238                             vpn_t              base,
239                             vpn_t              size );
240
241/*********************************************************************************************
[18]242 * This function allocates memory for a vseg descriptor, initialises it, and register it
[595]243 * in the VMM of the local process descriptor, that must be the reference process.
[407]244 * For the "stack", "file", "anon", & "remote" types, it does not use the <base> argument,
245 * but uses the STACK and MMAP virtual memory allocators.
246 * It checks collision with all pre-existing vsegs.
247 * To comply with the "on-demand" paging policy, this function does NOT modify the page table,
248 * and does not allocate physical memory for vseg data.
249 * It should be called by a local thread (could be a RPC thread if the client thread is not
250 * running in the reference cluster).
[1]251 *********************************************************************************************
[407]252 * @ process     : pointer on local process descriptor.
253 * @ type        : vseg type.
254 * @ base        : vseg base address (not used for dynamically allocated vsegs).
255 * @ size        : vseg size (bytes).
256 * @ file_offset : offset in file for CODE, DATA, FILE types.
257 * @ file_size   : can be smaller than "size" for DATA type.
258 * @ mapper_xp   : extended pointer on mapper for CODE, DATA, FILE types.
259 * @ cxy         : physical mapping cluster (for non distributed vsegs).
260 * @ returns pointer on vseg if success / returns NULL if no memory, or conflict.
[1]261 ********************************************************************************************/
262vseg_t * vmm_create_vseg( struct process_s * process,
[407]263                          vseg_type_t        type,
[18]264                          intptr_t           base,
[407]265                              uint32_t           size,
266                          uint32_t           file_offset,
267                          uint32_t           file_size,
268                          xptr_t             mapper_xp,
269                          cxy_t              cxy );
[1]270
271/*********************************************************************************************
[611]272 * This function removes from the local VMM of a process descriptor identified by the <pid>
273 * argument a local vseg identified by its base address <vaddr> in user space.
274 * It can be used for any type of vseg, but must be called by a local thread.
275 * Use the RPC_VMM_DELETE_VSEG if the client thread is not local.
276 * It does nothing if the process is not registered in the local cluster.
277 * It does nothing if the vseg is not registered in the local process VSL.
278 * - It removes from the local GPT all registered PTEs. If it is executed in the reference
279 *   cluster, it releases the referenced physical pages, to the relevant kmem allocator,
280 *   depending on vseg type and the pending forks counter.
281 * - It removes the vseg from the local VSL, and release the vseg descriptor if not MMAP.
[1]282 *********************************************************************************************
[611]283 * @ pid      : process identifier.
284 * @ vaddr    : vseg base address in user space.
[1]285 ********************************************************************************************/
[611]286void vmm_delete_vseg( pid_t    pid,
287                      intptr_t vaddr );
[1]288
289/*********************************************************************************************
[611]290 * This function inserts a new <vseg> descriptor in the VSL identified by the <vmm> argument,
291 * and updates the vmm field in the vseg descriptor.
292 * It takes the lock protecting VSL.
293 *********************************************************************************************
294 * @ vmm       : local pointer on local VMM.
295 * @ vseg      : local pointer on local vseg descriptor.
296 ********************************************************************************************/
297void vmm_attach_vseg_to_vsl( vmm_t  * vmm,
298                             vseg_t * vseg );
299
300/*********************************************************************************************
301 * This function removes a vseg identified by the <vseg> argument from the local VSL
302 * identified by the <vmm> argument and release the memory allocated to vseg descriptor,
303 * for all vseg types, BUT the MMAP type (i.e. ANON or REMOTE).
304 * - If the vseg has not the STACK or MMAP type, it is simply removed from the VSL,
305 *   and vseg descriptor is released.
306 * - If the vseg has the STACK type, it is removed from VSL, vseg descriptor is released,
307 *   and the stack slot is returned to the local VMM_STACK allocator.
308 * - If the vseg has the MMAP type, it is removed from VSL and is registered in zombi_list
309 *   of the VMM_MMAP allocator for future reuse. The vseg descriptor is NOT released.
310 *********************************************************************************************
311 * @ vmm       : local pointer on local VMM.
312 * @ vseg      : local pointer on local vseg to be removed.
313 ********************************************************************************************/
314void vmm_detach_vseg_from_vsl( vmm_t  * vmm,
315                               vseg_t * vseg );
316
317/*********************************************************************************************
[18]318 * This function removes a given region (defined by a base address and a size) from
[407]319 * the VMM of a given process descriptor. This can modify the number of vsegs:
[1]320 * (a) if the region is not entirely mapped in an existing vseg, it's an error.
321 * (b) if the region has same base and size as an existing vseg, the vseg is removed.
[406]322 * (c) if the removed region cut the vseg in two parts, it is modified.
323 * (d) if the removed region cut the vseg in three parts, it is modified, and a new
324 *     vseg is created with same type.
[610]325 * FIXME [AG] this function should be called by a thread running in the reference cluster,
326 *       and the VMM should be updated in all process descriptors copies.
[1]327 *********************************************************************************************
328 * @ process   : pointer on process descriptor
329 * @ base      : vseg base address
330 * @ size      : vseg size (bytes)
331 ********************************************************************************************/
332error_t vmm_resize_vseg( struct process_s * process,
333                         intptr_t           base,
334                         intptr_t           size );
335
336/*********************************************************************************************
[611]337 * This low-level function scan the local VSL in <vmm> to find the unique vseg containing
338 * a given virtual address <vaddr>.
339 * It is called by the vmm_get_vseg(), vmm_get_pte(), and vmm_resize_vseg() functions.
340 *********************************************************************************************
341 * @ vmm     : pointer on the process VMM.
342 * @ vaddr   : virtual address.
343 * @ return vseg pointer if success / return NULL if not found.
344 ********************************************************************************************/
345struct vseg_s * vmm_vseg_from_vaddr( vmm_t    * vmm,
346                                     intptr_t   vaddr );
347
348/*********************************************************************************************
[388]349 * This function checks that a given virtual address is contained in a registered vseg.
[399]350 * It can be called by any thread running in any cluster:
351 * - if the vseg is registered in the local process VMM, it returns the local vseg pointer.
[388]352 * - if the vseg is missing in local VMM, it uses a RPC to get it from the reference cluster,
353 *   register it in local VMM and returns the local vseg pointer, if success.
[406]354 * - it returns an user error if the vseg is missing in the reference VMM, or if there is
[611]355 *   not enough memory for a new vseg descriptor in the calling thread cluster.
[1]356 *********************************************************************************************
[388]357 * @ process   : [in] pointer on process descriptor
358 * @ vaddr     : [in] virtual address
[440]359 * @ vseg      : [out] local pointer on local vseg
360 * @ returns 0 if success / returns -1 if user error (out of segment).
[611]361 ********************************************************************************************/
[388]362error_t vmm_get_vseg( struct process_s  * process,
363                      intptr_t            vaddr,
[394]364                      vseg_t           ** vseg );           
[1]365
366/*********************************************************************************************
[585]367 * This function is called by the generic exception handler in case of page-fault event,
[610]368 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
[585]369 * It checks the missing VPN and returns an user error if it is not in a registered vseg.
370 * For a legal VPN, there are actually 3 cases:
371 * 1) if the missing VPN belongs to a private vseg (STACK or CODE segment types, non
372 *    replicated in all clusters), it allocates a new physical page, computes the attributes,
373 *    depending on vseg type, and updates directly the local GPT.
374 * 2) if the missing VPN belongs to a public vseg, it can be a false page-fault, when the VPN
375 *    is mapped in the reference GPT, but not in the local GPT. For this false page-fault,
376 *    the local GPT is simply updated from the reference GPT.
377 * 3) if the missing VPN is public, and unmapped in the reference GPT, it's a true page fault.
378 *    The calling thread  allocates a new physical page, computes the attributes, depending
379 *    on vseg type, and updates directly (without RPC) the local GPT and the reference GPT.
380 *    Other GPT copies will be updated on demand.
[610]381 * Concurrent accesses to the GPT are handled, thanks to the
[585]382 * remote_rwlock protecting each GPT copy.
[1]383 *********************************************************************************************
[610]384 * @ process  : local pointer on local process.
385 * @ vpn      : VPN of the missing PTE.
[585]386 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
[1]387 ********************************************************************************************/
388error_t vmm_handle_page_fault( struct process_s * process,
[585]389                               vpn_t              vpn );
[1]390
391/*********************************************************************************************
[610]392 * This function is called by the generic exception handler in case of WRITE violation event,
393 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
[585]394 * It returns a kernel panic if VPN is not in a registered vseg or is not mapped.
395 * For a legal mapped vseg there are two cases:
396 * 1) If the missing VPN belongs to a private vseg (STACK or CODE segment types, non
397 *    replicated in all clusters), it access the local GPT to get the current PPN and ATTR.
398 *    It access the forks counter in the current physical page descriptor.
399 *    If there is a pending fork, it allocates a new physical page from the cluster defined
400 *    by the vseg type, copies the old physical page content to the new physical page,
401 *    and decrements the pending_fork counter in old physical page descriptor.
402 *    Finally, it reset the COW flag and set the WRITE flag in local GPT.
403 * 2) If the missing VPN is public, it access the reference GPT to get the current PPN and
404 *    ATTR. It access the forks counter in the current physical page descriptor.
405 *    If there is a pending fork, it allocates a new physical page from the cluster defined
406 *    by the vseg type, copies the old physical page content to the new physical page,
407 *    and decrements the pending_fork counter in old physical page descriptor.
408 *    Finally it calls the vmm_global_update_pte() function to reset the COW flag and set
409 *    the WRITE flag in all the GPT copies, using a RPC if the reference cluster is remote.
[610]410 * In both cases, concurrent accesses to the GPT are protected by the remote_rwlock
411 * attached to the GPT copy in VMM.
[407]412 *********************************************************************************************
[585]413 * @ process   : pointer on local process descriptor copy.
414 * @ vpn       : VPN of the faulting PTE.
415 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
[1]416 ********************************************************************************************/
[585]417error_t vmm_handle_cow( struct process_s * process,
418                        vpn_t              vpn );
[1]419
420/*********************************************************************************************
[401]421 * This function is called by the vmm_get_pte() function when a page is unmapped.
[313]422 * Depending on the vseg type, defined by the <vseg> argument, it returns the PPN
423 * (Physical Page Number) associated to a missing page defined by the <vpn> argument.
[406]424 * - For the FILE type, it returns directly the physical page from the file mapper.
[433]425 * - For the CODE and DATA types, it allocates a new physical page from the cluster defined
[406]426 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
427 *   and initialize this page from the .elf file mapper.
428 * - For all other types, it allocates a new physical page from the cluster defined
429 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
430 *   but the new page is not initialized.
[313]431 *********************************************************************************************
432 * @ vseg   : local pointer on vseg containing the missing page.
433 * @ vpn    : Virtual Page Number identifying the missing page.
434 * @ ppn    : [out] returned Physical Page Number.
[401]435 * @ returns 0 if success / returns EINVAL or ENOMEM if error.
[313]436 ********************************************************************************************/
437error_t vmm_get_one_ppn( vseg_t * vseg,
438                         vpn_t    vpn,
439                         ppn_t  * ppn );
440
[1]441
442#endif /* _VMM_H_ */
Note: See TracBrowser for help on using the repository browser.