source: trunk/kernel/mm/mapper.h @ 646

Last change on this file since 646 was 635, checked in by alain, 5 years ago

This version is a major evolution: The physical memory allocators,
defined in the kmem.c, ppm.c, and kcm.c files have been modified
to support remote accesses. The RPCs that were previously used
to allocate physical memory in a remote cluster have been removed.
This has been done to cure a dead-lock in case of concurrent page-faults.

This version 2.2 has been tested on a (4 clusters / 2 cores per cluster)
TSAR architecture, for both the "sort" and the "fft" applications.

File size: 15.3 KB
RevLine 
[1]1/*
[611]2 * mapper.h - Kernel cache for VFS files/directories definition.
[1]3 *
4 * Authors   Mohamed Lamine Karaoui (2015)
[623]5 *           Alain Greiner (2016,2017,2018,2019)
[1]6 *
7 * Copyright (c)  UPMC Sorbonne Universites
8 *
9 * This file is part of ALMOS-MKH.
10 *
11 * ALMOS-MKH is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; version 2.0 of the License.
14 *
15 * ALMOS-MKH is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#ifndef _MAPPER_H_
26#define _MAPPER_H_
27
[457]28#include <hal_kernel_types.h>
[1]29#include <hal_atomic.h>
30#include <xlist.h>
31#include <grdxt.h>
32#include <rwlock.h>
33
34/****  Forward declarations ****/
35
36struct page_s;
37struct vfs_inode_s;
38
39/*******************************************************************************************
[614]40 * This mapper_t object implements the kernel cache for a given VFS file or directory.
[23]41 * There is one mapper per file/dir. It is implemented as a three levels radix tree,
[1]42 * entirely stored in the same cluster as the inode representing the file/dir.
[18]43 * - The fast retrieval key is the page index in the file.
[1]44 *   The ix1_width, ix2_width, ix3_width sub-indexes are configuration parameters.
45 * - The leaves are pointers on physical page descriptors, dynamically allocated
46 *   in the local cluster.
[610]47 * - The mapper is protected by a "remote_rwlock", to support several simultaneous
48 *   "readers", and only one "writer".
49 * - A "reader" thread, calling the mapper_remote_get_page() function to get a page
[623]50 *   descriptor pointer from the page index in file, can be running in any cluster.
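[610]51 * - A "writer" thread, calling the mapper_handle_miss() function to handle a page miss
52 *   must be local (running in the mapper cluster).
 *   NOTE(review): only mapper_remote_handle_miss() is declared in this file, and it is
 *   documented as callable from any cluster. This item looks stale - confirm.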
[623]53 * - The vfs_fs_move_page() function accesses the file system to handle a mapper miss,
[246]54 *   or to update a dirty page on device.
[610]55 * - The vfs_mapper_load_all() function is used to load all pages of a directory
56 *   into the mapper (prefetch).
[265]57 * - The mapper_move_user() function is used to move data to or from a user buffer.
[238]58 *   This user space buffer can be physically distributed in several clusters.
[313]59 * - the mapper_move_kernel() function is used to move data to or from a remote kernel
60 *   buffer, that can be physically located in any cluster.
[246]61 * - In the present implementation the cache size for a given file increases on demand,
62 *   and the  allocated memory is only released when the mapper/inode is destroyed.
[612]63 *
[626]64 * TODO (1) the mapper being only used to implement the VFS cache(s), the mapper.c
65 *          and mapper.h files should be transferred to the fs directory.
[635]66 * TODO (2) the "type" field in mapper descriptor is redundant and probably unused.
[1]67 ******************************************************************************************/
68
69
70/*******************************************************************************************
71 * This structure defines the mapper descriptor (one mapper per cached file or directory).
72 ******************************************************************************************/
73
74typedef struct mapper_s
75{
[23]76        struct vfs_inode_s * inode;           /*! owner inode                                     */
[246]77    uint32_t             type;        /*! file system type (see TODO (2) in file header)  */
[606]78        grdxt_t              rt;              /*! embedded pages cache descriptor (radix tree)    */
79        remote_rwlock_t      lock;        /*! several readers / only one writer               */
[1]80        uint32_t                 refcount;    /*! several vsegs can refer to the same file        */
81        xlist_entry_t        vsegs_root;  /*! root of list of vsegs referring to this mapper  */
82        xlist_entry_t        wait_root;   /*! root of list of threads waiting on mapper       */
83    list_entry_t         dirty_root;  /*! root of list of dirty pages                     */
84}
85mapper_t;
86
87/*******************************************************************************************
[18]88 * This function allocates physical memory for a mapper descriptor, and initializes it
[1]89 * (refcount <= 0 / inode <= NULL).
90 * It must be executed by a thread running in the cluster containing the mapper.
 * NOTE(review): <type> is a vfs_fs_type_t here, whereas the "type" field of mapper_t is
 * a uint32_t, and vfs_fs_type_t is not declared in this file. Presumably it is provided
 * by a header included before this one - confirm.
91 *******************************************************************************************
[513]92 * @ type   : [in] type of the mapper to create.
93 * @ return : pointer on created mapper if success / return NULL if no memory.
[1]94 ******************************************************************************************/
[513]95mapper_t * mapper_create( vfs_fs_type_t type );
[1]96
97/*******************************************************************************************
[606]98 * This function releases all physical memory allocated for a mapper.
99 * Both the mapper descriptor and the radix tree are released.
100 * It does NOT synchronize dirty pages. Use the vfs_sync_inode() function if required.
[1]101 * It must be executed by a thread running in the cluster containing the mapper.
102 *******************************************************************************************
103 * @ mapper      : [in] local pointer on the target mapper.
104 ******************************************************************************************/
[606]105void mapper_destroy( mapper_t * mapper );
[1]106
107/*******************************************************************************************
[635]108 * This function loads from the IOC device a missing page identified by the <page_id>
109 * argument into a - possibly remote - mapper identified by the <mapper_xp> argument.
110 * It can be executed by a thread running in any cluster.
111 * It allocates a physical page from the remote cluster PPM, initialises it by accessing
112 * the IOC device, and registers the page in the remote mapper radix tree.
[606]113 * WARNING : the calling function mapper_remote_get_page() is supposed to take and release
114 * the lock protecting the mapper in WRITE_MODE.
115 *******************************************************************************************
[635]116 * @ mapper_xp   : [in]  extended pointer on remote mapper.
117 * @ page_id     : [in]  missing page index in file.
118 * @ page_xp     : [out] buffer for extended pointer on missing page descriptor.
119 * @ return 0 if success / return -1 if IOC cannot be accessed.
[606]120 ******************************************************************************************/
[635]121error_t mapper_remote_handle_miss( xptr_t     mapper_xp,
122                                   uint32_t   page_id,
123                                   xptr_t   * page_xp );
[606]124
125/*******************************************************************************************
[635]126 * This function removes a physical page from a - possibly remote - mapper,
127 * and releases the page to the remote PPM.
128 * It can be executed by any thread running in any cluster.
129 * It takes the mapper lock in WRITE_MODE to update the mapper.
130 *******************************************************************************************
131 * @ mapper_xp  : [in] extended pointer on the remote mapper.
132 * @ page       : [in] local pointer on the page in remote mapper.
133 ******************************************************************************************/
134void mapper_remote_release_page( xptr_t          mapper_xp,
135                                 struct page_s * page );
136
137/*******************************************************************************************
[625]138 * This function moves data between a remote mapper, identified by the <mapper_xp> argument,
[610]139 * and a distributed user buffer. It can be called by a thread running in any cluster.
[606]140 * It is called by the vfs_user_move() to implement sys_read() and sys_write() syscalls.
[313]141 * If required, the data transfer is split in "fragments", where one fragment contains
[265]142 * contiguous bytes in the same mapper page.
[313]143 * It uses "hal_uspace" accesses to move a fragment to/from the user buffer.
[1]144 * In case of write, the dirty bit is set for all pages written in the mapper.
[610]145 * The mapper being an extendable cache, it is automatically extended when required.
[606]146 * The "offset" field in the file descriptor, and the "size" field in inode descriptor
147 * are not modified by this function.
[1]148 *******************************************************************************************
[610]149 * @ mapper_xp    : [in] extended pointer on mapper.
[265]150 * @ to_buffer    : [in] mapper -> buffer if true / buffer -> mapper if false.
[23]151 * @ file_offset  : [in] first byte to move in file.
[407]152 * @ u_buf        : [in] user space pointer on user buffer.
[23]153 * @ size         : [in] number of bytes to move.
[606]154 * @ returns 0 if success / returns -1 if error.
[1]155 ******************************************************************************************/
[614]156error_t mapper_move_user( xptr_t     mapper_xp,
[313]157                          bool_t     to_buffer,
158                          uint32_t   file_offset,
[407]159                          void     * u_buf,
[313]160                          uint32_t   size );
161
[606]162/********************************************************************************************
[625]163 * This function moves data between a remote mapper, identified by the <mapper_xp> argument,
164 * and a localised remote kernel buffer. It can be called by a thread running in any cluster.
[313]165 * If required, the data transfer is split in "fragments", where one fragment contains
166 * contiguous bytes in the same mapper page.
167 * It uses a "remote_memcpy" to move a fragment to/from the kernel buffer.
168 * In case of write, the dirty bit is set for all pages written in the mapper.
169 *******************************************************************************************
[606]170 * @ mapper_xp    : [in] extended pointer on mapper.
[313]171 * @ to_buffer    : [in] mapper -> buffer if true / buffer -> mapper if false.
172 * @ file_offset  : [in] first byte to move in file.
173 * @ buffer_xp    : [in] extended pointer on kernel buffer.
174 * @ size         : [in] number of bytes to move.
[606]175 * @ returns 0 if success / returns -1 if error.
[313]176 ******************************************************************************************/
[606]177error_t mapper_move_kernel( xptr_t     mapper_xp,
[265]178                            bool_t     to_buffer,
179                            uint32_t   file_offset,
[313]180                            xptr_t     buffer_xp,
[265]181                            uint32_t   size );
[1]182
183/*******************************************************************************************
[611]184 * This function returns an extended pointer on a page descriptor.
185 * The - possibly remote - mapper is identified by the <mapper_xp> argument.
186 * The page is identified by the <page_id> argument (page index in the file).
187 * It can be executed by a thread running in any cluster, as it uses remote
[606]188 * access primitives to scan the mapper.
189 * In case of miss, this function takes the mapper lock in WRITE_MODE, and calls the
190 * mapper_handle_miss() to load the missing page from device to mapper, using an RPC
191 * when the mapper is remote.
 * NOTE(review): no mapper_handle_miss() is declared in this file; the declared miss
 * handler is mapper_remote_handle_miss(), documented as callable from any cluster.
 * The function name and the RPC mention above look stale - confirm against mapper.c.
192 *******************************************************************************************
193 * @ mapper_xp  : [in] extended pointer on the mapper.
194 * @ page_id    : [in] page index in file.
[611]195 * @ returns extended pointer on page descriptor if success / return XPTR_NULL if error.
[606]196 ******************************************************************************************/
197xptr_t mapper_remote_get_page( xptr_t    mapper_xp,
198                               uint32_t  page_id );
199
200/*******************************************************************************************
201 * This function allows to read a single word in a mapper seen as an array of uint32_t.
[611]202 * It has been designed to support remote access to the FAT mapper of the FATFS.
[606]203 * It can be called by any thread running in any cluster.
[1]204 * In case of miss, it takes the mapper lock in WRITE_MODE, loads the missing
[606]205 * page from device to mapper, and releases the mapper lock.
[1]206 *******************************************************************************************
[606]207 * @ mapper_xp  : [in]  extended pointer on the mapper.
[628]208 * @ page_id    : [in]  page index in mapper.
209 * @ word_id    : [in]  32 bits word index in page.
210 * @ value      : [out] local pointer on destination buffer.
[606]211 * @ returns 0 if success / return -1 if error.
[1]212 ******************************************************************************************/
[606]213error_t mapper_remote_get_32( xptr_t     mapper_xp,
[628]214                              uint32_t   page_id,
[606]215                              uint32_t   word_id,
[628]216                              uint32_t * value );
[1]217
[606]218/*******************************************************************************************
219 * This function allows to write a single word to a mapper seen as an array of uint32_t.
[625]220 * It has been designed to support remote access to the FAT mapper of the FATFS.
[606]221 * It can be called by any thread running in any cluster.
222 * In case of miss, it takes the mapper lock in WRITE_MODE, loads the missing
223 * page from device to mapper, and releases the mapper lock.
[628]224 * It does not update the FAT on IOC device.
[606]225 *******************************************************************************************
226 * @ mapper_xp  : [in]  extended pointer on the mapper.
[628]227 * @ page_id    : [in]  page index in mapper.
228 * @ word_id    : [in]  32 bits word index in page.
[611]229 * @ value      : [in]  value to be written.
[606]230 * @ returns 0 if success / return -1 if error.
231 ******************************************************************************************/
232error_t mapper_remote_set_32( xptr_t     mapper_xp,
[628]233                              uint32_t   page_id,
[606]234                              uint32_t   word_id,
235                              uint32_t   value );
[18]236
[611]237/*******************************************************************************************
[626]238 * This function scans all pages present in the mapper identified by the <mapper> argument,
[635]239 * and synchronizes all pages marked as "dirty" on disk.
[623]240 * These pages are unmarked and removed from the local PPM dirty_list.
241 * This function must be called by a local thread running in same cluster as the mapper.
242 * A remote thread must call the RPC_MAPPER_SYNC function.
243 *******************************************************************************************
244 * @ mapper     : [in]  local pointer on local mapper.
245 * @ returns 0 if success / return -1 if error.
246 ******************************************************************************************/
247error_t mapper_sync( mapper_t *  mapper );
248
249/*******************************************************************************************
[611]250 * This debug function displays the content of a given page of a given mapper.
251 * - the mapper is identified by the <mapper_xp> argument.
252 * - the page is identified by the <page_id> argument.
253 * - the number of bytes to display in page is defined by the <nbytes> argument.
254 * The format is eight (32 bits) words per line in hexadecimal.
255 * It can be called by any thread running in any cluster.
256 * In case of miss in mapper, it loads the missing page from device to mapper.
257 *******************************************************************************************
258 * @ mapper_xp  : [in]  extended pointer on the mapper.
259 * @ page_id    : [in]  page index in file.
260 * @ nbytes     : [in]  number of bytes to display in page.
261 * @ returns 0 if success / return -1 if error.
262 ******************************************************************************************/
263error_t mapper_display_page( xptr_t     mapper_xp,
264                             uint32_t   page_id,
[614]265                             uint32_t   nbytes );
[611]266
267
[1]268#endif /* _MAPPER_H_ */
Note: See TracBrowser for help on using the repository browser.