| 1 | /* | 
|---|
| 2 |  * mapper.h - Map memory, file or device in process virtual address space. | 
|---|
| 3 |  * | 
|---|
| 4 |  * Authors   Mohamed Lamine Karaoui (2015) | 
|---|
| 5 |  *           Alain Greiner (2016,2017,2018) | 
|---|
| 6 |  * | 
|---|
| 7 |  * Copyright (c)  UPMC Sorbonne Universites | 
|---|
| 8 |  * | 
|---|
| 9 |  * This file is part of ALMOS-MKH. | 
|---|
| 10 |  * | 
|---|
| 11 |  * ALMOS-MKH is free software; you can redistribute it and/or modify it | 
|---|
| 12 |  * under the terms of the GNU General Public License as published by | 
|---|
| 13 |  * the Free Software Foundation; version 2.0 of the License. | 
|---|
| 14 |  * | 
|---|
| 15 |  * ALMOS-MKH is distributed in the hope that it will be useful, but | 
|---|
| 16 |  * WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 17 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 18 |  * General Public License for more details. | 
|---|
| 19 |  * | 
|---|
| 20 |  * You should have received a copy of the GNU General Public License | 
|---|
| 21 |  * along with ALMOS-MKH; if not, write to the Free Software Foundation, | 
|---|
| 22 |  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 
|---|
| 23 |  */ | 
|---|
| 24 |  | 
|---|
| 25 | #ifndef _MAPPER_H_ | 
|---|
| 26 | #define _MAPPER_H_ | 
|---|
| 27 |  | 
|---|
| 28 | #include <hal_types.h> | 
|---|
| 29 | #include <hal_atomic.h> | 
|---|
| 30 | #include <xlist.h> | 
|---|
| 31 | #include <grdxt.h> | 
|---|
| 32 | #include <rwlock.h> | 
|---|
| 33 |  | 
|---|
/****  Forward declarations (types used only through pointers in this header) ****/

struct vfs_inode_s;
struct page_s;
| 38 |  | 
|---|
/*******************************************************************************************
 * The mapper implements the kernel cache for a given file or directory.
 * There is one mapper per file/dir. It is implemented as a three-level radix tree,
 * entirely stored in the same cluster as the inode representing the file/dir.
 * - The fast retrieval key is the page index in the file.
 *   The ix1_width, ix2_width, ix3_width sub-indexes are configuration parameters.
 * - The leaves are pointers on physical page descriptors, dynamically allocated
 *   in the local cluster.
 * - In a given cluster, a mapper is a "private" structure: a thread accessing the mapper
 *   must be running in the cluster containing it (can be a local thread or a RPC thread).
 * - The mapper is protected by a blocking "rwlock", to support several simultaneous
 *   readers, and only one writer. This lock implements a busy waiting policy.
 * - The mapper_get_page() function, which returns a page descriptor pointer from a page
 *   index in the file, is in charge of handling the miss on the mapper cache.
 * - The vfs_mapper_move_page() function accesses the file system to handle a mapper miss,
 *   or to update a dirty page on device.
 * - The vfs_mapper_load_all() function is used to load all pages of a given file
 *   or directory into the mapper.
 * - The mapper_move_user() function is used to move data to or from a user buffer.
 *   This user space buffer can be physically distributed in several clusters.
 * - The mapper_move_kernel() function is used to move data to or from a remote kernel
 *   buffer, that can be physically located in any cluster.
 * - In the present implementation the cache size for a given file increases on demand,
 *   and the allocated memory is only released when the mapper/inode is destroyed.
 ******************************************************************************************/
| 64 |  | 
|---|
| 65 |  | 
|---|
| 66 | /******************************************************************************************* | 
|---|
| 67 |  * This structure defines the mapper descriptor. | 
|---|
| 68 |  ******************************************************************************************/ | 
|---|
| 69 |  | 
|---|
| 70 | typedef struct mapper_s | 
|---|
| 71 | { | 
|---|
| 72 |         struct vfs_inode_s * inode;           /*! owner inode                                     */ | 
|---|
| 73 |     uint32_t             type;        /*! file system type                                */ | 
|---|
| 74 |         grdxt_t              radix;           /*! pages cache implemented as a radix tree         */ | 
|---|
| 75 |         rwlock_t             lock;        /*! several readers / only one writer               */ | 
|---|
| 76 |         uint32_t                 refcount;    /*! several vsegs can refer the same file           */ | 
|---|
| 77 |         xlist_entry_t        vsegs_root;  /*! root of list of vsegs refering this mapper      */ | 
|---|
| 78 |         xlist_entry_t        wait_root;   /*! root of list of threads waiting on mapper       */ | 
|---|
| 79 |     list_entry_t         dirty_root;  /*! root of list of dirty pages                     */ | 
|---|
| 80 | } | 
|---|
| 81 | mapper_t; | 
|---|
| 82 |  | 
|---|
| 83 | /******************************************************************************************* | 
|---|
| 84 |  * This structure defines a "fragment". It is used to move data between the kernel mapper, | 
|---|
| 85 |  * and an user buffer, that can be split in several distributed physical pages located | 
|---|
| 86 |  * in different clusters. A fragment is a set of contiguous bytes in the file. | 
|---|
| 87 |  * - It can be stored in one single physical page in the user buffer. | 
|---|
| 88 |  * - It can spread two successive physical pages in the kernel mapper. | 
|---|
| 89 |  ******************************************************************************************/ | 
|---|
| 90 |  | 
|---|
| 91 | typedef struct fragment_s | 
|---|
| 92 | { | 
|---|
| 93 |     uint32_t    file_offset;         /*! offset of fragment in file (i.e. in mapper)      */ | 
|---|
| 94 |     uint32_t    size;                /*! number of bytes in fragment                      */ | 
|---|
| 95 |     cxy_t       buf_cxy;             /*! user buffer cluster identifier                   */ | 
|---|
| 96 |     void      * buf_ptr;             /*! local pointer on first byte in user buffer       */ | 
|---|
| 97 | } | 
|---|
| 98 | fragment_t; | 
|---|
| 99 |  | 
|---|
| 100 | /******************************************************************************************* | 
|---|
| 101 |  * This function allocates physical memory for a mapper descriptor, and initializes it | 
|---|
| 102 |  * (refcount <= 0) / inode <= NULL). | 
|---|
| 103 |  * It must be executed by a thread running in the cluster containing the mapper. | 
|---|
| 104 |  ******************************************************************************************* | 
|---|
| 105 |  * @ return pointer on created mapper if success / return NULL if no memory | 
|---|
| 106 |  ******************************************************************************************/ | 
|---|
| 107 | mapper_t * mapper_create(); | 
|---|
| 108 |  | 
|---|
| 109 | /******************************************************************************************* | 
|---|
| 110 |  * This function releases all physical pages allocated for the mapper. | 
|---|
| 111 |  * It synchronizes all dirty pages (i.e. update the file on disk) if required. | 
|---|
| 112 |  * The mapper descriptor and the radix tree themselves are released. | 
|---|
| 113 |  * It must be executed by a thread running in the cluster containing the mapper. | 
|---|
| 114 |  ******************************************************************************************* | 
|---|
| 115 |  * @ mapper      : target mapper. | 
|---|
| 116 |  * @ return 0 if success / return EIO if a dirty page cannot be updated on device. | 
|---|
| 117 |  ******************************************************************************************/ | 
|---|
| 118 | error_t mapper_destroy( mapper_t * mapper ); | 
|---|
| 119 |  | 
|---|
| 120 | /******************************************************************************************* | 
|---|
| 121 |  * This function move data between a mapper and a - possibly distributed - user buffer. | 
|---|
| 122 |  * It must be called by a thread running in the cluster containing the mapper. | 
|---|
| 123 |  * It is called by the vfs_user_move() function to implement sys_read() and sys_write(). | 
|---|
| 124 |  * If required, the data transfer is split in "fragments", where one fragment contains  | 
|---|
| 125 |  * contiguous bytes in the same mapper page. | 
|---|
| 126 |  * It uses "hal_uspace" accesses to move a fragment to/from the user buffer. | 
|---|
| 127 |  * In case of write, the dirty bit is set for all pages written in the mapper. | 
|---|
| 128 |  * The offset in the file descriptor is not modified by this function. | 
|---|
| 129 |  ******************************************************************************************* | 
|---|
| 130 |  * @ mapper       : local pointer on mapper. | 
|---|
| 131 |  * @ to_buffer    : mapper -> buffer if true / buffer -> mapper if false. | 
|---|
| 132 |  * @ file_offset  : first byte to move in file. | 
|---|
| 133 |  * @ u_buf        : user space pointer on user buffer. | 
|---|
| 134 |  * @ size         : number of bytes to move. | 
|---|
| 135 |  * returns O if success / returns EINVAL if error. | 
|---|
| 136 |  ******************************************************************************************/ | 
|---|
| 137 | error_t mapper_move_user( mapper_t * mapper, | 
|---|
| 138 |                           bool_t     to_buffer, | 
|---|
| 139 |                           uint32_t   file_offset, | 
|---|
| 140 |                           void     * u_buf, | 
|---|
| 141 |                           uint32_t   size ); | 
|---|
| 142 |  | 
|---|
| 143 | /******************************************************************************************* | 
|---|
| 144 |  * This function move data between a mapper and a remote kernel buffer. | 
|---|
| 145 |  * It must be called by a thread running in the cluster containing the mapper. | 
|---|
| 146 |  * If required, the data transfer is split in "fragments", where one fragment contains  | 
|---|
| 147 |  * contiguous bytes in the same mapper page. | 
|---|
| 148 |  * It uses a "remote_memcpy" to move a fragment to/from the kernel buffer. | 
|---|
| 149 |  * In case of write, the dirty bit is set for all pages written in the mapper. | 
|---|
| 150 |  * The offset in the file descriptor is not modified by this function. | 
|---|
| 151 |  ******************************************************************************************* | 
|---|
| 152 |  * @ mapper       : local pointer on mapper. | 
|---|
| 153 |  * @ to_buffer    : mapper -> buffer if true / buffer -> mapper if false. | 
|---|
| 154 |  * @ file_offset  : first byte to move in file. | 
|---|
| 155 |  * @ buffer_xp    : extended pointer on kernel buffer. | 
|---|
| 156 |  * @ size         : number of bytes to move. | 
|---|
| 157 |  * returns O if success / returns EINVAL if error. | 
|---|
| 158 |  ******************************************************************************************/ | 
|---|
| 159 | error_t mapper_move_kernel( mapper_t * mapper, | 
|---|
| 160 |                             bool_t     to_buffer, | 
|---|
| 161 |                             uint32_t   file_offset, | 
|---|
| 162 |                             xptr_t     buffer_xp, | 
|---|
| 163 |                             uint32_t   size ); | 
|---|
| 164 |  | 
|---|
| 165 |  | 
|---|
| 166 | /******************************************************************************************* | 
|---|
| 167 |  * This function removes a physical page from the mapper, update the FS if the page | 
|---|
| 168 |  * is dirty, and releases the page to PPM. It is called by the mapper_destroy() function. | 
|---|
| 169 |  * It must be executed by a thread running in the cluster containing the mapper. | 
|---|
| 170 |  * It takes both the page lock and the mapper lock in WRITE_MODE to release the page. | 
|---|
| 171 |  ******************************************************************************************* | 
|---|
| 172 |  * @ mapper     : local pointer on the mapper. | 
|---|
| 173 |  * @ page       : pointer on page to remove. | 
|---|
| 174 |  * @ return 0 if success / return EIO if a dirty page cannot be copied to FS. | 
|---|
| 175 |  ******************************************************************************************/ | 
|---|
| 176 | error_t mapper_release_page( mapper_t      * mapper, | 
|---|
| 177 |                              struct page_s * page ); | 
|---|
| 178 |  | 
|---|
| 179 | /******************************************************************************************* | 
|---|
| 180 |  * This function searches a physical page descriptor from its index in mapper. | 
|---|
| 181 |  * It must be executed by a thread running in the cluster containing the mapper. | 
|---|
| 182 |  * In case of miss, it takes the mapper lock in WRITE_MODE, load the missing | 
|---|
| 183 |  * page from device to the mapper, and release the mapper lock. | 
|---|
| 184 |  ******************************************************************************************* | 
|---|
| 185 |  * @ mapper     : local pointer on the mapper. | 
|---|
| 186 |  * @ index      : page index in file | 
|---|
| 187 |  * @ returns pointer on page descriptor if success / return NULL if error. | 
|---|
| 188 |  ******************************************************************************************/ | 
|---|
| 189 | struct page_s * mapper_get_page( mapper_t * mapper, | 
|---|
| 190 |                                  uint32_t   index ); | 
|---|
| 191 |  | 
|---|
| 192 |   | 
|---|
| 193 |  | 
|---|
| 194 | #endif /* _MAPPER_H_ */ | 
|---|