source: trunk/kernel/libk/user_dir.c @ 639

Last change on this file since 639 was 635, checked in by alain, 6 years ago

This version is a major evolution: The physical memory allocators,
defined in the kmem.c, ppm.c, and kcm.c files have been modified
to support remote accesses. The RPCs that were previously used
to allocate physical memory in a remote cluster have been removed.
This has been done to cure a dead-lock in case of concurrent page-faults.

This version 2.2 has been tested on a (4 clusters / 2 cores per cluster)
TSAR architecture, for both the "sort" and the "fft" applications.

File size: 17.8 KB
RevLine 
[613]1/*
2 * user_dir.c - kernel DIR related operations implementation.
3 *
[629]4 * Authors   Alain   Greiner (2016,2017,2018,2019)
[613]5 *
6 * Copyright (c) UPMC Sorbonne Universites
7 *
8 * This file is part of ALMOS-MKH.
9 *
10 * ALMOS-MKH is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2.0 of the License.
13 *
14 * ALMOS-MKH is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#include <kernel_config.h>
25#include <hal_kernel_types.h>
26#include <hal_irqmask.h>
27#include <hal_remote.h>
28#include <thread.h>
29#include <xlist.h>
30#include <scheduler.h>
31#include <remote_queuelock.h>
32#include <user_dir.h>
33
34
35/////////////////////////////////////////////
36xptr_t user_dir_from_ident( intptr_t  ident )
37{
38    // get pointer on local process_descriptor
39    process_t * process = CURRENT_THREAD->process;
40
41    // get pointers on reference process
42    xptr_t      ref_xp  = process->ref_xp;
43    cxy_t       ref_cxy = GET_CXY( ref_xp );
44    process_t * ref_ptr = GET_PTR( ref_xp );
45
46    // get extended pointers on open directories list and lock 
47    xptr_t root_xp = XPTR( ref_cxy , &ref_ptr->dir_root );
48    xptr_t lock_xp = XPTR( ref_cxy , &ref_ptr->dir_lock );
49
50    // get lock protecting open directories list
51    remote_queuelock_acquire( lock_xp );
52 
53    // scan reference process dir list
54    xptr_t           iter_xp;
55    xptr_t           dir_xp;
56    cxy_t            dir_cxy;
57    user_dir_t     * dir_ptr;
58    intptr_t         current;
59    bool_t           found = false;
60           
61    XLIST_FOREACH( root_xp , iter_xp )
62    {
63        dir_xp  = XLIST_ELEMENT( iter_xp , user_dir_t , list );
64        dir_cxy = GET_CXY( dir_xp );
65        dir_ptr = GET_PTR( dir_xp );
66        current = (intptr_t)hal_remote_lpt( XPTR( dir_cxy , &dir_ptr->ident ) );   
67        if( ident == current )
68        {
69            found = true;
70            break;
71        }
72    }
73
74    // relese lock protecting open directories list
75    remote_queuelock_release( lock_xp );
76 
77    if( found == false )  return XPTR_NULL;
78    else                  return dir_xp;
79
80}  // end user_dir_from_ident()
81
[614]82//////////////////////////////////////////////////
83user_dir_t * user_dir_create( vfs_inode_t * inode,
84                              xptr_t        ref_xp )
[613]85{ 
86    user_dir_t    * dir;               // local pointer on created user_dir_t
87    vseg_t        * vseg;              // local pointer on dirent array vseg
88    uint32_t        vseg_size;         // size of vseg in bytes
89    process_t     * ref_ptr;           // local pointer on reference process
90    cxy_t           ref_cxy;           // reference process cluster identifier
[614]91    pid_t           ref_pid;           // reference process PID
[613]92    xptr_t          gpt_xp;            // extended pointer on reference process GPT
[629]93    uint32_t        attr;              // attributes for all GPT entries
[613]94    uint32_t        dirents_per_page;  // number of dirent descriptors per page
95    page_t        * page;              // local pointer on page descriptor
96    struct dirent * base;              // local pointer on physical page base
97    uint32_t        total_dirents;     // total number of dirents in dirent array
98    uint32_t        total_pages;       // total number of pages for dirent array
[629]99    vpn_t           vpn_base;          // first page in dirent array vseg
100    vpn_t           vpn;               // current page in dirent array vseg
[613]101    ppn_t           ppn;               // ppn of currently allocated physical page
102    uint32_t        entries;           // number of dirent actually comied in one page
103    uint32_t        first_entry;       // index of first dentry to copy in dirent array
104    bool_t          done;              // last entry found and copied when true
105    list_entry_t    root;              // root of temporary list of allocated pages
106    uint32_t        page_id;           // page index in list of physical pages
107    kmem_req_t      req;               // kmem request descriptor
[629]108    ppn_t           fake_ppn;          // unused, but required by hal_gptlock_pte()
109    uint32_t        fake_attr;         // unused, but required by hal_gptlock_pte()
[613]110    error_t         error;
111
[629]112    // get cluster, local pointer, and pid of reference process
[614]113    ref_cxy = GET_CXY( ref_xp );
114    ref_ptr = GET_PTR( ref_xp );
115    ref_pid = hal_remote_l32( XPTR( ref_cxy , &ref_ptr->pid ) );
[613]116
117#if DEBUG_USER_DIR
118uint32_t cycle = (uint32_t)hal_get_cycles();
119thread_t * this = CURRENT_THREAD;
120if( cycle > DEBUG_USER_DIR )
[614]121printk("\n[%s] thread[%x,%x] enter for inode (%x,%x) and process %x / cycle %d\n",
122__FUNCTION__, this->process->pid, this->trdid, local_cxy, inode, ref_pid, cycle );
[613]123#endif
124
125// check dirent size
[635]126assert( ( sizeof(struct dirent) == 64), "sizeof(dirent) must be 64\n");
[613]127
128    // compute number of dirent per page
129    dirents_per_page = CONFIG_PPM_PAGE_SIZE >> 6;
130   
131    // initialise temporary list of pages
132    list_root_init( &root );
133
134    // allocate memory for a local user_dir descriptor
[635]135    req.type  = KMEM_KCM;
136    req.order = bits_log2( sizeof(user_dir_t) );
137    req.flags = AF_ZERO | AF_KERNEL;
[613]138    dir       = kmem_alloc( &req );
139
140    if( dir == NULL )
141    {
142        printk("\n[ERROR] in %s : cannot allocate user_dir_t in cluster %x\n",
143        __FUNCTION__, local_cxy );
144        return NULL;
145    }
146
[635]147    // Build an initialize the dirent array as a list of pages.
[613]148    // For each iteration in this while loop:
149    // - allocate one physical 4 Kbytes (64 dirent slots)
150    // - call the relevant FS specific function to scan the directory mapper,
151    //   and copy up to 64 entries in the page.
152    // - register the page in a temporary list using the embedded page list_entry
153    // - exit when the last entry has been found (done == true).
154
155    // initialize loops variables
156    done          = false;
157    total_dirents = 0;
158    total_pages   = 0;
159    first_entry   = 0;
160
161    while( done == false )  // loop on physical pages
162    {
163        // allocate one physical page
[635]164        req.type  = KMEM_PPM;
165        req.order = 0;
[613]166        req.flags = AF_ZERO;
[635]167        base      = kmem_alloc( &req );
[613]168
[635]169        if( base == NULL )
[613]170        {
171            printk("\n[ERROR] in %s : cannot allocate page in cluster %x\n",
172            __FUNCTION__, ref_cxy );
173            goto user_dir_create_failure;
174        }
175
176        // call the relevant FS specific function to copy up to 64 dirents in page
177        error = vfs_fs_get_user_dir( inode,
178                                     base,
179                                     dirents_per_page,
180                                     first_entry,
181                                     false,        // don't create missing inodes
182                                     &entries,
183                                     &done );
184        if( error )
185        {
186            printk("\n[ERROR] in %s : cannot initialise dirent array in cluster %x\n",
187            __FUNCTION__, ref_cxy );
188            goto user_dir_create_failure;
189        }
190
191        // increment number of written dirents
192        total_dirents += entries;
193
[635]194        // get page descriptor pointer from base
195        page = GET_PTR( ppm_base2page( XPTR( local_cxy , base ) ) );
196
[613]197        // register page in temporary list
198        list_add_last( &root , &page->list ); 
199        total_pages++; 
200
201        // set first_entry for next iteration
202        first_entry = total_dirents;
203
204    } // end while
205       
[614]206#if DEBUG_USER_DIR
207if( cycle > DEBUG_USER_DIR )
208printk("\n[%s] thread[%x,%x] initialised dirent array / %d entries\n",
209__FUNCTION__, this->process->pid, this->trdid, total_dirents, cycle );
210#endif
211
[613]212    // compute required vseg size for a 64 bytes dirent
213    vseg_size = total_dirents << 6;
214
215    // create an ANON vseg and register it in reference process VSL
216    if( local_cxy == ref_cxy )
217    {
[614]218        vseg = vmm_create_vseg( ref_ptr,
[613]219                                VSEG_TYPE_ANON,
220                                0,                      // vseg base (unused)
221                                vseg_size,
222                                0,                      // file offset (unused)
223                                0,                      // file_size (unused)
224                                XPTR_NULL,              // mapper (unused)
[614]225                                local_cxy );
[613]226    }
227    else
228    {
229        rpc_vmm_create_vseg_client( ref_cxy,
230                                    ref_ptr,
231                                    VSEG_TYPE_ANON,
232                                    0,                     // vseg base (unused)
233                                    vseg_size,
234                                    0,                     // file offset (unused)
235                                    0,                     // file size (unused)
236                                    XPTR_NULL,             // mapper (unused)
[614]237                                    local_cxy,
[613]238                                    &vseg ); 
239    }
[614]240
[613]241    if( vseg == NULL )
242    {
[614]243        printk("\n[ERROR] in %s : cannot create vseg for user_dir in cluster %x\n",
[613]244        __FUNCTION__, ref_cxy);
245        goto user_dir_create_failure;
246    }
247
[614]248#if DEBUG_USER_DIR
[613]249if( cycle > DEBUG_USER_DIR )
250printk("\n[%s] thread[%x,%x] allocated vseg ANON / base %x / size %x\n",
[614]251__FUNCTION__, this->process->pid, this->trdid, vseg->min, vseg->max - vseg->min );
[613]252#endif
253
254// check vseg size
255assert( (total_pages == hal_remote_l32( XPTR( ref_cxy , &vseg->vpn_size ) ) ),
256"unconsistent vseg size for dirent array" );
257
[629]258    // build extended pointer on reference process GPT
[613]259    gpt_xp         = XPTR( ref_cxy , &ref_ptr->vmm.gpt );
260
[629]261    // build PTE attributes
262    attr = GPT_MAPPED   |
263           GPT_SMALL    |
264           GPT_READABLE |
265           GPT_CACHABLE |
266           GPT_USER     ;
267
[613]268    // get first vpn from vseg descriptor
[629]269    vpn_base = hal_remote_l32( XPTR( ref_cxy , &vseg->vpn_base ) );
[613]270
271    // scan the list of allocated physical pages to map
[629]272    // all physical pages in the reference process GPT
[613]273    page_id = 0;
274    while( list_is_empty( &root ) == false )
275    {
276        // get pointer on first page descriptor
277        page = LIST_FIRST( &root , page_t , list );
278
279        // compute ppn
280        ppn = ppm_page2ppn( XPTR( local_cxy , page ) );
[629]281
282        // compute vpn
283        vpn = vpn_base + page_id;
[613]284       
[629]285        // lock the PTE (and create PT2 if required)
286        error = hal_gpt_lock_pte( gpt_xp,
287                                  vpn,
288                                  &fake_attr,
289                                  &fake_ppn );
[613]290        if( error )
291        {
292            printk("\n[ERROR] in %s : cannot map vpn %x in GPT\n",
[629]293            __FUNCTION__, vpn );
[619]294
295            // delete the vseg
[629]296            if( ref_cxy == local_cxy)
297                vmm_delete_vseg( ref_pid, vpn_base << CONFIG_PPM_PAGE_SHIFT );
298            else 
299                rpc_vmm_delete_vseg_client( ref_cxy, ref_pid, vpn_base << CONFIG_PPM_PAGE_SHIFT );
[619]300
[613]301            // release the user_dir descriptor
[635]302            req.type = KMEM_KCM;
[613]303            req.ptr  = dir;
304            kmem_free( &req );
305            return NULL;
306        }
307
[629]308        // set PTE in GPT                         
309        hal_gpt_set_pte( gpt_xp,
310                         vpn,
311                         attr,
312                         ppn );
313
[614]314#if DEBUG_USER_DIR
[613]315if( cycle > DEBUG_USER_DIR )
316printk("\n[%s] thread[%x,%x] mapped vpn %x to ppn %x\n",
[614]317__FUNCTION__, this->process->pid, this->trdid, vpn + page_id, ppn );
[613]318#endif
319
320        // remove the page from temporary list
321        list_unlink( &page->list );
322
323        page_id++;
324
325    }  // end map loop
326
327// check number of pages
328assert( (page_id == total_pages) , "unconsistent pages number\n" );
329
330    // initialise user_dir_t structure
331    dir->current = 0;
332    dir->entries = total_dirents;
[629]333    dir->ident   = (intptr_t)(vpn_base << CONFIG_PPM_PAGE_SHIFT);
[613]334
335    // build extended pointers on root and lock of user_dir xlist in ref process
336    xptr_t root_xp  = XPTR( ref_cxy , &ref_ptr->dir_root );
337    xptr_t lock_xp  = XPTR( ref_cxy , &ref_ptr->dir_lock );
338
339    // build extended pointer on list field in user_dir structure
340    xptr_t entry_xp = XPTR( local_cxy , &dir->list );
341
342    // get lock protecting open directories list
343    remote_queuelock_acquire( lock_xp );
344
345    // register user_dir_t in reference process 
346    xlist_add_first( root_xp , entry_xp );
347
348    // release lock protecting  open directorie list
349    remote_queuelock_release( lock_xp );
350
351#if DEBUG_USER_DIR
352cycle = (uint32_t)hal_get_cycles();
353if( cycle > DEBUG_USER_DIR )
354printk("\n[%s] thread[%x,%x] created user_dir (%x,%x) / %d entries / cycle %d\n",
[614]355__FUNCTION__, this->process->pid, this->trdid, local_cxy, dir, total_dirents, cycle );
[613]356#endif
357
358    return dir;
359
360user_dir_create_failure:
361
362    // release local user_dir_t structure
[635]363    req.type = KMEM_KCM;
[613]364    req.ptr  = dir;
365    kmem_free( &req );
366
367    // release local physical pages
368    while( list_is_empty( &root ) == false )
369    {
370        page = LIST_FIRST( &root , page_t , list );
[635]371
372        // get base from page descriptor pointer
373        base = GET_PTR( ppm_page2base( XPTR( local_cxy , page ) ) );
374 
375        req.type  = KMEM_PPM;
376        req.ptr   = base;
[613]377        kmem_free( &req );
378    }
379
380    return NULL;
381
382}  // end user_dir_create()
383
[614]384////////////////////////////////////////
385void user_dir_destroy( user_dir_t * dir,
386                       xptr_t       ref_xp )
[613]387{
[614]388    thread_t     * this;       // local pointer on calling thread
[613]389    cluster_t    * cluster;    // local pointer on local cluster
390    intptr_t       ident;      // user pointer on dirent array
[614]391    xptr_t         ref_pid;    // reference process PID
[613]392    cxy_t          ref_cxy;    // reference process cluster identifier
393    process_t    * ref_ptr;    // local pointer on reference process
394    xptr_t         root_xp;    // root of xlist
395    xptr_t         lock_xp;    // extended pointer on lock protecting xlist
396    xptr_t         iter_xp;    // iteratot in xlist
397    reg_t          save_sr;    // for critical section
398    cxy_t          owner_cxy;  // owner process cluster
399    lpid_t         lpid;       // process local index
400    rpc_desc_t     rpc;        // rpc descriptor
[619]401    uint32_t       responses;  // response counter
[613]402     
403    this    = CURRENT_THREAD;
404    cluster = LOCAL_CLUSTER;
405
[614]406    // get cluster, local pointer, and PID of reference user process
407    ref_cxy = GET_CXY( ref_xp );
408    ref_ptr = GET_PTR( ref_xp );
409    ref_pid = hal_remote_l32( XPTR( ref_cxy , &ref_ptr->pid ) );
410
[613]411#if DEBUG_USER_DIR
412uint32_t cycle = (uint32_t)hal_get_cycles();
413if( cycle > DEBUG_USER_DIR )
[614]414printk("\n[%s] thread[%x,%x] enter for user_dir (%x,%x) and process %x / cycle %d\n",
[633]415__FUNCTION__, this->process->pid, this->trdid, local_cxy, dir, ref_pid, cycle );
[613]416#endif
417
418    // get user pointer on dirent array
419    ident = dir->ident;
420
421    // build extended pointer on lock protecting open directories list
422    lock_xp = XPTR( ref_cxy , &ref_ptr->dir_lock );
423
424    // get lock protecting open directories list
425    remote_queuelock_acquire( lock_xp );
426
427    // remove dir from reference process xlist
428    xlist_unlink( XPTR( local_cxy , &dir->list ) );
429
430    // release lock protecting open directories list
431    remote_queuelock_release( lock_xp );
432
433    // To delete all copies of the vseg containing the dirent array, the client thread
434    // send parallel RPCs to all clusters containing a client process copy (including
435    // the local cluster). It blocks and deschedules when all RPCs have been sent,
436    // to wait all RPC responses, and will be unblocked by the last RPC server thread.
437    // It allocates a - shared - RPC descriptor in the stack,  because all parallel
438    // server threads use the same input arguments, and the same response field.
439
440    // get owner cluster identifier and process lpid
[614]441    owner_cxy = CXY_FROM_PID( ref_pid );
442    lpid      = LPID_FROM_PID( ref_pid );
[613]443
444    // get root of list of copies and lock from owner cluster
445    root_xp   = XPTR( owner_cxy , &cluster->pmgr.copies_root[lpid] );
446    lock_xp   = XPTR( owner_cxy , &cluster->pmgr.copies_lock[lpid] );
447
448    // mask IRQs
449    hal_disable_irq( &save_sr);
450
451    // client thread blocks itself
452    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC );
453
[619]454    // initialize responses counter
455    responses = 0;
456
457    // initialize a shared RPC descriptor
458    // can be shared, because no out arguments
459    rpc.rsp       = &responses;
[613]460    rpc.blocking  = false;
461    rpc.index     = RPC_VMM_DELETE_VSEG;
462    rpc.thread    = this;
463    rpc.lid       = this->core->lid;
[614]464    rpc.args[0]   = ref_pid;
[613]465    rpc.args[1]   = ident;
466
467    // take the lock protecting process copies
468    remote_queuelock_acquire( lock_xp );
469
470    // scan list of process copies
471    XLIST_FOREACH( root_xp , iter_xp )
472    {
473        // get extended pointer and cluster of process
474        xptr_t      process_xp  = XLIST_ELEMENT( iter_xp , process_t , copies_list );
475        cxy_t       process_cxy = GET_CXY( process_xp );
476
477        // atomically increment responses counter
[619]478        hal_atomic_add( &responses , 1 );
[613]479
[619]480        // send RPC to target cluster 
481        rpc_send( process_cxy , &rpc );
482    }
[613]483
484    // release the lock protecting process copies
485    remote_queuelock_release( lock_xp );
486
487    // client thread deschedule
[619]488    sched_yield("blocked on rpc_vmm_delete_vseg");
[613]489 
490    // restore IRQs
491    hal_restore_irq( save_sr);
492
493    // release local user_dir_t structure
494    kmem_req_t  req;
[635]495    req.type = KMEM_KCM;
[613]496    req.ptr  = dir;
497    kmem_free( &req );
498
499#if DEBUG_USER_DIR
500cycle = (uint32_t)hal_get_cycles();
501if( cycle > DEBUG_USER_DIR )
502printk("\n[%s] thread[%x,%x] deleted user_dir (%x,%x) / cycle %d\n",
[633]503__FUNCTION__, this->process->pid, this->trdid, local_cxy, dir, cycle );
[613]504#endif
505
506}  // end user_dir_destroy()
Note: See TracBrowser for help on using the repository browser.