/*
 * vmm.c - virtual memory manager related operations interface.
 *
 * Authors   Ghassan Almaless (2008,2009,2010,2011, 2012)
 *           Mohamed Lamine Karaoui (2015)
 *           Alain Greiner (2016)
 *
 * Copyright (c) UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <kernel_config.h>
#include <hal_kernel_types.h>
#include <hal_special.h>
#include <hal_gpt.h>
#include <hal_vmm.h>
#include <printk.h>
#include <memcpy.h>
#include <rwlock.h>
#include <list.h>
#include <xlist.h>
#include <bits.h>
#include <process.h>
#include <thread.h>
#include <vseg.h>
#include <cluster.h>
#include <scheduler.h>
#include <vfs.h>
#include <mapper.h>
#include <page.h>
#include <kmem.h>
#include <vmm.h>
#include <cluster_info.h>

//////////////////////////////////////////////////////////////////////////////////
//   Extern global variables
//////////////////////////////////////////////////////////////////////////////////

extern  process_t  process_zero;   // defined in cluster.c file


///////////////////////////////////////
// Builds the initial VMM state of <process> in the local cluster: an empty VSL, the
// three fixed "kentry", "args" and "envs" vsegs, an empty GPT, and the STACK and MMAP
// allocators.
// @ process : pointer on the process descriptor owning the VMM to initialize.
// @ returns 0 on success / returns -1 if a vseg cannot be registered, or if the GPT
//   cannot be created or initialized.
// NOTE(review): on failure, the vsegs already registered in the VSL are not released
// by this function.
error_t vmm_init( process_t * process )
{
    error_t   error;
    vseg_t  * vseg_kentry;
    vseg_t  * vseg_args;
    vseg_t  * vseg_envs;
    intptr_t  base;
    intptr_t  size;
    uint32_t  i;

#if DEBUG_VMM_INIT
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_INIT )
printk("\n[DBG] %s : thread %x enter for process %x / cycle %d\n", 
__FUNCTION__ , CURRENT_THREAD , process->pid , cycle );
#endif

    // get pointer on VMM
    vmm_t   * vmm = &process->vmm;

    // initialize local list of vsegs (VSL) and the lock protecting it
    vmm->vsegs_nr = 0;
    xlist_root_init( XPTR( local_cxy , &vmm->vsegs_root ) );
    remote_rwlock_init( XPTR( local_cxy , &vmm->vsegs_lock ) );

    // the kentry / args / envs vsegs must fit below the ELF zone
    assert( ((CONFIG_VMM_KENTRY_SIZE + CONFIG_VMM_ARGS_SIZE + CONFIG_VMM_ENVS_SIZE) 
            <= CONFIG_VMM_ELF_BASE) , "UTILS zone too small\n" );

    // the stack allocator uses one bit per stack slot in a 32 bits bitmap
    assert( (CONFIG_THREAD_MAX_PER_CLUSTER <= 32) ,
            "no more than 32 threads per cluster for a single process\n");

    assert( ((CONFIG_VMM_STACK_SIZE * CONFIG_THREAD_MAX_PER_CLUSTER) <=
             (CONFIG_VMM_VSPACE_SIZE - CONFIG_VMM_STACK_BASE)) ,
             "STACK zone too small\n");

    // register kentry vseg in VSL
    base = CONFIG_VMM_KENTRY_BASE << CONFIG_PPM_PAGE_SHIFT;
    size = CONFIG_VMM_KENTRY_SIZE << CONFIG_PPM_PAGE_SHIFT;

    vseg_kentry = vmm_create_vseg( process,
                                   VSEG_TYPE_CODE,
                                   base,
                                   size,
                                   0,             // file_offset unused
                                   0,             // file_size unused
                                   XPTR_NULL,     // mapper_xp unused
                                   local_cxy );

    if( vseg_kentry == NULL )
    {
        printk("\n[ERROR] in %s : cannot register kentry vseg\n", __FUNCTION__ );
        return -1;
    }

    // NOTE(review): <base> is a byte address here (page index << page shift), but it is
    // stored in a field named *_vpn_base - confirm the unit expected by the readers.
    vmm->kent_vpn_base = base;

    // register args vseg in VSL
    base = (CONFIG_VMM_KENTRY_BASE + 
            CONFIG_VMM_KENTRY_SIZE ) << CONFIG_PPM_PAGE_SHIFT;
    size = CONFIG_VMM_ARGS_SIZE << CONFIG_PPM_PAGE_SHIFT;

    vseg_args = vmm_create_vseg( process,
                                 VSEG_TYPE_DATA,
                                 base,
                                 size,
                                 0,             // file_offset unused
                                 0,             // file_size unused
                                 XPTR_NULL,     // mapper_xp unused
                                 local_cxy );

    if( vseg_args == NULL )
    {
        printk("\n[ERROR] in %s : cannot register args vseg\n", __FUNCTION__ );
        return -1;
    }

    vmm->args_vpn_base = base;

    // register the envs vseg in VSL
    base = (CONFIG_VMM_KENTRY_BASE + 
            CONFIG_VMM_KENTRY_SIZE +
            CONFIG_VMM_ARGS_SIZE   ) << CONFIG_PPM_PAGE_SHIFT;
    size = CONFIG_VMM_ENVS_SIZE << CONFIG_PPM_PAGE_SHIFT;

    vseg_envs = vmm_create_vseg( process,
                                 VSEG_TYPE_DATA,
                                 base,
                                 size,
                                 0,             // file_offset unused
                                 0,             // file_size unused
                                 XPTR_NULL,     // mapper_xp unused
                                 local_cxy );

    if( vseg_envs == NULL )
    {
        printk("\n[ERROR] in %s : cannot register envs vseg\n", __FUNCTION__ );
        return -1;
    }

    vmm->envs_vpn_base = base;

    // create GPT (empty) : a failure must be reported to the caller,
    // because the GPT is used by the next step
    error = hal_gpt_create( &vmm->gpt );

    if( error ) 
    {
        printk("\n[ERROR] in %s : cannot create GPT\n", __FUNCTION__ );
        return -1;
    }

    // initialize GPT (architecture specific)
    // (For TSAR, identity map the kentry_vseg)
    error = hal_vmm_init( vmm );

    if( error ) 
    {
        printk("\n[ERROR] in %s : cannot initialize GPT\n", __FUNCTION__ );
        return -1;
    }

    // initialize STACK allocator
    vmm->stack_mgr.bitmap   = 0;
    vmm->stack_mgr.vpn_base = CONFIG_VMM_STACK_BASE;
    spinlock_init( &vmm->stack_mgr.lock );

    // initialize MMAP allocator : the zone covers [HEAP_BASE , STACK_BASE[
    vmm->mmap_mgr.vpn_base        = CONFIG_VMM_HEAP_BASE;
    vmm->mmap_mgr.vpn_size        = CONFIG_VMM_STACK_BASE - CONFIG_VMM_HEAP_BASE;
    vmm->mmap_mgr.first_free_vpn  = CONFIG_VMM_HEAP_BASE;
    spinlock_init( &vmm->mmap_mgr.lock );

    // one (initially empty) list of zombi vsegs per power of 2 size
    for( i = 0 ; i < 32 ; i++ ) list_root_init( &vmm->mmap_mgr.zombi_list[i] );

    // initialize instrumentation counters
    vmm->pgfault_nr = 0;

    hal_fence();

#if DEBUG_VMM_INIT
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_INIT )
printk("\n[DBG] %s : thread %x exit for process %x / entry_point = %x / cycle %d\n", 
__FUNCTION__ , CURRENT_THREAD , process->pid , process->vmm.entry_point , cycle );
#endif

    return 0;

}  // end vmm_init()

//////////////////////////////////////
// Prints on the kernel terminal the list of vsegs registered in the local VSL of
// <process>, and - when <mapping> is true - every mapped entry found in the local GPT
// for the pages covered by each vseg. The VSL is scanned under its read lock.
// @ process : pointer on the local process descriptor.
// @ mapping : also display the mapped GPT entries when true.
void vmm_display( process_t * process,
                  bool_t      mapping )
{
    vmm_t  * vmm         = &process->vmm;
    gpt_t  * gpt         = &vmm->gpt;
    xptr_t   vsl_root_xp = XPTR( local_cxy , &vmm->vsegs_root );
    xptr_t   vsl_lock_xp = XPTR( local_cxy , &vmm->vsegs_lock );
    xptr_t   cursor_xp;
    vseg_t * current;

    printk("\n***** VSL and GPT(%x) for process %x in cluster %x\n\n",
    gpt->ptr , process->pid , local_cxy );

    // the VSL must not change while it is traversed
    remote_rwlock_rd_lock( vsl_lock_xp );

    XLIST_FOREACH( vsl_root_xp , cursor_xp )
    {
        // get local pointer on the vseg containing this list entry
        current = GET_PTR( XLIST_ELEMENT( cursor_xp , vseg_t , xlist ) );

        printk(" - %s : base = %X / size = %X / npages = %d\n",
        vseg_type_str( current->type ) , current->min ,
        current->max - current->min , current->vpn_size );

        if( mapping )
        {
            uint32_t attr;
            ppn_t    ppn;
            vpn_t    first = current->vpn_base;
            vpn_t    count = current->vpn_size;
            vpn_t    vpn   = first;

            // display only the pages that are actually mapped in the GPT
            while( vpn < (first + count) )
            {
                hal_gpt_get_pte( gpt , vpn , &attr , &ppn );

                if( attr & GPT_MAPPED )
                {
                    printk("    . vpn = %X / attr = %X / ppn = %X\n", vpn , attr , ppn );
                }

                vpn++;
            }
        }
    }

    remote_rwlock_rd_unlock( vsl_lock_xp );

}  // vmm_display()

///////////////////////////////////////////////
// Writes the same GPT entry (<attr> / <ppn> for page <vpn>) in the GPT of every copy
// of <process> registered in the copies list rooted in the owner cluster.
// Must be called in the reference cluster of the process (checked by an assert).
// @ process : pointer on the local (reference) process descriptor.
// @ vpn     : virtual page number of the entry to update.
// @ attr    : new attributes.
// @ ppn     : new physical page number.
void vmm_global_update_pte( process_t * process,
                            vpn_t       vpn,
                            uint32_t    attr,
                            ppn_t       ppn )
{
    xlist_entry_t * copies_root_ptr;   // local pointer on root of copies list
    xptr_t          copies_root_xp;    // extended pointer on root of copies list
    xptr_t          copies_iter_xp;    // iterator on copies list

    xptr_t          copy_xp;           // extended pointer on one process copy
    cxy_t           copy_cxy;          // cluster of this copy
    process_t     * copy_ptr;          // local pointer on this copy

#if DEBUG_VMM_UPDATE_PTE
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_UPDATE_PTE < cycle )
printk("\n[DBG] %s : thread %x enter for process %x / vpn %x / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD , process->pid , vpn , cycle );
#endif

    // check cluster is reference
    assert( (GET_CXY( process->ref_xp ) == local_cxy) ,
    "not called in reference cluster\n");

    // the list of copies is rooted in the owner cluster, in the slot indexed by
    // the local pid : both are extracted from the process identifier
    pid_t  pid        = process->pid;
    cxy_t  owner_cxy  = CXY_FROM_PID( pid );
    lpid_t owner_lpid = LPID_FROM_PID( pid );

    copies_root_ptr = &LOCAL_CLUSTER->pmgr.copies_root[owner_lpid];
    copies_root_xp  = XPTR( owner_cxy , copies_root_ptr );

    // update the GPT of each process copy
    XLIST_FOREACH( copies_root_xp , copies_iter_xp )
    {
        // get cluster and local pointer on the process copy
        copy_xp  = XLIST_ELEMENT( copies_iter_xp , process_t , copies_list );
        copy_ptr = GET_PTR( copy_xp );
        copy_cxy = GET_CXY( copy_xp );

#if (DEBUG_VMM_UPDATE_PTE & 0x1)
if( DEBUG_VMM_UPDATE_PTE < cycle )
printk("\n[DBG] %s : thread %x handling process %x in cluster %x\n",
__FUNCTION__ , CURRENT_THREAD , process->pid , copy_cxy );
#endif

        // update the GPT of this copy through an extended pointer
        hal_gpt_update_pte( XPTR( copy_cxy , &copy_ptr->vmm.gpt ), vpn, attr, ppn );
    }  

#if DEBUG_VMM_UPDATE_PTE
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_UPDATE_PTE < cycle )
printk("\n[DBG] %s : thread %x exit for process %x / vpn %x / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD , process->pid , vpn , cycle );
#endif

}  // end vmm_global_update_pte()

///////////////////////////////////////
// For every copy of <process> found in the copies list rooted in the owner cluster,
// and for every DATA / ANON / REMOTE vseg of the (local) reference VSL, calls
// hal_gpt_set_cow() on the GPT of that copy for the pages covered by the vseg.
// When the handled copy is the local one, it also increments the "forks" counter
// of each physical page mapped in the local GPT for these vsegs.
// Must be called in the reference cluster of the process (checked by an assert).
// @ process : pointer on the local (reference) process descriptor.
void vmm_set_cow( process_t * process )
{
    vmm_t         * ref_vmm;           // local pointer on reference VMM

    xlist_entry_t * copies_root_ptr;   // local pointer on root of copies list
    xptr_t          copies_root_xp;    // extended pointer on root of copies list
    xptr_t          copies_iter_xp;    // iterator on copies list

    xptr_t          copy_xp;           // extended pointer on one process copy
    cxy_t           copy_cxy;          // cluster of this copy
    process_t     * copy_ptr;          // local pointer on this copy
    xptr_t          copy_gpt_xp;       // extended pointer on GPT of this copy

    xptr_t          vsl_root_xp;       // extended pointer on root of reference VSL
    xptr_t          vsl_iter_xp;       // iterator on reference VSL

    xptr_t          vseg_xp;
    vseg_t        * vseg;

    pid_t           pid;
    cxy_t           owner_cxy;
    lpid_t          owner_lpid;

#if DEBUG_VMM_SET_COW
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_SET_COW < cycle )
printk("\n[DBG] %s : thread %x enter for process %x / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD , process->pid , cycle );
#endif

    // check cluster is reference
    assert( (GET_CXY( process->ref_xp ) == local_cxy) ,
    "local cluster is not process reference cluster\n");

    ref_vmm = &process->vmm;

    // build extended pointer on root of process copies list in owner cluster
    pid             = process->pid;
    owner_cxy       = CXY_FROM_PID( pid );
    owner_lpid      = LPID_FROM_PID( pid );
    copies_root_ptr = &LOCAL_CLUSTER->pmgr.copies_root[owner_lpid];
    copies_root_xp  = XPTR( owner_cxy , copies_root_ptr );

    // build extended pointer on root of the reference VSL
    vsl_root_xp = XPTR( local_cxy , &ref_vmm->vsegs_root ); 

    // outer loop : all process copies
    XLIST_FOREACH( copies_root_xp , copies_iter_xp )
    {
        // get cluster and local pointer on the process copy
        copy_xp  = XLIST_ELEMENT( copies_iter_xp , process_t , copies_list );
        copy_ptr = GET_PTR( copy_xp );
        copy_cxy = GET_CXY( copy_xp );

#if (DEBUG_VMM_SET_COW & 0x1)
if( DEBUG_VMM_SET_COW < cycle )
printk("\n[DBG] %s : thread %x handling process %x in cluster %x\n",
__FUNCTION__ , CURRENT_THREAD , process->pid , copy_cxy );
#endif

        // get extended pointer on the GPT of this copy
        copy_gpt_xp = XPTR( copy_cxy , &copy_ptr->vmm.gpt );

        // inner loop : all vsegs of the (local) reference VSL
        XLIST_FOREACH( vsl_root_xp , vsl_iter_xp )
        {
            vseg_xp = XLIST_ELEMENT( vsl_iter_xp , vseg_t , xlist );
            vseg    = GET_PTR( vseg_xp );

            assert( (GET_CXY( vseg_xp ) == local_cxy) ,
            "all vsegs in reference VSL must be local\n" );

            // get vseg type, first page and number of pages
            uint32_t vseg_type = vseg->type;
            vpn_t    first_vpn = vseg->vpn_base;
            vpn_t    nb_pages  = vseg->vpn_size;

#if (DEBUG_VMM_SET_COW & 0x1)
if( DEBUG_VMM_SET_COW < cycle )
printk("\n[DBG] %s : thread %x handling vseg %s / vpn_base = %x / vpn_size = %x\n",
__FUNCTION__, CURRENT_THREAD , vseg_type_str(vseg_type), first_vpn, nb_pages );
#endif

            // only DATA, ANON and REMOTE vsegs are concerned
            bool_t cow_vseg = (vseg_type == VSEG_TYPE_DATA) ||
                              (vseg_type == VSEG_TYPE_ANON) ||
                              (vseg_type == VSEG_TYPE_REMOTE);

            if( cow_vseg )
            {
                // update flags in the GPT of the handled copy
                hal_gpt_set_cow( copy_gpt_xp , first_vpn , nb_pages ); 

                // the pending forks counters are incremented only once,
                // when the handled copy is the local (reference) one
                if( copy_cxy == local_cxy )
                {
                    // the reference GPT is the local GPT
                    gpt_t  * ref_gpt = GET_PTR( copy_gpt_xp );
                    vpn_t    vpn     = first_vpn;
                    uint32_t attr;
                    ppn_t    ppn;

                    // scan all pages in vseg 
                    while( vpn < (first_vpn + nb_pages) )
                    {
                        // get page attributes and PPN from reference GPT
                        hal_gpt_get_pte( ref_gpt , vpn , &attr , &ppn ); 

                        // only mapped pages have a page descriptor to update
                        if( attr & GPT_MAPPED )
                        {
                            // get cluster and local pointer on page descriptor
                            xptr_t   page_xp  = ppm_ppn2page( ppn );
                            cxy_t    page_cxy = GET_CXY( page_xp );
                            page_t * page_ptr = GET_PTR( page_xp );

                            // get extended pointers on "forks" and "lock"
                            xptr_t   forks_xp = XPTR( page_cxy , &page_ptr->forks );
                            xptr_t   lock_xp  = XPTR( page_cxy , &page_ptr->lock );

                            // increment "forks" under the page lock
                            remote_spinlock_lock( lock_xp );
                            hal_remote_atomic_add( forks_xp , 1 );
                            remote_spinlock_unlock( lock_xp );
                        }

                        vpn++;
                    }   // end loop on vpn
                }   // end if local
            }   // end if vseg type
        }   // end loop on vsegs
    }   // end loop on process copies
 
#if DEBUG_VMM_SET_COW
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_SET_COW < cycle )
printk("\n[DBG] %s : thread %x exit for process %x / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD , process->pid , cycle );
#endif

}  // end vmm_set_cow()

/////////////////////////////////////////////////
// Builds the VMM of the (local) child process from the VMM of the - possibly remote -
// parent process :
// - all parent vsegs but the STACK vsegs are copied in the child VSL,
// - for all copied vsegs but the CODE vsegs, the parent GPT entries are copied in the
//   child GPT (with the COW flag for all types but FILE), and the "forks" counter of
//   each mapped physical page is incremented,
// - the child STACK and MMAP allocators are reset, and the base addresses and the
//   entry point are copied from the parent VMM.
// The parent VSL is scanned under its read lock, that is released on all exit paths.
// @ child_process     : local pointer on the child process descriptor.
// @ parent_process_xp : extended pointer on the parent process descriptor.
// @ returns 0 on success / returns -1 on failure. When the failure happens after
//   the child GPT creation, the child VMM is released by vmm_destroy().
error_t vmm_fork_copy( process_t * child_process,
                       xptr_t      parent_process_xp )
{
    error_t     error;
    cxy_t       parent_cxy;
    process_t * parent_process;
    vmm_t     * parent_vmm;
    xptr_t      parent_lock_xp;
    vmm_t     * child_vmm;
    xptr_t      iter_xp;
    xptr_t      parent_vseg_xp;
    vseg_t    * parent_vseg;
    vseg_t    * child_vseg;
    uint32_t    type;
    bool_t      cow;
    vpn_t       vpn;            
    vpn_t       vpn_base;
    vpn_t       vpn_size;
    xptr_t      page_xp;        // extended pointer on page descriptor
    page_t    * page_ptr;
    cxy_t       page_cxy;
    xptr_t      forks_xp;       // extended pointer on forks counter in page descriptor
    xptr_t      lock_xp;        // extended pointer on lock protecting the forks counter
    xptr_t      parent_root_xp;
    bool_t      mapped; 
    ppn_t       ppn;
    uint32_t    i;

#if DEBUG_VMM_FORK_COPY
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_FORK_COPY < cycle )
printk("\n[DBG] %s : thread %x enter / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD, cycle );
#endif

    // get parent process cluster and local pointer
    parent_cxy     = GET_CXY( parent_process_xp );
    parent_process = GET_PTR( parent_process_xp );

    // get local pointers on parent and child VMM
    parent_vmm = &parent_process->vmm; 
    child_vmm  = &child_process->vmm;

    // get extended pointer on lock protecting the parent VSL
    parent_lock_xp = XPTR( parent_cxy , &parent_vmm->vsegs_lock );

    // initialize the lock protecting the child VSL
    remote_rwlock_init( XPTR( local_cxy , &child_vmm->vsegs_lock ) );

    // initialize the child VSL as empty
    xlist_root_init( XPTR( local_cxy, &child_vmm->vsegs_root ) );
    child_vmm->vsegs_nr = 0;

    // The child allocators are initialized before any vseg is copied, because
    // the error paths below call vmm_destroy(), that scans the zombi lists.

    // initialize the child VMM STACK allocator
    child_vmm->stack_mgr.bitmap   = 0;
    child_vmm->stack_mgr.vpn_base = CONFIG_VMM_STACK_BASE;
    spinlock_init( &child_vmm->stack_mgr.lock );

    // initialize the child VMM MMAP allocator
    child_vmm->mmap_mgr.vpn_base        = CONFIG_VMM_HEAP_BASE;
    child_vmm->mmap_mgr.vpn_size        = CONFIG_VMM_STACK_BASE - CONFIG_VMM_HEAP_BASE;
    child_vmm->mmap_mgr.first_free_vpn  = CONFIG_VMM_HEAP_BASE;
    spinlock_init( &child_vmm->mmap_mgr.lock );
    for( i = 0 ; i < 32 ; i++ ) list_root_init( &child_vmm->mmap_mgr.zombi_list[i] );

    // initialize instrumentation counters
    child_vmm->pgfault_nr = 0;

    // create child GPT
    error = hal_gpt_create( &child_vmm->gpt );

    if( error )
    {
        // nothing to release : the child VSL is empty and there is no GPT
        printk("\n[ERROR] in %s : cannot create GPT\n", __FUNCTION__ );
        return -1;
    }

    // build extended pointer on parent VSL
    parent_root_xp = XPTR( parent_cxy , &parent_vmm->vsegs_root );

    // take the lock protecting the parent VSL
    remote_rwlock_rd_lock( parent_lock_xp );

    // loop on parent VSL xlist
    XLIST_FOREACH( parent_root_xp , iter_xp )
    {
        // get local and extended pointers on current parent vseg
        parent_vseg_xp = XLIST_ELEMENT( iter_xp , vseg_t , xlist );
        parent_vseg    = GET_PTR( parent_vseg_xp );

        // get vseg type
        type = hal_remote_lw( XPTR( parent_cxy , &parent_vseg->type ) );
        
#if DEBUG_VMM_FORK_COPY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_FORK_COPY < cycle )
printk("\n[DBG] %s : thread %x found parent vseg %s / vpn_base = %x / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD, vseg_type_str(type),
hal_remote_lw( XPTR( parent_cxy , &parent_vseg->vpn_base ) ) , cycle );
#endif

        // all parent vsegs - but STACK - must be copied in child VSL
        if( type != VSEG_TYPE_STACK )
        {
            // allocate memory for a new child vseg
            child_vseg = vseg_alloc();
            if( child_vseg == NULL )
            {
                // the parent VSL lock must not stay held after return
                remote_rwlock_rd_unlock( parent_lock_xp );

                // release all vsegs already allocated to the child
                vmm_destroy( child_process );
                printk("\n[ERROR] in %s : cannot create vseg for child\n", __FUNCTION__ );
                return -1;
            }

            // copy parent vseg to child vseg
            vseg_init_from_ref( child_vseg , parent_vseg_xp );

            // register child vseg in child VSL
            vseg_attach( child_vmm , child_vseg );

#if DEBUG_VMM_FORK_COPY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_FORK_COPY < cycle )
printk("\n[DBG] %s : thread %x copied vseg %s / vpn_base = %x to child VSL / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD , vseg_type_str(type),
hal_remote_lw( XPTR( parent_cxy , &parent_vseg->vpn_base ) ) , cycle );
#endif

            // copy DATA, MMAP, REMOTE, FILE parent GPT entries to child GPT
            if( type != VSEG_TYPE_CODE )
            {
                // activate the COW for DATA, MMAP, REMOTE vsegs only
                cow = ( type != VSEG_TYPE_FILE );

                vpn_base = child_vseg->vpn_base;
                vpn_size = child_vseg->vpn_size;

                // scan pages in parent vseg
                for( vpn = vpn_base ; vpn < (vpn_base + vpn_size) ; vpn++ )
                {
                    error = hal_gpt_pte_copy( &child_vmm->gpt,
                                              XPTR( parent_cxy , &parent_vmm->gpt ),
                                              vpn,
                                              cow,
                                              &ppn,
                                              &mapped );
                    if( error )
                    {
                        // the parent VSL lock must not stay held after return
                        remote_rwlock_rd_unlock( parent_lock_xp );

                        vmm_destroy( child_process );
                        printk("\n[ERROR] in %s : cannot copy GPT\n", __FUNCTION__ );
                        return -1;
                    }

                    // increment pending forks counter in page if mapped
                    if( mapped )
                    {
                        // get pointers and cluster on page descriptor
                        page_xp  = ppm_ppn2page( ppn );
                        page_cxy = GET_CXY( page_xp );
                        page_ptr = GET_PTR( page_xp );

                        // get extended pointers on "forks" and "lock"
                        forks_xp = XPTR( page_cxy , &page_ptr->forks );
                        lock_xp  = XPTR( page_cxy , &page_ptr->lock );

                        // increment "forks"
                        remote_spinlock_lock( lock_xp );
                        hal_remote_atomic_add( forks_xp , 1 );
                        remote_spinlock_unlock( lock_xp );

#if DEBUG_VMM_FORK_COPY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_FORK_COPY < cycle )
printk("\n[DBG] %s : thread %x copied vpn %x to child GPT / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD , vpn , cycle );
#endif
                    }
                }
            }   // end if no code & no stack
        }   // end if no stack
    }   // end loop on vsegs

    // release the parent vsegs lock
    remote_rwlock_rd_unlock( parent_lock_xp );

    // initialize child GPT (architecture specific)
    // => For TSAR, identity map the kentry_vseg
    error = hal_vmm_init( child_vmm );

    if( error )
    {
        // release the child vsegs and the child GPT, as done by the other error paths
        vmm_destroy( child_process );
        printk("\n[ERROR] in %s : cannot initialize GPT\n", __FUNCTION__ );
        return -1;
    }

    // copy base addresses from parent VMM to child VMM
    child_vmm->kent_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->kent_vpn_base));
    child_vmm->args_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->args_vpn_base));
    child_vmm->envs_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->envs_vpn_base));
    child_vmm->heap_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->heap_vpn_base));
    child_vmm->code_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->code_vpn_base));
    child_vmm->data_vpn_base = (vpn_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->data_vpn_base));

    child_vmm->entry_point = (intptr_t)hal_remote_lpt(XPTR(parent_cxy, &parent_vmm->entry_point));

    hal_fence();

#if DEBUG_VMM_FORK_COPY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_FORK_COPY < cycle )
printk("\n[DBG] %s : thread %x exit successfully / cycle %d\n",
__FUNCTION__ , CURRENT_THREAD , cycle );
#endif

    return 0;

}  // vmm_fork_copy()

///////////////////////////////////////
// Releases all resources attached to the local VMM of <process> :
// - every vseg registered in the VSL is unmapped, removed from the VSL and freed,
//   under the VSL write lock,
// - every vseg found in the zombi lists of the MMAP allocator is freed,
// - the GPT itself is released.
// @ process : pointer on the local process descriptor.
void vmm_destroy( process_t * process )
{
    xptr_t   vseg_xp;
    vseg_t * vseg;
    uint32_t i;

#if DEBUG_VMM_DESTROY
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_DESTROY < cycle )
printk("\n[DBG] %s : thread %x enter for process %x in cluster %x / cycle %d\n",
__FUNCTION__, CURRENT_THREAD->trdid, process->pid, local_cxy, cycle );
#endif

#if (DEBUG_VMM_DESTROY & 1 )
if( DEBUG_VMM_DESTROY < cycle )
vmm_display( process , true );
#endif

    // get pointer on local VMM
    vmm_t  * vmm = &process->vmm;

    // get extended pointer on VSL root and VSL lock
    xptr_t   root_xp = XPTR( local_cxy , &vmm->vsegs_root );
    xptr_t   lock_xp = XPTR( local_cxy , &vmm->vsegs_lock );

    // get lock protecting vseg list
    remote_rwlock_wr_lock( lock_xp );

    // remove all user vsegs registered in VSL
    while( !xlist_is_empty( root_xp ) )
    {
        // get pointer on first vseg in VSL
        vseg_xp = XLIST_FIRST_ELEMENT( root_xp , vseg_t , xlist );
        vseg    = GET_PTR( vseg_xp );

#if( DEBUG_VMM_DESTROY & 1 )
// the fields displayed below are saved now, because the vseg descriptor
// must not be accessed after vseg_free()
uint32_t dbg_type     = vseg->type;
vpn_t    dbg_vpn_base = vseg->vpn_base;
vpn_t    dbg_vpn_size = vseg->vpn_size;
#endif

        // unmap and release physical pages 
        vmm_unmap_vseg( process , vseg );

        // remove vseg from VSL
        vseg_detach( vseg );

        // release memory allocated to vseg descriptor
        vseg_free( vseg );

#if( DEBUG_VMM_DESTROY & 1 )
if( DEBUG_VMM_DESTROY < cycle )
printk("\n[DBG] %s : %s vseg released / vpn_base %x / vpn_size %d\n",
__FUNCTION__ , vseg_type_str( dbg_type ), dbg_vpn_base, dbg_vpn_size );
#endif

    }

    // release lock protecting VSL
    remote_rwlock_wr_unlock( lock_xp );

    // remove all vsegs from zombi_lists in MMAP allocator
    for( i = 0 ; i<32 ; i++ )
    {
        while( !list_is_empty( &vmm->mmap_mgr.zombi_list[i] ) )
        {
            vseg = LIST_FIRST( &vmm->mmap_mgr.zombi_list[i] , vseg_t , zlist );

#if( DEBUG_VMM_DESTROY & 1 )
// saved before vseg_free(), for the "released" message below
uint32_t dbg_ztype     = vseg->type;
vpn_t    dbg_zvpn_base = vseg->vpn_base;
vpn_t    dbg_zvpn_size = vseg->vpn_size;

if( DEBUG_VMM_DESTROY < cycle )
printk("\n[DBG] %s : found zombi vseg %s / vpn_base %x / vpn_size %d\n",
__FUNCTION__ , vseg_type_str( dbg_ztype ), dbg_zvpn_base, dbg_zvpn_size );
#endif

            // NOTE(review): this loop terminates only if vseg_detach() unlinks the
            // vseg from its zombi list (the MMAP allocator uses list_unlink() on
            // the <zlist> field for that purpose) - confirm against vseg_detach().
            vseg_detach( vseg );
            vseg_free( vseg );

#if( DEBUG_VMM_DESTROY & 1 )
if( DEBUG_VMM_DESTROY < cycle )
printk("\n[DBG] %s : zombi vseg %s released / vpn_base %x / vpn_size %d\n",
__FUNCTION__ , vseg_type_str( dbg_ztype ), dbg_zvpn_base, dbg_zvpn_size );
#endif
        }
    }

    // release memory allocated to the GPT itself
    hal_gpt_destroy( &vmm->gpt );

#if DEBUG_VMM_DESTROY
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_DESTROY < cycle )
printk("\n[DBG] %s : thread %x exit for process %x in cluster %x / cycle %d\n",
__FUNCTION__, CURRENT_THREAD->trdid, process->pid, local_cxy , cycle );
#endif

}  // end vmm_destroy()

/////////////////////////////////////////////////
// Scans the local VSL of <process>, looking for a registered vseg whose page range
// intersects the [vpn_base , vpn_base + vpn_size[ range.
// The VSL lock is not taken by this function.
// @ process  : pointer on the local process descriptor.
// @ vpn_base : first page of the candidate range.
// @ vpn_size : number of pages in the candidate range.
// @ returns a pointer on the first conflicting vseg found / returns NULL if no conflict.
vseg_t * vmm_check_conflict( process_t * process,
                             vpn_t       vpn_base,
                             vpn_t       vpn_size )
{
    xptr_t   vsl_root_xp = XPTR( local_cxy , &process->vmm.vsegs_root );
    xptr_t   cursor_xp;
    vseg_t * candidate;

    XLIST_FOREACH( vsl_root_xp , cursor_xp )
    {
        // get local pointer on the vseg containing this list entry
        candidate = GET_PTR( XLIST_ELEMENT( cursor_xp , vseg_t , xlist ) );

        // two ranges intersect when each one starts before the end of the other
        if( (candidate->vpn_base < (vpn_base + vpn_size)) &&
            (vpn_base < (candidate->vpn_base + candidate->vpn_size)) )
        {
            return candidate;
        }
    }

    return NULL;

}  // end vmm_check_conflict()

////////////////////////////////////////////////////////////////////////////////////////////
// This static function is called by the vmm_create_vseg() function, and implements
// the VMM stack_vseg specific allocator.
////////////////////////////////////////////////////////////////////////////////////////////
// @ vmm      : pointer on VMM.
// @ vpn_base : (return value) first allocated page
// @ vpn_size : (return value) number of allocated pages
////////////////////////////////////////////////////////////////////////////////////////////
static error_t vmm_stack_alloc( vmm_t * vmm,
                                vpn_t * vpn_base,
                                vpn_t * vpn_size )
{
    stack_mgr_t * stack_mgr = &vmm->stack_mgr;
    int32_t       slot;

    // the bitmap is searched and updated under the allocator lock
    spinlock_lock( &stack_mgr->lock );

    // look for the first free slot in bitmap
    // NOTE(review): the meaning of the second argument (4) is not visible
    // from this file - confirm against the bitmap_ffc() definition.
    slot = bitmap_ffc( &stack_mgr->bitmap , 4 );

    if( (slot >= 0) && (slot <= 31) )
    {
        // register the slot as allocated
        bitmap_set( &stack_mgr->bitmap , slot );

        spinlock_unlock( &stack_mgr->lock );

        // each slot contains CONFIG_VMM_STACK_SIZE pages, but the first page
        // of the slot is not part of the returned range
        *vpn_base = stack_mgr->vpn_base + slot * CONFIG_VMM_STACK_SIZE + 1;
        *vpn_size = CONFIG_VMM_STACK_SIZE - 1;
        return 0;
    }

    // no free slot found
    spinlock_unlock( &stack_mgr->lock );
    return ENOMEM;

} // end vmm_stack_alloc()

////////////////////////////////////////////////////////////////////////////////////////////
// This static function is called by the vmm_create_vseg() function, and implements
// the VMM MMAP specific allocator.
////////////////////////////////////////////////////////////////////////////////////////////
// @ vmm      : [in] pointer on VMM.
// @ npages   : [in] requested number of pages.
// @ vpn_base : [out] first allocated page.
// @ vpn_size : [out] actual number of allocated pages.
////////////////////////////////////////////////////////////////////////////////////////////
static error_t vmm_mmap_alloc( vmm_t * vmm,
                               vpn_t   npages,
                               vpn_t * vpn_base,
                               vpn_t * vpn_size )
{
    uint32_t   index;
    vseg_t   * vseg;
    vpn_t      base;
    vpn_t      size;
    vpn_t      free;
    vpn_t      limit;

    // mmap vseg size must be power of 2
    // compute actual size and index in zombi_list array
    size  = POW2_ROUNDUP( npages );
    index = bits_log2( size );

    // get mmap allocator pointer
    mmap_mgr_t * mgr = &vmm->mmap_mgr;

    // get lock on mmap allocator
    spinlock_lock( &mgr->lock );

    // get vseg from zombi_list or from mmap zone
    if( list_is_empty( &mgr->zombi_list[index] ) )     // from mmap zone
    {
        // <first_free_vpn> is an absolute page index in the mmap zone
        // [vpn_base , vpn_base + vpn_size[ : the request fits only if it does
        // not cross the end of this zone. The subtraction cannot wrap, because
        // <first_free_vpn> never exceeds <limit>.
        free  = mgr->first_free_vpn;
        limit = mgr->vpn_base + mgr->vpn_size;

        if( size > (limit - free) )
        {
            // the allocator lock must not stay held after return
            spinlock_unlock( &mgr->lock );
            return ENOMEM;
        }

        // update MMAP allocator
        mgr->first_free_vpn += size;

        // compute base
        base = free;
    }
    else                                             // from zombi_list
    {
        // get pointer on zombi vseg from zombi_list
        vseg = LIST_FIRST( &mgr->zombi_list[index] , vseg_t , zlist );

        // remove vseg from free-list
        // NOTE(review): the zombi vseg descriptor itself is not released here -
        // confirm who owns it once it is unlinked.
        list_unlink( &vseg->zlist );

        // compute base
        base = vseg->vpn_base;
    }

    // release lock on mmap allocator
    spinlock_unlock( &mgr->lock );

    // returns vpn_base, vpn_size
    *vpn_base = base;
    *vpn_size = size;
    return 0;

}  // end vmm_mmap_alloc()

//////////////////////////////////////////////////////////////////////////////////////////
// This function allocates a vseg descriptor, initialises it, and attaches it to the
// VSL of the VMM embedded in <process>. The virtual zone depends on the vseg type:
// - STACK              : <base> and <size> are ignored, the zone is provided by
//                        the vmm_stack_alloc() function.
// - ANON, FILE, REMOTE : <base> is ignored, the zone is provided by the
//                        vmm_mmap_alloc() function, for the number of pages
//                        required to cover <size>.
// - other types        : the zone is defined by the <base> and <size> arguments.
// The resulting zone is checked against the registered vsegs by vmm_check_conflict().
//////////////////////////////////////////////////////////////////////////////////////////
// @ process     : pointer on the process descriptor.
// @ type        : vseg type.
// @ base        : vseg base address (only used for the "other types" case).
// @ size        : vseg size in bytes (not used for the STACK type).
// @ file_offset : forwarded to vseg_init().
// @ file_size   : forwarded to vseg_init().
// @ mapper_xp   : forwarded to vseg_init().
// @ cxy         : forwarded to vseg_init().
// @ return pointer on the new vseg if success / return NULL on failure.
//////////////////////////////////////////////////////////////////////////////////////////
vseg_t * vmm_create_vseg( process_t   * process,
                          vseg_type_t   type,
                          intptr_t      base,
                          uint32_t      size,
                          uint32_t      file_offset,
                          uint32_t      file_size,
                          xptr_t        mapper_xp,
                          cxy_t         cxy )
{
    vseg_t     * vseg;          // created vseg pointer
    vpn_t        vpn_base;      // first page index
    vpn_t        vpn_size;      // number of pages
    error_t      error;

#if DEBUG_VMM_CREATE_VSEG
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_CREATE_VSEG < cycle )
printk("\n[DBG] %s : thread %x enter / process %x / base %x / size %x / %s / cxy %x / cycle %d\n",
__FUNCTION__, CURRENT_THREAD, process->pid, base, size, vseg_type_str(type), cxy, cycle );
#endif

    // get pointer on VMM
    vmm_t * vmm    = &process->vmm;

    // compute base, size, vpn_base, vpn_size, depending on vseg type
    // we use the VMM specific allocators for "stack", "file", "anon", & "remote" vsegs
    if( type == VSEG_TYPE_STACK )
    {
        // get vpn_base and vpn_size from STACK allocator
        error = vmm_stack_alloc( vmm , &vpn_base , &vpn_size );
        if( error )
        {
            printk("\n[ERROR] in %s : no space for stack vseg / process %x in cluster %x\n",
            __FUNCTION__ , process->pid , local_cxy );
            return NULL;
        }

        // compute vseg base and size from vpn_base and vpn_size
        base = vpn_base << CONFIG_PPM_PAGE_SHIFT;
        size = vpn_size << CONFIG_PPM_PAGE_SHIFT;
    }
    else if( (type == VSEG_TYPE_ANON) ||
             (type == VSEG_TYPE_FILE) ||
             (type == VSEG_TYPE_REMOTE) )
    {
        // compute the number of pages required to cover <size> :
        // a partially used last page must be counted, otherwise the
        // allocated zone would be smaller than the requested size
        vpn_t npages = size >> CONFIG_PPM_PAGE_SHIFT;
        if( size & (CONFIG_PPM_PAGE_SIZE - 1) ) npages++;

        // get vpn_base and vpn_size from MMAP allocator
        error = vmm_mmap_alloc( vmm , npages , &vpn_base , &vpn_size );
        if( error )
        {
            printk("\n[ERROR] in %s : no vspace for mmap vseg / process %x in cluster %x\n",
                   __FUNCTION__ , process->pid , local_cxy );
            return NULL;
        }

        // compute vseg base and size from vpn_base and vpn_size
        base = vpn_base << CONFIG_PPM_PAGE_SHIFT;
        size = vpn_size << CONFIG_PPM_PAGE_SHIFT;
    }
    else
    {
        // the zone is imposed by the caller : compute first and last page indexes
        uint32_t vpn_min = base >> CONFIG_PPM_PAGE_SHIFT;
        uint32_t vpn_max = (base + size - 1) >> CONFIG_PPM_PAGE_SHIFT;

        vpn_base = vpn_min;
        vpn_size = vpn_max - vpn_min + 1;
    }

    // check collisions
    vseg = vmm_check_conflict( process , vpn_base , vpn_size );
    if( vseg != NULL )
    {
        printk("\n[ERROR] in %s for process %x : new vseg [vpn_base = %x / vpn_size = %x]\n"
               "  overlap existing vseg [vpn_base = %x / vpn_size = %x]\n",
        __FUNCTION__ , process->pid, vpn_base, vpn_size, vseg->vpn_base, vseg->vpn_size );
        return NULL;
    }

    // allocate physical memory for vseg descriptor
    vseg = vseg_alloc();
    if( vseg == NULL )
    {
        printk("\n[ERROR] in %s for process %x : cannot allocate memory for vseg\n",
        __FUNCTION__ , process->pid );
        return NULL;
    }

    // initialize vseg descriptor
    vseg_init( vseg,
               type,
               base,
               size,
               vpn_base,
               vpn_size,
               file_offset,
               file_size,
               mapper_xp,
               cxy );

    // attach vseg to VSL, holding the VSL lock in write mode
    xptr_t lock_xp = XPTR( local_cxy , &vmm->vsegs_lock );
    remote_rwlock_wr_lock( lock_xp );
    vseg_attach( vmm , vseg );
    remote_rwlock_wr_unlock( lock_xp );

#if DEBUG_VMM_CREATE_VSEG
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_CREATE_VSEG < cycle )
printk("\n[DBG] %s : thread %x exit / process %x / %s / cxy %x / cycle %d\n",
__FUNCTION__, CURRENT_THREAD, process->pid, vseg_type_str(type), cxy, cycle );
#endif

    return vseg;

}  // vmm_create_vseg()

//////////////////////////////////////////////////////////////////////////////////////////
// This function detaches <vseg> from the VSL, and gives the virtual zone back to the
// relevant allocator. The VMM used is the one of the process owning the calling thread.
// - STACK type          : the stack slot is cleared in the stack allocator bitmap.
// - ANON, FILE, REMOTE  : the vseg is linked in the mmap allocator zombi_list
//                         selected by log2( vpn_size ), and its descriptor is kept.
// For all types but ANON, FILE, REMOTE, the vseg descriptor is released.
//////////////////////////////////////////////////////////////////////////////////////////
// @ vseg  : pointer on the vseg to be removed.
//////////////////////////////////////////////////////////////////////////////////////////
void vmm_remove_vseg( vseg_t * vseg )
{
    // VMM of the process owning the calling thread
    thread_t * caller = CURRENT_THREAD;
    vmm_t    * vmm    = &caller->process->vmm;

    // the type is sampled before the vseg is detached
    uint32_t   type   = vseg->type;

    // a vseg of these three types is handled by the mmap allocator
    bool_t     is_mmap = (type == VSEG_TYPE_ANON) ||
                         (type == VSEG_TYPE_FILE) ||
                         (type == VSEG_TYPE_REMOTE);

    // remove the vseg from the VSL, holding the VSL lock in write mode
    xptr_t vsl_lock_xp = XPTR( local_cxy , &vmm->vsegs_lock );
    remote_rwlock_wr_lock( vsl_lock_xp );
    vseg_detach( vseg );
    remote_rwlock_wr_unlock( vsl_lock_xp );

    // STACK type : free the slot in the stack allocator bitmap
    if( type == VSEG_TYPE_STACK )
    {
        stack_mgr_t * stack_mgr = &vmm->stack_mgr;

        // slot index is derived from the vseg first page
        uint32_t slot = ((vseg->vpn_base - stack_mgr->vpn_base - 1) / CONFIG_VMM_STACK_SIZE);

        spinlock_lock( &stack_mgr->lock );
        bitmap_clear( &stack_mgr->bitmap , slot );
        spinlock_unlock( &stack_mgr->lock );
    }

    if( is_mmap )
    {
        // MMAP types : the descriptor is not released, but registered
        // in the zombi_list associated to its size
        mmap_mgr_t * mmap_mgr = &vmm->mmap_mgr;
        uint32_t     order    = bits_log2( vseg->vpn_size );

        spinlock_lock( &mmap_mgr->lock );
        list_add_first( &mmap_mgr->zombi_list[order] , &vseg->zlist );
        spinlock_unlock( &mmap_mgr->lock );
    }
    else
    {
        // all other types : release the memory allocated to the descriptor
        vseg_free( vseg );
    }
}  // end vmm_remove_vseg()

//////////////////////////////////////////////////////////////////////////////////////////
// This function scans all pages of <vseg> in the GPT of <process>, and resets each
// mapped entry. When the vseg is not identity mapped, and the local cluster is the
// cluster of the reference process, it also handles the physical page : the pending
// forks counter is decremented when it is not zero, and the page is released otherwise.
//////////////////////////////////////////////////////////////////////////////////////////
// @ process  : pointer on the process descriptor.
// @ vseg     : pointer on the vseg to be unmapped.
//////////////////////////////////////////////////////////////////////////////////////////
void vmm_unmap_vseg( process_t * process,
                     vseg_t    * vseg )
{
    vpn_t       vpn;        // VPN of the PTE currently handled
    ppn_t       ppn;        // PPN found in the current PTE
    uint32_t    attr;       // attributes found in the current PTE

#if DEBUG_VMM_UNMAP_VSEG
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_UNMAP_VSEG < cycle )
printk("\n[DBG] %s : thread %x enter / process %x / vseg %s / base %x / cycle %d\n",
__FUNCTION__, CURRENT_THREAD, process->pid, vseg_type_str( vseg->type ), vseg->vpn_base, cycle );
#endif

    // GPT embedded in the process descriptor
    gpt_t * gpt   = &process->vmm.gpt;

    // pages covered by the vseg : first included / limit excluded
    vpn_t   first = vseg->vpn_base;
    vpn_t   limit = first + vseg->vpn_size;

    for( vpn = first ; vpn < limit ; vpn++ )
    {
        // read the GPT entry
        hal_gpt_get_pte( gpt , vpn , &attr , &ppn );

        // nothing to do for an unmapped entry
        if( (attr & GPT_MAPPED) == 0 ) continue;

#if( DEBUG_VMM_UNMAP_VSEG & 1 )
if( DEBUG_VMM_UNMAP_VSEG < cycle )
printk("- vpn %x / ppn %x\n" , vpn , ppn );
#endif

        // only small pages are expected here
        assert( (attr & GPT_SMALL) ,
        "an user vseg must use small pages" );

        // reset the GPT entry
        hal_gpt_reset_pte( gpt , vpn );

        // the physical page is only handled for a vseg that is not identity
        // mapped, and when running in the cluster of the reference process
        if( (vseg->flags & VSEG_IDENT) ||
            (GET_CXY( process->ref_xp ) != local_cxy) ) continue;

        // cluster and local pointer on the physical page descriptor
        xptr_t   page_xp  = ppm_ppn2page( ppn );
        cxy_t    page_cxy = GET_CXY( page_xp );
        page_t * page_ptr = GET_PTR( page_xp );

        // pending forks counter, and lock, in the page descriptor
        xptr_t   forks_xp = XPTR( page_cxy , &page_ptr->forks );
        xptr_t   lock_xp  = XPTR( page_cxy , &page_ptr->lock );

        remote_spinlock_lock( lock_xp );

        if( hal_remote_lw( forks_xp ) )       // pending forks => decrement counter
        {
            hal_remote_atomic_add( forks_xp , -1 );
        }
        else if( page_cxy == local_cxy )      // no pending fork => release local page
        {
            kmem_req_t  req;
            req.type = KMEM_PAGE;
            req.ptr  = page_ptr;
            kmem_free( &req );
        }
        else                                  // no pending fork => release remote page
        {
            rpc_pmem_release_pages_client( page_cxy , page_ptr );
        }

        remote_spinlock_unlock( lock_xp );
    }

#if DEBUG_VMM_UNMAP_VSEG
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_UNMAP_VSEG < cycle )
printk("\n[DBG] %s : thread %x exit / process %x / vseg %s / base %x / cycle %d\n",
__FUNCTION__, CURRENT_THREAD, process->pid, vseg_type_str( vseg->type ), vseg->vpn_base, cycle );
#endif

}  // end vmm_unmap_vseg()

//////////////////////////////////////////////////////////////////////////////////////////
// This low-level static function is called by the vmm_get_vseg() and
// vmm_resize_vseg() functions. It scans the local VSL, holding the VSL lock in
// read mode, and returns the first vseg such that min <= vaddr < max.
// The lock is released before the function returns.
//////////////////////////////////////////////////////////////////////////////////////////
// @ vmm     : pointer on the process VMM.
// @ vaddr   : virtual address.
// @ return vseg pointer if success / return NULL if not found.
//////////////////////////////////////////////////////////////////////////////////////////
static vseg_t * vseg_from_vaddr( vmm_t    * vmm,
                                 intptr_t   vaddr )
{
    xptr_t   iter_xp;        // iterator on the VSL
    xptr_t   current_xp;     // extended pointer on the vseg currently checked
    vseg_t * current;        // local pointer on the vseg currently checked
    bool_t   hit;            // true when vaddr belongs to the current vseg

    // extended pointers on the VSL root and on the lock protecting it
    xptr_t vsl_root_xp = XPTR( local_cxy , &vmm->vsegs_root );
    xptr_t vsl_lock_xp = XPTR( local_cxy , &vmm->vsegs_lock );

    // the VSL is scanned in read mode
    remote_rwlock_rd_lock( vsl_lock_xp );

    XLIST_FOREACH( vsl_root_xp , iter_xp )
    {
        current_xp = XLIST_ELEMENT( iter_xp , vseg_t , xlist );
        current    = GET_PTR( current_xp );

        hit = (vaddr >= current->min) && (vaddr < current->max);

        if( hit )    // found => release the lock and return this vseg
        {
            remote_rwlock_rd_unlock( vsl_lock_xp );
            return current;
        }
    }

    // no registered vseg contains vaddr
    remote_rwlock_rd_unlock( vsl_lock_xp );
    return NULL;

}  // end vseg_from_vaddr()

//////////////////////////////////////////////////////////////////////////////////////////
// This function removes the [base , base + size[ region from the vseg containing
// the <base> address, in the VMM of <process>. Depending on the region position:
// - region equal to the vseg         : the vseg is removed.
// - region at one border of the vseg : the vseg is shrunk.
// - region strictly inside the vseg  : the vseg is shrunk to the part below the
//   region, and a new vseg is created for the part above the region.
// The vpn_base and vpn_size fields are recomputed from the new min and max.
//////////////////////////////////////////////////////////////////////////////////////////
// @ process  : pointer on the process descriptor.
// @ base     : base address of the region to be removed.
// @ size     : size in bytes of the region to be removed.
// @ return 0 if success / return EINVAL if no vseg contains <base>, if the region
//   is not included in the vseg, or if the new vseg cannot be created.
//////////////////////////////////////////////////////////////////////////////////////////
error_t vmm_resize_vseg( process_t * process,
                         intptr_t    base,
                         intptr_t    size )
{
    error_t   error;
    vseg_t  * new;
    vpn_t     vpn_min;
    vpn_t     vpn_max;

    // get pointer on process VMM
    vmm_t * vmm = &process->vmm;

    intptr_t addr_min = base;
    intptr_t addr_max = base + size;

    // get pointer on vseg
    vseg_t * vseg = vseg_from_vaddr( vmm , base );

    if( vseg == NULL)  return EINVAL;

    // get extended pointer on VSL lock
    xptr_t lock_xp = XPTR( local_cxy , &vmm->vsegs_lock );

    // get lock protecting VSL
    // NOTE(review): vmm_remove_vseg() and vmm_create_vseg(), called below, take
    // this same lock in write mode. Confirm that remote_rwlock_wr_lock() supports
    // a nested acquisition by the same thread, or release the lock before the calls.
    remote_rwlock_wr_lock( lock_xp );

    if( (vseg->min > addr_min) || (vseg->max < addr_max) )   // region not included in vseg
    {
        error = EINVAL;
    }
    else if( (vseg->min == addr_min) && (vseg->max == addr_max) ) // vseg must be removed
    {
        vmm_remove_vseg( vseg );
        error = 0;
    }
    else if( vseg->min == addr_min )                         // vseg must be resized
    {
        // update vseg base address
        vseg->min = addr_max;

        // update vpn_base and vpn_size
        vpn_min        = vseg->min >> CONFIG_PPM_PAGE_SHIFT;
        vpn_max        = (vseg->max - 1) >> CONFIG_PPM_PAGE_SHIFT;
        vseg->vpn_base = vpn_min;
        vseg->vpn_size = vpn_max - vpn_min + 1;
        error = 0;
    }
    else if( vseg->max == addr_max )                          // vseg must be resized
    {
        // update vseg max address
        vseg->max = addr_min;

        // update vpn_base and vpn_size
        vpn_min        = vseg->min >> CONFIG_PPM_PAGE_SHIFT;
        vpn_max        = (vseg->max - 1) >> CONFIG_PPM_PAGE_SHIFT;
        vseg->vpn_base = vpn_min;
        vseg->vpn_size = vpn_max - vpn_min + 1;
        error = 0;
    }
    else                                                      // vseg cut in three regions
    {
        // save the fields modified below : the former max address defines
        // the new vseg, and all fields are restored if the creation fails
        intptr_t old_max      = vseg->max;
        vpn_t    old_vpn_base = vseg->vpn_base;
        vpn_t    old_vpn_size = vseg->vpn_size;

        // resize existing vseg : it keeps the part below the removed region
        // (this must be done first, because vmm_create_vseg() checks conflicts)
        vseg->max = addr_min;

        // update vpn_base and vpn_size
        vpn_min        = vseg->min >> CONFIG_PPM_PAGE_SHIFT;
        vpn_max        = (vseg->max - 1) >> CONFIG_PPM_PAGE_SHIFT;
        vseg->vpn_base = vpn_min;
        vseg->vpn_size = vpn_max - vpn_min + 1;

        // create new vseg for the part above the removed region,
        // that is the [addr_max , old_max[ interval
        new = vmm_create_vseg( process,
                               vseg->type,
                               addr_max,
                               (old_max - addr_max),
                               vseg->file_offset,
                               vseg->file_size,
                               vseg->mapper_xp,
                               vseg->cxy );

        if( new == NULL )
        {
            // undo the resize, to leave the VSL unchanged on failure
            vseg->max      = old_max;
            vseg->vpn_base = old_vpn_base;
            vseg->vpn_size = old_vpn_size;
            error = EINVAL;
        }
        else
        {
            error = 0;
        }
    }

    // release VMM lock
    remote_rwlock_wr_unlock( lock_xp );

    return error;

}  // vmm_resize_vseg()

//////////////////////////////////////////////////////////////////////////////////////////
// This function returns in <found_vseg> the vseg containing the <vaddr> address.
// It first searches the local VSL. In case of miss, and when the local cluster is not
// the cluster of the reference process, it asks the reference cluster through the
// rpc_vmm_get_vseg_client() function, allocates a local vseg descriptor, initialises
// it from the reference vseg, and registers it in the local VSL.
//////////////////////////////////////////////////////////////////////////////////////////
// @ process     : pointer on the local process descriptor.
// @ vaddr       : virtual address.
// @ found_vseg  : [out] pointer on the vseg (only written on success).
// @ return 0 if success / return -1 if the vseg is not found, or if the local
//   vseg descriptor cannot be allocated.
//////////////////////////////////////////////////////////////////////////////////////////
error_t  vmm_get_vseg( process_t * process,
                       intptr_t    vaddr,
                       vseg_t   ** found_vseg )
{
    xptr_t   vseg_xp;
    error_t  error;
    vseg_t * vseg;
    vmm_t  * vmm;

    // get pointer on local VMM
    vmm = &process->vmm;

    // try to get vseg from local VMM
    vseg = vseg_from_vaddr( vmm , vaddr );

    if( vseg == NULL )   // vseg not found in local cluster => try to get it from ref
    {
        // get extended pointer on reference process
        xptr_t ref_xp = process->ref_xp;

        // get cluster and local pointer on reference process
        cxy_t       ref_cxy = GET_CXY( ref_xp );
        process_t * ref_ptr = GET_PTR( ref_xp );

        if( local_cxy == ref_cxy )  return -1;   // local cluster is the reference

        // get extended pointer on reference vseg
        rpc_vmm_get_vseg_client( ref_cxy , ref_ptr , vaddr , &vseg_xp , &error );

        if( error )   return -1;                // vseg not found => illegal user vaddr

        // allocate a vseg in local cluster
        vseg = vseg_alloc();

        if( vseg == NULL ) return -1;           // cannot allocate a local vseg

        // initialise local vseg from reference
        vseg_init_from_ref( vseg , vseg_xp );

        // register local vseg in local VSL, holding the VSL lock in write mode,
        // as done by vmm_create_vseg() and vmm_remove_vseg() when they modify the VSL
        xptr_t lock_xp = XPTR( local_cxy , &vmm->vsegs_lock );
        remote_rwlock_wr_lock( lock_xp );
        vseg_attach( vmm , vseg );
        remote_rwlock_wr_unlock( lock_xp );
    }

    // success
    *found_vseg = vseg;
    return 0;

}  // end vmm_get_vseg()

//////////////////////////////////////////////////////////////////////////////////////
// This static function selects the cluster that must provide a physical page for
// a given <vpn> in a given <vseg>, allocates one page in this cluster (using an RPC
// when the cluster is not the local one), and returns an extended pointer on the
// page descriptor.
// - vseg with the DISTRIB flag : the cluster is selected by the VPN LSB bits,
//   or by cluster_random_select() when the selected cluster is not active.
// - other vsegs : the cluster is the one registered in the vseg.
// The vseg cannot have the FILE type.
//////////////////////////////////////////////////////////////////////////////////////
// @ vseg   : pointer on the vseg.
// @ vpn    : virtual page number.
// @ return extended pointer on the page descriptor / XPTR_NULL if no page.
//////////////////////////////////////////////////////////////////////////////////////
static xptr_t vmm_page_allocate( vseg_t * vseg,
                                 vpn_t    vpn )
{

#if DEBUG_VMM_ALLOCATE_PAGE
if( DEBUG_VMM_ALLOCATE_PAGE < (uint32_t)hal_get_cycles() )
printk("\n[DBG] in %s : thread %x enter for vpn %x\n",
__FUNCTION__ , CURRENT_THREAD, vpn );
#endif

    page_t     * page_ptr;     // local pointer on the allocated page descriptor
    cxy_t        page_cxy;     // cluster providing the page
    kmem_req_t   req;          // request to the local allocator

    uint32_t     type  = vseg->type;
    uint32_t     flags = vseg->flags;

    assert( ( type != VSEG_TYPE_FILE ) , "illegal vseg type\n" );

    // select the target cluster
    if( (flags & VSEG_DISTRIB) == 0 )    // not distributed => cluster defined by vseg
    {
        page_cxy = vseg->cxy;
    }
    else                                 // distributed => cluster defined by vpn LSB
    {
        uint32_t nb_x    = LOCAL_CLUSTER->x_size;
        uint32_t nb_y    = LOCAL_CLUSTER->y_size;
        uint32_t y_width = LOCAL_CLUSTER->y_width;
        uint32_t linear  = vpn & ((nb_x * nb_y) - 1);
        uint32_t cx      = linear / nb_y;
        uint32_t cy      = linear % nb_y;

        // when the cluster selected by the VPN is not active, another cluster
        // is provided by the cluster_random_select() function
        if( !cluster_info_is_active( LOCAL_CLUSTER->cluster_info[cx][cy] ) )
        {
            cxy_t random_cxy = cluster_random_select();
            cx = ( ( random_cxy >> y_width ) & 0xF);
            cy = ( random_cxy & 0xF );
        }

        page_cxy = ( cx << y_width ) + cy;
    }

    // get one physical page from the target cluster
    if( page_cxy != local_cxy )    // remote cluster => RPC
    {
        rpc_pmem_get_pages_client( page_cxy , 0 , &page_ptr );
    }
    else                           // local cluster => direct allocation
    {
        req.type  = KMEM_PAGE;
        req.size  = 0;
        req.flags = AF_NONE;
        page_ptr  = (page_t *)kmem_alloc( &req );
    }

#if DEBUG_VMM_ALLOCATE_PAGE
if( DEBUG_VMM_ALLOCATE_PAGE < (uint32_t)hal_get_cycles() )
printk("\n[DBG] in %s : thread %x exit for vpn = %d / ppn = %x\n",
__FUNCTION__ , CURRENT_THREAD, vpn, ppm_page2ppn( XPTR( page_cxy , page_ptr ) ) );
#endif

    // a NULL local pointer means that no page has been allocated
    return ( page_ptr == NULL ) ? XPTR_NULL : XPTR( page_cxy , page_ptr );

}  // end vmm_page_allocate()

////////////////////////////////////////
error_t vmm_get_one_ppn( vseg_t * vseg,
                         vpn_t    vpn,
                         ppn_t  * ppn )
{
    error_t    error;
    xptr_t     page_xp;           // extended pointer on physical page descriptor
    page_t   * page_ptr;          // local pointer on physical page descriptor
    uint32_t   index;             // missing page index in vseg mapper
    uint32_t   type;              // vseg type;

    type      = vseg->type;
    index     = vpn - vseg->vpn_base;

#if DEBUG_VMM_GET_ONE_PPN 
thread_t * this = CURRENT_THREAD;
if( DEBUG_VMM_GET_ONE_PPN < (uint32_t)hal_get_cycles() )
printk("\n[DBG] %s : thread %x enter for vpn = %x / type = %s / index = %d\n",
__FUNCTION__, this, vpn, vseg_type_str(type), index );
#endif

    // FILE type : get the physical page from the file mapper
    if( type == VSEG_TYPE_FILE )
    {
        // get extended pointer on mapper
        xptr_t mapper_xp = vseg->mapper_xp;

        assert( (mapper_xp != XPTR_NULL),
        "mapper not defined for a FILE vseg\n" );
        
        // get mapper cluster and local pointer
        cxy_t      mapper_cxy = GET_CXY( mapper_xp );
        mapper_t * mapper_ptr = GET_PTR( mapper_xp );

        // get page descriptor from mapper
        if( mapper_cxy == local_cxy )             // mapper is local
        {
            page_ptr = mapper_get_page( mapper_ptr , index );
        }
        else                                      // mapper is remote
        {
            rpc_mapper_get_page_client( mapper_cxy , mapper_ptr , index , &page_ptr );
        }

        if ( page_ptr == NULL ) return EINVAL;

        page_xp = XPTR( mapper_cxy , page_ptr );
    }

    // Other types : allocate a physical page from target cluster,
    // as defined by vseg type and vpn value
    else
    {
        // allocate one physical page 
        page_xp = vmm_page_allocate( vseg , vpn );

        if( page_xp == XPTR_NULL ) return ENOMEM;

        // initialise missing page from .elf file mapper for DATA and CODE types
        // the vseg->mapper_xp field is an extended pointer on the .elf file mapper
        if( (type == VSEG_TYPE_CODE) || (type == VSEG_TYPE_DATA) )
        {
            // get extended pointer on mapper
            xptr_t     mapper_xp = vseg->mapper_xp;

            assert( (mapper_xp != XPTR_NULL),
            "mapper not defined for a CODE or DATA vseg\n" );
        
            // get mapper cluster and local pointer
            cxy_t      mapper_cxy = GET_CXY( mapper_xp );
            mapper_t * mapper_ptr = GET_PTR( mapper_xp );

            // compute missing page offset in vseg
            uint32_t offset = index << CONFIG_PPM_PAGE_SHIFT;

            // compute missing page offset in .elf file
            uint32_t elf_offset = vseg->file_offset + offset;

#if (DEBUG_VMM_GET_ONE_PPN & 0x1)
if( DEBUG_VMM_GET_ONE_PPN < (uint32_t)hal_get_cycles() )
printk("\n[DBG] %s : thread %x for vpn = %x / elf_offset = %x\n",
__FUNCTION__, this, vpn, elf_offset );
#endif


            // compute extended pointer on page base 
            xptr_t base_xp  = ppm_page2base( page_xp );

            // file_size (in .elf mapper) can be smaller than vseg_size (BSS)
            uint32_t file_size = vseg->file_size;

            if( file_size < offset )                 // missing page fully in  BSS 
            {

#if (DEBUG_VMM_GET_ONE_PPN & 0x1)
if( DEBUG_VMM_GET_ONE_PPN < (uint32_t)hal_get_cycles() )
printk("\n[DBG] %s : thread%x for vpn = %x / fully in BSS\n",
__FUNCTION__, this, vpn );
#endif


                if( GET_CXY( page_xp ) == local_cxy )
                {
                    memset( GET_PTR( base_xp ) , 0 , CONFIG_PPM_PAGE_SIZE );
                }
                else
                {
                   hal_remote_memset( base_xp , 0 , CONFIG_PPM_PAGE_SIZE );       
                }
            }
            else if( file_size >= (offset + CONFIG_PPM_PAGE_SIZE) )  // fully in  mapper
            {

#if (DEBUG_VMM_GET_ONE_PPN & 0x1)
if( DEBUG_VMM_GET_ONE_PPN < (uint32_t)hal_get_cycles() )
printk("\n[DBG] %s : thread %x, for vpn = %x / fully in mapper\n",
__FUNCTION__, this, vpn );
#endif
                if( mapper_cxy == local_cxy ) 
                {
                    error = mapper_move_kernel( mapper_ptr,
                                                true,             // to_buffer
                                                elf_offset,
                                                base_xp,
                                                CONFIG_PPM_PAGE_SIZE ); 
                }
                else 
                {
                    rpc_mapper_move_buffer_client( mapper_cxy,
                                                   mapper_ptr,
                                                   true,         // to buffer
                                                   false,        // kernel buffer
                                                   elf_offset,
                                                   base_xp,
                                                   CONFIG_PPM_PAGE_SIZE,
                                                   &error );
                }
                if( error ) return EINVAL;
            }
            else  // both in mapper and in BSS :
                  // - (file_size - offset)             bytes from mapper
                  // - (page_size + offset - file_size) bytes from BSS
            {

#if (DEBUG_VMM_GET_ONE_PPN & 0x1)
if( DEBUG_VMM_GET_ONE_PPN < (uint32_t)hal_get_cycles() )
printk("\n[DBG] %s : thread %x for vpn = %x / both mapper & BSS\n"
"      %d bytes from mapper / %d bytes from BSS\n",
__FUNCTION__, this, vpn,
file_size - offset , offset + CONFIG_PPM_PAGE_SIZE - file_size  );
#endif
                // initialize mapper part
                if( mapper_cxy == local_cxy )
                {
                    error = mapper_move_kernel( mapper_ptr,
                                                true,         // to buffer
                                                elf_offset,
                                                base_xp,
                                                file_size - offset ); 
                }
                else                                
                {
                    rpc_mapper_move_buffer_client( mapper_cxy,
                                                   mapper_ptr,
                                                   true,         // to buffer
                                                   false,        // kernel buffer
                                                   elf_offset,
                                                   base_xp,
                                                   file_size - offset, 
                                                   &error );
                }
                if( error ) return EINVAL;

                // initialize BSS part
                if( GET_CXY( page_xp ) == local_cxy )
                {
                    memset( GET_PTR( base_xp ) + file_size - offset , 0 , 
                            offset + CONFIG_PPM_PAGE_SIZE - file_size );
                }
                else
                {
                   hal_remote_memset( base_xp + file_size - offset , 0 , 
                                      offset + CONFIG_PPM_PAGE_SIZE - file_size );
                }
            }    
        }  // end initialisation for CODE or DATA types   
    } 

    // return ppn
    *ppn = ppm_page2ppn( page_xp );

#if DEBUG_VMM_GET_ONE_PPN
if( DEBUG_VMM_GET_ONE_PPN < (uint32_t)hal_get_cycles() )
printk("\n[DBG] %s : thread %x exit for vpn = %x / ppn = %x\n",
__FUNCTION__ , this , vpn , *ppn );
#endif

    return 0;

}  // end vmm_get_one_ppn()

//////////////////////////////////////////////////////////////////////////////////////////
// This function returns in <attr> and <ppn> the PTE associated to <vpn> in the GPT
// of <process>, handling the two following cases:
// - <cow> false (page fault) : if the PTE is not mapped in the GPT, a physical page
//   is obtained from vmm_get_one_ppn(), the attributes are built from the vseg flags,
//   and the PTE is registered with hal_gpt_set_pte(). A mapped PTE is returned as is.
// - <cow> true (copy on write) : the PTE must be mapped. If the pending forks counter
//   of the physical page is not zero, a new page is allocated, the old page content
//   is copied into it, and the counter is decremented. The PTE is then updated,
//   with WRITABLE set and COW reset, by vmm_global_update_pte().
//////////////////////////////////////////////////////////////////////////////////////////
// @ process  : pointer on the process descriptor.
// @ vpn      : virtual page number.
// @ cow      : copy on write request if true / page fault request if false.
// @ attr     : [out] PTE attributes (only written on success).
// @ ppn      : [out] PTE physical page number (only written on success).
// @ return 0 if success / return -1 if no memory or if the GPT cannot be updated.
//////////////////////////////////////////////////////////////////////////////////////////
error_t vmm_get_pte( process_t * process,
                     vpn_t       vpn,
                     bool_t      cow,
                     uint32_t  * attr,
                     ppn_t     * ppn )
{
    ppn_t      old_ppn;    // current PTE_PPN
    uint32_t   old_attr;   // current PTE_ATTR
    ppn_t      new_ppn;    // new PTE_PPN
    uint32_t   new_attr;   // new PTE_ATTR
    vmm_t    * vmm;
    vseg_t   * vseg = NULL;   // stays NULL if vmm_get_vseg() fails (checked below)
    error_t    error;

    thread_t * this  = CURRENT_THREAD;

#if DEBUG_VMM_GET_PTE
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_GET_PTE < cycle )
printk("\n[DBG] %s : thread %x in process %x enter / vpn %x / cow %d / cycle %d\n",
__FUNCTION__, this->trdid, process->pid, vpn, cow, cycle );
#endif

    // get VMM pointer
    vmm = &process->vmm;

    // get local vseg descriptor
    error =  vmm_get_vseg( process, 
                           ((intptr_t)vpn << CONFIG_PPM_PAGE_SHIFT), 
                           &vseg );

    // vseg has been checked by the vmm_handle_page_fault() function
    assert( (vseg != NULL) , "vseg undefined / vpn %x\n");

    if( cow )  //////////////// copy_on_write request //////////////////////
               // get PTE from local GPT
               // allocate a new physical page if there is pending forks,
               // initialize it from old physical page content,
               // update PTE in all GPT copies, 
    {
        // access local GPT to get current PTE attributes and PPN
        hal_gpt_get_pte( &vmm->gpt , vpn , &old_attr , &old_ppn );

        assert( (old_attr & GPT_MAPPED),
          "PTE unmapped for a COW exception / vpn %x\n" );

#if( DEBUG_VMM_GET_PTE & 1 )
if( DEBUG_VMM_GET_PTE < cycle )
printk("\n[DBG] %s : thread %x in process %x handling COW for vpn %x\n",
__FUNCTION__, this->trdid, process->pid, vpn );
#endif

        // get extended pointer, cluster and local pointer on physical page descriptor
        xptr_t   page_xp  = ppm_ppn2page( old_ppn );
        cxy_t    page_cxy = GET_CXY( page_xp );
        page_t * page_ptr = GET_PTR( page_xp );

        // get extended pointers on forks and lock field in page descriptor
        xptr_t forks_xp = XPTR( page_cxy , &page_ptr->forks );
        xptr_t lock_xp  = XPTR( page_cxy , &page_ptr->lock );

        // take lock protecting page descriptor
        remote_spinlock_lock( lock_xp );

        // get number of pending forks in page descriptor
        uint32_t forks = hal_remote_lw( forks_xp );

        if( forks )        // pending fork => allocate a new page, copy old to new
        {
            // allocate a new physical page
            page_xp = vmm_page_allocate( vseg , vpn );
            if( page_xp == XPTR_NULL ) 
            {
                // the lock on the old page descriptor must not be kept on failure
                remote_spinlock_unlock( lock_xp );

                printk("\n[ERROR] in %s : no memory / process = %x / vpn = %x\n",
                __FUNCTION__ , process->pid , vpn );
                return -1;
            }

            // compute allocated page PPN 
            new_ppn = ppm_page2ppn( page_xp );

            // copy old page content to new page
            // NOTE(review): both pages are accessed through local pointers,
            // although vmm_page_allocate() can return a page located in a
            // remote cluster - confirm that a local memcpy() is valid here.
            xptr_t  old_base_xp = ppm_ppn2base( old_ppn );
            xptr_t  new_base_xp = ppm_ppn2base( new_ppn );
            memcpy( GET_PTR( new_base_xp ),
                    GET_PTR( old_base_xp ),
                    CONFIG_PPM_PAGE_SIZE );

             // decrement pending forks counter in page descriptor
             hal_remote_atomic_add( forks_xp , -1 );
        }             
        else               // no pending fork => keep the existing page
        {
            new_ppn = old_ppn;
        }

        // release lock protecting page descriptor
        remote_spinlock_unlock( lock_xp );

        // build new_attr : reset COW and set WRITABLE,
        new_attr = (old_attr | GPT_WRITABLE) & (~GPT_COW);

        // update GPT[vpn] for all GPT copies
        vmm_global_update_pte( process, vpn, new_attr, new_ppn );
    }
    else        //////////// page_fault request ///////////////////////////
                // get PTE from local GPT
                // allocate a physical page if it is a true page fault,
                // initialize it if type is FILE, CODE, or DATA,
                // register in reference GPT, but don't update GPT copies
    {  
        // access local GPT to get current PTE 
        hal_gpt_get_pte( &vmm->gpt , vpn , &old_attr , &old_ppn );

        if( (old_attr & GPT_MAPPED) == 0 )   // true page_fault => map it
        {

#if( DEBUG_VMM_GET_PTE & 1 )
if( DEBUG_VMM_GET_PTE < cycle )
printk("\n[DBG] %s : thread %x in process %x handling page fault for vpn %x\n",
__FUNCTION__, this->trdid, process->pid, vpn );
#endif
            // allocate new_ppn, and initialize the new page
            error = vmm_get_one_ppn( vseg , vpn , &new_ppn );
            if( error )
            {
                printk("\n[ERROR] in %s : no memory / process = %x / vpn = %x\n",
                __FUNCTION__ , process->pid , vpn );
                return -1;
            }

            // define new_attr from vseg flags 
            new_attr = GPT_MAPPED | GPT_SMALL;
            if( vseg->flags & VSEG_USER  ) new_attr |= GPT_USER;
            if( vseg->flags & VSEG_WRITE ) new_attr |= GPT_WRITABLE;
            if( vseg->flags & VSEG_EXEC  ) new_attr |= GPT_EXECUTABLE;
            if( vseg->flags & VSEG_CACHE ) new_attr |= GPT_CACHABLE;

            // register new PTE in reference GPT
            // on demand policy => no update of GPT copies
            error = hal_gpt_set_pte( &vmm->gpt,
                                     vpn,
                                     new_attr,
                                     new_ppn );
            if( error )
            {
                printk("\n[ERROR] in %s : cannot update GPT / process = %x / vpn = %x\n",
                __FUNCTION__ , process->pid , vpn );
                return -1;
            }
        }
        else                                  // mapped in reference GPT => get it
        {
            new_ppn  = old_ppn;
            new_attr = old_attr;
        }
    }

#if DEBUG_VMM_GET_PTE
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_GET_PTE < cycle )
printk("\n[DBG] %s : thread %x in process %x exit / vpn %x / ppn %x / attr %x / cycle %d\n",
__FUNCTION__, this->trdid, process->pid, vpn, new_ppn, new_attr, cycle );
#endif

    // return PPN and flags
    *ppn  = new_ppn;
    *attr = new_attr;
    return 0;

}  // end vmm_get_pte()

///////////////////////////////////////////////////
// Handle a page fault on page <vpn> for the <process> descriptor.
// - The vseg containing <vpn> is searched with vmm_get_vseg().
// - For STACK and CODE vsegs the reference is the local process descriptor;
//   for all other vseg types the reference is identified by process->ref_xp.
// - When the reference is in the local cluster, vmm_get_pte() is called
//   directly. Otherwise the PTE attributes and PPN are obtained with
//   rpc_vmm_get_pte_client(), and the local GPT is updated only when
//   that RPC reported success.
// @ process : pointer on local process descriptor.
// @ vpn     : VPN of the missing page.
// @ is_cow  : forwarded unchanged to vmm_get_pte() / rpc_vmm_get_pte_client().
// @ returns 0 if success / returns the first non-zero error code otherwise.
///////////////////////////////////////////////////
error_t vmm_handle_page_fault( process_t * process,
                               vpn_t       vpn,
                               bool_t      is_cow )
{
    uint32_t         attr;          // missing page attributes
    ppn_t            ppn;           // missing page PPN
    vseg_t         * vseg;          // vseg containing vpn
    uint32_t         type;          // vseg type
    cxy_t            ref_cxy;       // reference cluster for missing vpn
    process_t      * ref_ptr;       // reference process for missing vpn
    error_t          error;

    thread_t       * this = CURRENT_THREAD;

#if DEBUG_VMM_HANDLE_PAGE_FAULT
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_HANDLE_PAGE_FAULT < cycle )
printk("\n[DBG] %s : thread %x in process %x enter for vpn %x / core[%x,%d] / cycle %d\n",
__FUNCTION__, this, process->pid, vpn, local_cxy, this->core->lid, cycle );
#endif

    // get local vseg (access reference VSL if required)
    error = vmm_get_vseg( process , vpn<<CONFIG_PPM_PAGE_SHIFT , &vseg );

    if( error )
    {
        printk("\n[ERROR] in %s : vpn %x / process %x / thread %x / core[%x,%d] / cycle %d\n",
        __FUNCTION__, vpn, process->pid, this->trdid, local_cxy, this->core->lid,
        (uint32_t)hal_get_cycles() );
        return error;
    }

    // get segment type
    type = vseg->type;

    // get reference process cluster and local pointer
    // for private vsegs (STACK and CODE type),
    // the reference is the local process descriptor.
    if( (type == VSEG_TYPE_STACK) || (type == VSEG_TYPE_CODE) )
    {
        ref_cxy = local_cxy;
        ref_ptr = process;
    }
    else
    {
        ref_cxy = GET_CXY( process->ref_xp );
        ref_ptr = GET_PTR( process->ref_xp );
    }

    // get missing PTE attributes and PPN 
    if( local_cxy != ref_cxy )  
    {

#if DEBUG_VMM_HANDLE_PAGE_FAULT
if( DEBUG_VMM_HANDLE_PAGE_FAULT < cycle )
printk("\n[DBG] %s : thread %x in process %x call RPC_VMM_GET_PTE\n",
__FUNCTION__, this, process->pid );
#endif

        rpc_vmm_get_pte_client( ref_cxy,
                                ref_ptr,
                                vpn,
                                is_cow,
                                &attr,
                                &ppn,
                                &error );

        // update local GPT only if the reference cluster returned a valid PTE:
        // on RPC failure <attr> and <ppn> may not have been written, and must
        // not be installed in the local page table.
        if( error == 0 )
        {
            // get local VMM pointer
            vmm_t * vmm = &process->vmm;

            error = hal_gpt_set_pte( &vmm->gpt,
                                     vpn,
                                     attr,
                                     ppn );
        }
    }
    else   // local cluster is the reference cluster
    {
        error = vmm_get_pte( process,
                             vpn,
                             is_cow,
                             &attr,
                             &ppn );
    }

#if DEBUG_VMM_HANDLE_PAGE_FAULT
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_VMM_HANDLE_PAGE_FAULT < cycle )
printk("\n[DBG] %s : thread %x in process %x exit for vpn %x / core[%x,%d] / cycle %d\n",
__FUNCTION__, this, process->pid, vpn, local_cxy, this->core->lid, cycle );
#endif

    return error;

}   // end vmm_handle_page_fault()









/* deprecated April 2018  [AG]

error_t vmm_v2p_translate( process_t * process,
                           void      * ptr,
                           paddr_t   * paddr )
{
    // access page table
    error_t  error;
    vpn_t    vpn;
    uint32_t attr;
    ppn_t    ppn;
    uint32_t offset;

    vpn    = (vpn_t)( (intptr_t)ptr >> CONFIG_PPM_PAGE_SHIFT );
    offset = (uint32_t)( ((intptr_t)ptr) & CONFIG_PPM_PAGE_MASK );

    if( local_cxy == GET_CXY( process->ref_xp) ) // local process is reference process
    {
        error = vmm_get_pte( process, vpn , false , &attr , &ppn );
    }
    else                                         // calling process is not reference process
    {
        cxy_t       ref_cxy = GET_CXY( process->ref_xp );
        process_t * ref_ptr = GET_PTR( process->ref_xp );
        rpc_vmm_get_pte_client( ref_cxy , ref_ptr , vpn , false , &attr , &ppn , &error );
    }

    // set paddr
    *paddr = (((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT) | offset;

    return error;

}  // end vmm_v2p_translate()

*/
