/*
 * ppm.c - Per-cluster Physical Pages Manager implementation
 *
 * Authors  Ghassan Almaless (2008,2009,2010,2011,2012)
 *          Alain Greiner    (2016,2017,2018,2019)
 *
 * Copyright (c) UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH.is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH.is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH.; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <kernel_config.h>
#include <hal_kernel_types.h>
#include <hal_special.h>
#include <printk.h>
#include <list.h>
#include <bits.h>
#include <page.h>
#include <dqdt.h>
#include <busylock.h>
#include <queuelock.h>
#include <thread.h>
#include <cluster.h>
#include <kmem.h>
#include <process.h>
#include <mapper.h>
#include <ppm.h>
#include <vfs.h>

////////////////////////////////////////////////////////////////////////////////////////
//         global variables
////////////////////////////////////////////////////////////////////////////////////////

// Directory of chdev extended pointers. In this file it is only read by
// ppm_remote_display(), which uses chdev_dir.txt_tx[0] to locate the TXT0 chdev.
extern chdev_directory_t    chdev_dir;          // allocated in kernel_init.c

////////////////////////////////////////////////////////////////////////////////////////
//     functions to  translate [ page <-> base <-> ppn ]
////////////////////////////////////////////////////////////////////////////////////////

/////////////////////////////////////////////
// Translate an extended pointer on a page descriptor into an extended pointer
// on the base address of the physical page it describes.
// - page_xp : extended pointer on the page descriptor.
// Returns an extended pointer (same cluster as page_xp) on the page base.
// The arithmetic uses the pages_tbl and vaddr_base fields of the LOCAL ppm,
// whatever the cluster identified by page_xp.
inline xptr_t ppm_page2base( xptr_t page_xp )
{
    // split the extended pointer into local pointer and cluster identifier
    page_t * desc = GET_PTR( page_xp );
    cxy_t    cxy  = GET_CXY( page_xp );

    // local PPM descriptor provides the table base and the memory base
    ppm_t  * ppm  = &LOCAL_CLUSTER->ppm;

    // page base = vaddr_base + (descriptor index in pages_tbl[] << page shift)
    void   * base = ppm->vaddr_base +
                    ((desc - ppm->pages_tbl)<<CONFIG_PPM_PAGE_SHIFT);

    return XPTR( cxy , base );

} // end ppm_page2base()

/////////////////////////////////////////////
// Translate an extended pointer on a physical page base into an extended
// pointer on the associated page descriptor.
// - base_xp : extended pointer on the page base address.
// Returns an extended pointer (same cluster as base_xp) on the page descriptor.
// The arithmetic uses the pages_tbl and vaddr_base fields of the LOCAL ppm,
// whatever the cluster identified by base_xp.
inline xptr_t ppm_base2page( xptr_t base_xp )
{
    // split the extended pointer into local pointer and cluster identifier
    void   * base = GET_PTR( base_xp );
    cxy_t    cxy  = GET_CXY( base_xp );

    // local PPM descriptor provides the table base and the memory base
    ppm_t  * ppm  = &LOCAL_CLUSTER->ppm;

    // descriptor = pages_tbl + (byte offset from vaddr_base >> page shift)
    page_t * desc = ppm->pages_tbl +
                    ((base - ppm->vaddr_base)>>CONFIG_PPM_PAGE_SHIFT);

    return XPTR( cxy , desc );

}  // end ppm_base2page()



///////////////////////////////////////////
// Translate an extended pointer on a page descriptor into a physical page number.
// - page_xp : extended pointer on the page descriptor.
// Returns the PPN, i.e. the physical address built by PADDR() from the page
// cluster and the page byte offset, shifted right by CONFIG_PPM_PAGE_SHIFT.
// The descriptor index is computed against the pages_tbl of the LOCAL ppm.
inline ppn_t ppm_page2ppn( xptr_t page_xp )
{
    // split the extended pointer into local pointer and cluster identifier
    page_t * desc = GET_PTR( page_xp );
    cxy_t    cxy  = GET_CXY( page_xp );

    // local PPM descriptor provides the page descriptors table base
    ppm_t  * ppm  = &LOCAL_CLUSTER->ppm;

    // physical address of the page : cluster + (descriptor index << page shift)
    paddr_t  pa   = PADDR( cxy , (desc - ppm->pages_tbl)<<CONFIG_PPM_PAGE_SHIFT );

    return (ppn_t)(pa >> CONFIG_PPM_PAGE_SHIFT);

}  // end ppm_page2ppn()

///////////////////////////////////////
// Translate a physical page number into an extended pointer on the
// associated page descriptor.
// - ppn : physical page number.
// Returns an extended pointer on the page descriptor, in the cluster
// extracted from the physical address by CXY_FROM_PADDR().
// The descriptor address is computed from the pages_tbl of the LOCAL ppm.
inline xptr_t ppm_ppn2page( ppn_t ppn )
{
    // rebuild the physical address of the page base
    paddr_t  pa     = ((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT;

    // split it into local physical address and cluster identifier
    lpa_t    local  = LPA_FROM_PADDR( pa );
    cxy_t    target = CXY_FROM_PADDR( pa );

    // local PPM descriptor provides the page descriptors table base
    ppm_t  * ppm    = &LOCAL_CLUSTER->ppm;

    // the descriptor index is the local physical address >> page shift
    return XPTR( target , &ppm->pages_tbl[local>>CONFIG_PPM_PAGE_SHIFT] );

}  // end ppm_ppn2page()



///////////////////////////////////////
// Translate a physical page number into an extended pointer on the page base.
// - ppn : physical page number.
// Returns an extended pointer on the page base, in the cluster extracted
// from the physical address by CXY_FROM_PADDR().
// The base address is computed from the vaddr_base of the LOCAL ppm.
inline xptr_t ppm_ppn2base( ppn_t ppn )
{
    // rebuild the physical address of the page base
    paddr_t  pa     = ((paddr_t)ppn) << CONFIG_PPM_PAGE_SHIFT;

    // split it into local physical address and cluster identifier
    lpa_t    local  = LPA_FROM_PADDR( pa );
    cxy_t    target = CXY_FROM_PADDR( pa );

    // local PPM descriptor provides the memory base address
    ppm_t  * ppm    = &LOCAL_CLUSTER->ppm;

    // page base = vaddr_base + local physical address
    return XPTR( target , (void *)ppm->vaddr_base + local );

}  // end ppm_ppn2base()

///////////////////////////////////////////
// Translate an extended pointer on a page base into a physical page number.
// - base_xp : extended pointer on the page base address.
// Returns the PPN, i.e. the physical address built by PADDR() from the base
// cluster and the byte offset from vaddr_base, shifted right by
// CONFIG_PPM_PAGE_SHIFT.
// The offset is computed against the vaddr_base of the LOCAL ppm.
inline ppn_t ppm_base2ppn( xptr_t base_xp )
{
    // split the extended pointer into local pointer and cluster identifier
    void   * base = GET_PTR( base_xp );
    cxy_t    cxy  = GET_CXY( base_xp );

    // local PPM descriptor provides the memory base address
    ppm_t  * ppm  = &LOCAL_CLUSTER->ppm;

    // physical address of the page : cluster + byte offset from vaddr_base
    paddr_t  pa   = PADDR( cxy , (base - ppm->vaddr_base) );

    return (ppn_t)(pa >> CONFIG_PPM_PAGE_SHIFT);

}  // end ppm_base2ppn()


////////////////////////////////////////////////////////////////////////////////////////
//     functions to  allocate / release  physical pages 
////////////////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////
// Release a block of physical pages to the local PPM free lists, merging it
// with its free buddy blocks as long as possible.
// - page : local pointer on the page descriptor of the released block;
//          page->order gives the block size (1 << order small pages).
// This function does not take ppm->free_lock itself : ppm_free_pages()
// calls it with that lock held.
// Asserts that the page is neither already free nor reserved.
void ppm_free_pages_nolock( page_t * page )
{
	page_t   * buddy;            // searched buddy block page descriptor
	uint32_t   buddy_index;      // buddy block index in page_tbl[]
	page_t   * current;          // current (merged) block page descriptor
	uint32_t   current_index;    // current (merged) block index in page_tbl[]
	uint32_t   current_order;    // current (merged) block order

	ppm_t    * ppm         = &LOCAL_CLUSTER->ppm;
	page_t   * pages_tbl   = ppm->pages_tbl;

assert( !page_is_flag( page , PG_FREE ) ,
"page already released : ppn = %x\n" , ppm_page2ppn(XPTR(local_cxy,page)) );

assert( !page_is_flag( page , PG_RESERVED ) ,
"reserved page : ppn = %x\n" , ppm_page2ppn(XPTR(local_cxy,page)) );

	// update released page descriptor flags
	page_set_flag( page , PG_FREE );

	// search the buddy page descriptor
	// - merge with current page descriptor if found
	// - exit to release the current page descriptor if not found
	// The loop stops at order (CONFIG_PPM_MAX_ORDER - 1) : the free lists are
	// indexed by orders 0 to CONFIG_PPM_MAX_ORDER-1 everywhere in this file,
	// so a block must never be merged into order CONFIG_PPM_MAX_ORDER.
	// NOTE(review): buddy_index is not checked against the number of page
	// descriptors - confirm pages_tbl[] always covers the buddy of any block.
	current_index = (uint32_t)(page - pages_tbl);
	for( current_order = page->order ;
	     current_order < (CONFIG_PPM_MAX_ORDER - 1) ;
	     current_order++ )
	{
		// the buddy index differs from the current index by the order bit only
		buddy_index = current_index ^ (1 << current_order);
		buddy       = pages_tbl + buddy_index;

        // exit this loop if buddy block not found
		if( !page_is_flag( buddy , PG_FREE ) || 
            (buddy->order != current_order) ) break;

		// remove buddy block from free_list 
		list_unlink( &buddy->list );
		ppm->free_pages_nr[current_order] --;

        // reset order field in buddy block page descriptor
		buddy->order = 0;

		// compute merged block index in page_tbl[] :
		// clearing the order bit selects the lower of the two buddies
		current_index &= buddy_index;
	}

	// update pointer and order field for merged block page descriptor 
	current        = pages_tbl + current_index;
	current->order = current_order;

	// insert merged block in free list
	list_add_first( &ppm->free_pages_root[current_order] , &current->list );
	ppm->free_pages_nr[current_order] ++;

}  // end ppm_free_pages_nolock()

////////////////////////////////////////////
// Allocate a block of (1 << order) contiguous small pages in the local cluster.
// - order : requested block order, must be smaller than CONFIG_PPM_MAX_ORDER
//           (checked by an assert).
// Returns a local pointer on the page descriptor of the allocated block,
// or NULL if no free block of sufficient size exists.
// The free lists are protected by ppm->free_lock for the whole search / split
// sequence. On success the PG_FREE flag is cleared, the refcount is
// incremented, the order field is set, and the DQDT is updated.
page_t * ppm_alloc_pages( uint32_t   order )
{
	page_t   * current_block;
	uint32_t   current_order;
	uint32_t   current_size = 0;      // set when a free block is found
	page_t   * found_block  = NULL;   // set when a free block is found

#if DEBUG_PPM_ALLOC_PAGES
thread_t * this = CURRENT_THREAD;
uint32_t cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] enter for %d page(s) in cluster %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 1<<order, local_cxy, cycle );
#endif

#if(DEBUG_PPM_ALLOC_PAGES & 0x1)
if( DEBUG_PPM_ALLOC_PAGES < cycle )
ppm_remote_display( local_cxy );
#endif

	ppm_t    * ppm = &LOCAL_CLUSTER->ppm;

// check order
assert( (order < CONFIG_PPM_MAX_ORDER) , "illegal order argument = %d\n" , order );

    // build extended pointer on lock protecting the local PPM
    xptr_t lock_xp = XPTR( local_cxy , &ppm->free_lock );

	// take lock protecting free lists
	remote_busylock_acquire( lock_xp );

	current_block = NULL;

	// find a free block equal or larger to requested size
	for( current_order = order ; current_order < CONFIG_PPM_MAX_ORDER ; current_order ++ )
	{
		if( !list_is_empty( &ppm->free_pages_root[current_order] ) )
		{
            // get first free block in this free_list
			current_block = LIST_FIRST( &ppm->free_pages_root[current_order] , page_t , list );

            // remove this block from this free_list
			list_unlink( &current_block->list );

            // register pointer on found block
            found_block = current_block;

            // update this free-list number of blocks
	        ppm->free_pages_nr[current_order] --;

            // compute found block size
	        current_size = (1 << current_order);

			break;	
		}
	}

	if( current_block == NULL ) // return failure if no free block found
	{
		// release lock protecting free lists
		remote_busylock_release( lock_xp );

#if DEBUG_PPM_ALLOC_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] cannot allocate %d page(s) in cluster %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 1<<order, local_cxy, cycle );
#endif

		return NULL;
	}


	// split the found block in smaller sub-blocks if required
	// and update the free-lists accordingly :
	// at each step the upper half is returned to the free list of the
	// decremented order, and the lower half is kept
	while( current_order > order )
	{
		current_order --;

        // update pointer, size, and order fields for new free block
		current_size >>= 1;
		current_block = found_block + current_size;
		current_block->order = current_order;

        // insert new free block in relevant free_list
		list_add_first( &ppm->free_pages_root[current_order] , &current_block->list );

        // update number of blocks in free list
		ppm->free_pages_nr[current_order] ++;
	}

	// update found block page descriptor
	page_clear_flag( found_block , PG_FREE );
	page_refcount_up( found_block );
	found_block->order = order;

	// release lock protecting free lists
	remote_busylock_release( lock_xp );

    // update DQDT
    dqdt_increment_pages( local_cxy , order );

#if DEBUG_PPM_ALLOC_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] exit for %d page(s) in cluster %x / ppn = %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 
1<<order, local_cxy, ppm_page2ppn(XPTR( local_cxy , found_block )), cycle );
#endif

#if(DEBUG_PPM_ALLOC_PAGES & 0x1)
if( DEBUG_PPM_ALLOC_PAGES < cycle )
ppm_remote_display( local_cxy );
#endif

	return found_block;

}  // end ppm_alloc_pages()


////////////////////////////////////
// Release a block of physical pages in the local cluster.
// - page : local pointer on the page descriptor of the released block.
// Takes ppm->free_lock, calls ppm_free_pages_nolock(), releases the lock,
// then updates the DQDT with the order of the released block.
void ppm_free_pages( page_t * page )
{
	ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // Save the released block order before it is released :
    // ppm_free_pages_nolock() overwrites page->order with the merged block
    // order when the released page becomes the head of the merged block,
    // and the page can be allocated again as soon as the lock is released.
    uint32_t order = page->order;

#if DEBUG_PPM_FREE_PAGES
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_FREE_PAGES < cycle )
printk("\n[%s] thread[%x,%x] enter for %d page(s) in cluster %x / ppn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 
1<<order, local_cxy, ppm_page2ppn(XPTR(local_cxy , page)), cycle );
#endif

#if(DEBUG_PPM_FREE_PAGES & 0x1)
if( DEBUG_PPM_FREE_PAGES < cycle )
ppm_remote_display( local_cxy );
#endif

    // build extended pointer on lock protecting free_lists
    xptr_t lock_xp = XPTR( local_cxy , &ppm->free_lock );

	// get lock protecting free_pages[] array
	remote_busylock_acquire( lock_xp );

	ppm_free_pages_nolock( page );

	// release lock protecting free_lists
	remote_busylock_release( lock_xp );

    // update DQDT with the order of the block actually released
    dqdt_decrement_pages( local_cxy , order );

#if DEBUG_PPM_FREE_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_FREE_PAGES < cycle )
printk("\n[%s] thread[%x,%x] exit for %d page(s) in cluster %x / ppn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 
1<<order, local_cxy, ppm_page2ppn(XPTR(local_cxy , page)) , cycle );
#endif

#if(DEBUG_PPM_FREE_PAGES & 0x1)
if( DEBUG_PPM_FREE_PAGES < cycle )
ppm_remote_display( local_cxy );
#endif

}  // end ppm_free_pages()

/////////////////////////////////////////////
// Allocate a block of (1 << order) contiguous small pages in cluster cxy,
// which can be any cluster : all accesses to the target PPM and page
// descriptors use remote access primitives.
// - cxy   : target cluster identifier.
// - order : requested block order, must be smaller than CONFIG_PPM_MAX_ORDER
//           (checked by an assert).
// Returns an extended pointer on the page descriptor of the allocated block,
// or XPTR_NULL if no free block of sufficient size exists in cluster cxy.
// The remote free lists are protected by the remote ppm->free_lock. On success
// the PG_FREE flag is cleared, the refcount is incremented, the order field is
// set, and the DQDT is updated for cluster cxy.
xptr_t ppm_remote_alloc_pages( cxy_t     cxy,
                               uint32_t  order )
{
	uint32_t   current_order;
	uint32_t   current_size = 0;       // set when a free block is found
    page_t   * current_block;   
    page_t   * found_block  = NULL;    // set when a free block is found

#if DEBUG_PPM_REMOTE_ALLOC_PAGES
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_REMOTE_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] enter for %d small page(s) in cluster %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 1<<order, cxy, cycle );
#endif

#if(DEBUG_PPM_REMOTE_ALLOC_PAGES & 0x1)
if( DEBUG_PPM_REMOTE_ALLOC_PAGES < cycle )
ppm_remote_display( cxy );
#endif

// check order
assert( (order < CONFIG_PPM_MAX_ORDER) , "illegal order argument = %d\n" , order );

    // get local pointer on PPM (same in all clusters)
	ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // build extended pointer on lock protecting remote PPM
    xptr_t lock_xp = XPTR( cxy , &ppm->free_lock );

	// take lock protecting free lists in remote cluster
	remote_busylock_acquire( lock_xp );

    current_block = NULL;    

	// find in remote cluster a free block equal or larger to requested size
	for( current_order = order ; current_order < CONFIG_PPM_MAX_ORDER ; current_order ++ )
	{
        // get local pointer on the root of relevant free_list in remote cluster
        list_entry_t * root = &ppm->free_pages_root[current_order];

		if( !list_remote_is_empty( cxy , root ) )
		{
            // get local pointer on first free page descriptor in remote cluster
			current_block = LIST_REMOTE_FIRST( cxy, root , page_t , list );

            // remove first free page from the free-list in remote cluster
			list_remote_unlink( cxy , &current_block->list );

            // register found block
            found_block = current_block;

	        // decrement relevant free-list number of items in remote cluster
	        hal_remote_atomic_add( XPTR( cxy , &ppm->free_pages_nr[current_order] ), -1 );

            // compute found block size
	        current_size = (1 << current_order);

			break;
		}
	}

	if( current_block == NULL ) // return failure
	{
		// release lock protecting free lists
		remote_busylock_release( lock_xp );

#if DEBUG_PPM_REMOTE_ALLOC_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_REMOTE_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] cannot allocate %d page(s) in cluster %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 1<<order, cxy, cycle );
#endif

		return XPTR_NULL;
	}

	// split the found block in smaller sub-blocks if required
	// and update the free-lists accordingly in remote cluster :
	// at each step the upper half is returned to the free list of the
	// decremented order, and the lower half is kept
	while( current_order > order )
	{
        // update order, size, and local pointer for new free block
		current_order --;
		current_size >>= 1;
		current_block = found_block + current_size;

        // update new free block order field in remote cluster 
		hal_remote_s32( XPTR( cxy , &current_block->order ) , current_order );

        // get local pointer on the root of the relevant free_list in remote cluster  
        list_entry_t * root = &ppm->free_pages_root[current_order];

        // insert new free block in this free_list
		list_remote_add_first( cxy , root, &current_block->list );

        // update free-list number of items in remote cluster
        hal_remote_atomic_add( XPTR(cxy , &ppm->free_pages_nr[current_order]), 1 );
	}

	// update refcount, flags and order fields in found block 
	page_remote_clear_flag( XPTR( cxy , found_block ), PG_FREE );
	page_remote_refcount_up( XPTR( cxy , found_block ) );
	hal_remote_s32( XPTR( cxy , &found_block->order ) , order );
    
	// release lock protecting free lists in remote cluster
	remote_busylock_release( lock_xp );

    // update DQDT page counter in remote cluster
    dqdt_increment_pages( cxy , order );

#if DEBUG_PPM_REMOTE_ALLOC_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_REMOTE_ALLOC_PAGES < cycle )
printk("\n[%s] thread[%x,%x] exit for %d page(s) / ppn = %x in cluster %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 
1<<order, ppm_page2ppn(XPTR( cxy , found_block )), cxy, cycle );
#endif

#if(DEBUG_PPM_REMOTE_ALLOC_PAGES & 0x1)
if( DEBUG_PPM_REMOTE_ALLOC_PAGES < cycle )
ppm_remote_display( cxy );
#endif

	return XPTR( cxy , found_block );

}  // end ppm_remote_alloc_pages()

//////////////////////////////////////////
// Release a block of physical pages in cluster cxy, which can be any cluster,
// merging it with its free buddy blocks as long as possible.
// - cxy  : target cluster identifier.
// - page : local pointer (valid in cluster cxy) on the page descriptor of the
//          released block. It must never be dereferenced directly here :
//          every access goes through a remote access primitive.
// The remote free lists are protected by the remote ppm->free_lock.
// Asserts that the page is neither already free nor reserved, then updates
// the DQDT for cluster cxy with the order of the released block.
void ppm_remote_free_pages( cxy_t     cxy,
                            page_t  * page )
{
	page_t   * buddy_ptr;        // searched buddy block page descriptor
    uint32_t   buddy_order;      // searched buddy block order
	uint32_t   buddy_index;      // buddy block index in page_tbl[]
	page_t   * current_ptr;      // current (merged) block page descriptor
	uint32_t   current_index;    // current (merged) block index in page_tbl[]
	uint32_t   current_order;    // current (merged) block order

    // build extended pointer on released page descriptor
    xptr_t     page_xp = XPTR( cxy , page );

    // get released block order with a remote access, before the block is
    // released : the order field is overwritten by the merge, and this value
    // is reused below for the DQDT update and for the debug traces
    uint32_t   order   = hal_remote_l32( XPTR( cxy , &page->order ) );

#if DEBUG_PPM_REMOTE_FREE_PAGES
thread_t * this  = CURRENT_THREAD;
uint32_t   cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_REMOTE_FREE_PAGES < cycle )
printk("\n[%s] thread[%x,%x] enter for %d page(s) / cxy %x / ppn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 
1<<order, cxy, ppm_page2ppn(page_xp), cycle );
#endif

#if(DEBUG_PPM_REMOTE_FREE_PAGES & 0x1)
if( DEBUG_PPM_REMOTE_FREE_PAGES < cycle )
ppm_remote_display( cxy );
#endif

    // get local pointer on PPM (same in all clusters)
	ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // build extended pointer on lock protecting remote PPM
    xptr_t lock_xp = XPTR( cxy , &ppm->free_lock );

    // get local pointer on remote PPM page_tbl[] array 
	page_t * pages_tbl = hal_remote_lpt( XPTR( cxy , &ppm->pages_tbl ) );

	// get lock protecting free_pages in remote cluster
	remote_busylock_acquire( lock_xp );

assert( !page_remote_is_flag( page_xp , PG_FREE ) ,
"page already released : ppn = %x\n" , ppm_page2ppn(page_xp) );

assert( !page_remote_is_flag( page_xp , PG_RESERVED ) ,
"reserved page : ppn = %x\n" , ppm_page2ppn(page_xp) );

	// update released page descriptor flags
	page_remote_set_flag( page_xp , PG_FREE );

	// search the buddy page descriptor
	// - merge with current page descriptor if found
	// - exit to release the current page descriptor if not found
	// The index is computed against the page_tbl[] base read in cluster cxy,
	// the same base that is used below to locate the buddy descriptors.
	// The loop stops at order (CONFIG_PPM_MAX_ORDER - 1) : the free lists are
	// indexed by orders 0 to CONFIG_PPM_MAX_ORDER-1 everywhere in this file,
	// so a block must never be merged into order CONFIG_PPM_MAX_ORDER.
	// NOTE(review): buddy_index is not checked against the number of page
	// descriptors - confirm page_tbl[] always covers the buddy of any block.
	current_index = (uint32_t)(page - pages_tbl);
	for( current_order = order ;
	     current_order < (CONFIG_PPM_MAX_ORDER - 1) ;
	     current_order++ )
	{
		// the buddy index differs from the current index by the order bit only
		buddy_index = current_index ^ (1 << current_order);
		buddy_ptr   = pages_tbl + buddy_index;

        // get buddy block order
        buddy_order = hal_remote_l32( XPTR( cxy , &buddy_ptr->order ) );

        // exit loop if buddy block not found
		if( !page_remote_is_flag( XPTR( cxy , buddy_ptr ) , PG_FREE ) || 
            (buddy_order != current_order) ) break;

		// remove buddy from free list in remote cluster
		list_remote_unlink( cxy , &buddy_ptr->list );
        hal_remote_atomic_add( XPTR( cxy , &ppm->free_pages_nr[current_order] ) , -1 );

        // reset order field in buddy block page descriptor
        hal_remote_s32( XPTR( cxy , &buddy_ptr->order ) , 0 );

		// compute merged block index in page_tbl[] array :
		// clearing the order bit selects the lower of the two buddies
		current_index &= buddy_index;
	}

	// update merged page descriptor order field
	current_ptr = pages_tbl + current_index;
    hal_remote_s32( XPTR( cxy , &current_ptr->order ) , current_order );

	// insert merged block into relevant free list in remote cluster
	list_remote_add_first( cxy , &ppm->free_pages_root[current_order] , &current_ptr->list );
    hal_remote_atomic_add( XPTR( cxy , &ppm->free_pages_nr[current_order] ) , 1 );

	// release lock protecting free_pages[] array
	remote_busylock_release( lock_xp );

    // update DQDT with the order of the block actually released
    dqdt_decrement_pages( cxy , order );

#if DEBUG_PPM_REMOTE_FREE_PAGES
cycle = (uint32_t)hal_get_cycles();
if( DEBUG_PPM_REMOTE_FREE_PAGES < cycle )
printk("\n[%s] thread[%x,%x] exit for %d page(s) in cluster %x / ppn %x / cycle %d\n",
__FUNCTION__, this->process->pid, this->trdid, 
1<<order, cxy, ppm_page2ppn(page_xp), cycle );
#endif

#if(DEBUG_PPM_REMOTE_FREE_PAGES & 0x1)
if( DEBUG_PPM_REMOTE_FREE_PAGES < cycle )
ppm_remote_display( cxy );
#endif

}  // end ppm_remote_free_pages()

////////////////////////////////////
// Display on TXT0 the state of the PPM free lists in cluster cxy, which can
// be any cluster : for each order, the number of free blocks and the PPN of
// each free block.
// - cxy : target cluster identifier.
// Both the PPM free_lock of cluster cxy and the TXT0 wait_lock are held during
// the display, taken in that order and released in the reverse order.
void ppm_remote_display( cxy_t cxy )
{
	uint32_t       order;
	list_entry_t * iter;
    xptr_t         page_xp;

    // get local pointer on PPM (field offsets are reused for cluster cxy)
    ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // build extended pointer on lock protecting remote PPM
    xptr_t ppm_lock_xp = XPTR( cxy , &ppm->free_lock );

    // get pointers on TXT0 chdev
    xptr_t    txt0_xp  = chdev_dir.txt_tx[0];
    cxy_t     txt0_cxy = GET_CXY( txt0_xp );
    chdev_t * txt0_ptr = GET_PTR( txt0_xp );

    // build extended pointer on remote TXT0 lock
    xptr_t  txt_lock_xp = XPTR( txt0_cxy , &txt0_ptr->wait_lock );

	// get PPM lock 
	remote_busylock_acquire( ppm_lock_xp );

    // get TXT0 lock 
    remote_busylock_acquire( txt_lock_xp );

    // get total number of pages from the displayed cluster, not the local one
    // (assumes pages_nr is a 32-bit field like free_pages_nr[] - TODO confirm)
    uint32_t pages_nr = hal_remote_l32( XPTR( cxy , &ppm->pages_nr ) );

	nolock_printk("\n***** PPM in cluster %x / %d pages\n", cxy , pages_nr );

	for( order = 0 ; order < CONFIG_PPM_MAX_ORDER ; order++ )
	{
        // get number of free pages for free_list[order] in remote cluster
        uint32_t n = hal_remote_l32( XPTR( cxy , &ppm->free_pages_nr[order] ) );

		nolock_printk("- order = %d / n = %d\t: ", order , n );

		LIST_REMOTE_FOREACH( cxy , &ppm->free_pages_root[order] , iter )
		{
            // build extended pointer on page descriptor
            page_xp = XPTR( cxy , LIST_ELEMENT( iter , page_t , list ) );

            // display PPN
			nolock_printk("%x," , ppm_page2ppn( page_xp ) );
		}

		nolock_printk("\n");
	}

	// release TXT0 lock
	remote_busylock_release( txt_lock_xp );

	// release PPM lock
	remote_busylock_release( ppm_lock_xp );
}

////////////////////////////////
// Check the consistency of the local PPM free lists : every page descriptor
// registered in free_pages_root[k] must have its order field equal to k.
// Returns 0 if all free lists are consistent, -1 on the first mismatch.
// No lock is taken by this function.
error_t ppm_assert_order( void )
{
    ppm_t        * ppm = &LOCAL_CLUSTER->ppm;
    uint32_t       rank;       // order of the scanned free list
    list_entry_t * entry;      // iterator on the scanned free list

    for( rank = 0 ; rank < CONFIG_PPM_MAX_ORDER ; rank++ )
    {
        // only non-empty free lists are scanned
        if( !list_is_empty( &ppm->free_pages_root[rank] ) )
        {
            LIST_FOREACH( &ppm->free_pages_root[rank] , entry )
            {
                page_t * desc = LIST_ELEMENT( entry , page_t , list );

                // a block registered in the wrong list is an error
                if( desc->order != rank )  return -1;
            }
        }
    }

    return 0;
}


//////////////////////////////////////////////////////////////////////////////////////
//     functions to handle  dirty physical pages
//////////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////
// Mark a page as dirty and register it in the dirty list of the PPM of the
// cluster containing the page descriptor, if it is not already dirty.
// - page_xp : extended pointer on the page descriptor (any cluster).
// Returns true if the page has been marked dirty by this call,
// false if the PG_DIRTY flag was already set.
// The PPM dirty_lock is taken first, then the page lock ; both are released
// in the reverse order.
bool_t ppm_page_do_dirty( xptr_t page_xp )
{
    // get local pointer on PPM (same in all clusters)
    ppm_t  * ppm   = &LOCAL_CLUSTER->ppm;

    // get page cluster and local pointer
    cxy_t    cxy   = GET_CXY( page_xp );
    page_t * desc  = GET_PTR( page_xp );

    // build extended pointers on PPM dirty list lock, page lock, and page flags
    xptr_t   list_lock_xp = XPTR( cxy , &ppm->dirty_lock );
    xptr_t   desc_lock_xp = XPTR( cxy , &desc->lock  );
    xptr_t   flags_xp     = XPTR( cxy , &desc->flags );

    bool_t   inserted = false;

    // lock the remote PPM dirty_list, then the remote page
    remote_queuelock_acquire( list_lock_xp );
    remote_busylock_acquire( desc_lock_xp );

    // get remote page flags
    uint32_t flags = hal_remote_l32( flags_xp );

    // nothing to do when the page is already dirty
    if( (flags & PG_DIRTY) == 0 )
    {
        // set dirty flag in page descriptor
        hal_remote_s32( flags_xp , flags | PG_DIRTY );

        // insert the page in the remote dirty list
        list_remote_add_first( cxy , &ppm->dirty_root , &desc->list );

        inserted = true;
    }

    // unlock the remote page, then the remote PPM dirty_list
    remote_busylock_release( desc_lock_xp );
    remote_queuelock_release( list_lock_xp );

    return inserted;

} // end ppm_page_do_dirty()

////////////////////////////////////////////
// Clear the dirty state of a page and remove it from the dirty list of the
// PPM of the cluster containing the page descriptor, if it is dirty.
// - page_xp : extended pointer on the page descriptor (any cluster).
// Returns true if the page has been un-marked by this call,
// false if the PG_DIRTY flag was not set.
// The PPM dirty_lock is taken first, then the page lock ; both are released
// in the reverse order.
bool_t ppm_page_undo_dirty( xptr_t page_xp )
{
    // get local pointer on PPM (same in all clusters)
    ppm_t  * ppm   = &LOCAL_CLUSTER->ppm;

    // get page cluster and local pointer
    cxy_t    cxy   = GET_CXY( page_xp );
    page_t * desc  = GET_PTR( page_xp );

    // build extended pointers on PPM dirty list lock, page lock, and page flags
    xptr_t   list_lock_xp = XPTR( cxy , &ppm->dirty_lock );
    xptr_t   desc_lock_xp = XPTR( cxy , &desc->lock  );
    xptr_t   flags_xp     = XPTR( cxy , &desc->flags );

    bool_t   removed = false;

    // lock the remote PPM dirty_list, then the remote page
    remote_queuelock_acquire( list_lock_xp );
    remote_busylock_acquire( desc_lock_xp );

    // get remote page flags
    uint32_t flags = hal_remote_l32( flags_xp );

    // nothing to do when the page is not dirty
    if( (flags & PG_DIRTY) )
    {
        // reset dirty flag in page descriptor
        hal_remote_s32( flags_xp , flags & (~PG_DIRTY) );

        // remove the page from remote dirty list
        list_remote_unlink( cxy , &desc->list );

        removed = true;
    }

    // unlock the remote page, then the remote PPM dirty_list
    remote_busylock_release( desc_lock_xp );
    remote_queuelock_release( list_lock_xp );

    return removed;

}  // end ppm_page_undo_dirty()

/////////////////////////////////
void ppm_sync_dirty_pages( void )
{
	ppm_t * ppm = &LOCAL_CLUSTER->ppm;

    // get local pointer on PPM dirty_root
    list_entry_t * dirty_root = &ppm->dirty_root;

    // build extended pointer on PPM dirty_lock 
    xptr_t dirty_lock_xp = XPTR( local_cxy , &ppm->dirty_lock );

	// get the PPM dirty_list lock
	remote_queuelock_acquire( dirty_lock_xp );

	while( !list_is_empty( &ppm->dirty_root ) )
	{
		page_t * page = LIST_FIRST( dirty_root ,  page_t , list );
        xptr_t   page_xp = XPTR( local_cxy , page );

        // build extended pointer on page lock
        xptr_t page_lock_xp = XPTR( local_cxy , &page->lock );

		// get the page lock
		remote_busylock_acquire( page_lock_xp );

		// sync the page
		vfs_fs_move_page( page_xp , false );  // from mapper to device

		// release the page lock
		remote_busylock_release( page_lock_xp );
	}

	// release the PPM dirty_list lock
	remote_queuelock_release( dirty_lock_xp );

}  // end ppm_sync_dirty_pages()

