/*
 * thread.h -  Thread and related operations definition.
 *
 * Author  Ghassan Almaless (2008,2009,2010,2011,2012)
 *         Alain Greiner (2016)
 *
 * Copyright (c) UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _THREAD_H_
#define _THREAD_H_

#include <hal_types.h>
#include <shared_syscalls.h>
#include <hal_special.h>
#include <xlist.h>
#include <list.h>
#include <hal_context.h>
#include <spinlock.h>
#include <core.h>
#include <cluster.h>
#include <process.h>
#include <dev_ioc.h>
#include <dev_nic.h>
#include <dev_txt.h>
#include <dev_mmc.h>
#include <dev_dma.h>

/***************************************************************************************
 * These macros are used to compose or decompose global thread identifier (TRDID)
 * to or from cluster identifier / local thread index (CXY , LTID).
 * - LTID_FROM_TRDID : extracts the LTID from the 16 LSB bits of a TRDID.
 * - CXY_FROM_TRDID  : extracts the CXY from the bits located above the 16 LSB bits.
 * - TRDID           : builds a TRDID from a (CXY , LTID) couple.
 * Each argument and each expansion is parenthesized, so that an argument can be any
 * expression, and the result can be used inside a larger expression.
 * In TRDID(), <cxy> is converted to trdid_t before the shift, to avoid shifting
 * a (promoted) signed integer into its sign bit.
 **************************************************************************************/

#define LTID_FROM_TRDID( trdid )   ((ltid_t)((trdid) & 0x0000FFFF))
#define CXY_FROM_TRDID( trdid )    ((cxy_t)((trdid) >> 16))
#define TRDID( cxy , ltid )        ((trdid_t)(((trdid_t)(cxy) << 16) | (ltid)))

/***************************************************************************************
 * This enum defines the thread types.
 **************************************************************************************/

typedef enum
{
	/*! user-level thread (pthread) */
	THREAD_USER = 0,

	/*! kernel thread in charge of executing the pending RPCs */
	THREAD_RPC  = 1,

	/*! kernel thread in charge of executing the I/O device commands */
	THREAD_DEV  = 2,

	/*! idle kernel thread */
	THREAD_IDLE = 3
}
thread_type_t;

/***************************************************************************************
 * This defines the thread flags bit-vector.
 **************************************************************************************/

#define THREAD_FLAG_LOADABLE     0x0001  /*! set while the thread has never been run  */
#define THREAD_FLAG_DETACHED     0x0002  /*! the thread is detached from its parent   */
#define THREAD_FLAG_JOIN         0x0004  /*! a join has been made by the parent       */
#define THREAD_FLAG_EXIT         0x0008  /*! an exit has been made by the thread      */
#define THREAD_FLAG_SCHED        0x0010  /*! a scheduling is required for the thread  */

/***************************************************************************************
 * This defines the masks associated to the thread signals.
 **************************************************************************************/

#define THREAD_SIG_KILL          0x0001  /*! the thread is killed by another thread   */
#define THREAD_SIG_SUICIDE       0x0002  /*! the thread itself requested to exit      */

/***************************************************************************************
 * This defines the masks associated to the blocking causes.
 **************************************************************************************/

#define THREAD_BLOCKED_GLOBAL    0x0001  /*! deactivated thread, waiting activation   */
#define THREAD_BLOCKED_IO        0x0002  /*! waiting completion of an IO operation    */
#define THREAD_BLOCKED_MAPPER    0x0004  /*! waiting for a mapper                     */
#define THREAD_BLOCKED_JOIN      0x0008  /*! blocked in join, waiting an exit         */
#define THREAD_BLOCKED_EXIT      0x0010  /*! blocked in exit, waiting a join          */
#define THREAD_BLOCKED_KILL      0x0020  /*! a kill signal has been received          */
#define THREAD_BLOCKED_SEM       0x0040  /*! waiting for a semaphore                  */
#define THREAD_BLOCKED_PAGE      0x0080  /*! waiting for a page access                */
#define THREAD_BLOCKED_USERSYNC  0x0100  /*! waiting on POSIX cond / mutex / barrier  */
#define THREAD_BLOCKED_RPC       0x0200  /*! waiting completion of a RPC              */

#define THREAD_BLOCKED_DEV_QUEUE 0x2000  /*! DEV thread waiting on its queue          */
#define THREAD_BLOCKED_DEV_ISR   0x4000  /*! DEV thread waiting for the ISR           */

/***************************************************************************************
 * This structure defines the thread instrumentation information.
 * It is embedded in the thread descriptor (field "info" of thread_t).
 * Several counters are not documented yet [AG] : the meanings introduced below by
 * "presumably" are deduced from the field names only, and must be confirmed.
 **************************************************************************************/

typedef struct thread_info_s
{
	uint32_t              pgfault_nr;    /*! cumulated number of page faults          */
	uint32_t              sched_nr;      /*! TODO presumably number of schedulings    */
	uint32_t              u_err_nr;      /*! TODO undocumented error counter [AG]     */
	uint32_t              m_err_nr;      /*! TODO undocumented error counter [AG]     */
	uint32_t              tm_tmp;        /*! temp date to compute execution duration  */
	uint32_t              tm_exec;       /*! TODO presumably an execution date [AG]   */
	uint32_t              tm_create;     /*! date of the creation                     */
	uint32_t              tm_born;       /*! date of the thread loading               */
	uint32_t              tm_dead;       /*! date of the death                        */
	cycle_t               tm_sleep;      /*! TODO presumably sleeping duration [AG]   */
	cycle_t               tm_wait;       /*! TODO presumably waiting duration [AG]    */
	cycle_t               tm_usr;        /*! user execution duration                  */
	cycle_t               tm_sys;        /*! system execution duration                */
}
thread_info_t;

/***************************************************************************************
 * This structure defines a thread descriptor.
 * It is used for both the user threads and the kernel threads.
 * In a process, a user thread is identified by a unique TRDID (thread identifier),
 * that is returned by the kernel to the user:
 * - The TRDID 16 LSB bits contain the LTID (Local Thread Index).
 * - The TRDID 16 MSB bits contain the CXY of the cluster containing the thread.
 * - The LTID is used to index the th_tbl[] array in the local process descriptor.
 * This TRDID is computed by the process_register_thread() function, when the user
 * thread is registered in the local copy of the process descriptor.
 *
 * WARNING : Don't modify the order of the first 4 fields, as this order is used by
 * the hal_kentry assembly code for the TSAR architecture.
 **************************************************************************************/

/* Presumably the value stored in the "signature" field of thread_t - TODO confirm.   */
#define THREAD_SIGNATURE    0xDEADBEEF

typedef struct thread_s
{
	/* The four fields below are the ones whose order must not be modified.           */
	void              * cpu_context;     /*! used for context switch                  */
	void              * fpu_context;     /*! used for dynamic FPU allocation          */

	intptr_t            k_stack_base;    /*! kernel stack base address                */
	uint32_t            k_stack_size;    /*! kernel stack size (bytes)                */

	uint32_t            trdid;           /*! thread identifier (cxy.ltid)             */
	thread_type_t       type;            /*! thread type                              */
	uint32_t            quantum;         /*! number of clock ticks given to thread    */
	uint32_t            ticks_nr;        /*! number of ticks used                     */
	uint32_t            time_last_check; /*! last cpu_time_stamp                      */
	core_t            * core;            /*! pointer to the owner core                */
	process_t         * process;         /*! pointer on local process descriptor      */
    xptr_t              parent;          /*! extended pointer on parent thread        */

    void              * exit_value;      /*! exit_value used in case of join          */

	uint32_t            local_locks;	 /*! number of local locks owned by thread    */
    list_entry_t        locks_root;      /*! root of local locks list                 */

    /* NOTE(review): declared as a pointer, not as an embedded lock : confirm where   */
    /* the lock itself is allocated.                                                  */
    remote_spinlock_t * flags_lock;      /*! lock protecting the flags                */

	uint32_t            remote_locks;	 /*! number of remote locks owned by thread   */
    xlist_entry_t       xlocks_root;     /*! root of remote locks list                */

	intptr_t            u_stack_base;    /*! user stack base address                  */
	uint32_t            u_stack_size;    /*! user stack size (bytes)                  */

    void              * entry_func;      /*! pointer on entry function                */
    void              * entry_args;      /*! pointer on entry function arguments      */

    uint32_t            flags;           /*! bit vector of flags                      */
    volatile uint32_t   blocked;         /*! bit vector of blocking causes            */
    volatile uint32_t   signals;         /*! bit vector of (KILL / SUICIDE) signals   */

    /* NOTE(review): "errno" is a macro when the standard <errno.h> is included,      */
    /* this field name would then not compile.                                        */
	error_t             errno;           /*! errno value set by last system call      */
    uint32_t            utls;            /*! user thread local storage                */

    bool_t              fork_user;       /*! user defined placement for next fork()   */
    cxy_t               fork_cxy;        /*! target cluster  for next fork()          */

	xlist_entry_t       children_root;   /*! root of list of attached children        */
    uint32_t            children_nr;     /*! number of attached children threads      */
    /* NOTE(review): declared as a pointer, as flags_lock : same question.            */
    remote_spinlock_t * children_lock;   /*! lock protecting the children list        */

    xlist_entry_t       brothers_list;   /*! list of attached threads to same parent  */

	list_entry_t        sched_list;      /*! member of threads attached to same core  */

    uint32_t            dev_channel;     /*! device channel for a DEV thread          */

    ioc_command_t       ioc_cmd;         /*! IOC device generic command               */
    txt_command_t       txt_cmd;         /*! TXT device generic command               */
    nic_command_t       nic_cmd;         /*! NIC device generic command               */
    mmc_command_t       mmc_cmd;         /*! MMC device generic command               */
    dma_command_t       dma_cmd;         /*! DMA device generic command               */

	cxy_t               rpc_client_cxy;  /*! client cluster index (for a RPC thread)  */

    xlist_entry_t       wait_list;       /*! member of threads blocked on same cond   */

	thread_info_t       info;            /*! embedded thread_info_t                   */

	uint32_t            signature;       /*! for kernel stack overflow detection      */
}
thread_t;

/***************************************************************************************
 * This macro returns a pointer on the calling thread from the core hardware register.
 * It expands to a call to the hal_get_current_thread() function.
 **************************************************************************************/

#define CURRENT_THREAD  (hal_get_current_thread())

/***************************************************************************************
 * This function returns a printable string for a thread type.
 ***************************************************************************************
 * @ type    : thread type (presumably a thread_type_t value - TODO confirm).
 * @ returns a pointer on the string.
 **************************************************************************************/
char * thread_type_str( uint32_t type );

/***************************************************************************************
 * This function allocates memory for a user thread descriptor in the local cluster,
 * and initializes it from information contained in the arguments.
 * It is used by the "pthread_create" system call.
 * The CPU context is initialized from scratch, and the "loadable" field is set.
 * The new thread is attached to the core specified in the <attr> argument.
 * It is registered in the local process descriptor specified by the <pid> argument.
 * The thread descriptor pointer is returned to allow the parent thread to register it
 * in its children list.
 * The THREAD_BLOCKED_GLOBAL bit is set => the thread must be activated to start.
 ***************************************************************************************
 * @ pid          : process identifier.
 * @ start_func   : pointer on entry function.
 * @ start_arg    : pointer on entry function argument (can be NULL).
 * @ attr         : pointer on pthread attributes descriptor.
 * @ new_thread   : [out] address of buffer for new thread descriptor pointer.
 * @ returns 0 if success / returns ENOMEM if error.
 **************************************************************************************/
error_t thread_user_create( pid_t             pid,
                            void            * start_func,
                            void            * start_arg,
                            pthread_attr_t  * attr,
                            thread_t       ** new_thread );

/***************************************************************************************
 * This function is used by the fork() system call to create the child process main
 * thread. It allocates memory for a user thread descriptor in the local cluster,
 * and initializes it from information contained in the calling thread descriptor.
 * The new thread is attached to the core that has the lowest load in local cluster.
 * It is registered in the child process descriptor defined by the <process> argument.
 * This new thread inherits its user stack from the parent thread, as it uses the
 * Copy-On-Write mechanism to get a private stack when required.
 * The content of the parent kernel stack is copied into the child kernel stack, as
 * the Copy-On-Write mechanism cannot be used for kernel segments (because kernel
 * uses physical addressing on some architectures).
 * The CPU and FPU execution contexts are created and linked to the new thread,
 * but the actual context copy is NOT done. The THREAD_BLOCKED_GLOBAL bit is set,
 * and the thread must be explicitly unblocked later to make the new thread runnable.
 ***************************************************************************************
 * @ process      : local pointer on owner process descriptor.
 * @ stack_base   : user stack base address (from parent).
 * @ stack_size   : user stack size (from parent).
 * @ new_thread   : [out] address of buffer for new thread descriptor pointer.
 * @ returns 0 if success / returns ENOMEM if error.
 **************************************************************************************/
error_t thread_user_fork( process_t * process,
                          intptr_t    stack_base,
                          uint32_t    stack_size,
                          thread_t ** new_thread );

/***************************************************************************************
 * This function allocates memory for a kernel thread descriptor in the local cluster,
 * and initializes it from arguments values, calling the thread_kernel_init() function,
 * that also allocates and initializes the CPU context.
 * The THREAD_BLOCKED_GLOBAL bit is set, and the thread must be activated to start.
 ***************************************************************************************
 * @ new_thread   : [out] address of buffer for new thread pointer.
 * @ type         : kernel thread type.
 * @ func         : pointer on function.
 * @ args         : function arguments.
 * @ core_lid     : local core index.
 * @ returns 0 if success / returns ENOMEM if error.
 **************************************************************************************/
error_t thread_kernel_create( thread_t     ** new_thread,
                              thread_type_t   type,
                              void          * func,
                              void          * args,
                              lid_t           core_lid );

/***************************************************************************************
 * This function initializes an existing kernel thread descriptor from arguments values.
 * The descriptor itself is not allocated by this function.
 * The THREAD_BLOCKED_GLOBAL bit is set, and the thread must be activated to start.
 ***************************************************************************************
 * @ thread   : pointer on existing thread descriptor.
 * @ type     : kernel thread type.
 * @ func     : pointer on function.
 * @ args     : function arguments.
 * @ core_lid : local core index.
 * @ returns 0 if success / returns EINVAL if error.
 **************************************************************************************/
error_t thread_kernel_init( thread_t      * thread,
                            thread_type_t   type,
                            void          * func,
                            void          * args,
                            lid_t           core_lid );

/***************************************************************************************
 * This function releases the physical memory allocated for a thread descriptor
 * in the local cluster. It can be used for both a user and a kernel thread.
 * The physical memory dynamically allocated in the HEAP or MMAP zones by a user
 * thread is not released here : it will be released when the process is killed,
 * and the page table flushed.
 ***************************************************************************************
 * @ thread  : pointer on the thread descriptor to release.
 **************************************************************************************/
void thread_destroy( thread_t * thread );

/***************************************************************************************
 * This function defines the code of the thread executed by all cores after kernel_init,
 * or when no other thread is runnable for a given core.
 * It takes no argument : the parameter list is declared as (void), so that the
 * compiler can reject a call with arguments.
 *
 * TODO: In the TSAR architecture, it enters an infinite loop, in which it forces
 * the core in sleep (low-power) mode. Any IRQ will force the core to exit this sleep
 * mode, but no ISR is executed.
 * TODO: We must analyse if we have the same behaviour for I86 architectures...
 **************************************************************************************/
void thread_idle_func( void );

/***************************************************************************************
 * This function registers a child thread in the global list of attached
 * children threads of a parent thread.
 * It does NOT take a lock, as this function is always called by the parent thread.
 ***************************************************************************************
 * @ parent_xp : extended pointer on the parent thread descriptor.
 * @ child_xp  : extended pointer on the child thread descriptor.
 **************************************************************************************/
void thread_child_parent_link( xptr_t  parent_xp,
                               xptr_t  child_xp );

/***************************************************************************************
 * This function removes a user thread from the parent thread global list
 * of attached children threads.
 * NOTE(review): unlike thread_child_parent_link(), nothing is said here about
 * locking - TODO document whether a lock is taken.
 ***************************************************************************************
 * @ parent_xp : extended pointer on the parent thread descriptor.
 * @ child_xp  : extended pointer on the child thread descriptor.
 **************************************************************************************/
void thread_child_parent_unlink( xptr_t parent_xp,
                                 xptr_t child_xp );

/***************************************************************************************
 * This function atomically sets a signal in a thread descriptor.
 * It is declared without the "inline" specifier, because this header does not
 * provide the function definition : ISO C requires a function declared inline
 * to be defined in the same translation unit.
 ***************************************************************************************
 * @ thread    : local pointer on target thread.
 * @ mask      : mask on selected signal.
 **************************************************************************************/
void thread_set_signal( thread_t * thread,
                        uint32_t   mask );

/***************************************************************************************
 * This function resets a signal in a thread descriptor.
 * It is declared without the "inline" specifier, because this header does not
 * provide the function definition : ISO C requires a function declared inline
 * to be defined in the same translation unit.
 ***************************************************************************************
 * @ thread    : local pointer on target thread.
 * @ mask      : mask on selected signal.
 **************************************************************************************/
void thread_reset_signal( thread_t * thread,
                          uint32_t   mask );

/***************************************************************************************
 * This function checks if the calling thread can deschedule.
 * It is declared without the "inline" specifier, because this header does not
 * provide the function definition, and with a (void) parameter list, so that the
 * compiler can reject a call with arguments.
 ***************************************************************************************
 * @ returns true if no locks taken.
 **************************************************************************************/
bool_t thread_can_yield( void );

/***************************************************************************************
 * This function implements the delayed descheduling mechanism : It is called by
 * all lock release functions, and calls the sched_yield() function when all locks
 * have been released and the calling thread THREAD_FLAG_SCHED flag is set.
 * It takes no argument : the parameter list is declared as (void), so that the
 * compiler can reject a call with arguments.
 **************************************************************************************/
void thread_check_sched( void );

/***************************************************************************************
 * This function is used by the calling thread to suicide.
 * All locks must be previously released. The scenario depends on the DETACHED flag.
 * if detached :
 * 1) the calling thread sets the SIG_SUICIDE bit in the "signals" bit_vector,
 *    registers the BLOCKED_GLOBAL bit in the "blocked" bit_vector, and deschedules.
 * 2) the scheduler, detecting the SIG_SUICIDE bit, removes the thread from the
 *    scheduler list, removes the thread from its process, and destroys the thread.
 * if attached :
 * 1) the calling thread simply sets the BLOCKED_EXIT bit in the "blocked" bit vector
 *    and deschedules.
 * 2) The SIG_KILL bit and BLOCKED_SIGNAL bits are set by the parent thread when
 *    executing the pthread_join(), and detecting the BLOCKED_EXIT bit.
 *    The scenario is then a standard kill, as described for thread_kill().
 *    NOTE(review): no THREAD_BLOCKED_SIGNAL mask is defined in this file. As the
 *    thread_kill() function is documented to set the BLOCKED_GLOBAL bit, this is
 *    presumably the bit meant here - TODO confirm.
 * It takes no argument : the parameter list is declared as (void), so that the
 * compiler can reject a call with arguments.
 ***************************************************************************************
 * @ returns 0 if success / returns EINVAL if locks_count is not zero.
 **************************************************************************************/
error_t thread_exit( void );

/***************************************************************************************
 * This function requests to kill a local target thread, with the following scenario:
 * 1. This function sets the BLOCKED_GLOBAL bit in target thread "blocked" bit_vector,
 *    sets the SIG_KILL bit in target thread "signals" bit_vector, and sends an IPI
 *    to the target thread core to force scheduling.
 * 2. The scheduler, detecting the SIG_KILL set, removes the thread from the scheduler
 *    list, and resets the SIG_KILL bit to acknowledge the killer.
 * 3. The caller of this function, (such as the process_kill() function), must poll
 *    SIG_KILL bit until reset, detach the thread from its parent if the thread is
 *    attached, remove the thread from its process, and destroy the thread.
 *
 * NOTE: The third step must be done by the caller to allow the process_kill()
 *       function to parallelize the work on all schedulers in a given cluster.
 ***************************************************************************************
 * @ thread   : local pointer on the target thread.
 **************************************************************************************/
void thread_kill( thread_t * thread );

/***************************************************************************************
 * This function registers a blocking cause in the target thread "blocked" bit vector.
 * Warning : this function does not deschedule the calling thread, and the descheduling
 * must be explicitly forced by a sched_yield().
 ***************************************************************************************
 * @ thread   : local pointer on target thread descriptor.
 * @ cause    : mask defining the cause (one hot).
 **************************************************************************************/
void thread_block( thread_t * thread,
                   uint32_t   cause );

/***************************************************************************************
 * This function resets the bit identified by the cause argument in the "blocked"
 * bit vector of a remote thread descriptor, using an atomic access.
 * We need an extended pointer, because the client thread of an I/O operation on a
 * given device is not necessarily in the same cluster as the associated device
 * descriptor.
 * Warning : this function does not reschedule the remote thread.
 * The scheduling can be forced by sending an IPI to the core running the remote thread.
 ***************************************************************************************
 * @ thread   : extended pointer on the remote thread.
 * @ cause    : mask defining the cause (one hot).
 * @ returns non zero if the bit-vector was actually modified / returns 0 otherwise.
 **************************************************************************************/
uint32_t thread_unblock( xptr_t   thread,
                         uint32_t cause );

/***************************************************************************************
 * This function updates the calling thread user_time counter, and resets the thread
 * cycles counter.
 * TODO This function is not implemented.
 ***************************************************************************************
 * @ thread   : local pointer on target thread.
 **************************************************************************************/
void thread_user_time_update( thread_t * thread );

/***************************************************************************************
 * This function updates the calling thread kernel_time counter, and resets the thread
 * cycles counter.
 * TODO This function is not implemented.
 ***************************************************************************************
 * @ thread   : local pointer on target thread.
 **************************************************************************************/
void thread_kernel_time_update( thread_t * thread );

/***************************************************************************************
 * This function handles all pending signals for the thread identified by the <thread>
 * argument. It is called each time the core exits the kernel, after handling an
 * interrupt, an exception or a syscall.
 * TODO This function is not implemented.
 ***************************************************************************************
 * @ thread   : local pointer on target thread.
 **************************************************************************************/
void thread_signals_handle( thread_t * thread );

/***************************************************************************************
 * This function returns the extended pointer on a thread descriptor identified
 * by its thread identifier, and process identifier.
 * It can be called by any thread running in any cluster.
 ***************************************************************************************
 * @ pid     : process identifier.
 * @ trdid   : thread identifier.
 * @ returns the extended pointer if thread found / returns XPTR_NULL if not found.
 **************************************************************************************/
xptr_t thread_get_xptr( pid_t    pid,
                        trdid_t  trdid );


#endif	/* _THREAD_H_ */
