/* $NetBSD: $ */

/*-
  * Copyright (c) 2009 UPMC/LIP6
  * All rights reserved.
  * This software is distributed under the following conditions
  * compliant with the NetBSD foundation policy.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.71 2007/12/03 15:34:09 ad Exp $");

#include "opt_cputype.h"
#include "opt_soclib.h"
#include "opt_ddb.h"
#include "ksyms.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h> 
#include <sys/conf.h>
#include <sys/reboot.h>
#include <sys/device.h>
#include <sys/mbuf.h>
#include <sys/msgbuf.h>
#include <sys/cpu.h>
#include <sys/kcore.h>
#include <sys/exec.h>
#include <sys/proc.h>
#include <sys/lwp.h>
#include <sys/savar.h>
#include <sys/ucontext.h>
#include <sys/user.h>

#include <sys/ras.h>
#include <sys/sysctl.h>
#include <sys/ksyms.h>
#include <sys/bitops.h>

#include <machine/autoconf.h>
#include <machine/bootinfo.h>
#include <machine/pte.h>
#include <machine/vcache.h>
#include <machine/locore.h>

#include <mips/cache.h>
#include <mips/psl.h>
#include <mips/userret.h>
#include <mips/frame.h>

#if NKSYMS || defined(DDB) || defined(LKM)
#include <machine/db_machdep.h>
#include <ddb/db_extern.h>
#endif

#include <uvm/uvm_extern.h>

/*
 * TSAR implementations of the MIPS cache operations; the definitions
 * are further down in this file.
 */
void	tsar_icache_sync_all(void);
void	tsar_icache_sync_range(vaddr_t, vsize_t);
void	tsar_icache_sync_range_index(vaddr_t, vsize_t);
void	tsar_pdcache_wbinv_all(void);
void	tsar_pdcache_wbinv_range(vaddr_t, vsize_t);
void	tsar_pdcache_wbinv_range_index(vaddr_t, vsize_t);
void	tsar_pdcache_inv_range(vaddr_t, vsize_t);
void	tsar_pdcache_wb_range(vaddr_t, vsize_t);

/*
 * Cache operation vector, statically bound to the tsar_*() routines:
 * instruction cache entries first, then primary data cache entries.
 */
struct mips_cache_ops mips_cache_ops = {
	/* instruction cache */
	.mco_icache_sync_all		= tsar_icache_sync_all,
	.mco_icache_sync_range		= tsar_icache_sync_range,
	.mco_icache_sync_range_index	= tsar_icache_sync_range_index,

	/* primary data cache */
	.mco_pdcache_wbinv_all		= tsar_pdcache_wbinv_all,
	.mco_pdcache_wbinv_range	= tsar_pdcache_wbinv_range,
	.mco_pdcache_wbinv_range_index	= tsar_pdcache_wbinv_range_index,
	.mco_pdcache_inv_range		= tsar_pdcache_inv_range,
	.mco_pdcache_wb_range		= tsar_pdcache_wb_range,
};
	

/* Early machine-dependent bootstrap entry point; defined below. */
void mach_init(void);

/* Physical memory size in pages; computed in mach_init(). */
int physmem;
/* Submaps of kernel_map; both are allocated in cpu_startup(). */
struct vm_map *mb_map = NULL;
struct vm_map *phys_map = NULL;

paddr_t avail_end;	/* PA of last available physical page */
/* Number of entries of mem_clusters[] filled in by mach_init(). */
int mem_cluster_cnt;
phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];

/*
 * Statically allocated cpu_info of the boot CPU.  On MULTIPROCESSOR
 * kernels it lives in section ".data1" and is aligned on the smallest
 * power of two that is >= sizeof(struct cpu_info).
 */
struct cpu_info cpu_info_store
#ifdef MULTIPROCESSOR
	__section(".data1")
	__aligned(1LU << ilog2((2*sizeof(struct cpu_info)-1)))
#endif
    = {
	.ci_curlwp = &lwp0,
#ifndef NOFPU
	.ci_fpcurlwp = &lwp0,
#endif
#ifdef MULTIPROCESSOR
	.ci_flags = CPUF_PRIMARY|CPUF_PRESENT|CPUF_RUNNING,
#endif
}; /* boot CPU info */

/*
 * Running-CPU words; mips_vector_init() sets element 0 to 1 for the
 * boot CPU.
 * NOTE(review): presumably a bitmap with one bit per CPU; the array is
 * zero-sized if BOOTINFO_NCPUS_MAX < 32 -- confirm the constant.
 */
volatile uint32_t cpus_running[BOOTINFO_NCPUS_MAX / 32];

/* BTINFO_CPUS bootinfo record, looked up in mach_init(). */
struct btinfo_cpus *bi_cpus;

/*
 * Exception base address: written to the CP0 EBase register and then
 * re-read (low 12 bits masked off) in mips_vector_init().
 */
vaddr_t cp0_ebase;

void
mach_init()
{
	vaddr_t msgb;
	vaddr_t vaend;
	vaddr_t fdt_va;
	paddr_t paend, ram_start;
	struct btinfo_magic *bi_magic;
	struct btinfo_symtab *bi_sym;
	struct btinfo_fdt *bi_fdt;
	extern char kernel_text[], end[];
	int err;

	mttycn_attach();
	printf("mach_init\n");

	/* look up informations from boot loader */
	bi_magic = lookup_bootinfo(BTINFO_MAGIC, bootinfo);
	if (bi_magic && bi_magic->magic == BOOTINFO_MAGIC) {
		bi_sym = lookup_bootinfo(BTINFO_SYMTAB, bootinfo);
		bi_cpus = lookup_bootinfo(BTINFO_CPUS, bootinfo);
		bi_fdt = lookup_bootinfo(BTINFO_FDT, bootinfo);
	} else {
		panic("no bootinfo");
	}
	if (bi_fdt == NULL)
		panic("no FDT provided");
	printf("fdt @0x%lx, size 0x%lx\n", (u_long)bi_fdt->fdt_physaddr,
	    (u_long)bi_fdt->fdt_size);

	vaend = mips_round_page((vaddr_t)end);

#if NKSYM > 0 || defined(DDB) || defined(LKM)
	if (bi_sym)
		vaend = mips_round_page(bi_sym->esym + KERNBASE);
#endif
	paend = vaend - KERNBASE;
	printf("vaend 0x%lx paend 0x%lx\n", vaend, (u_long)paend);
	pmap_bootstrap(&vaend, &msgb, &paend,
	    &fdt_va, bi_fdt->fdt_physaddr, bi_fdt->fdt_size);
	printf("vaend now 0x%lx paend 0x%lx\n", vaend, (u_long)paend);
	printf("uvm_setpagesize");
	uvm_setpagesize();
#if NKSYMS || defined(DDB) || defined(LKM)
	if (bi_sym) {
		printf(" load symyab");
		ksyms_init(bi_sym->nsym, (char *)(bi_sym->ssym + KERNBASE),
		    (char *)(bi_sym->esym + KERNBASE));
	}
#endif
	printf(" done\n");
	/*
	 * Copy exception-dispatch code down to exception vector.
	 * Initialize locore-function vector.
	 */
	printf("mips_vector_init");
	mips_vector_init();
	printf(" done\n");

	boothowto = RB_SINGLE | AB_DEBUG;

#if 0 /* NKSYMS || defined(DDB) || defined(LKM) XXX */
	/* init symbols if present */
	if (esym)
		ksyms_init((char *)esym - (char *)ssym, ssym, esym);
#endif
	
	err = fdt_parse_header((void *)fdt_va, fdt);
	if (err != 0) {
		printf("fdt_parse_header failed: %d\n", err);
		panic("fdt_parse_header");
	}

	mem_cluster_cnt = 0;
	if (SOCLIB_RAM_START == 0)
		ram_start = PAGE_SIZE;
	else
		ram_start = SOCLIB_RAM_START;
	if (SOCLIB_RAM_START < (vaddr_t)kernel_text - KERNBASE) {
		mem_clusters[mem_cluster_cnt].start = SOCLIB_RAM_START;
		mem_clusters[mem_cluster_cnt].size =
		    ((vaddr_t)kernel_text - KERNBASE - SOCLIB_RAM_START);
		printf("mem_clusters[%d] 0x%" PRIx32 " 0x%" PRIx32 "\n",
		    mem_cluster_cnt, (u_int)mem_clusters[mem_cluster_cnt].start,
		    (u_int)mem_clusters[mem_cluster_cnt].size);
		uvm_page_physload(atop(SOCLIB_RAM_START),
		    atop((vaddr_t)kernel_text - KERNBASE),
		    atop(ram_start),
		    atop((vaddr_t)kernel_text - KERNBASE),
		    VM_FREELIST_DEFAULT);
		mem_cluster_cnt++;
	}
	if (SOCLIB_RAM_END > ((vaddr_t)kernel_text - KERNBASE)) {
		mem_clusters[mem_cluster_cnt].start = paend;
		mem_clusters[mem_cluster_cnt].size =
		    SOCLIB_RAM_END - paend;
		printf("mem_clusters[%d] 0x%" PRIx32 " 0x%" PRIx32 "\n",
		    mem_cluster_cnt, (u_int)mem_clusters[mem_cluster_cnt].start,
		    (u_int)mem_clusters[mem_cluster_cnt].size);
		uvm_page_physload(atop((vaddr_t)kernel_text - KERNBASE),
		    atop(SOCLIB_RAM_END),
		    atop(paend),
		    atop(SOCLIB_RAM_END),
		    VM_FREELIST_DEFAULT);
		mem_cluster_cnt++;
	}

	physmem = (SOCLIB_RAM_END - SOCLIB_RAM_START) / PAGE_SIZE;
	avail_end = mips_trunc_page(SOCLIB_RAM_END - 1);

	/* initialise kernel message buffer */
	printf("initmsgbuf");
	initmsgbuf((void *)msgb, round_page(MSGBUFSIZE));
	printf(" done\n");
}

void *
lookup_bootinfo(int type, char *bootinfop)
{
	struct btinfo_common *bt;
	char *bi;

	bi = bootinfop;
	do {
		bt = (struct btinfo_common *)bi;
		//printf("bi %p %d %d\n", bi, bt->type, bt->next);
		if (bt->type == type)
			return (void *)bi;
		bi += bt->next;
	} while (bt->next != 0 &&
	    (vaddr_t)bi < (vaddr_t)bootinfop + BOOTINFO_SIZE);
	return NULL;
}

uint32_t loockup_bootinfo_cpus(char *);

uint32_t
loockup_bootinfo_cpus(char *bootinfop)
{
	struct btinfo_magic *bi_magic;
	struct btinfo_cpus *cpus;
	bi_magic = lookup_bootinfo(BTINFO_MAGIC, bootinfop);
	if (bi_magic && bi_magic->magic == BOOTINFO_MAGIC) {
		cpus = lookup_bootinfo(BTINFO_CPUS, bootinfop);
		if (cpus != NULL)
			return cpus->cpu_vectors;
	}
	return 0xffffffff;
}

/*
 * consinit:
 *
 *	Console initialisation hook; simply attaches the mtty console.
 *	mach_init() performs the same attach itself.
 */
void
consinit(void)
{

	mttycn_attach();
}

/*
 * cpu_initclocks:
 *
 *	Clock initialisation hook.  Nothing is done here on this port.
 *	Defined with a (void) prototype rather than an old-style empty
 *	parameter list.
 */
void
cpu_initclocks(void)
{
}

/*
 * setstatclockrate:
 *
 *	Request to change the statistics clock rate.  Ignored: the
 *	requested rate `r' is not used.
 */
void
setstatclockrate(int r)
{

	(void)r;
}

/*
 * cpu_startup:
 *
 *	Allocate the physio and mbuf-cluster submaps of kernel_map, then
 *	print the copyright/version banner and the total and available
 *	memory sizes.
 */
void
cpu_startup(void)
{
	vaddr_t minaddr, maxaddr;
	char pbuf[9];

	/*
	 * Allocate a submap for physio
	 */
	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    VM_PHYS_SIZE, 0, false, NULL);

	/*
	 * allocate mbuf cluster submap.
	 */
	mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    nmbclusters * mclbytes, VM_MAP_INTRSAFE, false, NULL);

	/* say hello to the world */
	printf("%s%s", copyright, version);

	/*
	 * physmem is a signed int page count: convert it with ptoa(),
	 * as is done for uvmexp.free below, rather than shifting the int
	 * itself, which overflows once the byte count no longer fits.
	 */
	format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
	printf("total memory = %s\n", pbuf);
	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
	printf("avail memory = %s\n", pbuf);
}

/*
 * cpu_reboot:
 *
 *	Halt or reboot the machine.  Not implemented yet: prints which of
 *	the two was requested (RB_HALT set in howto or not) and then spins
 *	forever.  bootstr is ignored.
 */
void
cpu_reboot(howto, bootstr)
	volatile int howto;	/* XXX volatile to keep gcc happy */
	char *bootstr;
{
	const char *what;

	/* XXX todo */
	if ((howto & RB_HALT) != 0)
		what = "halted.";
	else
		what = "rebooting...";
	printf("%s\n\n", what);

	for (;;)
		continue;
	/*NOTREACHED*/
}


/*
 * mco_icache_sync_all handler: intentionally empty on this port.
 * NOTE(review): presumably no software maintenance is needed because
 * the hardware keeps the instruction cache coherent -- confirm.
 */
void
tsar_icache_sync_all(void)
{

	/* nothing to do */
}

/*
 * mco_icache_sync_range handler: intentionally empty; both arguments
 * are ignored.
 */
void
tsar_icache_sync_range(vaddr_t va, vsize_t size)
{

	(void)va;
	(void)size;
}

/*
 * mco_icache_sync_range_index handler: intentionally empty; both
 * arguments are ignored.
 */
void
tsar_icache_sync_range_index(vaddr_t va, vsize_t size)
{

	(void)va;
	(void)size;
}

/*
 * mco_pdcache_wbinv_all handler: intentionally empty on this port.
 */
void
tsar_pdcache_wbinv_all(void)
{

	/* nothing to do */
}

/*
 * mco_pdcache_wbinv_range handler: intentionally empty; both arguments
 * are ignored.
 */
void
tsar_pdcache_wbinv_range(vaddr_t va, vsize_t size)
{

	(void)va;
	(void)size;
}

/*
 * mco_pdcache_wbinv_range_index handler: intentionally empty; both
 * arguments are ignored.
 */
void
tsar_pdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{

	(void)va;
	(void)size;
}

/*
 * mco_pdcache_inv_range handler: intentionally empty; both arguments
 * are ignored.
 */
void
tsar_pdcache_inv_range(vaddr_t va, vsize_t size)
{

	(void)va;
	(void)size;
}

/*
 * mco_pdcache_wb_range handler: the range arguments are ignored; the
 * only action is a wbflush().
 */
void
tsar_pdcache_wb_range(vaddr_t va, vsize_t size)
{

	(void)va;
	(void)size;
	wbflush();
}

/*
 * tsar_inval_icache_parange:
 *
 *	Invalidate, by physical address, the instruction cache lines
 *	covering [pa, pa + sz).  The start is rounded down to a multiple
 *	of curcpu()->ci_l1_cls and one VC_ICACHE_INVAL_PA request is issued
 *	per step, after loading the high and low 32 bits of the address
 *	into VC_DATA_H / VC_DATA_L.  Compiles to a no-op without VCACHE.
 */
void
tsar_inval_icache_parange(paddr_t pa, size_t sz)
{
#ifdef VCACHE
	const paddr_t limit = pa + sz;
	const int linesz = curcpu()->ci_l1_cls;
	paddr_t line;

	for (line = pa & ~((paddr_t)linesz - 1); line < limit;
	    line += linesz) {
		tsar_vcache_write(VC_DATA_H, line >> 32);
		tsar_vcache_write(VC_DATA_L, line & 0xffffffff);
		tsar_vcache_write(VC_ICACHE_INVAL_PA, 0);
	}
#endif
}

/*
 * tsar_inval_dcache_parange:
 *
 *	Invalidate, by physical address, the data cache lines covering
 *	[pa, pa + sz).  The start is rounded down to a multiple of
 *	curcpu()->ci_l1_cls and one VC_DCACHE_INVAL_PA request is issued
 *	per step, after loading the high and low 32 bits of the address
 *	into VC_DATA_H / VC_DATA_L.  Compiles to a no-op without VCACHE.
 */
void
tsar_inval_dcache_parange(paddr_t pa, size_t sz)
{
#ifdef VCACHE
	const paddr_t limit = pa + sz;
	const int linesz = curcpu()->ci_l1_cls;
	paddr_t line;

	for (line = pa & ~((paddr_t)linesz - 1); line < limit;
	    line += linesz) {
		tsar_vcache_write(VC_DATA_H, line >> 32);
		tsar_vcache_write(VC_DATA_L, line & 0xffffffff);
		tsar_vcache_write(VC_DCACHE_INVAL_PA, 0);
	}
#endif
}

/*
 * tsar_inval_caches_parange:
 *
 *	Invalidate, by physical address, both the instruction and the data
 *	cache lines covering [pa, pa + sz).  The start is rounded down to
 *	a multiple of curcpu()->ci_l1_cls; for every step the address is
 *	loaded into VC_DATA_H / VC_DATA_L once and then used for an
 *	instruction cache request followed by a data cache request.
 *	Compiles to a no-op without VCACHE.
 */
void
tsar_inval_caches_parange(paddr_t pa, size_t sz)
{
#ifdef VCACHE
	const paddr_t limit = pa + sz;
	const int linesz = curcpu()->ci_l1_cls;
	paddr_t line;

	for (line = pa & ~((paddr_t)linesz - 1); line < limit;
	    line += linesz) {
		tsar_vcache_write(VC_DATA_H, line >> 32);
		tsar_vcache_write(VC_DATA_L, line & 0xffffffff);
		tsar_vcache_write(VC_ICACHE_INVAL_PA, 0);
		tsar_vcache_write(VC_DCACHE_INVAL_PA, 0);
	}
#endif
}

/* MIPS32 locore function table, defined outside this file. */
extern long	*mips32_locoresw[];

/*
 * Active locore function table; mips_vector_init() fills it by copying
 * from mips32_locoresw.
 */
long *mips_locoresw[4];

/* CPU description; cpu_arch, mips_cpu_flags and mips_has_llsc are set in mips_vector_init(). */
int cpu_arch;
int cpu_mhz;
int mips_cpu_flags;
int mips_has_llsc;

/* the following is used externally (sysctl_hw) */
char	machine[] = MACHINE;		/* from <machine/param.h> */
char	machine_arch[] = MACHINE_ARCH;	/* from <machine/param.h> */
char	cpu_model[128];

/*
 * mips_vector_init:
 *
 *	Initialize the hardware exception vectors and the jump table used
 *	to call locore functions.  Only the MIPS32 variant is handled: the
 *	CPU architecture and flags are set unconditionally below.
 *
 *	Steps, in order: re-establish curlwp and the boot cpu_info, set
 *	the CPU description globals, program and read back the CP0 EBase
 *	register, copy the exception code to the general and interrupt
 *	vector offsets, run the cache sync hooks, clear the BEV bit in the
 *	status register, and finally install the locore switch table and
 *	the idle hook.
 */
void
mips_vector_init(void)
{
	/* r4000 exception handler address and end */
	extern char mips32_exception[], mips32_exceptionEnd[];
	/* MIPS32/MIPS64 interrupt exception handler */
	extern char mips32_intr[], mips32_intrEnd[];

	/*
	 * XXX Set-up curlwp/curcpu again.  They may have been clobbered
	 * between verylocore and here.
	 */
	/* The CPU id is taken from the low 9 bits of the EBase register. */
	cpu_info_store.ci_cpuid = mips_cp0_ebase_read() & 0x1ff;
	lwp0.l_cpu = &cpu_info_store;
	curlwp = &lwp0;
	cpus_running[0] = 1; /* curcpu()->cpu_index is running */

	cpu_arch = CPU_ARCH_MIPS32;

	/*
	 * Check CPU-specific flags.
	 */
	mips_cpu_flags = CPU_MIPS_USE_WAIT;
	mips_has_llsc = 1;

	/*
	 * Now initialize our ISA-dependent function vector.
	 */

	printf(" ebase");
	/*
	 * Write the ebase register with the current value of cp0_ebase,
	 * then read it back with the low 12 bits masked off to obtain the
	 * base address the vectors are copied to below.
	 */
	mips_cp0_ebase_write(cp0_ebase);
	cp0_ebase = mips_cp0_ebase_read() & (~0xfff);
	printf("=0x%x", (uint32_t)cp0_ebase);

	/*
	 * Copy down exception vector code.
	 */

	printf(" memcpy1");
	/* Each handler must fit in 0x80 bytes. */
	if (mips32_exceptionEnd - mips32_exception > 0x80)
		panic("startup: General exception vector code too large");
	memcpy((void *)(cp0_ebase + (MIPS3_GEN_EXC_VEC & PAGE_MASK)),
	    mips32_exception,
	    mips32_exceptionEnd - mips32_exception);
	printf(" memcpy2");
	if (mips32_intrEnd - mips32_intr > 0x80)
		panic("startup: interrupt exception vector code too large");
	/*
	 * NOTE(review): the size check above is on mips32_intr, but the
	 * enabled branch below installs the general exception code at the
	 * interrupt vector as well (see the XXX on the disabled branch).
	 */
#if 0	/* XXX - why doesn't mipsNN_intr() work? */
	memcpy((void *)MIPS3_INTR_EXC_VEC, mips32_intr,
	      mips32_intrEnd - mips32_intr);
#else
	memcpy((void *)(cp0_ebase + (MIPS3_INTR_EXC_VEC & PAGE_MASK)),
	    mips32_exception,
	    mips32_exceptionEnd - mips32_exception);
#endif
	printf(" mips_icache_sync_all");
	mips_icache_sync_all();
	printf(" mips_dcache_wbinv_all");
	mips_dcache_wbinv_all();

	/* Clear BEV in SR so we start handling our own exceptions */
	printf(" mips_cp0_status_write");
	mips_cp0_status_write(mips_cp0_status_read() & ~MIPS_SR_BEV);
	printf(" done\n");
	/* Install the MIPS32 locore function table. */
	memcpy(mips_locoresw, mips32_locoresw, sizeof(mips_locoresw));

	/* cpu_idle() calls through CPU_IDLE. */
	CPU_IDLE = (long *)mips_wait_idle;
}

/*
 * setregs:
 *
 *	Set up the user register frame of lwp `l' for exec.
 *
 *	The whole frame is zeroed, then:
 *	 - $sp is set to the stack pointer passed in;
 *	 - $pc and $t9 are both set to the word-aligned entry point from
 *	   the exec_package ($t9 is used for PIC code by the MIPS ELF ABI);
 *	 - the status register is set to PSL_USERSET;
 *	 - $a0..$a3 carry the arguments of _start().
 *	Finally the FPU state of the lwp is discarded.
 */
void
setregs(l, pack, stack)
	struct lwp *l;
	struct exec_package *pack;
	u_long stack;
{
	struct frame *tf = (struct frame *)l->l_md.md_regs;
	const int entry = (int)pack->ep_entry & ~3;

	memset(tf, 0, sizeof(*tf));

	tf->f_regs[_R_SP] = (int)stack;
	tf->f_regs[_R_PC] = entry;
	tf->f_regs[_R_T9] = entry;	/* abicall requirement */
	tf->f_regs[_R_SR] = PSL_USERSET;

	/*
	 * Set up arguments for _start():
	 *	_start(stack, obj, cleanup, ps_strings);
	 *
	 * Notes:
	 *	- obj and cleanup are the auxiliary and termination
	 *	  vectors.  They are fixed up by ld.elf_so.
	 *	- ps_strings is a NetBSD extension.
	 */
	tf->f_regs[_R_A0] = (uintptr_t)stack;
	tf->f_regs[_R_A1] = 0;
	tf->f_regs[_R_A2] = 0;
	tf->f_regs[_R_A3] = (intptr_t)l->l_proc->p_psstr;

	/* Drop FPU ownership if this lwp holds it, then reset its state. */
	if ((l->l_md.md_flags & MDP_FPUSED) != 0) {
		if (l == fpcurlwp)
			fpcurlwp = NULL;
	}
	memset(&l->l_addr->u_pcb.pcb_fpregs, 0, sizeof(struct fpreg));
	l->l_md.md_flags &= ~MDP_FPUSED;
	l->l_md.md_ss_addr = 0;
}

/*
 * Create the "machdep" sysctl node and its leaves: console_device,
 * booted_kernel (only with __HAVE_BOOTINFO_H), root_device and llsc.
 * The "machdep" node is created first, before any of its leaves.
 */
SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{

	/* machdep */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_EOL);

	/* machdep.console_device */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "console_device", NULL,
	    sysctl_consdev, 0, NULL, sizeof(dev_t),
	    CTL_MACHDEP, CPU_CONSDEV, CTL_EOL);

#ifdef __HAVE_BOOTINFO_H
	/* machdep.booted_kernel */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "booted_kernel", NULL,
	    sysctl_machdep_booted_kernel, 0, NULL, 0,
	    CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL);
#endif

	/* machdep.root_device */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "root_device", NULL,
	    sysctl_root_device, 0, NULL, 0,
	    CTL_MACHDEP, CPU_ROOT_DEVICE, CTL_EOL);

	/* machdep.llsc: immediate value MIPS_HAS_LLSC */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "llsc", NULL,
	    NULL, MIPS_HAS_LLSC, NULL, 0,
	    CTL_MACHDEP, CPU_LLSC, CTL_EOL);
}

/*
 * These variables are needed by /sbin/savecore.
 * dumpsize and dumplo are left at 0 in this file: cpu_dumpconf(),
 * which is meant to set them, is still a stub.
 */
u_int32_t dumpmag = 0x8fca0101;	/* magic number */
int	dumpsize = 0;		/* pages */
long	dumplo = 0;		/* blocks */

struct user dumppcb;		/* Actually, struct pcb would do. */

/*
 * cpu_dumpconf:
 *
 *	This is called by main to set dumplo and dumpsize.
 *	Dumps are meant to skip the first CLBYTES of disk space
 *	in case there might be a disk label stored there, and, if there
 *	is extra space, to be put at the end to reduce the chance that
 *	swapping trashes them.
 *
 *	None of this is implemented yet: the function is an empty stub.
 */
void
cpu_dumpconf(void)
{

	/* XXX not yet */
}

/*
 * dumpsys:
 *
 *	Dump the kernel's image to the swap partition.
 *	Not implemented yet: the function is an empty stub.
 */
#define	BYTES_PER_DUMP	PAGE_SIZE

void
dumpsys(void)
{

	/* XXX not yet */
}

/*
 * savefpregs:
 *
 *	Save the FPU state (FPCSR and the 32 single-word FP registers)
 *	into the PCB of lwp `l' and clear the coprocessor 1 enable bit in
 *	its saved user status register.  Does nothing when l is NULL or
 *	when the kernel is built with NOFPU.
 *
 *	Layout written: r_regs[0..31] = $f0..$f31, r_regs[32] = FPCSR.
 */
void
savefpregs(l)
	struct lwp *l;
{
#ifndef NOFPU
	u_int32_t status, fpcsr, *fp;
	struct frame *f;

	if (l == NULL)
		return;
	/*
	 * turnoff interrupts enabling CP1 to read FPCSR register.
	 *
	 * The current status register is saved in `status' and replaced
	 * by the constant MIPS_SR_COP_1_BIT alone; FPCSR (FP control
	 * register $31) is then read into `fpcsr' (the read is issued
	 * twice).
	 */
	__asm volatile (
		".set noreorder					\n\t"
		".set noat					\n\t"
		"mfc0	%0, $" ___STRING(MIPS_COP_0_STATUS) "	\n\t"
		"li	$1, %2					\n\t"
		"mtc0	$1, $" ___STRING(MIPS_COP_0_STATUS) "	\n\t"
		___STRING(COP0_HAZARD_FPUENABLE)
		"cfc1	%1, $31					\n\t"
		"cfc1	%1, $31					\n\t"
		".set reorder					\n\t"
		".set at" 
		: "=r" (status), "=r"(fpcsr) : "i"(MIPS_SR_COP_1_BIT));
	/*
	 * this process yielded FPA.
	 */
	f = (struct frame *)l->l_md.md_regs;
	f->f_regs[_R_SR] &= ~MIPS_SR_COP_1_BIT;

	/*
	 * save FPCSR and 32bit FP register values.
	 *
	 * NOTE(review): fp is declared u_int32_t * but the cast below is
	 * to int *; the pointer types differ in signedness.
	 * NOTE(review): the swc1 block stores through fp without a
	 * "memory" clobber -- confirm this is safe with the compiler used.
	 */
	fp = (int *)l->l_addr->u_pcb.pcb_fpregs.r_regs;
	fp[32] = fpcsr;
	__asm volatile (
		".set noreorder		;"
		"swc1	$f0, 0(%0)	;"
		"swc1	$f1, 4(%0)	;"
		"swc1	$f2, 8(%0)	;"
		"swc1	$f3, 12(%0)	;"
		"swc1	$f4, 16(%0)	;"
		"swc1	$f5, 20(%0)	;"
		"swc1	$f6, 24(%0)	;"
		"swc1	$f7, 28(%0)	;"
		"swc1	$f8, 32(%0)	;"
		"swc1	$f9, 36(%0)	;"
		"swc1	$f10, 40(%0)	;"
		"swc1	$f11, 44(%0)	;"
		"swc1	$f12, 48(%0)	;"
		"swc1	$f13, 52(%0)	;"
		"swc1	$f14, 56(%0)	;"
		"swc1	$f15, 60(%0)	;"
		"swc1	$f16, 64(%0)	;"
		"swc1	$f17, 68(%0)	;"
		"swc1	$f18, 72(%0)	;"
		"swc1	$f19, 76(%0)	;"
		"swc1	$f20, 80(%0)	;"
		"swc1	$f21, 84(%0)	;"
		"swc1	$f22, 88(%0)	;"
		"swc1	$f23, 92(%0)	;"
		"swc1	$f24, 96(%0)	;"
		"swc1	$f25, 100(%0)	;"
		"swc1	$f26, 104(%0)	;"
		"swc1	$f27, 108(%0)	;"
		"swc1	$f28, 112(%0)	;"
		"swc1	$f29, 116(%0)	;"
		"swc1	$f30, 120(%0)	;"
		"swc1	$f31, 124(%0)	;"
		".set reorder" :: "r"(fp));
	/*
	 * stop CP1, enable interrupts.
	 * (restores the status register value saved in `status' above)
	 */
	__asm volatile ("mtc0 %0, $" ___STRING(MIPS_COP_0_STATUS)
	    :: "r"(status));
#endif
}

/*
 * loadfpregs:
 *
 *	Load the FPU state of lwp `l' from its PCB into the FP registers:
 *	$f0..$f31 from r_regs[0..31], then FPCSR from r_regs[32] with the
 *	MIPS_FPU_EXCEPTION_BITS masked off.  Panics when l is NULL.
 *	Compiles to an empty function when the kernel is built with NOFPU.
 */
void
loadfpregs(l)
	struct lwp *l;
{
#ifndef NOFPU
	u_int32_t status, *fp;
	struct frame *f;

	if (l == NULL)
		panic("loading fpregs for NULL proc");

	/*
	 * turnoff interrupts enabling CP1 to load FP registers.
	 *
	 * The current status register is saved in `status' and replaced
	 * by the constant MIPS_SR_COP_1_BIT alone.
	 */
	__asm volatile(
		".set noreorder					\n\t"
		".set noat					\n\t"
		"mfc0	%0, $" ___STRING(MIPS_COP_0_STATUS) "	\n\t"
		"li	$1, %1					\n\t"
		"mtc0	$1, $" ___STRING(MIPS_COP_0_STATUS) "	\n\t"
		___STRING(COP0_HAZARD_FPUENABLE)
		".set reorder					\n\t"
		".set at" : "=r"(status) : "i"(MIPS_SR_COP_1_BIT));

	/* NOTE(review): f is assigned here but not used afterwards. */
	f = (struct frame *)l->l_md.md_regs;
	/*
	 * NOTE(review): fp is declared u_int32_t * but the cast is to
	 * int *; the pointer types differ in signedness.
	 */
	fp = (int *)l->l_addr->u_pcb.pcb_fpregs.r_regs;
	/*
	 * load 32bit FP registers and establish processes' FP context.
	 */
	__asm volatile(
		".set noreorder		;"
		"lwc1	$f0, 0(%0)	;"
		"lwc1	$f1, 4(%0)	;"
		"lwc1	$f2, 8(%0)	;"
		"lwc1	$f3, 12(%0)	;"
		"lwc1	$f4, 16(%0)	;"
		"lwc1	$f5, 20(%0)	;"
		"lwc1	$f6, 24(%0)	;"
		"lwc1	$f7, 28(%0)	;"
		"lwc1	$f8, 32(%0)	;"
		"lwc1	$f9, 36(%0)	;"
		"lwc1	$f10, 40(%0)	;"
		"lwc1	$f11, 44(%0)	;"
		"lwc1	$f12, 48(%0)	;"
		"lwc1	$f13, 52(%0)	;"
		"lwc1	$f14, 56(%0)	;"
		"lwc1	$f15, 60(%0)	;"
		"lwc1	$f16, 64(%0)	;"
		"lwc1	$f17, 68(%0)	;"
		"lwc1	$f18, 72(%0)	;"
		"lwc1	$f19, 76(%0)	;"
		"lwc1	$f20, 80(%0)	;"
		"lwc1	$f21, 84(%0)	;"
		"lwc1	$f22, 88(%0)	;"
		"lwc1	$f23, 92(%0)	;"
		"lwc1	$f24, 96(%0)	;"
		"lwc1	$f25, 100(%0)	;"
		"lwc1	$f26, 104(%0)	;"
		"lwc1	$f27, 108(%0)	;"
		"lwc1	$f28, 112(%0)	;"
		"lwc1	$f29, 116(%0)	;"
		"lwc1	$f30, 120(%0)	;"
		"lwc1	$f31, 124(%0)	;"
		".set reorder" :: "r"(fp));
	/*
	 * load FPCSR and stop CP1 again while enabling interrupts.
	 * (restores the status register value saved in `status' above)
	 */
	__asm volatile(
		".set noreorder					\n\t"
		".set noat					\n\t"
		"ctc1	%0, $31					\n\t"
		"mtc0	%1, $" ___STRING(MIPS_COP_0_STATUS) "	\n\t"
		".set reorder					\n\t"
		".set at"
		:: "r"(fp[32] &~ MIPS_FPU_EXCEPTION_BITS), "r"(status));
#endif
}

/* 
 * Start a new LWP
 */
void
startlwp(arg)
	void *arg;
{
	int err;
	ucontext_t *uc = arg;
	struct lwp *l = curlwp;

	err = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
#if DIAGNOSTIC
	if (err) {
		printf("Error %d from cpu_setmcontext.", err);
	}
#endif
	pool_put(&lwp_uc_pool, uc);

	userret(l);
}

/*
 * upcallret:
 *
 *	Thin wrapper that runs userret() on `l'.
 *	XXX This is a terrible name.
 */
void
upcallret(struct lwp *l)
{

	userret(l);
}

/*
 * cpu_upcall:
 *
 *	Arrange for lwp `l' to enter the scheduler-activations upcall
 *	handler `upcall' when it next returns to user mode.
 *
 *	A struct saframe is copied out just below `sp' on the user stack
 *	and the user register frame is rewritten: pc and t9 point at the
 *	upcall function, sp at the copied frame, a0-a3 carry type, sas,
 *	nevents and ninterrupted, and s8/ra are cleared.
 *
 *	If the frame cannot be copied out the process is killed with
 *	SIGILL.
 */
void 
cpu_upcall(struct lwp *l, int type, int nevents, int ninterrupted,
    void *sas, void *ap, void *sp, sa_upcall_t upcall)
{
	struct saframe *sf, frame;
	struct frame *f;

	f = (struct frame *)l->l_md.md_regs;

	/*
	 * Only sa_arg and sa_upcall are filled in below, but the whole
	 * structure is copied out to user space.  Zero it first so that
	 * no uninitialised kernel stack contents reach userland.
	 */
	memset(&frame, 0, sizeof(frame));

#if 0 /* First 4 args in regs (see below). */
	frame.sa_type = type;
	frame.sa_sas = sas;
	frame.sa_events = nevents;
	frame.sa_interrupted = ninterrupted;
#endif
	frame.sa_arg = ap;
	frame.sa_upcall = upcall;

	sf = (struct saframe *)sp - 1;
	if (copyout(&frame, sf, sizeof(frame)) != 0) {
		/* Copying onto the stack didn't work. Die. */
		mutex_enter(l->l_proc->p_lock);
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	f->f_regs[_R_PC] = (uintptr_t)upcall;
	f->f_regs[_R_SP] = (uintptr_t)sf;
	f->f_regs[_R_A0] = type;
	f->f_regs[_R_A1] = (uintptr_t)sas;
	f->f_regs[_R_A2] = nevents;
	f->f_regs[_R_A3] = ninterrupted;
	f->f_regs[_R_S8] = 0;
	f->f_regs[_R_RA] = 0;
	f->f_regs[_R_T9] = (uintptr_t)upcall;  /* t9=Upcall function*/
}


void
cpu_getmcontext(l, mcp, flags)
	struct lwp *l;
	mcontext_t *mcp;
	unsigned int *flags;
{
	const struct frame *f = (struct frame *)l->l_md.md_regs;
	__greg_t *gr = mcp->__gregs;
	__greg_t ras_pc;

	/* Save register context. Dont copy R0 - it is always 0 */
	memcpy(&gr[_REG_AT], &f->f_regs[_R_AST], sizeof(mips_reg_t) * 31);

	gr[_REG_MDLO]  = f->f_regs[_R_MULLO];
	gr[_REG_MDHI]  = f->f_regs[_R_MULHI];
	gr[_REG_CAUSE] = f->f_regs[_R_CAUSE];
	gr[_REG_EPC]   = f->f_regs[_R_PC];
	gr[_REG_SR]    = f->f_regs[_R_SR];

	if ((ras_pc = (__greg_t)ras_lookup(l->l_proc,
	    (void *) gr[_REG_EPC])) != -1)
		gr[_REG_EPC] = ras_pc;

	*flags |= _UC_CPU;

	/* Save floating point register context, if any. */
	if (l->l_md.md_flags & MDP_FPUSED) {
		/*
		 * If this process is the current FP owner, dump its
		 * context to the PCB first.
		 */
		if (l == fpcurlwp)
			savefpregs(l);

		/*
		 * The PCB FP regs struct includes the FP CSR, so use the
		 * size of __fpregs.__fp_r when copying.
		 */
		memcpy(&mcp->__fpregs.__fp_r,
		    &l->l_addr->u_pcb.pcb_fpregs.r_regs,
		    sizeof(mcp->__fpregs.__fp_r));
		mcp->__fpregs.__fp_csr = l->l_addr->u_pcb.pcb_fpregs.r_regs[32];
		*flags |= _UC_FPU;
	}
}

/*
 * cpu_setmcontext:
 *
 *	Install the machine context *mcp on lwp `l'.  `flags' selects
 *	what is restored:
 *	  _UC_CPU	general registers, MDLO/MDHI, CAUSE and the PC
 *			(the status register is never restored);
 *	  _UC_FPU	floating point registers and FP CSR, into the PCB;
 *	  _UC_SETSTACK / _UC_CLRSTACK
 *			set / clear SS_ONSTACK in the signal stack flags.
 *
 *	Always returns 0.
 */
int
cpu_setmcontext(l, mcp, flags)
	struct lwp *l;
	const mcontext_t *mcp;
	unsigned int flags;
{
	struct frame *tf = (struct frame *)l->l_md.md_regs;
	const __greg_t *gregs = mcp->__gregs;
	struct proc *proc = l->l_proc;

	if ((flags & _UC_CPU) != 0) {
		/* Restore register context. */
		/* XXX:  Do we validate the addresses?? */
		memcpy(&tf->f_regs[_R_AST], &gregs[_REG_AT],
		    31 * sizeof(mips_reg_t));

		tf->f_regs[_R_MULLO] = gregs[_REG_MDLO];
		tf->f_regs[_R_MULHI] = gregs[_REG_MDHI];
		tf->f_regs[_R_CAUSE] = gregs[_REG_CAUSE];
		tf->f_regs[_R_PC] = gregs[_REG_EPC];
		/* Do not restore SR. */
	}

	if ((flags & _UC_FPU) != 0) {
		/*
		 * Disable the FPU and drop ownership, so that the new
		 * values are faulted in from the PCB on next FP use.
		 */
		tf->f_regs[_R_SR] &= ~MIPS_SR_COP_1_BIT;
		if (l == fpcurlwp)
			fpcurlwp = NULL;

		/*
		 * The PCB FP register array also holds the FP CSR
		 * (index 32), so the copy is sized by the mcontext array
		 * and the CSR is transferred separately.
		 */
		memcpy(&l->l_addr->u_pcb.pcb_fpregs.r_regs,
		    &mcp->__fpregs.__fp_r, sizeof(mcp->__fpregs.__fp_r));
		l->l_addr->u_pcb.pcb_fpregs.r_regs[32] =
		    mcp->__fpregs.__fp_csr;
	}

	/* The signal stack flags are updated under p_lock. */
	mutex_enter(proc->p_lock);
	if ((flags & _UC_SETSTACK) != 0)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	if ((flags & _UC_CLRSTACK) != 0)
		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
	mutex_exit(proc->p_lock);

	return 0;
}

/*
 * cpu_need_resched:
 *
 *	Note that CPU `ci' should reschedule: post an AST to the lwp
 *	currently on that CPU and set its ci_want_resched flag.  `flags'
 *	is not used.
 */
void
cpu_need_resched(struct cpu_info *ci, int flags)
{
	struct lwp *onproc = ci->ci_data.cpu_onproc;

	(void)flags;

	aston(onproc);
	ci->ci_want_resched = 1;
}

/*
 * cpu_idle:
 *
 *	Idle loop body: keep calling the routine installed in CPU_IDLE
 *	until the current CPU's ci_want_resched flag becomes non-zero.
 *	The routine pointer is fetched once, before the loop.
 */
void
cpu_idle(void)
{
	void (*idle_fn)(void);

	idle_fn = (void (*)(void))CPU_IDLE;
	while (curcpu()->ci_want_resched == 0)
		idle_fn();
}

/*
 * cpu_intr_p:
 *
 *	Return true when the current CPU's ci_idepth is non-zero, false
 *	otherwise.
 */
bool
cpu_intr_p(void)
{

	if (curcpu()->ci_idepth != 0)
		return true;
	return false;
}

