/*
 * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il)
 *
 * Permission to use this software is hereby granted under the terms of the
 * GNU General Public License, as published by the Free Software Foundation.
 *
 * THIS  SOFTWARE  IS  PROVIDED IN ITS  "AS IS"  CONDITION, WITH NO WARRANTY
 * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING
 * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED.
 */
/*
 * Author(s): Amnon Shiloh.
 */

#ifndef _LINUX_MOSIXTASK_H
#define _LINUX_MOSIXTASK_H

#include <mos/defs.h>
#include <mos/comm.h>
#include <mos/debug.h>

/* Wide signed time value; used below for mosix_task.last_mconsider. */
typedef long long now_t;
#ifdef CONFIG_MOSIX_DFSA
/* Forward declaration: this header only stores a pointer (mosix_task.ttab). */
struct dfsatab;
#endif /* CONFIG_MOSIX_DFSA */

#ifdef CONFIG_MOSIX_FS
/*
 * Forward declarations.  "struct mfs_stats" is used by pointer in
 * mosix_task.mfs_stats; "struct dentry" is not referenced by the
 * definitions in this header (presumably needed by includers - confirm).
 */
struct dentry;
struct mfs_stats;
#endif /* CONFIG_MOSIX_FS */

/*
 * Table of operation costs.  struct mosix_task holds an array of
 * MAX_MOSIX_TOPOLOGY of these in "depcost" (costs on DEPUTY).
 * NOTE(review): the field names parallel the per-task statistics counters
 * (ndemandpages, nsyscalls, ncopyouts, copyoutbytes, ncopyins, copyinbytes);
 * units are not visible from this header - confirm against the users.
 */
struct opcost
{
	int page, syscall, out, outkb, in, inkb;
#ifdef CONFIG_MOSIX_TOPOLOGY
	/* NOTE(review): presumably the range this entry applies to - confirm */
	int first, last;
#endif /* CONFIG_MOSIX_TOPOLOGY */
};

/*
 * Per-task MOSIX state.  MOSIX_INIT_TASK() initializes it as the "mosix"
 * member of a task, and the macros in this header reach it as
 * current->mosix / (p)->mosix.
 */
struct mosix_task
{
	volatile uint32_t dflags;	/* distributed (MOSIX) flags (D* bits) */
	atomic_t dreqs;			/* bits that others may request (DREQ_*) */
	/* following chars/bits should be in dflags, but modified within
	 * interrupt code... if only the compiler generated single-instruction
	 * codes for "|=" and "&=~" (but it doesn't) */
	volatile char commpri;		/* priority for MOSIX communication */
	volatile char ignoreoldsigs;	/* deliberately cleared ->sigpending */
	volatile char catchme;		/* catch for debugging */

	char hsleep_count;		/* nesting count on DHEAVYSLEEP (see
					 * deeper_sleep()/lighter_sleep()) */
	volatile int whereto;		/* migration request */
	int lock_depth;			/* of the mosix_flag */
	volatile uint32_t stay;		/* reasons why process must stay
					 * (DSTAY_* bits and DNOMIGRATE) */
	struct prequest *prequest;	/* requests to this task */
	int whereami;			/* where the process is */
	int deppe;			/* where the process came from */
	volatile long bstate;		/* backed-up state while in MOSIX */
	rwlock_t state_lock;		/* changes of bstate */
	kernel_cap_t remote_caps;	/* effective capabilities on REMOTE */
	struct held_files
	{
		struct file *f;		/* a file in use */
		char denywrite;		/* whether holding i_writecount down */
	} *held_files;			/* files held by remote VM */
	int held_allocated;		/* # of entries in "held_files" */
	struct mosix_link *contact;	/* DEPUTY <==> REMOTE connection */
	struct task_struct *ancesstor;	/* nearest ancestor when dependent */
	uint32_t deputytime;		/* ticks spent on DEPUTY */
	long last_sigxcpu;		/* last [prof] time SIGXCPU was sent */
	int64_t passedtime;		/* time already passed to DEPUTY */
	int64_t uttime;		/* time to add(REMOTE)/subtract(LOCAL) to/from
				 * times->tms_utime for statistic purposes */
	int64_t dctime;		/* decayed user-time (ms) */
	int64_t cutime;		/* cumulative dependent-child user-time (ms) */
	int64_t pagetime;	/* time waiting for free pages (ms) */
	int64_t ndemandpages;	/* statistical # of demand pages */
	int64_t nsyscalls;	/* statistical # of syscalls */
	int64_t ncopyouts;	/* statistical # of copying kernel to user */
	int64_t copyoutbytes;	/* statistical # of bytes to user */
	int64_t ncopyins;	/* statistical # of copying user to kernel */
	int64_t copyinbytes;	/* statistical # of bytes from user */
	uint32_t iocounter;	/* combination of above statistics */
	struct opcost depcost[MAX_MOSIX_TOPOLOGY]; /* costs on DEPUTY */
	int depspeed;		/* speed of DEPUTY */
	int64_t last_consider;	/* user time since last considered migration */
	now_t last_mconsider;	/* when last selected for memory-balancing */
	unsigned short decsecs;	/* seconds within decay cycle */
	unsigned short deccycle;/* length of decay cycle in seconds */
	short decay;		/* decay factor (out of DECAY_QUOTIENT) */
	unsigned char dpolicy;	/* statistic-collection policy */
	char disclosure;	/* level of disclosure */
	uint32_t asig;		/* signals arriving on REMOTE */
	siginfo_t *forced_sigs;	/* REMOTE forced signals */
	int nforced_sigs;	/* # of REMOTE forced signals */
	int pages_i_bring;	/* # of pages still to be brought */
	int rpagecredit;	/* # of expected remote page-faults */
	int rfreepages;		/* # of mapped-pages to bring without penalty */
	int page_allocs;	/* page allocation requests decaying counter */
	short ran;		/* ticks running during last second */
	unsigned short runstart;/* "load_ticks" (+1) since running */
	int nmigs;		/* number of [successful] migrations - ever */
	int load;		/* estimated contribution to load */
	int sonpid;		/* pid of son in remote-fork */
	int loadhere;		/* original local load */
	int migpages;		/* # of migrating pages */
	void *inexec;		/* structure while in "exec" */
	wait_queue_head_t wait_dist;/* misc. wait for process */
	unsigned long exit_mem;	/* memory on REMOTE when exited */
	unsigned short deputy_regs; /* bit map of regs in charge of DEPUTY */
	unsigned short pass_regs; /* bit map of regs to pass */
	__u32 features[NCAPINTS]; /* CPU features on original node */
	short mypid;		/* original PID */
	short sigmig;		/* signal to receive on migration */
	uint32_t *altregs;	/* place of registers when not on stack-top */
	void *mosix_log;
	struct data_cache *ucache;
	char *ps;		/* common "ps" information */
	int memused, memunused, memswapped;	/* memory split-up */
	struct depinfo
	{
		pid_t pgrp;
		pid_t session;
		char comm[16];
		pid_t tgid;
	} depinfo;
	unsigned int dirty_bits;	/* hidden bit-args to Linux routines */
	unsigned int dirty_arg;		/* hidden argument to Linux routines */
#ifdef CONFIG_MOSIX_DFSA
	int copy_ins;
	int bytes_in;
	uint32_t dupdates;
	struct dfsatab *ttab;
#endif /* CONFIG_MOSIX_DFSA */
#ifdef CONFIG_MOSIX_FS
	int iget_mfs_pe;
	unsigned long long iget_mfs_handle;
	int selected;
	int lastexec;
	int lastmagic;
	struct mfs_stats *mfs_stats;
#endif /* CONFIG_MOSIX_FS */
#ifdef CONFIG_MOSIX_DIAG
	int mig_page_count;
#endif /* CONFIG_MOSIX_DIAG */
};

/*
 * Initializer fragments appended to MOSIX_INIT_TASK().
 * Both expand to nothing whether or not CONFIG_MOSIX_DFSA / CONFIG_MOSIX_FS
 * is set: all DFSA and MFS fields are 0, so there is nothing to declare.
 */
#define	DFSA_INIT_TASK
#define	MFS_INIT_TASK

/*
 * Initializer for the "mosix" member of a statically initialized task.
 * "tsk" is the task object being initialized (needed so the wait-queue
 * head can refer to itself).  Only the non-zero fields are listed, in
 * the order in which struct mosix_task declares them.
 */
#define MOSIX_INIT_TASK(tsk) mosix: { \
	dreqs:		ATOMIC_INIT(0), \
	lock_depth:	-1, \
	bstate:		TASK_SAME, \
	state_lock:	RW_LOCK_UNLOCKED, \
	disclosure:	CONFIG_MOSIX_DISCLOSURE, \
	wait_dist:	__WAIT_QUEUE_HEAD_INITIALIZER(tsk.mosix.wait_dist) \
	DFSA_INIT_TASK \
	MFS_INIT_TASK \
}

/*
 * distributed flags (dflags) that are set only by the process itself,
 * (but may be read by others):
 * the following locks (in correct order of locking) are required to
 * modify those bits:
 *
 * DINSCHED:		runqueue_lock
 * DHEAVYSLEEP:		runqueue_lock
 * DREMOTEDFSA:		runqueue_lock
 * DPASSING:		runqueue_lock
 * DINCOMING:		runqueue_lock
 * DDEPUTY:		lock_mosix, tasklist_lock, runqueue_lock, task_lock
 * DFINISHED:		lock_mosix, runqueue_lock, task_lock
 * DPAGEIN:		runqueue_lock
 * DSENTURGENT:		runqueue_lock
 * DFAKESIGNAL:		sometimes sigmask_lock 
 */
/*
 * Bit values for mosix_task.dflags.  DPSYNC is an alias sharing DSYNC's bit.
 * Bits 0x00040000 through 0x00400000 are not assigned in this header.
 */
#define DDEPUTY		0x00000001	/* process is a DEPUTY stub */
#define DREMOTE		0x00000002	/* process is running remotely */
#define	DINSCHED	0x00000004	/* MOSIX task within "schedule" */
#define DSYNC		0x00000008	/* remote process is synced and waits */
#define DPSYNC		DSYNC		/* deputy must tell us to continue */
#define DNESTED		0x00000010	/* nested requests from remote */
#define	DSENTURGENT	0x00000020	/* REMOTE has already sent OOB */
#define DINCOMING	0x00000040	/* process coming here */
#define DPASSING	0x00000080	/* process is in migration */
#define DPAGEIN		0x00000100	/* in pagein, considered running */
#define DFINISHED	0x00000200	/* wants to become zombie */
#define	DREMOTEDAEMON	0x00000400	/* set DREMOTE on "fork" */
#define	DFAKESIGNAL	0x00000800	/* produce a fake signal */
#define	DHEAVYSLEEP	0x00001000	/* prevent signals/events when asleep */
#define	DTRACESYS1	0x00002000	/* PT_TRACESYS done before syscall */
#define	DTRACESYS2	0x00004000	/* syscall done before 2nd PT_TRACESYS */
#define	DMUSTBEBACK	0x00008000	/* MUST arrive back home */
#define	DDUMPABLE	0x00010000	/* copy of dumpable when DEPUTY */
#define	DDELAYHELD	0x00020000	/* rebuild held_files later */
#ifdef CONFIG_MOSIX_DFSA
/* These two exist only with DFSA; there is no fallback definition. */
#define	DSTATSDOWN	0x00800000	/* turn off local system-call stats */
#define	DREMOTEDFSA	0x01000000	/* on REMOTE: within a DFSA syscall */
#endif /* CONFIG_MOSIX_DFSA */

/*
 * bits in "dreqs" (anything that others can set); they are manipulated
 * through tell_process(), process_ack() and process_told().
 */

#define DREQ_NICECNG		0x00000001	/* priority changed */
#define	DREQ_UPDOVERHEADS	0x00000002	/* update overheads */
#define	DREQ_HOMEWAKE		0x00000004	/* wake when arrived home */
#define	DREQ_CHECKCONF		0x00000008	/* check MOSIX configuration */
#define	DREQ_CHECKSTAY		0x00000010	/* check whether still stay */
#define	DREQ_URGENT		0x00000020	/* something urgent (R=>D) */
#define	DREQ_CAPCNG		0x00000040	/* capabilities changed */
#define	DREQ_INFOCNG		0x00000080	/* disclosed info changed */
#define	DREQ_FILEUNMAP		0x00000100	/* file(s) were unmapped */
#ifdef CONFIG_MOSIX_DFSA
#define	DREQ_NOTUPTODATE	0x10000000	/* send it all again */
#define	DREQ_DFSASYNC		0x20000000	/* DFSA world changed */
#define	DREQ_EXITDFSA		0x40000000	/* call DEPUTY ASAP */
#else
/*
 * Without DFSA these two become empty masks, so code that ORs them into a
 * request or tests them with process_told() still compiles and sees no bits.
 * DREQ_NOTUPTODATE has no such fallback and is undefined without DFSA.
 */
#define	DREQ_DFSASYNC		0
#define	DREQ_EXITDFSA		0
#endif /* CONFIG_MOSIX_DFSA */

/*
 * Request helpers operating on (p)->mosix.dreqs with a mask of DREQ_* bits:
 *
 *   tell_process(p, what)  - set the bits of "what" via atomic_set_mask()
 *   process_ack(p, what)   - clear the bits of "what" via atomic_clear_mask()
 *   process_told(p, what)  - non-zero if any bit of "what" is currently set
 *
 * Both arguments are fully parenthesized in all three macros so that
 * composite masks (e.g. A|B) and pointer expressions expand safely no
 * matter how the underlying atomic primitives are implemented.
 */
#define	tell_process(p,what)	atomic_set_mask((what),&((p)->mosix.dreqs))
#define	process_ack(p,what)	atomic_clear_mask((what),&((p)->mosix.dreqs))
#define	process_told(p,what)	(atomic_read(&((p)->mosix.dreqs)) & (what))

/*
 * reasons to stay (bits of mosix_task.stay):
 */

#define DSTAY_FOR_MONKEY 0x00000001	/* using monkey vnode */
#define DSTAY_FOR_DEV	 0x00000002	/* mapping a device */
#define DSTAY_FOR_86	 0x00000004	/* running in 86 mode */
#define DSTAY_ITS_DAEMON 0x00000008	/* daemon process */
#define DSTAY_FOR_PRIV	 0x00000010	/* privileged inst. access (in/out) */
#define DSTAY_FOR_MLOCK	 0x00000020	/* has locked memory */
#define DSTAY_FOR_CLONE	 0x00000040	/* shared VM */
#define DSTAY_FOR_RT	 0x00000080	/* Real-Time scheduling */
#define	DSTAY_FOR_IOPL	0x00000100	/* direct I/O permission */
#define	DSTAY_ITS_INIT	0x00000200	/* init process */
#define	DSTAY_FOR_KIOBUF 0x00000400	/* using kiobuf */
#define	DSTAY_OTHER1	0x01000000	/* external reason for stay (1) */
#define	DSTAY_OTHER2	0x02000000	/* external reason for stay (2) */
#define	DSTAY_OTHER3	0x04000000	/* external reason for stay (3) */
#define	DSTAY_OTHER4	0x08000000	/* external reason for stay (4) */
#define DNOMIGRATE	0x80000000	/* user requested no auto-migrations */

/* Mask of every bit except DNOMIGRATE. */
#define DSTAY   (~DNOMIGRATE)
/* NOTE(review): presumably the reasons tied to the address space - confirm */
#define	DSTAY_PER_MM	(DSTAY_FOR_MONKEY|DSTAY_FOR_DEV|DSTAY_FOR_MLOCK|DSTAY_FOR_KIOBUF)

/*
 * where to go (whereto): special negative values of mosix_task.whereto
 */
#define GOBACKHOME	(-1)	/* just go back home */
#define BALANCE		(-2)	/* perform load balancing */
#define IOBALANCE	(-3)	/* perform load balancing for I/O */
#define MEMBALANCE	(-4)	/* perform balancing for memory */
#define	MFSBALANCE	(-5)	/* NOTE(review): presumably balancing for MFS;
				 * the comment here used to duplicate
				 * MUSTGOHOME's - confirm */
#define	MUSTGOHOME	(-6)	/* must go back home */

/*
 * Re-evaluate current->sigpending under current's sigmask_lock (taken
 * with interrupts saved/restored).
 *
 * When mosix.ignoreoldsigs is clear the regular recalc_sigpending() is
 * used.  Otherwise sigpending becomes 1 only if DFAKESIGNAL is set and
 * DHEAVYSLEEP is not, and 0 in every other case.
 */
#define	evaluate_pending_signals_in_mosix_context() do { \
	struct task_struct *me = current; \
	unsigned long irqflags; \
	spin_lock_irqsave(&me->sigmask_lock, irqflags); \
	if(!me->mosix.ignoreoldsigs) { \
		recalc_sigpending(me); \
	} else { \
		me->sigpending = ((me->mosix.dflags & \
			(DFAKESIGNAL|DHEAVYSLEEP)) == DFAKESIGNAL); \
	} \
	spin_unlock_irqrestore(&me->sigmask_lock, irqflags); \
} while(0)

/*
 * Enter one more level of "heavy sleep" for the current task.
 *
 * mosix.hsleep_count is always incremented.  Only on the 0 -> 1
 * transition is DHEAVYSLEEP set in dflags (under runqueue_lock) and
 * sigpending cleared (under the task's sigmask_lock).
 * Paired with lighter_sleep().
 */
#define	deeper_sleep() do { \
	struct task_struct *me = current; \
	if(me->mosix.hsleep_count++ == 0) { \
		unsigned long irqflags; \
		spin_lock_irq(&runqueue_lock); \
		me->mosix.dflags |= DHEAVYSLEEP; \
		spin_unlock_irq(&runqueue_lock); \
		spin_lock_irqsave(&me->sigmask_lock, irqflags); \
		me->sigpending = 0; \
		spin_unlock_irqrestore(&me->sigmask_lock, irqflags); \
	} \
} while(0)

/*
 * Leave one level of "heavy sleep" for the current task.
 *
 * mosix.hsleep_count is always decremented.  Only when it reaches 0 is
 * DHEAVYSLEEP cleared from dflags (under runqueue_lock), after which the
 * pending-signal state is re-evaluated.
 * Paired with deeper_sleep().
 */
#define	lighter_sleep() do { \
	struct task_struct *me = current; \
	if(--me->mosix.hsleep_count == 0) { \
		spin_lock_irq(&runqueue_lock); \
		me->mosix.dflags &= ~DHEAVYSLEEP; \
		spin_unlock_irq(&runqueue_lock); \
		evaluate_pending_signals_in_mosix_context(); \
	} \
} while(0)

/*
 * Record the "dumpable" setting of the current task.
 * With an mm the value is stored in current->mm->dumpable; without one
 * it is kept as the DDUMPABLE bit of mosix.dflags (set when "on" is
 * non-zero, cleared otherwise).
 */
#define	set_me_dumpable(on) do { \
		if(!current->mm) { \
			if(on) \
				current->mosix.dflags |= DDUMPABLE; \
			else \
				current->mosix.dflags &= ~DDUMPABLE; \
		} else { \
			current->mm->dumpable = (on); \
		} \
	} while(0)

/*
 * Non-zero when the current task is dumpable.
 * A DEPUTY (DDEPUTY set in dflags) answers from its DDUMPABLE bit; any
 * other task answers from current->mm->dumpable, or 0 when it has no mm.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand: "!i_am_dumpable()" or "i_am_dumpable() && x" must apply to
 * the result, not to the condition of the inner "?:".
 */
#define	i_am_dumpable()	((current->mosix.dflags & DDEPUTY) ? \
	((current->mosix.dflags & DDUMPABLE) != 0) : \
	(current->mm ? current->mm->dumpable : 0))

/* dirty parameters: */
/* sorry we are not allowed to add parameters to standard Linux routines */
/* because they can be called by who knows what - even modules, so instead: */
/* NOTE(review): presumably these are the bits of mosix_task.dirty_bits
 * ("hidden bit-args to Linux routines") - confirm against the users */
#define	MMAP_MMDOWNED		0x01	/* MM was downed prior to mmap() */
#define	MMAP_MAYSHARE		0x02	/* ornament with VM_MAYSHARE */
#define	MAPS_NOUSER		0x04	/* do not copy maps to user */
#ifdef CONFIG_MOSIX_FS
#define	MFSARG_OLDREADDIR	0x08	/* this is "old_readdir" calling */
#define	MFSARG_GETDENTS		0x10	/* this is "getdents" calling */
#define	MFSARG_GETDENTS64	0x20	/* this is "getdents64" calling */
#define	MFSARG_EMPTYF_PRI	0x40	/* obtain empty file even beyond limit*/
#define	MFSARG_RONLY		0x80	/* no write-permission on anything */
#endif /* CONFIG_MOSIX_FS */
#ifdef CONFIG_MOSIX_DFSA
#define	FILP_OPEN_SYSCALL	0x100	/* filp_open called from sys_open */
#endif /* CONFIG_MOSIX_DFSA */

#endif
