X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..ccc36f2f2d89f9115c479db4439aa5c88de5b44a:/osfmk/kern/thread.c

diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c
index 8d4523b15..85ebb7dcf 100644
--- a/osfmk/kern/thread.c
+++ b/osfmk/kern/thread.c
@@ -80,7 +80,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -102,7 +101,6 @@
 #include
 #include
 #include
-#include
 #include    /*** ??? fix so this can be removed ***/
 #include
 #include
@@ -125,83 +123,32 @@
 #include
 #include

-/*
- * Per-Cpu stashed global state
- */
-vm_offset_t     active_stacks[NCPUS];   /* per-cpu active stacks */
-vm_offset_t     kernel_stack[NCPUS];    /* top of active stacks */
-thread_act_t    active_kloaded[NCPUS];  /* + act if kernel loaded */
+static struct zone      *thread_zone;

-struct zone     *thread_shuttle_zone;
-
-queue_head_t    reaper_queue;
-decl_simple_lock_data(,reaper_lock)
-thread_call_t   thread_reaper_call;
+static queue_head_t     reaper_queue;
+decl_simple_lock_data(static,reaper_lock)

 extern int      tick;

-extern void     pcb_module_init(void);
-
 /* private */
-static struct thread_shuttle    thr_sh_template;
+static struct thread    thread_template, init_thread;

 #if     MACH_DEBUG
-#if     STACK_USAGE
-static void     stack_init(vm_offset_t stack, unsigned int bytes);
-void            stack_finalize(vm_offset_t stack);
-vm_size_t       stack_usage(vm_offset_t stack);
-#else   /*STACK_USAGE*/
-#define stack_init(stack, size)
-#define stack_finalize(stack)
-#define stack_usage(stack) (vm_size_t)0
-#endif  /*STACK_USAGE*/

 #ifdef  MACHINE_STACK
-extern
-#endif
-    void stack_statistics(
+extern void stack_statistics(
        unsigned int    *totalp,
        vm_size_t       *maxusagep);
-
-#define STACK_MARKER    0xdeadbeef
-#if     STACK_USAGE
-boolean_t       stack_check_usage = TRUE;
-#else   /* STACK_USAGE */
-boolean_t       stack_check_usage = FALSE;
-#endif  /* STACK_USAGE */
-decl_simple_lock_data(,stack_usage_lock)
-vm_size_t       stack_max_usage = 0;
-vm_size_t       stack_max_use = KERNEL_STACK_SIZE - 64;
+#endif  /* MACHINE_STACK */
 #endif  /* MACH_DEBUG */

-/* Forwards */
-void            thread_collect_scan(void);
-
-kern_return_t   thread_create_shuttle(
-       thread_act_t    thr_act,
-       integer_t       priority,
-       void            (*start)(void),
-       thread_t        *new_thread);
-
-extern void     Load_context(
-       thread_t        thread);
-
-
-/*
- * Machine-dependent code must define:
- *      thread_machine_init
- *      thread_machine_terminate
- *      thread_machine_collect
- *
- * The thread->pcb field is reserved for machine-dependent code.
- */
-
 #ifdef  MACHINE_STACK
 /*
  *      Machine-dependent code must define:
  *              stack_alloc_try
  *              stack_alloc
  *              stack_free
+ *              stack_free_stack
  *              stack_collect
  *      and if MACH_DEBUG:
  *              stack_statistics
@@ -216,20 +163,28 @@ extern void Load_context(
  *      because stack_alloc_try/thread_invoke operate at splsched.
 */
-decl_simple_lock_data(,stack_lock_data)         /* splsched only */
-#define stack_lock()    simple_lock(&stack_lock_data)
-#define stack_unlock()  simple_unlock(&stack_lock_data)
+decl_simple_lock_data(static,stack_lock_data)
+#define stack_lock()    simple_lock(&stack_lock_data)
+#define stack_unlock()  simple_unlock(&stack_lock_data)
+
+static vm_map_t         stack_map;
+static vm_offset_t      stack_free_list;
+
+static vm_offset_t      stack_free_cache[NCPUS];

-vm_offset_t stack_free_list;            /* splsched only */
 unsigned int stack_free_max = 0;
-unsigned int stack_free_count = 0;      /* splsched only */
-unsigned int stack_free_limit = 1;      /* patchable */
+unsigned int stack_free_count = 0;      /* splsched only */
+unsigned int stack_free_limit = 1;      /* Arbitrary */

-unsigned int stack_alloc_hits = 0;      /* debugging */
+unsigned int stack_cache_hits = 0;      /* debugging */
+
+unsigned int stack_alloc_hits = 0;      /* debugging */
 unsigned int stack_alloc_misses = 0;    /* debugging */
 unsigned int stack_alloc_total = 0;
 unsigned int stack_alloc_hiwater = 0;
+unsigned int stack_alloc_bndry = 0;
+

 /*
  *      The next field is at the base of the stack,
@@ -241,7 +196,7 @@ unsigned int stack_alloc_hiwater = 0;
 /*
  *      stack_alloc:
  *
- *      Allocate a kernel stack for an activation.
+ *      Allocate a kernel stack for a thread.
  *      May block.
 */
 vm_offset_t
@@ -249,14 +204,11 @@ stack_alloc(
    thread_t thread,
    void (*start_pos)(thread_t))
 {
-   vm_offset_t stack;
-   spl_t s;
+   vm_offset_t stack = thread->kernel_stack;
+   spl_t s;

-   /*
-    * We first try the free list. It is probably empty,
-    * or stack_alloc_try would have succeeded, but possibly
-    * a stack was freed before the swapin thread got to us.
-    */
+   if (stack)
+       return (stack);

    s = splsched();
    stack_lock();
@@ -268,50 +220,22 @@ stack_alloc(
    stack_unlock();
    splx(s);

-   if (stack == 0) {
-       /*
-        * Kernel stacks should be naturally aligned,
-        * so that it is easy to find the starting/ending
-        * addresses of a stack given an address in the middle.
-        */
-
-       if (kmem_alloc_aligned(kernel_map, &stack,
-               round_page(KERNEL_STACK_SIZE)) != KERN_SUCCESS)
-           panic("stack_alloc");
-
-       stack_alloc_total++;
-       if (stack_alloc_total > stack_alloc_hiwater)
-           stack_alloc_hiwater = stack_alloc_total;
-
-#if MACH_DEBUG
-       stack_init(stack, round_page(KERNEL_STACK_SIZE));
-#endif /* MACH_DEBUG */
-
-       /*
-        * If using fractional pages, free the remainder(s)
-        */
-       if (KERNEL_STACK_SIZE < round_page(KERNEL_STACK_SIZE)) {
-           vm_offset_t ptr = stack + KERNEL_STACK_SIZE;
-           vm_offset_t endp = stack + round_page(KERNEL_STACK_SIZE);
-           while (ptr < endp) {
-#if MACH_DEBUG
-               /*
-                * We need to initialize just the end of the
-                * region.
-                */
-               stack_init(ptr, (unsigned int) (endp - ptr));
-#endif
-               stack_lock();
-               stack_next(stack) = stack_free_list;
-               stack_free_list = stack;
-               if (++stack_free_count > stack_free_max)
-                   stack_free_max = stack_free_count;
-               stack_unlock();
-               ptr += KERNEL_STACK_SIZE;
-           }
-       }
+   if (stack != 0) {
+       machine_stack_attach(thread, stack, start_pos);
+       return (stack);
    }
-   stack_attach(thread, stack, start_pos);
+
+   if (kernel_memory_allocate(
+           stack_map, &stack,
+           KERNEL_STACK_SIZE, stack_alloc_bndry - 1,
+           KMA_KOBJECT) != KERN_SUCCESS)
+       panic("stack_alloc: no space left for stack maps");
+
+   stack_alloc_total++;
+   if (stack_alloc_total > stack_alloc_hiwater)
+       stack_alloc_hiwater = stack_alloc_total;
+
+   machine_stack_attach(thread, stack, start_pos);

    return (stack);
 }
@@ -319,25 +243,61 @@ stack_alloc(
  *      stack_free:
  *
  *      Free a kernel stack.
- *      Called at splsched.
 */
 void
 stack_free(
    thread_t thread)
 {
-   vm_offset_t stack = stack_detach(thread);
+   vm_offset_t stack = machine_stack_detach(thread);

+   assert(stack);
-   if (stack != thread->stack_privilege) {
-       stack_lock();
-       stack_next(stack) = stack_free_list;
-       stack_free_list = stack;
-       if (++stack_free_count > stack_free_max)
-           stack_free_max = stack_free_count;
-       stack_unlock();
+   if (stack != thread->reserved_stack) {
+       spl_t s = splsched();
+       vm_offset_t *cache;
+
+       cache = &stack_free_cache[cpu_number()];
+       if (*cache == 0) {
+           *cache = stack;
+           splx(s);
+
+           return;
+       }
+
+       stack_lock();
+       stack_next(stack) = stack_free_list;
+       stack_free_list = stack;
+       if (++stack_free_count > stack_free_max)
+           stack_free_max = stack_free_count;
+       stack_unlock();
+       splx(s);
    }
 }

+void
+stack_free_stack(
+   vm_offset_t stack)
+{
+   spl_t s = splsched();
+   vm_offset_t *cache;
+
+   cache = &stack_free_cache[cpu_number()];
+   if (*cache == 0) {
+       *cache = stack;
+       splx(s);
+
+       return;
+   }
+
+   stack_lock();
+   stack_next(stack) = stack_free_list;
+   stack_free_list = stack;
+   if (++stack_free_count > stack_free_max)
+       stack_free_max = stack_free_count;
+   stack_unlock();
+   splx(s);
+}
+
 /*
  *      stack_collect:
  *
@@ -348,37 +308,75 @@ stack_free(
 void
 stack_collect(void)
 {
-   register vm_offset_t stack;
-   spl_t s;
-
-   /* If using fractional pages, Cannot just call kmem_free(),
-    * and we're too lazy to coalesce small chunks.
-    */
-   if (KERNEL_STACK_SIZE < round_page(KERNEL_STACK_SIZE))
-       return;
+   spl_t s = splsched();

-   s = splsched();
    stack_lock();
    while (stack_free_count > stack_free_limit) {
-       stack = stack_free_list;
+       vm_offset_t stack = stack_free_list;
+
        stack_free_list = stack_next(stack);
        stack_free_count--;
        stack_unlock();
        splx(s);

-#if MACH_DEBUG
-       stack_finalize(stack);
-#endif /* MACH_DEBUG */
-       kmem_free(kernel_map, stack, KERNEL_STACK_SIZE);
+       if (vm_map_remove(
+               stack_map, stack, stack + KERNEL_STACK_SIZE,
+               VM_MAP_REMOVE_KUNWIRE) != KERN_SUCCESS)
+           panic("stack_collect: vm_map_remove failed");

        s = splsched();
-       stack_alloc_total--;
        stack_lock();
+       stack_alloc_total--;
    }
    stack_unlock();
    splx(s);
 }

+/*
+ *     stack_alloc_try:
+ *
+ *     Non-blocking attempt to allocate a kernel stack.
+ *     Called at splsched with the thread locked.
+ */
+
+boolean_t stack_alloc_try(
+   thread_t thread,
+   void (*start)(thread_t))
+{
+   register vm_offset_t stack, *cache;
+
+   cache = &stack_free_cache[cpu_number()];
+   if (stack = *cache) {
+       *cache = 0;
+       machine_stack_attach(thread, stack, start);
+       stack_cache_hits++;
+
+       return (TRUE);
+   }
+
+   stack_lock();
+   stack = stack_free_list;
+   if (stack != (vm_offset_t)0) {
+       stack_free_list = stack_next(stack);
+       stack_free_count--;
+   }
+   stack_unlock();
+
+   if (stack == 0)
+       stack = thread->reserved_stack;
+
+   if (stack != 0) {
+       machine_stack_attach(thread, stack, start);
+       stack_alloc_hits++;
+
+       return (TRUE);
+   }
+   else {
+       stack_alloc_misses++;
+
+       return (FALSE);
+   }
+}

 #if MACH_DEBUG
 /*
@@ -398,27 +396,9 @@ stack_statistics(
    s = splsched();
    stack_lock();
-#if STACK_USAGE
-   if (stack_check_usage) {
-       vm_offset_t stack;
-
-       /*
-        * This is pretty expensive to do at splsched,
-        * but it only happens when someone makes
-        * a debugging call, so it should be OK.
-        */
-
-       for (stack = stack_free_list; stack != 0;
-           stack = stack_next(stack)) {
-           vm_size_t usage = stack_usage(stack);
-
-           if (usage > *maxusagep)
-               *maxusagep = usage;
-       }
-   }
-#endif /* STACK_USAGE */
-
    *totalp = stack_free_count;
+   *maxusagep = 0;
+
    stack_unlock();
    splx(s);
 }
@@ -439,142 +419,112 @@ stack_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_si
    *exhaustable = 0;
 }

-
-/*
- *     stack_privilege:
- *
- *     stack_alloc_try on this thread must always succeed.
- */
-
 void
 stack_privilege(
-   register thread_t thread)
-{
-   /*
-    * This implementation only works for the current thread.
-    */
-
-   if (thread != current_thread())
-       panic("stack_privilege");
-
-   if (thread->stack_privilege == 0)
-       thread->stack_privilege = current_stack();
-}
-
-/*
- *     stack_alloc_try:
- *
- *     Non-blocking attempt to allocate a kernel stack.
- *     Called at splsched with the thread locked.
- */
-
-boolean_t stack_alloc_try(
-   thread_t thread,
-   void (*start_pos)(thread_t))
+   register thread_t thread)
 {
-   register vm_offset_t stack;
-
-   if ((stack = thread->stack_privilege) == (vm_offset_t)0) {
-       stack_lock();
-       stack = stack_free_list;
-       if (stack != (vm_offset_t)0) {
-           stack_free_list = stack_next(stack);
-           stack_free_count--;
-       }
-       stack_unlock();
-   }
-
-   if (stack != 0) {
-       stack_attach(thread, stack, start_pos);
-       stack_alloc_hits++;
-       return TRUE;
-   } else {
-       stack_alloc_misses++;
-       return FALSE;
-   }
+   /* OBSOLETE */
 }

-natural_t min_quantum_abstime;
-extern natural_t min_quantum_ms;
-
 void
-thread_init(void)
+thread_bootstrap(void)
 {
-   thread_shuttle_zone = zinit(
-       sizeof(struct thread_shuttle),
-       THREAD_MAX * sizeof(struct thread_shuttle),
-       THREAD_CHUNK * sizeof(struct thread_shuttle),
-       "threads");
-
    /*
-    * Fill in a template thread_shuttle for fast initialization.
-    * [Fields that must be (or are typically) reset at
-    * time of creation are so noted.]
+    * Fill in a template thread for fast initialization.
    */
-   /* thr_sh_template.links (none) */
-   thr_sh_template.runq = RUN_QUEUE_NULL;
+   thread_template.runq = RUN_QUEUE_NULL;
+
+   thread_template.ref_count = 1;
+
+   thread_template.reason = AST_NONE;
+   thread_template.at_safe_point = FALSE;
+   thread_template.wait_event = NO_EVENT64;
+   thread_template.wait_queue = WAIT_QUEUE_NULL;
+   thread_template.wait_result = THREAD_WAITING;
+   thread_template.interrupt_level = THREAD_ABORTSAFE;
+   thread_template.state = TH_STACK_HANDOFF | TH_WAIT | TH_UNINT;
+   thread_template.wake_active = FALSE;
+   thread_template.active_callout = FALSE;
+   thread_template.continuation = (void (*)(void))0;
+   thread_template.top_act = THR_ACT_NULL;
+
+   thread_template.importance = 0;
+   thread_template.sched_mode = 0;
+   thread_template.safe_mode = 0;

+   thread_template.priority = 0;
+   thread_template.sched_pri = 0;
+   thread_template.max_priority = 0;
+   thread_template.task_priority = 0;
+   thread_template.promotions = 0;
+   thread_template.pending_promoter_index = 0;
+   thread_template.pending_promoter[0] =
+   thread_template.pending_promoter[1] = NULL;

-   /* thr_sh_template.task (later) */
-   /* thr_sh_template.thread_list (later) */
-   /* thr_sh_template.pset_threads (later) */
+   thread_template.realtime.deadline = UINT64_MAX;

-   /* one ref for pset, one for activation */
-   thr_sh_template.ref_count = 2;
+   thread_template.current_quantum = 0;

-   thr_sh_template.wait_event = NO_EVENT;
-   thr_sh_template.wait_result = KERN_SUCCESS;
-   thr_sh_template.wait_queue = WAIT_QUEUE_NULL;
-   thr_sh_template.wake_active = FALSE;
-   thr_sh_template.state = TH_WAIT|TH_UNINT;
-   thr_sh_template.interruptible = TRUE;
-   thr_sh_template.continuation = (void (*)(void))0;
-   thr_sh_template.top_act = THR_ACT_NULL;
+   thread_template.computation_metered = 0;
+   thread_template.computation_epoch = 0;

-   thr_sh_template.importance = 0;
-   thr_sh_template.sched_mode = 0;
+   thread_template.cpu_usage = 0;
+   thread_template.cpu_delta = 0;
+   thread_template.sched_usage = 0;
+   thread_template.sched_delta = 0;
+   thread_template.sched_stamp = 0;
+   thread_template.sleep_stamp = 0;
+   thread_template.safe_release = 0;

-   thr_sh_template.priority = 0;
-   thr_sh_template.sched_pri = 0;
-   thr_sh_template.depress_priority = -1;
-   thr_sh_template.max_priority = 0;
+   thread_template.bound_processor = PROCESSOR_NULL;
+   thread_template.last_processor = PROCESSOR_NULL;
+   thread_template.last_switch = 0;

-   thr_sh_template.cpu_usage = 0;
-   thr_sh_template.sched_usage = 0;
-   thr_sh_template.sched_stamp = 0;
-   thr_sh_template.sleep_stamp = 0;
+   thread_template.vm_privilege = FALSE;

-   thr_sh_template.policy = POLICY_NULL;
-   thr_sh_template.sp_state = 0;
-   thr_sh_template.unconsumed_quantum = 0;
+   timer_init(&(thread_template.user_timer));
+   timer_init(&(thread_template.system_timer));
+   thread_template.user_timer_save.low = 0;
+   thread_template.user_timer_save.high = 0;
+   thread_template.system_timer_save.low = 0;
+   thread_template.system_timer_save.high = 0;

-   thr_sh_template.vm_privilege = FALSE;
+   thread_template.processor_set = PROCESSOR_SET_NULL;

-   timer_init(&(thr_sh_template.user_timer));
-   timer_init(&(thr_sh_template.system_timer));
-   thr_sh_template.user_timer_save.low = 0;
-   thr_sh_template.user_timer_save.high = 0;
-   thr_sh_template.system_timer_save.low = 0;
-   thr_sh_template.system_timer_save.high = 0;
-   thr_sh_template.cpu_delta = 0;
-   thr_sh_template.sched_delta = 0;
+   thread_template.act_ref_count = 2;

-   thr_sh_template.active = FALSE; /* reset */
+   thread_template.special_handler.handler = special_handler;
+   thread_template.special_handler.next = 0;

-   /* thr_sh_template.processor_set (later) */
-#if NCPUS > 1
-   thr_sh_template.bound_processor = PROCESSOR_NULL;
-#endif /*NCPUS > 1*/
 #if MACH_HOST
-   thr_sh_template.may_assign = TRUE;
-   thr_sh_template.assign_active = FALSE;
+   thread_template.may_assign = TRUE;
+   thread_template.assign_active = FALSE;
 #endif  /* MACH_HOST */

-   thr_sh_template.funnel_state = 0;
+   thread_template.funnel_lock = THR_FUNNEL_NULL;
+   thread_template.funnel_state = 0;
+#if MACH_LDEBUG
+   thread_template.mutex_count = 0;
+#endif  /* MACH_LDEBUG */

-#if NCPUS > 1
-   /* thr_sh_template.last_processor  (later) */
-#endif /* NCPUS > 1 */
+   init_thread = thread_template;
+
+   init_thread.top_act = &init_thread;
+   init_thread.thread = &init_thread;
+   machine_thread_set_current(&init_thread);
+}
+
+void
+thread_init(void)
+{
+   kern_return_t ret;
+   unsigned int stack;
+
+   thread_zone = zinit(
+       sizeof(struct thread),
+       THREAD_MAX * sizeof(struct thread),
+       THREAD_CHUNK * sizeof(struct thread),
+       "threads");

    /*
     * Initialize other data structures used in
@@ -583,61 +533,70 @@ thread_init(void)
    queue_init(&reaper_queue);
    simple_lock_init(&reaper_lock, ETAP_THREAD_REAPER);
-   thr_sh_template.funnel_lock = THR_FUNNEL_NULL;

 #ifndef MACHINE_STACK
-   simple_lock_init(&stack_lock_data, ETAP_THREAD_STACK);
-#endif  /* MACHINE_STACK */
-
-#if MACH_DEBUG
-   simple_lock_init(&stack_usage_lock, ETAP_THREAD_STACK_USAGE);
-#endif  /* MACH_DEBUG */
-
-#if MACH_LDEBUG
-   thr_sh_template.kthread = FALSE;
-   thr_sh_template.mutex_count = 0;
-#endif  /* MACH_LDEBUG */
-
-   {
-       AbsoluteTime abstime;
-
-       clock_interval_to_absolutetime_interval(
-           min_quantum_ms, 1000*NSEC_PER_USEC, &abstime);
-       assert(abstime.hi == 0 && abstime.lo != 0);
-       min_quantum_abstime = abstime.lo;
+   simple_lock_init(&stack_lock_data, ETAP_THREAD_STACK);  /* Initialize the stack lock */
+
+   if (KERNEL_STACK_SIZE < round_page_32(KERNEL_STACK_SIZE)) {  /* Kernel stacks must be multiples of pages */
+       panic("thread_init: kernel stack size (%08X) must be a multiple of page size (%08X)\n",
+           KERNEL_STACK_SIZE, PAGE_SIZE);
+   }
+
+   for(stack_alloc_bndry = PAGE_SIZE; stack_alloc_bndry <= KERNEL_STACK_SIZE; stack_alloc_bndry <<= 1);  /* Find next power of 2 above stack size */
+
+   ret = kmem_suballoc(kernel_map,  /* Suballocate from the kernel map */
+
+       &stack,
+       (stack_alloc_bndry * (2*THREAD_MAX + 64)),  /* Allocate enough for all of it */
+       FALSE,  /* Say not pageable so that it is wired */
+       TRUE,   /* Allocate from anywhere */
+       &stack_map);  /* Allocate a submap */
+
+   if(ret != KERN_SUCCESS) {  /* Did we get one? */
+       panic("thread_init: kmem_suballoc for stacks failed - ret = %d\n", ret);  /* Die */
+   }
+   stack = vm_map_min(stack_map);  /* Make sure we skip the first hunk */
+   ret = vm_map_enter(stack_map, &stack, PAGE_SIZE, 0,  /* Make sure there is nothing at the start */
+       0,  /* Force it at start */
+       VM_OBJECT_NULL, 0,  /* No object yet */
+       FALSE,  /* No copy */
+       VM_PROT_NONE,  /* Allow no access */
+       VM_PROT_NONE,  /* Allow no access */
+       VM_INHERIT_DEFAULT);  /* Just be normal */
+
+   if(ret != KERN_SUCCESS) {  /* Did it work? */
+       panic("thread_init: dummy alignment allocation failed; ret = %d\n", ret);
    }
+
+#endif  /* MACHINE_STACK */

    /*
     * Initialize any machine-dependent
     * per-thread structures necessary.
     */
-   thread_machine_init();
+   machine_thread_init();
 }

+/*
+ * Called at splsched.
+ */
 void
 thread_reaper_enqueue(
    thread_t thread)
 {
-   /*
-    * thread lock is already held, splsched()
-    * not necessary here.
-    */
    simple_lock(&reaper_lock);
    enqueue_tail(&reaper_queue, (queue_entry_t)thread);
-#if 0 /* CHECKME! */
-   /*
-    * Since thread has been put in the reaper_queue, it must no longer
-    * be preempted (otherwise, it could be put back in a run queue).
-    */
-   thread->preempt = TH_NOT_PREEMPTABLE;
-#endif
    simple_unlock(&reaper_lock);

-   thread_call_enter(thread_reaper_call);
+   thread_wakeup((event_t)&reaper_queue);
 }

+void
+thread_termination_continue(void)
+{
+   panic("thread_termination_continue");
+   /*NOTREACHED*/
+}

 /*
  * Routine: thread_terminate_self
@@ -659,367 +618,343 @@ thread_reaper_enqueue(
 void
 thread_terminate_self(void)
 {
-   register thread_t thread = current_thread();
-   thread_act_t thr_act = thread->top_act;
+   thread_act_t thr_act = current_act();
+   thread_t thread;
    task_t task = thr_act->task;
-   int active_acts;
+   long active_acts;
    spl_t s;

    /*
     * We should be at the base of the inheritance chain.
     */
+   thread = act_lock_thread(thr_act);
    assert(thr_act->thread == thread);

+   /* This will allow no more control ops on this thr_act. */
+   ipc_thr_act_disable(thr_act);
+
+   /* Clean-up any ulocks that are still owned by the thread
+    * activation (acquired but not released or handed-off).
+    */
+   act_ulock_release_all(thr_act);
+
+   act_unlock_thread(thr_act);
+
+   _mk_sp_thread_depress_abort(thread, TRUE);
+
    /*
     * Check to see if this is the last active activation.  By
     * this we mean the last activation to call thread_terminate_self.
     * If so, and the task is associated with a BSD process, we
     * need to call BSD and let them clean up.
     */
-   task_lock(task);
-   active_acts = --task->active_act_count;
-   task_unlock(task);
-   if (!active_acts && task->bsd_info)
-       proc_exit(task->bsd_info);
-
-#ifdef CALLOUT_RPC_MODEL
-   if (thr_act->lower) {
-       /*
-        * JMM - RPC will not be using a callout/stack manipulation
-        * mechanism.  instead we will let it return normally as if
-        * from a continuation.  Accordingly, these need to be cleaned
-        * up a bit.
-        */
-       act_switch_swapcheck(thread, (ipc_port_t)0);
-       act_lock(thr_act);  /* hierarchy violation XXX */
-       (void) switch_act(THR_ACT_NULL);
-       assert(thr_act->ref_count == 1);  /* XXX */
-       /* act_deallocate(thr_act);  XXX */
-       prev_act = thread->top_act;
-       /*
-        * disable preemption to protect kernel stack changes
-        * disable_preemption();
-        * MACH_RPC_RET(prev_act) = KERN_RPC_SERVER_TERMINATED;
-        * machine_kernel_stack_init(thread, mach_rpc_return_error);
-        */
-       act_unlock(thr_act);
-
-       /*
-        * Load_context(thread);
-        */
-       /* NOTREACHED */
-   }
+   active_acts = hw_atomic_sub(&task->active_thread_count, 1);

-#else /* !CALLOUT_RPC_MODEL */
+   if (active_acts == 0 && task->bsd_info)
+       proc_exit(task->bsd_info);

+   /* JMM - for now, no migration */
    assert(!thr_act->lower);

-#endif /* CALLOUT_RPC_MODEL */
-
-   s = splsched();
-   thread_lock(thread);
-   thread->active = FALSE;
-   thread_unlock(thread);
-   splx(s);
-
    thread_timer_terminate();

    /* flush any lazy HW state while in own context */
-   thread_machine_flush(thr_act);
-
    ipc_thread_terminate(thread);

    s = splsched();
    thread_lock(thread);

-   thread->state |= (TH_HALTED|TH_TERMINATE);
+   thread->state |= TH_TERMINATE;
    assert((thread->state & TH_UNINT) == 0);
-#if 0 /* CHECKME! */
-   /*
-    * Since thread has been put in the reaper_queue, it must no longer
-    * be preempted (otherwise, it could be put back in a run queue).
-    */
-   thread->preempt = TH_NOT_PREEMPTABLE;
-#endif
    thread_mark_wait_locked(thread, THREAD_UNINT);
+   assert(thread->promotions == 0);
    thread_unlock(thread);
    /* splx(s); */

    ETAP_SET_REASON(thread, BLOCKED_ON_TERMINATION);
-   thread_block((void (*)(void)) 0);
-   panic("the zombie walks!");
+   thread_block(thread_termination_continue);
    /*NOTREACHED*/
 }

-
 /*
  * Create a new thread.
- * Doesn't start the thread running; It first must be attached to
- * an activation - then use thread_go to start it.
+ * Doesn't start the thread running.
 */
-kern_return_t
-thread_create_shuttle(
-   thread_act_t thr_act,
+static kern_return_t
+thread_create_internal(
+   task_t parent_task,
    integer_t priority,
    void (*start)(void),
-   thread_t *new_thread)
+   thread_t *out_thread)
 {
-   thread_t new_shuttle;
-   task_t parent_task = thr_act->task;
+   thread_t new_thread;
    processor_set_t pset;
-   kern_return_t result;
-   sched_policy_t *policy;
-   sf_return_t sfr;
-   int suspcnt;
-
-   assert(!thr_act->thread);
-   assert(!thr_act->pool_port);
+   static thread_t first_thread;

    /*
     * Allocate a thread and initialize static fields
     */
-   new_shuttle = (thread_t)zalloc(thread_shuttle_zone);
-   if (new_shuttle == THREAD_NULL)
+   if (first_thread == NULL)
+       new_thread = first_thread = current_act();
+   else
+       new_thread = (thread_t)zalloc(thread_zone);
+   if (new_thread == NULL)
        return (KERN_RESOURCE_SHORTAGE);

-   *new_shuttle = thr_sh_template;
+   if (new_thread != first_thread)
+       *new_thread = thread_template;

-   thread_lock_init(new_shuttle);
-   rpc_lock_init(new_shuttle);
-   wake_lock_init(new_shuttle);
-   new_shuttle->sleep_stamp = sched_tick;
+#ifdef MACH_BSD
+   {
+       extern void *uthread_alloc(task_t, thread_act_t);

+       new_thread->uthread = uthread_alloc(parent_task, new_thread);
+       if (new_thread->uthread == NULL) {
+           zfree(thread_zone, (vm_offset_t)new_thread);
+           return (KERN_RESOURCE_SHORTAGE);
+       }
+   }
+#endif  /* MACH_BSD */

-   pset = parent_task->processor_set;
-   if (!pset->active) {
-       pset = &default_pset;
+   if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
+#ifdef MACH_BSD
+       {
+           extern void uthread_free(task_t, void *, void *, void *);
+           void *ut = new_thread->uthread;
+
+           new_thread->uthread = NULL;
+           uthread_free(parent_task, (void *)new_thread, ut, parent_task->bsd_info);
+       }
+#endif  /* MACH_BSD */
+       zfree(thread_zone, (vm_offset_t)new_thread);
+       return (KERN_FAILURE);
    }
+
+   new_thread->task = parent_task;
+
+   thread_lock_init(new_thread);
+   wake_lock_init(new_thread);
+
+   mutex_init(&new_thread->lock, ETAP_THREAD_ACT);
+
+   ipc_thr_act_init(parent_task, new_thread);
+
+   ipc_thread_init(new_thread);
+   queue_init(&new_thread->held_ulocks);
+   act_prof_init(new_thread, parent_task);
+
+   new_thread->continuation = start;
+   new_thread->sleep_stamp = sched_tick;
+
+   pset = parent_task->processor_set;
+   assert(pset == &default_pset);
    pset_lock(pset);

    task_lock(parent_task);
+   assert(parent_task->processor_set == pset);

    /*
-    * Don't need to initialize because the context switch
-    * code will set it before it can be used.
-    */
-   if (!parent_task->active) {
+   if ( !parent_task->active ||
+        (parent_task->thread_count >= THREAD_MAX &&
+         parent_task != kernel_task)) {
        task_unlock(parent_task);
        pset_unlock(pset);
-       zfree(thread_shuttle_zone, (vm_offset_t) new_shuttle);
+
+#ifdef MACH_BSD
+       {
+           extern void uthread_free(task_t, void *, void *, void *);
+           void *ut = new_thread->uthread;
+
+           new_thread->uthread = NULL;
+           uthread_free(parent_task, (void *)new_thread, ut, parent_task->bsd_info);
+       }
+#endif  /* MACH_BSD */
+       act_prof_deallocate(new_thread);
+       ipc_thr_act_terminate(new_thread);
+       machine_thread_destroy(new_thread);
+       zfree(thread_zone, (vm_offset_t) new_thread);
        return (KERN_FAILURE);
    }

-   act_attach(thr_act, new_shuttle, 0);
+   act_attach(new_thread, new_thread);
+
+   task_reference_locked(parent_task);

-   /* Chain the thr_act onto the task's list */
-   queue_enter(&parent_task->thr_acts, thr_act, thread_act_t, thr_acts);
-   parent_task->thr_act_count++;
-   parent_task->res_act_count++;
-   parent_task->active_act_count++;
+   /* Cache the task's map */
+   new_thread->map = parent_task->map;

-   /* Associate the thread with that scheduling policy */
-   new_shuttle->policy = parent_task->policy;
-   policy = &sched_policy[new_shuttle->policy];
-   sfr = policy->sp_ops.sp_thread_attach(policy, new_shuttle);
-   if (sfr != SF_SUCCESS)
-       panic("thread_create_shuttle: sp_thread_attach");
+   /* Chain the thread onto the task's list */
+   queue_enter(&parent_task->threads, new_thread, thread_act_t, task_threads);
+   parent_task->thread_count++;
+   parent_task->res_thread_count++;
+
+   /* So terminating threads don't need to take the task lock to decrement */
+   hw_atomic_add(&parent_task->active_thread_count, 1);

    /* Associate the thread with the processor set */
-   sfr = policy->sp_ops.sp_thread_processor_set(policy, new_shuttle, pset);
-   if (sfr != SF_SUCCESS)
-       panic("thread_create_shuttle: sp_thread_proceessor_set");
+   pset_add_thread(pset, new_thread);
+
+   thread_timer_setup(new_thread);

    /* Set the thread's scheduling parameters */
-   new_shuttle->max_priority = parent_task->max_priority;
-   new_shuttle->priority = (priority < 0)? parent_task->priority: priority;
-   if (new_shuttle->priority > new_shuttle->max_priority)
-       new_shuttle->priority = new_shuttle->max_priority;
-   sfr = policy->sp_ops.sp_thread_setup(policy, new_shuttle);
-   if (sfr != SF_SUCCESS)
-       panic("thread_create_shuttle: sp_thread_setup");
+   if (parent_task != kernel_task)
+       new_thread->sched_mode |= TH_MODE_TIMESHARE;
+   new_thread->max_priority = parent_task->max_priority;
+   new_thread->task_priority = parent_task->priority;
+   new_thread->priority = (priority < 0)? parent_task->priority: priority;
+   if (new_thread->priority > new_thread->max_priority)
+       new_thread->priority = new_thread->max_priority;
+   new_thread->importance =
+       new_thread->priority - new_thread->task_priority;
+   new_thread->sched_stamp = sched_tick;
+   compute_priority(new_thread, FALSE);

 #if ETAP_EVENT_MONITOR
    new_thread->etap_reason = 0;
    new_thread->etap_trace = FALSE;
 #endif  /* ETAP_EVENT_MONITOR */

-   new_shuttle->active = TRUE;
-   thr_act->active = TRUE;
-   pset_unlock(pset);
+   new_thread->active = TRUE;

+   *out_thread = new_thread;

-   /*
-    * No need to lock thr_act, since it can't be known to anyone --
-    * we set its suspend_count to one more than the task suspend_count
-    * by calling thread_hold.
-    */
-   thr_act->user_stop_count = 1;
-   for (suspcnt = thr_act->task->suspend_count + 1; suspcnt; --suspcnt)
-       thread_hold(thr_act);
-   task_unlock(parent_task);
-
-   /*
-    * Thread still isn't runnable yet (our caller will do
-    * that).  Initialize runtime-dependent fields here.
-    */
-   result = thread_machine_create(new_shuttle, thr_act, thread_continue);
-   assert (result == KERN_SUCCESS);
-
-   machine_kernel_stack_init(new_shuttle, thread_continue);
-   ipc_thread_init(new_shuttle);
-   thread_start(new_shuttle, start);
-   thread_timer_setup(new_shuttle);
-
-   *new_thread = new_shuttle;
-
    {
    long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

+       kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);
+
+       KERNEL_DEBUG_CONSTANT(
+           TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
+           (vm_address_t)new_thread, dbg_arg2, 0, 0, 0);

-       KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 1)) | DBG_FUNC_NONE,
-           (vm_address_t)new_shuttle, 0,0,0,0);
+       kdbg_trace_string(parent_task->bsd_info,
+           &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

-       kdbg_trace_string(parent_task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3,
-           &dbg_arg4);
-       KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 1)) | DBG_FUNC_NONE,
-           dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
+       KERNEL_DEBUG_CONSTANT(
+           TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
+           dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
    }

    return (KERN_SUCCESS);
 }

+extern void thread_bootstrap_return(void);
+
 kern_return_t
 thread_create(
    task_t task,
-   thread_act_t *new_act)
+   thread_act_t *new_thread)
 {
-   thread_act_t thr_act;
-   thread_t thread;
    kern_return_t result;
-   sched_policy_t *policy;
-   sf_return_t sfr;
-   spl_t s;
-   extern void thread_bootstrap_return(void);
+   thread_t thread;

-   if (task == TASK_NULL)
-       return KERN_INVALID_ARGUMENT;
+   if (task == TASK_NULL || task == kernel_task)
+       return (KERN_INVALID_ARGUMENT);

-   result = act_create(task, &thr_act);
+   result = thread_create_internal(task, -1, thread_bootstrap_return, &thread);
    if (result != KERN_SUCCESS)
        return (result);

-   result = thread_create_shuttle(thr_act, -1, thread_bootstrap_return, &thread);
-   if (result != KERN_SUCCESS) {
-       act_deallocate(thr_act);
-       return (result);
-   }
-
-   if (task->kernel_loaded)
-       thread_user_to_kernel(thread);
+   thread->user_stop_count = 1;
+   thread_hold(thread);
+   if (task->suspend_count > 0)
+       thread_hold(thread);

-   /* Start the thread running (it will immediately suspend itself).  */
-   s = splsched();
-   thread_ast_set(thr_act, AST_APC);
-   thread_lock(thread);
-   thread_go_locked(thread, THREAD_AWAKENED);
-   thread_unlock(thread);
-   splx(s);
+   pset_unlock(task->processor_set);
+   task_unlock(task);

-   *new_act = thr_act;
+   *new_thread = thread;

    return (KERN_SUCCESS);
 }

-/*
- * Update thread that belongs to a task created via kernel_task_create().
- */
-void
-thread_user_to_kernel(
-   thread_t thread)
-{
-   /*
-    * Used to set special swap_func here...
-    */
-}
-
 kern_return_t
 thread_create_running(
-   register task_t parent_task,
+   register task_t task,
    int flavor,
    thread_state_t new_state,
    mach_msg_type_number_t new_state_count,
-   thread_act_t *child_act)  /* OUT */
+   thread_act_t *new_thread)
 {
    register kern_return_t result;
+   thread_t thread;
+
+   if (task == TASK_NULL || task == kernel_task)
+       return (KERN_INVALID_ARGUMENT);

-   result = thread_create(parent_task, child_act);
+   result = thread_create_internal(task, -1, thread_bootstrap_return, &thread);
    if (result != KERN_SUCCESS)
        return (result);

-   result = act_machine_set_state(*child_act, flavor,
-       new_state, new_state_count);
+   result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
    if (result != KERN_SUCCESS) {
+       pset_unlock(task->processor_set);
+       task_unlock(task);

-       (void) thread_terminate(*child_act);
-       return (result);
-   }
-
-   result = thread_resume(*child_act);
-   if (result != KERN_SUCCESS) {
-       (void) thread_terminate(*child_act);
+       thread_terminate(thread);
+       act_deallocate(thread);
        return (result);
    }

+   act_lock(thread);
+   clear_wait(thread, THREAD_AWAKENED);
+   thread->started = TRUE;
+   act_unlock(thread);
+   pset_unlock(task->processor_set);
+   task_unlock(task);
+
+   *new_thread = thread;
+
    return (result);
 }

 /*
  * kernel_thread:
  *
- * Create and kernel thread in the specified task, and
- * optionally start it running.
+ * Create a thread in the kernel task
+ * to execute in kernel context.
 */
 thread_t
-kernel_thread_with_priority(
-   task_t task,
-   integer_t priority,
+kernel_thread_create(
    void (*start)(void),
-   boolean_t start_running)
+   integer_t priority)
 {
    kern_return_t result;
+   task_t task = kernel_task;
    thread_t thread;
-   thread_act_t thr_act;
-   sched_policy_t *policy;
-   sf_return_t sfr;
-   spl_t s;

-   result = act_create(task, &thr_act);
-   if (result != KERN_SUCCESS) {
-       return THREAD_NULL;
-   }
+   result = thread_create_internal(task, priority, start, &thread);
+   if (result != KERN_SUCCESS)
+       return (THREAD_NULL);

-   result = thread_create_shuttle(thr_act, priority, start, &thread);
-   if (result != KERN_SUCCESS) {
-       act_deallocate(thr_act);
-       return THREAD_NULL;
-   }
+   pset_unlock(task->processor_set);
+   task_unlock(task);

-   thread_swappable(thr_act, FALSE);
+   thread_doswapin(thread);
+   assert(thread->kernel_stack != 0);
+   thread->reserved_stack = thread->kernel_stack;

-   s = splsched();
-   thread_lock(thread);
+   act_deallocate(thread);

-   thr_act = thread->top_act;
-#if MACH_LDEBUG
-   thread->kthread = TRUE;
-#endif /* MACH_LDEBUG */
+   return (thread);
+}

-   if (start_running)
-       thread_go_locked(thread, THREAD_AWAKENED);
+thread_t
+kernel_thread_with_priority(
+   void (*start)(void),
+   integer_t priority)
+{
+   thread_t thread;

-   thread_unlock(thread);
-   splx(s);
+   thread = kernel_thread_create(start, priority);
+   if (thread == THREAD_NULL)
+       return (THREAD_NULL);

-   if (start_running)
-       thread_resume(thr_act);
+   act_lock(thread);
+   clear_wait(thread, THREAD_AWAKENED);
+   thread->started = TRUE;
+   act_unlock(thread);

-   act_deallocate(thr_act);
+#ifdef i386
+   thread_bind(thread, master_processor);
+#endif /* i386 */

    return (thread);
 }
@@ -1028,117 +963,86 @@ kernel_thread(
    task_t task,
    void (*start)(void))
 {
-   return kernel_thread_with_priority(task, -1, start, TRUE);
+   if (task != kernel_task)
+       panic("kernel_thread");
+
+   return kernel_thread_with_priority(start, -1);
 }

 unsigned int c_weird_pset_ref_exit = 0; /* pset code raced us */

+#if MACH_HOST
+/* Preclude thread processor set assignement */
+#define thread_freeze(thread)   assert((thread)->processor_set == &default_pset)
+
+/* Allow thread processor set assignement */
+#define thread_unfreeze(thread) assert((thread)->processor_set == &default_pset)
+
+#endif  /* MACH_HOST */
+
 void
 thread_deallocate(
    thread_t thread)
 {
    task_t task;
    processor_set_t pset;
-   sched_policy_t *policy;
-   sf_return_t sfr;
+   int refs;
    spl_t s;

    if (thread == THREAD_NULL)
        return;

    /*
-    * First, check for new count > 1 (the common case).
+    * First, check for new count > 0 (the common case).
     * Only the thread needs to be locked.
     */
    s = splsched();
    thread_lock(thread);
-   if (--thread->ref_count > 1) {
-       thread_unlock(thread);
-       splx(s);
-       return;
-   }
-
-   /*
-    * Down to pset reference, lets try to clean up.
-    * However, the processor set may make more. Its lock
-    * also dominate the thread lock.  So, reverse the
-    * order of the locks and see if its still the last
-    * reference;
-    */
-   assert(thread->ref_count == 1); /* Else this is an extra dealloc! */
+   refs = --thread->ref_count;
    thread_unlock(thread);
    splx(s);

-#if MACH_HOST
-   thread_freeze(thread);
-#endif  /* MACH_HOST */
-
-   pset = thread->processor_set;
-   pset_lock(pset);
-
-   s = splsched();
-   thread_lock(thread);
-
-   if (thread->ref_count > 1) {
-#if MACH_HOST
-       boolean_t need_wakeup = FALSE;
-       /*
-        * processor_set made extra reference.
-        */
-       /* Inline the unfreeze */
-       thread->may_assign = TRUE;
-       if (thread->assign_active) {
-           need_wakeup = TRUE;
-           thread->assign_active = FALSE;
-       }
-#endif  /* MACH_HOST */
-       thread_unlock(thread);
-       splx(s);
-       pset_unlock(pset);
-#if MACH_HOST
-       if (need_wakeup)
-           thread_wakeup((event_t)&thread->assign_active);
-#endif  /* MACH_HOST */
-       c_weird_pset_ref_exit++;
+   if (refs > 0)
        return;
-   }
-#if MACH_HOST
-   assert(thread->assign_active == FALSE);
-#endif  /* MACH_HOST */
+
+   if (thread == current_thread())
+       panic("thread_deallocate");

    /*
-    * Thread only had pset reference - we can remove it.
+    * There is a dangling pointer to the thread from the
+    * processor_set.  To clean it up, we freeze the thread
+    * in the pset (because pset destruction can cause even
+    * reference-less threads to be reassigned to the default
+    * pset) and then remove it.
    */
-   if (thread == current_thread())
-       panic("thread deallocating itself");

-   /* Detach thread (shuttle) from its sched policy */
-   policy = &sched_policy[thread->policy];
-   sfr = policy->sp_ops.sp_thread_detach(policy, thread);
-   if (sfr != SF_SUCCESS)
-       panic("thread_deallocate: sp_thread_detach");
+#if MACH_HOST
+   thread_freeze(thread);
+#endif

+   pset = thread->processor_set;
+   pset_lock(pset);
    pset_remove_thread(pset, thread);
-   thread->ref_count = 0;
-   thread_unlock(thread);  /* no more references - safe */
-   splx(s);
    pset_unlock(pset);
-   pset_deallocate(thread->processor_set);
+
+#if MACH_HOST
+   thread_unfreeze(thread);
+#endif
+
+   pset_deallocate(pset);

-   /* frees kernel stack & other MD resources */
-   if (thread->stack_privilege && (thread->stack_privilege != thread->kernel_stack)) {
-       vm_offset_t stack;
-       int s = splsched();
-       stack = thread->stack_privilege;
-       stack_free(thread);
-       thread->kernel_stack = stack;
-       splx(s);
+   if (thread->reserved_stack != 0) {
+       if (thread->reserved_stack != thread->kernel_stack)
+           stack_free_stack(thread->reserved_stack);
+       thread->reserved_stack = 0;
    }
-   thread->stack_privilege = 0;
-   thread_machine_destroy(thread);
-   zfree(thread_shuttle_zone, (vm_offset_t) thread);
+   if (thread->kernel_stack != 0)
+       stack_free(thread);
+
+   machine_thread_destroy(thread);
+
+   zfree(thread_zone, (vm_offset_t) thread);
 }

 void
@@ -1152,7 +1056,7 @@ thread_reference(
    s = splsched();
    thread_lock(thread);
-   thread->ref_count++;
+   thread_reference_locked(thread);
    thread_unlock(thread);
    splx(s);
 }
@@ -1192,45 +1096,41 @@ thread_info_shuttle(
    thread_read_times(thread, &basic_info->user_time,
        &basic_info->system_time);

-   if (thread->policy & (POLICY_TIMESHARE|POLICY_RR|POLICY_FIFO)) {
-       /*
-        * Update lazy-evaluated scheduler info because someone wants it.
-        */
-       if (thread->sched_stamp != sched_tick)
-           update_priority(thread);
-
-       basic_info->sleep_time = 0;
-
-       /*
-        * To calculate cpu_usage, first correct for timer rate,
-        * then for 5/8 ageing.  The correction factor [3/5] is
-        * (1/(5/8) - 1).
-        */
-       basic_info->cpu_usage = (thread->cpu_usage << SCHED_TICK_SHIFT) /
-           (TIMER_RATE / TH_USAGE_SCALE);
-       basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
+   /*
+    * Update lazy-evaluated scheduler info because someone wants it.
+    */
+   if (thread->sched_stamp != sched_tick)
+       update_priority(thread);
+
+   basic_info->sleep_time = 0;
+
+   /*
+    * To calculate cpu_usage, first correct for timer rate,
+    * then for 5/8 ageing.  The correction factor [3/5] is
+    * (1/(5/8) - 1).
+    */
+   basic_info->cpu_usage = (thread->cpu_usage << SCHED_TICK_SHIFT) /
+       (TIMER_RATE / TH_USAGE_SCALE);
+   basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
 #if SIMPLE_CLOCK
-       /*
-        * Clock drift compensation.
-        */
-       basic_info->cpu_usage =
-           (basic_info->cpu_usage * 1000000) / sched_usec;
+   /*
+    * Clock drift compensation.
+    */
+   basic_info->cpu_usage = (basic_info->cpu_usage * 1000000) / sched_usec;
 #endif  /* SIMPLE_CLOCK */
-   }
-   else
-       basic_info->sleep_time = basic_info->cpu_usage = 0;

-   basic_info->policy = thread->policy;
+   basic_info->policy = ((thread->sched_mode & TH_MODE_TIMESHARE)?
+                               POLICY_TIMESHARE: POLICY_RR);

    flags = 0;
-   if (thread->state & TH_SWAPPED_OUT)
-       flags = TH_FLAGS_SWAPPED;
-   else
    if (thread->state & TH_IDLE)
-       flags = TH_FLAGS_IDLE;
+       flags |= TH_FLAGS_IDLE;
+
+   if (thread->state & TH_STACK_HANDOFF)
+       flags |= TH_FLAGS_SWAPPED;

    state = 0;
-   if (thread->state & TH_HALTED)
+   if (thread->state & TH_TERMINATE)
        state = TH_STATE_HALTED;
    else
    if (thread->state & TH_RUN)
@@ -1269,19 +1169,25 @@ thread_info_shuttle(
        s = splsched();
        thread_lock(thread);

-       if (thread->policy != POLICY_TIMESHARE) {
+       if (!(thread->sched_mode & TH_MODE_TIMESHARE)) {
            thread_unlock(thread);
            splx(s);

            return (KERN_INVALID_POLICY);
        }

-       ts_info->base_priority = thread->priority;
-       ts_info->max_priority = thread->max_priority;
-       ts_info->cur_priority = thread->sched_pri;
+       ts_info->depressed = (thread->sched_mode & TH_MODE_ISDEPRESSED) != 0;
+       if (ts_info->depressed) {
+           ts_info->base_priority = DEPRESSPRI;
+           ts_info->depress_priority = thread->priority;
+       }
+       else {
+           ts_info->base_priority = thread->priority;
+           ts_info->depress_priority = -1;
+       }

-       ts_info->depressed = (thread->depress_priority >= 0);
-       ts_info->depress_priority = thread->depress_priority;
+       ts_info->cur_priority = thread->sched_pri;
+       ts_info->max_priority = thread->max_priority;

        thread_unlock(thread);
        splx(s);
@@ -1292,35 +1198,10 @@ thread_info_shuttle(
    }
    else
    if (flavor == THREAD_SCHED_FIFO_INFO) {
-       policy_fifo_info_t fifo_info;
-
        if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
            return (KERN_INVALID_ARGUMENT);

-       fifo_info = (policy_fifo_info_t)thread_info_out;
-
-       s = splsched();
-       thread_lock(thread);
-
-       if (thread->policy != POLICY_FIFO) {
-           thread_unlock(thread);
-           splx(s);
-
-           return (KERN_INVALID_POLICY);
-       }
-
-       fifo_info->base_priority = thread->priority;
-       fifo_info->max_priority = thread->max_priority;
-
-       fifo_info->depressed = (thread->depress_priority >= 0);
-       fifo_info->depress_priority = thread->depress_priority;
-
-       thread_unlock(thread);
-       splx(s);
-
-       *thread_info_count = POLICY_FIFO_INFO_COUNT;
-
-       return (KERN_SUCCESS);
+       return (KERN_INVALID_POLICY);
    }
    else
    if (flavor == THREAD_SCHED_RR_INFO) {
@@ -1334,19 +1215,25 @@ thread_info_shuttle(
        s = splsched();
        thread_lock(thread);

-       if (thread->policy != POLICY_RR) {
+       if (thread->sched_mode & TH_MODE_TIMESHARE) {
            thread_unlock(thread);
            splx(s);

            return (KERN_INVALID_POLICY);
        }

-       rr_info->base_priority = thread->priority;
-       rr_info->max_priority = thread->max_priority;
-       rr_info->quantum = min_quantum_ms;
+       rr_info->depressed = (thread->sched_mode & TH_MODE_ISDEPRESSED) != 0;
+       if (rr_info->depressed) {
+           rr_info->base_priority = DEPRESSPRI;
+           rr_info->depress_priority = thread->priority;
+       }
+       else {
+           rr_info->base_priority = thread->priority;
+           rr_info->depress_priority = -1;
+       }

-       rr_info->depressed = (thread->depress_priority >= 0);
-       rr_info->depress_priority = thread->depress_priority;
+       rr_info->max_priority = thread->max_priority;
+       rr_info->quantum = std_quantum_us / 1000;

        thread_unlock(thread);
        splx(s);
@@ -1364,14 +1251,12 @@ thread_doreap(
    register thread_t thread)
 {
    thread_act_t thr_act;
-   struct ipc_port *pool_port;

    thr_act = thread_lock_act(thread);
    assert(thr_act && thr_act->thread == thread);

-   act_locked_act_reference(thr_act);
-   pool_port = thr_act->pool_port;
+   act_reference_locked(thr_act);

    /*
     * Replace `act_unlock_thread()' with individual
     * to determine which locks are held, confusing
     * `act_unlock_thread()'.)
*/ - rpc_unlock(thread); - if (pool_port != IP_NULL) - ip_unlock(pool_port); act_unlock(thr_act); /* Remove the reference held by a rooted thread */ - if (pool_port == IP_NULL) - act_deallocate(thr_act); + act_deallocate(thr_act); /* Remove the reference held by the thread: */ act_deallocate(thr_act); } -static thread_call_data_t thread_reaper_call_data; - /* * reaper_thread: * - * This kernel thread runs forever looking for threads to destroy - * (when they request that they be destroyed, of course). - * - * The reaper thread will disappear in the next revision of thread - * control when it's function will be moved into thread_dispatch. + * This kernel thread runs forever looking for terminating + * threads, releasing their "self" references. */ static void -_thread_reaper( - thread_call_param_t p0, - thread_call_param_t p1) +reaper_thread_continue(void) { register thread_t thread; - spl_t s; - s = splsched(); + (void)splsched(); simple_lock(&reaper_lock); while ((thread = (thread_t) dequeue_head(&reaper_queue)) != THREAD_NULL) { simple_unlock(&reaper_lock); - - /* - * wait for run bit to clear - */ - thread_lock(thread); - if (thread->state & TH_RUN) - panic("thread reaper: TH_RUN"); - thread_unlock(thread); - splx(s); + (void)spllo(); thread_doreap(thread); - s = splsched(); + (void)splsched(); simple_lock(&reaper_lock); } + assert_wait((event_t)&reaper_queue, THREAD_UNINT); simple_unlock(&reaper_lock); - splx(s); + (void)spllo(); + + thread_block(reaper_thread_continue); + /*NOTREACHED*/ } -void -thread_reaper(void) +static void +reaper_thread(void) { - thread_call_setup(&thread_reaper_call_data, _thread_reaper, NULL); - thread_reaper_call = &thread_reaper_call_data; + reaper_thread_continue(); + /*NOTREACHED*/ +} - _thread_reaper(NULL, NULL); +void +thread_reaper_init(void) +{ + kernel_thread_with_priority(reaper_thread, MINPRI_KERNEL); } kern_return_t @@ -1450,9 +1323,6 @@ thread_assign( thread_act_t thr_act, processor_set_t new_pset) { -#ifdef lint - thread++; new_pset++; -#endif /* lint */ return(KERN_FAILURE); } @@ -1495,16 +1365,17 @@ thread_get_assignment( } /* - * thread_wire: + * thread_wire_internal: * * Specify that the target thread must always be able * to run and to allocate memory. */ kern_return_t -thread_wire( +thread_wire_internal( host_priv_t host_priv, thread_act_t thr_act, - boolean_t wired) + boolean_t wired, + boolean_t *prev_state) { spl_t s; thread_t thread; @@ -1523,7 +1394,6 @@ thread_wire( /* * This implementation only works for the current thread. - * See stack_privilege. */ if (thr_act != current_act()) return KERN_INVALID_ARGUMENT; @@ -1531,6 +1401,10 @@ thread_wire( s = splsched(); thread_lock(thread); + if (prev_state) { + *prev_state = thread->vm_privilege; + } + if (wired) { if (thread->vm_privilege == FALSE) vm_page_free_reserve(1); /* XXX */ @@ -1545,116 +1419,25 @@ thread_wire( splx(s); act_unlock_thread(thr_act); - /* - * Make the thread unswappable. - */ - if (wired) - thread_swappable(thr_act, FALSE); - return KERN_SUCCESS; } -/* - * thread_collect_scan: - * - * Attempt to free resources owned by threads. - */ - -void -thread_collect_scan(void) -{ - /* This code runs very quickly! */ -} - -boolean_t thread_collect_allowed = TRUE; -unsigned thread_collect_last_tick = 0; -unsigned thread_collect_max_rate = 0; /* in ticks */ /* - * consider_thread_collect: + * thread_wire: * - * Called by the pageout daemon when the system needs more free pages. 
- */
-
-void
-consider_thread_collect(void)
-{
-   /*
-    * By default, don't attempt thread collection more frequently
-    * than once a second (one scheduler tick).
-    */
-
-   if (thread_collect_max_rate == 0)
-       thread_collect_max_rate = 2;  /* sched_tick is a 1 second resolution 2 here insures at least 1 second interval */
-
-   if (thread_collect_allowed &&
-       (sched_tick >
-        (thread_collect_last_tick + thread_collect_max_rate))) {
-       thread_collect_last_tick = sched_tick;
-       thread_collect_scan();
-   }
-}
-
-#if MACH_DEBUG
-#if STACK_USAGE
-
-vm_size_t
-stack_usage(
-   register vm_offset_t stack)
-{
-   int i;
-
-   for (i = 0; i < KERNEL_STACK_SIZE/sizeof(unsigned int); i++)
-       if (((unsigned int *)stack)[i] != STACK_MARKER)
-           break;
-
-   return KERNEL_STACK_SIZE - i * sizeof(unsigned int);
-}
-
-/*
- * Machine-dependent code should call stack_init
- * before doing its own initialization of the stack.
- */
-
-static void
-stack_init(
-   register vm_offset_t stack,
-   unsigned int bytes)
-{
-   if (stack_check_usage) {
-       int i;
-
-       for (i = 0; i < bytes / sizeof(unsigned int); i++)
-           ((unsigned int *)stack)[i] = STACK_MARKER;
-   }
-}
-
-/*
- * Machine-dependent code should call stack_finalize
- * before releasing the stack memory.
+ * User-api wrapper for thread_wire_internal()
 */
+kern_return_t
+thread_wire(
+   host_priv_t host_priv,
+   thread_act_t thr_act,
+   boolean_t wired)

-void
-stack_finalize(
-   register vm_offset_t stack)
 {
-   if (stack_check_usage) {
-       vm_size_t used = stack_usage(stack);
-
-       simple_lock(&stack_usage_lock);
-       if (used > stack_max_usage)
-           stack_max_usage = used;
-       simple_unlock(&stack_usage_lock);
-       if (used > stack_max_use) {
-           printf("stack usage = %x\n", used);
-           panic("stack overflow");
-       }
-   }
+   return thread_wire_internal(host_priv, thr_act, wired, NULL);
 }
-#endif  /*STACK_USAGE*/
-#endif  /* MACH_DEBUG */

 kern_return_t
 host_stack_usage(
    host_t host,
@@ -1674,15 +1457,13 @@ host_stack_usage(
    if (host == HOST_NULL)
        return KERN_INVALID_HOST;

-   simple_lock(&stack_usage_lock);
-   maxusage = stack_max_usage;
-   simple_unlock(&stack_usage_lock);
+   maxusage = 0;

    stack_statistics(&total, &maxusage);

    *reservedp = 0;
    *totalp = total;
-   *spacep = *residentp = total * round_page(KERNEL_STACK_SIZE);
+   *spacep = *residentp = total * round_page_32(KERNEL_STACK_SIZE);
    *maxusagep = maxusage;
    *maxstackp = 0;
    return KERN_SUCCESS;
@@ -1718,6 +1499,8 @@ processor_set_stack_usage(
    vm_size_t size, size_needed;
    vm_offset_t addr;

+   spl_t s;
+
    if (pset == PROCESSOR_SET_NULL)
        return KERN_INVALID_ARGUMENT;
@@ -1753,16 +1536,20 @@ processor_set_stack_usage(
    }

    /* OK, have memory and the processor_set is locked & active */
-
+   s = splsched();
    threads = (thread_t *) addr;
    for (i = 0, thread = (thread_t) queue_first(&pset->threads);
-       i < actual;
-       i++,
+       !queue_end(&pset->threads, (queue_entry_t) thread);
        thread = (thread_t) queue_next(&thread->pset_threads)) {
-       thread_reference(thread);
-       threads[i] = thread;
+       thread_lock(thread);
+       if (thread->ref_count > 0) {
+           thread_reference_locked(thread);
+           threads[i++] = thread;
+       }
+       thread_unlock(thread);
    }
-   assert(queue_end(&pset->threads, (queue_entry_t) thread));
+   splx(s);
+   assert(i <= actual);

    /* can unlock processor set now that we have the thread refs */
    pset_unlock(pset);
@@ -1772,40 +1559,12 @@ processor_set_stack_usage(
    total = 0;
    maxusage = 0;
    maxstack = 0;
-   for (i = 0; i < actual; i++) {
-       int cpu;
-       thread_t thread = threads[i];
-       vm_offset_t stack = 0;
-
-       /*
-        * thread->kernel_stack is only accurate if the
-        * thread isn't swapped and is not executing.
-        *
-        * Of course, we don't have the appropriate locks
-        * for these shenanigans.
-        */
+   while (i > 0) {
+       thread_t thread = threads[--i];

-       stack = thread->kernel_stack;
-
-       for (cpu = 0; cpu < NCPUS; cpu++)
-           if (cpu_data[cpu].active_thread == thread) {
-               stack = active_stacks[cpu];
-               break;
-           }
-
-       if (stack != 0) {
+       if (thread->kernel_stack != 0)
            total++;
-           if (stack_check_usage) {
-               vm_size_t usage = stack_usage(stack);
-
-               if (usage > maxusage) {
-                   maxusage = usage;
-                   maxstack = (vm_offset_t) thread;
-               }
-           }
-       }
-
        thread_deallocate(thread);
    }
@@ -1813,7 +1572,7 @@ processor_set_stack_usage(
        kfree(addr, size);

    *totalp = total;
-   *residentp = *spacep = total * round_page(KERNEL_STACK_SIZE);
+   *residentp = *spacep = total * round_page_32(KERNEL_STACK_SIZE);
    *maxusagep = maxusage;
    *maxstackp = maxstack;
    return KERN_SUCCESS;
@@ -1821,7 +1580,7 @@
 #endif  /* MACH_DEBUG */
 }

-static int split_funnel_off = 0;
+int split_funnel_off = 0;
 funnel_t *
 funnel_alloc(
    int type)
@@ -1829,9 +1588,9 @@ funnel_alloc(
    mutex_t *m;
    funnel_t * fnl;
    if ((fnl = (funnel_t *)kalloc(sizeof(funnel_t))) != 0){
-       bzero(fnl, sizeof(funnel_t));
+       bzero((void *)fnl, sizeof(funnel_t));
        if ((m = mutex_alloc(0)) == (mutex_t *)NULL) {
-           kfree(fnl, sizeof(funnel_t));
+           kfree((vm_offset_t)fnl, sizeof(funnel_t));
            return(THR_FUNNEL_NULL);
        }
        fnl->fnl_mutex = m;
@@ -1847,7 +1606,7 @@ funnel_free(
    mutex_free(fnl->fnl_mutex);
    if (fnl->fnl_oldmutex)
        mutex_free(fnl->fnl_oldmutex);
-   kfree(fnl, sizeof(funnel_t));
+   kfree((vm_offset_t)fnl, sizeof(funnel_t));
 }

 void
@@ -1875,6 +1634,23 @@ funnel_unlock(
    fnl->fnl_mtxrelease = current_thread();
 }

+int refunnel_hint_enabled = 0;
+
+boolean_t
+refunnel_hint(
+   thread_t thread,
+   wait_result_t wresult)
+{
+   if ( !(thread->funnel_state & TH_FN_REFUNNEL) ||
+           wresult != THREAD_AWAKENED )
+       return (FALSE);
+
+   if (!refunnel_hint_enabled)
+       return (FALSE);
+
+   return (mutex_preblock(thread->funnel_lock->fnl_mutex, thread));
+}
+
 funnel_t *
 thread_funnel_get(
    void)
@@ -1972,17 +1748,20 @@ thread_funnel_merge(
 }

 void
-thread_set_cont_arg(int arg)
+thread_set_cont_arg(
+   int arg)
 {
-   thread_t th = current_thread();
-   th->cont_arg = arg;
+   thread_t self = current_thread();
+
+   self->saved.misc = arg;
 }

 int
 thread_get_cont_arg(void)
 {
-   thread_t th = current_thread();
-   return(th->cont_arg);
+   thread_t self = current_thread();
+
+   return (self->saved.misc);
 }

 /*
@@ -1992,8 +1771,17 @@ thread_get_cont_arg(void)
 #undef thread_should_halt
 boolean_t
 thread_should_halt(
-   thread_shuttle_t th)
+   thread_t th)
 {
    return(thread_should_halt_fast(th));
 }

+vm_offset_t min_valid_stack_address(void)
+{
+   return vm_map_min(stack_map);
+}
+
+vm_offset_t max_valid_stack_address(void)
+{
+   return vm_map_max(stack_map);
+}
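
[Editor's note] The stack_free()/stack_alloc_try() changes in this revision replace the old single locked free list with a two-level scheme: a one-entry per-CPU cache (stack_free_cache[cpu_number()]) consulted first, backed by the global stack_free_list taken under stack_lock(). The sketch below is a user-space model of that pattern, for illustration only; the simplified types, the stub cpu_number(), and the absence of splsched()/stack_lock() are assumptions of the sketch, not the kernel code.

#include <stddef.h>

#define NCPUS 4

/* One-entry per-CPU cache plus a global free list.  In the kernel the
 * global list is protected by stack_lock() and everything runs at
 * splsched; this model omits that and links free blocks through their
 * first word, the way stack_next() does. */
static void *free_cache[NCPUS];
static void *free_list;

static int cpu_number(void) { return 0; }   /* stand-in for the real query */

static void cache_free(void *stack) {
    void **slot = &free_cache[cpu_number()];
    if (*slot == NULL) {            /* fast path: park in the local slot */
        *slot = stack;
        return;
    }
    *(void **)stack = free_list;    /* slow path: push onto the global list */
    free_list = stack;
}

static void *cache_alloc_try(void) {
    void **slot = &free_cache[cpu_number()];
    void *stack = *slot;
    if (stack != NULL) {            /* fast path: per-CPU hit */
        *slot = NULL;
        return stack;
    }
    stack = free_list;              /* slow path: pop the global list */
    if (stack != NULL)
        free_list = *(void **)stack;
    return stack;                   /* NULL: caller must block in stack_alloc() */
}

The point of the single reusable slot per CPU is that the common free-then-reallocate sequence on one processor never touches the shared list or its lock; only overflow traffic is serialized, which is what the separate stack_cache_hits and stack_alloc_hits counters in the patch distinguish.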
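[Editor's note] In the thread_init() hunk, stack_alloc_bndry is computed as the next power of two above KERNEL_STACK_SIZE, and stack_alloc() then passes stack_alloc_bndry - 1 to kernel_memory_allocate() as an alignment mask, with min/max_valid_stack_address() bounding the dedicated stack_map. Power-of-two alignment lets the stack base be recovered from any in-stack address with a single mask. A minimal sketch of that arithmetic, assuming illustrative PAGE_SIZE and KERNEL_STACK_SIZE values (the real values are machine-dependent, and the sample address is invented):

#include <assert.h>

#define PAGE_SIZE          4096u    /* assumed, for illustration only */
#define KERNEL_STACK_SIZE 16384u

int main(void) {
    unsigned bndry;

    /* Same loop as the patch: find the next power of two above the
     * stack size, starting at the page size. */
    for (bndry = PAGE_SIZE; bndry <= KERNEL_STACK_SIZE; bndry <<= 1)
        ;
    assert(bndry == 32768u);

    /* Stacks allocated with alignment mask (bndry - 1) begin on a
     * bndry boundary, so the base is recoverable from any address
     * inside the stack by masking off the low bits. */
    unsigned long some_sp = 0x70000000ul + 0x1234;  /* hypothetical in-stack address */
    unsigned long base    = some_sp & ~((unsigned long)bndry - 1);
    assert(some_sp - base < bndry);
    return 0;
}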