/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	priority.c
 *	Author:	Avadis Tevanian, Jr.
 *	Date:	1986
 *
 *	Priority related scheduler bits.
 */

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <mach/machine.h>
#include <kern/host.h>
#include <kern/mach_param.h>
#include <kern/sched.h>
#include <sys/kdebug.h>
#include <kern/spl.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/ledger.h>
#include <machine/machparam.h>
#include <kern/machine.h>

#ifdef CONFIG_MACH_APPROXIMATE_TIME
#include <machine/commpage.h>  /* for commpage_update_mach_approximate_time */
#endif

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */

static void sched_update_thread_bucket(thread_t thread);

/*
 *	thread_quantum_expire:
 *
 *	Recalculate the quantum and priority for a thread.
 *
 *	Called at splsched.
 */

void
thread_quantum_expire(
	timer_call_param_t	p0,
	timer_call_param_t	p1)
{
	processor_t processor = p0;
	thread_t thread = p1;
	ast_t preempt;
	uint64_t ctime;
	int urgency;
	uint64_t ignore1, ignore2;

	assert(processor == current_processor());
	assert(thread == current_thread());

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_QUANTUM_EXPIRED) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	SCHED_STATS_QUANTUM_TIMER_EXPIRATION(processor);

	/*
	 * We bill CPU time to both the individual thread and its task.
	 *
	 * Because this balance adjustment could potentially attempt to wake this
	 * very thread, we must credit the ledger before taking the thread lock.
	 * The ledger pointers are only manipulated by the thread itself at the ast
	 * boundary.
	 *
	 * TODO: This fails to account for the time between when the timer was
	 * armed and when it fired.  It should be based on the system_timer and
	 * running a timer_update operation here.
	 */
	ledger_credit(thread->t_ledger, task_ledgers.cpu_time, thread->quantum_remaining);
	ledger_credit(thread->t_threadledger, thread_ledgers.cpu_time, thread->quantum_remaining);
	if (thread->t_bankledger) {
		ledger_credit(thread->t_bankledger, bank_ledgers.cpu_time,
		    (thread->quantum_remaining - thread->t_deduct_bank_ledger_time));
	}
	thread->t_deduct_bank_ledger_time = 0;

	ctime = mach_absolute_time();

#ifdef CONFIG_MACH_APPROXIMATE_TIME
	commpage_update_mach_approximate_time(ctime);
#endif

#if MONOTONIC
	mt_sched_update(thread);
#endif /* MONOTONIC */

	thread_lock(thread);

	/*
	 * We've run up until our quantum expiration, and will (potentially)
	 * continue without re-entering the scheduler, so update this now.
	 */
	processor->last_dispatch = ctime;
	thread->last_run_time = ctime;

	/*
	 *	Check for fail-safe trip.
	 */
	if ((thread->sched_mode == TH_MODE_REALTIME || thread->sched_mode == TH_MODE_FIXED) &&
	    !(thread->sched_flags & TH_SFLAG_PROMOTED) &&
	    !(thread->sched_flags & TH_SFLAG_PROMOTE_REASON_MASK) &&
	    !(thread->options & TH_OPT_SYSTEM_CRITICAL)) {
		uint64_t new_computation;

		new_computation = ctime - thread->computation_epoch;
		new_computation += thread->computation_metered;
		if (new_computation > max_unsafe_computation) {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_FAILSAFE) | DBG_FUNC_NONE,
			    (uintptr_t)thread->sched_pri, (uintptr_t)thread->sched_mode, 0, 0, 0);

			thread->safe_release = ctime + sched_safe_duration;

			sched_thread_mode_demote(thread, TH_SFLAG_FAILSAFE);
		}
	}

	/*
	 *	Recompute scheduled priority if appropriate.
	 */
	if (SCHED(can_update_priority)(thread))
		SCHED(update_priority)(thread);
	else
		SCHED(lightweight_update_priority)(thread);

	if (thread->sched_mode != TH_MODE_REALTIME)
		SCHED(quantum_expire)(thread);

	processor_state_update_from_thread(processor, thread);

	/*
	 *	This quantum is up, give this thread another.
	 */
	processor->first_timeslice = FALSE;

	thread_quantum_init(thread);

	/* Reload precise timing global policy to thread-local policy */
	thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);

	/*
	 * Since non-precise user/kernel time doesn't update the state/thread timer
	 * during privilege transitions, synthesize an event now.
	 */
	if (!thread->precise_user_kernel_time) {
		timer_update(PROCESSOR_DATA(processor, current_state), ctime);
		timer_update(PROCESSOR_DATA(processor, thread_timer), ctime);
		timer_update(&thread->runnable_timer, ctime);
	}

	processor->quantum_end = ctime + thread->quantum_remaining;

	/*
	 *	Context switch check
	 *
	 *	non-urgent flags don't affect kernel threads, so upgrade to urgent
	 *	to ensure that rebalancing and non-recommendation kick in quickly.
	 */

	ast_t check_reason = AST_QUANTUM;
	if (thread->task == kernel_task)
		check_reason |= AST_URGENT;

	if ((preempt = csw_check(processor, check_reason)) != AST_NONE)
		ast_on(preempt);

	/*
	 * AST_KEVENT does not send an IPI when setting the AST.  To avoid
	 * waiting for the next context switch to propagate it, the AST is
	 * propagated here at quantum expiration.
	 */
	ast_propagate(thread);

	thread_unlock(thread);

	timer_call_quantum_timer_enter(&processor->quantum_timer, thread,
	    processor->quantum_end, ctime);

	/* Tell platform layer that we are still running this thread */
	urgency = thread_get_urgency(thread, &ignore1, &ignore2);
	machine_thread_going_on_core(thread, urgency, 0, 0, ctime);
	machine_switch_perfcontrol_state_update(QUANTUM_EXPIRY, ctime,
	    0, thread);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	sched_timeshare_consider_maintenance(ctime);
#endif /* CONFIG_SCHED_TIMESHARE_CORE */

#if __arm__ || __arm64__
	if (thread->sched_mode == TH_MODE_REALTIME)
		sched_consider_recommended_cores(ctime, thread);
#endif /* __arm__ || __arm64__ */

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_QUANTUM_EXPIRED) | DBG_FUNC_END, preempt, 0, 0, 0, 0);
}

/*
 *	sched_set_thread_base_priority:
 *
 *	Set the base priority of the thread
 *	and reset its scheduled priority.
 *
 *	This is the only path to change base_pri.
 *
 *	Called with the thread locked.
 */
void
sched_set_thread_base_priority(thread_t thread, int priority)
{
	assert(priority >= MINPRI);
	uint64_t ctime = 0;

	if (thread->sched_mode == TH_MODE_REALTIME)
		assert(priority <= BASEPRI_RTQUEUES);
	else
		assert(priority < BASEPRI_RTQUEUES);

	int old_base_pri = thread->base_pri;
	thread->base_pri = priority;

	if ((thread->state & TH_RUN) == TH_RUN) {
		assert(thread->last_made_runnable_time != THREAD_NOT_RUNNABLE);
		ctime = mach_approximate_time();
		thread->last_basepri_change_time = ctime;
	} else {
		assert(thread->last_basepri_change_time == THREAD_NOT_RUNNABLE);
		assert(thread->last_made_runnable_time == THREAD_NOT_RUNNABLE);
	}

	/*
	 * Currently the perfcontrol_attr depends on the base pri of the
	 * thread.  Therefore, we use this function as the hook for the
	 * perfcontrol callout.
	 */
	if (thread == current_thread() && old_base_pri != priority) {
		if (!ctime) {
			ctime = mach_approximate_time();
		}
		machine_switch_perfcontrol_state_update(PERFCONTROL_ATTR_UPDATE,
		    ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, thread);
	}
	sched_update_thread_bucket(thread);

	thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
}
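
/*
 * Illustrative caller sketch (hypothetical, not an actual kernel path):
 * base-priority changes are made at splsched with the thread locked, so a
 * typical caller looks roughly like the following.
 */
#if 0	/* illustration only */
static void
example_set_base_priority(thread_t thread, int new_pri)
{
	spl_t s = splsched();
	thread_lock(thread);

	sched_set_thread_base_priority(thread, new_pri);

	thread_unlock(thread);
	splx(s);
}
#endif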

/*
 *	thread_recompute_sched_pri:
 *
 *	Reset the scheduled priority of the thread
 *	according to its base priority if the
 *	thread has not been promoted or depressed.
 *
 *	This is the only way to push base_pri changes into sched_pri,
 *	or to recalculate the appropriate sched_pri after changing
 *	a promotion or depression.
 *
 *	Called at splsched with the thread locked.
 *
 *	TODO: Add an 'update urgency' flag to avoid urgency callouts on every rwlock operation
 */
void
thread_recompute_sched_pri(thread_t thread, set_sched_pri_options_t options)
{
	uint32_t sched_flags = thread->sched_flags;
	sched_mode_t sched_mode = thread->sched_mode;

	int priority = thread->base_pri;

	if (sched_mode == TH_MODE_TIMESHARE)
		priority = SCHED(compute_timeshare_priority)(thread);

	if (sched_flags & TH_SFLAG_DEPRESS) {
		/* thread_yield_internal overrides kernel mutex promotion */
		priority = DEPRESSPRI;
	} else {
		/* poll-depress is overridden by mutex promotion and promote-reasons */
		if ((sched_flags & TH_SFLAG_POLLDEPRESS)) {
			priority = DEPRESSPRI;
		}

		if (sched_flags & TH_SFLAG_PROMOTED) {
			priority = MAX(priority, thread->promotion_priority);

			if (sched_mode != TH_MODE_REALTIME)
				priority = MIN(priority, MAXPRI_PROMOTE);
		}

		if (sched_flags & TH_SFLAG_PROMOTE_REASON_MASK) {
			if (sched_flags & TH_SFLAG_RW_PROMOTED)
				priority = MAX(priority, MINPRI_RWLOCK);

			if (sched_flags & TH_SFLAG_WAITQ_PROMOTED)
				priority = MAX(priority, MINPRI_WAITQ);

			if (sched_flags & TH_SFLAG_EXEC_PROMOTED)
				priority = MAX(priority, MINPRI_EXEC);
		}
	}

	set_sched_pri(thread, priority, options);
}

void
sched_default_quantum_expire(thread_t thread __unused)
{
	/*
	 * No special behavior when a timeshare, fixed, or realtime thread
	 * uses up its entire quantum
	 */
}

#if defined(CONFIG_SCHED_TIMESHARE_CORE)

/*
 *	lightweight_update_priority:
 *
 *	Update the scheduled priority for
 *	a timesharing thread.
 *
 *	Only for use on the current thread.
 *
 *	Called with the thread locked.
 */
void
lightweight_update_priority(thread_t thread)
{
	assert(thread->runq == PROCESSOR_NULL);
	assert(thread == current_thread());

	if (thread->sched_mode == TH_MODE_TIMESHARE) {
		int priority;
		uint32_t delta;

		thread_timer_delta(thread, delta);

		/*
		 *	Accumulate timesharing usage only
		 *	during contention for processor
		 *	resources.
		 */
		if (thread->pri_shift < INT8_MAX)
			thread->sched_usage += delta;

		thread->cpu_delta += delta;

		priority = sched_compute_timeshare_priority(thread);

		if (priority != thread->sched_pri)
			thread_recompute_sched_pri(thread, SETPRI_LAZY);
	}
}

/*
 *	Define shifts for simulating (5/8) ** n
 *
 *	Shift structures for holding update shifts.  The actual computation
 *	is  usage = (usage >> shift1) +/- (usage >> abs(shift2))  where the
 *	+/- is determined by the sign of shift2.
 */
struct shift_data {
	int shift1;
	int shift2;
};

#define SCHED_DECAY_TICKS	32
static struct shift_data sched_decay_shifts[SCHED_DECAY_TICKS] = {
	{ 1,  1}, { 1,  3}, { 1, -3}, { 2, -7}, { 3,  5}, { 3, -5}, { 4, -8}, { 5,  7},
	{ 5, -7}, { 6,-10}, { 7, 10}, { 7, -9}, { 8,-11}, { 9, 12}, { 9,-11}, {10,-13},
	{11, 14}, {11,-13}, {12,-15}, {13, 17}, {13,-15}, {14,-17}, {15, 19}, {16, 18},
	{16,-19}, {17, 22}, {18, 20}, {18,-20}, {19, 26}, {20, 22}, {20,-22}, {21,-27}
};
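
/*
 * Worked example (illustrative only, not used by the scheduler): the table
 * entry at index n scales a usage value by roughly (5/8) ** n.  Entry [1] is
 * {1, 3}, giving (u >> 1) + (u >> 3) = 0.625 * u, exactly (5/8) ** 1; entry
 * [2] is {1, -3}, giving (u >> 1) - (u >> 3) = 0.375 * u, close to
 * (5/8) ** 2 = 0.390625.  A sketch of how update_priority() applies an entry:
 */
#if 0	/* illustration only */
static uint32_t
sched_decay_usage_example(uint32_t usage, uint32_t ticks)
{
	/* assumes 0 < ticks < SCHED_DECAY_TICKS, as enforced in update_priority() */
	struct shift_data *shiftp = &sched_decay_shifts[ticks];

	if (shiftp->shift2 > 0)
		return (usage >> shiftp->shift1) + (usage >> shiftp->shift2);
	else
		return (usage >> shiftp->shift1) - (usage >> -(shiftp->shift2));
}
#endif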

/*
 *	sched_compute_timeshare_priority:
 *
 *	Calculate the timesharing priority based upon usage and load.
 */
extern int sched_pri_decay_band_limit;

#ifdef CONFIG_EMBEDDED

int
sched_compute_timeshare_priority(thread_t thread)
{
	int decay_amount = (thread->sched_usage >> thread->pri_shift);
	int decay_limit = sched_pri_decay_band_limit;

	if (thread->base_pri > BASEPRI_FOREGROUND) {
		decay_limit += (thread->base_pri - BASEPRI_FOREGROUND);
	}

	if (decay_amount > decay_limit) {
		decay_amount = decay_limit;
	}

	/* start with base priority */
	int priority = thread->base_pri - decay_amount;

	if (priority < MAXPRI_THROTTLE) {
		if (thread->task->max_priority > MAXPRI_THROTTLE) {
			priority = MAXPRI_THROTTLE;
		} else if (priority < MINPRI_USER) {
			priority = MINPRI_USER;
		}
	} else if (priority > MAXPRI_KERNEL) {
		priority = MAXPRI_KERNEL;
	}

	return priority;
}

#else /* CONFIG_EMBEDDED */

int
sched_compute_timeshare_priority(thread_t thread)
{
	/* start with base priority */
	int priority = thread->base_pri - (thread->sched_usage >> thread->pri_shift);

	if (priority < MINPRI_USER)
		priority = MINPRI_USER;
	else if (priority > MAXPRI_KERNEL)
		priority = MAXPRI_KERNEL;

	return priority;
}

#endif /* CONFIG_EMBEDDED */
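
/*
 * Worked example (illustrative numbers, not taken from a real workload): a
 * timeshare thread with base_pri 31 and sched_usage 0x50000 runs at
 * 31 - (0x50000 >> 18) = 30 when its bucket's pri_shift is 18, but at
 * 31 - (0x50000 >> 16) = 26 when heavier load drives the pri_shift down to
 * 16.  The result is then clamped to the [MINPRI_USER, MAXPRI_KERNEL] band
 * (and additionally to the decay band limit on CONFIG_EMBEDDED).
 */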

/*
 *	can_update_priority
 *
 *	Make sure we don't do re-dispatches more frequently than a scheduler tick.
 *
 *	Called with the thread locked.
 */
boolean_t
can_update_priority(
	thread_t	thread)
{
	if (sched_tick == thread->sched_stamp)
		return (FALSE);
	else
		return (TRUE);
}

/*
 *	update_priority
 *
 *	Perform housekeeping operations driven by scheduler tick.
 *
 *	Called with the thread locked.
 */
void
update_priority(
	thread_t	thread)
{
	uint32_t ticks, delta;

	ticks = sched_tick - thread->sched_stamp;
	assert(ticks != 0);

	thread->sched_stamp += ticks;

	/* If requested, accelerate aging of sched_usage */
	if (sched_decay_usage_age_factor > 1)
		ticks *= sched_decay_usage_age_factor;

	/*
	 *	Gather cpu usage data.
	 */
	thread_timer_delta(thread, delta);
	if (ticks < SCHED_DECAY_TICKS) {
		/*
		 * Accumulate timesharing usage only during contention for processor
		 * resources.  Use the pri_shift from the previous tick window to
		 * determine if the system was in a contended state.
		 */
		if (thread->pri_shift < INT8_MAX)
			thread->sched_usage += delta;

		thread->cpu_usage += delta + thread->cpu_delta;
		thread->cpu_delta = 0;

		struct shift_data *shiftp = &sched_decay_shifts[ticks];

		if (shiftp->shift2 > 0) {
			thread->cpu_usage = (thread->cpu_usage >> shiftp->shift1) +
			    (thread->cpu_usage >> shiftp->shift2);
			thread->sched_usage = (thread->sched_usage >> shiftp->shift1) +
			    (thread->sched_usage >> shiftp->shift2);
		} else {
			thread->cpu_usage = (thread->cpu_usage >> shiftp->shift1) -
			    (thread->cpu_usage >> -(shiftp->shift2));
			thread->sched_usage = (thread->sched_usage >> shiftp->shift1) -
			    (thread->sched_usage >> -(shiftp->shift2));
		}
	} else {
		thread->cpu_usage = thread->cpu_delta = 0;
		thread->sched_usage = 0;
	}

	/*
	 *	Check for fail-safe release.
	 */
	if ((thread->sched_flags & TH_SFLAG_FAILSAFE) &&
	    mach_absolute_time() >= thread->safe_release) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	/*
	 * Now that the thread's CPU usage has been accumulated and aged
	 * based on contention of the previous tick window, update the
	 * pri_shift of the thread to match the current global load/shift
	 * values.  The updated pri_shift is then used to calculate the
	 * thread's new priority.
	 */
	thread->pri_shift = sched_pri_shifts[thread->th_sched_bucket];

	/* Recompute scheduled priority if appropriate. */
	if (thread->sched_mode == TH_MODE_TIMESHARE)
		thread_recompute_sched_pri(thread, SETPRI_LAZY);
}

#endif /* CONFIG_SCHED_TIMESHARE_CORE */


/*
 *	TH_BUCKET_RUN is a count of *all* runnable non-idle threads.
 *	Each other bucket is a count of the runnable non-idle threads
 *	with that property.
 */
volatile uint32_t sched_run_buckets[TH_BUCKET_MAX];

static void
sched_incr_bucket(sched_bucket_t bucket)
{
	assert(bucket >= TH_BUCKET_FIXPRI &&
	    bucket <= TH_BUCKET_SHARE_BG);

	hw_atomic_add(&sched_run_buckets[bucket], 1);
}

static void
sched_decr_bucket(sched_bucket_t bucket)
{
	assert(bucket >= TH_BUCKET_FIXPRI &&
	    bucket <= TH_BUCKET_SHARE_BG);

	assert(sched_run_buckets[bucket] > 0);

	hw_atomic_sub(&sched_run_buckets[bucket], 1);
}

/* TH_RUN & !TH_IDLE controls whether a thread has a run count */

uint32_t
sched_run_incr(thread_t thread)
{
	assert((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN);

	uint32_t new_count = hw_atomic_add(&sched_run_buckets[TH_BUCKET_RUN], 1);

	sched_incr_bucket(thread->th_sched_bucket);

	return new_count;
}

uint32_t
sched_run_decr(thread_t thread)
{
	assert((thread->state & (TH_RUN | TH_IDLE)) != TH_RUN);

	sched_decr_bucket(thread->th_sched_bucket);

	uint32_t new_count = hw_atomic_sub(&sched_run_buckets[TH_BUCKET_RUN], 1);

	return new_count;
}

static void
sched_update_thread_bucket(thread_t thread)
{
	sched_bucket_t old_bucket = thread->th_sched_bucket;
	sched_bucket_t new_bucket = TH_BUCKET_RUN;

	switch (thread->sched_mode) {
	case TH_MODE_FIXED:
	case TH_MODE_REALTIME:
		new_bucket = TH_BUCKET_FIXPRI;
		break;

	case TH_MODE_TIMESHARE:
		if (thread->base_pri > BASEPRI_DEFAULT)
			new_bucket = TH_BUCKET_SHARE_FG;
		else if (thread->base_pri > BASEPRI_UTILITY)
			new_bucket = TH_BUCKET_SHARE_DF;
		else if (thread->base_pri > MAXPRI_THROTTLE)
			new_bucket = TH_BUCKET_SHARE_UT;
		else
			new_bucket = TH_BUCKET_SHARE_BG;
		break;

	default:
		panic("unexpected mode: %d", thread->sched_mode);
		break;
	}

	if (old_bucket != new_bucket) {
		thread->th_sched_bucket = new_bucket;
		thread->pri_shift = sched_pri_shifts[new_bucket];

		if ((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN) {
			sched_decr_bucket(old_bucket);
			sched_incr_bucket(new_bucket);
		}
	}
}
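
/*
 * Illustrative mapping (derived from the checks above): a fixed or realtime
 * thread always lands in TH_BUCKET_FIXPRI.  For timeshare threads, a thread
 * with base_pri above BASEPRI_DEFAULT goes to TH_BUCKET_SHARE_FG, one at
 * BASEPRI_DEFAULT (but above BASEPRI_UTILITY) to TH_BUCKET_SHARE_DF, one at
 * BASEPRI_UTILITY (but above MAXPRI_THROTTLE) to TH_BUCKET_SHARE_UT, and a
 * throttled thread at or below MAXPRI_THROTTLE to TH_BUCKET_SHARE_BG.
 */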

/*
 * Set the thread's true scheduling mode
 * Called with thread mutex and thread locked
 * The thread has already been removed from the runqueue.
 *
 * (saved_mode is handled before this point)
 */
void
sched_set_thread_mode(thread_t thread, sched_mode_t new_mode)
{
	assert(thread->runq == PROCESSOR_NULL);

	switch (new_mode) {
	case TH_MODE_FIXED:
	case TH_MODE_REALTIME:
	case TH_MODE_TIMESHARE:
		break;

	default:
		panic("unexpected mode: %d", new_mode);
		break;
	}

	thread->sched_mode = new_mode;

	sched_update_thread_bucket(thread);
}

/*
 * Demote the true scheduler mode to timeshare (called with the thread locked)
 */
void
sched_thread_mode_demote(thread_t thread, uint32_t reason)
{
	assert(reason & TH_SFLAG_DEMOTED_MASK);
	assert((thread->sched_flags & reason) != reason);

	if (thread->policy_reset)
		return;

	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
		/* Another demotion reason is already active */
		thread->sched_flags |= reason;
		return;
	}

	assert(thread->saved_mode == TH_MODE_NONE);

	boolean_t removed = thread_run_queue_remove(thread);

	thread->sched_flags |= reason;

	thread->saved_mode = thread->sched_mode;

	sched_set_thread_mode(thread, TH_MODE_TIMESHARE);

	thread_recompute_priority(thread);

	if (removed)
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
}

/*
 * Un-demote the true scheduler mode back to the saved mode (called with the thread locked)
 */
void
sched_thread_mode_undemote(thread_t thread, uint32_t reason)
{
	assert(reason & TH_SFLAG_DEMOTED_MASK);
	assert((thread->sched_flags & reason) == reason);
	assert(thread->saved_mode != TH_MODE_NONE);
	assert(thread->sched_mode == TH_MODE_TIMESHARE);
	assert(thread->policy_reset == 0);

	thread->sched_flags &= ~reason;

	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
		/* Another demotion reason is still active */
		return;
	}

	boolean_t removed = thread_run_queue_remove(thread);

	sched_set_thread_mode(thread, thread->saved_mode);

	thread->saved_mode = TH_MODE_NONE;

	thread_recompute_priority(thread);

	if (removed)
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
}

/*
 * Promote thread to a specific priority
 *
 * Promotion must not last past syscall boundary
 * Clients must always pair promote and unpromote 1:1
 *
 * Called at splsched with thread locked
 */
void
sched_thread_promote_to_pri(thread_t thread,
    int priority,
    __kdebug_only uintptr_t trace_obj /* already unslid */)
{
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) != TH_SFLAG_PROMOTED);
	assert(thread->promotion_priority == 0);
	assert(priority <= MAXPRI_PROMOTE);
	assert(priority > 0);

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTED),
	    thread_tid(thread), trace_obj, priority);

	thread->sched_flags |= TH_SFLAG_PROMOTED;
	thread->promotion_priority = priority;

	thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
}
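
/*
 * Illustrative pairing sketch (hypothetical client, not an actual kernel
 * path): a client that promotes a thread must later drop that same
 * promotion before the syscall boundary, with both calls made at splsched
 * with the thread locked:
 *
 *	sched_thread_promote_to_pri(thread, MAXPRI_PROMOTE, trace_obj);
 *	... promoted section ...
 *	sched_thread_unpromote(thread, trace_obj);
 */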

/*
 * Update a pre-existing priority promotion to have a higher priority floor
 * Priority can only go up from the previous value
 * Update must occur while a promotion is active
 *
 * Called at splsched with thread locked
 */
void
sched_thread_update_promotion_to_pri(thread_t thread,
    int priority,
    __kdebug_only uintptr_t trace_obj /* already unslid */)
{
	assert(thread->promotions > 0);
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == TH_SFLAG_PROMOTED);
	assert(thread->promotion_priority > 0);
	assert(priority <= MAXPRI_PROMOTE);

	if (thread->promotion_priority < priority) {
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTED_UPDATE),
		    thread_tid(thread), trace_obj, priority);

		thread->promotion_priority = priority;
		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}
}

/*
 * End a priority promotion
 * Demotes a thread back to its expected priority without the promotion in place
 *
 * Called at splsched with thread locked
 */
void
sched_thread_unpromote(thread_t thread,
    __kdebug_only uintptr_t trace_obj /* already unslid */)
{
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == TH_SFLAG_PROMOTED);
	assert(thread->promotion_priority > 0);

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_UNPROMOTED),
	    thread_tid(thread), trace_obj, 0);

	thread->sched_flags &= ~TH_SFLAG_PROMOTED;
	thread->promotion_priority = 0;

	thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
}

/* called with thread locked */
void
assert_promotions_invariant(thread_t thread)
{
	if (thread->promotions > 0)
		assert((thread->sched_flags & TH_SFLAG_PROMOTED) == TH_SFLAG_PROMOTED);

	if (thread->promotions == 0)
		assert((thread->sched_flags & TH_SFLAG_PROMOTED) != TH_SFLAG_PROMOTED);
}

/*
 * Promote thread to have a sched pri floor for a specific reason
 *
 * Promotion must not last past syscall boundary
 * Clients must always pair promote and demote 1:1;
 * handling nesting of the same promote reason is the client's responsibility
 *
 * Called at splsched with thread locked
 */
void
sched_thread_promote_reason(thread_t thread,
    uint32_t reason,
    __kdebug_only uintptr_t trace_obj /* already unslid */)
{
	assert(reason & TH_SFLAG_PROMOTE_REASON_MASK);
	assert((thread->sched_flags & reason) != reason);

	switch (reason) {
	case TH_SFLAG_RW_PROMOTED:
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE),
		    thread_tid(thread), thread->sched_pri,
		    thread->base_pri, trace_obj);
		break;
	case TH_SFLAG_WAITQ_PROMOTED:
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAITQ_PROMOTE),
		    thread_tid(thread), thread->sched_pri,
		    thread->base_pri, trace_obj);
		break;
	case TH_SFLAG_EXEC_PROMOTED:
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_PROMOTE),
		    thread_tid(thread), thread->sched_pri,
		    thread->base_pri, trace_obj);
		break;
	}

	thread->sched_flags |= reason;

	thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
}

/*
 * End a specific promotion reason
 * Demotes a thread back to its expected priority without the promotion in place
 *
 * Called at splsched with thread locked
 */
void
sched_thread_unpromote_reason(thread_t thread,
    uint32_t reason,
    __kdebug_only uintptr_t trace_obj /* already unslid */)
{
	assert(reason & TH_SFLAG_PROMOTE_REASON_MASK);
	assert((thread->sched_flags & reason) == reason);

	switch (reason) {
	case TH_SFLAG_RW_PROMOTED:
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE),
		    thread_tid(thread), thread->sched_pri,
		    thread->base_pri, trace_obj);
		break;
	case TH_SFLAG_WAITQ_PROMOTED:
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAITQ_DEMOTE),
		    thread_tid(thread), thread->sched_pri,
		    thread->base_pri, trace_obj);
		break;
	case TH_SFLAG_EXEC_PROMOTED:
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_DEMOTE),
		    thread_tid(thread), thread->sched_pri,
		    thread->base_pri, trace_obj);
		break;
	}

	thread->sched_flags &= ~reason;

	thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
}