2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
57 #include <mach/boolean.h>
58 #include <mach/thread_switch.h>
59 #include <ipc/ipc_port.h>
60 #include <ipc/ipc_space.h>
61 #include <kern/counters.h>
62 #include <kern/ipc_kobject.h>
63 #include <kern/processor.h>
64 #include <kern/sched.h>
65 #include <kern/sched_prim.h>
67 #include <kern/task.h>
68 #include <kern/thread.h>
69 #include <kern/policy_internal.h>
71 #include <mach/policy.h>
73 #include <kern/syscall_subr.h>
74 #include <mach/mach_host_server.h>
75 #include <mach/mach_syscalls.h>
76 #include <sys/kdebug.h>
79 static void thread_depress_abstime(uint64_t interval
);
80 static void thread_depress_ms(mach_msg_timeout_t interval
);
82 /* Called from commpage to take a delayed preemption when exiting
83 * the "Preemption Free Zone" (PFZ).
87 __unused
struct pfz_exit_args
*args
)
89 /* For now, nothing special to do. We'll pick up the ASTs on kernel exit. */
96 * swtch and swtch_pri both attempt to context switch (logic in
97 * thread_block no-ops the context switch if nothing would happen).
98 * A boolean is returned that indicates whether there is anything
99 * else runnable. That's no excuse to spin, though.
105 processor_t myprocessor
;
108 disable_preemption();
109 myprocessor
= current_processor();
110 result
= SCHED(thread_should_yield
)(myprocessor
, current_thread());
115 thread_syscall_return(result
);
121 __unused
struct swtch_args
*args
)
123 processor_t myprocessor
;
125 disable_preemption();
126 myprocessor
= current_processor();
127 if (!SCHED(thread_should_yield
)(myprocessor
, current_thread())) {
128 mp_enable_preemption();
134 counter(c_swtch_block
++);
136 thread_yield_with_continuation((thread_continue_t
)swtch_continue
, NULL
);
140 swtch_pri_continue(void)
142 processor_t myprocessor
;
145 thread_depress_abort(current_thread());
147 disable_preemption();
148 myprocessor
= current_processor();
149 result
= SCHED(thread_should_yield
)(myprocessor
, current_thread());
150 mp_enable_preemption();
154 thread_syscall_return(result
);
160 __unused
struct swtch_pri_args
*args
)
162 processor_t myprocessor
;
164 disable_preemption();
165 myprocessor
= current_processor();
166 if (!SCHED(thread_should_yield
)(myprocessor
, current_thread())) {
167 mp_enable_preemption();
173 counter(c_swtch_pri_block
++);
175 thread_depress_abstime(thread_depress_time
);
177 thread_yield_with_continuation((thread_continue_t
)swtch_pri_continue
, NULL
);
181 thread_switch_continue(void *parameter
, __unused
int ret
)
183 thread_t self
= current_thread();
184 int option
= (int)(intptr_t)parameter
;
186 if (option
== SWITCH_OPTION_DEPRESS
|| option
== SWITCH_OPTION_OSLOCK_DEPRESS
) {
187 thread_depress_abort(self
);
192 thread_syscall_return(KERN_SUCCESS
);
199 * Context switch. User may supply thread hint.
203 struct thread_switch_args
*args
)
205 thread_t thread
= THREAD_NULL
;
206 thread_t self
= current_thread();
207 mach_port_name_t thread_name
= args
->thread_name
;
208 int option
= args
->option
;
209 mach_msg_timeout_t option_time
= args
->option_time
;
210 uint32_t scale_factor
= NSEC_PER_MSEC
;
211 boolean_t depress_option
= FALSE
;
212 boolean_t wait_option
= FALSE
;
213 wait_interrupt_t interruptible
= THREAD_ABORTSAFE
;
214 port_to_thread_options_t ptt_options
= PORT_TO_THREAD_NOT_CURRENT_THREAD
;
217 * Validate and process option.
219 * OSLock boosting only applies to other threads
220 * in your same task (even if you have a port for
221 * a thread in another task)
224 case SWITCH_OPTION_NONE
:
226 case SWITCH_OPTION_WAIT
:
229 case SWITCH_OPTION_DEPRESS
:
230 depress_option
= TRUE
;
232 case SWITCH_OPTION_DISPATCH_CONTENTION
:
233 scale_factor
= NSEC_PER_USEC
;
235 interruptible
|= THREAD_WAIT_NOREPORT
;
237 case SWITCH_OPTION_OSLOCK_DEPRESS
:
238 depress_option
= TRUE
;
239 interruptible
|= THREAD_WAIT_NOREPORT
;
240 ptt_options
|= PORT_TO_THREAD_IN_CURRENT_TASK
;
242 case SWITCH_OPTION_OSLOCK_WAIT
:
244 interruptible
|= THREAD_WAIT_NOREPORT
;
245 ptt_options
|= PORT_TO_THREAD_IN_CURRENT_TASK
;
248 return KERN_INVALID_ARGUMENT
;
252 * Translate the port name if supplied.
254 if (thread_name
!= MACH_PORT_NULL
) {
255 thread
= port_name_to_thread(thread_name
, ptt_options
);
258 if (option
== SWITCH_OPTION_OSLOCK_DEPRESS
|| option
== SWITCH_OPTION_OSLOCK_WAIT
) {
259 if (thread
!= THREAD_NULL
) {
261 * Attempt to kick the lock owner up to our same IO throttling tier.
262 * If the thread is currently blocked in throttle_lowpri_io(),
263 * it will immediately break out.
265 * TODO: SFI break out?
267 int new_policy
= proc_get_effective_thread_policy(self
, TASK_POLICY_IO
);
269 set_thread_iotier_override(thread
, new_policy
);
274 * Try to handoff if supplied.
276 if (thread
!= THREAD_NULL
) {
277 spl_t s
= splsched();
279 /* This may return a different thread if the target is pushing on something */
280 thread_t pulled_thread
= thread_run_queue_remove_for_handoff(thread
);
282 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_SCHED_THREAD_SWITCH
) | DBG_FUNC_NONE
,
283 thread_tid(thread
), thread
->state
,
284 pulled_thread
? TRUE
: FALSE
, 0, 0);
286 if (pulled_thread
!= THREAD_NULL
) {
287 /* We can't be dropping the last ref here */
288 thread_deallocate_safe(thread
);
291 assert_wait_timeout((event_t
)assert_wait_timeout
, interruptible
,
292 option_time
, scale_factor
);
293 } else if (depress_option
) {
294 thread_depress_ms(option_time
);
297 thread_run(self
, thread_switch_continue
, (void *)(intptr_t)option
, pulled_thread
);
298 __builtin_unreachable();
303 thread_deallocate(thread
);
307 assert_wait_timeout((event_t
)assert_wait_timeout
, interruptible
, option_time
, scale_factor
);
309 disable_preemption();
310 bool should_yield
= SCHED(thread_should_yield
)(current_processor(), current_thread());
313 if (should_yield
== false) {
314 /* Early-return if yielding to the scheduler will not be beneficial */
318 if (depress_option
) {
319 thread_depress_ms(option_time
);
323 thread_yield_with_continuation(thread_switch_continue
, (void *)(intptr_t)option
);
324 __builtin_unreachable();
328 thread_yield_with_continuation(
329 thread_continue_t continuation
,
332 assert(continuation
);
333 thread_block_reason(continuation
, parameter
, AST_YIELD
);
334 __builtin_unreachable();
337 /* This function is called after an assert_wait(), therefore it must not
338 * cause another wait until after the thread_run() or thread_block()
340 * The following are the calling conventions for thread ref deallocation.
342 * 1) If no continuation is provided, then thread ref is consumed.
343 * (thread_handoff_deallocate convention).
345 * 2) If continuation is provided with option THREAD_HANDOFF_SETRUN_NEEDED
346 * then thread ref is always consumed.
348 * 3) If continuation is provided with option THREAD_HANDOFF_NONE then thread
349 * ref is not consumed and it is up to the continuation to deallocate
350 * the thread reference.
353 thread_handoff_internal(thread_t thread
, thread_continue_t continuation
,
354 void *parameter
, thread_handoff_option_t option
)
356 thread_t self
= current_thread();
359 * Try to handoff if supplied.
361 if (thread
!= THREAD_NULL
) {
362 spl_t s
= splsched();
364 thread_t pulled_thread
= thread_prepare_for_handoff(thread
, option
);
366 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_SCHED_THREAD_SWITCH
) | DBG_FUNC_NONE
,
367 thread_tid(thread
), thread
->state
,
368 pulled_thread
? TRUE
: FALSE
, 0, 0);
370 /* Deallocate thread ref if needed */
371 if (continuation
== NULL
|| (option
& THREAD_HANDOFF_SETRUN_NEEDED
)) {
372 /* Use the safe version of thread deallocate */
373 thread_deallocate_safe(thread
);
376 if (pulled_thread
!= THREAD_NULL
) {
377 int result
= thread_run(self
, continuation
, parameter
, pulled_thread
);
386 int result
= thread_block_parameter(continuation
, parameter
);
391 thread_handoff_parameter(thread_t thread
, thread_continue_t continuation
,
392 void *parameter
, thread_handoff_option_t option
)
394 thread_handoff_internal(thread
, continuation
, parameter
, option
);
395 panic("NULL continuation passed to %s", __func__
);
396 __builtin_unreachable();
400 thread_handoff_deallocate(thread_t thread
, thread_handoff_option_t option
)
402 return thread_handoff_internal(thread
, NULL
, NULL
, option
);
408 * This mechanism drops a thread to priority 0 in order for it to yield to
409 * all other runnable threads on the system. It can be canceled or timed out,
410 * whereupon the thread goes back to where it was.
412 * Note that TH_SFLAG_DEPRESS and TH_SFLAG_POLLDEPRESS are never set at the
413 * same time. DEPRESS always defers to POLLDEPRESS.
415 * DEPRESS only lasts across a single thread_block call, and never returns
417 * POLLDEPRESS can be active anywhere up until thread termination.
421 * Depress thread's priority to lowest possible for the specified interval,
422 * with an interval of zero resulting in no timeout being scheduled.
424 * Must block with AST_YIELD afterwards to take effect
427 thread_depress_abstime(uint64_t interval
)
429 thread_t self
= current_thread();
431 spl_t s
= splsched();
434 assert((self
->sched_flags
& TH_SFLAG_DEPRESS
) == 0);
436 if ((self
->sched_flags
& TH_SFLAG_POLLDEPRESS
) == 0) {
437 self
->sched_flags
|= TH_SFLAG_DEPRESS
;
438 thread_recompute_sched_pri(self
, SETPRI_LAZY
);
443 clock_absolutetime_interval_to_deadline(interval
, &deadline
);
444 if (!timer_call_enter(&self
->depress_timer
, deadline
, TIMER_CALL_USER_CRITICAL
)) {
445 self
->depress_timer_active
++;
455 thread_depress_ms(mach_msg_timeout_t interval
)
459 clock_interval_to_absolutetime_interval(interval
, NSEC_PER_MSEC
, &abstime
);
460 thread_depress_abstime(abstime
);
464 * Priority depression expiration.
467 thread_depress_expire(void *p0
,
470 thread_t thread
= (thread_t
)p0
;
472 spl_t s
= splsched();
475 assert((thread
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
) != TH_SFLAG_DEPRESSED_MASK
);
477 if (--thread
->depress_timer_active
== 0) {
478 thread
->sched_flags
&= ~TH_SFLAG_DEPRESSED_MASK
;
479 if ((thread
->state
& TH_RUN
) == TH_RUN
) {
480 thread
->last_basepri_change_time
= mach_absolute_time();
482 thread_recompute_sched_pri(thread
, SETPRI_DEFAULT
);
485 thread_unlock(thread
);
490 * Prematurely abort priority depression if there is one.
493 thread_depress_abort(thread_t thread
)
495 kern_return_t result
= KERN_NOT_DEPRESSED
;
497 spl_t s
= splsched();
500 assert((thread
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
) != TH_SFLAG_DEPRESSED_MASK
);
503 * User-triggered depress-aborts should not get out
504 * of the poll-depress, but they should cancel a regular depress.
506 if ((thread
->sched_flags
& TH_SFLAG_POLLDEPRESS
) == 0) {
507 result
= thread_depress_abort_locked(thread
);
510 thread_unlock(thread
);
517 * Prematurely abort priority depression or poll depression if one is active.
518 * Called with the thread locked.
521 thread_depress_abort_locked(thread_t thread
)
523 if ((thread
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
) == 0) {
524 return KERN_NOT_DEPRESSED
;
527 assert((thread
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
) != TH_SFLAG_DEPRESSED_MASK
);
529 thread
->sched_flags
&= ~TH_SFLAG_DEPRESSED_MASK
;
530 if ((thread
->state
& TH_RUN
) == TH_RUN
) {
531 thread
->last_basepri_change_time
= mach_absolute_time();
534 thread_recompute_sched_pri(thread
, SETPRI_LAZY
);
536 if (timer_call_cancel(&thread
->depress_timer
)) {
537 thread
->depress_timer_active
--;
544 * Invoked as part of a polling operation like a no-timeout port receive
546 * Forces a fixpri thread to yield if it is detected polling without blocking for too long.
549 thread_poll_yield(thread_t self
)
551 assert(self
== current_thread());
552 assert((self
->sched_flags
& TH_SFLAG_DEPRESS
) == 0);
554 if (self
->sched_mode
!= TH_MODE_FIXED
) {
558 spl_t s
= splsched();
560 uint64_t abstime
= mach_absolute_time();
561 uint64_t total_computation
= abstime
-
562 self
->computation_epoch
+ self
->computation_metered
;
564 if (total_computation
>= max_poll_computation
) {
567 self
->computation_epoch
= abstime
;
568 self
->computation_metered
= 0;
570 uint64_t yield_expiration
= abstime
+
571 (total_computation
>> sched_poll_yield_shift
);
573 if (!timer_call_enter(&self
->depress_timer
, yield_expiration
,
574 TIMER_CALL_USER_CRITICAL
)) {
575 self
->depress_timer_active
++;
578 self
->sched_flags
|= TH_SFLAG_POLLDEPRESS
;
579 thread_recompute_sched_pri(self
, SETPRI_DEFAULT
);
587 * Kernel-internal interface to yield for a specified period
589 * WARNING: Will still yield to priority 0 even if the thread is holding a contended lock!
592 thread_yield_internal(mach_msg_timeout_t ms
)
594 thread_t self
= current_thread();
596 assert((self
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
) != TH_SFLAG_DEPRESSED_MASK
);
598 processor_t myprocessor
;
600 disable_preemption();
601 myprocessor
= current_processor();
602 if (!SCHED(thread_should_yield
)(myprocessor
, self
)) {
603 mp_enable_preemption();
609 thread_depress_ms(ms
);
611 thread_block_reason(THREAD_CONTINUE_NULL
, NULL
, AST_YIELD
);
613 thread_depress_abort(self
);
617 * This yields to a possible non-urgent preemption pending on the current processor.
619 * This is useful when doing a long computation in the kernel without returning to userspace.
621 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
624 thread_yield_to_preemption()
627 * ast_pending() should ideally be called with interrupts disabled, but
628 * the check here is fine because csw_check() will do the right thing.
630 ast_t
*pending_ast
= ast_pending();
631 ast_t ast
= AST_NONE
;
634 if (*pending_ast
& AST_PREEMPT
) {
635 thread_t self
= current_thread();
637 spl_t s
= splsched();
639 p
= current_processor();
641 ast
= csw_check(self
, p
, AST_YIELD
);
645 if (ast
!= AST_NONE
) {
646 (void)thread_block_reason(THREAD_CONTINUE_NULL
, NULL
, ast
);