/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/counters.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <mach/policy.h>

#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>
#include <sys/kdebug.h>

static void thread_depress_abstime(uint64_t interval);
static void thread_depress_ms(mach_msg_timeout_t interval);

/*
 * Called from commpage to take a delayed preemption when exiting
 * the "Preemption Free Zone" (PFZ).
 */
kern_return_t
pfz_exit(
	__unused struct pfz_exit_args *args)
{
	/* For now, nothing special to do.  We'll pick up the ASTs on kernel exit. */

	return (KERN_SUCCESS);
}

/*
 *	swtch and swtch_pri both attempt to context switch (logic in
 *	thread_block no-ops the context switch if nothing would happen).
 *	A boolean is returned that indicates whether there is anything
 *	else runnable.  That's no excuse to spin, though.
 */
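/*
 * Illustrative user-space sketch (not part of this file): a caller that is
 * briefly blocked on a resource can treat the returned boolean as a hint, e.g.
 *
 *	while (!try_acquire(resource)) {	// hypothetical helper
 *		if (!swtch())			// nothing else runnable right now
 *			break;			//  -> fall back to a real wait
 *	}
 *
 * The hint is advisory only; as noted above, a TRUE return is no excuse
 * to keep spinning indefinitely.
 */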
static void
swtch_continue(void)
{
	processor_t	myprocessor;
	boolean_t	result;

	disable_preemption();
	myprocessor = current_processor();
	result = SCHED(thread_should_yield)(myprocessor, current_thread());
	mp_enable_preemption();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch(
	__unused struct swtch_args *args)
{
	processor_t	myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
		mp_enable_preemption();

		return (FALSE);
	}

	enable_preemption();

	counter(c_swtch_block++);

	thread_yield_with_continuation((thread_continue_t)swtch_continue, NULL);
}

static void
swtch_pri_continue(void)
{
	processor_t	myprocessor;
	boolean_t	result;

	thread_depress_abort(current_thread());

	disable_preemption();
	myprocessor = current_processor();
	result = SCHED(thread_should_yield)(myprocessor, current_thread());
	mp_enable_preemption();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch_pri(
	__unused struct swtch_pri_args *args)
{
	processor_t	myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
		mp_enable_preemption();

		return (FALSE);
	}

	enable_preemption();

	counter(c_swtch_pri_block++);

	thread_depress_abstime(thread_depress_time);

	thread_yield_with_continuation((thread_continue_t)swtch_pri_continue, NULL);
}

static void
thread_switch_continue(void *parameter, __unused int ret)
{
	thread_t self = current_thread();
	int option = (int)(intptr_t)parameter;

	if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS)
		thread_depress_abort(self);

	thread_syscall_return(KERN_SUCCESS);
	/*NOTREACHED*/
}

/*
 *	thread_switch:
 *
 *	Context switch.  User may supply thread hint.
 */
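/*
 * Illustrative user-space usage (not part of this file), assuming the trap
 * prototype from <mach/thread_switch.h>:
 *
 *	// Yield and depress priority for up to 2 ms, with no particular hint:
 *	thread_switch(MACH_PORT_NULL, SWITCH_OPTION_DEPRESS, 2);
 *
 *	// Hand off directly to a known lock owner in the same task:
 *	thread_switch(owner_thread_port, SWITCH_OPTION_OSLOCK_DEPRESS, 1);
 */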
kern_return_t
thread_switch(
	struct thread_switch_args *args)
{
	thread_t thread = THREAD_NULL;
	thread_t self = current_thread();
	mach_port_name_t thread_name = args->thread_name;
	int option = args->option;
	mach_msg_timeout_t option_time = args->option_time;
	uint32_t scale_factor = NSEC_PER_MSEC;
	boolean_t depress_option = FALSE;
	boolean_t wait_option = FALSE;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;

	/*
	 *	Validate and process option.
	 */
	switch (option) {
	case SWITCH_OPTION_NONE:
		break;
	case SWITCH_OPTION_WAIT:
		wait_option = TRUE;
		break;
	case SWITCH_OPTION_DEPRESS:
		depress_option = TRUE;
		break;
	case SWITCH_OPTION_DISPATCH_CONTENTION:
		scale_factor = NSEC_PER_USEC;
		wait_option = TRUE;
		interruptible |= THREAD_WAIT_NOREPORT;
		break;
	case SWITCH_OPTION_OSLOCK_DEPRESS:
		depress_option = TRUE;
		interruptible |= THREAD_WAIT_NOREPORT;
		break;
	case SWITCH_OPTION_OSLOCK_WAIT:
		wait_option = TRUE;
		interruptible |= THREAD_WAIT_NOREPORT;
		break;
	default:
		return (KERN_INVALID_ARGUMENT);
	}
	/*
	 *	Translate the port name if supplied.
	 */
	if (thread_name != MACH_PORT_NULL) {
		ipc_port_t port;

		if (ipc_port_translate_send(self->task->itk_space,
		    thread_name, &port) == KERN_SUCCESS) {
			ip_reference(port);
			ip_unlock(port);

			thread = convert_port_to_thread(port);
			ip_release(port);

			if (thread == self) {
				thread_deallocate(thread);
				thread = THREAD_NULL;
			}
		}
	}
	if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
		if (thread != THREAD_NULL) {
			if (thread->task != self->task) {
				/*
				 * OSLock boosting only applies to other threads
				 * in your same task (even if you have a port for
				 * a thread in another task)
				 */
				thread_deallocate(thread);
				thread = THREAD_NULL;
			} else {
				/*
				 * Attempt to kick the lock owner up to our same IO throttling tier.
				 * If the thread is currently blocked in throttle_lowpri_io(),
				 * it will immediately break out.
				 *
				 * TODO: SFI break out?
				 */
				int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

				set_thread_iotier_override(thread, new_policy);
			}
		}
	}
	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		/* This may return a different thread if the target is pushing on something */
		thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
		    thread_tid(thread), thread->state,
		    pulled_thread ? TRUE : FALSE, 0, 0);

		if (pulled_thread != THREAD_NULL) {
			/* We can't be dropping the last ref here */
			thread_deallocate_safe(thread);

			if (wait_option)
				assert_wait_timeout((event_t)assert_wait_timeout, interruptible,
				    option_time, scale_factor);
			else if (depress_option)
				thread_depress_ms(option_time);

			thread_run(self, thread_switch_continue, (void *)(intptr_t)option, pulled_thread);
			__builtin_unreachable();
		}

		splx(s);

		thread_deallocate(thread);
	}
	if (wait_option) {
		assert_wait_timeout((event_t)assert_wait_timeout, interruptible, option_time, scale_factor);
	} else {
		disable_preemption();
		bool should_yield = SCHED(thread_should_yield)(current_processor(), current_thread());
		enable_preemption();

		if (should_yield == false) {
			/* Early-return if yielding to the scheduler will not be beneficial */
			return KERN_FAILURE;
		}
	}

	if (depress_option) {
		thread_depress_ms(option_time);
	}

	thread_yield_with_continuation(thread_switch_continue, (void *)(intptr_t)option);
	__builtin_unreachable();
}

void
thread_yield_with_continuation(
	thread_continue_t continuation,
	void              *parameter)
{
	assert(continuation);
	thread_block_reason(continuation, parameter, AST_YIELD);
	__builtin_unreachable();
}

/* Returns a +1 thread reference */
thread_t
port_name_to_thread_for_ulock(mach_port_name_t thread_name)
{
	thread_t thread = THREAD_NULL;
	thread_t self = current_thread();

	/*
	 * Translate the port name if supplied.
	 */
	if (thread_name != MACH_PORT_NULL) {
		ipc_port_t port;

		if (ipc_port_translate_send(self->task->itk_space,
		    thread_name, &port) == KERN_SUCCESS) {
			ip_reference(port);
			ip_unlock(port);

			thread = convert_port_to_thread(port);
			ip_release(port);

			if (thread == THREAD_NULL) {
				return thread;
			}

			if ((thread == self) || (thread->task != self->task)) {
				thread_deallocate(thread);
				thread = THREAD_NULL;
			}
		}
	}

	return thread;
}

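/*
 * Note: per the "+1 thread reference" comment above, a non-NULL return carries
 * a reference that the caller must drop with thread_deallocate() when done, e.g.
 *
 *	thread_t owner = port_name_to_thread_for_ulock(name);
 *	if (owner != THREAD_NULL) {
 *		// ... use owner ...
 *		thread_deallocate(owner);
 *	}
 */
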
/* This function is called after an assert_wait(), therefore it must not
 * cause another wait until after the thread_run() or thread_block().
 *
 * When called with a NULL continuation, the thread ref is consumed
 * (thread_handoff_deallocate calling convention) else it is up to the
 * continuation to do the cleanup (thread_handoff_parameter calling convention)
 * and it instead doesn't return.
 */
static wait_result_t
thread_handoff_internal(thread_t thread, thread_continue_t continuation,
    void *parameter)
{
	thread_t deallocate_thread = THREAD_NULL;
	thread_t self = current_thread();

	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
		    thread_tid(thread), thread->state,
		    pulled_thread ? TRUE : FALSE, 0, 0);

		if (pulled_thread != THREAD_NULL) {
			if (continuation == NULL) {
				/* We can't be dropping the last ref here */
				thread_deallocate_safe(thread);
			}

			int result = thread_run(self, continuation, parameter, pulled_thread);

			splx(s);
			return result;
		}

		splx(s);

		deallocate_thread = thread;
		thread = THREAD_NULL;
	}

	int result = thread_block_parameter(continuation, parameter);

	if (deallocate_thread != THREAD_NULL) {
		thread_deallocate(deallocate_thread);
	}

	return result;
}

void
thread_handoff_parameter(thread_t thread, thread_continue_t continuation,
    void *parameter)
{
	thread_handoff_internal(thread, continuation, parameter);
	panic("NULL continuation passed to %s", __func__);
	__builtin_unreachable();
}

wait_result_t
thread_handoff_deallocate(thread_t thread)
{
	return thread_handoff_internal(thread, NULL, NULL);
}

/*
 * This mechanism drops a thread to priority 0 in order for it to yield to
 * all other runnable threads on the system.  It can be canceled or timed out,
 * whereupon the thread goes back to where it was.
 *
 * Note that TH_SFLAG_DEPRESS and TH_SFLAG_POLLDEPRESS are never set at the
 * same time.  DEPRESS always defers to POLLDEPRESS.
 *
 * DEPRESS only lasts across a single thread_block call, and never returns
 * to userspace.
 * POLLDEPRESS can be active anywhere up until thread termination.
 */
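/*
 * Illustrative lifecycle of a plain DEPRESS (this is essentially what
 * thread_yield_internal() below does):
 *
 *	thread_depress_ms(interval);                                 // drop to priority 0, arm timer
 *	thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);  // actually yield
 *	thread_depress_abort(current_thread());                      // cancel early if still depressed
 */
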
/*
 * Depress thread's priority to lowest possible for the specified interval,
 * with an interval of zero resulting in no timeout being scheduled.
 *
 * Must block with AST_YIELD afterwards to take effect
 */
static void
thread_depress_abstime(uint64_t interval)
{
	thread_t self = current_thread();

	spl_t s = splsched();
	thread_lock(self);

	assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

	if ((self->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
		self->sched_flags |= TH_SFLAG_DEPRESS;
		thread_recompute_sched_pri(self, SETPRI_LAZY);

		if (interval != 0) {
			uint64_t deadline;

			clock_absolutetime_interval_to_deadline(interval, &deadline);
			if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL))
				self->depress_timer_active++;
		}
	}

	thread_unlock(self);
	splx(s);
}

static void
thread_depress_ms(mach_msg_timeout_t interval)
{
	uint64_t abstime;

	clock_interval_to_absolutetime_interval(interval, NSEC_PER_MSEC, &abstime);
	thread_depress_abstime(abstime);
}

/*
 *	Priority depression expiration.
 */
void
thread_depress_expire(void *p0,
    __unused void *p1)
{
	thread_t thread = (thread_t)p0;

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	if (--thread->depress_timer_active == 0) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	thread_unlock(thread);
	splx(s);
}

/*
 *	Prematurely abort priority depression if there is one.
 */
kern_return_t
thread_depress_abort(thread_t thread)
{
	kern_return_t result = KERN_NOT_DEPRESSED;

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	/*
	 * User-triggered depress-aborts should not get out
	 * of the poll-depress, but they should cancel a regular depress.
	 */
	if ((thread->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
		result = thread_depress_abort_locked(thread);
	}

	thread_unlock(thread);
	splx(s);

	return result;
}

/*
 * Prematurely abort priority depression or poll depression if one is active.
 * Called with the thread locked.
 */
kern_return_t
thread_depress_abort_locked(thread_t thread)
{
	if ((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0)
		return KERN_NOT_DEPRESSED;

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

	thread_recompute_sched_pri(thread, SETPRI_LAZY);

	if (timer_call_cancel(&thread->depress_timer))
		thread->depress_timer_active--;

	return KERN_SUCCESS;
}

/*
 * Invoked as part of a polling operation like a no-timeout port receive
 *
 * Forces a fixpri thread to yield if it is detected polling without blocking for too long.
 */
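/*
 * Worked example (numbers are illustrative): if a fixed-priority thread has
 * accumulated 32 ms of computation without blocking and sched_poll_yield_shift
 * is 4, the depress timer below is armed for roughly 32 ms >> 4 = 2 ms, after
 * which thread_depress_expire() restores the thread's priority.
 */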
void
thread_poll_yield(thread_t self)
{
	assert(self == current_thread());
	assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

	if (self->sched_mode != TH_MODE_FIXED)
		return;

	spl_t s = splsched();

	uint64_t abstime = mach_absolute_time();
	uint64_t total_computation = abstime -
	    self->computation_epoch + self->computation_metered;

	if (total_computation >= max_poll_computation) {
		thread_lock(self);

		self->computation_epoch = abstime;
		self->computation_metered = 0;

		uint64_t yield_expiration = abstime +
		    (total_computation >> sched_poll_yield_shift);

		if (!timer_call_enter(&self->depress_timer, yield_expiration,
		    TIMER_CALL_USER_CRITICAL))
			self->depress_timer_active++;

		self->sched_flags |= TH_SFLAG_POLLDEPRESS;
		thread_recompute_sched_pri(self, SETPRI_DEFAULT);

		thread_unlock(self);
	}

	splx(s);
}

/*
 * Kernel-internal interface to yield for a specified period
 *
 * WARNING: Will still yield to priority 0 even if the thread is holding a contended lock!
 */
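/*
 * Illustrative kernel-internal usage (not part of this file): a long-running
 * loop can periodically give up the CPU for a millisecond, e.g.
 *
 *	if ((iteration % 1024) == 0)
 *		thread_yield_internal(1);	// yield for ~1 ms
 *
 * Heed the warning above: do not call this while holding a contended lock.
 */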
void
thread_yield_internal(mach_msg_timeout_t ms)
{
	thread_t self = current_thread();

	assert((self->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	processor_t myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (!SCHED(thread_should_yield)(myprocessor, self)) {
		mp_enable_preemption();

		return;
	}

	enable_preemption();

	thread_depress_ms(ms);

	thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);

	thread_depress_abort(self);
}

/*
 * This yields to a possible non-urgent preemption pending on the current processor.
 *
 * This is useful when doing a long computation in the kernel without returning to userspace.
 *
 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
 */
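/*
 * Illustrative kernel-internal usage (not part of this file): since this only
 * acts when a preemption AST is already pending, it is cheap enough to call
 * from inside a long loop, e.g.
 *
 *	for (i = 0; i < nentries; i++) {
 *		process_entry(&entries[i]);	// hypothetical work item
 *		thread_yield_to_preemption();
 *	}
 */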
void
thread_yield_to_preemption()
{
	/*
	 * ast_pending() should ideally be called with interrupts disabled, but
	 * the check here is fine because csw_check() will do the right thing.
	 */
	ast_t *pending_ast = ast_pending();
	ast_t ast = AST_NONE;
	processor_t p;

	if (*pending_ast & AST_PREEMPT) {
		thread_t self = current_thread();

		spl_t s = splsched();

		p = current_processor();
		thread_lock(self);
		ast = csw_check(p, AST_YIELD);
		thread_unlock(self);

		if (ast != AST_NONE) {
			(void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
		}

		splx(s);
	}
}