/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/counters.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <mach/policy.h>

#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>
#include <sys/kdebug.h>

extern void workqueue_thread_yielded(void);
extern sched_call_t workqueue_get_sched_callback(void);

extern wait_result_t thread_handoff_reason(thread_t thread, ast_t reason);

/* Called from commpage to take a delayed preemption when exiting
 * the "Preemption Free Zone" (PFZ).
 */
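/*
 * Descriptive note: the PFZ covers a small set of commpage routines
 * (e.g. the commpage FIFO enqueue/dequeue helpers) that must not be
 * interrupted mid-update.  If a preemption becomes pending while a
 * thread's PC is inside that region, the AST is deferred; the commpage
 * code then enters the kernel through this trap on the way out so the
 * deferred preemption can be taken promptly.
 */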
kern_return_t
pfz_exit(
__unused	struct pfz_exit_args *args)
{
	/* For now, nothing special to do.  We'll pick up the ASTs on kernel exit. */

	return (KERN_SUCCESS);
}

/*
 *	swtch and swtch_pri both attempt to context switch (logic in
 *	thread_block no-ops the context switch if nothing would happen).
 *	A boolean is returned that indicates whether there is anything
 *	else runnable.  That's no excuse to spin, though.
 */
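/*
 * Illustrative (not from this file): user code reaches these through the
 * swtch()/swtch_pri() Mach traps, typically in a contention backoff loop:
 *
 *	while (!try_lock(&l)) {		// try_lock is a hypothetical helper
 *		if (!swtch_pri(0))	// depress and yield; FALSE means nothing
 *			usleep(10);	//   else was runnable, so back off harder
 *	}
 *
 * The return value is advisory only; it is not a license to spin.
 */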
static void
swtch_continue(void)
{
	processor_t	myprocessor;
	boolean_t	result;

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	enable_preemption();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch(
	__unused struct swtch_args *args)
{
	processor_t	myprocessor;
	boolean_t	result;

	disable_preemption();
	myprocessor = current_processor();
	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
		mp_enable_preemption();

		return (FALSE);
	}
	enable_preemption();

	counter(c_swtch_block++);

	thread_block_reason((thread_continue_t)swtch_continue, NULL, AST_YIELD);

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	enable_preemption();

	return (result);
}

static void
swtch_pri_continue(void)
{
	processor_t	myprocessor;
	boolean_t	result;

	thread_depress_abort_internal(current_thread());

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	mp_enable_preemption();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch_pri(
__unused	struct swtch_pri_args *args)
{
	processor_t	myprocessor;
	boolean_t	result;

	disable_preemption();
	myprocessor = current_processor();
	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
		mp_enable_preemption();

		return (FALSE);
	}
	enable_preemption();

	counter(c_swtch_pri_block++);

	thread_depress_abstime(thread_depress_time);

	thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD);

	thread_depress_abort_internal(current_thread());

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	enable_preemption();

	return (result);
}

static boolean_t
thread_switch_disable_workqueue_sched_callback(void)
{
	sched_call_t callback = workqueue_get_sched_callback();
	return thread_disable_sched_call(current_thread(), callback) != NULL;
}

static void
thread_switch_enable_workqueue_sched_callback(void)
{
	sched_call_t callback = workqueue_get_sched_callback();
	thread_reenable_sched_call(current_thread(), callback);
}

static void
thread_switch_continue(void)
{
	thread_t	self = current_thread();
	int		option = self->saved.swtch.option;
	boolean_t	reenable_workq_callback = self->saved.swtch.reenable_workq_callback;

	if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS)
		thread_depress_abort_internal(self);

	if (reenable_workq_callback)
		thread_switch_enable_workqueue_sched_callback();

	thread_syscall_return(KERN_SUCCESS);
	/*NOTREACHED*/
}

/*
 *	thread_switch:
 *
 *	Context switch.  User may supply thread hint.
 */
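/*
 * Descriptive note: the optional hint names a specific thread to hand the
 * processor to.  User-level synchronization code (for example spin-lock
 * and dispatch contention paths) is the expected caller, passing the lock
 * owner's thread port when it knows one and MACH_PORT_NULL otherwise.
 */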
kern_return_t
thread_switch(
	struct thread_switch_args *args)
{
	thread_t		thread = THREAD_NULL;
	thread_t		self = current_thread();
	mach_port_name_t	thread_name = args->thread_name;
	int			option = args->option;
	mach_msg_timeout_t	option_time = args->option_time;
	uint32_t		scale_factor = NSEC_PER_MSEC;
	boolean_t		reenable_workq_callback = FALSE;
	boolean_t		depress_option = FALSE;
	boolean_t		wait_option = FALSE;

	/*
	 *	Validate and process option.
	 */
	switch (option) {

	case SWITCH_OPTION_NONE:
		workqueue_thread_yielded();
		break;
	case SWITCH_OPTION_WAIT:
		wait_option = TRUE;
		workqueue_thread_yielded();
		break;
	case SWITCH_OPTION_DEPRESS:
		depress_option = TRUE;
		workqueue_thread_yielded();
		break;
	case SWITCH_OPTION_DISPATCH_CONTENTION:
		scale_factor = NSEC_PER_USEC;
		wait_option = TRUE;
		if (thread_switch_disable_workqueue_sched_callback())
			reenable_workq_callback = TRUE;
		break;
	case SWITCH_OPTION_OSLOCK_DEPRESS:
		depress_option = TRUE;
		if (thread_switch_disable_workqueue_sched_callback())
			reenable_workq_callback = TRUE;
		break;
	case SWITCH_OPTION_OSLOCK_WAIT:
		wait_option = TRUE;
		if (thread_switch_disable_workqueue_sched_callback())
			reenable_workq_callback = TRUE;
		break;
	default:
		return (KERN_INVALID_ARGUMENT);
	}

288 * Translate the port name if supplied.
290 if (thread_name
!= MACH_PORT_NULL
) {
293 if (ipc_port_translate_send(self
->task
->itk_space
,
294 thread_name
, &port
) == KERN_SUCCESS
) {
298 thread
= convert_port_to_thread(port
);
301 if (thread
== self
) {
302 thread_deallocate(thread
);
303 thread
= THREAD_NULL
;
	if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
		if (thread != THREAD_NULL) {

			if (thread->task != self->task) {
				/*
				 * OSLock boosting only applies to other threads
				 * in your same task (even if you have a port for
				 * a thread in another task)
				 */

				thread_deallocate(thread);
				thread = THREAD_NULL;
			} else {
				/*
				 * Attempt to kick the lock owner up to our same IO throttling tier.
				 * If the thread is currently blocked in throttle_lowpri_io(),
				 * it will immediately break out.
				 *
				 * TODO: SFI break out?
				 */
				int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

				set_thread_iotier_override(thread, new_policy);
			}
		}
	}

	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		/* This may return a different thread if the target is pushing on something */
		thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE,
				      thread_tid(thread), thread->state,
				      pulled_thread ? TRUE : FALSE, 0, 0);

		if (pulled_thread != THREAD_NULL) {
			/* We can't be dropping the last ref here */
			thread_deallocate_safe(thread);

			if (wait_option)
				assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE,
				                    option_time, scale_factor);
			else if (depress_option)
				thread_depress_ms(option_time);

			self->saved.swtch.option = option;
			self->saved.swtch.reenable_workq_callback = reenable_workq_callback;

			thread_run(self, (thread_continue_t)thread_switch_continue, NULL, pulled_thread);
			/* NOTREACHED */
			panic("returned from thread_run!");
		}

		splx(s);

		thread_deallocate(thread);
	}

	if (wait_option)
		assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor);
	else if (depress_option)
		thread_depress_ms(option_time);

	self->saved.swtch.option = option;
	self->saved.swtch.reenable_workq_callback = reenable_workq_callback;

	thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD);

	if (depress_option)
		thread_depress_abort_internal(self);

	if (reenable_workq_callback)
		thread_switch_enable_workqueue_sched_callback();

	return (KERN_SUCCESS);
}

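/*
 * The two routines below support the ulock subsystem (the kernel side of
 * os_unfair_lock and friends): one translates a lock owner's port name
 * into a thread reference, the other performs a directed handoff to it.
 */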
/* Returns a +1 thread reference */
thread_t
port_name_to_thread_for_ulock(mach_port_name_t thread_name)
{
	thread_t thread = THREAD_NULL;
	thread_t self = current_thread();

	/*
	 * Translate the port name if supplied.
	 */
	if (thread_name != MACH_PORT_NULL) {
		ipc_port_t port;

		if (ipc_port_translate_send(self->task->itk_space,
		                            thread_name, &port) == KERN_SUCCESS) {
			ip_reference(port);
			ip_unlock(port);

			thread = convert_port_to_thread(port);
			ip_release(port);

			if (thread == THREAD_NULL) {
				return thread;
			}

			if ((thread == self) || (thread->task != self->task)) {
				thread_deallocate(thread);
				thread = THREAD_NULL;
			}
		}
	}

	return thread;
}

/* This function is called after an assert_wait(), therefore it must not
 * cause another wait until after the thread_run() or thread_block()
 *
 * Consumes a ref on thread
 */
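/*
 * Illustrative caller pattern (assumed, not from this file):
 *
 *	assert_wait(event, THREAD_ABORTSAFE);	// publish that we will wait
 *	...record ourselves as a waiter...	// must not block in here
 *	wr = thread_handoff(owner);		// consumes the owner reference
 *
 * Everything between the assert_wait() and the handoff has to stay
 * non-blocking, per the constraint above.
 */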
wait_result_t
thread_handoff(thread_t thread)
{
	thread_t deallocate_thread = THREAD_NULL;
	thread_t self = current_thread();

	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE,
				      thread_tid(thread), thread->state,
				      pulled_thread ? TRUE : FALSE, 0, 0);

		if (pulled_thread != THREAD_NULL) {
			/* We can't be dropping the last ref here */
			thread_deallocate_safe(thread);

			int result = thread_run(self, THREAD_CONTINUE_NULL, NULL, pulled_thread);

			splx(s);
			return result;
		}

		splx(s);

		deallocate_thread = thread;
		thread = THREAD_NULL;
	}

	int result = thread_block(THREAD_CONTINUE_NULL);
	if (deallocate_thread != THREAD_NULL) {
		thread_deallocate(deallocate_thread);
	}

	return result;
}

/*
 * Depress thread's priority to lowest possible for the specified interval,
 * with a value of zero resulting in no timeout being scheduled.
 */
void
thread_depress_abstime(
	uint64_t		interval)
{
	thread_t		self = current_thread();
	uint64_t		deadline;
	spl_t			s;

	s = splsched();
	thread_lock(self);
	if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
		processor_t	myprocessor = self->last_processor;

		self->sched_pri = DEPRESSPRI;

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
				      (uintptr_t)thread_tid(self),
				      self->base_pri,
				      self->sched_pri,
				      0, /* eventually, 'reason' */
				      0);

		myprocessor->current_pri = self->sched_pri;
		self->sched_flags |= TH_SFLAG_DEPRESS;

		if (interval != 0) {
			clock_absolutetime_interval_to_deadline(interval, &deadline);
			if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL))
				self->depress_timer_active++;
		}
	}
	thread_unlock(self);
	splx(s);
}

void
thread_depress_ms(
	mach_msg_timeout_t	interval)
{
	uint64_t		abstime;

	clock_interval_to_absolutetime_interval(
						interval, NSEC_PER_MSEC, &abstime);
	thread_depress_abstime(abstime);
}

/*
 *	Priority depression expiration.
 */
void
thread_depress_expire(
	void			*p0,
	__unused void		*p1)
{
	thread_t		thread = p0;
	spl_t			s;

	s = splsched();
	thread_lock(thread);
	if (--thread->depress_timer_active == 0) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
		thread_recompute_sched_pri(thread, FALSE);
	}
	thread_unlock(thread);
	splx(s);
}

/*
 *	Prematurely abort priority depression if there is one.
 */
kern_return_t
thread_depress_abort_internal(
	thread_t		thread)
{
	kern_return_t		result = KERN_NOT_DEPRESSED;
	spl_t			s;

	s = splsched();
	thread_lock(thread);
	if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
		if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
			thread_recompute_sched_pri(thread, FALSE);
			result = KERN_SUCCESS;
		}

		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}
	thread_unlock(thread);
	splx(s);

	return (result);
}

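/*
 * Penalize a fixed-priority thread that polls: once its uninterrupted
 * computation exceeds max_poll_computation, temporarily depress it
 * (TH_SFLAG_POLLDEPRESS) and arm the depress timer so it cannot starve
 * other threads by spinning without ever blocking.
 */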
void
thread_poll_yield(
	thread_t		self)
{
	spl_t			s;

	assert(self == current_thread());

	s = splsched();
	if (self->sched_mode == TH_MODE_FIXED) {
		uint64_t	total_computation, abstime;

		abstime = mach_absolute_time();
		total_computation = abstime - self->computation_epoch;
		total_computation += self->computation_metered;
		if (total_computation >= max_poll_computation) {
			processor_t	myprocessor = current_processor();
			ast_t		preempt;

			thread_lock(self);
			if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
				self->sched_pri = DEPRESSPRI;

				KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
						      (uintptr_t)thread_tid(self),
						      self->base_pri,
						      self->sched_pri,
						      0, /* eventually, 'reason' */
						      0);

				myprocessor->current_pri = self->sched_pri;
			}
			self->computation_epoch = abstime;
			self->computation_metered = 0;
			self->sched_flags |= TH_SFLAG_POLLDEPRESS;

			abstime += (total_computation >> sched_poll_yield_shift);
			if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL))
				self->depress_timer_active++;

			if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE)
				ast_on(preempt);

			thread_unlock(self);
		}
	}
	splx(s);
}

void
thread_yield_internal(
	mach_msg_timeout_t	ms)
{
	processor_t	myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
		mp_enable_preemption();

		return;
	}
	enable_preemption();

	thread_depress_ms(ms);

	thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);

	thread_depress_abort_internal(current_thread());
}

/*
 * This yields to a possible non-urgent preemption pending on the current processor.
 *
 * This is useful when doing a long computation in the kernel without returning to userspace.
 *
 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
 */
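/*
 * Illustrative in-kernel use (process_item is a hypothetical helper):
 *
 *	for (i = 0; i < count; i++) {
 *		process_item(i);
 *		if ((i % 1024) == 0)
 *			thread_yield_to_preemption();
 *	}
 */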
void
thread_yield_to_preemption()
{
	/*
	 * ast_pending() should ideally be called with interrupts disabled, but
	 * the check here is fine because csw_check() will do the right thing.
	 */
	ast_t *pending_ast = ast_pending();
	ast_t ast = AST_NONE;
	processor_t p;

	if (*pending_ast & AST_PREEMPT) {
		thread_t self = current_thread();

		spl_t s = splsched();

		p = current_processor();
		thread_lock(self);
		ast = csw_check(p, AST_YIELD);
		thread_unlock(self);

		if (ast != AST_NONE) {
			(void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
		}

		splx(s);
	}
}