/*
 * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach/mach_types.h>
#include <mach/thread_act.h>

#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>

#include <vm/vm_pageout.h>

#include <kern/thread_call.h>
#include <kern/timer_call.h>

#include <libkern/OSAtomic.h>
#include <kern/timer_queue.h>

#include <sys/kdebug.h>

#include <machine/machine_routines.h>
static ZONE_DECLARE(thread_call_zone, "thread_call",
    sizeof(thread_call_data_t), ZC_NOENCRYPT);

typedef enum {
	TCF_ABSOLUTE    = 0,
	TCF_CONTINUOUS  = 1,
	TCF_COUNT       = 2,
} thread_call_flavor_t;

__options_decl(thread_call_group_flags_t, uint32_t, {
	TCG_NONE                = 0x0,
	TCG_PARALLEL            = 0x1,
	TCG_DEALLOC_ACTIVE      = 0x2,
});
static struct thread_call_group {
	__attribute__((aligned(128))) lck_ticket_t tcg_lock;

	const char *            tcg_name;

	queue_head_t            pending_queue;
	uint32_t                pending_count;

	queue_head_t            delayed_queues[TCF_COUNT];
	struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
	timer_call_data_t       delayed_timers[TCF_COUNT];

	timer_call_data_t       dealloc_timer;

	struct waitq            idle_waitq;
	uint64_t                idle_timestamp;
	uint32_t                idle_count, active_count, blocked_count;

	uint32_t                tcg_thread_pri;
	uint32_t                target_thread_count;

	thread_call_group_flags_t tcg_flags;

	struct waitq            waiters_waitq;
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
	[THREAD_CALL_INDEX_HIGH] = {
		.tcg_thread_pri         = BASEPRI_PREEMPT_HIGH,
		.target_thread_count    = 4,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_KERNEL] = {
		.tcg_name               = "kernel",
		.tcg_thread_pri         = BASEPRI_KERNEL,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_USER] = {
		.tcg_thread_pri         = BASEPRI_DEFAULT,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_LOW] = {
		.tcg_thread_pri         = MAXPRI_THROTTLE,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_KERNEL_HIGH] = {
		.tcg_name               = "kernel-high",
		.tcg_thread_pri         = BASEPRI_PREEMPT,
		.target_thread_count    = 2,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UI] = {
		.tcg_name               = "qos-ui",
		.tcg_thread_pri         = BASEPRI_FOREGROUND,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_IN] = {
		.tcg_name               = "qos-in",
		.tcg_thread_pri         = BASEPRI_USER_INITIATED,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UT] = {
		.tcg_name               = "qos-ut",
		.tcg_thread_pri         = BASEPRI_UTILITY,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
};
typedef struct thread_call_group        *thread_call_group_t;

#define INTERNAL_CALL_COUNT             768
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
#define THREAD_CALL_ADD_RATIO           4
#define THREAD_CALL_MACH_FACTOR_CAP     3
#define THREAD_CALL_GROUP_MAX_THREADS   500
struct thread_call_thread_state {
	struct thread_call_group * thc_group;
	struct thread_call *       thc_call;    /* debug only, may be deallocated */
	uint64_t                   thc_call_start;
	uint64_t                   thc_call_soft_deadline;
	uint64_t                   thc_call_hard_deadline;
	uint64_t                   thc_call_pending_timestamp;
	uint64_t                   thc_IOTES_invocation_timestamp;
	thread_call_func_t         thc_func;
	thread_call_param_t        thc_param0;
	thread_call_param_t        thc_param1;
};
static bool            thread_call_daemon_awake = true;
/*
 * This special waitq exists because the daemon thread
 * might need to be woken while already holding a global waitq locked.
 */
static struct waitq    daemon_waitq;

static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t    thread_call_internal_queue;
int                    thread_call_internal_queue_count = 0;
static uint64_t        thread_call_dealloc_interval_abs;
static void            _internal_call_init(void);

static thread_call_t   _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static bool            _is_internal_call(thread_call_t call);
static void            _internal_call_release(thread_call_t call);
static bool            _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
static bool            _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
    uint64_t deadline, thread_call_flavor_t flavor);
static bool            _call_dequeue(thread_call_t call, thread_call_group_t group);
static void            thread_call_wake(thread_call_group_t group);
static void            thread_call_daemon(void *arg);
static void            thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void            thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void            thread_call_group_setup(thread_call_group_t group);
static void            sched_call_thread(int type, thread_t thread);
static void            thread_call_start_deallocate_timer(thread_call_group_t group);
static void            thread_call_wait_locked(thread_call_t call, spl_t s);
static bool            thread_call_wait_once_locked(thread_call_t call, spl_t s);

static boolean_t       thread_call_enter_delayed_internal(thread_call_t call,
    thread_call_func_t alt_func, thread_call_param_t alt_param0,
    thread_call_param_t param1, uint64_t deadline,
    uint64_t leeway, unsigned int flags);

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void            thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);

LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
static void
thread_call_lock_spin(thread_call_group_t group)
{
	lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
}

static void
thread_call_unlock(thread_call_group_t group)
{
	lck_ticket_unlock(&group->tcg_lock);
}

static void __assert_only
thread_call_assert_locked(thread_call_group_t group)
{
	lck_ticket_assert_owned(&group->tcg_lock);
}

static spl_t
disable_ints_and_lock(thread_call_group_t group)
{
	spl_t s = splsched();
	thread_call_lock_spin(group);

	return s;
}

static void
enable_ints_and_unlock(thread_call_group_t group, spl_t s)
{
	thread_call_unlock(group);
	splx(s);
}
static thread_call_group_t
thread_call_get_group(thread_call_t call)
{
	thread_call_index_t index = call->tc_index;

	assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);

	return &thread_call_groups[index];
}
static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)
{
	return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
}
static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
{
	assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
	thread_call_flavor_t old_flavor = thread_call_get_flavor(call);

	if (old_flavor != flavor) {
		if (flavor == TCF_CONTINUOUS) {
			call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
		} else {
			call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
		}
	}

	return old_flavor;
}
/* returns true if it was on a queue */
static bool
thread_call_enqueue_tail(
	thread_call_t           call,
	queue_t                 new_queue)
{
	queue_t                 old_queue = call->tc_queue;

	thread_call_group_t     group = thread_call_get_group(call);
	thread_call_flavor_t    flavor = thread_call_get_flavor(call);

	if (old_queue != NULL &&
	    old_queue != &group->delayed_queues[flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	if (old_queue == &group->delayed_queues[flavor]) {
		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue == NULL) {
		enqueue_tail(new_queue, &call->tc_qlink);
	} else {
		re_queue_tail(new_queue, &call->tc_qlink);
	}

	call->tc_queue = new_queue;

	return old_queue != NULL;
}
static queue_head_t *
thread_call_dequeue(thread_call_t call)
{
	queue_t                 old_queue = call->tc_queue;

	thread_call_group_t     group = thread_call_get_group(call);
	thread_call_flavor_t    flavor = thread_call_get_flavor(call);

	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	if (old_queue == &group->delayed_queues[flavor]) {
		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue != NULL) {
		remqueue(&call->tc_qlink);

		call->tc_queue = NULL;
	}

	return old_queue;
}
static queue_head_t *
thread_call_enqueue_deadline(
	thread_call_t           call,
	thread_call_group_t     group,
	thread_call_flavor_t    flavor,
	uint64_t                deadline)
{
	queue_t old_queue = call->tc_queue;
	queue_t new_queue = &group->delayed_queues[flavor];

	thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);

	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[old_flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	if (old_queue == new_queue) {
		/* optimize the same-queue case to avoid a full re-insert */
		uint64_t old_deadline = call->tc_pqlink.deadline;
		call->tc_pqlink.deadline = deadline;

		if (old_deadline < deadline) {
			priority_queue_entry_increased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		} else {
			priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		}
	} else {
		if (old_queue == &group->delayed_queues[old_flavor]) {
			priority_queue_remove(&group->delayed_pqueues[old_flavor],
			    &call->tc_pqlink);
		}

		call->tc_pqlink.deadline = deadline;

		priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue == NULL) {
		enqueue_tail(new_queue, &call->tc_qlink);
	} else if (old_queue != new_queue) {
		re_queue_tail(new_queue, &call->tc_qlink);
	}

	call->tc_queue = new_queue;

	return old_queue;
}

uint64_t
thread_call_get_armed_deadline(thread_call_t call)
{
	return call->tc_pqlink.deadline;
}
static bool
group_isparallel(thread_call_group_t group)
{
	return (group->tcg_flags & TCG_PARALLEL) != 0;
}

static bool
thread_call_group_should_add_thread(thread_call_group_t group)
{
	if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
		panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
		    group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
		    group->active_count, group->blocked_count, group->idle_count);
	}

	if (group_isparallel(group) == false) {
		if (group->pending_count > 0 && group->active_count == 0) {
			return true;
		}

		return false;
	}

	if (group->pending_count > 0) {
		if (group->idle_count > 0) {
			return false;
		}

		uint32_t thread_count = group->active_count;

		/*
		 * Add a thread if either there are no threads,
		 * the group has fewer than its target number of
		 * threads, or the amount of work is large relative
		 * to the number of threads.  In the last case, pay attention
		 * to the total load on the system, and back off if
		 * it's high.
		 */
		if ((thread_count == 0) ||
		    (thread_count < group->target_thread_count) ||
		    ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
		    (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
			return true;
		}
	}

	return false;
}
static void
thread_call_group_setup(thread_call_group_t group)
{
	lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);

	queue_init(&group->pending_queue);

	for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
		queue_init(&group->delayed_queues[flavor]);
		priority_queue_init(&group->delayed_pqueues[flavor]);
		timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
	}

	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);

	waitq_init(&group->waiters_waitq, SYNC_POLICY_DISABLE_IRQ);

	/* Reverse the wait order so we re-use the most recently parked thread from the pool */
	waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED | SYNC_POLICY_DISABLE_IRQ);
}
/*
 * Simple wrapper for creating threads bound to
 * thread call groups.
 */
static void
thread_call_thread_create(
	thread_call_group_t             group)
{
	thread_t thread;
	kern_return_t result;

	int thread_pri = group->tcg_thread_pri;

	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
	    group, thread_pri, &thread);
	if (result != KERN_SUCCESS) {
		panic("cannot create new thread call thread %d", result);
	}

	if (thread_pri <= BASEPRI_KERNEL) {
		/*
		 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
		 * in kernel if there are higher priority threads available.
		 */
		thread_set_eager_preempt(thread);
	}

	char name[MAXTHREADNAMESIZE] = "";

	int group_thread_count = group->idle_count + group->active_count + group->blocked_count;

	snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
	thread_set_thread_name(thread, name);

	thread_deallocate(thread);
}
/*
 * thread_call_initialize:
 *
 *	Initialize this module, called
 *	early during system initialization.
 */
void
thread_call_initialize(void)
{
	nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
	waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);

	for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
		thread_call_group_setup(&thread_call_groups[i]);
	}

	_internal_call_init();

	thread_t thread;
	kern_return_t result;

	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
	    NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
	if (result != KERN_SUCCESS) {
		panic("thread_call_initialize");
	}

	thread_deallocate(thread);
}
void
thread_call_setup_with_options(
	thread_call_t                   call,
	thread_call_func_t              func,
	thread_call_param_t             param0,
	thread_call_priority_t          pri,
	thread_call_options_t           options)
{
	bzero(call, sizeof(*call));

	*call = (struct thread_call) {
		.tc_func = func,
		.tc_param0 = param0,
	};

	switch (pri) {
	case THREAD_CALL_PRIORITY_HIGH:
		call->tc_index = THREAD_CALL_INDEX_HIGH;
		break;
	case THREAD_CALL_PRIORITY_KERNEL:
		call->tc_index = THREAD_CALL_INDEX_KERNEL;
		break;
	case THREAD_CALL_PRIORITY_USER:
		call->tc_index = THREAD_CALL_INDEX_USER;
		break;
	case THREAD_CALL_PRIORITY_LOW:
		call->tc_index = THREAD_CALL_INDEX_LOW;
		break;
	case THREAD_CALL_PRIORITY_KERNEL_HIGH:
		call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
		break;
	default:
		panic("Invalid thread call pri value: %d", pri);
		break;
	}

	if (options & THREAD_CALL_OPTIONS_ONCE) {
		call->tc_flags |= THREAD_CALL_ONCE;
	}
	if (options & THREAD_CALL_OPTIONS_SIGNAL) {
		call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
	}
}
void
thread_call_setup(
	thread_call_t                   call,
	thread_call_func_t              func,
	thread_call_param_t             param0)
{
	thread_call_setup_with_options(call, func, param0,
	    THREAD_CALL_PRIORITY_HIGH, 0);
}
static void
_internal_call_init(void)
{
	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	queue_init(&thread_call_internal_queue);

	for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
		enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
		thread_call_internal_queue_count++;
	}

	enable_ints_and_unlock(group, s);
}
/*
 * _internal_call_allocate:
 *
 *	Allocate an internal callout entry.
 *
 *	Called with thread_call_lock held.
 */
static thread_call_t
_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
{
	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
	    struct thread_call, tc_qlink);

	if (call == NULL) {
		panic("_internal_call_allocate: thread_call_internal_queue empty");
	}

	thread_call_internal_queue_count--;

	thread_call_setup(call, func, param0);
	/* THREAD_CALL_ALLOC not set, do not free back to zone */
	assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
	enable_ints_and_unlock(group, s);

	return call;
}
/* Check if a call is internal and needs to be returned to the internal pool. */
static bool
_is_internal_call(thread_call_t call)
{
	if (call >= internal_call_storage &&
	    call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
		assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
		return true;
	}
	return false;
}
/*
 * _internal_call_release:
 *
 *	Release an internal callout entry which
 *	is no longer pending (or delayed).
 *
 *	Called with thread_call_lock held.
 */
static void
_internal_call_release(thread_call_t call)
{
	assert(_is_internal_call(call));

	thread_call_group_t group = thread_call_get_group(call);

	assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
	thread_call_assert_locked(group);

	enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
	thread_call_internal_queue_count++;
}
/*
 * _pending_call_enqueue:
 *
 *	Place an entry at the end of the
 *	pending queue, to be executed soon.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_pending_call_enqueue(thread_call_t            call,
    thread_call_group_t   group,
    uint64_t              now)
{
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = 0;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);

		return flags & THREAD_CALL_RESCHEDULE;
	}

	call->tc_pending_timestamp = now;

	bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);

	if (!was_on_queue) {
		call->tc_submit_count++;
	}

	group->pending_count++;

	thread_call_wake(group);

	return was_on_queue;
}
/*
 * _delayed_call_enqueue:
 *
 *	Place an entry on the delayed queue,
 *	after existing entries with an earlier
 *	(or identical) deadline.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_delayed_call_enqueue(
	thread_call_t           call,
	thread_call_group_t     group,
	uint64_t                deadline,
	thread_call_flavor_t    flavor)
{
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = deadline;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);
		thread_call_set_flavor(call, flavor);

		return flags & THREAD_CALL_RESCHEDULE;
	}

	queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);

	if (old_queue == &group->pending_queue) {
		group->pending_count--;
	} else if (old_queue == NULL) {
		call->tc_submit_count++;
	}

	return old_queue != NULL;
}
/*
 * _call_dequeue:
 *
 *	Remove an entry from a queue.
 *
 *	Returns TRUE if the entry was on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_call_dequeue(
	thread_call_t           call,
	thread_call_group_t     group)
{
	queue_head_t *old_queue = thread_call_dequeue(call);

	if (old_queue == NULL) {
		return false;
	}

	call->tc_finish_count++;

	if (old_queue == &group->pending_queue) {
		group->pending_count--;
	}

	return true;
}
/*
 * _arm_delayed_call_timer:
 *
 *	Check if the timer needs to be armed for this flavor,
 *	and if so, arm it.
 *
 *	If call is non-NULL, only re-arm the timer if the specified call
 *	is the first in the queue.
 *
 *	Returns true if the timer was armed/re-armed, false if it was left unset
 *	Caller should cancel the timer if need be.
 *
 *	Called with thread_call_lock held.
 */
static bool
_arm_delayed_call_timer(thread_call_t          new_call,
    thread_call_group_t   group,
    thread_call_flavor_t  flavor)
{
	/* No calls implies no timer needed */
	if (queue_empty(&group->delayed_queues[flavor])) {
		return false;
	}

	thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);

	/* We only need to change the hard timer if this new call is the first in the list */
	if (new_call != NULL && new_call != call) {
		return false;
	}

	assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));

	uint64_t fire_at = call->tc_soft_deadline;

	if (flavor == TCF_CONTINUOUS) {
		assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
		fire_at = continuoustime_to_absolutetime(fire_at);
	} else {
		assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
	}

	/*
	 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
	 * which does not take into account later-deadline timers with a larger leeway.
	 * This is a valid coalescing behavior, but masks a possible window to
	 * fire a timer instead of going idle.
	 */
	uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;

	timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
	    fire_at, leeway,
	    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
	    ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));

	return true;
}
/*
 * _cancel_func_from_queue:
 *
 *	Remove the first (or all) matching
 *	entries from the specified queue.
 *
 *	Returns TRUE if any matching entries
 *	were found.
 *
 *	Called with thread_call_lock held.
 */
static boolean_t
_cancel_func_from_queue(thread_call_func_t     func,
    thread_call_param_t    param0,
    thread_call_group_t    group,
    boolean_t              remove_all,
    queue_head_t           *queue)
{
	boolean_t call_removed = FALSE;
	thread_call_t call;

	qe_foreach_element_safe(call, queue, tc_qlink) {
		if (call->tc_func != func ||
		    call->tc_param0 != param0) {
			continue;
		}

		_call_dequeue(call, group);

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		call_removed = TRUE;
		if (!remove_all) {
			break;
		}
	}

	return call_removed;
}
/*
 * thread_call_func_delayed:
 *
 *	Enqueue a function callout to
 *	occur at the stated time.
 */
void
thread_call_func_delayed(
	thread_call_func_t              func,
	thread_call_param_t             param,
	uint64_t                        deadline)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}

/*
 * thread_call_func_delayed_with_leeway:
 *
 *	Same as thread_call_func_delayed(), but with
 *	leeway/flags threaded through.
 */
void
thread_call_func_delayed_with_leeway(
	thread_call_func_t              func,
	thread_call_param_t             param,
	uint64_t                        deadline,
	uint64_t                        leeway,
	uint32_t                        flags)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}
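
/*
 * Usage sketch (illustrative, not part of this file): the function-only
 * interfaces borrow an entry from internal_call_storage, so the caller
 * passes just { func, param } and an absolute-time deadline.  my_expire_fn
 * and my_context are hypothetical; clock_interval_to_deadline() converts
 * 100 ms into an absolute-time deadline.
 *
 *	uint64_t deadline;
 *	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
 *	thread_call_func_delayed(my_expire_fn, my_context, deadline);
 *
 *	// later, if the work is no longer wanted:
 *	thread_call_func_cancel(my_expire_fn, my_context, FALSE);
 */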
/*
 * thread_call_func_cancel:
 *
 *	Dequeue a function callout.
 *
 *	Removes one (or all) { function, argument }
 *	instance(s) from either (or both)
 *	the pending and the delayed queue,
 *	in that order.
 *
 *	Returns TRUE if any calls were cancelled.
 *
 *	This iterates all of the pending or delayed thread calls in the group,
 *	which is really inefficient.  Switch to an allocated thread call instead.
 *
 *	TODO: Give 'func' thread calls their own group, so this silliness doesn't
 *	affect the main 'high' group.
 */
boolean_t
thread_call_func_cancel(
	thread_call_func_t              func,
	thread_call_param_t             param,
	boolean_t                       cancel_all)
{
	boolean_t result;

	assert(func != NULL);

	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	if (cancel_all) {
		/* exhaustively search every queue, and return true if any search found something */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	} else {
		/* early-exit as soon as we find something, don't search other queues */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	}

	enable_ints_and_unlock(group, s);

	return result;
}
/*
 * Allocate a thread call with a given priority.  Importances other than
 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
 * threads which are not in the normal "urgent" bands).
 */
thread_call_t
thread_call_allocate_with_priority(
	thread_call_func_t              func,
	thread_call_param_t             param0,
	thread_call_priority_t          pri)
{
	return thread_call_allocate_with_options(func, param0, pri, 0);
}
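
/*
 * Usage sketch (illustrative, not part of this file): callers that want a
 * specific invocation priority allocate their own call object.  The
 * callback and parameter names are hypothetical.
 *
 *	thread_call_t tc = thread_call_allocate_with_priority(my_work_fn, my_obj,
 *	    THREAD_CALL_PRIORITY_KERNEL);
 *	thread_call_enter(tc);		// queue it "soon" on the kernel group
 */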
thread_call_t
thread_call_allocate_with_options(
	thread_call_func_t              func,
	thread_call_param_t             param0,
	thread_call_priority_t          pri,
	thread_call_options_t           options)
{
	thread_call_t call = zalloc(thread_call_zone);

	thread_call_setup_with_options(call, func, param0, pri, options);
	call->tc_refs = 1;
	call->tc_flags |= THREAD_CALL_ALLOC;

	return call;
}
thread_call_t
thread_call_allocate_with_qos(thread_call_func_t       func,
    thread_call_param_t    param0,
    int                    qos_tier,
    thread_call_options_t  options)
{
	thread_call_t call = thread_call_allocate(func, param0);

	switch (qos_tier) {
	case THREAD_QOS_UNSPECIFIED:
		call->tc_index = THREAD_CALL_INDEX_HIGH;
		break;
	case THREAD_QOS_LEGACY:
		call->tc_index = THREAD_CALL_INDEX_USER;
		break;
	case THREAD_QOS_MAINTENANCE:
	case THREAD_QOS_BACKGROUND:
		call->tc_index = THREAD_CALL_INDEX_LOW;
		break;
	case THREAD_QOS_UTILITY:
		call->tc_index = THREAD_CALL_INDEX_QOS_UT;
		break;
	case THREAD_QOS_USER_INITIATED:
		call->tc_index = THREAD_CALL_INDEX_QOS_IN;
		break;
	case THREAD_QOS_USER_INTERACTIVE:
		call->tc_index = THREAD_CALL_INDEX_QOS_UI;
		break;
	default:
		panic("Invalid thread call qos value: %d", qos_tier);
		break;
	}

	if (options & THREAD_CALL_OPTIONS_ONCE) {
		call->tc_flags |= THREAD_CALL_ONCE;
	}

	/* does not support THREAD_CALL_OPTIONS_SIGNAL */

	return call;
}
/*
 * thread_call_allocate:
 *
 *	Allocate a callout entry.
 */
thread_call_t
thread_call_allocate(
	thread_call_func_t              func,
	thread_call_param_t             param0)
{
	return thread_call_allocate_with_options(func, param0,
	           THREAD_CALL_PRIORITY_HIGH, 0);
}
/*
 * thread_call_free:
 *
 *	Release a callout.  If the callout is currently
 *	executing, it will be freed when all invocations
 *	finish.
 *
 *	If the callout is currently armed to fire again, then
 *	freeing is not allowed and returns FALSE.  The
 *	client must have canceled the pending invocation before freeing.
 */
boolean_t
thread_call_free(
	thread_call_t                   call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	if (call->tc_queue != NULL ||
	    ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
		thread_call_unlock(group);
		splx(s);

		return FALSE;
	}

	int32_t refs = --call->tc_refs;
	if (refs < 0) {
		panic("Refcount negative: %d\n", refs);
	}

	if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
	    == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
		thread_call_wait_once_locked(call, s);
		/* thread call lock has been unlocked */
	} else {
		enable_ints_and_unlock(group, s);
	}

	if (refs == 0) {
		assert(call->tc_finish_count == call->tc_submit_count);
		zfree(thread_call_zone, call);
	}

	return TRUE;
}
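
/*
 * Teardown sketch (illustrative, not part of this file): because
 * thread_call_free() refuses to free an armed call, a typical owner cancels
 * (and waits out any in-flight invocation) before freeing.  Names are
 * hypothetical.
 *
 *	thread_call_cancel_wait(my_tc);		// no requested invocations left in flight
 *	if (!thread_call_free(my_tc)) {
 *		panic("my_subsys: thread call still armed at teardown");
 *	}
 */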
/*
 * thread_call_enter:
 *
 *	Enqueue a callout entry to occur "soon".
 *
 *	Returns TRUE if the call was
 *	already on a queue.
 */
boolean_t
thread_call_enter(
	thread_call_t                   call)
{
	return thread_call_enter1(call, 0);
}

boolean_t
thread_call_enter1(
	thread_call_t                   call,
	thread_call_param_t             param1)
{
	assert(call->tc_func != NULL);
	assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);

	thread_call_group_t group = thread_call_get_group(call);
	bool result = true;

	spl_t s = disable_ints_and_lock(group);

	if (call->tc_queue != &group->pending_queue) {
		result = _pending_call_enqueue(call, group, mach_absolute_time());
	}

	call->tc_param1 = param1;

	enable_ints_and_unlock(group, s);

	return result;
}
/*
 * thread_call_enter_delayed:
 *
 *	Enqueue a callout entry to occur
 *	at the stated time.
 *
 *	Returns TRUE if the call was
 *	already on a queue.
 */
boolean_t
thread_call_enter_delayed(
	thread_call_t           call,
	uint64_t                deadline)
{
	assert(call != NULL);
	return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
}

boolean_t
thread_call_enter1_delayed(
	thread_call_t           call,
	thread_call_param_t     param1,
	uint64_t                deadline)
{
	assert(call != NULL);
	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
}

boolean_t
thread_call_enter_delayed_with_leeway(
	thread_call_t           call,
	thread_call_param_t     param1,
	uint64_t                deadline,
	uint64_t                leeway,
	unsigned int            flags)
{
	assert(call != NULL);
	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
}
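
/*
 * Usage sketch (illustrative, not part of this file): a caller that can
 * tolerate coalescing passes an explicit leeway together with the
 * THREAD_CALL_DELAY_LEEWAY flag; adding THREAD_CALL_CONTINUOUS instead makes
 * the deadline a mach_continuous_time() value.  my_tc and my_arg are
 * hypothetical.
 *
 *	uint64_t deadline, leeway;
 *	clock_interval_to_deadline(500, NSEC_PER_MSEC, &deadline);
 *	nanoseconds_to_absolutetime(50 * NSEC_PER_MSEC, &leeway);
 *	thread_call_enter_delayed_with_leeway(my_tc, my_arg, deadline, leeway,
 *	    THREAD_CALL_DELAY_LEEWAY);
 */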
/*
 * thread_call_enter_delayed_internal:
 *	enqueue a callout entry to occur at the stated time
 *
 *	Returns True if the call was already on a queue
 *	call     - structure encapsulating state of the callout
 *	alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 *	deadline - time deadline in nanoseconds
 *	leeway   - timer slack represented as delta of deadline.
 *	flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 *	           THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 *	           THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
 *	                                   than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
	thread_call_t           call,
	thread_call_func_t      alt_func,
	thread_call_param_t     alt_param0,
	thread_call_param_t     param1,
	uint64_t                deadline,
	uint64_t                leeway,
	unsigned int            flags)
{
	uint64_t now, sdeadline;

	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
	uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

	if (call == NULL) {
		/* allocate a structure out of internal storage, as a convenience for BSD callers */
		call = _internal_call_allocate(alt_func, alt_param0);
	}

	assert(call->tc_func != NULL);
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * kevent and IOTES let you change flavor for an existing timer, so we have to
	 * support flipping flavors for enqueued thread calls.
	 */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	call->tc_flags |= THREAD_CALL_DELAYED;

	call->tc_soft_deadline = sdeadline = deadline;

	boolean_t ratelimited = FALSE;
	uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
		slop = leeway;
	}

	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (ratelimited) {
		call->tc_flags |= THREAD_CALL_RATELIMITED;
	} else {
		call->tc_flags &= ~THREAD_CALL_RATELIMITED;
	}

	call->tc_param1 = param1;

	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

	bool result = _delayed_call_enqueue(call, group, deadline, flavor);

	_arm_delayed_call_timer(call, group, flavor);

	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
	    uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
	    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);

	enable_ints_and_unlock(group, s);

	return result;
}
/*
 * Remove a callout entry from the queue
 * Called with thread_call_lock held
 */
static boolean_t
thread_call_cancel_locked(thread_call_t call)
{
	boolean_t canceled;

	if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
		call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
		canceled = true;

		/* if reschedule was set, it must not have been queued */
		assert(call->tc_queue == NULL);
	} else {
		bool queue_head_changed = false;

		thread_call_flavor_t flavor = thread_call_get_flavor(call);
		thread_call_group_t  group  = thread_call_get_group(call);

		if (call->tc_pqlink.deadline != 0 &&
		    call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
			assert(call->tc_queue == &group->delayed_queues[flavor]);
			queue_head_changed = true;
		}

		canceled = _call_dequeue(call, group);

		if (queue_head_changed) {
			if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
				timer_call_cancel(&group->delayed_timers[flavor]);
			}
		}
	}

	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
	    0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));

	return canceled;
}

/*
 * thread_call_cancel:
 *
 *	Dequeue a callout entry.
 *
 *	Returns TRUE if the call was
 *	on a queue.
 */
boolean_t
thread_call_cancel(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	boolean_t result = thread_call_cancel_locked(call);

	enable_ints_and_unlock(group, s);

	return result;
}
/*
 * Cancel a thread call.  If it cannot be cancelled (i.e.
 * is already in flight), waits for the most recent invocation
 * to finish.  Note that if clients re-submit this thread call,
 * it may still be pending or in flight when thread_call_cancel_wait
 * returns, but all requests to execute this work item prior
 * to the call to thread_call_cancel_wait will have finished.
 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
	}

	if (!ml_get_interrupts_enabled()) {
		panic("unsafe thread_call_cancel_wait");
	}

	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
	    self->thc_state && self->thc_state->thc_call == call) {
		panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
		    call, call->tc_func);
	}

	spl_t s = disable_ints_and_lock(group);

	boolean_t canceled = thread_call_cancel_locked(call);

	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
		/*
		 * A cancel-wait on a 'once' call will both cancel
		 * the pending call and wait for the in-flight call
		 * to finish.
		 */
		thread_call_wait_once_locked(call, s);
		/* thread call lock unlocked */
	} else {
		/*
		 * A cancel-wait on a normal call will only wait for the in-flight calls
		 * if it did not cancel the pending call.
		 *
		 * TODO: This seems less than useful - shouldn't it do the wait as well?
		 */
		if (canceled == FALSE) {
			thread_call_wait_locked(call, s);
			/* thread call lock unlocked */
		} else {
			enable_ints_and_unlock(group, s);
		}
	}

	return canceled;
}
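
/*
 * Usage sketch (illustrative, not part of this file): cancel-then-wait is
 * the pattern for quiescing a callout before tearing down the state its
 * callback touches.  my_tc, my_state, and my_state_destroy() are
 * hypothetical.
 *
 *	boolean_t dequeued = thread_call_cancel_wait(my_tc);
 *	// whether or not the pending call was dequeued, every invocation
 *	// requested before this point has finished, so the callback can no
 *	// longer be touching my_state
 *	my_state_destroy(my_state);
 */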
/*
 * thread_call_wake:
 *
 *	Wake a call thread to service
 *	pending call entries.  May wake
 *	the daemon thread in order to
 *	create additional call threads.
 *
 *	Called with thread_call_lock held.
 *
 *	For high-priority group, only does wakeup/creation if there are no threads
 *	running.
 */
static void
thread_call_wake(
	thread_call_group_t             group)
{
	/*
	 * New behavior: use threads if you've got 'em.
	 * Traditional behavior: wake only if no threads running.
	 */
	if (group_isparallel(group) || group->active_count == 0) {
		if (group->idle_count) {
			__assert_only kern_return_t kr;

			kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			assert(kr == KERN_SUCCESS);

			group->idle_count--;
			group->active_count++;

			if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
				if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
					group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
				}
			}
		} else {
			if (thread_call_group_should_add_thread(group) &&
			    os_atomic_cmpxchg(&thread_call_daemon_awake,
			    false, true, relaxed)) {
				waitq_wakeup64_all(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake),
				    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			}
		}
	}
}
/*
 * sched_call_thread:
 *
 *	Call out invoked by the scheduler.
 */
static void
sched_call_thread(
	int                             type,
	thread_t                        thread)
{
	thread_call_group_t             group;

	assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
	assert(thread->thc_state != NULL);

	group = thread->thc_state->thc_group;
	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);

	thread_call_lock_spin(group);

	switch (type) {
	case SCHED_CALL_BLOCK:
		assert(group->active_count);
		--group->active_count;
		group->blocked_count++;
		if (group->pending_count > 0) {
			thread_call_wake(group);
		}
		break;

	case SCHED_CALL_UNBLOCK:
		assert(group->blocked_count);
		--group->blocked_count;
		group->active_count++;
		break;
	}

	thread_call_unlock(group);
}
/*
 * Interrupts disabled, lock held; returns the same way.
 * Only called on thread calls whose storage we own.  Wakes up
 * anyone who might be waiting on this work item and frees it
 * if the client has so requested.
 */
static bool
thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
{
	assert(thread_call_get_group(call) == group);

	bool repend = false;
	bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
	bool alloc = call->tc_flags & THREAD_CALL_ALLOC;

	call->tc_finish_count++;

	if (!signal && alloc) {
		/* The thread call thread owns a ref until the call is finished */
		if (call->tc_refs <= 0) {
			panic("thread_call_finish: detected over-released thread call: %p", call);
		}
		call->tc_refs--;
	}

	thread_call_flags_t old_flags = call->tc_flags;
	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);

	if ((!alloc || call->tc_refs != 0) &&
	    (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
		assert(old_flags & THREAD_CALL_ONCE);
		thread_call_flavor_t flavor = thread_call_get_flavor(call);

		if (old_flags & THREAD_CALL_DELAYED) {
			uint64_t now = mach_absolute_time();
			if (flavor == TCF_CONTINUOUS) {
				now = absolutetime_to_continuoustime(now);
			}
			if (call->tc_soft_deadline <= now) {
				/* The deadline has already expired, go straight to pending */
				call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
				call->tc_pqlink.deadline = 0;
			}
		}

		if (call->tc_pqlink.deadline) {
			_delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
			if (!signal) {
				_arm_delayed_call_timer(call, group, flavor);
			}
		} else if (signal) {
			call->tc_submit_count++;
			repend = true;
		} else {
			_pending_call_enqueue(call, group, mach_absolute_time());
		}
	}

	if (!signal && alloc && call->tc_refs == 0) {
		if ((old_flags & THREAD_CALL_WAIT) != 0) {
			panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_func);
		}

		assert(call->tc_finish_count == call->tc_submit_count);

		enable_ints_and_unlock(group, *s);

		zfree(thread_call_zone, call);

		*s = disable_ints_and_lock(group);
	}

	if ((old_flags & THREAD_CALL_WAIT) != 0) {
		/*
		 * This may wake up a thread with a registered sched_call.
		 * That call might need the group lock, so we drop the lock
		 * to avoid deadlocking.
		 *
		 * We also must use a separate waitq from the idle waitq, as
		 * this path goes waitq lock->thread lock->group lock, but
		 * the idle wait goes group lock->waitq_lock->thread_lock.
		 */
		thread_call_unlock(group);

		waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);

		thread_call_lock_spin(group);
		/* THREAD_CALL_SIGNAL call may have been freed */
	}

	return repend;
}
/*
 * thread_call_invoke
 *
 * Invoke the function provided for this thread call
 *
 * Note that the thread call object can be deallocated by the function if we do not control its storage.
 */
static void __attribute__((noinline))
thread_call_invoke(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_param_t param1,
    __unused thread_call_t call)
{
#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_DTRACE
	uint64_t tc_ttd = call->tc_ttd;
	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

	(*func)(param0, param1);

#if CONFIG_DTRACE
	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
#endif /* DEVELOPMENT || DEBUG */
}
/*
 * thread_call_thread:
 */
static void
thread_call_thread(
	thread_call_group_t             group,
	wait_result_t                   wres)
{
	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
	}

	/*
	 * A wakeup with THREAD_INTERRUPTED indicates that
	 * we should terminate.
	 */
	if (wres == THREAD_INTERRUPTED) {
		thread_terminate(self);

		/* NOTREACHED */
		panic("thread_terminate() returned?");
	}

	spl_t s = disable_ints_and_lock(group);

	struct thread_call_thread_state thc_state = { .thc_group = group };
	self->thc_state = &thc_state;

	thread_sched_call(self, sched_call_thread);

	while (group->pending_count > 0) {
		thread_call_t call = qe_dequeue_head(&group->pending_queue,
		    struct thread_call, tc_qlink);
		assert(call != NULL);

		group->pending_count--;
		if (group->pending_count == 0) {
			assert(queue_empty(&group->pending_queue));
		}

		thread_call_func_t  func   = call->tc_func;
		thread_call_param_t param0 = call->tc_param0;
		thread_call_param_t param1 = call->tc_param1;

		call->tc_queue = NULL;

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		/*
		 * Can only do wakeups for thread calls whose storage
		 * we control.
		 */
		bool needs_finish = false;
		if (call->tc_flags & THREAD_CALL_ALLOC) {
			call->tc_refs++;        /* Delay free until we're done */
		}
		if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
			/*
			 * If THREAD_CALL_ONCE is used, and the timer wasn't
			 * THREAD_CALL_ALLOC, then clients swear they will use
			 * thread_call_cancel_wait() before destroying
			 * the thread call.
			 *
			 * Else, the storage for the thread call might have
			 * disappeared when thread_call_invoke() ran.
			 */
			needs_finish = true;
			call->tc_flags |= THREAD_CALL_RUNNING;
		}

		thc_state.thc_call = call;
		thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
		thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
		thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
		thc_state.thc_func = func;
		thc_state.thc_param0 = param0;
		thc_state.thc_param1 = param1;
		thc_state.thc_IOTES_invocation_timestamp = 0;

		enable_ints_and_unlock(group, s);

		thc_state.thc_call_start = mach_absolute_time();

		thread_call_invoke(func, param0, param1, call);

		thc_state.thc_call = NULL;

		if (get_preemption_level() != 0) {
			int pl = get_preemption_level();
			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
			    pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
		}

		s = disable_ints_and_lock(group);

		if (needs_finish) {
			/* Release refcount, may free, may temporarily drop lock */
			thread_call_finish(call, group, &s);
		}
	}

	thread_sched_call(self, NULL);
	group->active_count--;

	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
		if (self->callout_woken_from_platform_idle) {
			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
		}
	}

	self->callout_woken_from_icontext = FALSE;
	self->callout_woken_from_platform_idle = FALSE;
	self->callout_woke_thread = FALSE;

	self->thc_state = NULL;

	if (group_isparallel(group)) {
		/*
		 * For new style of thread group, thread always blocks.
		 * If we have more than the target number of threads,
		 * and this is the first to block, and it isn't active
		 * already, set a timer for deallocating a thread if we
		 * continue to have a surplus.
		 */
		group->idle_count++;

		if (group->idle_count == 1) {
			group->idle_timestamp = mach_absolute_time();
		}

		if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
		    ((group->active_count + group->idle_count) > group->target_thread_count)) {
			thread_call_start_deallocate_timer(group);
		}

		/* Wait for more work (or termination) */
		wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
		if (wres != THREAD_WAITING) {
			panic("kcall worker unable to assert wait?");
		}

		enable_ints_and_unlock(group, s);

		thread_block_parameter((thread_continue_t)thread_call_thread, group);
		/* NOTREACHED */
	} else {
		if (group->idle_count < group->target_thread_count) {
			group->idle_count++;

			waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */

			enable_ints_and_unlock(group, s);

			thread_block_parameter((thread_continue_t)thread_call_thread, group);
			/* NOTREACHED */
		}
	}

	enable_ints_and_unlock(group, s);

	thread_terminate(self);
	/* NOTREACHED */
}
void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)
{
	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		/* not a thread call thread, might be a workloop IOTES */
		return;
	}

	assert(self->thc_state);
	assert(self->thc_state->thc_call == call);

	self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
}
/*
 * thread_call_daemon: walk list of groups, allocating
 * threads if appropriate (as determined by
 * thread_call_group_should_add_thread()).
 */
static void
thread_call_daemon_continue(__unused void *arg)
{
	do {
		os_atomic_store(&thread_call_daemon_awake, false, relaxed);

		/* Starting at zero happens to be high-priority first. */
		for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
			thread_call_group_t group = &thread_call_groups[i];

			spl_t s = disable_ints_and_lock(group);

			while (thread_call_group_should_add_thread(group)) {
				group->active_count++;

				enable_ints_and_unlock(group, s);

				thread_call_thread_create(group);

				s = disable_ints_and_lock(group);
			}

			enable_ints_and_unlock(group, s);
		}
	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));

	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);

	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
		clear_wait(current_thread(), THREAD_AWAKENED);
	}

	thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
	/* NOTREACHED */
}

static void
thread_call_daemon(__unused void *arg)
{
	thread_t self = current_thread();

	self->options |= TH_OPT_VMPRIV;
	vm_page_free_reserve(2);        /* XXX */

	thread_set_thread_name(self, "thread_call_daemon");

	thread_call_daemon_continue(NULL);
	/* NOTREACHED */
}
/*
 * Schedule timer to deallocate a worker thread if we have a surplus
 * of threads (in excess of the group's target) and at least one thread
 * is idle the whole time.
 */
static void
thread_call_start_deallocate_timer(thread_call_group_t group)
{
	__assert_only bool already_enqueued;

	assert(group->idle_count > 0);
	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);

	group->tcg_flags |= TCG_DEALLOC_ACTIVE;

	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;

	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);

	assert(already_enqueued == false);
}
/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
void
thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
{
	thread_call_group_t  group  = (thread_call_group_t)  p0;
	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;

	thread_call_t call;
	uint64_t now;

	thread_call_lock_spin(group);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else if (flavor == TCF_ABSOLUTE) {
		now = mach_absolute_time();
	} else {
		panic("invalid timer flavor: %d", flavor);
	}

	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
	    struct thread_call, tc_pqlink)) != NULL) {
		assert(thread_call_get_group(call) == group);
		assert(thread_call_get_flavor(call) == flavor);

		/*
		 * if we hit a call that isn't yet ready to expire,
		 * then we're done for now
		 * TODO: The next timer in the list could have a larger leeway
		 *       and therefore be ready to expire.
		 */
		if (call->tc_soft_deadline > now) {
			break;
		}

		/*
		 * If we hit a rate-limited timer, don't eagerly wake it up.
		 * Wait until it reaches the end of the leeway window.
		 *
		 * TODO: What if the next timer is not rate-limited?
		 *       Have a separate rate-limited queue to avoid this.
		 */
		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
		    (call->tc_pqlink.deadline > now) &&
		    (ml_timer_forced_evaluation() == FALSE)) {
			break;
		}

		if (THREAD_CALL_SIGNAL & call->tc_flags) {
			__assert_only queue_head_t *old_queue;
			old_queue = thread_call_dequeue(call);
			assert(old_queue == &group->delayed_queues[flavor]);

			do {
				thread_call_func_t  func   = call->tc_func;
				thread_call_param_t param0 = call->tc_param0;
				thread_call_param_t param1 = call->tc_param1;

				call->tc_flags |= THREAD_CALL_RUNNING;

				thread_call_unlock(group);
				thread_call_invoke(func, param0, param1, call);
				thread_call_lock_spin(group);

				/* finish may detect that the call has been re-pended */
			} while (thread_call_finish(call, group, NULL));
			/* call may have been freed by the finish */
		} else {
			_pending_call_enqueue(call, group, now);
		}
	}

	_arm_delayed_call_timer(call, group, flavor);

	thread_call_unlock(group);
}
static void
thread_call_delayed_timer_rescan(thread_call_group_t group,
    thread_call_flavor_t flavor)
{
	thread_call_t call;
	uint64_t now;

	spl_t s = disable_ints_and_lock(group);

	assert(ml_timer_forced_evaluation() == TRUE);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
		if (call->tc_soft_deadline <= now) {
			_pending_call_enqueue(call, group, now);
		} else {
			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
			/*
			 * On a latency quality-of-service level change,
			 * re-sort potentially rate-limited callout. The platform
			 * layer determines which timers require this.
			 *
			 * This trick works by updating the deadline value to
			 * equal soft-deadline, effectively crushing away
			 * timer coalescing slop values for any armed
			 * timer in the queue.
			 *
			 * TODO: keep a hint on the timer to tell whether its inputs changed, so we
			 * only have to crush coalescing for timers that need it.
			 *
			 * TODO: Keep a separate queue of timers above the re-sort
			 * threshold, so we only have to look at those.
			 */
			if (timer_resort_threshold(skew)) {
				_call_dequeue(call, group);
				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
			}
		}
	}

	_arm_delayed_call_timer(NULL, group, flavor);

	enable_ints_and_unlock(group, s);
}
void
thread_call_delayed_timer_rescan_all(void)
{
	for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
		for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
			thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
		}
	}
}
/*
 * Timer callback to tell a thread to terminate if
 * we have an excess of threads and at least one has been
 * idle for a long time.
 */
static void
thread_call_dealloc_timer(
	timer_call_param_t              p0,
	__unused timer_call_param_t     p1)
{
	thread_call_group_t group = (thread_call_group_t)p0;
	uint64_t now;
	kern_return_t res;
	bool terminated = false;

	thread_call_lock_spin(group);

	assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);

	now = mach_absolute_time();

	if (group->idle_count > 0) {
		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
			terminated = true;
			group->idle_count--;
			res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
			if (res != KERN_SUCCESS) {
				panic("Unable to wake up idle thread for termination?");
			}
		}
	}

	group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;

	/*
	 * If we still have an excess of threads, schedule another
	 * invocation of this function.
	 */
	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
		/*
		 * If we killed someone just now, push out the
		 * next deadline.
		 */
		if (terminated) {
			group->idle_timestamp = now;
		}

		thread_call_start_deallocate_timer(group);
	}

	thread_call_unlock(group);
}
/*
 * Wait for the invocation of the thread call to complete
 * We know there's only one in flight because of the 'once' flag.
 *
 * If a subsequent invocation comes in before we wake up, that's OK
 *
 * TODO: Here is where we will add priority inheritance to the thread executing
 *       the thread call in case it's lower priority than the current thread
 *       <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
 *
 * Takes the thread call lock locked, returns unlocked
 * This lets us avoid a spurious take/drop after waking up from thread_block
 *
 * This thread could be a thread call thread itself, blocking and therefore making a
 * sched_call upcall into the thread call subsystem, needing the group lock.
 * However, we're saved from deadlock because the 'block' upcall is made in
 * thread_block, not in assert_wait.
 */
static bool
thread_call_wait_once_locked(thread_call_t call, spl_t s)
{
	assert(call->tc_flags & THREAD_CALL_ALLOC);
	assert(call->tc_flags & THREAD_CALL_ONCE);

	thread_call_group_t group = thread_call_get_group(call);

	if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
		enable_ints_and_unlock(group, s);
		return false;
	}

	/* call is running, so we have to wait for it */
	call->tc_flags |= THREAD_CALL_WAIT;

	wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
	if (res != THREAD_WAITING) {
		panic("Unable to assert wait: %d", res);
	}

	enable_ints_and_unlock(group, s);

	res = thread_block(THREAD_CONTINUE_NULL);
	if (res != THREAD_AWAKENED) {
		panic("Awoken with %d?", res);
	}

	/* returns unlocked */
	return true;
}
/*
 * Wait for an in-flight invocation to complete
 * Does NOT try to cancel, so the client doesn't need to hold their
 * lock while calling this function.
 *
 * Returns whether or not it had to wait.
 *
 * Only works for THREAD_CALL_ONCE calls.
 */
boolean_t
thread_call_wait_once(thread_call_t call)
{
	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
	}

	if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
		panic("thread_call_wait_once: can't wait_once on a non-once call");
	}

	if (!ml_get_interrupts_enabled()) {
		panic("unsafe thread_call_wait_once");
	}

	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
	    self->thc_state && self->thc_state->thc_call == call) {
		panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
		    call, call->tc_func);
	}

	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	bool waited = thread_call_wait_once_locked(call, s);
	/* thread call lock unlocked */

	return waited;
}
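
/*
 * Usage sketch (illustrative, not part of this file): a ONCE call can be
 * waited on without cancelling it, e.g. to let an in-flight invocation
 * drain while leaving future submissions alone.  my_once_fn and my_obj are
 * hypothetical.
 *
 *	thread_call_t tc = thread_call_allocate_with_options(my_once_fn, my_obj,
 *	    THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
 *	...
 *	if (thread_call_wait_once(tc)) {
 *		// we blocked until the in-flight invocation finished
 *	}
 */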
/*
 * Wait for all requested invocations of a thread call prior to now
 * to finish.  Can only be invoked on thread calls whose storage we manage.
 * Just waits for the finish count to catch up to the submit count we find
 * at the beginning of our wait.
 *
 * Called with thread_call_lock held.  Returns with lock released.
 */
static void
thread_call_wait_locked(thread_call_t call, spl_t s)
{
	thread_call_group_t group = thread_call_get_group(call);

	assert(call->tc_flags & THREAD_CALL_ALLOC);

	uint64_t submit_count = call->tc_submit_count;

	while (call->tc_finish_count < submit_count) {
		call->tc_flags |= THREAD_CALL_WAIT;

		wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
		    CAST_EVENT64_T(call), THREAD_UNINT, 0);

		if (res != THREAD_WAITING) {
			panic("Unable to assert wait: %d", res);
		}

		enable_ints_and_unlock(group, s);

		res = thread_block(THREAD_CONTINUE_NULL);
		if (res != THREAD_AWAKENED) {
			panic("Awoken with %d?", res);
		}

		s = disable_ints_and_lock(group);
	}

	enable_ints_and_unlock(group, s);
}
/*
 * Determine whether a thread call is either on a queue or
 * currently being executed.
 */
boolean_t
thread_call_isactive(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);
	boolean_t active = (call->tc_submit_count > call->tc_finish_count);
	enable_ints_and_unlock(group, s);

	return active;
}
/*
 * adjust_cont_time_thread_calls
 * on wake, reenqueue delayed call timer for continuous time thread call groups
 */
void
adjust_cont_time_thread_calls(void)
{
	for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
		thread_call_group_t group = &thread_call_groups[i];
		spl_t s = disable_ints_and_lock(group);

		/* only the continuous timers need to be re-armed */

		_arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
		enable_ints_and_unlock(group, s);
	}
}