]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/thread_call.c
xnu-4570.1.46.tar.gz
[apple/xnu.git] / osfmk / kern / thread_call.c
1 /*
2 * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/zalloc.h>
34 #include <kern/sched_prim.h>
35 #include <kern/clock.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/waitq.h>
39 #include <kern/ledger.h>
40 #include <kern/policy_internal.h>
41
42 #include <vm/vm_pageout.h>
43
44 #include <kern/thread_call.h>
45 #include <kern/call_entry.h>
46 #include <kern/timer_call.h>
47
48 #include <libkern/OSAtomic.h>
49 #include <kern/timer_queue.h>
50
51 #include <sys/kdebug.h>
52 #if CONFIG_DTRACE
53 #include <mach/sdt.h>
54 #endif
55 #include <machine/machine_routines.h>
56
57 static zone_t thread_call_zone;
58 static struct waitq daemon_waitq;
59
/*
 * Selects which delayed queue / hard timer of a group a call uses.
 * A call is TCF_CONTINUOUS when THREAD_CALL_CONTINUOUS is set in its
 * flags and TCF_ABSOLUTE otherwise.  TCF_COUNT is the number of flavors
 * and sizes the per-group delayed_queues[] and delayed_timers[] arrays.
 */
typedef enum {
	TCF_ABSOLUTE = 0,
	TCF_CONTINUOUS,         /* == 1 */
	TCF_COUNT,              /* == 2 */
} thread_call_flavor_t;
65
/*
 * Per-group flag bits stored in thread_call_group.flags.
 * TCG_PARALLEL is what group_isparallel() tests.
 */
typedef enum {
	TCG_NONE                = 0,
	TCG_PARALLEL            = 1 << 0,       /* 0x1 */
	TCG_DEALLOC_ACTIVE      = 1 << 1,       /* 0x2 */
} thread_call_group_flags_t;
71
72 static struct thread_call_group {
73 const char * tcg_name;
74
75 queue_head_t pending_queue;
76 uint32_t pending_count;
77
78 queue_head_t delayed_queues[TCF_COUNT];
79 timer_call_data_t delayed_timers[TCF_COUNT];
80
81 timer_call_data_t dealloc_timer;
82
83 struct waitq idle_waitq;
84 uint32_t idle_count, active_count, blocked_count;
85
86 uint32_t tcg_thread_pri;
87 uint32_t target_thread_count;
88 uint64_t idle_timestamp;
89
90 thread_call_group_flags_t flags;
91
92 } thread_call_groups[THREAD_CALL_INDEX_MAX] = {
93 [THREAD_CALL_INDEX_HIGH] = {
94 .tcg_name = "high",
95 .tcg_thread_pri = BASEPRI_PREEMPT_HIGH,
96 .target_thread_count = 4,
97 .flags = TCG_NONE,
98 },
99 [THREAD_CALL_INDEX_KERNEL] = {
100 .tcg_name = "kernel",
101 .tcg_thread_pri = BASEPRI_KERNEL,
102 .target_thread_count = 1,
103 .flags = TCG_PARALLEL,
104 },
105 [THREAD_CALL_INDEX_USER] = {
106 .tcg_name = "user",
107 .tcg_thread_pri = BASEPRI_DEFAULT,
108 .target_thread_count = 1,
109 .flags = TCG_PARALLEL,
110 },
111 [THREAD_CALL_INDEX_LOW] = {
112 .tcg_name = "low",
113 .tcg_thread_pri = MAXPRI_THROTTLE,
114 .target_thread_count = 1,
115 .flags = TCG_PARALLEL,
116 },
117 [THREAD_CALL_INDEX_KERNEL_HIGH] = {
118 .tcg_name = "kernel-high",
119 .tcg_thread_pri = BASEPRI_PREEMPT,
120 .target_thread_count = 2,
121 .flags = TCG_NONE,
122 },
123 [THREAD_CALL_INDEX_QOS_UI] = {
124 .tcg_name = "qos-ui",
125 .tcg_thread_pri = BASEPRI_FOREGROUND,
126 .target_thread_count = 1,
127 .flags = TCG_NONE,
128 },
129 [THREAD_CALL_INDEX_QOS_IN] = {
130 .tcg_name = "qos-in",
131 .tcg_thread_pri = BASEPRI_USER_INITIATED,
132 .target_thread_count = 1,
133 .flags = TCG_NONE,
134 },
135 [THREAD_CALL_INDEX_QOS_UT] = {
136 .tcg_name = "qos-ut",
137 .tcg_thread_pri = BASEPRI_UTILITY,
138 .target_thread_count = 1,
139 .flags = TCG_NONE,
140 },
141 };
142
143 typedef struct thread_call_group *thread_call_group_t;
144
145 #define INTERNAL_CALL_COUNT 768
146 #define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
147 #define THREAD_CALL_ADD_RATIO 4
148 #define THREAD_CALL_MACH_FACTOR_CAP 3
149 #define THREAD_CALL_GROUP_MAX_THREADS 500
150
151 static boolean_t thread_call_daemon_awake;
152 static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
153 static queue_head_t thread_call_internal_queue;
154 int thread_call_internal_queue_count = 0;
155 static uint64_t thread_call_dealloc_interval_abs;
156
157 static __inline__ thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
158 static __inline__ void _internal_call_release(thread_call_t call);
159 static __inline__ boolean_t _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
160 static boolean_t _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
161 uint64_t deadline, thread_call_flavor_t flavor);
162 static __inline__ boolean_t _call_dequeue(thread_call_t call, thread_call_group_t group);
163 static __inline__ void thread_call_wake(thread_call_group_t group);
164 static void thread_call_daemon(void *arg);
165 static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
166 static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
167 static void thread_call_group_setup(thread_call_group_t group);
168 static void sched_call_thread(int type, thread_t thread);
169 static void thread_call_start_deallocate_timer(thread_call_group_t group);
170 static void thread_call_wait_locked(thread_call_t call, spl_t s);
171 static boolean_t thread_call_wait_once_locked(thread_call_t call, spl_t s);
172
173 static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
174 thread_call_func_t alt_func, thread_call_param_t alt_param0,
175 thread_call_param_t param1, uint64_t deadline,
176 uint64_t leeway, unsigned int flags);
177
178 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
179 extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
180
181 lck_grp_t thread_call_lck_grp;
182 lck_mtx_t thread_call_lock_data;
183
184 #define thread_call_lock_spin() \
185 lck_mtx_lock_spin_always(&thread_call_lock_data)
186
187 #define thread_call_unlock() \
188 lck_mtx_unlock_always(&thread_call_lock_data)
189
190 #define tc_deadline tc_call.deadline
191
192 extern boolean_t mach_timer_coalescing_enabled;
193
194 static inline spl_t
195 disable_ints_and_lock(void)
196 {
197 spl_t s = splsched();
198 thread_call_lock_spin();
199
200 return s;
201 }
202
203 static inline void
204 enable_ints_and_unlock(spl_t s)
205 {
206 thread_call_unlock();
207 splx(s);
208 }
209
210 static inline boolean_t
211 group_isparallel(thread_call_group_t group)
212 {
213 return ((group->flags & TCG_PARALLEL) != 0);
214 }
215
216 static boolean_t
217 thread_call_group_should_add_thread(thread_call_group_t group)
218 {
219 if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
220 panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
221 group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
222 group->active_count, group->blocked_count, group->idle_count);
223 }
224
225 if (group_isparallel(group) == FALSE) {
226 if (group->pending_count > 0 && group->active_count == 0) {
227 return TRUE;
228 }
229
230 return FALSE;
231 }
232
233 if (group->pending_count > 0) {
234 if (group->idle_count > 0) {
235 return FALSE;
236 }
237
238 uint32_t thread_count = group->active_count;
239
240 /*
241 * Add a thread if either there are no threads,
242 * the group has fewer than its target number of
243 * threads, or the amount of work is large relative
244 * to the number of threads. In the last case, pay attention
245 * to the total load on the system, and back off if
246 * it's high.
247 */
248 if ((thread_count == 0) ||
249 (thread_count < group->target_thread_count) ||
250 ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
251 (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
252 return TRUE;
253 }
254 }
255
256 return FALSE;
257 }
258
259 /* Lock held */
260 static inline thread_call_group_t
261 thread_call_get_group(thread_call_t call)
262 {
263 thread_call_index_t index = call->tc_index;
264
265 assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);
266
267 return &thread_call_groups[index];
268 }
269
270 /* Lock held */
271 static inline thread_call_flavor_t
272 thread_call_get_flavor(thread_call_t call)
273 {
274 return (call->tc_flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
275 }
276
277 static void
278 thread_call_group_setup(thread_call_group_t group)
279 {
280 queue_init(&group->pending_queue);
281 queue_init(&group->delayed_queues[TCF_ABSOLUTE]);
282 queue_init(&group->delayed_queues[TCF_CONTINUOUS]);
283
284 /* TODO: Consolidate to one hard timer for each group */
285 timer_call_setup(&group->delayed_timers[TCF_ABSOLUTE], thread_call_delayed_timer, group);
286 timer_call_setup(&group->delayed_timers[TCF_CONTINUOUS], thread_call_delayed_timer, group);
287 timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
288
289 /* Reverse the wait order so we re-use the most recently parked thread from the pool */
290 waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED|SYNC_POLICY_DISABLE_IRQ);
291 }
292
293 /*
294 * Simple wrapper for creating threads bound to
295 * thread call groups.
296 */
297 static kern_return_t
298 thread_call_thread_create(
299 thread_call_group_t group)
300 {
301 thread_t thread;
302 kern_return_t result;
303
304 int thread_pri = group->tcg_thread_pri;
305
306 result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
307 group, thread_pri, &thread);
308 if (result != KERN_SUCCESS) {
309 return result;
310 }
311
312 if (thread_pri <= BASEPRI_KERNEL) {
313 /*
314 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
315 * in kernel if there are higher priority threads available.
316 */
317 thread_set_eager_preempt(thread);
318 }
319
320 char name[MAXTHREADNAMESIZE] = "";
321
322 int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
323
324 snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
325 thread_set_thread_name(thread, name);
326
327 thread_deallocate(thread);
328 return KERN_SUCCESS;
329 }
330
331 /*
332 * thread_call_initialize:
333 *
334 * Initialize this module, called
335 * early during system initialization.
336 */
337 void
338 thread_call_initialize(void)
339 {
340 int tc_size = sizeof (thread_call_data_t);
341 thread_call_zone = zinit(tc_size, 4096 * tc_size, 16 * tc_size, "thread_call");
342 zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
343 zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
344
345 lck_grp_init(&thread_call_lck_grp, "thread_call", LCK_GRP_ATTR_NULL);
346 lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, LCK_ATTR_NULL);
347
348 nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
349 waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);
350
351 for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++)
352 thread_call_group_setup(&thread_call_groups[i]);
353
354 spl_t s = disable_ints_and_lock();
355
356 queue_init(&thread_call_internal_queue);
357 for (
358 thread_call_t call = internal_call_storage;
359 call < &internal_call_storage[INTERNAL_CALL_COUNT];
360 call++) {
361
362 enqueue_tail(&thread_call_internal_queue, &call->tc_call.q_link);
363 thread_call_internal_queue_count++;
364 }
365
366 thread_call_daemon_awake = TRUE;
367
368 enable_ints_and_unlock(s);
369
370 thread_t thread;
371 kern_return_t result;
372
373 result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
374 NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
375 if (result != KERN_SUCCESS)
376 panic("thread_call_initialize");
377
378 thread_deallocate(thread);
379 }
380
381 void
382 thread_call_setup(
383 thread_call_t call,
384 thread_call_func_t func,
385 thread_call_param_t param0)
386 {
387 bzero(call, sizeof(*call));
388 call_entry_setup((call_entry_t)call, func, param0);
389
390 /* Thread calls default to the HIGH group unless otherwise specified */
391 call->tc_index = THREAD_CALL_INDEX_HIGH;
392
393 /* THREAD_CALL_ALLOC not set, memory owned by caller */
394 }
395
396 /*
397 * _internal_call_allocate:
398 *
399 * Allocate an internal callout entry.
400 *
401 * Called with thread_call_lock held.
402 */
403 static __inline__ thread_call_t
404 _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
405 {
406 thread_call_t call;
407
408 if (queue_empty(&thread_call_internal_queue))
409 panic("_internal_call_allocate");
410
411 call = qe_dequeue_head(&thread_call_internal_queue, struct thread_call, tc_call.q_link);
412
413 thread_call_internal_queue_count--;
414
415 thread_call_setup(call, func, param0);
416 call->tc_refs = 0;
417 call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */
418
419 return (call);
420 }
421
422 /*
423 * _internal_call_release:
424 *
425 * Release an internal callout entry which
426 * is no longer pending (or delayed). This is
427 * safe to call on a non-internal entry, in which
428 * case nothing happens.
429 *
430 * Called with thread_call_lock held.
431 */
432 static __inline__ void
433 _internal_call_release(thread_call_t call)
434 {
435 if (call >= internal_call_storage &&
436 call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
437 assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
438 enqueue_head(&thread_call_internal_queue, &call->tc_call.q_link);
439 thread_call_internal_queue_count++;
440 }
441 }
442
443 /*
444 * _pending_call_enqueue:
445 *
446 * Place an entry at the end of the
447 * pending queue, to be executed soon.
448 *
449 * Returns TRUE if the entry was already
450 * on a queue.
451 *
452 * Called with thread_call_lock held.
453 */
454 static __inline__ boolean_t
455 _pending_call_enqueue(thread_call_t call,
456 thread_call_group_t group)
457 {
458 if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
459 == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
460 call->tc_deadline = 0;
461
462 uint32_t flags = call->tc_flags;
463 call->tc_flags |= THREAD_CALL_RESCHEDULE;
464
465 if ((flags & THREAD_CALL_RESCHEDULE) != 0)
466 return (TRUE);
467 else
468 return (FALSE);
469 }
470
471 queue_head_t *old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
472
473 if (old_queue == NULL) {
474 call->tc_submit_count++;
475 } else if (old_queue != &group->pending_queue &&
476 old_queue != &group->delayed_queues[TCF_ABSOLUTE] &&
477 old_queue != &group->delayed_queues[TCF_CONTINUOUS]) {
478 panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
479 }
480
481 group->pending_count++;
482
483 thread_call_wake(group);
484
485 return (old_queue != NULL);
486 }
487
488 /*
489 * _delayed_call_enqueue:
490 *
491 * Place an entry on the delayed queue,
492 * after existing entries with an earlier
493 * (or identical) deadline.
494 *
495 * Returns TRUE if the entry was already
496 * on a queue.
497 *
498 * Called with thread_call_lock held.
499 */
500 static boolean_t
501 _delayed_call_enqueue(
502 thread_call_t call,
503 thread_call_group_t group,
504 uint64_t deadline,
505 thread_call_flavor_t flavor)
506 {
507 if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
508 == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
509 call->tc_deadline = deadline;
510
511 uint32_t flags = call->tc_flags;
512 call->tc_flags |= THREAD_CALL_RESCHEDULE;
513
514 if ((flags & THREAD_CALL_RESCHEDULE) != 0)
515 return (TRUE);
516 else
517 return (FALSE);
518 }
519
520 queue_head_t *old_queue = call_entry_enqueue_deadline(CE(call),
521 &group->delayed_queues[flavor],
522 deadline);
523
524 if (old_queue == &group->pending_queue) {
525 group->pending_count--;
526 } else if (old_queue == NULL) {
527 call->tc_submit_count++;
528 } else if (old_queue == &group->delayed_queues[TCF_ABSOLUTE] ||
529 old_queue == &group->delayed_queues[TCF_CONTINUOUS]) {
530 /* TODO: if it's in the other delayed queue, that might not be OK */
531 // we did nothing, and that's fine
532 } else {
533 panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
534 }
535
536 return (old_queue != NULL);
537 }
538
539 /*
540 * _call_dequeue:
541 *
542 * Remove an entry from a queue.
543 *
544 * Returns TRUE if the entry was on a queue.
545 *
546 * Called with thread_call_lock held.
547 */
548 static __inline__ boolean_t
549 _call_dequeue(
550 thread_call_t call,
551 thread_call_group_t group)
552 {
553 queue_head_t *old_queue;
554
555 old_queue = call_entry_dequeue(CE(call));
556
557 if (old_queue != NULL) {
558 assert(old_queue == &group->pending_queue ||
559 old_queue == &group->delayed_queues[TCF_ABSOLUTE] ||
560 old_queue == &group->delayed_queues[TCF_CONTINUOUS]);
561
562 call->tc_finish_count++;
563 if (old_queue == &group->pending_queue)
564 group->pending_count--;
565 }
566
567 return (old_queue != NULL);
568 }
569
570 /*
571 * _arm_delayed_call_timer:
572 *
573 * Check if the timer needs to be armed for this flavor,
574 * and if so, arm it.
575 *
576 * If call is non-NULL, only re-arm the timer if the specified call
577 * is the first in the queue.
578 *
579 * Returns true if the timer was armed/re-armed, false if it was left unset
580 * Caller should cancel the timer if need be.
581 *
582 * Called with thread_call_lock held.
583 */
584 static bool
585 _arm_delayed_call_timer(thread_call_t new_call,
586 thread_call_group_t group,
587 thread_call_flavor_t flavor)
588 {
589 /* No calls implies no timer needed */
590 if (queue_empty(&group->delayed_queues[flavor]))
591 return false;
592
593 thread_call_t call = qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link);
594
595 /* We only need to change the hard timer if this new call is the first in the list */
596 if (new_call != NULL && new_call != call)
597 return false;
598
599 assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_call.deadline)));
600
601 uint64_t fire_at = call->tc_soft_deadline;
602
603 if (flavor == TCF_CONTINUOUS) {
604 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
605 fire_at = continuoustime_to_absolutetime(fire_at);
606 } else {
607 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == 0);
608 }
609
610 /*
611 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
612 * which does not take into account later-deadline timers with a larger leeway.
613 * This is a valid coalescing behavior, but masks a possible window to
614 * fire a timer instead of going idle.
615 */
616 uint64_t leeway = call->tc_call.deadline - call->tc_soft_deadline;
617
618 timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
619 fire_at, leeway,
620 TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LEEWAY,
621 ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
622
623 return true;
624 }
625
626 /*
627 * _cancel_func_from_queue:
628 *
629 * Remove the first (or all) matching
630 * entries from the specified queue.
631 *
632 * Returns TRUE if any matching entries
633 * were found.
634 *
635 * Called with thread_call_lock held.
636 */
637 static boolean_t
638 _cancel_func_from_queue(thread_call_func_t func,
639 thread_call_param_t param0,
640 thread_call_group_t group,
641 boolean_t remove_all,
642 queue_head_t *queue)
643 {
644 boolean_t call_removed = FALSE;
645 thread_call_t call;
646
647 qe_foreach_element_safe(call, queue, tc_call.q_link) {
648 if (call->tc_call.func != func ||
649 call->tc_call.param0 != param0) {
650 continue;
651 }
652
653 _call_dequeue(call, group);
654
655 _internal_call_release(call);
656
657 call_removed = TRUE;
658 if (!remove_all)
659 break;
660 }
661
662 return (call_removed);
663 }
664
665 /*
666 * thread_call_func_delayed:
667 *
668 * Enqueue a function callout to
669 * occur at the stated time.
670 */
671 void
672 thread_call_func_delayed(
673 thread_call_func_t func,
674 thread_call_param_t param,
675 uint64_t deadline)
676 {
677 (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
678 }
679
680 /*
681 * thread_call_func_delayed_with_leeway:
682 *
683 * Same as thread_call_func_delayed(), but with
684 * leeway/flags threaded through.
685 */
686
687 void
688 thread_call_func_delayed_with_leeway(
689 thread_call_func_t func,
690 thread_call_param_t param,
691 uint64_t deadline,
692 uint64_t leeway,
693 uint32_t flags)
694 {
695 (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
696 }
697
698 /*
699 * thread_call_func_cancel:
700 *
701 * Dequeue a function callout.
702 *
703 * Removes one (or all) { function, argument }
704 * instance(s) from either (or both)
705 * the pending and the delayed queue,
706 * in that order.
707 *
708 * Returns TRUE if any calls were cancelled.
709 *
710 * This iterates all of the pending or delayed thread calls in the group,
711 * which is really inefficient. Switch to an allocated thread call instead.
712 */
713 boolean_t
714 thread_call_func_cancel(
715 thread_call_func_t func,
716 thread_call_param_t param,
717 boolean_t cancel_all)
718 {
719 boolean_t result;
720
721 assert(func != NULL);
722
723 spl_t s = disable_ints_and_lock();
724
725 /* Function-only thread calls are only kept in the default HIGH group */
726 thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
727
728 if (cancel_all) {
729 /* exhaustively search every queue, and return true if any search found something */
730 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
731 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
732 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
733 } else {
734 /* early-exit as soon as we find something, don't search other queues */
735 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
736 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
737 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
738 }
739
740 enable_ints_and_unlock(s);
741
742 return (result);
743 }
744
745 /*
746 * Allocate a thread call with a given priority. Importances other than
747 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
748 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
749 * threads which are not in the normal "urgent" bands).
750 */
751 thread_call_t
752 thread_call_allocate_with_priority(
753 thread_call_func_t func,
754 thread_call_param_t param0,
755 thread_call_priority_t pri)
756 {
757 return thread_call_allocate_with_options(func, param0, pri, 0);
758 }
759
760 thread_call_t
761 thread_call_allocate_with_options(
762 thread_call_func_t func,
763 thread_call_param_t param0,
764 thread_call_priority_t pri,
765 thread_call_options_t options)
766 {
767 thread_call_t call = thread_call_allocate(func, param0);
768
769 switch (pri) {
770 case THREAD_CALL_PRIORITY_HIGH:
771 call->tc_index = THREAD_CALL_INDEX_HIGH;
772 break;
773 case THREAD_CALL_PRIORITY_KERNEL:
774 call->tc_index = THREAD_CALL_INDEX_KERNEL;
775 break;
776 case THREAD_CALL_PRIORITY_USER:
777 call->tc_index = THREAD_CALL_INDEX_USER;
778 break;
779 case THREAD_CALL_PRIORITY_LOW:
780 call->tc_index = THREAD_CALL_INDEX_LOW;
781 break;
782 case THREAD_CALL_PRIORITY_KERNEL_HIGH:
783 call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
784 break;
785 default:
786 panic("Invalid thread call pri value: %d", pri);
787 break;
788 }
789
790 if (options & THREAD_CALL_OPTIONS_ONCE) {
791 call->tc_flags |= THREAD_CALL_ONCE;
792 }
793 if (options & THREAD_CALL_OPTIONS_SIGNAL) {
794 call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
795 }
796
797 return call;
798 }
799
800 thread_call_t
801 thread_call_allocate_with_qos(thread_call_func_t func,
802 thread_call_param_t param0,
803 int qos_tier,
804 thread_call_options_t options)
805 {
806 thread_call_t call = thread_call_allocate(func, param0);
807
808 switch (qos_tier) {
809 case THREAD_QOS_UNSPECIFIED:
810 call->tc_index = THREAD_CALL_INDEX_HIGH;
811 break;
812 case THREAD_QOS_LEGACY:
813 call->tc_index = THREAD_CALL_INDEX_USER;
814 break;
815 case THREAD_QOS_MAINTENANCE:
816 case THREAD_QOS_BACKGROUND:
817 call->tc_index = THREAD_CALL_INDEX_LOW;
818 break;
819 case THREAD_QOS_UTILITY:
820 call->tc_index = THREAD_CALL_INDEX_QOS_UT;
821 break;
822 case THREAD_QOS_USER_INITIATED:
823 call->tc_index = THREAD_CALL_INDEX_QOS_IN;
824 break;
825 case THREAD_QOS_USER_INTERACTIVE:
826 call->tc_index = THREAD_CALL_INDEX_QOS_UI;
827 break;
828 default:
829 panic("Invalid thread call qos value: %d", qos_tier);
830 break;
831 }
832
833 if (options & THREAD_CALL_OPTIONS_ONCE)
834 call->tc_flags |= THREAD_CALL_ONCE;
835
836 /* does not support THREAD_CALL_OPTIONS_SIGNAL */
837
838 return call;
839 }
840
841
842 /*
843 * thread_call_allocate:
844 *
845 * Allocate a callout entry.
846 */
847 thread_call_t
848 thread_call_allocate(
849 thread_call_func_t func,
850 thread_call_param_t param0)
851 {
852 thread_call_t call = zalloc(thread_call_zone);
853
854 thread_call_setup(call, func, param0);
855 call->tc_refs = 1;
856 call->tc_flags = THREAD_CALL_ALLOC;
857
858 return (call);
859 }
860
861 /*
862 * thread_call_free:
863 *
864 * Release a callout. If the callout is currently
865 * executing, it will be freed when all invocations
866 * finish.
867 *
868 * If the callout is currently armed to fire again, then
869 * freeing is not allowed and returns FALSE. The
870 * client must have canceled the pending invocation before freeing.
871 */
872 boolean_t
873 thread_call_free(
874 thread_call_t call)
875 {
876 spl_t s = disable_ints_and_lock();
877
878 if (call->tc_call.queue != NULL ||
879 ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
880 thread_call_unlock();
881 splx(s);
882
883 return (FALSE);
884 }
885
886 int32_t refs = --call->tc_refs;
887 if (refs < 0) {
888 panic("Refcount negative: %d\n", refs);
889 }
890
891 if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
892 == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
893 thread_call_wait_once_locked(call, s);
894 /* thread call lock has been unlocked */
895 } else {
896 enable_ints_and_unlock(s);
897 }
898
899 if (refs == 0) {
900 assert(call->tc_finish_count == call->tc_submit_count);
901 zfree(thread_call_zone, call);
902 }
903
904 return (TRUE);
905 }
906
907 /*
908 * thread_call_enter:
909 *
910 * Enqueue a callout entry to occur "soon".
911 *
912 * Returns TRUE if the call was
913 * already on a queue.
914 */
915 boolean_t
916 thread_call_enter(
917 thread_call_t call)
918 {
919 return thread_call_enter1(call, 0);
920 }
921
922 boolean_t
923 thread_call_enter1(
924 thread_call_t call,
925 thread_call_param_t param1)
926 {
927 boolean_t result = TRUE;
928 thread_call_group_t group;
929
930 assert(call->tc_call.func != NULL);
931
932 assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);
933
934 group = thread_call_get_group(call);
935
936 spl_t s = disable_ints_and_lock();
937
938 if (call->tc_call.queue != &group->pending_queue) {
939 result = _pending_call_enqueue(call, group);
940 }
941
942 call->tc_call.param1 = param1;
943
944 enable_ints_and_unlock(s);
945
946 return (result);
947 }
948
949 /*
950 * thread_call_enter_delayed:
951 *
952 * Enqueue a callout entry to occur
953 * at the stated time.
954 *
955 * Returns TRUE if the call was
956 * already on a queue.
957 */
958 boolean_t
959 thread_call_enter_delayed(
960 thread_call_t call,
961 uint64_t deadline)
962 {
963 assert(call != NULL);
964 return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
965 }
966
967 boolean_t
968 thread_call_enter1_delayed(
969 thread_call_t call,
970 thread_call_param_t param1,
971 uint64_t deadline)
972 {
973 assert(call != NULL);
974 return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
975 }
976
977 boolean_t
978 thread_call_enter_delayed_with_leeway(
979 thread_call_t call,
980 thread_call_param_t param1,
981 uint64_t deadline,
982 uint64_t leeway,
983 unsigned int flags)
984 {
985 assert(call != NULL);
986 return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
987 }
988
989
990 /*
991 * thread_call_enter_delayed_internal:
992 * enqueue a callout entry to occur at the stated time
993 *
994 * Returns True if the call was already on a queue
995 * params:
996 * call - structure encapsulating state of the callout
997 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
998 * deadline - time deadline in nanoseconds
999 * leeway - timer slack represented as delta of deadline.
1000 * flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
1001 * THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
1002 * THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
1003 * than mach_absolute_time
1004 */
1005 boolean_t
1006 thread_call_enter_delayed_internal(
1007 thread_call_t call,
1008 thread_call_func_t alt_func,
1009 thread_call_param_t alt_param0,
1010 thread_call_param_t param1,
1011 uint64_t deadline,
1012 uint64_t leeway,
1013 unsigned int flags)
1014 {
1015 boolean_t result = TRUE;
1016 thread_call_group_t group;
1017 uint64_t now, sdeadline, slop;
1018 uint32_t urgency;
1019
1020 thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
1021
1022 /* direct mapping between thread_call, timer_call, and timeout_urgency values */
1023 urgency = (flags & TIMEOUT_URGENCY_MASK);
1024
1025 spl_t s = disable_ints_and_lock();
1026
1027 if (call == NULL) {
1028 /* allocate a structure out of internal storage, as a convenience for BSD callers */
1029 call = _internal_call_allocate(alt_func, alt_param0);
1030 }
1031
1032 assert(call->tc_call.func != NULL);
1033 group = thread_call_get_group(call);
1034
1035 /* TODO: assert that call is not enqueued before flipping the flag */
1036 if (flavor == TCF_CONTINUOUS) {
1037 now = mach_continuous_time();
1038 call->tc_flags |= THREAD_CALL_CONTINUOUS;
1039 } else {
1040 now = mach_absolute_time();
1041 call->tc_flags &= ~THREAD_CALL_CONTINUOUS;
1042 }
1043
1044 call->tc_flags |= THREAD_CALL_DELAYED;
1045
1046 call->tc_soft_deadline = sdeadline = deadline;
1047
1048 boolean_t ratelimited = FALSE;
1049 slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);
1050
1051 if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop)
1052 slop = leeway;
1053
1054 if (UINT64_MAX - deadline <= slop)
1055 deadline = UINT64_MAX;
1056 else
1057 deadline += slop;
1058
1059 if (ratelimited) {
1060 call->tc_flags |= TIMER_CALL_RATELIMITED;
1061 } else {
1062 call->tc_flags &= ~TIMER_CALL_RATELIMITED;
1063 }
1064
1065 call->tc_call.param1 = param1;
1066
1067 call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;
1068
1069 result = _delayed_call_enqueue(call, group, deadline, flavor);
1070
1071 _arm_delayed_call_timer(call, group, flavor);
1072
1073 #if CONFIG_DTRACE
1074 DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_call.func,
1075 uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
1076 (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
1077 #endif
1078
1079 enable_ints_and_unlock(s);
1080
1081 return (result);
1082 }
1083
1084 /*
1085 * Remove a callout entry from the queue
1086 * Called with thread_call_lock held
1087 */
1088 static boolean_t
1089 thread_call_cancel_locked(thread_call_t call)
1090 {
1091 boolean_t canceled = (0 != (THREAD_CALL_RESCHEDULE & call->tc_flags));
1092 call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
1093
1094 if (canceled) {
1095 /* if reschedule was set, it must not have been queued */
1096 assert(call->tc_call.queue == NULL);
1097 } else {
1098 boolean_t do_cancel_callout = FALSE;
1099
1100 thread_call_flavor_t flavor = thread_call_get_flavor(call);
1101 thread_call_group_t group = thread_call_get_group(call);
1102
1103 if ((call->tc_call.deadline != 0) &&
1104 (call == qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link))) {
1105 assert(call->tc_call.queue == &group->delayed_queues[flavor]);
1106 do_cancel_callout = TRUE;
1107 }
1108
1109 canceled = _call_dequeue(call, group);
1110
1111 if (do_cancel_callout) {
1112 if (_arm_delayed_call_timer(NULL, group, flavor) == false)
1113 timer_call_cancel(&group->delayed_timers[flavor]);
1114 }
1115 }
1116
1117 #if CONFIG_DTRACE
1118 DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_call.func,
1119 0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
1120 #endif
1121
1122 return canceled;
1123 }
1124
1125 /*
1126 * thread_call_cancel:
1127 *
1128 * Dequeue a callout entry.
1129 *
1130 * Returns TRUE if the call was
1131 * on a queue.
1132 */
1133 boolean_t
1134 thread_call_cancel(thread_call_t call)
1135 {
1136 spl_t s = disable_ints_and_lock();
1137
1138 boolean_t result = thread_call_cancel_locked(call);
1139
1140 enable_ints_and_unlock(s);
1141
1142 return result;
1143 }
1144
1145 /*
1146 * Cancel a thread call. If it cannot be cancelled (i.e.
1147 * is already in flight), waits for the most recent invocation
1148 * to finish. Note that if clients re-submit this thread call,
1149 * it may still be pending or in flight when thread_call_cancel_wait
1150 * returns, but all requests to execute this work item prior
1151 * to the call to thread_call_cancel_wait will have finished.
1152 */
1153 boolean_t
1154 thread_call_cancel_wait(thread_call_t call)
1155 {
1156 if ((call->tc_flags & THREAD_CALL_ALLOC) == 0)
1157 panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
1158
1159 if (!ml_get_interrupts_enabled())
1160 panic("unsafe thread_call_cancel_wait");
1161
1162 if (current_thread()->thc_state.thc_call == call)
1163 panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
1164 call, call->tc_call.func);
1165
1166 spl_t s = disable_ints_and_lock();
1167
1168 boolean_t canceled = thread_call_cancel_locked(call);
1169
1170 if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
1171 /*
1172 * A cancel-wait on a 'once' call will both cancel
1173 * the pending call and wait for the in-flight call
1174 */
1175
1176 thread_call_wait_once_locked(call, s);
1177 /* thread call lock unlocked */
1178 } else {
1179 /*
1180 * A cancel-wait on a normal call will only wait for the in-flight calls
1181 * if it did not cancel the pending call.
1182 *
1183 * TODO: This seems less than useful - shouldn't it do the wait as well?
1184 */
1185
1186 if (canceled == FALSE) {
1187 thread_call_wait_locked(call, s);
1188 /* thread call lock unlocked */
1189 } else {
1190 enable_ints_and_unlock(s);
1191 }
1192 }
1193
1194 return canceled;
1195 }
1196
1197
1198 /*
1199 * thread_call_wake:
1200 *
1201 * Wake a call thread to service
1202 * pending call entries. May wake
1203 * the daemon thread in order to
1204 * create additional call threads.
1205 *
1206 * Called with thread_call_lock held.
1207 *
1208 * For high-priority group, only does wakeup/creation if there are no threads
1209 * running.
1210 */
1211 static __inline__ void
1212 thread_call_wake(
1213 thread_call_group_t group)
1214 {
1215 /*
1216 * New behavior: use threads if you've got 'em.
1217 * Traditional behavior: wake only if no threads running.
1218 */
1219 if (group_isparallel(group) || group->active_count == 0) {
1220 if (waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1221 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES) == KERN_SUCCESS) {
1222 group->idle_count--; group->active_count++;
1223
1224 if (group->idle_count == 0) {
1225 timer_call_cancel(&group->dealloc_timer);
1226 group->flags &= ~TCG_DEALLOC_ACTIVE;
1227 }
1228 } else {
1229 if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
1230 thread_call_daemon_awake = TRUE;
1231 waitq_wakeup64_one(&daemon_waitq, NO_EVENT64,
1232 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1233 }
1234 }
1235 }
1236 }
1237
1238 /*
1239 * sched_call_thread:
1240 *
1241 * Call out invoked by the scheduler.
1242 */
1243 static void
1244 sched_call_thread(
1245 int type,
1246 thread_t thread)
1247 {
1248 thread_call_group_t group;
1249
1250 group = thread->thc_state.thc_group;
1251 assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
1252
1253 thread_call_lock_spin();
1254
1255 switch (type) {
1256
1257 case SCHED_CALL_BLOCK:
1258 assert(group->active_count);
1259 --group->active_count;
1260 group->blocked_count++;
1261 if (group->pending_count > 0)
1262 thread_call_wake(group);
1263 break;
1264
1265 case SCHED_CALL_UNBLOCK:
1266 assert(group->blocked_count);
1267 --group->blocked_count;
1268 group->active_count++;
1269 break;
1270 }
1271
1272 thread_call_unlock();
1273 }
1274
1275 /*
1276 * Interrupts disabled, lock held; returns the same way.
1277 * Only called on thread calls whose storage we own. Wakes up
1278 * anyone who might be waiting on this work item and frees it
1279 * if the client has so requested.
1280 */
1281 static boolean_t
1282 thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
1283 {
1284 uint64_t time;
1285 uint32_t flags;
1286 boolean_t signal;
1287 boolean_t dowake = FALSE;
1288 boolean_t repend = FALSE;
1289
1290 call->tc_finish_count++;
1291 flags = call->tc_flags;
1292 signal = ((THREAD_CALL_SIGNAL & flags) != 0);
1293
1294 if (!signal) {
1295 /* The thread call thread owns a ref until the call is finished */
1296 if (call->tc_refs <= 0)
1297 panic("thread_call_finish: detected over-released thread call: %p", call);
1298 call->tc_refs--;
1299 }
1300
1301 call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);
1302
1303 if ((call->tc_refs != 0) && ((flags & THREAD_CALL_RESCHEDULE) != 0)) {
1304 assert(flags & THREAD_CALL_ONCE);
1305 thread_call_flavor_t flavor = thread_call_get_flavor(call);
1306
1307 if (THREAD_CALL_DELAYED & flags) {
1308 time = mach_absolute_time();
1309 if (flavor == TCF_CONTINUOUS) {
1310 time = absolutetime_to_continuoustime(time);
1311 }
1312 if (call->tc_soft_deadline <= time) {
1313 call->tc_flags &= ~(THREAD_CALL_DELAYED | TIMER_CALL_RATELIMITED);
1314 call->tc_deadline = 0;
1315 }
1316 }
1317 if (call->tc_deadline) {
1318 _delayed_call_enqueue(call, group, call->tc_deadline, flavor);
1319 if (!signal) {
1320 _arm_delayed_call_timer(call, group, flavor);
1321 }
1322 } else if (signal) {
1323 call->tc_submit_count++;
1324 repend = TRUE;
1325 } else {
1326 _pending_call_enqueue(call, group);
1327 }
1328 }
1329
1330 if ((flags & THREAD_CALL_WAIT) != 0) {
1331 dowake = TRUE;
1332
1333 /*
1334 * Dropping lock here because the sched call for the
1335 * high-pri group can take the big lock from under
1336 * a thread lock.
1337 */
1338 thread_call_unlock();
1339 thread_wakeup((event_t)call);
1340 thread_call_lock_spin();
1341 /* THREAD_CALL_SIGNAL call may have been freed */
1342 }
1343
1344 if (!signal && (call->tc_refs == 0)) {
1345 if (dowake) {
1346 panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
1347 }
1348
1349 assert(call->tc_finish_count == call->tc_submit_count);
1350
1351 enable_ints_and_unlock(*s);
1352
1353 zfree(thread_call_zone, call);
1354
1355 *s = disable_ints_and_lock();
1356 }
1357
1358 return (repend);
1359 }
1360
1361 /*
1362 * thread_call_invoke
1363 *
1364 * Invoke the function provided for this thread call
1365 *
1366 * Note that the thread call object can be deallocated by the function if we do not control its storage.
1367 */
1368 static void __attribute__((noinline))
1369 thread_call_invoke(thread_call_func_t func, thread_call_param_t param0, thread_call_param_t param1, thread_call_t call)
1370 {
1371 current_thread()->thc_state.thc_call = call;
1372
1373 #if DEVELOPMENT || DEBUG
1374 KERNEL_DEBUG_CONSTANT(
1375 MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_START,
1376 VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
1377 #endif /* DEVELOPMENT || DEBUG */
1378
1379 #if CONFIG_DTRACE
1380 uint64_t tc_ttd = call->tc_ttd;
1381 boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
1382 DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1383 (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1384 #endif
1385
1386 (*func)(param0, param1);
1387
1388 #if CONFIG_DTRACE
1389 DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1390 (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1391 #endif
1392
1393 #if DEVELOPMENT || DEBUG
1394 KERNEL_DEBUG_CONSTANT(
1395 MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_END,
1396 VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
1397 #endif /* DEVELOPMENT || DEBUG */
1398
1399 current_thread()->thc_state.thc_call = NULL;
1400 }
1401
1402 /*
1403 * thread_call_thread:
1404 */
1405 static void
1406 thread_call_thread(
1407 thread_call_group_t group,
1408 wait_result_t wres)
1409 {
1410 thread_t self = current_thread();
1411 boolean_t canwait;
1412
1413 if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0)
1414 (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
1415
1416 /*
1417 * A wakeup with THREAD_INTERRUPTED indicates that
1418 * we should terminate.
1419 */
1420 if (wres == THREAD_INTERRUPTED) {
1421 thread_terminate(self);
1422
1423 /* NOTREACHED */
1424 panic("thread_terminate() returned?");
1425 }
1426
1427 spl_t s = disable_ints_and_lock();
1428
1429 self->thc_state.thc_group = group;
1430 thread_sched_call(self, sched_call_thread);
1431
1432 while (group->pending_count > 0) {
1433 thread_call_t call;
1434 thread_call_func_t func;
1435 thread_call_param_t param0, param1;
1436
1437 call = qe_dequeue_head(&group->pending_queue, struct thread_call, tc_call.q_link);
1438 assert(call != NULL);
1439 group->pending_count--;
1440
1441 func = call->tc_call.func;
1442 param0 = call->tc_call.param0;
1443 param1 = call->tc_call.param1;
1444
1445 call->tc_call.queue = NULL;
1446
1447 _internal_call_release(call);
1448
1449 /*
1450 * Can only do wakeups for thread calls whose storage
1451 * we control.
1452 */
1453 if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
1454 canwait = TRUE;
1455 call->tc_flags |= THREAD_CALL_RUNNING;
1456 call->tc_refs++; /* Delay free until we're done */
1457 } else
1458 canwait = FALSE;
1459
1460 enable_ints_and_unlock(s);
1461
1462 thread_call_invoke(func, param0, param1, call);
1463
1464 if (get_preemption_level() != 0) {
1465 int pl = get_preemption_level();
1466 panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1467 pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1468 }
1469
1470 s = disable_ints_and_lock();
1471
1472 if (canwait) {
1473 /* Frees if so desired */
1474 thread_call_finish(call, group, &s);
1475 }
1476 }
1477
1478 thread_sched_call(self, NULL);
1479 group->active_count--;
1480
1481 if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1482 ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
1483 if (self->callout_woken_from_platform_idle)
1484 ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
1485 }
1486
1487 self->callout_woken_from_icontext = FALSE;
1488 self->callout_woken_from_platform_idle = FALSE;
1489 self->callout_woke_thread = FALSE;
1490
1491 if (group_isparallel(group)) {
1492 /*
1493 * For new style of thread group, thread always blocks.
1494 * If we have more than the target number of threads,
1495 * and this is the first to block, and it isn't active
1496 * already, set a timer for deallocating a thread if we
1497 * continue to have a surplus.
1498 */
1499 group->idle_count++;
1500
1501 if (group->idle_count == 1) {
1502 group->idle_timestamp = mach_absolute_time();
1503 }
1504
1505 if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
1506 ((group->active_count + group->idle_count) > group->target_thread_count)) {
1507 group->flags |= TCG_DEALLOC_ACTIVE;
1508 thread_call_start_deallocate_timer(group);
1509 }
1510
1511 /* Wait for more work (or termination) */
1512 wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0);
1513 if (wres != THREAD_WAITING) {
1514 panic("kcall worker unable to assert wait?");
1515 }
1516
1517 enable_ints_and_unlock(s);
1518
1519 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1520 } else {
1521 if (group->idle_count < group->target_thread_count) {
1522 group->idle_count++;
1523
1524 waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0); /* Interrupted means to exit */
1525
1526 enable_ints_and_unlock(s);
1527
1528 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1529 /* NOTREACHED */
1530 }
1531 }
1532
1533 enable_ints_and_unlock(s);
1534
1535 thread_terminate(self);
1536 /* NOTREACHED */
1537 }
1538
1539 /*
1540 * thread_call_daemon: walk list of groups, allocating
1541 * threads if appropriate (as determined by
1542 * thread_call_group_should_add_thread()).
1543 */
1544 static void
1545 thread_call_daemon_continue(__unused void *arg)
1546 {
1547 spl_t s = disable_ints_and_lock();
1548
1549 /* Starting at zero happens to be high-priority first. */
1550 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1551 thread_call_group_t group = &thread_call_groups[i];
1552 while (thread_call_group_should_add_thread(group)) {
1553 group->active_count++;
1554
1555 enable_ints_and_unlock(s);
1556
1557 kern_return_t kr = thread_call_thread_create(group);
1558 if (kr != KERN_SUCCESS) {
1559 /*
1560 * On failure, just pause for a moment and give up.
1561 * We can try again later.
1562 */
1563 delay(10000); /* 10 ms */
1564 s = disable_ints_and_lock();
1565 goto out;
1566 }
1567
1568 s = disable_ints_and_lock();
1569 }
1570 }
1571
1572 out:
1573 thread_call_daemon_awake = FALSE;
1574 waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, 0);
1575
1576 enable_ints_and_unlock(s);
1577
1578 thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
1579 /* NOTREACHED */
1580 }
1581
1582 static void
1583 thread_call_daemon(
1584 __unused void *arg)
1585 {
1586 thread_t self = current_thread();
1587
1588 self->options |= TH_OPT_VMPRIV;
1589 vm_page_free_reserve(2); /* XXX */
1590
1591 thread_set_thread_name(self, "thread_call_daemon");
1592
1593 thread_call_daemon_continue(NULL);
1594 /* NOTREACHED */
1595 }
1596
1597 /*
1598 * Schedule timer to deallocate a worker thread if we have a surplus
1599 * of threads (in excess of the group's target) and at least one thread
1600 * is idle the whole time.
1601 */
1602 static void
1603 thread_call_start_deallocate_timer(
1604 thread_call_group_t group)
1605 {
1606 uint64_t deadline;
1607 boolean_t onqueue;
1608
1609 assert(group->idle_count > 0);
1610
1611 group->flags |= TCG_DEALLOC_ACTIVE;
1612 deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1613 onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0);
1614
1615 if (onqueue) {
1616 panic("Deallocate timer already active?");
1617 }
1618 }
1619
1620 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
1621 void
1622 thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
1623 {
1624 thread_call_group_t group = (thread_call_group_t) p0;
1625 thread_call_flavor_t flavor = (thread_call_flavor_t) p1;
1626
1627 thread_call_t call;
1628 uint64_t now;
1629 boolean_t restart;
1630 boolean_t repend;
1631
1632 thread_call_lock_spin();
1633
1634 if (flavor == TCF_CONTINUOUS)
1635 now = mach_continuous_time();
1636 else if (flavor == TCF_ABSOLUTE)
1637 now = mach_absolute_time();
1638 else
1639 panic("invalid timer flavor: %d", flavor);
1640
1641 do {
1642 restart = FALSE;
1643 qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1644 if (flavor == TCF_CONTINUOUS)
1645 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
1646 else
1647 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == 0);
1648
1649 /*
1650 * if we hit a call that isn't yet ready to expire,
1651 * then we're done for now
1652 * TODO: The next timer in the list could have a larger leeway
1653 * and therefore be ready to expire.
1654 * Sort by deadline then by soft deadline to avoid this
1655 */
1656 if (call->tc_soft_deadline > now)
1657 break;
1658
1659 /*
1660 * If we hit a rate-limited timer, don't eagerly wake it up.
1661 * Wait until it reaches the end of the leeway window.
1662 *
1663 * TODO: What if the next timer is not rate-limited?
1664 * Have a separate rate-limited queue to avoid this
1665 */
1666 if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
1667 (call->tc_call.deadline > now) &&
1668 (ml_timer_forced_evaluation() == FALSE)) {
1669 break;
1670 }
1671
1672 if (THREAD_CALL_SIGNAL & call->tc_flags) {
1673 __assert_only queue_head_t *old_queue;
1674 old_queue = call_entry_dequeue(&call->tc_call);
1675 assert(old_queue == &group->delayed_queues[flavor]);
1676
1677 do {
1678 thread_call_func_t func = call->tc_call.func;
1679 thread_call_param_t param0 = call->tc_call.param0;
1680 thread_call_param_t param1 = call->tc_call.param1;
1681
1682 call->tc_flags |= THREAD_CALL_RUNNING;
1683 thread_call_unlock();
1684 thread_call_invoke(func, param0, param1, call);
1685 thread_call_lock_spin();
1686
1687 repend = thread_call_finish(call, group, NULL);
1688 } while (repend);
1689
1690 /* call may have been freed */
1691 restart = TRUE;
1692 break;
1693 } else {
1694 _pending_call_enqueue(call, group);
1695 }
1696 }
1697 } while (restart);
1698
1699 _arm_delayed_call_timer(call, group, flavor);
1700
1701 thread_call_unlock();
1702 }
1703
1704 static void
1705 thread_call_delayed_timer_rescan(thread_call_group_t group,
1706 thread_call_flavor_t flavor)
1707 {
1708 thread_call_t call;
1709 uint64_t now;
1710
1711 spl_t s = disable_ints_and_lock();
1712
1713 assert(ml_timer_forced_evaluation() == TRUE);
1714
1715 if (flavor == TCF_CONTINUOUS) {
1716 now = mach_continuous_time();
1717 } else {
1718 now = mach_absolute_time();
1719 }
1720
1721 qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1722 if (call->tc_soft_deadline <= now) {
1723 _pending_call_enqueue(call, group);
1724 } else {
1725 uint64_t skew = call->tc_call.deadline - call->tc_soft_deadline;
1726 assert (call->tc_call.deadline >= call->tc_soft_deadline);
1727 /*
1728 * On a latency quality-of-service level change,
1729 * re-sort potentially rate-limited callout. The platform
1730 * layer determines which timers require this.
1731 */
1732 if (timer_resort_threshold(skew)) {
1733 _call_dequeue(call, group);
1734 _delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
1735 }
1736 }
1737 }
1738
1739 _arm_delayed_call_timer(NULL, group, flavor);
1740
1741 enable_ints_and_unlock(s);
1742 }
1743
1744 void
1745 thread_call_delayed_timer_rescan_all(void) {
1746 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1747 thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_ABSOLUTE);
1748 thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_CONTINUOUS);
1749 }
1750 }
1751
1752 /*
1753 * Timer callback to tell a thread to terminate if
1754 * we have an excess of threads and at least one has been
1755 * idle for a long time.
1756 */
1757 static void
1758 thread_call_dealloc_timer(
1759 timer_call_param_t p0,
1760 __unused timer_call_param_t p1)
1761 {
1762 thread_call_group_t group = (thread_call_group_t)p0;
1763 uint64_t now;
1764 kern_return_t res;
1765 boolean_t terminated = FALSE;
1766
1767 thread_call_lock_spin();
1768
1769 now = mach_absolute_time();
1770 if (group->idle_count > 0) {
1771 if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
1772 terminated = TRUE;
1773 group->idle_count--;
1774 res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1775 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
1776 if (res != KERN_SUCCESS) {
1777 panic("Unable to wake up idle thread for termination?");
1778 }
1779 }
1780
1781 }
1782
1783 /*
1784 * If we still have an excess of threads, schedule another
1785 * invocation of this function.
1786 */
1787 if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
1788 /*
1789 * If we killed someone just now, push out the
1790 * next deadline.
1791 */
1792 if (terminated) {
1793 group->idle_timestamp = now;
1794 }
1795
1796 thread_call_start_deallocate_timer(group);
1797 } else {
1798 group->flags &= ~TCG_DEALLOC_ACTIVE;
1799 }
1800
1801 thread_call_unlock();
1802 }
1803
1804 /*
1805 * Wait for the invocation of the thread call to complete
1806 * We know there's only one in flight because of the 'once' flag.
1807 *
1808 * If a subsequent invocation comes in before we wake up, that's OK
1809 *
1810 * TODO: Here is where we will add priority inheritance to the thread executing
1811 * the thread call in case it's lower priority than the current thread
1812 * <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
1813 *
1814 * Takes the thread call lock locked, returns unlocked
1815 * This lets us avoid a spurious take/drop after waking up from thread_block
1816 */
1817 static boolean_t
1818 thread_call_wait_once_locked(thread_call_t call, spl_t s)
1819 {
1820 assert(call->tc_flags & THREAD_CALL_ALLOC);
1821 assert(call->tc_flags & THREAD_CALL_ONCE);
1822
1823 if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
1824 enable_ints_and_unlock(s);
1825 return FALSE;
1826 }
1827
1828 /* call is running, so we have to wait for it */
1829 call->tc_flags |= THREAD_CALL_WAIT;
1830
1831 wait_result_t res = assert_wait(call, THREAD_UNINT);
1832 if (res != THREAD_WAITING)
1833 panic("Unable to assert wait: %d", res);
1834
1835 enable_ints_and_unlock(s);
1836
1837 res = thread_block(THREAD_CONTINUE_NULL);
1838 if (res != THREAD_AWAKENED)
1839 panic("Awoken with %d?", res);
1840
1841 /* returns unlocked */
1842 return TRUE;
1843 }
1844
1845 /*
1846 * Wait for an in-flight invocation to complete
1847 * Does NOT try to cancel, so the client doesn't need to hold their
1848 * lock while calling this function.
1849 *
1850 * Returns whether or not it had to wait.
1851 *
1852 * Only works for THREAD_CALL_ONCE calls.
1853 */
1854 boolean_t
1855 thread_call_wait_once(thread_call_t call)
1856 {
1857 if ((call->tc_flags & THREAD_CALL_ALLOC) == 0)
1858 panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
1859
1860 if ((call->tc_flags & THREAD_CALL_ONCE) == 0)
1861 panic("thread_call_wait_once: can't wait_once on a non-once call");
1862
1863 if (!ml_get_interrupts_enabled())
1864 panic("unsafe thread_call_wait_once");
1865
1866 if (current_thread()->thc_state.thc_call == call)
1867 panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
1868 call, call->tc_call.func);
1869
1870 spl_t s = disable_ints_and_lock();
1871
1872 boolean_t waited = thread_call_wait_once_locked(call, s);
1873 /* thread call lock unlocked */
1874
1875 return waited;
1876 }
1877
1878
1879 /*
1880 * Wait for all requested invocations of a thread call prior to now
1881 * to finish. Can only be invoked on thread calls whose storage we manage.
1882 * Just waits for the finish count to catch up to the submit count we find
1883 * at the beginning of our wait.
1884 *
1885 * Called with thread_call_lock held. Returns with lock released.
1886 */
1887 static void
1888 thread_call_wait_locked(thread_call_t call, spl_t s)
1889 {
1890 uint64_t submit_count;
1891 wait_result_t res;
1892
1893 assert(call->tc_flags & THREAD_CALL_ALLOC);
1894
1895 submit_count = call->tc_submit_count;
1896
1897 while (call->tc_finish_count < submit_count) {
1898 call->tc_flags |= THREAD_CALL_WAIT;
1899
1900 res = assert_wait(call, THREAD_UNINT);
1901 if (res != THREAD_WAITING)
1902 panic("Unable to assert wait: %d", res);
1903
1904 enable_ints_and_unlock(s);
1905
1906 res = thread_block(THREAD_CONTINUE_NULL);
1907 if (res != THREAD_AWAKENED)
1908 panic("Awoken with %d?", res);
1909
1910 s = disable_ints_and_lock();
1911 }
1912
1913 enable_ints_and_unlock(s);
1914 }
1915
1916 /*
1917 * Determine whether a thread call is either on a queue or
1918 * currently being executed.
1919 */
1920 boolean_t
1921 thread_call_isactive(thread_call_t call)
1922 {
1923 boolean_t active;
1924
1925 spl_t s = disable_ints_and_lock();
1926 active = (call->tc_submit_count > call->tc_finish_count);
1927 enable_ints_and_unlock(s);
1928
1929 return active;
1930 }
1931
1932 /*
1933 * adjust_cont_time_thread_calls
1934 * on wake, reenqueue delayed call timer for continuous time thread call groups
1935 */
1936 void
1937 adjust_cont_time_thread_calls(void)
1938 {
1939 spl_t s = disable_ints_and_lock();
1940
1941 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1942 thread_call_group_t group = &thread_call_groups[i];
1943
1944 /* only the continuous timers need to be re-armed */
1945
1946 _arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
1947 }
1948
1949 enable_ints_and_unlock(s);
1950 }
1951