/*
 * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Timer interrupt callout module.
 */
#include <mach/mach_types.h>

#include <kern/clock.h>
#include <kern/processor.h>
#include <kern/timer_call.h>
#include <kern/timer_queue.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <sys/kdebug.h>
#define TIMER_ASSERT	1

//#define TIMER_ASSERT 1

#define DBG(x...) kprintf("DBG: " x);

#if TIMER_TRACE
#define TIMER_KDEBUG_TRACE	KERNEL_DEBUG_CONSTANT_IST
#else
#define TIMER_KDEBUG_TRACE(x...)
#endif
LCK_GRP_DECLARE(timer_call_lck_grp, "timer_call");
LCK_GRP_DECLARE(timer_longterm_lck_grp, "timer_longterm");
/* Timer queue lock must be acquired with interrupts disabled (under splclock()) */
#define timer_queue_lock_spin(queue)	\
	lck_mtx_lock_spin_always(&queue->lock_data)

#define timer_queue_unlock(queue)	\
	lck_mtx_unlock_always(&queue->lock_data)
/*
 * The longterm timer object is a global structure holding all timers
 * beyond the short-term, local timer queue threshold. The boot processor
 * is responsible for moving each timer to its local timer queue
 * if and when that timer becomes due within the threshold.
 */
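
/*
 * Illustrative sketch (not compiled): a caller that arms a callout with a
 * deadline beyond TIMER_LONGTERM_THRESHOLD ends up on this global longterm
 * queue rather than on a per-CPU queue; the boot processor later escalates
 * it when it comes due. The callback and variable names below are
 * hypothetical examples, not part of this file.
 */
#if 0
static timer_call_data_t example_call;

static void
example_callout(timer_call_param_t p0, timer_call_param_t p1)
{
	/* runs once the deadline has passed */
}

static void
example_arm_longterm_timer(void)
{
	uint64_t five_seconds, deadline;

	timer_call_setup(&example_call, example_callout, NULL);
	nanoseconds_to_absolutetime(5 * NSEC_PER_SEC, &five_seconds);
	deadline = mach_absolute_time() + five_seconds;
	/* 5s exceeds the 1s threshold on x86_64, so this lands on the longterm queue */
	timer_call_enter(&example_call, deadline, TIMER_CALL_SYS_NORMAL);
}
#endif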
/* Sentinel for "no time set": */
#define TIMER_LONGTERM_NONE		EndOfAllTime

/* The default threshold is the delta above which a timer is "long-term" */
#if defined(__x86_64__)
#define TIMER_LONGTERM_THRESHOLD	(1ULL * NSEC_PER_SEC)	/* 1 sec */
#else
#define TIMER_LONGTERM_THRESHOLD	TIMER_LONGTERM_NONE	/* disabled */
#endif
/*
 * The scan_limit throttles processing of the longterm queue.
 * If the scan time exceeds this limit, we terminate, unlock
 * and defer for scan_interval. This prevents unbounded holding of
 * timer queue locks with interrupts masked.
 */
#define TIMER_LONGTERM_SCAN_LIMIT	(100ULL * NSEC_PER_USEC)	/* 100 us */
#define TIMER_LONGTERM_SCAN_INTERVAL	(100ULL * NSEC_PER_USEC)	/* 100 us */

/* Sentinel for "scan limit exceeded": */
#define TIMER_LONGTERM_SCAN_AGAIN	0
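
/*
 * Illustrative sketch (not compiled) of the pattern used by the longterm
 * scan below: bound the time spent under the queue lock and, if the budget
 * is exhausted, stop and retry one scan_interval later.
 */
#if 0
static void
example_bounded_scan(uint64_t scan_start)
{
	uint64_t limit = scan_start + TIMER_LONGTERM_SCAN_LIMIT;

	/* for each timer on the longterm queue ... */
	if (mach_absolute_time() > limit) {
		/* stop here and reschedule the scan TIMER_LONGTERM_SCAN_INTERVAL later */
	}
}
#endif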
typedef struct {
	uint64_t	interval;	/* longterm timer interval */
	uint64_t	margin;		/* fudge factor (10% of interval) */
	uint64_t	deadline;	/* first/soonest longterm deadline */
	uint64_t	preempted;	/* sooner timer has pre-empted */
	timer_call_t	call;		/* first/soonest longterm timer call */
	uint64_t	deadline_set;	/* next timer set */
	timer_call_data_t timer;	/* timer used by threshold management */

	uint64_t	scans;		/* num threshold timer scans */
	uint64_t	preempts;	/* num threshold reductions */
	uint64_t	latency;	/* average threshold latency */
	uint64_t	latency_min;	/* minimum threshold latency */
	uint64_t	latency_max;	/* maximum threshold latency */
} threshold_t;

typedef struct {
	mpqueue_head_t	queue;		/* longterm timer list */
	uint64_t	enqueues;	/* num timers queued */
	uint64_t	dequeues;	/* num timers dequeued */
	uint64_t	escalates;	/* num timers becoming shortterm */
	uint64_t	scan_time;	/* last time the list was scanned */
	threshold_t	threshold;	/* longterm timer threshold */
	uint64_t	scan_limit;	/* maximum scan time */
	uint64_t	scan_interval;	/* interval between LT "escalation" scans */
	uint64_t	scan_pauses;	/* num scans exceeding time limit */
} timer_longterm_t;
timer_longterm_t timer_longterm = {
	.scan_limit = TIMER_LONGTERM_SCAN_LIMIT,
	.scan_interval = TIMER_LONGTERM_SCAN_INTERVAL,
};

static mpqueue_head_t *timer_longterm_queue = NULL;
static void timer_longterm_init(void);
static void timer_longterm_callout(
	timer_call_param_t	p0,
	timer_call_param_t	p1);
extern void timer_longterm_scan(
	timer_longterm_t	*tlp,
	uint64_t		now);
static void timer_longterm_update(
	timer_longterm_t	*tlp);
static void timer_longterm_update_locked(
	timer_longterm_t	*tlp);
static mpqueue_head_t * timer_longterm_enqueue_unlocked(
	timer_call_t		call,
	uint64_t		now,
	uint64_t		deadline,
	mpqueue_head_t		**old_queue,
	uint64_t		soft_deadline,
	uint64_t		ttd,
	timer_call_param_t	param1,
	uint32_t		callout_flags);
static void timer_longterm_dequeued_locked(
	timer_call_t		call);

uint64_t past_deadline_timers;
uint64_t past_deadline_deltas;
uint64_t past_deadline_longest;
uint64_t past_deadline_shortest = ~0ULL;

enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};

uint64_t past_deadline_timer_adjustment;

static boolean_t timer_call_enter_internal(timer_call_t call,
    timer_call_param_t param1, uint64_t deadline, uint64_t leeway,
    uint32_t flags, boolean_t ratelimited);

boolean_t mach_timer_coalescing_enabled = TRUE;

mpqueue_head_t *timer_call_enqueue_deadline_unlocked(
	timer_call_t		call,
	mpqueue_head_t		*queue,
	uint64_t		deadline,
	uint64_t		soft_deadline,
	uint64_t		ttd,
	timer_call_param_t	param1,
	uint32_t		callout_flags);

mpqueue_head_t *timer_call_dequeue_unlocked(
	timer_call_t		call);

timer_coalescing_priority_params_t tcoal_prio_params;

#if TCOAL_PRIO_STATS
int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
#define TCOAL_PRIO_STAT(x) (x++)
#else
#define TCOAL_PRIO_STAT(x)
#endif
void
timer_call_init_abstime(void)
{
	int i;
	uint64_t result;
	timer_coalescing_priority_params_ns_t *tcoal_prio_params_init =
	    timer_call_get_priority_params();

	nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS,
	    &past_deadline_timer_adjustment);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns,
	    &result);
	tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
	nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns,
	    &result);
	tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns,
	    &result);
	tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
	tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
	tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
	tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
	tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
	tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;

	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
	    &tcoal_prio_params.timer_coalesce_rt_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
	    &tcoal_prio_params.timer_coalesce_bg_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
	    &tcoal_prio_params.timer_coalesce_kt_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
	    &tcoal_prio_params.timer_coalesce_fp_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
	    &tcoal_prio_params.timer_coalesce_ts_abstime_max);

	for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) {
		tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
		nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
		    &tcoal_prio_params.latency_qos_abstime_max[i]);
		tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
	}
}
void
timer_call_init(void)
{
	timer_longterm_init();
	timer_call_init_abstime();
}
void
timer_call_queue_init(mpqueue_head_t *queue)
{
	DBG("timer_call_queue_init(%p)\n", queue);
	mpqueue_init(queue, &timer_call_lck_grp, LCK_ATTR_NULL);
}
void
timer_call_setup(
	timer_call_t		call,
	timer_call_func_t	func,
	timer_call_param_t	param0)
{
	DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);

	*call = (struct timer_call) {
		.tc_func = func,
		.tc_param0 = param0,
		.tc_async_dequeue = false,
	};

	simple_lock_init(&(call)->tc_lock, 0);
}
static mpqueue_head_t *
mpqueue_for_timer_call(timer_call_t entry)
{
	queue_t queue_entry_is_on = entry->tc_queue;
	/* 'cast' the queue back to the original mpqueue */
	return __container_of(queue_entry_is_on, struct mpqueue_head, head);
}
static __inline__ mpqueue_head_t *
timer_call_entry_dequeue(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_mpqueue = mpqueue_for_timer_call(entry);

	/* The entry was always on a queue */
	assert(old_mpqueue != NULL);

#if TIMER_ASSERT
	if (!hw_lock_held((hw_lock_t)&entry->tc_lock)) {
		panic("_call_entry_dequeue() "
		    "entry %p is not locked\n", entry);
	}

	/*
	 * XXX The queue lock is actually a mutex in spin mode
	 * but there's no way to test for it being held
	 * so we pretend it's a spinlock!
	 */
	if (!hw_lock_held((hw_lock_t)&old_mpqueue->lock_data)) {
		panic("_call_entry_dequeue() "
		    "queue %p is not locked\n", old_mpqueue);
	}
#endif /* TIMER_ASSERT */

	if (old_mpqueue != timer_longterm_queue) {
		priority_queue_remove(&old_mpqueue->mpq_pqhead,
		    &entry->tc_pqlink);
	}

	remqueue(&entry->tc_qlink);

	entry->tc_queue = NULL;

	old_mpqueue->count--;

	return old_mpqueue;
}
static __inline__ mpqueue_head_t *
timer_call_entry_enqueue_deadline(
	timer_call_t		entry,
	mpqueue_head_t		*new_mpqueue,
	uint64_t		deadline)
{
	mpqueue_head_t	*old_mpqueue = mpqueue_for_timer_call(entry);

#if TIMER_ASSERT
	if (!hw_lock_held((hw_lock_t)&entry->tc_lock)) {
		panic("_call_entry_enqueue_deadline() "
		    "entry %p is not locked\n", entry);
	}

	/* XXX More lock pretense: */
	if (!hw_lock_held((hw_lock_t)&new_mpqueue->lock_data)) {
		panic("_call_entry_enqueue_deadline() "
		    "queue %p is not locked\n", new_mpqueue);
	}

	if (old_mpqueue != NULL && old_mpqueue != new_mpqueue) {
		panic("_call_entry_enqueue_deadline() "
		    "old_mpqueue %p != new_mpqueue", old_mpqueue);
	}
#endif /* TIMER_ASSERT */

	/* no longterm queue involved */
	assert(new_mpqueue != timer_longterm_queue);
	assert(old_mpqueue != timer_longterm_queue);

	if (old_mpqueue == new_mpqueue) {
		/* optimize the same-queue case to avoid a full re-insert */
		uint64_t old_deadline = entry->tc_pqlink.deadline;
		entry->tc_pqlink.deadline = deadline;

		if (old_deadline < deadline) {
			priority_queue_entry_increased(&new_mpqueue->mpq_pqhead,
			    &entry->tc_pqlink);
		} else {
			priority_queue_entry_decreased(&new_mpqueue->mpq_pqhead,
			    &entry->tc_pqlink);
		}
	} else {
		if (old_mpqueue != NULL) {
			priority_queue_remove(&old_mpqueue->mpq_pqhead,
			    &entry->tc_pqlink);

			re_queue_tail(&new_mpqueue->head, &entry->tc_qlink);
		} else {
			enqueue_tail(&new_mpqueue->head, &entry->tc_qlink);
		}

		entry->tc_queue = &new_mpqueue->head;
		entry->tc_pqlink.deadline = deadline;

		priority_queue_insert(&new_mpqueue->mpq_pqhead, &entry->tc_pqlink);
	}

	/* For efficiency, track the earliest soft deadline on the queue,
	 * so that fuzzy decisions can be made without lock acquisitions.
	 */

	timer_call_t thead = priority_queue_min(&new_mpqueue->mpq_pqhead, struct timer_call, tc_pqlink);

	new_mpqueue->earliest_soft_deadline = thead->tc_flags & TIMER_CALL_RATELIMITED ?
	    thead->tc_pqlink.deadline : thead->tc_soft_deadline;

	if (old_mpqueue) {
		old_mpqueue->count--;
	}
	new_mpqueue->count++;

	return new_mpqueue;
}
static __inline__ void
timer_call_entry_enqueue_tail(
	timer_call_t		entry,
	mpqueue_head_t		*queue)
{
	/* entry is always dequeued before this call */
	assert(entry->tc_queue == NULL);

	/*
	 * this is only used for timer_longterm_queue, which is unordered
	 * and thus needs no priority queueing
	 */
	assert(queue == timer_longterm_queue);

	enqueue_tail(&queue->head, &entry->tc_qlink);

	entry->tc_queue = &queue->head;

	queue->count++;
}
/*
 * Remove timer entry from its queue but don't change the queue pointer
 * and set the async_dequeue flag. This is locking case 2b.
 */
static __inline__ void
timer_call_entry_dequeue_async(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_mpqueue = mpqueue_for_timer_call(entry);
	if (old_mpqueue) {
		old_mpqueue->count--;

		if (old_mpqueue != timer_longterm_queue) {
			priority_queue_remove(&old_mpqueue->mpq_pqhead,
			    &entry->tc_pqlink);
		}

		remqueue(&entry->tc_qlink);
		entry->tc_async_dequeue = true;
	}
}
unsigned timer_call_enqueue_deadline_unlocked_async1;
unsigned timer_call_enqueue_deadline_unlocked_async2;
/*
 * Assumes call_entry and queues unlocked, interrupts disabled.
 */
__inline__ mpqueue_head_t *
timer_call_enqueue_deadline_unlocked(
	timer_call_t		call,
	mpqueue_head_t		*queue,
	uint64_t		deadline,
	uint64_t		soft_deadline,
	uint64_t		ttd,
	timer_call_param_t	param1,
	uint32_t		callout_flags)
{
	DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);

	simple_lock(&call->tc_lock, LCK_GRP_NULL);

	mpqueue_head_t	*old_queue = mpqueue_for_timer_call(call);

	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (call->tc_async_dequeue) {
			/* collision (1c): timer already dequeued, clear flag */
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
			    VM_KERNEL_UNSLIDE_OR_PERM(call),
			    call->tc_async_dequeue,
			    VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
			    0x1c, 0);
			timer_call_enqueue_deadline_unlocked_async1++;

			call->tc_async_dequeue = false;
			call->tc_queue = NULL;
		} else if (old_queue != queue) {
			timer_call_entry_dequeue(call);
			timer_call_enqueue_deadline_unlocked_async2++;
		}
		if (old_queue == timer_longterm_queue) {
			timer_longterm_dequeued_locked(call);
		}
		if (old_queue != queue) {
			timer_queue_unlock(old_queue);
			timer_queue_lock_spin(queue);
		}
	} else {
		timer_queue_lock_spin(queue);
	}

	call->tc_soft_deadline = soft_deadline;
	call->tc_flags = callout_flags;
	call->tc_param1 = param1;
	call->tc_ttd = ttd;

	timer_call_entry_enqueue_deadline(call, queue, deadline);
	timer_queue_unlock(queue);
	simple_unlock(&call->tc_lock);

	return old_queue;
}
unsigned timer_call_dequeue_unlocked_async1;
unsigned timer_call_dequeue_unlocked_async2;
mpqueue_head_t *
timer_call_dequeue_unlocked(
	timer_call_t		call)
{
	DBG("timer_call_dequeue_unlocked(%p)\n", call);

	simple_lock(&call->tc_lock, LCK_GRP_NULL);

	mpqueue_head_t	*old_queue = mpqueue_for_timer_call(call);

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    call->tc_async_dequeue,
	    VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
	    0, 0);

	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (call->tc_async_dequeue) {
			/* collision (1c): timer already dequeued, clear flag */
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
			    VM_KERNEL_UNSLIDE_OR_PERM(call),
			    call->tc_async_dequeue,
			    VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
			    0x1c, 0);
			timer_call_dequeue_unlocked_async1++;

			call->tc_async_dequeue = false;
			call->tc_queue = NULL;
		} else {
			timer_call_entry_dequeue(call);
		}
		if (old_queue == timer_longterm_queue) {
			timer_longterm_dequeued_locked(call);
		}
		timer_queue_unlock(old_queue);
	}
	simple_unlock(&call->tc_lock);

	return old_queue;
}
static uint64_t
timer_call_past_deadline_timer_handle(uint64_t deadline, uint64_t ctime)
{
	uint64_t delta = (ctime - deadline);

	past_deadline_timers++;
	past_deadline_deltas += delta;
	if (delta > past_deadline_longest) {
		past_deadline_longest = delta;
	}
	if (delta < past_deadline_shortest) {
		past_deadline_shortest = delta;
	}

	return ctime + past_deadline_timer_adjustment;
}
/*
 * Timer call entry locking model
 * ==============================
 *
 * Timer call entries are linked on per-cpu timer queues which are protected
 * by the queue lock and the call entry lock. The locking protocol is:
 *
 * 0) The canonical locking order is timer call entry followed by queue.
 *
 * 1) With only the entry lock held, entry.queue is valid:
 *    1a) NULL: the entry is not queued, or
 *    1b) non-NULL: this queue must be locked before the entry is modified.
 *        After locking the queue, the call.async_dequeue flag must be checked:
 *    1c) TRUE: the entry was removed from the queue by another thread
 *        and we must NULL the entry.queue and reset this flag, or
 *    1d) FALSE: (i.e. queued), the entry can be manipulated.
 *
 * 2) If a queue lock is obtained first, the queue is stable:
 *    2a) If a try-lock of a queued entry succeeds, the call can be operated on
 *        and dequeued.
 *    2b) If a try-lock fails, it indicates that another thread is attempting
 *        to change the entry and move it to a different position in this queue
 *        or to a different queue. The entry can be dequeued but it should not be
 *        operated upon since it is being changed. Furthermore, we don't null
 *        the entry.queue pointer (protected by the entry lock we don't own).
 *        Instead, we set the async_dequeue flag -- see (1c).
 *    2c) Same as 2b but occurring when a longterm timer is matured.
 * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
 *    should be manipulated with the appropriate timer queue lock held,
 *    to prevent queue traversals from observing inconsistent
 *    updates to an in-flight callout.
 */
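
/*
 * Illustrative sketch (not compiled) of locking case 1 above: with the entry
 * lock held, the queue pointer may be consulted, the queue locked, and the
 * async_dequeue flag then resolved. This mirrors what
 * timer_call_enqueue_deadline_unlocked() and timer_call_dequeue_unlocked()
 * do; the function name is a hypothetical example.
 */
#if 0
static void
example_lock_protocol(timer_call_t call)
{
	simple_lock(&call->tc_lock, LCK_GRP_NULL);	/* entry lock first (rule 0) */

	mpqueue_head_t *queue = mpqueue_for_timer_call(call);
	if (queue != NULL) {				/* case 1b */
		timer_queue_lock_spin(queue);
		if (call->tc_async_dequeue) {		/* case 1c */
			call->tc_async_dequeue = false;
			call->tc_queue = NULL;
		} else {				/* case 1d: safe to manipulate */
			/* ... dequeue or re-position the entry ... */
		}
		timer_queue_unlock(queue);
	}

	simple_unlock(&call->tc_lock);
}
#endif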
/*
 * In the debug case, we assert that the timer call locking protocol
 * is being honored.
 */
static boolean_t
timer_call_enter_internal(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint64_t		leeway,
	uint32_t		flags,
	boolean_t		ratelimited)
{
	mpqueue_head_t	*queue = NULL;
	mpqueue_head_t	*old_queue;
	spl_t		s;
	uint64_t	slop;
	uint32_t	urgency;
	uint64_t	sdeadline, ttd;

	assert(call->tc_func != NULL);
	s = splclock();

	sdeadline = deadline;
	uint64_t ctime = mach_absolute_time();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_ENTER | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    VM_KERNEL_ADDRHIDE(param1), deadline, flags, 0);

	urgency = (flags & TIMER_CALL_URGENCY_MASK);

	boolean_t slop_ratelimited = FALSE;
	slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);

	if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) {
		slop = leeway;
	}

	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (__improbable(deadline < ctime)) {
		deadline = timer_call_past_deadline_timer_handle(deadline, ctime);
		sdeadline = deadline;
	}

	if (ratelimited || slop_ratelimited) {
		flags |= TIMER_CALL_RATELIMITED;
	} else {
		flags &= ~TIMER_CALL_RATELIMITED;
	}

	ttd = sdeadline - ctime;

	DTRACE_TMR7(callout__create, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, uint32_t, flags,
	    (deadline - sdeadline),
	    (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);

	/* Program timer callout parameters under the appropriate per-CPU or
	 * longterm queue lock. The callout may have been previously enqueued
	 * and in-flight on this or another timer queue.
	 */
	if (!ratelimited && !slop_ratelimited) {
		queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
	}

	if (queue == NULL) {
		queue = timer_queue_assign(deadline);
		old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
	}

	call->tc_entry_time = ctime;

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_ENTER | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    (old_queue != NULL), deadline, queue->count, 0);

	splx(s);

	return old_queue != NULL;
}
/*
 * return boolean indicating whether the call was previously queued.
 */
boolean_t
timer_call_enter(
	timer_call_t		call,
	uint64_t		deadline,
	uint32_t		flags)
{
	return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE);
}
boolean_t
timer_call_enter1(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint32_t		flags)
{
	return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE);
}
boolean_t
timer_call_enter_with_leeway(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint64_t		leeway,
	uint32_t		flags,
	boolean_t		ratelimited)
{
	return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
}
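
/*
 * Illustrative sketch (not compiled): typical use of the enter API above.
 * The callout, variable, and function names are hypothetical examples.
 */
#if 0
static timer_call_data_t example_timeout;

static void
example_timeout_fired(timer_call_param_t p0, timer_call_param_t p1)
{
	/* invoked once the (soft) deadline has passed */
}

static void
example_schedule(void *context)
{
	uint64_t interval, leeway, deadline;

	timer_call_setup(&example_timeout, example_timeout_fired, context);

	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &interval);
	nanoseconds_to_absolutetime(1 * NSEC_PER_MSEC, &leeway);
	deadline = mach_absolute_time() + interval;

	/* re-entering an already-armed call simply reprograms its deadline */
	timer_call_enter_with_leeway(&example_timeout, NULL, deadline, leeway,
	    TIMER_CALL_SYS_NORMAL | TIMER_CALL_LEEWAY, FALSE);
}
#endif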
boolean_t
timer_call_cancel(
	timer_call_t		call)
{
	mpqueue_head_t	*old_queue;
	spl_t s = splclock();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_CANCEL | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    call->tc_pqlink.deadline, call->tc_soft_deadline, call->tc_flags, 0);

	old_queue = timer_call_dequeue_unlocked(call);

	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);

		timer_call_t new_head = priority_queue_min(&old_queue->mpq_pqhead, struct timer_call, tc_pqlink);

		if (new_head) {
			timer_queue_cancel(old_queue, call->tc_pqlink.deadline, new_head->tc_pqlink.deadline);
			old_queue->earliest_soft_deadline = new_head->tc_flags & TIMER_CALL_RATELIMITED ?
			    new_head->tc_pqlink.deadline : new_head->tc_soft_deadline;
		} else {
			timer_queue_cancel(old_queue, call->tc_pqlink.deadline, UINT64_MAX);
			old_queue->earliest_soft_deadline = UINT64_MAX;
		}

		timer_queue_unlock(old_queue);
	}

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_CANCEL | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
	    call->tc_pqlink.deadline - mach_absolute_time(),
	    call->tc_pqlink.deadline - call->tc_entry_time, 0);

	splx(s);

#if CONFIG_DTRACE
	DTRACE_TMR6(callout__cancel, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, uint32_t, call->tc_flags, 0,
	    (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
#endif /* CONFIG_DTRACE */

	return old_queue != NULL;
}
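
/*
 * Illustrative sketch (not compiled): timer_call_cancel() only reports
 * whether the call was still queued. A FALSE return can mean the callout
 * already expired and may have run (or be running); callers that need
 * "definitely not running" semantics must synchronize with their own state.
 * The function name below is a hypothetical example.
 */
#if 0
static boolean_t
example_disarm(timer_call_t call)
{
	boolean_t was_queued = timer_call_cancel(call);
	if (!was_queued) {
		/* callout may have fired or be in flight; reconcile separately */
	}
	return was_queued;
}
#endif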
static uint32_t timer_queue_shutdown_lock_skips;
static uint32_t timer_queue_shutdown_discarded;
void
timer_queue_shutdown(
	mpqueue_head_t		*queue)
{
	timer_call_t	call;
	mpqueue_head_t	*new_queue;
	spl_t		s;

	DBG("timer_queue_shutdown(%p)\n", queue);

	s = splclock();

	while (TRUE) {
		timer_queue_lock_spin(queue);

		call = qe_queue_first(&queue->head, struct timer_call, tc_qlink);

		if (call == NULL) {
			break;
		}

		if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
			/*
			 * case (2b) lock order inversion, dequeue and skip
			 * Don't change the call_entry queue back-pointer
			 * but set the async_dequeue field.
			 */
			timer_queue_shutdown_lock_skips++;
			timer_call_entry_dequeue_async(call);

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
			    VM_KERNEL_UNSLIDE_OR_PERM(call),
			    call->tc_async_dequeue,
			    VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
			    0x2b, 0);

			timer_queue_unlock(queue);
			continue;
		}

		boolean_t call_local = ((call->tc_flags & TIMER_CALL_LOCAL) != 0);

		/* remove entry from old queue */
		timer_call_entry_dequeue(call);
		timer_queue_unlock(queue);

		if (call_local == FALSE) {
			/* and queue it on new, discarding LOCAL timers */
			new_queue = timer_queue_assign(call->tc_pqlink.deadline);
			timer_queue_lock_spin(new_queue);
			timer_call_entry_enqueue_deadline(
				call, new_queue, call->tc_pqlink.deadline);
			timer_queue_unlock(new_queue);
		} else {
			timer_queue_shutdown_discarded++;
		}

		assert(call_local == FALSE);
		simple_unlock(&call->tc_lock);
	}

	timer_queue_unlock(queue);
	splx(s);
}
static uint32_t timer_queue_expire_lock_skips;
uint64_t
timer_queue_expire_with_options(
	mpqueue_head_t		*queue,
	uint64_t		deadline,
	boolean_t		rescan)
{
	timer_call_t	call = NULL;
	uint32_t	tc_iterations = 0;
	DBG("timer_queue_expire(%p,)\n", queue);

	/* 'rescan' means look at every timer in the list, instead of
	 * early-exiting when the head of the list expires in the future.
	 * when 'rescan' is true, iterate by linked list instead of priority queue.
	 *
	 * TODO: if we keep a deadline ordered and soft-deadline ordered
	 * priority queue, then it's no longer necessary to do that
	 */

	uint64_t cur_deadline = deadline;
	timer_queue_lock_spin(queue);

	while (!queue_empty(&queue->head)) {
		/* Upon processing one or more timer calls, refresh the
		 * deadline to account for time elapsed in the callout
		 */
		if (++tc_iterations > 1) {
			cur_deadline = mach_absolute_time();
		}

		if (rescan == FALSE) {
			call = priority_queue_min(&queue->mpq_pqhead, struct timer_call, tc_pqlink);
		} else {
			call = qe_queue_first(&queue->head, struct timer_call, tc_qlink);
		}

		if (call->tc_soft_deadline <= cur_deadline) {
			timer_call_func_t	func;
			timer_call_param_t	param0, param1;

			TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->tc_soft_deadline, 0, 0, 0);
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
			    VM_KERNEL_UNSLIDE_OR_PERM(call),
			    call->tc_soft_deadline,
			    call->tc_pqlink.deadline,
			    call->tc_entry_time, 0);

			if ((call->tc_flags & TIMER_CALL_RATELIMITED) &&
			    (call->tc_pqlink.deadline > cur_deadline)) {
				if (rescan == FALSE) {
					break;
				}
			}

			if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
				/* case (2b) lock inversion, dequeue and skip */
				timer_queue_expire_lock_skips++;
				timer_call_entry_dequeue_async(call);
				call = NULL;
				continue;
			}

			timer_call_entry_dequeue(call);

			func = call->tc_func;
			param0 = call->tc_param0;
			param1 = call->tc_param1;

			simple_unlock(&call->tc_lock);
			timer_queue_unlock(queue);

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_CALLOUT | DBG_FUNC_START,
			    VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
			    VM_KERNEL_ADDRHIDE(param0),
			    VM_KERNEL_ADDRHIDE(param1),
			    0);

			DTRACE_TMR7(callout__start, timer_call_func_t, func,
			    timer_call_param_t, param0, unsigned, call->tc_flags,
			    0, (call->tc_ttd >> 32),
			    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);

			/* Maintain time-to-deadline in per-processor data
			 * structure for thread wakeup deadline statistics.
			 */
			uint64_t *ttdp = &current_processor()->timer_call_ttd;
			*ttdp = call->tc_ttd;

			(*func)(param0, param1);

			DTRACE_TMR4(callout__end, timer_call_func_t, func,
			    param0, param1, call);

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_CALLOUT | DBG_FUNC_END,
			    VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
			    VM_KERNEL_ADDRHIDE(param0),
			    VM_KERNEL_ADDRHIDE(param1),
			    0);

			timer_queue_lock_spin(queue);
		} else {
			if (__probable(rescan == FALSE)) {
				break;
			} else {
				int64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
				assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);

				/* DRK: On a latency quality-of-service level change,
				 * re-sort potentially rate-limited timers. The platform
				 * layer determines which timers require
				 * this. In the absence of the per-callout
				 * synchronization requirement, a global resort could
				 * be more efficient. The re-sort effectively
				 * annuls all timer adjustments, i.e. the "soft
				 * deadline" is the sort key.
				 */

				if (timer_resort_threshold(skew)) {
					if (__probable(simple_lock_try(&call->tc_lock, LCK_GRP_NULL))) {
						/* TODO: don't need to dequeue before enqueue */
						timer_call_entry_dequeue(call);
						timer_call_entry_enqueue_deadline(call, queue, call->tc_soft_deadline);
						simple_unlock(&call->tc_lock);
						call = NULL;
					}
				}
				if (call) {
					call = qe_queue_next(&queue->head, call, struct timer_call, tc_qlink);

					if (call == NULL) {
						break;
					}
				}
			}
		}
	}

	if (!queue_empty(&queue->head)) {
		call = priority_queue_min(&queue->mpq_pqhead, struct timer_call, tc_pqlink);

		cur_deadline = call->tc_pqlink.deadline;
		queue->earliest_soft_deadline = (call->tc_flags & TIMER_CALL_RATELIMITED) ? call->tc_pqlink.deadline : call->tc_soft_deadline;
	} else {
		queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
	}

	timer_queue_unlock(queue);

	return cur_deadline;
}
uint64_t
timer_queue_expire(
	mpqueue_head_t		*queue,
	uint64_t		deadline)
{
	return timer_queue_expire_with_options(queue, deadline, FALSE);
}
extern int serverperfmode;
static uint32_t timer_queue_migrate_lock_skips;
/*
 * timer_queue_migrate() is called by timer_queue_migrate_cpu()
 * to move timer requests from the local processor (queue_from)
 * to a target processor's (queue_to).
 */
int
timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
{
	timer_call_t	call;
	timer_call_t	head_to;
	int		timers_migrated = 0;

	DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);

	assert(!ml_get_interrupts_enabled());
	assert(queue_from != queue_to);

	if (serverperfmode) {
		/*
		 * if we're running a high end server
		 * avoid migrations... they add latency
		 * and don't save us power under typical
		 * server loads
		 */
		return -4;
	}

	/*
	 * Take both local (from) and target (to) timer queue locks while
	 * moving the timers from the local queue to the target processor.
	 * We assume that the target is always the boot processor.
	 * But only move if all of the following is true:
	 *  - the target queue is non-empty
	 *  - the local queue is non-empty
	 *  - the local queue's first deadline is later than the target's
	 *  - the local queue contains no non-migrateable "local" call
	 * so that we need not have the target resync.
	 */

	timer_queue_lock_spin(queue_to);

	head_to = priority_queue_min(&queue_to->mpq_pqhead, struct timer_call, tc_pqlink);

	if (head_to == NULL) {
		timers_migrated = -1;
		goto abort1;
	}

	timer_queue_lock_spin(queue_from);

	call = priority_queue_min(&queue_from->mpq_pqhead, struct timer_call, tc_pqlink);

	if (call == NULL) {
		timers_migrated = -2;
		goto abort2;
	}

	if (call->tc_pqlink.deadline < head_to->tc_pqlink.deadline) {
		timers_migrated = 0;
		goto abort2;
	}

	/* perform scan for non-migratable timers */
	qe_foreach_element(call, &queue_from->head, tc_qlink) {
		if (call->tc_flags & TIMER_CALL_LOCAL) {
			timers_migrated = -3;
			goto abort2;
		}
	}

	/* migration loop itself -- both queues are locked */
	qe_foreach_element_safe(call, &queue_from->head, tc_qlink) {
		if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
			/* case (2b) lock order inversion, dequeue only */
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
			    VM_KERNEL_UNSLIDE_OR_PERM(call),
			    VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
			    0, 0x2b, 0);
			timer_queue_migrate_lock_skips++;
			timer_call_entry_dequeue_async(call);
			continue;
		}
		timer_call_entry_dequeue(call);
		timer_call_entry_enqueue_deadline(
			call, queue_to, call->tc_pqlink.deadline);
		timers_migrated++;
		simple_unlock(&call->tc_lock);
	}
	queue_from->earliest_soft_deadline = UINT64_MAX;
abort2:
	timer_queue_unlock(queue_from);
abort1:
	timer_queue_unlock(queue_to);

	return timers_migrated;
}
void
timer_queue_trace_cpu(int ncpu)
{
	timer_call_nosync_cpu(
		ncpu,
		(void (*)(void *))timer_queue_trace,
		(void*) timer_queue_cpu(ncpu));
}
void
timer_queue_trace(
	mpqueue_head_t		*queue)
{
	timer_call_t	call;
	spl_t		s;

	if (!kdebug_enable) {
		return;
	}

	s = splclock();
	timer_queue_lock_spin(queue);

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_QUEUE | DBG_FUNC_START,
	    queue->count, mach_absolute_time(), 0, 0, 0);

	qe_foreach_element(call, &queue->head, tc_qlink) {
		TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		    DECR_TIMER_QUEUE | DBG_FUNC_NONE,
		    call->tc_soft_deadline,
		    call->tc_pqlink.deadline,
		    call->tc_entry_time,
		    VM_KERNEL_UNSLIDE(call->tc_func),
		    0);
	}

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_QUEUE | DBG_FUNC_END,
	    queue->count, mach_absolute_time(), 0, 0, 0);

	timer_queue_unlock(queue);
	splx(s);
}
static void
timer_longterm_dequeued_locked(timer_call_t call)
{
	timer_longterm_t	*tlp = &timer_longterm;

	tlp->dequeues++;
	if (call == tlp->threshold.call) {
		tlp->threshold.call = NULL;
	}
}
/*
 * Place a timer call in the longterm list
 * and adjust the next timer callout deadline if the new timer is first.
 */
static mpqueue_head_t *
timer_longterm_enqueue_unlocked(timer_call_t call,
    uint64_t		now,
    uint64_t		deadline,
    mpqueue_head_t	**old_queue,
    uint64_t		soft_deadline,
    uint64_t		ttd,
    timer_call_param_t	param1,
    uint32_t		callout_flags)
{
	timer_longterm_t	*tlp = &timer_longterm;
	boolean_t		update_required = FALSE;
	uint64_t		longterm_threshold;

	longterm_threshold = now + tlp->threshold.interval;

	/*
	 * Return NULL without doing anything if:
	 *  - this timer is local, or
	 *  - the longterm mechanism is disabled, or
	 *  - this deadline is too short.
	 */
	if ((callout_flags & TIMER_CALL_LOCAL) != 0 ||
	    (tlp->threshold.interval == TIMER_LONGTERM_NONE) ||
	    (deadline <= longterm_threshold)) {
		return NULL;
	}

	/*
	 * Remove timer from its current queue, if any.
	 */
	*old_queue = timer_call_dequeue_unlocked(call);

	/*
	 * Lock the longterm queue, queue timer and determine
	 * whether an update is necessary.
	 */
	assert(!ml_get_interrupts_enabled());
	simple_lock(&call->tc_lock, LCK_GRP_NULL);
	timer_queue_lock_spin(timer_longterm_queue);
	call->tc_pqlink.deadline = deadline;
	call->tc_param1 = param1;
	call->tc_ttd = ttd;
	call->tc_soft_deadline = soft_deadline;
	call->tc_flags = callout_flags;
	timer_call_entry_enqueue_tail(call, timer_longterm_queue);

	tlp->enqueues++;

	/*
	 * We'll need to update the currently set threshold timer
	 * if the new deadline is sooner and no sooner update is in flight.
	 */
	if (deadline < tlp->threshold.deadline &&
	    deadline < tlp->threshold.preempted) {
		tlp->threshold.preempted = deadline;
		tlp->threshold.call = call;
		update_required = TRUE;
	}
	timer_queue_unlock(timer_longterm_queue);
	simple_unlock(&call->tc_lock);

	if (update_required) {
		/*
		 * Note: this call expects that calling the master cpu
		 * alone does not involve locking the topo lock.
		 */
		timer_call_nosync_cpu(
			master_cpu,
			(void (*)(void *))timer_longterm_update,
			(void *)tlp);
	}

	return timer_longterm_queue;
}
/*
 * Scan for timers below the longterm threshold.
 * Move these to the local timer queue (of the boot processor on which the
 * calling thread is running).
 * Both the local (boot) queue and the longterm queue are locked.
 * The scan is similar to the timer migrate sequence but is performed by
 * successively examining each timer on the longterm queue:
 *  - if within the short-term threshold
 *    - enter on the local queue (unless being deleted),
 *  - otherwise:
 *    - if sooner, deadline becomes the next threshold deadline.
 * The total scan time is limited to TIMER_LONGTERM_SCAN_LIMIT. Should this be
 * exceeded, we abort and reschedule again so that we don't shut others from
 * the timer queues. Longterm timers firing late is not critical.
 */
void
timer_longterm_scan(timer_longterm_t *tlp,
    uint64_t time_start)
{
	timer_call_t	call;
	uint64_t	threshold = TIMER_LONGTERM_NONE;
	uint64_t	deadline;
	uint64_t	time_limit = time_start + tlp->scan_limit;
	mpqueue_head_t	*timer_master_queue;

	assert(!ml_get_interrupts_enabled());
	assert(cpu_number() == master_cpu);

	if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
		threshold = time_start + tlp->threshold.interval;
	}

	tlp->threshold.deadline = TIMER_LONGTERM_NONE;
	tlp->threshold.call = NULL;

	if (queue_empty(&timer_longterm_queue->head)) {
		return;
	}

	timer_master_queue = timer_queue_cpu(master_cpu);
	timer_queue_lock_spin(timer_master_queue);

	qe_foreach_element_safe(call, &timer_longterm_queue->head, tc_qlink) {
		deadline = call->tc_soft_deadline;
		if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
			/* case (2c) lock order inversion, dequeue only */
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
			    VM_KERNEL_UNSLIDE_OR_PERM(call),
			    VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
			    0, 0x2c, 0);
			timer_call_entry_dequeue_async(call);
			continue;
		}
		if (deadline < threshold) {
			/*
			 * This timer needs moving (escalating)
			 * to the local (boot) processor's queue.
			 */
			if (deadline < time_start) {
				TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				    DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
				    VM_KERNEL_UNSLIDE_OR_PERM(call),
				    deadline, time_start, threshold, 0);
			}

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
			    DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
			    VM_KERNEL_UNSLIDE_OR_PERM(call),
			    call->tc_pqlink.deadline,
			    call->tc_entry_time,
			    VM_KERNEL_UNSLIDE(call->tc_func),
			    0);
			tlp->escalates++;
			timer_call_entry_dequeue(call);
			timer_call_entry_enqueue_deadline(
				call, timer_master_queue, call->tc_pqlink.deadline);
			/*
			 * A side-effect of the following call is to update
			 * the actual hardware deadline if required.
			 */
			(void) timer_queue_assign(deadline);
		} else {
			if (deadline < tlp->threshold.deadline) {
				tlp->threshold.deadline = deadline;
				tlp->threshold.call = call;
			}
		}
		simple_unlock(&call->tc_lock);

		/* Abort scan if we're taking too long. */
		if (mach_absolute_time() > time_limit) {
			tlp->threshold.deadline = TIMER_LONGTERM_SCAN_AGAIN;
			tlp->scan_pauses++;
			DBG("timer_longterm_scan() paused %llu, qlen: %llu\n",
			    time_limit, tlp->queue.count);
			break;
		}
	}

	timer_queue_unlock(timer_master_queue);
}
static void
timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
{
	timer_longterm_t	*tlp = (timer_longterm_t *) p0;

	timer_longterm_update(tlp);
}
static void
timer_longterm_update_locked(timer_longterm_t *tlp)
{
	uint64_t	latency;

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_UPDATE | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
	    tlp->threshold.deadline,
	    tlp->threshold.preempted,
	    tlp->queue.count, 0);

	tlp->scan_time = mach_absolute_time();
	if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
		tlp->threshold.preempts++;
		tlp->threshold.deadline = tlp->threshold.preempted;
		tlp->threshold.preempted = TIMER_LONGTERM_NONE;
		/*
		 * Note: in the unlikely event that a pre-empted timer has
		 * itself been cancelled, we'll simply re-scan later at the
		 * time of the preempted/cancelled timer.
		 */
	} else {
		tlp->threshold.scans++;

		/*
		 * Maintain a moving average of our wakeup latency.
		 * Clamp latency to 0 and ignore above threshold interval.
		 */
		if (tlp->scan_time > tlp->threshold.deadline_set) {
			latency = tlp->scan_time - tlp->threshold.deadline_set;
		} else {
			latency = 0;
		}
		if (latency < tlp->threshold.interval) {
			tlp->threshold.latency_min =
			    MIN(tlp->threshold.latency_min, latency);
			tlp->threshold.latency_max =
			    MAX(tlp->threshold.latency_max, latency);
			tlp->threshold.latency =
			    (tlp->threshold.latency * 99 + latency) / 100;
		}

		timer_longterm_scan(tlp, tlp->scan_time);
	}

	tlp->threshold.deadline_set = tlp->threshold.deadline;
	/* The next deadline timer to be set is adjusted */
	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE &&
	    tlp->threshold.deadline != TIMER_LONGTERM_SCAN_AGAIN) {
		tlp->threshold.deadline_set -= tlp->threshold.margin;
		tlp->threshold.deadline_set -= tlp->threshold.latency;
	}

	/* Throttle next scan time */
	uint64_t scan_clamp = mach_absolute_time() + tlp->scan_interval;
	if (tlp->threshold.deadline_set < scan_clamp) {
		tlp->threshold.deadline_set = scan_clamp;
	}

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
	    DECR_TIMER_UPDATE | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
	    tlp->threshold.deadline,
	    tlp->threshold.scans,
	    tlp->queue.count, 0);
}
static void
timer_longterm_update(timer_longterm_t *tlp)
{
	spl_t s = splclock();

	timer_queue_lock_spin(timer_longterm_queue);

	if (cpu_number() != master_cpu) {
		panic("timer_longterm_update_master() on non-boot cpu");
	}

	timer_longterm_update_locked(tlp);

	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
		timer_call_enter(
			&tlp->threshold.timer,
			tlp->threshold.deadline_set,
			TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
	}

	timer_queue_unlock(timer_longterm_queue);
	splx(s);
}
static void
timer_longterm_init(void)
{
	uint32_t		longterm;
	timer_longterm_t	*tlp = &timer_longterm;

	DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue);

	/*
	 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD
	 * or TIMER_LONGTERM_NONE (disabled) for server;
	 * overridden by the "longterm" boot-arg.
	 */
	tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE
	    : TIMER_LONGTERM_THRESHOLD;
	if (PE_parse_boot_argn("longterm", &longterm, sizeof(longterm))) {
		tlp->threshold.interval = (longterm == 0) ?
		    TIMER_LONGTERM_NONE :
		    longterm * NSEC_PER_MSEC;
	}
	if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
		printf("Longterm timer threshold: %llu ms\n",
		    tlp->threshold.interval / NSEC_PER_MSEC);
		kprintf("Longterm timer threshold: %llu ms\n",
		    tlp->threshold.interval / NSEC_PER_MSEC);
		nanoseconds_to_absolutetime(tlp->threshold.interval,
		    &tlp->threshold.interval);
		tlp->threshold.margin = tlp->threshold.interval / 10;
		tlp->threshold.latency_min = EndOfAllTime;
		tlp->threshold.latency_max = 0;
	}

	tlp->threshold.preempted = TIMER_LONGTERM_NONE;
	tlp->threshold.deadline = TIMER_LONGTERM_NONE;

	mpqueue_init(&tlp->queue, &timer_longterm_lck_grp, LCK_ATTR_NULL);

	timer_call_setup(&tlp->threshold.timer,
	    timer_longterm_callout, (timer_call_param_t) tlp);

	timer_longterm_queue = &tlp->queue;
}
enum {
	THRESHOLD, QCOUNT,
	ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
	LATENCY, LATENCY_MIN, LATENCY_MAX, SCAN_LIMIT, SCAN_INTERVAL, PAUSES
};

uint64_t
timer_sysctl_get(int oid)
{
	timer_longterm_t	*tlp = &timer_longterm;

	switch (oid) {
	case THRESHOLD:
		return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
		       0 : tlp->threshold.interval / NSEC_PER_MSEC;
	case QCOUNT:
		return tlp->queue.count;
	case ENQUEUES:
		return tlp->enqueues;
	case DEQUEUES:
		return tlp->dequeues;
	case ESCALATES:
		return tlp->escalates;
	case SCANS:
		return tlp->threshold.scans;
	case PREEMPTS:
		return tlp->threshold.preempts;
	case LATENCY:
		return tlp->threshold.latency;
	case LATENCY_MIN:
		return tlp->threshold.latency_min;
	case LATENCY_MAX:
		return tlp->threshold.latency_max;
	case SCAN_LIMIT:
		return tlp->scan_limit;
	case SCAN_INTERVAL:
		return tlp->scan_interval;
	case PAUSES:
		return tlp->scan_pauses;
	default:
		return 0;
	}
}
/*
 * timer_master_scan() is the inverse of timer_longterm_scan()
 * since it un-escalates timers to the longterm queue.
 */
static void
timer_master_scan(timer_longterm_t *tlp,
    uint64_t now)
{
	timer_call_t	call;
	uint64_t	threshold;
	uint64_t	deadline;
	mpqueue_head_t	*timer_master_queue;

	if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
		threshold = now + tlp->threshold.interval;
	} else {
		threshold = TIMER_LONGTERM_NONE;
	}

	timer_master_queue = timer_queue_cpu(master_cpu);
	timer_queue_lock_spin(timer_master_queue);

	qe_foreach_element_safe(call, &timer_master_queue->head, tc_qlink) {
		deadline = call->tc_pqlink.deadline;
		if ((call->tc_flags & TIMER_CALL_LOCAL) != 0) {
			continue;
		}
		if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
			/* case (2c) lock order inversion, dequeue only */
			timer_call_entry_dequeue_async(call);
			continue;
		}
		if (deadline > threshold) {
			/* move from master to longterm */
			timer_call_entry_dequeue(call);
			timer_call_entry_enqueue_tail(call, timer_longterm_queue);
			if (deadline < tlp->threshold.deadline) {
				tlp->threshold.deadline = deadline;
				tlp->threshold.call = call;
			}
		}
		simple_unlock(&call->tc_lock);
	}
	timer_queue_unlock(timer_master_queue);
}
static void
timer_sysctl_set_threshold(uint64_t value)
{
	timer_longterm_t	*tlp = &timer_longterm;
	spl_t			s = splclock();
	boolean_t		threshold_increase;

	timer_queue_lock_spin(timer_longterm_queue);

	timer_call_cancel(&tlp->threshold.timer);

	/*
	 * Set the new threshold and note whether it's increasing.
	 */
	if (value == 0) {
		tlp->threshold.interval = TIMER_LONGTERM_NONE;
		threshold_increase = TRUE;
		timer_call_cancel(&tlp->threshold.timer);
	} else {
		uint64_t old_interval = tlp->threshold.interval;
		tlp->threshold.interval = value * NSEC_PER_MSEC;
		nanoseconds_to_absolutetime(tlp->threshold.interval,
		    &tlp->threshold.interval);
		tlp->threshold.margin = tlp->threshold.interval / 10;
		if (old_interval == TIMER_LONGTERM_NONE) {
			threshold_increase = FALSE;
		} else {
			threshold_increase = (tlp->threshold.interval > old_interval);
		}
	}

	if (threshold_increase /* or removal */) {
		/* Escalate timers from the longterm queue */
		timer_longterm_scan(tlp, mach_absolute_time());
	} else { /* decrease or addition */
		/*
		 * We scan the local/master queue for timers now longterm.
		 * To be strictly correct, we should scan all processor queues
		 * but timer migration results in most timers gravitating to the
		 * master processor in any case.
		 */
		timer_master_scan(tlp, mach_absolute_time());
	}

	/* Set new timer accordingly */
	tlp->threshold.deadline_set = tlp->threshold.deadline;
	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
		tlp->threshold.deadline_set -= tlp->threshold.margin;
		tlp->threshold.deadline_set -= tlp->threshold.latency;
		timer_call_enter(
			&tlp->threshold.timer,
			tlp->threshold.deadline_set,
			TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
	}

	tlp->scan_pauses = 0;
	tlp->threshold.scans = 0;
	tlp->threshold.preempts = 0;
	tlp->threshold.latency = 0;
	tlp->threshold.latency_min = EndOfAllTime;
	tlp->threshold.latency_max = 0;

	timer_queue_unlock(timer_longterm_queue);
	splx(s);
}
kern_return_t
timer_sysctl_set(int oid, uint64_t value)
{
	switch (oid) {
	case THRESHOLD:
		timer_call_cpu(
			master_cpu,
			(void (*)(void *))timer_sysctl_set_threshold,
			(void *) value);
		return KERN_SUCCESS;
	case SCAN_LIMIT:
		timer_longterm.scan_limit = value;
		return KERN_SUCCESS;
	case SCAN_INTERVAL:
		timer_longterm.scan_interval = value;
		return KERN_SUCCESS;
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
/* Select timer coalescing window based on per-task quality-of-service hints */
static boolean_t
tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited)
{
	uint32_t latency_qos;
	boolean_t adjusted = FALSE;
	task_t ctask = t->task;

	if (ctask) {
		latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS);

		assert(latency_qos <= NUM_LATENCY_QOS_TIERS);

		if (latency_qos) {
			*tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
			*tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1];
			*pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
			adjusted = TRUE;
		}
	}
	return adjusted;
}
/* Adjust timer deadlines based on priority of the thread and the
 * urgency value provided at timeout establishment. With this mechanism,
 * timers are no longer necessarily sorted in order of soft deadline
 * on a given timer queue, i.e. they may be differentially skewed.
 * In the current scheme, this could lead to fewer pending timers
 * processed than is technically possible when the HW deadline arrives.
 */
static void
timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift,
    uint64_t *tmax_abstime, boolean_t *pratelimited)
{
	int16_t tpri = cthread->sched_pri;

	if ((urgency & TIMER_CALL_USER_MASK) != 0) {
		if (tpri >= BASEPRI_RTQUEUES ||
		    urgency == TIMER_CALL_USER_CRITICAL) {
			*tshift = tcoal_prio_params.timer_coalesce_rt_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max;
			TCOAL_PRIO_STAT(rt_tcl);
		} else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) ||
		    (urgency == TIMER_CALL_USER_BACKGROUND)) {
			/* Determine if timer should be subjected to a lower QoS */
			if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
				if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) {
					return;
				} else {
					*pratelimited = FALSE;
				}
			}
			*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
			TCOAL_PRIO_STAT(bg_tcl);
		} else if (tpri >= MINPRI_KERNEL) {
			*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
			TCOAL_PRIO_STAT(kt_tcl);
		} else if (cthread->sched_mode == TH_MODE_FIXED) {
			*tshift = tcoal_prio_params.timer_coalesce_fp_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max;
			TCOAL_PRIO_STAT(fp_tcl);
		} else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
			TCOAL_PRIO_STAT(qos_tcl);
		} else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
			*tshift = tcoal_prio_params.timer_coalesce_ts_shift;
			*tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
			TCOAL_PRIO_STAT(ts_tcl);
		} else {
			TCOAL_PRIO_STAT(nc_tcl);
		}
	} else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
		*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
		*tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
		TCOAL_PRIO_STAT(bg_tcl);
	} else {
		*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
		*tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
		TCOAL_PRIO_STAT(kt_tcl);
	}
}
int timer_user_idle_level;
uint64_t
timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
{
	int32_t tcs_shift = 0;
	uint64_t tcs_max_abstime = 0;
	uint64_t adjval = 0;
	uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);

	if (mach_timer_coalescing_enabled &&
	    (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
		timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited);

		if (tcs_shift >= 0) {
			adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime);
		} else {
			adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime);
		}
		/*
		 * Apply adjustments derived from the "user idle level" heuristic:
		 * the window grows by idle_level/128, i.e. at the maximum level
		 * of 128 the coalescing window roughly doubles.
		 */
		adjval += (adjval * timer_user_idle_level) >> 7;
	}

	return adjval;
}

int
timer_get_user_idle_level(void)
{
	return timer_user_idle_level;
}

kern_return_t
timer_set_user_idle_level(int ilevel)
{
	boolean_t do_reeval = FALSE;

	if ((ilevel < 0) || (ilevel > 128)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (ilevel < timer_user_idle_level) {
		do_reeval = TRUE;
	}

	timer_user_idle_level = ilevel;

	if (do_reeval) {
		ml_timer_evaluate();
	}

	return KERN_SUCCESS;
}
#pragma mark - running timers

#define RUNNING_TIMER_FAKE_FLAGS (TIMER_CALL_SYS_CRITICAL | \
    TIMER_CALL_LOCAL)

/*
 * timer_call_trace_* functions mimic the tracing behavior from the normal
 * timer_call subsystem, so tools continue to function.
 */

static void
timer_call_trace_enter_before(struct timer_call *call, uint64_t deadline,
    uint32_t flags, uint64_t now)
{
#pragma unused(call, deadline, flags, now)
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_ADDRHIDE(call->tc_param1),
	    deadline, flags, 0);
#if CONFIG_DTRACE
	uint64_t ttd = deadline - now;
	DTRACE_TMR7(callout__create, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, uint32_t, flags, 0,
	    (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF), NULL);
#endif /* CONFIG_DTRACE */
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline, 0, 0);
}
static void
timer_call_trace_enter_after(struct timer_call *call, uint64_t deadline)
{
#pragma unused(call, deadline)
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline, 0, 0);
}

static void
timer_call_trace_cancel(struct timer_call *call)
{
#pragma unused(call)
	__unused uint64_t deadline = call->tc_pqlink.deadline;
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CANCEL | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), deadline, 0,
	    call->tc_flags, 0);
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CANCEL | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline - mach_absolute_time(),
	    deadline - call->tc_entry_time, 0);

#if CONFIG_DTRACE
#if TIMER_TRACE
	uint64_t ttd = deadline - call->tc_entry_time;
#else
	uint64_t ttd = UINT64_MAX;
#endif /* TIMER_TRACE */
	DTRACE_TMR6(callout__cancel, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, uint32_t, call->tc_flags, 0,
	    (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF));
#endif /* CONFIG_DTRACE */
}

static void
timer_call_trace_expire_entry(struct timer_call *call)
{
#pragma unused(call)
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(call->tc_func),
	    VM_KERNEL_ADDRHIDE(call->tc_param0),
	    VM_KERNEL_ADDRHIDE(call->tc_param1),
	    0);
#if CONFIG_DTRACE
#if TIMER_TRACE
	uint64_t ttd = call->tc_pqlink.deadline - call->tc_entry_time;
#else /* TIMER_TRACE */
	uint64_t ttd = UINT64_MAX;
#endif /* TIMER_TRACE */
	DTRACE_TMR7(callout__start, timer_call_func_t, call->tc_func,
	    timer_call_param_t, call->tc_param0, unsigned, call->tc_flags,
	    0, (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF), NULL);
#endif /* CONFIG_DTRACE */
}

static void
timer_call_trace_expire_return(struct timer_call *call)
{
#pragma unused(call)
#if CONFIG_DTRACE
	DTRACE_TMR4(callout__end, timer_call_func_t, call->tc_func,
	    call->tc_param0, call->tc_param1, NULL);
#endif /* CONFIG_DTRACE */
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_END,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    VM_KERNEL_UNSLIDE(call->tc_func),
	    VM_KERNEL_ADDRHIDE(call->tc_param0),
	    VM_KERNEL_ADDRHIDE(call->tc_param1),
	    0);
}
/*
 * Set a new deadline for a running timer on this processor.
 */
void
running_timer_setup(processor_t processor, enum running_timer timer,
    void *param, uint64_t deadline, uint64_t now)
{
	assert(timer < RUNNING_TIMER_MAX);
	assert(ml_get_interrupts_enabled() == FALSE);

	struct timer_call *call = &processor->running_timers[timer];

	timer_call_trace_enter_before(call, deadline, RUNNING_TIMER_FAKE_FLAGS,
	    now);

	if (__improbable(deadline < now)) {
		deadline = timer_call_past_deadline_timer_handle(deadline, now);
	}

	call->tc_pqlink.deadline = deadline;
#if TIMER_TRACE
	call->tc_entry_time = now;
#endif /* TIMER_TRACE */
	call->tc_param1 = param;

	timer_call_trace_enter_after(call, deadline);
}

void
running_timers_sync(void)
{
	timer_resync_deadlines();
}

void
running_timer_enter(processor_t processor, unsigned int timer,
    void *param, uint64_t deadline, uint64_t now)
{
	running_timer_setup(processor, timer, param, deadline, now);
	running_timers_sync();
}

/*
 * Call the callback for any running timers that fired for this processor.
 * Returns true if any timers were past their deadline.
 */
bool
running_timers_expire(processor_t processor, uint64_t now)
{
	bool expired = false;

	if (!processor->running_timers_active) {
		return expired;
	}

	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		struct timer_call *call = &processor->running_timers[i];

		uint64_t deadline = call->tc_pqlink.deadline;
		if (deadline > now) {
			continue;
		}

		expired = true;
		timer_call_trace_expire_entry(call);
		call->tc_func(call->tc_param0, call->tc_param1);
		timer_call_trace_expire_return(call);
	}

	return expired;
}

void
running_timer_clear(processor_t processor, enum running_timer timer)
{
	struct timer_call *call = &processor->running_timers[timer];
	uint64_t deadline = call->tc_pqlink.deadline;
	if (deadline == EndOfAllTime) {
		return;
	}

	call->tc_pqlink.deadline = EndOfAllTime;
#if TIMER_TRACE
	call->tc_entry_time = 0;
#endif /* TIMER_TRACE */
	timer_call_trace_cancel(call);
}

void
running_timer_cancel(processor_t processor, unsigned int timer)
{
	running_timer_clear(processor, timer);
	running_timers_sync();
}

uint64_t
running_timers_deadline(processor_t processor)
{
	if (!processor->running_timers_active) {
		return EndOfAllTime;
	}

	uint64_t deadline = EndOfAllTime;
	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		uint64_t candidate =
		    processor->running_timers[i].tc_pqlink.deadline;
		if (candidate != 0 && candidate < deadline) {
			deadline = candidate;
		}
	}

	return deadline;
}

void
running_timers_activate(processor_t processor)
{
	processor->running_timers_active = true;
	running_timers_sync();
}

void
running_timers_deactivate(processor_t processor)
{
	assert(processor->running_timers_active == true);
	processor->running_timers_active = false;
	running_timers_sync();
}