apple/xnu (xnu-7195.81.3) - osfmk/kern/timer_call.c
1 /*
2 * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Timer interrupt callout module.
30 */
31
32 #include <mach/mach_types.h>
33
34 #include <kern/clock.h>
35 #include <kern/smp.h>
36 #include <kern/processor.h>
37 #include <kern/timer_call.h>
38 #include <kern/timer_queue.h>
39 #include <kern/thread.h>
40 #include <kern/policy_internal.h>
41
42 #include <sys/kdebug.h>
43
44 #if CONFIG_DTRACE
45 #include <mach/sdt.h>
46 #endif
47
48
49 #if DEBUG
50 #define TIMER_ASSERT 1
51 #endif
52
53 //#define TIMER_ASSERT 1
54 //#define TIMER_DBG 1
55
56 #if TIMER_DBG
57 #define DBG(x...) kprintf("DBG: " x);
58 #else
59 #define DBG(x...)
60 #endif
61
62 #if TIMER_TRACE
63 #define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST
64 #else
65 #define TIMER_KDEBUG_TRACE(x...)
66 #endif
67
68 LCK_GRP_DECLARE(timer_call_lck_grp, "timer_call");
69 LCK_GRP_DECLARE(timer_longterm_lck_grp, "timer_longterm");
70
71 /* Timer queue lock must be acquired with interrupts disabled (under splclock()) */
72 #define timer_queue_lock_spin(queue) \
73 lck_mtx_lock_spin_always(&queue->lock_data)
74
75 #define timer_queue_unlock(queue) \
76 lck_mtx_unlock_always(&queue->lock_data)
77
78 /*
79 * The longterm timer object is a global structure holding all timers
80 * beyond the short-term, local timer queue threshold. The boot processor
81 * is responsible for moving each timer to its local timer queue
82 * if and when that timer becomes due within the threshold.
83 */
84
85 /* Sentinel for "no time set": */
86 #define TIMER_LONGTERM_NONE EndOfAllTime
87 /* The default threshold is the delta above which a timer is "long-term" */
88 #if defined(__x86_64__)
89 #define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC) /* 1 sec */
90 #else
91 #define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE /* disabled */
92 #endif
93
94 /*
95 * The scan_limit throttles processing of the longterm queue.
96 * If the scan time exceeds this limit, we terminate, unlock
97 * and defer for scan_interval. This prevents unbounded holding of
98 * timer queue locks with interrupts masked.
99 */
100 #define TIMER_LONGTERM_SCAN_LIMIT (100ULL * NSEC_PER_USEC) /* 100 us */
101 #define TIMER_LONGTERM_SCAN_INTERVAL (100ULL * NSEC_PER_USEC) /* 100 us */
102 /* Sentinel for "scan limit exceeded": */
103 #define TIMER_LONGTERM_SCAN_AGAIN 0
104
105 typedef struct {
106 uint64_t interval; /* longterm timer interval */
107 uint64_t margin; /* fudge factor (10% of interval) */
108 uint64_t deadline; /* first/soonest longterm deadline */
109 uint64_t preempted; /* sooner timer has pre-empted */
110 timer_call_t call; /* first/soonest longterm timer call */
111 uint64_t deadline_set; /* next timer set */
112 timer_call_data_t timer; /* timer used by threshold management */
113 /* Stats: */
114 uint64_t scans; /* num threshold timer scans */
115 uint64_t preempts; /* num threshold reductions */
116 uint64_t latency; /* average threshold latency */
117 uint64_t latency_min; /* minimum threshold latency */
118 uint64_t latency_max; /* maximum threshold latency */
119 } threshold_t;
120
121 typedef struct {
122 mpqueue_head_t queue; /* longterm timer list */
123 uint64_t enqueues; /* num timers queued */
124 uint64_t dequeues; /* num timers dequeued */
125 uint64_t escalates; /* num timers becoming shortterm */
126 uint64_t scan_time; /* last time the list was scanned */
127 threshold_t threshold; /* longterm timer threshold */
128 uint64_t scan_limit; /* maximum scan time */
129 uint64_t scan_interval; /* interval between LT "escalation" scans */
130 uint64_t scan_pauses; /* num scans exceeding time limit */
131 } timer_longterm_t;
132
133 timer_longterm_t timer_longterm = {
134 .scan_limit = TIMER_LONGTERM_SCAN_LIMIT,
135 .scan_interval = TIMER_LONGTERM_SCAN_INTERVAL,
136 };
137
138 static mpqueue_head_t *timer_longterm_queue = NULL;
139
140 static void timer_longterm_init(void);
141 static void timer_longterm_callout(
142 timer_call_param_t p0,
143 timer_call_param_t p1);
144 extern void timer_longterm_scan(
145 timer_longterm_t *tlp,
146 uint64_t now);
147 static void timer_longterm_update(
148 timer_longterm_t *tlp);
149 static void timer_longterm_update_locked(
150 timer_longterm_t *tlp);
151 static mpqueue_head_t * timer_longterm_enqueue_unlocked(
152 timer_call_t call,
153 uint64_t now,
154 uint64_t deadline,
155 mpqueue_head_t ** old_queue,
156 uint64_t soft_deadline,
157 uint64_t ttd,
158 timer_call_param_t param1,
159 uint32_t callout_flags);
160 static void timer_longterm_dequeued_locked(
161 timer_call_t call);
162
163 uint64_t past_deadline_timers;
164 uint64_t past_deadline_deltas;
165 uint64_t past_deadline_longest;
166 uint64_t past_deadline_shortest = ~0ULL;
167 enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};
168
169 uint64_t past_deadline_timer_adjustment;
170
171 static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited);
172 boolean_t mach_timer_coalescing_enabled = TRUE;
173
174 mpqueue_head_t *timer_call_enqueue_deadline_unlocked(
175 timer_call_t call,
176 mpqueue_head_t *queue,
177 uint64_t deadline,
178 uint64_t soft_deadline,
179 uint64_t ttd,
180 timer_call_param_t param1,
181 uint32_t flags);
182
183 mpqueue_head_t *timer_call_dequeue_unlocked(
184 timer_call_t call);
185
186 timer_coalescing_priority_params_t tcoal_prio_params;
187
188 #if TCOAL_PRIO_STATS
189 int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
190 #define TCOAL_PRIO_STAT(x) (x++)
191 #else
192 #define TCOAL_PRIO_STAT(x)
193 #endif
194
195 static void
196 timer_call_init_abstime(void)
197 {
198 int i;
199 uint64_t result;
200 timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params();
201 nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
202 nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result);
203 tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
204 nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result);
205 tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
206 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result);
207 tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
208 tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
209 tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
210 tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
211 tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
212 tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;
213
214 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
215 &tcoal_prio_params.timer_coalesce_rt_abstime_max);
216 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
217 &tcoal_prio_params.timer_coalesce_bg_abstime_max);
218 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
219 &tcoal_prio_params.timer_coalesce_kt_abstime_max);
220 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
221 &tcoal_prio_params.timer_coalesce_fp_abstime_max);
222 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
223 &tcoal_prio_params.timer_coalesce_ts_abstime_max);
224
225 for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) {
226 tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
227 nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
228 &tcoal_prio_params.latency_qos_abstime_max[i]);
229 tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
230 }
231 }
232
233
234 void
235 timer_call_init(void)
236 {
237 timer_longterm_init();
238 timer_call_init_abstime();
239 }
240
241
242 void
243 timer_call_queue_init(mpqueue_head_t *queue)
244 {
245 DBG("timer_call_queue_init(%p)\n", queue);
246 mpqueue_init(queue, &timer_call_lck_grp, LCK_ATTR_NULL);
247 }
248
249
250 void
251 timer_call_setup(
252 timer_call_t call,
253 timer_call_func_t func,
254 timer_call_param_t param0)
255 {
256 DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
257
258 *call = (struct timer_call) {
259 .tc_func = func,
260 .tc_param0 = param0,
261 .tc_async_dequeue = false,
262 };
263
264 simple_lock_init(&(call)->tc_lock, 0);
265 }
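
/*
 * A minimal usage sketch (illustrative only; example_timer, example_func and
 * example_arm are hypothetical names, and the flag assumes the standard
 * TIMER_CALL_SYS_NORMAL urgency from <kern/timer_call.h>): a client
 * statically allocates the call, initializes it once with timer_call_setup(),
 * then arms it with timer_call_enter().
 *
 *	static timer_call_data_t example_timer;
 *
 *	static void
 *	example_func(timer_call_param_t p0, timer_call_param_t p1)
 *	{
 *		// runs from interrupt context at (or after) the deadline
 *	}
 *
 *	static void
 *	example_arm(void)
 *	{
 *		uint64_t delta;
 *
 *		timer_call_setup(&example_timer, example_func, NULL);
 *		nanoseconds_to_absolutetime(100 * NSEC_PER_MSEC, &delta);
 *		timer_call_enter(&example_timer,
 *		    mach_absolute_time() + delta, TIMER_CALL_SYS_NORMAL);
 *	}
 *
 * A later timer_call_cancel(&example_timer) returns TRUE only if the call
 * was still queued when cancelled.
 */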
266
267 static mpqueue_head_t*
268 mpqueue_for_timer_call(timer_call_t entry)
269 {
270 queue_t queue_entry_is_on = entry->tc_queue;
271 /* 'cast' the queue back to the original mpqueue */
272 return __container_of(queue_entry_is_on, struct mpqueue_head, head);
273 }
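
/*
 * Roughly, the __container_of() above recovers the enclosing mpqueue from the
 * embedded queue head; a minimal sketch of what it evaluates to (the real
 * macro is defined in the kernel's container-of/queue headers, not in this
 * file):
 *
 *	(mpqueue_head_t *)((char *)queue_entry_is_on -
 *	    offsetof(struct mpqueue_head, head))
 *
 * Callers below compare the result against NULL, which assumes a NULL
 * tc_queue maps back to NULL, i.e. that 'head' sits at offset zero in
 * struct mpqueue_head.
 */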
274
275
276 static __inline__ mpqueue_head_t *
277 timer_call_entry_dequeue(
278 timer_call_t entry)
279 {
280 mpqueue_head_t *old_mpqueue = mpqueue_for_timer_call(entry);
281
282 /* The entry was always on a queue */
283 assert(old_mpqueue != NULL);
284
285 #if TIMER_ASSERT
286 if (!hw_lock_held((hw_lock_t)&entry->tc_lock)) {
287 panic("_call_entry_dequeue() "
288 "entry %p is not locked\n", entry);
289 }
290
291 /*
292 * XXX The queue lock is actually a mutex in spin mode
293 * but there's no way to test for it being held
294 * so we pretend it's a spinlock!
295 */
296 if (!hw_lock_held((hw_lock_t)&old_mpqueue->lock_data)) {
297 panic("_call_entry_dequeue() "
298 "queue %p is not locked\n", old_mpqueue);
299 }
300 #endif /* TIMER_ASSERT */
301
302 if (old_mpqueue != timer_longterm_queue) {
303 priority_queue_remove(&old_mpqueue->mpq_pqhead,
304 &entry->tc_pqlink);
305 }
306
307 remqueue(&entry->tc_qlink);
308
309 entry->tc_queue = NULL;
310
311 old_mpqueue->count--;
312
313 return old_mpqueue;
314 }
315
316 static __inline__ mpqueue_head_t *
317 timer_call_entry_enqueue_deadline(
318 timer_call_t entry,
319 mpqueue_head_t *new_mpqueue,
320 uint64_t deadline)
321 {
322 mpqueue_head_t *old_mpqueue = mpqueue_for_timer_call(entry);
323
324 #if TIMER_ASSERT
325 if (!hw_lock_held((hw_lock_t)&entry->tc_lock)) {
326 panic("_call_entry_enqueue_deadline() "
327 "entry %p is not locked\n", entry);
328 }
329
330 /* XXX More lock pretense: */
331 if (!hw_lock_held((hw_lock_t)&new_mpqueue->lock_data)) {
332 panic("_call_entry_enqueue_deadline() "
333 "queue %p is not locked\n", new_mpqueue);
334 }
335
336 if (old_mpqueue != NULL && old_mpqueue != new_mpqueue) {
337 panic("_call_entry_enqueue_deadline() "
338 "old_mpqueue %p != new_mpqueue", old_mpqueue);
339 }
340 #endif /* TIMER_ASSERT */
341
342 /* no longterm queue involved */
343 assert(new_mpqueue != timer_longterm_queue);
344 assert(old_mpqueue != timer_longterm_queue);
345
346 if (old_mpqueue == new_mpqueue) {
347 /* optimize the same-queue case to avoid a full re-insert */
348 uint64_t old_deadline = entry->tc_pqlink.deadline;
349 entry->tc_pqlink.deadline = deadline;
350
351 if (old_deadline < deadline) {
352 priority_queue_entry_increased(&new_mpqueue->mpq_pqhead,
353 &entry->tc_pqlink);
354 } else {
355 priority_queue_entry_decreased(&new_mpqueue->mpq_pqhead,
356 &entry->tc_pqlink);
357 }
358 } else {
359 if (old_mpqueue != NULL) {
360 priority_queue_remove(&old_mpqueue->mpq_pqhead,
361 &entry->tc_pqlink);
362
363 re_queue_tail(&new_mpqueue->head, &entry->tc_qlink);
364 } else {
365 enqueue_tail(&new_mpqueue->head, &entry->tc_qlink);
366 }
367
368 entry->tc_queue = &new_mpqueue->head;
369 entry->tc_pqlink.deadline = deadline;
370
371 priority_queue_insert(&new_mpqueue->mpq_pqhead, &entry->tc_pqlink);
372 }
373
374
375 /* For efficiency, track the earliest soft deadline on the queue,
376 * so that fuzzy decisions can be made without lock acquisitions.
377 */
378
379 timer_call_t thead = priority_queue_min(&new_mpqueue->mpq_pqhead, struct timer_call, tc_pqlink);
380
381 new_mpqueue->earliest_soft_deadline = thead->tc_flags & TIMER_CALL_RATELIMITED ? thead->tc_pqlink.deadline : thead->tc_soft_deadline;
382
383 if (old_mpqueue) {
384 old_mpqueue->count--;
385 }
386 new_mpqueue->count++;
387
388 return old_mpqueue;
389 }
390
391 static __inline__ void
392 timer_call_entry_enqueue_tail(
393 timer_call_t entry,
394 mpqueue_head_t *queue)
395 {
396 /* entry is always dequeued before this call */
397 assert(entry->tc_queue == NULL);
398
399 /*
400 * this is only used for timer_longterm_queue, which is unordered
401 * and thus needs no priority queueing
402 */
403 assert(queue == timer_longterm_queue);
404
405 enqueue_tail(&queue->head, &entry->tc_qlink);
406
407 entry->tc_queue = &queue->head;
408
409 queue->count++;
410 return;
411 }
412
413 /*
414 * Remove timer entry from its queue but don't change the queue pointer
415 * and set the async_dequeue flag. This is locking case 2b.
416 */
417 static __inline__ void
418 timer_call_entry_dequeue_async(
419 timer_call_t entry)
420 {
421 mpqueue_head_t *old_mpqueue = mpqueue_for_timer_call(entry);
422 if (old_mpqueue) {
423 old_mpqueue->count--;
424
425 if (old_mpqueue != timer_longterm_queue) {
426 priority_queue_remove(&old_mpqueue->mpq_pqhead,
427 &entry->tc_pqlink);
428 }
429
430 remqueue(&entry->tc_qlink);
431 entry->tc_async_dequeue = true;
432 }
433 return;
434 }
435
436 #if TIMER_ASSERT
437 unsigned timer_call_enqueue_deadline_unlocked_async1;
438 unsigned timer_call_enqueue_deadline_unlocked_async2;
439 #endif
440 /*
441 * Assumes call_entry and queues unlocked, interrupts disabled.
442 */
443 __inline__ mpqueue_head_t *
444 timer_call_enqueue_deadline_unlocked(
445 timer_call_t call,
446 mpqueue_head_t *queue,
447 uint64_t deadline,
448 uint64_t soft_deadline,
449 uint64_t ttd,
450 timer_call_param_t param1,
451 uint32_t callout_flags)
452 {
453 DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);
454
455 simple_lock(&call->tc_lock, LCK_GRP_NULL);
456
457 mpqueue_head_t *old_queue = mpqueue_for_timer_call(call);
458
459 if (old_queue != NULL) {
460 timer_queue_lock_spin(old_queue);
461 if (call->tc_async_dequeue) {
462 /* collision (1c): timer already dequeued, clear flag */
463 #if TIMER_ASSERT
464 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
465 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
466 VM_KERNEL_UNSLIDE_OR_PERM(call),
467 call->tc_async_dequeue,
468 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
469 0x1c, 0);
470 timer_call_enqueue_deadline_unlocked_async1++;
471 #endif
472 call->tc_async_dequeue = false;
473 call->tc_queue = NULL;
474 } else if (old_queue != queue) {
475 timer_call_entry_dequeue(call);
476 #if TIMER_ASSERT
477 timer_call_enqueue_deadline_unlocked_async2++;
478 #endif
479 }
480 if (old_queue == timer_longterm_queue) {
481 timer_longterm_dequeued_locked(call);
482 }
483 if (old_queue != queue) {
484 timer_queue_unlock(old_queue);
485 timer_queue_lock_spin(queue);
486 }
487 } else {
488 timer_queue_lock_spin(queue);
489 }
490
491 call->tc_soft_deadline = soft_deadline;
492 call->tc_flags = callout_flags;
493 call->tc_param1 = param1;
494 call->tc_ttd = ttd;
495
496 timer_call_entry_enqueue_deadline(call, queue, deadline);
497 timer_queue_unlock(queue);
498 simple_unlock(&call->tc_lock);
499
500 return old_queue;
501 }
502
503 #if TIMER_ASSERT
504 unsigned timer_call_dequeue_unlocked_async1;
505 unsigned timer_call_dequeue_unlocked_async2;
506 #endif
507 mpqueue_head_t *
508 timer_call_dequeue_unlocked(
509 timer_call_t call)
510 {
511 DBG("timer_call_dequeue_unlocked(%p)\n", call);
512
513 simple_lock(&call->tc_lock, LCK_GRP_NULL);
514
515 mpqueue_head_t *old_queue = mpqueue_for_timer_call(call);
516
517 #if TIMER_ASSERT
518 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
519 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
520 VM_KERNEL_UNSLIDE_OR_PERM(call),
521 call->tc_async_dequeue,
522 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
523 0, 0);
524 #endif
525 if (old_queue != NULL) {
526 timer_queue_lock_spin(old_queue);
527 if (call->tc_async_dequeue) {
528 /* collision (1c): timer already dequeued, clear flag */
529 #if TIMER_ASSERT
530 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
531 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
532 VM_KERNEL_UNSLIDE_OR_PERM(call),
533 call->tc_async_dequeue,
534 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
535 0x1c, 0);
536 timer_call_dequeue_unlocked_async1++;
537 #endif
538 call->tc_async_dequeue = false;
539 call->tc_queue = NULL;
540 } else {
541 timer_call_entry_dequeue(call);
542 }
543 if (old_queue == timer_longterm_queue) {
544 timer_longterm_dequeued_locked(call);
545 }
546 timer_queue_unlock(old_queue);
547 }
548 simple_unlock(&call->tc_lock);
549 return old_queue;
550 }
551
552 uint64_t
553 timer_call_past_deadline_timer_handle(uint64_t deadline, uint64_t ctime)
554 {
555 uint64_t delta = (ctime - deadline);
556
557 past_deadline_timers++;
558 past_deadline_deltas += delta;
559 if (delta > past_deadline_longest) {
560 past_deadline_longest = delta;
561 }
562 if (delta < past_deadline_shortest) {
563 past_deadline_shortest = delta;
564 }
565
566 return ctime + past_deadline_timer_adjustment;
567 }
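
/*
 * Worked example: PAST_DEADLINE_TIMER_ADJUSTMENT_NS is 10 us, so a timer
 * whose (padded) deadline has already passed is re-targeted to fire roughly
 * 10 us after 'now' instead of immediately, and the size of the miss is
 * accumulated in the past_deadline_* statistics above.
 */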
568
569 /*
570 * Timer call entry locking model
571 * ==============================
572 *
573 * Timer call entries are linked on per-cpu timer queues which are protected
574 * by the queue lock and the call entry lock. The locking protocol is:
575 *
576 * 0) The canonical locking order is timer call entry followed by queue.
577 *
578 * 1) With only the entry lock held, entry.queue is valid:
579 * 1a) NULL: the entry is not queued, or
580 * 1b) non-NULL: this queue must be locked before the entry is modified.
581 * After locking the queue, the call.async_dequeue flag must be checked:
582 * 1c) TRUE: the entry was removed from the queue by another thread
583 * and we must NULL the entry.queue and reset this flag, or
584 * 1d) FALSE: (ie. queued), the entry can be manipulated.
585 *
586 * 2) If a queue lock is obtained first, the queue is stable:
587 * 2a) If a try-lock of a queued entry succeeds, the call can be operated on
588 * and dequeued.
589 * 2b) If a try-lock fails, it indicates that another thread is attempting
590 * to change the entry and move it to a different position in this queue
591 * or to a different queue. The entry can be dequeued but it should not be
592 * operated upon since it is being changed. Furthermore, we don't null
593 * the entry.queue pointer (protected by the entry lock we don't own).
594 * Instead, we set the async_dequeue flag -- see (1c).
595 * 2c) Same as 2b but occurring when a longterm timer is matured.
596 * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
597 * should be manipulated with the appropriate timer queue lock held,
598 * to prevent queue traversals from observing inconsistent
599 * updates to an in-flight callout.
600 */
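
/*
 * A condensed sketch of case (1), as implemented by the helpers above and
 * below (tracing and longterm bookkeeping omitted; illustrative only, not an
 * additional API):
 *
 *	simple_lock(&call->tc_lock, LCK_GRP_NULL);	// rule 0: entry first
 *	mpqueue_head_t *q = mpqueue_for_timer_call(call);
 *	if (q != NULL) {				// case 1b: queued
 *		timer_queue_lock_spin(q);
 *		if (call->tc_async_dequeue) {		// case 1c
 *			call->tc_async_dequeue = false;	// already off the list
 *			call->tc_queue = NULL;
 *		} else {				// case 1d
 *			timer_call_entry_dequeue(call);
 *		}
 *		timer_queue_unlock(q);
 *	}
 *	simple_unlock(&call->tc_lock);
 */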
601
602 /*
603 * In the debug case, we assert that the timer call locking protocol
604 * is being obeyed.
605 */
606
607 static boolean_t
608 timer_call_enter_internal(
609 timer_call_t call,
610 timer_call_param_t param1,
611 uint64_t deadline,
612 uint64_t leeway,
613 uint32_t flags,
614 boolean_t ratelimited)
615 {
616 mpqueue_head_t *queue = NULL;
617 mpqueue_head_t *old_queue;
618 spl_t s;
619 uint64_t slop;
620 uint32_t urgency;
621 uint64_t sdeadline, ttd;
622
623 assert(call->tc_func != NULL);
624 s = splclock();
625
626 sdeadline = deadline;
627 uint64_t ctime = mach_absolute_time();
628
629 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
630 DECR_TIMER_ENTER | DBG_FUNC_START,
631 VM_KERNEL_UNSLIDE_OR_PERM(call),
632 VM_KERNEL_ADDRHIDE(param1), deadline, flags, 0);
633
634 urgency = (flags & TIMER_CALL_URGENCY_MASK);
635
636 boolean_t slop_ratelimited = FALSE;
637 slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);
638
639 if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) {
640 slop = leeway;
641 }
642
643 if (UINT64_MAX - deadline <= slop) {
644 deadline = UINT64_MAX;
645 } else {
646 deadline += slop;
647 }
648
649 if (__improbable(deadline < ctime)) {
650 deadline = timer_call_past_deadline_timer_handle(deadline, ctime);
651 sdeadline = deadline;
652 }
653
654 if (ratelimited || slop_ratelimited) {
655 flags |= TIMER_CALL_RATELIMITED;
656 } else {
657 flags &= ~TIMER_CALL_RATELIMITED;
658 }
659
660 ttd = sdeadline - ctime;
661 #if CONFIG_DTRACE
662 DTRACE_TMR7(callout__create, timer_call_func_t, call->tc_func,
663 timer_call_param_t, call->tc_param0, uint32_t, flags,
664 (deadline - sdeadline),
665 (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
666 #endif
667
668 /* Program timer callout parameters under the appropriate per-CPU or
669 * longterm queue lock. The callout may have been previously enqueued
670 * and in-flight on this or another timer queue.
671 */
672 if (!ratelimited && !slop_ratelimited) {
673 queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
674 }
675
676 if (queue == NULL) {
677 queue = timer_queue_assign(deadline);
678 old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
679 }
680
681 #if TIMER_TRACE
682 call->tc_entry_time = ctime;
683 #endif
684
685 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
686 DECR_TIMER_ENTER | DBG_FUNC_END,
687 VM_KERNEL_UNSLIDE_OR_PERM(call),
688 (old_queue != NULL), deadline, queue->count, 0);
689
690 splx(s);
691
692 return old_queue != NULL;
693 }
694
695 /*
696 * timer_call_*()
697 * return boolean indicating whether the call was previously queued.
698 */
699 boolean_t
700 timer_call_enter(
701 timer_call_t call,
702 uint64_t deadline,
703 uint32_t flags)
704 {
705 return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE);
706 }
707
708 boolean_t
709 timer_call_enter1(
710 timer_call_t call,
711 timer_call_param_t param1,
712 uint64_t deadline,
713 uint32_t flags)
714 {
715 return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE);
716 }
717
718 boolean_t
719 timer_call_enter_with_leeway(
720 timer_call_t call,
721 timer_call_param_t param1,
722 uint64_t deadline,
723 uint64_t leeway,
724 uint32_t flags,
725 boolean_t ratelimited)
726 {
727 return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
728 }
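
/*
 * Sketch of a leeway request (illustrative; example_timer and deadline as in
 * the earlier sketch): TIMER_CALL_LEEWAY tells timer_call_enter_internal() to
 * honor the caller-supplied leeway whenever it exceeds the computed slop.
 *
 *	uint64_t leeway;
 *	nanoseconds_to_absolutetime(5 * NSEC_PER_MSEC, &leeway);
 *	timer_call_enter_with_leeway(&example_timer, NULL, deadline, leeway,
 *	    TIMER_CALL_SYS_NORMAL | TIMER_CALL_LEEWAY, FALSE);
 */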
729
730 boolean_t
731 timer_call_cancel(
732 timer_call_t call)
733 {
734 mpqueue_head_t *old_queue;
735 spl_t s;
736
737 s = splclock();
738
739 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
740 DECR_TIMER_CANCEL | DBG_FUNC_START,
741 VM_KERNEL_UNSLIDE_OR_PERM(call),
742 call->tc_pqlink.deadline, call->tc_soft_deadline, call->tc_flags, 0);
743
744 old_queue = timer_call_dequeue_unlocked(call);
745
746 if (old_queue != NULL) {
747 timer_queue_lock_spin(old_queue);
748
749 timer_call_t new_head = priority_queue_min(&old_queue->mpq_pqhead, struct timer_call, tc_pqlink);
750
751 if (new_head) {
752 timer_queue_cancel(old_queue, call->tc_pqlink.deadline, new_head->tc_pqlink.deadline);
753 old_queue->earliest_soft_deadline = new_head->tc_flags & TIMER_CALL_RATELIMITED ? new_head->tc_pqlink.deadline : new_head->tc_soft_deadline;
754 } else {
755 timer_queue_cancel(old_queue, call->tc_pqlink.deadline, UINT64_MAX);
756 old_queue->earliest_soft_deadline = UINT64_MAX;
757 }
758
759 timer_queue_unlock(old_queue);
760 }
761 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
762 DECR_TIMER_CANCEL | DBG_FUNC_END,
763 VM_KERNEL_UNSLIDE_OR_PERM(call),
764 VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
765 call->tc_pqlink.deadline - mach_absolute_time(),
766 call->tc_pqlink.deadline - call->tc_entry_time, 0);
767 splx(s);
768
769 #if CONFIG_DTRACE
770 DTRACE_TMR6(callout__cancel, timer_call_func_t, call->tc_func,
771 timer_call_param_t, call->tc_param0, uint32_t, call->tc_flags, 0,
772 (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
773 #endif /* CONFIG_DTRACE */
774
775 return old_queue != NULL;
776 }
777
778 static uint32_t timer_queue_shutdown_lock_skips;
779 static uint32_t timer_queue_shutdown_discarded;
780
781 void
782 timer_queue_shutdown(
783 mpqueue_head_t *queue)
784 {
785 timer_call_t call;
786 mpqueue_head_t *new_queue;
787 spl_t s;
788
789
790 DBG("timer_queue_shutdown(%p)\n", queue);
791
792 s = splclock();
793
794 while (TRUE) {
795 timer_queue_lock_spin(queue);
796
797 call = qe_queue_first(&queue->head, struct timer_call, tc_qlink);
798
799 if (call == NULL) {
800 break;
801 }
802
803 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
804 /*
805 * case (2b) lock order inversion, dequeue and skip
806 * Don't change the call_entry queue back-pointer
807 * but set the async_dequeue field.
808 */
809 timer_queue_shutdown_lock_skips++;
810 timer_call_entry_dequeue_async(call);
811 #if TIMER_ASSERT
812 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
813 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
814 VM_KERNEL_UNSLIDE_OR_PERM(call),
815 call->tc_async_dequeue,
816 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
817 0x2b, 0);
818 #endif
819 timer_queue_unlock(queue);
820 continue;
821 }
822
823 boolean_t call_local = ((call->tc_flags & TIMER_CALL_LOCAL) != 0);
824
825 /* remove entry from old queue */
826 timer_call_entry_dequeue(call);
827 timer_queue_unlock(queue);
828
829 if (call_local == FALSE) {
830 /* and queue it on new, discarding LOCAL timers */
831 new_queue = timer_queue_assign(call->tc_pqlink.deadline);
832 timer_queue_lock_spin(new_queue);
833 timer_call_entry_enqueue_deadline(
834 call, new_queue, call->tc_pqlink.deadline);
835 timer_queue_unlock(new_queue);
836 } else {
837 timer_queue_shutdown_discarded++;
838 }
839
840 assert(call_local == FALSE);
841 simple_unlock(&call->tc_lock);
842 }
843
844 timer_queue_unlock(queue);
845 splx(s);
846 }
847
848
849 static uint32_t timer_queue_expire_lock_skips;
850 uint64_t
851 timer_queue_expire_with_options(
852 mpqueue_head_t *queue,
853 uint64_t deadline,
854 boolean_t rescan)
855 {
856 timer_call_t call = NULL;
857 uint32_t tc_iterations = 0;
858 DBG("timer_queue_expire(%p,)\n", queue);
859
860 /* 'rescan' means look at every timer in the list, instead of
861 * early-exiting when the head of the list expires in the future.
862 * when 'rescan' is true, iterate by linked list instead of priority queue.
863 *
864 * TODO: if we keep a deadline ordered and soft-deadline ordered
865 * priority queue, then it's no longer necessary to do that
866 */
867
868 uint64_t cur_deadline = deadline;
869 timer_queue_lock_spin(queue);
870
871 while (!queue_empty(&queue->head)) {
872 /* Upon processing one or more timer calls, refresh the
873 * deadline to account for time elapsed in the callout
874 */
875 if (++tc_iterations > 1) {
876 cur_deadline = mach_absolute_time();
877 }
878
879 if (call == NULL) {
880 if (rescan == FALSE) {
881 call = priority_queue_min(&queue->mpq_pqhead, struct timer_call, tc_pqlink);
882 } else {
883 call = qe_queue_first(&queue->head, struct timer_call, tc_qlink);
884 }
885 }
886
887 if (call->tc_soft_deadline <= cur_deadline) {
888 timer_call_func_t func;
889 timer_call_param_t param0, param1;
890
891 TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->tc_soft_deadline, 0, 0, 0);
892 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
893 DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
894 VM_KERNEL_UNSLIDE_OR_PERM(call),
895 call->tc_soft_deadline,
896 call->tc_pqlink.deadline,
897 call->tc_entry_time, 0);
898
899 if ((call->tc_flags & TIMER_CALL_RATELIMITED) &&
900 (call->tc_pqlink.deadline > cur_deadline)) {
901 if (rescan == FALSE) {
902 break;
903 }
904 }
905
906 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
907 /* case (2b) lock inversion, dequeue and skip */
908 timer_queue_expire_lock_skips++;
909 timer_call_entry_dequeue_async(call);
910 call = NULL;
911 continue;
912 }
913
914 timer_call_entry_dequeue(call);
915
916 func = call->tc_func;
917 param0 = call->tc_param0;
918 param1 = call->tc_param1;
919
920 simple_unlock(&call->tc_lock);
921 timer_queue_unlock(queue);
922
923 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
924 DECR_TIMER_CALLOUT | DBG_FUNC_START,
925 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
926 VM_KERNEL_ADDRHIDE(param0),
927 VM_KERNEL_ADDRHIDE(param1),
928 0);
929
930 #if CONFIG_DTRACE
931 DTRACE_TMR7(callout__start, timer_call_func_t, func,
932 timer_call_param_t, param0, unsigned, call->tc_flags,
933 0, (call->tc_ttd >> 32),
934 (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
935 #endif
936 /* Maintain time-to-deadline in per-processor data
937 * structure for thread wakeup deadline statistics.
938 */
939 uint64_t *ttdp = &current_processor()->timer_call_ttd;
940 *ttdp = call->tc_ttd;
941 (*func)(param0, param1);
942 *ttdp = 0;
943 #if CONFIG_DTRACE
944 DTRACE_TMR4(callout__end, timer_call_func_t, func,
945 param0, param1, call);
946 #endif
947
948 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
949 DECR_TIMER_CALLOUT | DBG_FUNC_END,
950 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
951 VM_KERNEL_ADDRHIDE(param0),
952 VM_KERNEL_ADDRHIDE(param1),
953 0);
954 call = NULL;
955 timer_queue_lock_spin(queue);
956 } else {
957 if (__probable(rescan == FALSE)) {
958 break;
959 } else {
960 int64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
961 assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
962
963 /* DRK: On a latency quality-of-service level change,
964 * re-sort potentially rate-limited timers. The platform
965 * layer determines which timers require
966 * this. In the absence of the per-callout
967 * synchronization requirement, a global resort could
968 * be more efficient. The re-sort effectively
969 * annuls all timer adjustments, i.e. the "soft
970 * deadline" is the sort key.
971 */
972
973 if (timer_resort_threshold(skew)) {
974 if (__probable(simple_lock_try(&call->tc_lock, LCK_GRP_NULL))) {
975 /* TODO: don't need to dequeue before enqueue */
976 timer_call_entry_dequeue(call);
977 timer_call_entry_enqueue_deadline(call, queue, call->tc_soft_deadline);
978 simple_unlock(&call->tc_lock);
979 call = NULL;
980 }
981 }
982 if (call) {
983 call = qe_queue_next(&queue->head, call, struct timer_call, tc_qlink);
984
985 if (call == NULL) {
986 break;
987 }
988 }
989 }
990 }
991 }
992
993 call = priority_queue_min(&queue->mpq_pqhead, struct timer_call, tc_pqlink);
994
995 if (call) {
996 cur_deadline = call->tc_pqlink.deadline;
997 queue->earliest_soft_deadline = (call->tc_flags & TIMER_CALL_RATELIMITED) ? call->tc_pqlink.deadline: call->tc_soft_deadline;
998 } else {
999 queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
1000 }
1001
1002 timer_queue_unlock(queue);
1003
1004 return cur_deadline;
1005 }
1006
1007 uint64_t
1008 timer_queue_expire(
1009 mpqueue_head_t *queue,
1010 uint64_t deadline)
1011 {
1012 return timer_queue_expire_with_options(queue, deadline, FALSE);
1013 }
1014
1015 extern int serverperfmode;
1016 static uint32_t timer_queue_migrate_lock_skips;
1017 /*
1018 * timer_queue_migrate() is called by timer_queue_migrate_cpu()
1019 * to move timer requests from the local processor (queue_from)
1020 * to a target processor's queue (queue_to).
1021 */
1022 int
1023 timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
1024 {
1025 timer_call_t call;
1026 timer_call_t head_to;
1027 int timers_migrated = 0;
1028
1029 DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);
1030
1031 assert(!ml_get_interrupts_enabled());
1032 assert(queue_from != queue_to);
1033
1034 if (serverperfmode) {
1035 /*
1036 * if we're running a high end server
1037 * avoid migrations... they add latency
1038 * and don't save us power under typical
1039 * server workloads
1040 */
1041 return -4;
1042 }
1043
1044 /*
1045 * Take both local (from) and target (to) timer queue locks while
1046 * moving the timers from the local queue to the target processor.
1047 * We assume that the target is always the boot processor.
1048 * But only move if all of the following are true:
1049 * - the target queue is non-empty
1050 * - the local queue is non-empty
1051 * - the local queue's first deadline is later than the target's
1052 * - the local queue contains no non-migratable "local" call
1053 * so that we need not have the target resync.
1054 */
1055
1056 timer_queue_lock_spin(queue_to);
1057
1058 head_to = priority_queue_min(&queue_to->mpq_pqhead, struct timer_call, tc_pqlink);
1059
1060 if (head_to == NULL) {
1061 timers_migrated = -1;
1062 goto abort1;
1063 }
1064
1065 timer_queue_lock_spin(queue_from);
1066
1067 call = priority_queue_min(&queue_from->mpq_pqhead, struct timer_call, tc_pqlink);
1068
1069 if (call == NULL) {
1070 timers_migrated = -2;
1071 goto abort2;
1072 }
1073
1074 if (call->tc_pqlink.deadline < head_to->tc_pqlink.deadline) {
1075 timers_migrated = 0;
1076 goto abort2;
1077 }
1078
1079 /* perform scan for non-migratable timers */
1080 qe_foreach_element(call, &queue_from->head, tc_qlink) {
1081 if (call->tc_flags & TIMER_CALL_LOCAL) {
1082 timers_migrated = -3;
1083 goto abort2;
1084 }
1085 }
1086
1087 /* migration loop itself -- both queues are locked */
1088 qe_foreach_element_safe(call, &queue_from->head, tc_qlink) {
1089 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
1090 /* case (2b) lock order inversion, dequeue only */
1091 #ifdef TIMER_ASSERT
1092 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1093 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
1094 VM_KERNEL_UNSLIDE_OR_PERM(call),
1095 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
1096 0,
1097 0x2b, 0);
1098 #endif
1099 timer_queue_migrate_lock_skips++;
1100 timer_call_entry_dequeue_async(call);
1101 continue;
1102 }
1103 timer_call_entry_dequeue(call);
1104 timer_call_entry_enqueue_deadline(
1105 call, queue_to, call->tc_pqlink.deadline);
1106 timers_migrated++;
1107 simple_unlock(&call->tc_lock);
1108 }
1109 queue_from->earliest_soft_deadline = UINT64_MAX;
1110 abort2:
1111 timer_queue_unlock(queue_from);
1112 abort1:
1113 timer_queue_unlock(queue_to);
1114
1115 return timers_migrated;
1116 }
1117
1118 void
1119 timer_queue_trace_cpu(int ncpu)
1120 {
1121 timer_call_nosync_cpu(
1122 ncpu,
1123 (void (*)(void *))timer_queue_trace,
1124 (void*) timer_queue_cpu(ncpu));
1125 }
1126
1127 void
1128 timer_queue_trace(
1129 mpqueue_head_t *queue)
1130 {
1131 timer_call_t call;
1132 spl_t s;
1133
1134 if (!kdebug_enable) {
1135 return;
1136 }
1137
1138 s = splclock();
1139 timer_queue_lock_spin(queue);
1140
1141 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1142 DECR_TIMER_QUEUE | DBG_FUNC_START,
1143 queue->count, mach_absolute_time(), 0, 0, 0);
1144
1145 qe_foreach_element(call, &queue->head, tc_qlink) {
1146 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1147 DECR_TIMER_QUEUE | DBG_FUNC_NONE,
1148 call->tc_soft_deadline,
1149 call->tc_pqlink.deadline,
1150 call->tc_entry_time,
1151 VM_KERNEL_UNSLIDE(call->tc_func),
1152 0);
1153 }
1154
1155 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1156 DECR_TIMER_QUEUE | DBG_FUNC_END,
1157 queue->count, mach_absolute_time(), 0, 0, 0);
1158
1159 timer_queue_unlock(queue);
1160 splx(s);
1161 }
1162
1163 void
1164 timer_longterm_dequeued_locked(timer_call_t call)
1165 {
1166 timer_longterm_t *tlp = &timer_longterm;
1167
1168 tlp->dequeues++;
1169 if (call == tlp->threshold.call) {
1170 tlp->threshold.call = NULL;
1171 }
1172 }
1173
1174 /*
1175 * Place a timer call in the longterm list
1176 * and adjust the next timer callout deadline if the new timer is first.
1177 */
1178 mpqueue_head_t *
1179 timer_longterm_enqueue_unlocked(timer_call_t call,
1180 uint64_t now,
1181 uint64_t deadline,
1182 mpqueue_head_t **old_queue,
1183 uint64_t soft_deadline,
1184 uint64_t ttd,
1185 timer_call_param_t param1,
1186 uint32_t callout_flags)
1187 {
1188 timer_longterm_t *tlp = &timer_longterm;
1189 boolean_t update_required = FALSE;
1190 uint64_t longterm_threshold;
1191
1192 longterm_threshold = now + tlp->threshold.interval;
1193
1194 /*
1195 * Return NULL without doing anything if:
1196 * - this timer is local, or
1197 * - the longterm mechanism is disabled, or
1198 * - this deadline is too short.
1199 */
1200 if ((callout_flags & TIMER_CALL_LOCAL) != 0 ||
1201 (tlp->threshold.interval == TIMER_LONGTERM_NONE) ||
1202 (deadline <= longterm_threshold)) {
1203 return NULL;
1204 }
1205
1206 /*
1207 * Remove timer from its current queue, if any.
1208 */
1209 *old_queue = timer_call_dequeue_unlocked(call);
1210
1211 /*
1212 * Lock the longterm queue, queue timer and determine
1213 * whether an update is necessary.
1214 */
1215 assert(!ml_get_interrupts_enabled());
1216 simple_lock(&call->tc_lock, LCK_GRP_NULL);
1217 timer_queue_lock_spin(timer_longterm_queue);
1218 call->tc_pqlink.deadline = deadline;
1219 call->tc_param1 = param1;
1220 call->tc_ttd = ttd;
1221 call->tc_soft_deadline = soft_deadline;
1222 call->tc_flags = callout_flags;
1223 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1224
1225 tlp->enqueues++;
1226
1227 /*
1228 * We'll need to update the currently set threshold timer
1229 * if the new deadline is sooner and no sooner update is in flight.
1230 */
1231 if (deadline < tlp->threshold.deadline &&
1232 deadline < tlp->threshold.preempted) {
1233 tlp->threshold.preempted = deadline;
1234 tlp->threshold.call = call;
1235 update_required = TRUE;
1236 }
1237 timer_queue_unlock(timer_longterm_queue);
1238 simple_unlock(&call->tc_lock);
1239
1240 if (update_required) {
1241 /*
1242 * Note: this call expects that calling the master cpu
1243 * alone does not involve locking the topo lock.
1244 */
1245 timer_call_nosync_cpu(
1246 master_cpu,
1247 (void (*)(void *))timer_longterm_update,
1248 (void *)tlp);
1249 }
1250
1251 return timer_longterm_queue;
1252 }
1253
1254 /*
1255 * Scan for timers below the longterm threshold.
1256 * Move these to the local timer queue (of the boot processor on which the
1257 * calling thread is running).
1258 * Both the local (boot) queue and the longterm queue are locked.
1259 * The scan is similar to the timer migrate sequence but is performed by
1260 * successively examining each timer on the longterm queue:
1261 * - if within the short-term threshold
1262 * - enter on the local queue (unless being deleted),
1263 * - otherwise:
1264 * - if sooner, deadline becomes the next threshold deadline.
1265 * The total scan time is limited to TIMER_LONGTERM_SCAN_LIMIT. Should this be
1266 * exceeded, we abort and reschedule again so that we don't shut others out of
1267 * the timer queues. Longterm timers firing late is not critical.
1268 */
1269 void
1270 timer_longterm_scan(timer_longterm_t *tlp,
1271 uint64_t time_start)
1272 {
1273 timer_call_t call;
1274 uint64_t threshold;
1275 uint64_t deadline;
1276 uint64_t time_limit = time_start + tlp->scan_limit;
1277 mpqueue_head_t *timer_master_queue;
1278
1279 assert(!ml_get_interrupts_enabled());
1280 assert(cpu_number() == master_cpu);
1281
1282 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
1283 threshold = time_start + tlp->threshold.interval;
1284 }
1285
1286 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1287 tlp->threshold.call = NULL;
1288
1289 if (queue_empty(&timer_longterm_queue->head)) {
1290 return;
1291 }
1292
1293 timer_master_queue = timer_queue_cpu(master_cpu);
1294 timer_queue_lock_spin(timer_master_queue);
1295
1296 qe_foreach_element_safe(call, &timer_longterm_queue->head, tc_qlink) {
1297 deadline = call->tc_soft_deadline;
1298 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
1299 /* case (2c) lock order inversion, dequeue only */
1300 #ifdef TIMER_ASSERT
1301 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1302 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
1303 VM_KERNEL_UNSLIDE_OR_PERM(call),
1304 VM_KERNEL_UNSLIDE_OR_PERM(call->tc_queue),
1305 0,
1306 0x2c, 0);
1307 #endif
1308 timer_call_entry_dequeue_async(call);
1309 continue;
1310 }
1311 if (deadline < threshold) {
1312 /*
1313 * This timer needs moving (escalating)
1314 * to the local (boot) processor's queue.
1315 */
1316 #ifdef TIMER_ASSERT
1317 if (deadline < time_start) {
1318 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1319 DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
1320 VM_KERNEL_UNSLIDE_OR_PERM(call),
1321 deadline,
1322 time_start,
1323 threshold,
1324 0);
1325 }
1326 #endif
1327 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1328 DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
1329 VM_KERNEL_UNSLIDE_OR_PERM(call),
1330 call->tc_pqlink.deadline,
1331 call->tc_entry_time,
1332 VM_KERNEL_UNSLIDE(call->tc_func),
1333 0);
1334 tlp->escalates++;
1335 timer_call_entry_dequeue(call);
1336 timer_call_entry_enqueue_deadline(
1337 call, timer_master_queue, call->tc_pqlink.deadline);
1338 /*
1339 * A side-effect of the following call is to update
1340 * the actual hardware deadline if required.
1341 */
1342 (void) timer_queue_assign(deadline);
1343 } else {
1344 if (deadline < tlp->threshold.deadline) {
1345 tlp->threshold.deadline = deadline;
1346 tlp->threshold.call = call;
1347 }
1348 }
1349 simple_unlock(&call->tc_lock);
1350
1351 /* Abort scan if we're taking too long. */
1352 if (mach_absolute_time() > time_limit) {
1353 tlp->threshold.deadline = TIMER_LONGTERM_SCAN_AGAIN;
1354 tlp->scan_pauses++;
1355 DBG("timer_longterm_scan() paused %llu, qlen: %llu\n",
1356 time_limit, tlp->queue.count);
1357 break;
1358 }
1359 }
1360
1361 timer_queue_unlock(timer_master_queue);
1362 }
1363
1364 void
1365 timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
1366 {
1367 timer_longterm_t *tlp = (timer_longterm_t *) p0;
1368
1369 timer_longterm_update(tlp);
1370 }
1371
1372 void
1373 timer_longterm_update_locked(timer_longterm_t *tlp)
1374 {
1375 uint64_t latency;
1376
1377 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1378 DECR_TIMER_UPDATE | DBG_FUNC_START,
1379 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
1380 tlp->threshold.deadline,
1381 tlp->threshold.preempted,
1382 tlp->queue.count, 0);
1383
1384 tlp->scan_time = mach_absolute_time();
1385 if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
1386 tlp->threshold.preempts++;
1387 tlp->threshold.deadline = tlp->threshold.preempted;
1388 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1389 /*
1390 * Note: in the unlikely event that a pre-empted timer has
1391 * itself been cancelled, we'll simply re-scan later at the
1392 * time of the preempted/cancelled timer.
1393 */
1394 } else {
1395 tlp->threshold.scans++;
1396
1397 /*
1398 * Maintain a moving average of our wakeup latency.
1399 * Clamp latency to 0 and ignore above threshold interval.
1400 */
1401 if (tlp->scan_time > tlp->threshold.deadline_set) {
1402 latency = tlp->scan_time - tlp->threshold.deadline_set;
1403 } else {
1404 latency = 0;
1405 }
1406 if (latency < tlp->threshold.interval) {
1407 tlp->threshold.latency_min =
1408 MIN(tlp->threshold.latency_min, latency);
1409 tlp->threshold.latency_max =
1410 MAX(tlp->threshold.latency_max, latency);
1411 tlp->threshold.latency =
1412 (tlp->threshold.latency * 99 + latency) / 100;
1413 }
1414
1415 timer_longterm_scan(tlp, tlp->scan_time);
1416 }
1417
1418 tlp->threshold.deadline_set = tlp->threshold.deadline;
1419 /* The next deadline timer to be set is adjusted */
1420 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE &&
1421 tlp->threshold.deadline != TIMER_LONGTERM_SCAN_AGAIN) {
1422 tlp->threshold.deadline_set -= tlp->threshold.margin;
1423 tlp->threshold.deadline_set -= tlp->threshold.latency;
1424 }
1425
1426 /* Throttle next scan time */
1427 uint64_t scan_clamp = mach_absolute_time() + tlp->scan_interval;
1428 if (tlp->threshold.deadline_set < scan_clamp) {
1429 tlp->threshold.deadline_set = scan_clamp;
1430 }
1431
1432 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1433 DECR_TIMER_UPDATE | DBG_FUNC_END,
1434 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
1435 tlp->threshold.deadline,
1436 tlp->threshold.scans,
1437 tlp->queue.count, 0);
1438 }
1439
1440 void
1441 timer_longterm_update(timer_longterm_t *tlp)
1442 {
1443 spl_t s = splclock();
1444
1445 timer_queue_lock_spin(timer_longterm_queue);
1446
1447 if (cpu_number() != master_cpu) {
1448 panic("timer_longterm_update_master() on non-boot cpu");
1449 }
1450
1451 timer_longterm_update_locked(tlp);
1452
1453 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
1454 timer_call_enter(
1455 &tlp->threshold.timer,
1456 tlp->threshold.deadline_set,
1457 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1458 }
1459
1460 timer_queue_unlock(timer_longterm_queue);
1461 splx(s);
1462 }
1463
1464 void
1465 timer_longterm_init(void)
1466 {
1467 uint32_t longterm;
1468 timer_longterm_t *tlp = &timer_longterm;
1469
1470 DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue);
1471
1472 /*
1473 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD
1474 * or TIMER_LONGTERM_NONE (disabled) when serverperfmode is set;
1475 * either default may be overridden by the "longterm" boot-arg (in ms).
1476 */
1477 tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE
1478 : TIMER_LONGTERM_THRESHOLD;
1479 if (PE_parse_boot_argn("longterm", &longterm, sizeof(longterm))) {
1480 tlp->threshold.interval = (longterm == 0) ?
1481 TIMER_LONGTERM_NONE :
1482 longterm * NSEC_PER_MSEC;
1483 }
1484 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
1485 printf("Longterm timer threshold: %llu ms\n",
1486 tlp->threshold.interval / NSEC_PER_MSEC);
1487 kprintf("Longterm timer threshold: %llu ms\n",
1488 tlp->threshold.interval / NSEC_PER_MSEC);
1489 nanoseconds_to_absolutetime(tlp->threshold.interval,
1490 &tlp->threshold.interval);
1491 tlp->threshold.margin = tlp->threshold.interval / 10;
1492 tlp->threshold.latency_min = EndOfAllTime;
1493 tlp->threshold.latency_max = 0;
1494 }
1495
1496 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1497 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1498
1499 mpqueue_init(&tlp->queue, &timer_longterm_lck_grp, LCK_ATTR_NULL);
1500
1501 timer_call_setup(&tlp->threshold.timer,
1502 timer_longterm_callout, (timer_call_param_t) tlp);
1503
1504 timer_longterm_queue = &tlp->queue;
1505 }
1506
1507 enum {
1508 THRESHOLD, QCOUNT,
1509 ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
1510 LATENCY, LATENCY_MIN, LATENCY_MAX, SCAN_LIMIT, SCAN_INTERVAL, PAUSES
1511 };
1512 uint64_t
1513 timer_sysctl_get(int oid)
1514 {
1515 timer_longterm_t *tlp = &timer_longterm;
1516
1517 switch (oid) {
1518 case THRESHOLD:
1519 return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
1520 0 : tlp->threshold.interval / NSEC_PER_MSEC;
1521 case QCOUNT:
1522 return tlp->queue.count;
1523 case ENQUEUES:
1524 return tlp->enqueues;
1525 case DEQUEUES:
1526 return tlp->dequeues;
1527 case ESCALATES:
1528 return tlp->escalates;
1529 case SCANS:
1530 return tlp->threshold.scans;
1531 case PREEMPTS:
1532 return tlp->threshold.preempts;
1533 case LATENCY:
1534 return tlp->threshold.latency;
1535 case LATENCY_MIN:
1536 return tlp->threshold.latency_min;
1537 case LATENCY_MAX:
1538 return tlp->threshold.latency_max;
1539 case SCAN_LIMIT:
1540 return tlp->scan_limit;
1541 case SCAN_INTERVAL:
1542 return tlp->scan_interval;
1543 case PAUSES:
1544 return tlp->scan_pauses;
1545 default:
1546 return 0;
1547 }
1548 }
1549
1550 /*
1551 * timer_master_scan() is the inverse of timer_longterm_scan()
1552 * since it un-escalates timers to the longterm queue.
1553 */
1554 static void
1555 timer_master_scan(timer_longterm_t *tlp,
1556 uint64_t now)
1557 {
1558 timer_call_t call;
1559 uint64_t threshold;
1560 uint64_t deadline;
1561 mpqueue_head_t *timer_master_queue;
1562
1563 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
1564 threshold = now + tlp->threshold.interval;
1565 } else {
1566 threshold = TIMER_LONGTERM_NONE;
1567 }
1568
1569 timer_master_queue = timer_queue_cpu(master_cpu);
1570 timer_queue_lock_spin(timer_master_queue);
1571
1572 qe_foreach_element_safe(call, &timer_master_queue->head, tc_qlink) {
1573 deadline = call->tc_pqlink.deadline;
1574 if ((call->tc_flags & TIMER_CALL_LOCAL) != 0) {
1575 continue;
1576 }
1577 if (!simple_lock_try(&call->tc_lock, LCK_GRP_NULL)) {
1578 /* case (2c) lock order inversion, dequeue only */
1579 timer_call_entry_dequeue_async(call);
1580 continue;
1581 }
1582 if (deadline > threshold) {
1583 /* move from master to longterm */
1584 timer_call_entry_dequeue(call);
1585 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1586 if (deadline < tlp->threshold.deadline) {
1587 tlp->threshold.deadline = deadline;
1588 tlp->threshold.call = call;
1589 }
1590 }
1591 simple_unlock(&call->tc_lock);
1592 }
1593 timer_queue_unlock(timer_master_queue);
1594 }
1595
1596 static void
1597 timer_sysctl_set_threshold(uint64_t value)
1598 {
1599 timer_longterm_t *tlp = &timer_longterm;
1600 spl_t s = splclock();
1601 boolean_t threshold_increase;
1602
1603 timer_queue_lock_spin(timer_longterm_queue);
1604
1605 timer_call_cancel(&tlp->threshold.timer);
1606
1607 /*
1608 * Set the new threshold and note whether it's increasing.
1609 */
1610 if (value == 0) {
1611 tlp->threshold.interval = TIMER_LONGTERM_NONE;
1612 threshold_increase = TRUE;
1613 timer_call_cancel(&tlp->threshold.timer);
1614 } else {
1615 uint64_t old_interval = tlp->threshold.interval;
1616 tlp->threshold.interval = value * NSEC_PER_MSEC;
1617 nanoseconds_to_absolutetime(tlp->threshold.interval,
1618 &tlp->threshold.interval);
1619 tlp->threshold.margin = tlp->threshold.interval / 10;
1620 if (old_interval == TIMER_LONGTERM_NONE) {
1621 threshold_increase = FALSE;
1622 } else {
1623 threshold_increase = (tlp->threshold.interval > old_interval);
1624 }
1625 }
1626
1627 if (threshold_increase /* or removal */) {
1628 /* Escalate timers from the longterm queue */
1629 timer_longterm_scan(tlp, mach_absolute_time());
1630 } else { /* decrease or addition */
1631 /*
1632 * We scan the local/master queue for timers now longterm.
1633 * To be strictly correct, we should scan all processor queues
1634 * but timer migration results in most timers gravitating to the
1635 * master processor in any case.
1636 */
1637 timer_master_scan(tlp, mach_absolute_time());
1638 }
1639
1640 /* Set new timer accordingly */
1641 tlp->threshold.deadline_set = tlp->threshold.deadline;
1642 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
1643 tlp->threshold.deadline_set -= tlp->threshold.margin;
1644 tlp->threshold.deadline_set -= tlp->threshold.latency;
1645 timer_call_enter(
1646 &tlp->threshold.timer,
1647 tlp->threshold.deadline_set,
1648 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1649 }
1650
1651 /* Reset stats */
1652 tlp->enqueues = 0;
1653 tlp->dequeues = 0;
1654 tlp->escalates = 0;
1655 tlp->scan_pauses = 0;
1656 tlp->threshold.scans = 0;
1657 tlp->threshold.preempts = 0;
1658 tlp->threshold.latency = 0;
1659 tlp->threshold.latency_min = EndOfAllTime;
1660 tlp->threshold.latency_max = 0;
1661
1662 timer_queue_unlock(timer_longterm_queue);
1663 splx(s);
1664 }
1665
1666 int
1667 timer_sysctl_set(int oid, uint64_t value)
1668 {
1669 switch (oid) {
1670 case THRESHOLD:
1671 timer_call_cpu(
1672 master_cpu,
1673 (void (*)(void *))timer_sysctl_set_threshold,
1674 (void *) value);
1675 return KERN_SUCCESS;
1676 case SCAN_LIMIT:
1677 timer_longterm.scan_limit = value;
1678 return KERN_SUCCESS;
1679 case SCAN_INTERVAL:
1680 timer_longterm.scan_interval = value;
1681 return KERN_SUCCESS;
1682 default:
1683 return KERN_INVALID_ARGUMENT;
1684 }
1685 }
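
/*
 * Example (illustrative values): raising the longterm threshold to 5 seconds
 * and doubling the scan budget. THRESHOLD is supplied in milliseconds and is
 * applied via timer_sysctl_set_threshold() on the master CPU; SCAN_LIMIT and
 * SCAN_INTERVAL store the raw value (the compiled-in defaults above are
 * expressed in nanoseconds).
 *
 *	timer_sysctl_set(THRESHOLD, 5000);
 *	timer_sysctl_set(SCAN_LIMIT, 200 * NSEC_PER_USEC);
 */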
1686
1687
1688 /* Select timer coalescing window based on per-task quality-of-service hints */
1689 static boolean_t
1690 tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited)
1691 {
1692 uint32_t latency_qos;
1693 boolean_t adjusted = FALSE;
1694 task_t ctask = t->task;
1695
1696 if (ctask) {
1697 latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS);
1698
1699 assert(latency_qos <= NUM_LATENCY_QOS_TIERS);
1700
1701 if (latency_qos) {
1702 *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
1703 *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1];
1704 *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
1705 adjusted = TRUE;
1706 }
1707 }
1708 return adjusted;
1709 }
1710
1711
1712 /* Adjust timer deadlines based on priority of the thread and the
1713 * urgency value provided at timeout establishment. With this mechanism,
1714 * timers are no longer necessarily sorted in order of soft deadline
1715 * on a given timer queue, i.e. they may be differentially skewed.
1716 * In the current scheme, this could lead to fewer pending timers
1717 * processed than is technically possible when the HW deadline arrives.
1718 */
1719 static void
1720 timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited)
1721 {
1722 int16_t tpri = cthread->sched_pri;
1723 if ((urgency & TIMER_CALL_USER_MASK) != 0) {
1724 if (tpri >= BASEPRI_RTQUEUES ||
1725 urgency == TIMER_CALL_USER_CRITICAL) {
1726 *tshift = tcoal_prio_params.timer_coalesce_rt_shift;
1727 *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max;
1728 TCOAL_PRIO_STAT(rt_tcl);
1729 } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) ||
1730 (urgency == TIMER_CALL_USER_BACKGROUND)) {
1731 /* Determine if timer should be subjected to a lower QoS */
1732 if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1733 if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) {
1734 return;
1735 } else {
1736 *pratelimited = FALSE;
1737 }
1738 }
1739 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1740 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1741 TCOAL_PRIO_STAT(bg_tcl);
1742 } else if (tpri >= MINPRI_KERNEL) {
1743 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1744 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1745 TCOAL_PRIO_STAT(kt_tcl);
1746 } else if (cthread->sched_mode == TH_MODE_FIXED) {
1747 *tshift = tcoal_prio_params.timer_coalesce_fp_shift;
1748 *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max;
1749 TCOAL_PRIO_STAT(fp_tcl);
1750 } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1751 TCOAL_PRIO_STAT(qos_tcl);
1752 } else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
1753 *tshift = tcoal_prio_params.timer_coalesce_ts_shift;
1754 *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
1755 TCOAL_PRIO_STAT(ts_tcl);
1756 } else {
1757 TCOAL_PRIO_STAT(nc_tcl);
1758 }
1759 } else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
1760 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1761 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1762 TCOAL_PRIO_STAT(bg_tcl);
1763 } else {
1764 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1765 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1766 TCOAL_PRIO_STAT(kt_tcl);
1767 }
1768 }
1769
1770
1771 int timer_user_idle_level;
1772
1773 uint64_t
1774 timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
1775 {
1776 int32_t tcs_shift = 0;
1777 uint64_t tcs_max_abstime = 0;
1778 uint64_t adjval;
1779 uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);
1780
1781 if (mach_timer_coalescing_enabled &&
1782 (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
1783 timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited);
1784
1785 if (tcs_shift >= 0) {
1786 adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime);
1787 } else {
1788 adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime);
1789 }
1790 /* Apply adjustments derived from "user idle level" heuristic */
1791 adjval += (adjval * timer_user_idle_level) >> 7;
1792 return adjval;
1793 } else {
1794 return 0;
1795 }
1796 }
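
/*
 * Worked example (illustrative numbers; the real shifts and caps come from
 * the platform's timer_coalescing_priority_params): with a timeshare shift
 * of 10 and a deadline 100 ms out, the raw slop is (deadline - now) >> 10,
 * about 1/1024 of the interval (~98 us), clamped to that tier's
 * tcs_max_abstime. With timer_user_idle_level at 64, the final adjustment
 * adds (adjval * 64) >> 7, i.e. another 50%, for ~146 us of total leeway.
 */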
1797
1798 int
1799 timer_get_user_idle_level(void)
1800 {
1801 return timer_user_idle_level;
1802 }
1803
1804 kern_return_t
1805 timer_set_user_idle_level(int ilevel)
1806 {
1807 boolean_t do_reeval = FALSE;
1808
1809 if ((ilevel < 0) || (ilevel > 128)) {
1810 return KERN_INVALID_ARGUMENT;
1811 }
1812
1813 if (ilevel < timer_user_idle_level) {
1814 do_reeval = TRUE;
1815 }
1816
1817 timer_user_idle_level = ilevel;
1818
1819 if (do_reeval) {
1820 ml_timer_evaluate();
1821 }
1822
1823 return KERN_SUCCESS;
1824 }
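
/*
 * Hypothetical usage sketch (not compiled; real callers live outside this
 * file, e.g. in power-management code): widen timer coalescing windows
 * while the user is idle and restore them on activity. Levels outside
 * 0..128 are rejected with KERN_INVALID_ARGUMENT, and lowering the level
 * forces a deadline re-evaluation via ml_timer_evaluate().
 */
#if 0
static void
example_note_user_idle(bool user_idle)
{
    /* 128 doubles the coalescing window; 0 restores the baseline. */
    kern_return_t kr = timer_set_user_idle_level(user_idle ? 128 : 0);
    assert(kr == KERN_SUCCESS);
}
#endif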
1825
1826 #pragma mark - running timers
1827
1828 #define RUNNING_TIMER_FAKE_FLAGS (TIMER_CALL_SYS_CRITICAL | \
1829 TIMER_CALL_LOCAL)
1830
1831 /*
1832 * timer_call_trace_* functions mimic the tracing behavior from the normal
1833 * timer_call subsystem, so tools continue to function.
1834 */
1835
1836 static void
1837 timer_call_trace_enter_before(struct timer_call *call, uint64_t deadline,
1838 uint32_t flags, uint64_t now)
1839 {
1840 #pragma unused(call, deadline, flags, now)
1841 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_START,
1842 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_ADDRHIDE(call->tc_param1),
1843 deadline, flags, 0);
1844 #if CONFIG_DTRACE
1845 uint64_t ttd = deadline - now;
1846 DTRACE_TMR7(callout__create, timer_call_func_t, call->tc_func,
1847 timer_call_param_t, call->tc_param0, uint32_t, flags, 0,
1848 (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF), NULL);
1849 #endif /* CONFIG_DTRACE */
1852 }
1853
1854 static void
1855 timer_call_trace_enter_after(struct timer_call *call, uint64_t deadline)
1856 {
1857 #pragma unused(call, deadline)
1858 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END,
1859 VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline, 0, 0);
1860 }
1861
1862 static void
1863 timer_call_trace_cancel(struct timer_call *call)
1864 {
1865 #pragma unused(call)
1866 __unused uint64_t deadline = call->tc_pqlink.deadline;
1867 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CANCEL | DBG_FUNC_START,
1868 VM_KERNEL_UNSLIDE_OR_PERM(call), deadline, 0,
1869 call->tc_flags, 0);
1870 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CANCEL | DBG_FUNC_END,
1871 VM_KERNEL_UNSLIDE_OR_PERM(call), 0, deadline - mach_absolute_time(),
1872 deadline - call->tc_entry_time, 0);
1873 #if CONFIG_DTRACE
1874 #if TIMER_TRACE
1875 uint64_t ttd = deadline - call->tc_entry_time;
1876 #else
1877 uint64_t ttd = UINT64_MAX;
1878 #endif /* TIMER_TRACE */
1879 DTRACE_TMR6(callout__cancel, timer_call_func_t, call->tc_func,
1880 timer_call_param_t, call->tc_param0, uint32_t, call->tc_flags, 0,
1881 (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF));
1882 #endif /* CONFIG_DTRACE */
1883 }
1884
1885 static void
1886 timer_call_trace_expire_entry(struct timer_call *call)
1887 {
1888 #pragma unused(call)
1889 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_START,
1890 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(call->tc_func),
1891 VM_KERNEL_ADDRHIDE(call->tc_param0),
1892 VM_KERNEL_ADDRHIDE(call->tc_param1),
1893 0);
1894 #if CONFIG_DTRACE
1895 #if TIMER_TRACE
1896 uint64_t ttd = call->tc_pqlink.deadline - call->tc_entry_time;
1897 #else /* TIMER_TRACE */
1898 uint64_t ttd = UINT64_MAX;
1899 #endif /* TIMER_TRACE */
1900 DTRACE_TMR7(callout__start, timer_call_func_t, call->tc_func,
1901 timer_call_param_t, call->tc_param0, unsigned, call->tc_flags,
1902 0, (ttd >> 32), (unsigned int)(ttd & 0xFFFFFFFF), NULL);
1903 #endif /* CONFIG_DTRACE */
1904 }
1905
1906 static void
1907 timer_call_trace_expire_return(struct timer_call *call)
1908 {
1909 #pragma unused(call)
1910 #if CONFIG_DTRACE
1911 DTRACE_TMR4(callout__end, timer_call_func_t, call->tc_func,
1912 call->tc_param0, call->tc_param1, NULL);
1913 #endif /* CONFIG_DTRACE */
1914 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_END,
1915 VM_KERNEL_UNSLIDE_OR_PERM(call),
1916 VM_KERNEL_UNSLIDE(call->tc_func),
1917 VM_KERNEL_ADDRHIDE(call->tc_param0),
1918 VM_KERNEL_ADDRHIDE(call->tc_param1),
1919 0);
1920 }
1921
1922 /*
1923 * Set a new deadline for a running timer on this processor; callers must still resync the hardware deadline (as running_timer_enter() does).
1924 */
1925 void
1926 running_timer_setup(processor_t processor, enum running_timer timer,
1927 void *param, uint64_t deadline, uint64_t now)
1928 {
1929 assert(timer < RUNNING_TIMER_MAX);
1930 assert(ml_get_interrupts_enabled() == FALSE);
1931
1932 struct timer_call *call = &processor->running_timers[timer];
1933
1934 timer_call_trace_enter_before(call, deadline, RUNNING_TIMER_FAKE_FLAGS,
1935 now);
1936
1937 if (__improbable(deadline < now)) {
1938 deadline = timer_call_past_deadline_timer_handle(deadline, now);
1939 }
1940
1941 call->tc_pqlink.deadline = deadline;
1942 #if TIMER_TRACE
1943 call->tc_entry_time = now;
1944 #endif /* TIMER_TRACE */
1945 call->tc_param1 = param;
1946
1947 timer_call_trace_enter_after(call, deadline);
1948 }
1949
1950 void
1951 running_timers_sync(void)
1952 {
1953 timer_resync_deadlines();
1954 }
1955
1956 void
1957 running_timer_enter(processor_t processor, unsigned int timer,
1958 void *param, uint64_t deadline, uint64_t now)
1959 {
1960 running_timer_setup(processor, timer, param, deadline, now);
1961 running_timers_sync();
1962 }
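
/*
 * Usage sketch (not compiled): arm this processor's quantum running timer
 * one scheduling quantum from now. RUNNING_TIMER_QUANTUM is assumed here
 * to be a member of enum running_timer (kern/processor.h); interrupts must
 * be disabled, as asserted in running_timer_setup().
 */
#if 0
static void
example_arm_quantum_timer(processor_t processor, uint64_t quantum_abstime)
{
    uint64_t now = mach_absolute_time();
    running_timer_enter(processor, RUNNING_TIMER_QUANTUM, NULL,
        now + quantum_abstime, now);
}
#endif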
1963
1964 /*
1965 * Call the callback for any running timers that fired for this processor.
1966 * Returns true if any timers were past their deadline.
1967 */
1968 bool
1969 running_timers_expire(processor_t processor, uint64_t now)
1970 {
1971 bool expired = false;
1972
1973 if (!processor->running_timers_active) {
1974 return expired;
1975 }
1976
1977 for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
1978 struct timer_call *call = &processor->running_timers[i];
1979
1980 uint64_t deadline = call->tc_pqlink.deadline;
1981 if (deadline > now) {
1982 continue;
1983 }
1984
1985 expired = true;
1986 timer_call_trace_expire_entry(call);
1987 call->tc_func(call->tc_param0, call->tc_param1);
1988 timer_call_trace_expire_return(call);
1989 }
1990
1991 return expired;
1992 }
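
/*
 * Sketch of the expected call pattern (not compiled; the real callers sit
 * in the machine-dependent timer interrupt path): with interrupts disabled,
 * expire any due running timers for the current processor and let the
 * caller reprogram the hardware deadline afterwards.
 */
#if 0
static void
example_timer_interrupt_path(void)
{
    processor_t processor = current_processor();
    uint64_t now = mach_absolute_time();

    bool fired = running_timers_expire(processor, now);
    (void)fired; /* the caller refreshes the hardware deadline either way */
}
#endif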
1993
1994 void
1995 running_timer_clear(processor_t processor, enum running_timer timer)
1996 {
1997 struct timer_call *call = &processor->running_timers[timer];
1998 uint64_t deadline = call->tc_pqlink.deadline;
1999 if (deadline == EndOfAllTime) {
2000 return;
2001 }
2002
2003 call->tc_pqlink.deadline = EndOfAllTime;
2004 #if TIMER_TRACE
2005 call->tc_entry_time = 0;
2006 #endif /* TIMER_TRACE */
2007 timer_call_trace_cancel(call);
2008 }
2009
2010 void
2011 running_timer_cancel(processor_t processor, unsigned int timer)
2012 {
2013 running_timer_clear(processor, timer);
2014 running_timers_sync();
2015 }
2016
2017 uint64_t
2018 running_timers_deadline(processor_t processor)
2019 {
2020 if (!processor->running_timers_active) {
2021 return EndOfAllTime;
2022 }
2023
2024 uint64_t deadline = EndOfAllTime;
2025 for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
2026 uint64_t candidate =
2027 processor->running_timers[i].tc_pqlink.deadline;
2028 if (candidate != 0 && candidate < deadline) {
2029 deadline = candidate;
2030 }
2031 }
2032
2033 return deadline;
2034 }
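
/*
 * Minimal sketch (not compiled) of how this value can be folded into the
 * next hardware deadline: a cleared or inactive set reports EndOfAllTime,
 * so taking the MIN with the local timer queue's deadline is safe.
 */
#if 0
static uint64_t
example_next_hw_deadline(processor_t processor, uint64_t queue_deadline)
{
    return MIN(queue_deadline, running_timers_deadline(processor));
}
#endif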
2035
2036 void
2037 running_timers_activate(processor_t processor)
2038 {
2039 processor->running_timers_active = true;
2040 running_timers_sync();
2041 }
2042
2043 void
2044 running_timers_deactivate(processor_t processor)
2045 {
2046 assert(processor->running_timers_active == true);
2047 processor->running_timers_active = false;
2048 running_timers_sync();
2049 }