[apple/xnu.git] / osfmk / kern / timer_call.c (xnu-4903.241.1)
1c79356b 1/*
c910b4d9 2 * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Timer interrupt callout module.
1c79356b
A
30 */
31
32#include <mach/mach_types.h>
33
34#include <kern/clock.h>
3e170ce0 35#include <kern/smp.h>
9bccf70c 36#include <kern/processor.h>
1c79356b 37#include <kern/timer_call.h>
c910b4d9 38#include <kern/timer_queue.h>
1c79356b 39#include <kern/call_entry.h>
39236c6e 40#include <kern/thread.h>
39037602 41#include <kern/policy_internal.h>
1c79356b 42
0c530ab8
A
43#include <sys/kdebug.h>
44
4b17d6b6 45#if CONFIG_DTRACE
2d21ac55
A
46#include <mach/sdt.h>
47#endif
1c79356b 48
1c79356b 49
6d2010ae
A
50#if DEBUG
51#define TIMER_ASSERT 1
52#endif
53
54//#define TIMER_ASSERT 1
55//#define TIMER_DBG 1
56
57#if TIMER_DBG
58#define DBG(x...) kprintf("DBG: " x);
59#else
60#define DBG(x...)
61#endif
62
39236c6e
A
63#if TIMER_TRACE
64#define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST
65#else
66#define TIMER_KDEBUG_TRACE(x...)
67#endif
68
69
6d2010ae
A
70lck_grp_t timer_call_lck_grp;
71lck_attr_t timer_call_lck_attr;
72lck_grp_attr_t timer_call_lck_grp_attr;
73
39236c6e
A
74lck_grp_t timer_longterm_lck_grp;
75lck_attr_t timer_longterm_lck_attr;
76lck_grp_attr_t timer_longterm_lck_grp_attr;
77
3e170ce0
A
78/* Timer queue lock must be acquired with interrupts disabled (under splclock()) */
79#if __SMP__
39236c6e 80#define timer_queue_lock_spin(queue) \
6d2010ae
A
81 lck_mtx_lock_spin_always(&queue->lock_data)
82
39236c6e 83#define timer_queue_unlock(queue) \
6d2010ae 84 lck_mtx_unlock_always(&queue->lock_data)
3e170ce0
A
85#else
86#define timer_queue_lock_spin(queue) (void)1
87#define timer_queue_unlock(queue) (void)1
88#endif
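/*
 * Illustrative sketch (not part of the original file): the canonical pattern
 * for taking a timer queue lock, per the comment above.  Interrupts are
 * masked via splclock() before the spin lock is taken, and restored only
 * after the queue is unlocked.  The function name is hypothetical.
 */
#if 0	/* example only */
static void
timer_queue_example_walk(mpqueue_head_t *queue)
{
	spl_t s;

	s = splclock();			/* mask interrupts first */
	timer_queue_lock_spin(queue);	/* then take the queue lock */

	/* ... examine or modify the queue here ... */

	timer_queue_unlock(queue);
	splx(s);			/* restore interrupt level last */
}
#endif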
6d2010ae
A
89
90#define QUEUE(x) ((queue_t)(x))
91#define MPQUEUE(x) ((mpqueue_head_t *)(x))
92#define TIMER_CALL(x) ((timer_call_t)(x))
fe8ab488 93#define TCE(x) (&(x->call_entry))
39236c6e
A
94/*
95 * The longterm timer object is a global structure holding all timers
96 * beyond the short-term, local timer queue threshold. The boot processor
97 * is responsible for moving each timer to its local timer queue
98 * if and when that timer becomes due within the threshold.
99 */
5ba3f43e
A
100
101/* Sentinel for "no time set": */
39236c6e 102#define TIMER_LONGTERM_NONE EndOfAllTime
5ba3f43e 103/* The default threshold is the delta above which a timer is "long-term" */
39236c6e 104#if defined(__x86_64__)
5ba3f43e 105#define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC) /* 1 sec */
39236c6e 106#else
5ba3f43e 107#define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE /* disabled */
39236c6e
A
108#endif
109
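/*
 * Illustrative sketch (not part of the original file): how a deadline is
 * classified against the long-term threshold.  This mirrors the test made in
 * timer_longterm_enqueue_unlocked() below; the helper name is hypothetical.
 */
#if 0	/* example only */
static boolean_t
timer_deadline_is_longterm(uint64_t now, uint64_t deadline, uint64_t threshold_interval)
{
	if (threshold_interval == TIMER_LONGTERM_NONE)
		return FALSE;			/* longterm mechanism disabled */
	return (deadline > now + threshold_interval);
}
#endif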
5ba3f43e 110/*
a39ff7e2 111 * The scan_limit throttles processing of the longterm queue.
5ba3f43e 112 * If the scan time exceeds this limit, we terminate, unlock
a39ff7e2 113 * and defer for scan_interval. This prevents unbounded holding of
5ba3f43e
A
114 * timer queue locks with interrupts masked.
115 */
a39ff7e2
A
116#define TIMER_LONGTERM_SCAN_LIMIT (100ULL * NSEC_PER_USEC) /* 100 us */
117#define TIMER_LONGTERM_SCAN_INTERVAL (100ULL * NSEC_PER_USEC) /* 100 us */
5ba3f43e
A
118/* Sentinel for "scan limit exceeded": */
119#define TIMER_LONGTERM_SCAN_AGAIN 0
120
39236c6e
A
121typedef struct {
122 uint64_t interval; /* longterm timer interval */
 123 uint64_t margin; /* fudge factor (10% of interval) */
124 uint64_t deadline; /* first/soonest longterm deadline */
125 uint64_t preempted; /* sooner timer has pre-empted */
126 timer_call_t call; /* first/soonest longterm timer call */
127 uint64_t deadline_set; /* next timer set */
128 timer_call_data_t timer; /* timer used by threshold management */
129 /* Stats: */
130 uint64_t scans; /* num threshold timer scans */
131 uint64_t preempts; /* num threshold reductions */
132 uint64_t latency; /* average threshold latency */
133 uint64_t latency_min; /* minimum threshold latency */
134 uint64_t latency_max; /* maximum threshold latency */
135} threshold_t;
136
137typedef struct {
138 mpqueue_head_t queue; /* longterm timer list */
139 uint64_t enqueues; /* num timers queued */
140 uint64_t dequeues; /* num timers dequeued */
141 uint64_t escalates; /* num timers becoming shortterm */
142 uint64_t scan_time; /* last time the list was scanned */
143 threshold_t threshold; /* longterm timer threshold */
5ba3f43e 144 uint64_t scan_limit; /* maximum scan time */
a39ff7e2 145 uint64_t scan_interval; /* interval between LT "escalation" scans */
5ba3f43e 146 uint64_t scan_pauses; /* num scans exceeding time limit */
39236c6e
A
147} timer_longterm_t;
148
5ba3f43e
A
149timer_longterm_t timer_longterm = {
150 .scan_limit = TIMER_LONGTERM_SCAN_LIMIT,
a39ff7e2 151 .scan_interval = TIMER_LONGTERM_SCAN_INTERVAL,
5ba3f43e 152 };
39236c6e
A
153
154static mpqueue_head_t *timer_longterm_queue = NULL;
155
156static void timer_longterm_init(void);
157static void timer_longterm_callout(
158 timer_call_param_t p0,
159 timer_call_param_t p1);
160extern void timer_longterm_scan(
161 timer_longterm_t *tlp,
162 uint64_t now);
163static void timer_longterm_update(
164 timer_longterm_t *tlp);
165static void timer_longterm_update_locked(
166 timer_longterm_t *tlp);
167static mpqueue_head_t * timer_longterm_enqueue_unlocked(
168 timer_call_t call,
169 uint64_t now,
170 uint64_t deadline,
fe8ab488
A
171 mpqueue_head_t ** old_queue,
172 uint64_t soft_deadline,
173 uint64_t ttd,
174 timer_call_param_t param1,
175 uint32_t callout_flags);
39236c6e
A
176static void timer_longterm_dequeued_locked(
177 timer_call_t call);
316670eb
A
178
179uint64_t past_deadline_timers;
180uint64_t past_deadline_deltas;
181uint64_t past_deadline_longest;
182uint64_t past_deadline_shortest = ~0ULL;
183enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};
184
185uint64_t past_deadline_timer_adjustment;
186
39236c6e 187static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited);
6d2010ae
A
188boolean_t mach_timer_coalescing_enabled = TRUE;
189
190mpqueue_head_t *timer_call_enqueue_deadline_unlocked(
191 timer_call_t call,
192 mpqueue_head_t *queue,
fe8ab488
A
193 uint64_t deadline,
194 uint64_t soft_deadline,
195 uint64_t ttd,
196 timer_call_param_t param1,
197 uint32_t flags);
6d2010ae
A
198
199mpqueue_head_t *timer_call_dequeue_unlocked(
200 timer_call_t call);
201
fe8ab488
A
202timer_coalescing_priority_params_t tcoal_prio_params;
203
204#if TCOAL_PRIO_STATS
205int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
206#define TCOAL_PRIO_STAT(x) (x++)
207#else
208#define TCOAL_PRIO_STAT(x)
209#endif
210
211static void
212timer_call_init_abstime(void)
213{
214 int i;
215 uint64_t result;
216 timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params();
217 nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
218 nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result);
219 tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
220 nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result);
221 tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
222 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result);
223 tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
224 tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
225 tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
226 tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
227 tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
228 tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;
229
230 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
231 &tcoal_prio_params.timer_coalesce_rt_abstime_max);
232 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
233 &tcoal_prio_params.timer_coalesce_bg_abstime_max);
234 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
235 &tcoal_prio_params.timer_coalesce_kt_abstime_max);
236 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
237 &tcoal_prio_params.timer_coalesce_fp_abstime_max);
238 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
239 &tcoal_prio_params.timer_coalesce_ts_abstime_max);
240
241 for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) {
242 tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
243 nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
244 &tcoal_prio_params.latency_qos_abstime_max[i]);
245 tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
246 }
247}
248
1c79356b
A
249
250void
39236c6e 251timer_call_init(void)
1c79356b 252{
6d2010ae
A
253 lck_attr_setdefault(&timer_call_lck_attr);
254 lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
255 lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);
39236c6e
A
256
257 timer_longterm_init();
fe8ab488 258 timer_call_init_abstime();
1c79356b
A
259}
260
6d2010ae
A
261
262void
39236c6e 263timer_call_queue_init(mpqueue_head_t *queue)
6d2010ae 264{
39236c6e 265 DBG("timer_call_queue_init(%p)\n", queue);
6d2010ae
A
266 mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr);
267}
268
269
1c79356b
A
270void
271timer_call_setup(
272 timer_call_t call,
273 timer_call_func_t func,
274 timer_call_param_t param0)
275{
6d2010ae 276 DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
fe8ab488 277 call_entry_setup(TCE(call), func, param0);
6d2010ae
A
278 simple_lock_init(&(call)->lock, 0);
279 call->async_dequeue = FALSE;
1c79356b 280}
6d2010ae
A
281#if TIMER_ASSERT
282static __inline__ mpqueue_head_t *
283timer_call_entry_dequeue(
284 timer_call_t entry)
285{
fe8ab488 286 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
6d2010ae
A
287
288 if (!hw_lock_held((hw_lock_t)&entry->lock))
289 panic("_call_entry_dequeue() "
290 "entry %p is not locked\n", entry);
291 /*
292 * XXX The queue lock is actually a mutex in spin mode
293 * but there's no way to test for it being held
294 * so we pretend it's a spinlock!
295 */
296 if (!hw_lock_held((hw_lock_t)&old_queue->lock_data))
297 panic("_call_entry_dequeue() "
298 "queue %p is not locked\n", old_queue);
299
fe8ab488 300 call_entry_dequeue(TCE(entry));
39236c6e 301 old_queue->count--;
c910b4d9 302
6d2010ae
A
303 return (old_queue);
304}
1c79356b 305
6d2010ae
A
306static __inline__ mpqueue_head_t *
307timer_call_entry_enqueue_deadline(
308 timer_call_t entry,
309 mpqueue_head_t *queue,
310 uint64_t deadline)
311{
fe8ab488 312 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
1c79356b 313
6d2010ae
A
314 if (!hw_lock_held((hw_lock_t)&entry->lock))
315 panic("_call_entry_enqueue_deadline() "
316 "entry %p is not locked\n", entry);
317 /* XXX More lock pretense: */
318 if (!hw_lock_held((hw_lock_t)&queue->lock_data))
319 panic("_call_entry_enqueue_deadline() "
320 "queue %p is not locked\n", queue);
321 if (old_queue != NULL && old_queue != queue)
322 panic("_call_entry_enqueue_deadline() "
323 "old_queue %p != queue", old_queue);
1c79356b 324
fe8ab488 325 call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
1c79356b 326
39236c6e
A
327/* For efficiency, track the earliest soft deadline on the queue, so that
328 * fuzzy decisions can be made without lock acquisitions.
329 */
fe8ab488
A
330 timer_call_t thead = (timer_call_t)queue_first(&queue->head);
331
332 queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
39236c6e
A
333
334 if (old_queue)
335 old_queue->count--;
336 queue->count++;
337
6d2010ae
A
338 return (old_queue);
339}
1c79356b 340
6d2010ae 341#else
1c79356b 342
6d2010ae
A
343static __inline__ mpqueue_head_t *
344timer_call_entry_dequeue(
345 timer_call_t entry)
346{
fe8ab488 347 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
39236c6e 348
fe8ab488 349 call_entry_dequeue(TCE(entry));
39236c6e
A
350 old_queue->count--;
351
352 return old_queue;
6d2010ae 353}
c910b4d9 354
6d2010ae
A
355static __inline__ mpqueue_head_t *
356timer_call_entry_enqueue_deadline(
357 timer_call_t entry,
358 mpqueue_head_t *queue,
359 uint64_t deadline)
360{
fe8ab488 361 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
39236c6e 362
fe8ab488 363 call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
39236c6e
A
364
365 /* For efficiency, track the earliest soft deadline on the queue,
366 * so that fuzzy decisions can be made without lock acquisitions.
367 */
fe8ab488
A
368
369 timer_call_t thead = (timer_call_t)queue_first(&queue->head);
370 queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
39236c6e
A
371
372 if (old_queue)
373 old_queue->count--;
374 queue->count++;
375
376 return old_queue;
1c79356b
A
377}
378
6d2010ae
A
379#endif
380
39236c6e
A
381static __inline__ void
382timer_call_entry_enqueue_tail(
383 timer_call_t entry,
384 mpqueue_head_t *queue)
385{
fe8ab488 386 call_entry_enqueue_tail(TCE(entry), QUEUE(queue));
39236c6e
A
387 queue->count++;
388 return;
389}
390
391/*
392 * Remove timer entry from its queue but don't change the queue pointer
393 * and set the async_dequeue flag. This is locking case 2b.
394 */
395static __inline__ void
396timer_call_entry_dequeue_async(
397 timer_call_t entry)
398{
fe8ab488 399 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
39236c6e
A
400 if (old_queue) {
401 old_queue->count--;
402 (void) remque(qe(entry));
403 entry->async_dequeue = TRUE;
404 }
405 return;
406}
407
6d2010ae
A
408#if TIMER_ASSERT
409unsigned timer_call_enqueue_deadline_unlocked_async1;
410unsigned timer_call_enqueue_deadline_unlocked_async2;
411#endif
412/*
413 * Assumes call_entry and queues unlocked, interrupts disabled.
414 */
415__inline__ mpqueue_head_t *
416timer_call_enqueue_deadline_unlocked(
417 timer_call_t call,
418 mpqueue_head_t *queue,
fe8ab488
A
419 uint64_t deadline,
420 uint64_t soft_deadline,
421 uint64_t ttd,
422 timer_call_param_t param1,
423 uint32_t callout_flags)
1c79356b 424{
fe8ab488 425 call_entry_t entry = TCE(call);
6d2010ae 426 mpqueue_head_t *old_queue;
1c79356b 427
6d2010ae 428 DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);
1c79356b 429
6d2010ae 430 simple_lock(&call->lock);
fe8ab488 431
6d2010ae 432 old_queue = MPQUEUE(entry->queue);
fe8ab488 433
6d2010ae 434 if (old_queue != NULL) {
39236c6e 435 timer_queue_lock_spin(old_queue);
6d2010ae 436 if (call->async_dequeue) {
39236c6e 437 /* collision (1c): timer already dequeued, clear flag */
6d2010ae 438#if TIMER_ASSERT
39236c6e
A
439 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
440 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
4bd07ac2 441 VM_KERNEL_UNSLIDE_OR_PERM(call),
39236c6e 442 call->async_dequeue,
4bd07ac2 443 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
39236c6e 444 0x1c, 0);
6d2010ae
A
445 timer_call_enqueue_deadline_unlocked_async1++;
446#endif
39236c6e 447 call->async_dequeue = FALSE;
6d2010ae 448 entry->queue = NULL;
39236c6e
A
449 } else if (old_queue != queue) {
450 timer_call_entry_dequeue(call);
6d2010ae
A
451#if TIMER_ASSERT
452 timer_call_enqueue_deadline_unlocked_async2++;
453#endif
454 }
39236c6e
A
455 if (old_queue == timer_longterm_queue)
456 timer_longterm_dequeued_locked(call);
6d2010ae 457 if (old_queue != queue) {
39236c6e
A
458 timer_queue_unlock(old_queue);
459 timer_queue_lock_spin(queue);
6d2010ae
A
460 }
461 } else {
39236c6e 462 timer_queue_lock_spin(queue);
6d2010ae 463 }
1c79356b 464
fe8ab488
A
465 call->soft_deadline = soft_deadline;
466 call->flags = callout_flags;
467 TCE(call)->param1 = param1;
468 call->ttd = ttd;
469
6d2010ae 470 timer_call_entry_enqueue_deadline(call, queue, deadline);
39236c6e 471 timer_queue_unlock(queue);
6d2010ae 472 simple_unlock(&call->lock);
1c79356b 473
c910b4d9
A
474 return (old_queue);
475}
1c79356b 476
6d2010ae
A
477#if TIMER_ASSERT
478unsigned timer_call_dequeue_unlocked_async1;
479unsigned timer_call_dequeue_unlocked_async2;
480#endif
481mpqueue_head_t *
482timer_call_dequeue_unlocked(
483 timer_call_t call)
c910b4d9 484{
fe8ab488 485 call_entry_t entry = TCE(call);
6d2010ae 486 mpqueue_head_t *old_queue;
1c79356b 487
6d2010ae 488 DBG("timer_call_dequeue_unlocked(%p)\n", call);
1c79356b 489
6d2010ae
A
490 simple_lock(&call->lock);
491 old_queue = MPQUEUE(entry->queue);
39236c6e
A
492#if TIMER_ASSERT
493 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
494 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
4bd07ac2 495 VM_KERNEL_UNSLIDE_OR_PERM(call),
39236c6e 496 call->async_dequeue,
4bd07ac2 497 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
39236c6e
A
498 0, 0);
499#endif
6d2010ae 500 if (old_queue != NULL) {
39236c6e 501 timer_queue_lock_spin(old_queue);
6d2010ae 502 if (call->async_dequeue) {
39236c6e 503 /* collision (1c): timer already dequeued, clear flag */
6d2010ae 504#if TIMER_ASSERT
39236c6e
A
505 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
506 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
4bd07ac2 507 VM_KERNEL_UNSLIDE_OR_PERM(call),
39236c6e 508 call->async_dequeue,
4bd07ac2 509 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
39236c6e 510 0x1c, 0);
6d2010ae
A
511 timer_call_dequeue_unlocked_async1++;
512#endif
39236c6e
A
513 call->async_dequeue = FALSE;
514 entry->queue = NULL;
6d2010ae 515 } else {
39236c6e 516 timer_call_entry_dequeue(call);
6d2010ae 517 }
39236c6e
A
518 if (old_queue == timer_longterm_queue)
519 timer_longterm_dequeued_locked(call);
520 timer_queue_unlock(old_queue);
6d2010ae
A
521 }
522 simple_unlock(&call->lock);
c910b4d9 523 return (old_queue);
1c79356b
A
524}
525
5ba3f43e
A
526static uint64_t
527past_deadline_timer_handle(uint64_t deadline, uint64_t ctime)
528{
529 uint64_t delta = (ctime - deadline);
530
531 past_deadline_timers++;
532 past_deadline_deltas += delta;
533 if (delta > past_deadline_longest)
 534 past_deadline_longest = delta;
535 if (delta < past_deadline_shortest)
536 past_deadline_shortest = delta;
537
538 return (ctime + past_deadline_timer_adjustment);
539}
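/*
 * Worked example (illustrative, values hypothetical): if ctime is 1,000,000
 * and the requested deadline was 999,000, then delta = 1,000 is folded into
 * the statistics above and the caller receives ctime plus
 * past_deadline_timer_adjustment, i.e. the timer is re-aimed roughly 10 us
 * into the future instead of being left with a deadline already in the past.
 */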
fe8ab488
A
540
541/*
542 * Timer call entry locking model
543 * ==============================
544 *
545 * Timer call entries are linked on per-cpu timer queues which are protected
546 * by the queue lock and the call entry lock. The locking protocol is:
547 *
548 * 0) The canonical locking order is timer call entry followed by queue.
549 *
550 * 1) With only the entry lock held, entry.queue is valid:
551 * 1a) NULL: the entry is not queued, or
552 * 1b) non-NULL: this queue must be locked before the entry is modified.
553 * After locking the queue, the call.async_dequeue flag must be checked:
554 * 1c) TRUE: the entry was removed from the queue by another thread
555 * and we must NULL the entry.queue and reset this flag, or
556 * 1d) FALSE: (ie. queued), the entry can be manipulated.
557 *
558 * 2) If a queue lock is obtained first, the queue is stable:
559 * 2a) If a try-lock of a queued entry succeeds, the call can be operated on
560 * and dequeued.
561 * 2b) If a try-lock fails, it indicates that another thread is attempting
562 * to change the entry and move it to a different position in this queue
563 * or to different queue. The entry can be dequeued but it should not be
564 * operated upon since it is being changed. Furthermore, we don't null
565 * the entry.queue pointer (protected by the entry lock we don't own).
566 * Instead, we set the async_dequeue flag -- see (1c).
567 * 2c) Same as 2b but occurring when a longterm timer is matured.
568 * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
569 * should be manipulated with the appropriate timer queue lock held,
570 * to prevent queue traversal observations from observing inconsistent
571 * updates to an in-flight callout.
572 */
573
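/*
 * Illustrative sketch (not part of the original file): the case (1) protocol
 * above, reduced to a skeleton.  Compare timer_call_dequeue_unlocked(), which
 * is the real implementation; the function name here is hypothetical.
 */
#if 0	/* example only */
static void
timer_call_example_remove(timer_call_t call)
{
	mpqueue_head_t *queue;

	simple_lock(&call->lock);		/* (0) entry lock first */
	queue = MPQUEUE(TCE(call)->queue);
	if (queue != NULL) {			/* (1b) queued: lock that queue */
		timer_queue_lock_spin(queue);
		if (call->async_dequeue) {	/* (1c) removed by another thread */
			call->async_dequeue = FALSE;
			TCE(call)->queue = NULL;
		} else {			/* (1d) safe to manipulate */
			timer_call_entry_dequeue(call);
		}
		timer_queue_unlock(queue);
	}
	simple_unlock(&call->lock);
}
#endif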
574/*
575 * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline()
576 * cast between pointer types (mpqueue_head_t *) and (queue_t) so that
577 * we can use the call_entry_dequeue() and call_entry_enqueue_deadline()
578 * methods to operate on timer_call structs as if they are call_entry structs.
579 * These structures are identical except for their queue head pointer fields.
580 *
581 * In the debug case, we assert that the timer call locking protocol
582 * is being obeyed.
583 */
584
6d2010ae
A
585static boolean_t
586timer_call_enter_internal(
587 timer_call_t call,
588 timer_call_param_t param1,
589 uint64_t deadline,
39236c6e
A
590 uint64_t leeway,
591 uint32_t flags,
592 boolean_t ratelimited)
1c79356b 593{
39236c6e 594 mpqueue_head_t *queue = NULL;
6d2010ae 595 mpqueue_head_t *old_queue;
1c79356b 596 spl_t s;
39236c6e
A
597 uint64_t slop;
598 uint32_t urgency;
fe8ab488 599 uint64_t sdeadline, ttd;
1c79356b 600
39037602 601 assert(call->call_entry.func != NULL);
1c79356b 602 s = splclock();
6d2010ae 603
fe8ab488 604 sdeadline = deadline;
39236c6e
A
605 uint64_t ctime = mach_absolute_time();
606
607 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
608 DECR_TIMER_ENTER | DBG_FUNC_START,
4bd07ac2 609 VM_KERNEL_UNSLIDE_OR_PERM(call),
5ba3f43e 610 VM_KERNEL_ADDRHIDE(param1), deadline, flags, 0);
39236c6e
A
611
612 urgency = (flags & TIMER_CALL_URGENCY_MASK);
613
614 boolean_t slop_ratelimited = FALSE;
615 slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);
616
617 if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop)
618 slop = leeway;
619
620 if (UINT64_MAX - deadline <= slop) {
621 deadline = UINT64_MAX;
622 } else {
6d2010ae
A
623 deadline += slop;
624 }
1c79356b 625
316670eb 626 if (__improbable(deadline < ctime)) {
5ba3f43e 627 deadline = past_deadline_timer_handle(deadline, ctime);
fe8ab488 628 sdeadline = deadline;
316670eb 629 }
39236c6e 630
39236c6e 631 if (ratelimited || slop_ratelimited) {
fe8ab488 632 flags |= TIMER_CALL_RATELIMITED;
39236c6e 633 } else {
fe8ab488 634 flags &= ~TIMER_CALL_RATELIMITED;
39236c6e
A
635 }
636
fe8ab488 637 ttd = sdeadline - ctime;
4b17d6b6 638#if CONFIG_DTRACE
fe8ab488
A
639 DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
640 timer_call_param_t, TCE(call)->param0, uint32_t, flags,
641 (deadline - sdeadline),
642 (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
4b17d6b6
A
643#endif
644
fe8ab488
A
645 /* Program timer callout parameters under the appropriate per-CPU or
646 * longterm queue lock. The callout may have been previously enqueued
647 * and in-flight on this or another timer queue.
648 */
39236c6e 649 if (!ratelimited && !slop_ratelimited) {
fe8ab488 650 queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
39236c6e 651 }
1c79356b 652
39236c6e
A
653 if (queue == NULL) {
654 queue = timer_queue_assign(deadline);
fe8ab488 655 old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
39236c6e 656 }
1c79356b 657
39236c6e 658#if TIMER_TRACE
fe8ab488 659 TCE(call)->entry_time = ctime;
39236c6e
A
660#endif
661
662 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
663 DECR_TIMER_ENTER | DBG_FUNC_END,
4bd07ac2 664 VM_KERNEL_UNSLIDE_OR_PERM(call),
fe8ab488 665 (old_queue != NULL), deadline, queue->count, 0);
1c79356b 666
1c79356b
A
667 splx(s);
668
c910b4d9 669 return (old_queue != NULL);
1c79356b
A
670}
671
39236c6e
A
672/*
673 * timer_call_*()
674 * return boolean indicating whether the call was previously queued.
675 */
6d2010ae
A
676boolean_t
677timer_call_enter(
678 timer_call_t call,
679 uint64_t deadline,
680 uint32_t flags)
681{
39236c6e 682 return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE);
6d2010ae
A
683}
684
1c79356b 685boolean_t
c910b4d9
A
686timer_call_enter1(
687 timer_call_t call,
688 timer_call_param_t param1,
6d2010ae
A
689 uint64_t deadline,
690 uint32_t flags)
1c79356b 691{
39236c6e
A
692 return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE);
693}
694
695boolean_t
696timer_call_enter_with_leeway(
697 timer_call_t call,
698 timer_call_param_t param1,
699 uint64_t deadline,
700 uint64_t leeway,
701 uint32_t flags,
702 boolean_t ratelimited)
703{
704 return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
1c79356b
A
705}
706
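/*
 * Illustrative sketch (not part of the original file): typical client usage of
 * the timer_call_*() interfaces above.  The callback, the 100 ms interval and
 * all names here are hypothetical.
 */
#if 0	/* example only */
static timer_call_data_t example_timer;

static void
example_timer_expired(timer_call_param_t p0, timer_call_param_t p1)
{
	/* invoked from the timer callout path once the deadline passes */
}

static void
example_timer_arm(void)
{
	uint64_t interval, deadline;

	timer_call_setup(&example_timer, example_timer_expired, NULL);

	clock_interval_to_absolutetime_interval(100, NSEC_PER_MSEC, &interval);
	deadline = mach_absolute_time() + interval;

	/* returns TRUE if the call was already pending on a queue */
	(void) timer_call_enter1(&example_timer, NULL, deadline,
	    TIMER_CALL_SYS_NORMAL);
}
#endif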
5ba3f43e
A
707boolean_t
708timer_call_quantum_timer_enter(
709 timer_call_t call,
710 timer_call_param_t param1,
711 uint64_t deadline,
712 uint64_t ctime)
713{
714 assert(call->call_entry.func != NULL);
715 assert(ml_get_interrupts_enabled() == FALSE);
716
717 uint32_t flags = TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL;
718
719 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_START,
720 VM_KERNEL_UNSLIDE_OR_PERM(call),
721 VM_KERNEL_ADDRHIDE(param1), deadline,
722 flags, 0);
723
724 if (__improbable(deadline < ctime)) {
725 deadline = past_deadline_timer_handle(deadline, ctime);
726 }
727
728 uint64_t ttd = deadline - ctime;
729#if CONFIG_DTRACE
730 DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
731 timer_call_param_t, TCE(call)->param0, uint32_t, flags, 0,
732 (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
733#endif
734
735 quantum_timer_set_deadline(deadline);
736 TCE(call)->deadline = deadline;
737 TCE(call)->param1 = param1;
738 call->ttd = ttd;
739 call->flags = flags;
740
741#if TIMER_TRACE
742 TCE(call)->entry_time = ctime;
743#endif
744
745 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END,
746 VM_KERNEL_UNSLIDE_OR_PERM(call),
747 1, deadline, 0, 0);
748
749 return true;
750}
751
752
753boolean_t
754timer_call_quantum_timer_cancel(
755 timer_call_t call)
756{
757 assert(ml_get_interrupts_enabled() == FALSE);
758
759 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
760 DECR_TIMER_CANCEL | DBG_FUNC_START,
761 VM_KERNEL_UNSLIDE_OR_PERM(call), TCE(call)->deadline,
762 0, call->flags, 0);
763
764 TCE(call)->deadline = 0;
765 quantum_timer_set_deadline(0);
766
767 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
768 DECR_TIMER_CANCEL | DBG_FUNC_END,
769 VM_KERNEL_UNSLIDE_OR_PERM(call), 0,
770 TCE(call)->deadline - mach_absolute_time(),
771 TCE(call)->deadline - TCE(call)->entry_time, 0);
772
773#if CONFIG_DTRACE
774 DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
775 timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0,
776 (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
777#endif
778
779 return true;
780}
781
1c79356b 782boolean_t
c910b4d9
A
783timer_call_cancel(
784 timer_call_t call)
1c79356b 785{
6d2010ae 786 mpqueue_head_t *old_queue;
1c79356b
A
787 spl_t s;
788
789 s = splclock();
1c79356b 790
39236c6e
A
791 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
792 DECR_TIMER_CANCEL | DBG_FUNC_START,
4bd07ac2 793 VM_KERNEL_UNSLIDE_OR_PERM(call),
fe8ab488 794 TCE(call)->deadline, call->soft_deadline, call->flags, 0);
39236c6e 795
6d2010ae 796 old_queue = timer_call_dequeue_unlocked(call);
c910b4d9
A
797
798 if (old_queue != NULL) {
39236c6e
A
799 timer_queue_lock_spin(old_queue);
800 if (!queue_empty(&old_queue->head)) {
fe8ab488
A
801 timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline);
802 timer_call_t thead = (timer_call_t)queue_first(&old_queue->head);
803 old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
39236c6e
A
804 }
805 else {
fe8ab488 806 timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX);
39236c6e
A
807 old_queue->earliest_soft_deadline = UINT64_MAX;
808 }
809 timer_queue_unlock(old_queue);
1c79356b 810 }
39236c6e
A
811 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
812 DECR_TIMER_CANCEL | DBG_FUNC_END,
4bd07ac2
A
813 VM_KERNEL_UNSLIDE_OR_PERM(call),
814 VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
fe8ab488
A
815 TCE(call)->deadline - mach_absolute_time(),
816 TCE(call)->deadline - TCE(call)->entry_time, 0);
1c79356b
A
817 splx(s);
818
4b17d6b6 819#if CONFIG_DTRACE
fe8ab488
A
820 DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
821 timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0,
4b17d6b6
A
822 (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
823#endif
824
c910b4d9 825 return (old_queue != NULL);
1c79356b
A
826}
827
fe8ab488
A
828static uint32_t timer_queue_shutdown_lock_skips;
829static uint32_t timer_queue_shutdown_discarded;
830
9bccf70c 831void
c910b4d9 832timer_queue_shutdown(
6d2010ae 833 mpqueue_head_t *queue)
9bccf70c 834{
6d2010ae
A
835 timer_call_t call;
836 mpqueue_head_t *new_queue;
c910b4d9 837 spl_t s;
9bccf70c 838
fe8ab488 839
6d2010ae
A
840 DBG("timer_queue_shutdown(%p)\n", queue);
841
c910b4d9 842 s = splclock();
9bccf70c 843
6d2010ae 844 /* Note comma operator in while expression re-locking each iteration */
39037602 845 while ((void)timer_queue_lock_spin(queue), !queue_empty(&queue->head)) {
6d2010ae 846 call = TIMER_CALL(queue_first(&queue->head));
fe8ab488 847
6d2010ae
A
848 if (!simple_lock_try(&call->lock)) {
849 /*
850 * case (2b) lock order inversion, dequeue and skip
851 * Don't change the call_entry queue back-pointer
852 * but set the async_dequeue field.
853 */
854 timer_queue_shutdown_lock_skips++;
39236c6e
A
855 timer_call_entry_dequeue_async(call);
856#if TIMER_ASSERT
857 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
858 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
4bd07ac2 859 VM_KERNEL_UNSLIDE_OR_PERM(call),
39236c6e 860 call->async_dequeue,
4bd07ac2 861 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
39236c6e
A
862 0x2b, 0);
863#endif
864 timer_queue_unlock(queue);
6d2010ae
A
865 continue;
866 }
9bccf70c 867
fe8ab488
A
868 boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0);
869
6d2010ae
A
870 /* remove entry from old queue */
871 timer_call_entry_dequeue(call);
39236c6e 872 timer_queue_unlock(queue);
9bccf70c 873
fe8ab488
A
874 if (call_local == FALSE) {
875 /* and queue it on new, discarding LOCAL timers */
876 new_queue = timer_queue_assign(TCE(call)->deadline);
877 timer_queue_lock_spin(new_queue);
878 timer_call_entry_enqueue_deadline(
879 call, new_queue, TCE(call)->deadline);
880 timer_queue_unlock(new_queue);
881 } else {
882 timer_queue_shutdown_discarded++;
883 }
884
5ba3f43e 885 assert(call_local == FALSE);
6d2010ae 886 simple_unlock(&call->lock);
9bccf70c
A
887 }
888
39236c6e 889 timer_queue_unlock(queue);
c910b4d9 890 splx(s);
9bccf70c
A
891}
892
5ba3f43e
A
893
894void
895quantum_timer_expire(
896 uint64_t deadline)
897{
898 processor_t processor = current_processor();
899 timer_call_t call = TIMER_CALL(&(processor->quantum_timer));
900
901 if (__improbable(TCE(call)->deadline > deadline))
902 panic("CPU quantum timer deadlin out of sync with timer call deadline");
903
904 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
905 DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
906 VM_KERNEL_UNSLIDE_OR_PERM(call),
907 TCE(call)->deadline,
908 TCE(call)->deadline,
909 TCE(call)->entry_time, 0);
910
911 timer_call_func_t func = TCE(call)->func;
912 timer_call_param_t param0 = TCE(call)->param0;
913 timer_call_param_t param1 = TCE(call)->param1;
914
915 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
916 DECR_TIMER_CALLOUT | DBG_FUNC_START,
917 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
918 VM_KERNEL_ADDRHIDE(param0),
919 VM_KERNEL_ADDRHIDE(param1),
920 0);
921
922#if CONFIG_DTRACE
923 DTRACE_TMR7(callout__start, timer_call_func_t, func,
924 timer_call_param_t, param0, unsigned, call->flags,
925 0, (call->ttd >> 32),
926 (unsigned) (call->ttd & 0xFFFFFFFF), call);
927#endif
928 (*func)(param0, param1);
929
930 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
931 DECR_TIMER_CALLOUT | DBG_FUNC_END,
932 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
933 VM_KERNEL_ADDRHIDE(param0),
934 VM_KERNEL_ADDRHIDE(param1),
935 0);
936}
937
fe8ab488 938static uint32_t timer_queue_expire_lock_skips;
c910b4d9 939uint64_t
39236c6e 940timer_queue_expire_with_options(
6d2010ae 941 mpqueue_head_t *queue,
39236c6e
A
942 uint64_t deadline,
943 boolean_t rescan)
1c79356b 944{
39236c6e
A
945 timer_call_t call = NULL;
946 uint32_t tc_iterations = 0;
6d2010ae
A
947 DBG("timer_queue_expire(%p,)\n", queue);
948
39236c6e
A
949 uint64_t cur_deadline = deadline;
950 timer_queue_lock_spin(queue);
1c79356b 951
6d2010ae 952 while (!queue_empty(&queue->head)) {
39236c6e
A
953 /* Upon processing one or more timer calls, refresh the
954 * deadline to account for time elapsed in the callout
955 */
956 if (++tc_iterations > 1)
957 cur_deadline = mach_absolute_time();
958
959 if (call == NULL)
960 call = TIMER_CALL(queue_first(&queue->head));
1c79356b 961
39236c6e 962 if (call->soft_deadline <= cur_deadline) {
1c79356b
A
963 timer_call_func_t func;
964 timer_call_param_t param0, param1;
965
39236c6e
A
966 TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0);
967 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
968 DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
4bd07ac2 969 VM_KERNEL_UNSLIDE_OR_PERM(call),
39236c6e 970 call->soft_deadline,
fe8ab488
A
971 TCE(call)->deadline,
972 TCE(call)->entry_time, 0);
39236c6e 973
fe8ab488
A
974 if ((call->flags & TIMER_CALL_RATELIMITED) &&
975 (TCE(call)->deadline > cur_deadline)) {
39236c6e
A
976 if (rescan == FALSE)
977 break;
978 }
979
6d2010ae
A
980 if (!simple_lock_try(&call->lock)) {
981 /* case (2b) lock inversion, dequeue and skip */
982 timer_queue_expire_lock_skips++;
39236c6e
A
983 timer_call_entry_dequeue_async(call);
984 call = NULL;
6d2010ae
A
985 continue;
986 }
987
988 timer_call_entry_dequeue(call);
1c79356b 989
fe8ab488
A
990 func = TCE(call)->func;
991 param0 = TCE(call)->param0;
992 param1 = TCE(call)->param1;
1c79356b 993
6d2010ae 994 simple_unlock(&call->lock);
39236c6e 995 timer_queue_unlock(queue);
1c79356b 996
39236c6e 997 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
316670eb 998 DECR_TIMER_CALLOUT | DBG_FUNC_START,
4bd07ac2 999 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
5ba3f43e
A
1000 VM_KERNEL_ADDRHIDE(param0),
1001 VM_KERNEL_ADDRHIDE(param1),
4bd07ac2 1002 0);
2d21ac55 1003
4b17d6b6 1004#if CONFIG_DTRACE
39236c6e 1005 DTRACE_TMR7(callout__start, timer_call_func_t, func,
4b17d6b6
A
1006 timer_call_param_t, param0, unsigned, call->flags,
1007 0, (call->ttd >> 32),
39236c6e 1008 (unsigned) (call->ttd & 0xFFFFFFFF), call);
2d21ac55 1009#endif
4b17d6b6
A
1010 /* Maintain time-to-deadline in per-processor data
1011 * structure for thread wakeup deadline statistics.
1012 */
1013 uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd));
1014 *ttdp = call->ttd;
1c79356b 1015 (*func)(param0, param1);
4b17d6b6 1016 *ttdp = 0;
4b17d6b6 1017#if CONFIG_DTRACE
39236c6e
A
1018 DTRACE_TMR4(callout__end, timer_call_func_t, func,
1019 param0, param1, call);
2d21ac55
A
1020#endif
1021
39236c6e 1022 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
316670eb 1023 DECR_TIMER_CALLOUT | DBG_FUNC_END,
4bd07ac2 1024 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
5ba3f43e
A
1025 VM_KERNEL_ADDRHIDE(param0),
1026 VM_KERNEL_ADDRHIDE(param1),
4bd07ac2 1027 0);
39236c6e
A
1028 call = NULL;
1029 timer_queue_lock_spin(queue);
1030 } else {
1031 if (__probable(rescan == FALSE)) {
1032 break;
1033 } else {
fe8ab488
A
1034 int64_t skew = TCE(call)->deadline - call->soft_deadline;
1035 assert(TCE(call)->deadline >= call->soft_deadline);
39236c6e
A
1036
1037 /* DRK: On a latency quality-of-service level change,
1038 * re-sort potentially rate-limited timers. The platform
1039 * layer determines which timers require
1040 * this. In the absence of the per-callout
1041 * synchronization requirement, a global resort could
1042 * be more efficient. The re-sort effectively
1043 * annuls all timer adjustments, i.e. the "soft
1044 * deadline" is the sort key.
1045 */
1046
1047 if (timer_resort_threshold(skew)) {
1048 if (__probable(simple_lock_try(&call->lock))) {
1049 timer_call_entry_dequeue(call);
1050 timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline);
1051 simple_unlock(&call->lock);
1052 call = NULL;
1053 }
1054 }
1055 if (call) {
1056 call = TIMER_CALL(queue_next(qe(call)));
1057 if (queue_end(&queue->head, qe(call)))
1058 break;
1059 }
1060 }
c910b4d9 1061 }
1c79356b
A
1062 }
1063
39236c6e
A
1064 if (!queue_empty(&queue->head)) {
1065 call = TIMER_CALL(queue_first(&queue->head));
fe8ab488
A
1066 cur_deadline = TCE(call)->deadline;
1067 queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline: call->soft_deadline;
39236c6e
A
1068 } else {
1069 queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
1070 }
1c79356b 1071
39236c6e 1072 timer_queue_unlock(queue);
c910b4d9 1073
39236c6e 1074 return (cur_deadline);
1c79356b 1075}
6d2010ae 1076
39236c6e
A
1077uint64_t
1078timer_queue_expire(
1079 mpqueue_head_t *queue,
1080 uint64_t deadline)
1081{
1082 return timer_queue_expire_with_options(queue, deadline, FALSE);
1083}
6d2010ae
A
1084
1085extern int serverperfmode;
fe8ab488 1086static uint32_t timer_queue_migrate_lock_skips;
6d2010ae 1087/*
39236c6e 1088 * timer_queue_migrate() is called by timer_queue_migrate_cpu()
6d2010ae
A
1089 * to move timer requests from the local processor (queue_from)
1090 * to a target processor's (queue_to).
1091 */
1092int
1093timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
1094{
1095 timer_call_t call;
1096 timer_call_t head_to;
1097 int timers_migrated = 0;
1098
1099 DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);
1100
1101 assert(!ml_get_interrupts_enabled());
1102 assert(queue_from != queue_to);
1103
1104 if (serverperfmode) {
1105 /*
1106 * if we're running a high end server
1107 * avoid migrations... they add latency
1108 * and don't save us power under typical
1109 * server workloads
1110 */
1111 return -4;
1112 }
1113
1114 /*
1115 * Take both local (from) and target (to) timer queue locks while
1116 * moving the timers from the local queue to the target processor.
1117 * We assume that the target is always the boot processor.
1118 * But only move if all of the following is true:
1119 * - the target queue is non-empty
1120 * - the local queue is non-empty
1121 * - the local queue's first deadline is later than the target's
1122 * - the local queue contains no non-migrateable "local" call
1123 * so that we need not have the target resync.
1124 */
1125
39236c6e 1126 timer_queue_lock_spin(queue_to);
6d2010ae
A
1127
1128 head_to = TIMER_CALL(queue_first(&queue_to->head));
1129 if (queue_empty(&queue_to->head)) {
1130 timers_migrated = -1;
1131 goto abort1;
1132 }
1133
39236c6e 1134 timer_queue_lock_spin(queue_from);
6d2010ae
A
1135
1136 if (queue_empty(&queue_from->head)) {
1137 timers_migrated = -2;
1138 goto abort2;
1139 }
1140
1141 call = TIMER_CALL(queue_first(&queue_from->head));
fe8ab488 1142 if (TCE(call)->deadline < TCE(head_to)->deadline) {
6d2010ae
A
1143 timers_migrated = 0;
1144 goto abort2;
1145 }
1146
1147 /* perform scan for non-migratable timers */
1148 do {
1149 if (call->flags & TIMER_CALL_LOCAL) {
1150 timers_migrated = -3;
1151 goto abort2;
1152 }
1153 call = TIMER_CALL(queue_next(qe(call)));
1154 } while (!queue_end(&queue_from->head, qe(call)));
1155
1156 /* migration loop itself -- both queues are locked */
1157 while (!queue_empty(&queue_from->head)) {
1158 call = TIMER_CALL(queue_first(&queue_from->head));
1159 if (!simple_lock_try(&call->lock)) {
1160 /* case (2b) lock order inversion, dequeue only */
39236c6e
A
1161#ifdef TIMER_ASSERT
1162 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1163 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
4bd07ac2
A
1164 VM_KERNEL_UNSLIDE_OR_PERM(call),
1165 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
1166 VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
39236c6e
A
1167 0x2b, 0);
1168#endif
6d2010ae 1169 timer_queue_migrate_lock_skips++;
39236c6e 1170 timer_call_entry_dequeue_async(call);
6d2010ae
A
1171 continue;
1172 }
1173 timer_call_entry_dequeue(call);
1174 timer_call_entry_enqueue_deadline(
fe8ab488 1175 call, queue_to, TCE(call)->deadline);
6d2010ae
A
1176 timers_migrated++;
1177 simple_unlock(&call->lock);
1178 }
39236c6e 1179 queue_from->earliest_soft_deadline = UINT64_MAX;
6d2010ae 1180abort2:
39236c6e 1181 timer_queue_unlock(queue_from);
6d2010ae 1182abort1:
39236c6e 1183 timer_queue_unlock(queue_to);
6d2010ae
A
1184
1185 return timers_migrated;
1186}
39236c6e
A
1187
1188void
1189timer_queue_trace_cpu(int ncpu)
1190{
1191 timer_call_nosync_cpu(
1192 ncpu,
5ba3f43e 1193 (void(*)(void *))timer_queue_trace,
39236c6e
A
1194 (void*) timer_queue_cpu(ncpu));
1195}
1196
1197void
1198timer_queue_trace(
1199 mpqueue_head_t *queue)
1200{
1201 timer_call_t call;
1202 spl_t s;
1203
1204 if (!kdebug_enable)
1205 return;
1206
1207 s = splclock();
1208 timer_queue_lock_spin(queue);
1209
1210 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1211 DECR_TIMER_QUEUE | DBG_FUNC_START,
1212 queue->count, mach_absolute_time(), 0, 0, 0);
1213
1214 if (!queue_empty(&queue->head)) {
1215 call = TIMER_CALL(queue_first(&queue->head));
1216 do {
1217 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1218 DECR_TIMER_QUEUE | DBG_FUNC_NONE,
1219 call->soft_deadline,
fe8ab488
A
1220 TCE(call)->deadline,
1221 TCE(call)->entry_time,
4bd07ac2 1222 VM_KERNEL_UNSLIDE(TCE(call)->func),
39236c6e
A
1223 0);
1224 call = TIMER_CALL(queue_next(qe(call)));
1225 } while (!queue_end(&queue->head, qe(call)));
1226 }
1227
1228 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1229 DECR_TIMER_QUEUE | DBG_FUNC_END,
1230 queue->count, mach_absolute_time(), 0, 0, 0);
1231
1232 timer_queue_unlock(queue);
1233 splx(s);
1234}
1235
1236void
1237timer_longterm_dequeued_locked(timer_call_t call)
1238{
1239 timer_longterm_t *tlp = &timer_longterm;
1240
1241 tlp->dequeues++;
1242 if (call == tlp->threshold.call)
1243 tlp->threshold.call = NULL;
1244}
1245
1246/*
1247 * Place a timer call in the longterm list
1248 * and adjust the next timer callout deadline if the new timer is first.
1249 */
1250mpqueue_head_t *
1251timer_longterm_enqueue_unlocked(timer_call_t call,
1252 uint64_t now,
1253 uint64_t deadline,
fe8ab488
A
1254 mpqueue_head_t **old_queue,
1255 uint64_t soft_deadline,
1256 uint64_t ttd,
1257 timer_call_param_t param1,
1258 uint32_t callout_flags)
39236c6e
A
1259{
1260 timer_longterm_t *tlp = &timer_longterm;
1261 boolean_t update_required = FALSE;
1262 uint64_t longterm_threshold;
1263
1264 longterm_threshold = now + tlp->threshold.interval;
1265
1266 /*
1267 * Return NULL without doing anything if:
1268 * - this timer is local, or
1269 * - the longterm mechanism is disabled, or
1270 * - this deadline is too short.
1271 */
fe8ab488 1272 if ((callout_flags & TIMER_CALL_LOCAL) != 0 ||
39236c6e 1273 (tlp->threshold.interval == TIMER_LONGTERM_NONE) ||
fe8ab488 1274 (deadline <= longterm_threshold))
39236c6e
A
1275 return NULL;
1276
1277 /*
1278 * Remove timer from its current queue, if any.
1279 */
1280 *old_queue = timer_call_dequeue_unlocked(call);
1281
1282 /*
1283 * Lock the longterm queue, queue timer and determine
1284 * whether an update is necessary.
1285 */
1286 assert(!ml_get_interrupts_enabled());
1287 simple_lock(&call->lock);
1288 timer_queue_lock_spin(timer_longterm_queue);
fe8ab488
A
1289 TCE(call)->deadline = deadline;
1290 TCE(call)->param1 = param1;
1291 call->ttd = ttd;
1292 call->soft_deadline = soft_deadline;
1293 call->flags = callout_flags;
39236c6e 1294 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
39236c6e
A
1295
1296 tlp->enqueues++;
1297
1298 /*
1299 * We'll need to update the currently set threshold timer
1300 * if the new deadline is sooner and no sooner update is in flight.
1301 */
1302 if (deadline < tlp->threshold.deadline &&
1303 deadline < tlp->threshold.preempted) {
1304 tlp->threshold.preempted = deadline;
1305 tlp->threshold.call = call;
1306 update_required = TRUE;
1307 }
1308 timer_queue_unlock(timer_longterm_queue);
1309 simple_unlock(&call->lock);
1310
1311 if (update_required) {
fe8ab488
A
1312 /*
1313 * Note: this call expects that calling the master cpu
1314 * alone does not involve locking the topo lock.
1315 */
39236c6e
A
1316 timer_call_nosync_cpu(
1317 master_cpu,
1318 (void (*)(void *)) timer_longterm_update,
1319 (void *)tlp);
1320 }
1321
1322 return timer_longterm_queue;
1323}
1324
1325/*
1326 * Scan for timers below the longterm threshold.
1327 * Move these to the local timer queue (of the boot processor on which the
1328 * calling thread is running).
1329 * Both the local (boot) queue and the longterm queue are locked.
1330 * The scan is similar to the timer migrate sequence but is performed by
1331 * successively examining each timer on the longterm queue:
1332 * - if within the short-term threshold
1333 * - enter on the local queue (unless being deleted),
1334 * - otherwise:
1335 * - if sooner, deadline becomes the next threshold deadline.
5ba3f43e
A
 1336 * The total scan time is limited to TIMER_LONGTERM_SCAN_LIMIT. Should this be
 1337 * exceeded, we abort and reschedule so that we don't lock other processors out
 1338 * of the timer queues for too long. Longterm timers firing late is not critical.
39236c6e
A
1339 */
1340void
1341timer_longterm_scan(timer_longterm_t *tlp,
5ba3f43e 1342 uint64_t time_start)
39236c6e
A
1343{
1344 queue_entry_t qe;
1345 timer_call_t call;
1346 uint64_t threshold;
1347 uint64_t deadline;
5ba3f43e 1348 uint64_t time_limit = time_start + tlp->scan_limit;
39236c6e
A
1349 mpqueue_head_t *timer_master_queue;
1350
1351 assert(!ml_get_interrupts_enabled());
1352 assert(cpu_number() == master_cpu);
1353
1354 if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
5ba3f43e 1355 threshold = time_start + tlp->threshold.interval;
39236c6e
A
1356
1357 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1358 tlp->threshold.call = NULL;
1359
1360 if (queue_empty(&timer_longterm_queue->head))
1361 return;
1362
1363 timer_master_queue = timer_queue_cpu(master_cpu);
1364 timer_queue_lock_spin(timer_master_queue);
1365
1366 qe = queue_first(&timer_longterm_queue->head);
1367 while (!queue_end(&timer_longterm_queue->head, qe)) {
1368 call = TIMER_CALL(qe);
1369 deadline = call->soft_deadline;
1370 qe = queue_next(qe);
1371 if (!simple_lock_try(&call->lock)) {
1372 /* case (2c) lock order inversion, dequeue only */
1373#ifdef TIMER_ASSERT
1374 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1375 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
4bd07ac2
A
1376 VM_KERNEL_UNSLIDE_OR_PERM(call),
1377 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
1378 VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
39236c6e
A
1379 0x2c, 0);
1380#endif
1381 timer_call_entry_dequeue_async(call);
1382 continue;
1383 }
1384 if (deadline < threshold) {
1385 /*
1386 * This timer needs moving (escalating)
1387 * to the local (boot) processor's queue.
1388 */
1389#ifdef TIMER_ASSERT
5ba3f43e 1390 if (deadline < time_start)
39236c6e
A
1391 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1392 DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
4bd07ac2 1393 VM_KERNEL_UNSLIDE_OR_PERM(call),
39236c6e 1394 deadline,
5ba3f43e 1395 time_start,
39236c6e
A
1396 threshold,
1397 0);
1398#endif
1399 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1400 DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
4bd07ac2 1401 VM_KERNEL_UNSLIDE_OR_PERM(call),
fe8ab488
A
1402 TCE(call)->deadline,
1403 TCE(call)->entry_time,
4bd07ac2 1404 VM_KERNEL_UNSLIDE(TCE(call)->func),
39236c6e
A
1405 0);
1406 tlp->escalates++;
1407 timer_call_entry_dequeue(call);
1408 timer_call_entry_enqueue_deadline(
fe8ab488 1409 call, timer_master_queue, TCE(call)->deadline);
39236c6e
A
1410 /*
1411 * A side-effect of the following call is to update
1412 * the actual hardware deadline if required.
1413 */
1414 (void) timer_queue_assign(deadline);
1415 } else {
1416 if (deadline < tlp->threshold.deadline) {
1417 tlp->threshold.deadline = deadline;
1418 tlp->threshold.call = call;
1419 }
1420 }
1421 simple_unlock(&call->lock);
5ba3f43e
A
1422
1423 /* Abort scan if we're taking too long. */
1424 if (mach_absolute_time() > time_limit) {
1425 tlp->threshold.deadline = TIMER_LONGTERM_SCAN_AGAIN;
1426 tlp->scan_pauses++;
1427 DBG("timer_longterm_scan() paused %llu, qlen: %llu\n",
1428 time_limit, tlp->queue.count);
1429 break;
1430 }
39236c6e
A
1431 }
1432
1433 timer_queue_unlock(timer_master_queue);
1434}
1435
1436void
1437timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
1438{
1439 timer_longterm_t *tlp = (timer_longterm_t *) p0;
1440
1441 timer_longterm_update(tlp);
1442}
1443
1444void
1445timer_longterm_update_locked(timer_longterm_t *tlp)
1446{
1447 uint64_t latency;
1448
1449 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1450 DECR_TIMER_UPDATE | DBG_FUNC_START,
4bd07ac2 1451 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
39236c6e
A
1452 tlp->threshold.deadline,
1453 tlp->threshold.preempted,
1454 tlp->queue.count, 0);
1455
1456 tlp->scan_time = mach_absolute_time();
1457 if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
1458 tlp->threshold.preempts++;
1459 tlp->threshold.deadline = tlp->threshold.preempted;
1460 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1461 /*
1462 * Note: in the unlikely event that a pre-empted timer has
1463 * itself been cancelled, we'll simply re-scan later at the
1464 * time of the preempted/cancelled timer.
1465 */
1466 } else {
1467 tlp->threshold.scans++;
1468
1469 /*
1470 * Maintain a moving average of our wakeup latency.
1471 * Clamp latency to 0 and ignore above threshold interval.
1472 */
1473 if (tlp->scan_time > tlp->threshold.deadline_set)
1474 latency = tlp->scan_time - tlp->threshold.deadline_set;
1475 else
1476 latency = 0;
1477 if (latency < tlp->threshold.interval) {
1478 tlp->threshold.latency_min =
1479 MIN(tlp->threshold.latency_min, latency);
1480 tlp->threshold.latency_max =
1481 MAX(tlp->threshold.latency_max, latency);
1482 tlp->threshold.latency =
1483 (tlp->threshold.latency*99 + latency) / 100;
1484 }
1485
1486 timer_longterm_scan(tlp, tlp->scan_time);
1487 }
1488
1489 tlp->threshold.deadline_set = tlp->threshold.deadline;
1490 /* The next deadline timer to be set is adjusted */
5ba3f43e
A
1491 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE &&
1492 tlp->threshold.deadline != TIMER_LONGTERM_SCAN_AGAIN) {
39236c6e
A
1493 tlp->threshold.deadline_set -= tlp->threshold.margin;
1494 tlp->threshold.deadline_set -= tlp->threshold.latency;
1495 }
5ba3f43e
A
1496
1497 /* Throttle next scan time */
a39ff7e2 1498 uint64_t scan_clamp = mach_absolute_time() + tlp->scan_interval;
5ba3f43e
A
1499 if (tlp->threshold.deadline_set < scan_clamp)
1500 tlp->threshold.deadline_set = scan_clamp;
39236c6e
A
1501
1502 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1503 DECR_TIMER_UPDATE | DBG_FUNC_END,
4bd07ac2 1504 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
39236c6e
A
1505 tlp->threshold.deadline,
1506 tlp->threshold.scans,
1507 tlp->queue.count, 0);
1508}
1509
1510void
1511timer_longterm_update(timer_longterm_t *tlp)
1512{
1513 spl_t s = splclock();
1514
1515 timer_queue_lock_spin(timer_longterm_queue);
1516
1517 if (cpu_number() != master_cpu)
1518 panic("timer_longterm_update_master() on non-boot cpu");
1519
1520 timer_longterm_update_locked(tlp);
1521
1522 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE)
1523 timer_call_enter(
1524 &tlp->threshold.timer,
1525 tlp->threshold.deadline_set,
1526 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1527
1528 timer_queue_unlock(timer_longterm_queue);
1529 splx(s);
1530}
1531
1532void
1533timer_longterm_init(void)
1534{
1535 uint32_t longterm;
1536 timer_longterm_t *tlp = &timer_longterm;
1537
1538 DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue);
1539
1540 /*
15129b1c
A
 1541 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD,
 1542 * or to TIMER_LONGTERM_NONE (disabled) when serverperfmode is set;
 1543 * either default can be overridden with the "longterm" boot-arg.
39236c6e 1544 */
15129b1c
A
1545 tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE
1546 : TIMER_LONGTERM_THRESHOLD;
39236c6e
A
1547 if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) {
1548 tlp->threshold.interval = (longterm == 0) ?
1549 TIMER_LONGTERM_NONE :
1550 longterm * NSEC_PER_MSEC;
1551 }
1552 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
1553 printf("Longterm timer threshold: %llu ms\n",
1554 tlp->threshold.interval / NSEC_PER_MSEC);
1555 kprintf("Longterm timer threshold: %llu ms\n",
1556 tlp->threshold.interval / NSEC_PER_MSEC);
1557 nanoseconds_to_absolutetime(tlp->threshold.interval,
1558 &tlp->threshold.interval);
1559 tlp->threshold.margin = tlp->threshold.interval / 10;
1560 tlp->threshold.latency_min = EndOfAllTime;
1561 tlp->threshold.latency_max = 0;
1562 }
1563
1564 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1565 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1566
1567 lck_attr_setdefault(&timer_longterm_lck_attr);
1568 lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr);
1569 lck_grp_init(&timer_longterm_lck_grp,
1570 "timer_longterm", &timer_longterm_lck_grp_attr);
1571 mpqueue_init(&tlp->queue,
1572 &timer_longterm_lck_grp, &timer_longterm_lck_attr);
1573
1574 timer_call_setup(&tlp->threshold.timer,
1575 timer_longterm_callout, (timer_call_param_t) tlp);
1576
1577 timer_longterm_queue = &tlp->queue;
1578}
1579
1580enum {
1581 THRESHOLD, QCOUNT,
1582 ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
a39ff7e2 1583 LATENCY, LATENCY_MIN, LATENCY_MAX, SCAN_LIMIT, SCAN_INTERVAL, PAUSES
39236c6e
A
1584};
1585uint64_t
1586timer_sysctl_get(int oid)
1587{
1588 timer_longterm_t *tlp = &timer_longterm;
1589
1590 switch (oid) {
1591 case THRESHOLD:
1592 return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
1593 0 : tlp->threshold.interval / NSEC_PER_MSEC;
1594 case QCOUNT:
1595 return tlp->queue.count;
1596 case ENQUEUES:
1597 return tlp->enqueues;
1598 case DEQUEUES:
1599 return tlp->dequeues;
1600 case ESCALATES:
1601 return tlp->escalates;
1602 case SCANS:
1603 return tlp->threshold.scans;
1604 case PREEMPTS:
1605 return tlp->threshold.preempts;
1606 case LATENCY:
1607 return tlp->threshold.latency;
1608 case LATENCY_MIN:
1609 return tlp->threshold.latency_min;
1610 case LATENCY_MAX:
1611 return tlp->threshold.latency_max;
5ba3f43e
A
1612 case SCAN_LIMIT:
1613 return tlp->scan_limit;
a39ff7e2
A
1614 case SCAN_INTERVAL:
1615 return tlp->scan_interval;
5ba3f43e
A
1616 case PAUSES:
1617 return tlp->scan_pauses;
39236c6e
A
1618 default:
1619 return 0;
1620 }
1621}
1622
1623/*
1624 * timer_master_scan() is the inverse of timer_longterm_scan()
1625 * since it un-escalates timers to the longterm queue.
1626 */
1627static void
1628timer_master_scan(timer_longterm_t *tlp,
1629 uint64_t now)
1630{
1631 queue_entry_t qe;
1632 timer_call_t call;
1633 uint64_t threshold;
1634 uint64_t deadline;
1635 mpqueue_head_t *timer_master_queue;
1636
1637 if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
1638 threshold = now + tlp->threshold.interval;
1639 else
1640 threshold = TIMER_LONGTERM_NONE;
1641
1642 timer_master_queue = timer_queue_cpu(master_cpu);
1643 timer_queue_lock_spin(timer_master_queue);
1644
1645 qe = queue_first(&timer_master_queue->head);
1646 while (!queue_end(&timer_master_queue->head, qe)) {
1647 call = TIMER_CALL(qe);
fe8ab488 1648 deadline = TCE(call)->deadline;
39236c6e
A
1649 qe = queue_next(qe);
1650 if ((call->flags & TIMER_CALL_LOCAL) != 0)
1651 continue;
1652 if (!simple_lock_try(&call->lock)) {
1653 /* case (2c) lock order inversion, dequeue only */
1654 timer_call_entry_dequeue_async(call);
1655 continue;
1656 }
1657 if (deadline > threshold) {
1658 /* move from master to longterm */
1659 timer_call_entry_dequeue(call);
1660 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1661 if (deadline < tlp->threshold.deadline) {
1662 tlp->threshold.deadline = deadline;
1663 tlp->threshold.call = call;
1664 }
1665 }
1666 simple_unlock(&call->lock);
1667 }
1668 timer_queue_unlock(timer_master_queue);
1669}
1670
1671static void
1672timer_sysctl_set_threshold(uint64_t value)
1673{
1674 timer_longterm_t *tlp = &timer_longterm;
1675 spl_t s = splclock();
1676 boolean_t threshold_increase;
1677
1678 timer_queue_lock_spin(timer_longterm_queue);
1679
1680 timer_call_cancel(&tlp->threshold.timer);
1681
1682 /*
1683 * Set the new threshold and note whether it's increasing.
1684 */
1685 if (value == 0) {
1686 tlp->threshold.interval = TIMER_LONGTERM_NONE;
1687 threshold_increase = TRUE;
1688 timer_call_cancel(&tlp->threshold.timer);
1689 } else {
1690 uint64_t old_interval = tlp->threshold.interval;
1691 tlp->threshold.interval = value * NSEC_PER_MSEC;
1692 nanoseconds_to_absolutetime(tlp->threshold.interval,
1693 &tlp->threshold.interval);
1694 tlp->threshold.margin = tlp->threshold.interval / 10;
1695 if (old_interval == TIMER_LONGTERM_NONE)
1696 threshold_increase = FALSE;
1697 else
1698 threshold_increase = (tlp->threshold.interval > old_interval);
1699 }
1700
1701 if (threshold_increase /* or removal */) {
1702 /* Escalate timers from the longterm queue */
1703 timer_longterm_scan(tlp, mach_absolute_time());
1704 } else /* decrease or addition */ {
1705 /*
1706 * We scan the local/master queue for timers that are now longterm.
1707 * To be strictly correct, we should scan all processor queues
1708 * but timer migration results in most timers gravitating to the
1709 * master processor in any case.
1710 */
1711 timer_master_scan(tlp, mach_absolute_time());
1712 }
1713
1714 /* Set new timer accordingly */
1715 tlp->threshold.deadline_set = tlp->threshold.deadline;
1716 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
1717 tlp->threshold.deadline_set -= tlp->threshold.margin;
1718 tlp->threshold.deadline_set -= tlp->threshold.latency;
1719 timer_call_enter(
1720 &tlp->threshold.timer,
1721 tlp->threshold.deadline_set,
1722 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1723 }
1724
1725 /* Reset stats */
1726 tlp->enqueues = 0;
1727 tlp->dequeues = 0;
1728 tlp->escalates = 0;
1729 tlp->scan_pauses = 0;
1730 tlp->threshold.scans = 0;
1731 tlp->threshold.preempts = 0;
1732 tlp->threshold.latency = 0;
1733 tlp->threshold.latency_min = EndOfAllTime;
1734 tlp->threshold.latency_max = 0;
1735
1736 timer_queue_unlock(timer_longterm_queue);
1737 splx(s);
1738}
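/*
 * Worked example (illustrative figures): with a 1000 ms threshold the margin
 * computed above is 100 ms, so a nearest longterm deadline D re-arms the
 * threshold timer at roughly D - 100 ms - observed callout latency.  The
 * callout is deliberately scheduled early so escalation back onto a per-CPU
 * queue can complete before the soft deadline actually arrives.
 */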
1739
1740int
1741timer_sysctl_set(int oid, uint64_t value)
1742{
1743 switch (oid) {
1744 case THRESHOLD:
1745 timer_call_cpu(
1746 master_cpu,
1747 (void (*)(void *)) timer_sysctl_set_threshold,
1748 (void *) value);
1749 return KERN_SUCCESS;
1750 case SCAN_LIMIT:
1751 timer_longterm.scan_limit = value;
1752 return KERN_SUCCESS;
1753 case SCAN_INTERVAL:
1754 timer_longterm.scan_interval = value;
1755 return KERN_SUCCESS;
1756 default:
1757 return KERN_INVALID_ARGUMENT;
1758 }
1759}
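/*
 * Illustrative sketch, not part of the original source: raising the longterm
 * threshold to 100 ms.  The THRESHOLD case above cross-calls onto the master
 * CPU, so the queue rescan in timer_sysctl_set_threshold() runs there.  The
 * helper name is hypothetical.
 */
#if 0
static void
timer_longterm_tune_example(void)
{
	if (timer_sysctl_set(THRESHOLD, 100) != KERN_SUCCESS)
		kprintf("timer: failed to set longterm threshold\n");
}
#endif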
1760
1761
1762/* Select timer coalescing window based on per-task quality-of-service hints */
1763static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) {
1764 uint32_t latency_qos;
1765 boolean_t adjusted = FALSE;
1766 task_t ctask = t->task;
1767
1768 if (ctask) {
1769 latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS);
1770
1771 assert(latency_qos <= NUM_LATENCY_QOS_TIERS);
1772
1773 if (latency_qos) {
1774 *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
1775 *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1];
1776 *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
1777 adjusted = TRUE;
1778 }
1779 }
1780 return adjusted;
1781}
1782
1783
1784/* Adjust timer deadlines based on priority of the thread and the
1785 * urgency value provided at timeout establishment. With this mechanism,
1786 * timers are no longer necessarily sorted in order of soft deadline
1787 * on a given timer queue, i.e. they may be differentially skewed.
1788 * In the current scheme, this could lead to fewer pending timers
1789 * processed than is technically possible when the HW deadline arrives.
1790 */
1791static void
1792timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) {
1793 int16_t tpri = cthread->sched_pri;
1794 if ((urgency & TIMER_CALL_USER_MASK) != 0) {
1795 if (tpri >= BASEPRI_RTQUEUES ||
1796 urgency == TIMER_CALL_USER_CRITICAL) {
1797 *tshift = tcoal_prio_params.timer_coalesce_rt_shift;
1798 *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max;
1799 TCOAL_PRIO_STAT(rt_tcl);
1800 } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) ||
1801 (urgency == TIMER_CALL_USER_BACKGROUND)) {
1802 /* Determine if timer should be subjected to a lower QoS */
1803 if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1804 if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) {
1805 return;
1806 } else {
1807 *pratelimited = FALSE;
1808 }
1809 }
1810 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1811 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1812 TCOAL_PRIO_STAT(bg_tcl);
1813 } else if (tpri >= MINPRI_KERNEL) {
1814 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1815 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1816 TCOAL_PRIO_STAT(kt_tcl);
1817 } else if (cthread->sched_mode == TH_MODE_FIXED) {
1818 *tshift = tcoal_prio_params.timer_coalesce_fp_shift;
1819 *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max;
1820 TCOAL_PRIO_STAT(fp_tcl);
1821 } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1822 TCOAL_PRIO_STAT(qos_tcl);
1823 } else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
1824 *tshift = tcoal_prio_params.timer_coalesce_ts_shift;
1825 *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
1826 TCOAL_PRIO_STAT(ts_tcl);
1827 } else {
1828 TCOAL_PRIO_STAT(nc_tcl);
1829 }
1830 } else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
1831 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1832 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1833 TCOAL_PRIO_STAT(bg_tcl);
1834 } else {
1835 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1836 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1837 TCOAL_PRIO_STAT(kt_tcl);
1838 }
1839}
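/*
 * Worked example (illustrative; the actual shift and cap values live in
 * tcoal_prio_params and are platform-tuned): two timers armed 100 ms out may
 * end up differentially skewed. A realtime or kernel thread's timer gets a
 * large shift and hence a tiny leeway, while a background or low
 * latency-QoS thread's timer may be allowed several milliseconds of slop.
 * Queue order therefore no longer matches soft-deadline order, as the block
 * comment above timer_compute_leeway() notes.
 */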
1840
1841
1842int timer_user_idle_level;
1843
1844uint64_t
1845timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
1846{
1847 int32_t tcs_shift = 0;
1848 uint64_t tcs_max_abstime = 0;
1849 uint64_t adjval;
1850 uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);
1851
1852 if (mach_timer_coalescing_enabled &&
1853 (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
1854 timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited);
1855
1856 if (tcs_shift >= 0)
1857 adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime);
1858 else
1859 adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime);
1860 /* Apply adjustments derived from "user idle level" heuristic */
1861 adjval += (adjval * timer_user_idle_level) >> 7;
1862 return adjval;
1863 } else {
1864 return 0;
1865 }
1866}
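/*
 * Illustrative sketch, not part of the original source: computing the slop
 * for a timer one second out on the current thread.  With an assumed
 * tcs_shift of 10, adjval is min(1 s >> 10, tcs_max_abstime), roughly 1 ms;
 * at a user idle level of 64 the heuristic then widens that by 64/128,
 * i.e. about 50%.  The helper name is hypothetical.
 */
#if 0
static uint64_t
timer_call_slop_example(void)
{
	uint64_t now = mach_absolute_time();
	uint64_t one_sec_abs;
	boolean_t ratelimited = FALSE;

	nanoseconds_to_absolutetime(NSEC_PER_SEC, &one_sec_abs);
	return timer_call_slop(now + one_sec_abs, now,
	    TIMER_CALL_USER_NORMAL, current_thread(), &ratelimited);
}
#endif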
1867
1868int
1869timer_get_user_idle_level(void) {
1870 return timer_user_idle_level;
1871}
1872
1873kern_return_t timer_set_user_idle_level(int ilevel) {
1874 boolean_t do_reeval = FALSE;
1875
1876 if ((ilevel < 0) || (ilevel > 128))
1877 return KERN_INVALID_ARGUMENT;
1878
1879 if (ilevel < timer_user_idle_level) {
1880 do_reeval = TRUE;
1881 }
1882
1883 timer_user_idle_level = ilevel;
1884
1885 if (do_reeval)
1886 ml_timer_evaluate();
1887
1888 return KERN_SUCCESS;
1889}
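/*
 * Illustrative note: the idle level feeds the ">> 7" term in
 * timer_call_slop(), so level 0 adds nothing and level 128 doubles the
 * coalescing window.  A hypothetical caller backing the system out of idle
 * might simply drop the level, which forces a timer re-evaluation:
 */
#if 0
static void
timer_idle_exit_example(void)
{
	/* Lowering the level below its current value triggers ml_timer_evaluate() */
	(void) timer_set_user_idle_level(0);
}
#endif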