[apple/xnu.git] / osfmk / kern / timer_call.c (xnu-2782.20.48)
1c79356b 1/*
c910b4d9 2 * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Timer interrupt callout module.
1c79356b
A
30 */
31
32#include <mach/mach_types.h>
33
34#include <kern/clock.h>
9bccf70c 35#include <kern/processor.h>
1c79356b 36#include <kern/timer_call.h>
c910b4d9 37#include <kern/timer_queue.h>
1c79356b 38#include <kern/call_entry.h>
39236c6e 39#include <kern/thread.h>
1c79356b 40
0c530ab8
A
41#include <sys/kdebug.h>
42
4b17d6b6 43#if CONFIG_DTRACE
2d21ac55
A
44#include <mach/sdt.h>
45#endif
1c79356b 46
1c79356b 47
6d2010ae
A
48#if DEBUG
49#define TIMER_ASSERT 1
50#endif
51
52//#define TIMER_ASSERT 1
53//#define TIMER_DBG 1
54
55#if TIMER_DBG
56#define DBG(x...) kprintf("DBG: " x);
57#else
58#define DBG(x...)
59#endif
60
39236c6e
A
61#if TIMER_TRACE
62#define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST
63#else
64#define TIMER_KDEBUG_TRACE(x...)
65#endif
66
67
6d2010ae
A
68lck_grp_t timer_call_lck_grp;
69lck_attr_t timer_call_lck_attr;
70lck_grp_attr_t timer_call_lck_grp_attr;
71
39236c6e
A
72lck_grp_t timer_longterm_lck_grp;
73lck_attr_t timer_longterm_lck_attr;
74lck_grp_attr_t timer_longterm_lck_grp_attr;
75
6d2010ae 76
39236c6e 77#define timer_queue_lock_spin(queue) \
6d2010ae
A
78 lck_mtx_lock_spin_always(&queue->lock_data)
79
39236c6e 80#define timer_queue_unlock(queue) \
6d2010ae
A
81 lck_mtx_unlock_always(&queue->lock_data)
82
83
84#define QUEUE(x) ((queue_t)(x))
85#define MPQUEUE(x) ((mpqueue_head_t *)(x))
86#define TIMER_CALL(x) ((timer_call_t)(x))
fe8ab488 87#define TCE(x) (&(x->call_entry))
39236c6e
A
88/*
89 * The longterm timer object is a global structure holding all timers
90 * beyond the short-term, local timer queue threshold. The boot processor
91 * is responsible for moving each timer to its local timer queue
92 * if and when that timer becomes due within the threshold.
93 */
94#define TIMER_LONGTERM_NONE EndOfAllTime
95#if defined(__x86_64__)
96#define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC)
97#else
98#define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE
99#endif
100
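/*
 * Editor's sketch (illustrative only, not part of the original source):
 * the placement rule described above, written out as a predicate.
 * Assuming the x86_64 default threshold of 1 second, a timer due at
 * now + 2s would be parked on the global longterm queue, while one due
 * at now + 10ms would go directly to a per-cpu queue.  The helper name
 * is hypothetical.
 */
static inline boolean_t
timer_call_would_be_longterm(
	uint64_t	deadline,
	uint64_t	now,
	uint64_t	threshold_interval,
	uint32_t	callout_flags)
{
	if ((callout_flags & TIMER_CALL_LOCAL) != 0)
		return FALSE;	/* local timers never leave their cpu */
	if (threshold_interval == TIMER_LONGTERM_NONE)
		return FALSE;	/* longterm mechanism disabled */
	return (deadline > now + threshold_interval);
}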
101typedef struct {
102 uint64_t interval; /* longterm timer interval */
 103 uint64_t margin; /* fudge factor (10% of interval) */
104 uint64_t deadline; /* first/soonest longterm deadline */
105 uint64_t preempted; /* sooner timer has pre-empted */
106 timer_call_t call; /* first/soonest longterm timer call */
107 uint64_t deadline_set; /* next timer set */
108 timer_call_data_t timer; /* timer used by threshold management */
109 /* Stats: */
110 uint64_t scans; /* num threshold timer scans */
111 uint64_t preempts; /* num threshold reductions */
112 uint64_t latency; /* average threshold latency */
113 uint64_t latency_min; /* minimum threshold latency */
114 uint64_t latency_max; /* maximum threshold latency */
115} threshold_t;
116
117typedef struct {
118 mpqueue_head_t queue; /* longterm timer list */
119 uint64_t enqueues; /* num timers queued */
120 uint64_t dequeues; /* num timers dequeued */
121 uint64_t escalates; /* num timers becoming shortterm */
122 uint64_t scan_time; /* last time the list was scanned */
123 threshold_t threshold; /* longterm timer threshold */
124} timer_longterm_t;
125
126timer_longterm_t timer_longterm;
127
128static mpqueue_head_t *timer_longterm_queue = NULL;
129
130static void timer_longterm_init(void);
131static void timer_longterm_callout(
132 timer_call_param_t p0,
133 timer_call_param_t p1);
134extern void timer_longterm_scan(
135 timer_longterm_t *tlp,
136 uint64_t now);
137static void timer_longterm_update(
138 timer_longterm_t *tlp);
139static void timer_longterm_update_locked(
140 timer_longterm_t *tlp);
141static mpqueue_head_t * timer_longterm_enqueue_unlocked(
142 timer_call_t call,
143 uint64_t now,
144 uint64_t deadline,
fe8ab488
A
145 mpqueue_head_t ** old_queue,
146 uint64_t soft_deadline,
147 uint64_t ttd,
148 timer_call_param_t param1,
149 uint32_t callout_flags);
39236c6e
A
150static void timer_longterm_dequeued_locked(
151 timer_call_t call);
316670eb
A
152
153uint64_t past_deadline_timers;
154uint64_t past_deadline_deltas;
155uint64_t past_deadline_longest;
156uint64_t past_deadline_shortest = ~0ULL;
157enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};
158
159uint64_t past_deadline_timer_adjustment;
160
39236c6e 161static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited);
6d2010ae
A
162boolean_t mach_timer_coalescing_enabled = TRUE;
163
164mpqueue_head_t *timer_call_enqueue_deadline_unlocked(
165 timer_call_t call,
166 mpqueue_head_t *queue,
fe8ab488
A
167 uint64_t deadline,
168 uint64_t soft_deadline,
169 uint64_t ttd,
170 timer_call_param_t param1,
171 uint32_t flags);
6d2010ae
A
172
173mpqueue_head_t *timer_call_dequeue_unlocked(
174 timer_call_t call);
175
fe8ab488
A
176timer_coalescing_priority_params_t tcoal_prio_params;
177
178#if TCOAL_PRIO_STATS
179int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
180#define TCOAL_PRIO_STAT(x) (x++)
181#else
182#define TCOAL_PRIO_STAT(x)
183#endif
184
185static void
186timer_call_init_abstime(void)
187{
188 int i;
189 uint64_t result;
190 timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params();
191 nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
192 nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result);
193 tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
194 nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result);
195 tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
196 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result);
197 tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
198 tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
199 tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
200 tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
201 tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
202 tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;
203
204 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
205 &tcoal_prio_params.timer_coalesce_rt_abstime_max);
206 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
207 &tcoal_prio_params.timer_coalesce_bg_abstime_max);
208 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
209 &tcoal_prio_params.timer_coalesce_kt_abstime_max);
210 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
211 &tcoal_prio_params.timer_coalesce_fp_abstime_max);
212 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
213 &tcoal_prio_params.timer_coalesce_ts_abstime_max);
214
215 for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) {
216 tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
217 nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
218 &tcoal_prio_params.latency_qos_abstime_max[i]);
219 tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
220 }
221}
222
1c79356b
A
223
224void
39236c6e 225timer_call_init(void)
1c79356b 226{
6d2010ae
A
227 lck_attr_setdefault(&timer_call_lck_attr);
228 lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
229 lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);
39236c6e
A
230
231 timer_longterm_init();
fe8ab488 232 timer_call_init_abstime();
1c79356b
A
233}
234
6d2010ae
A
235
236void
39236c6e 237timer_call_queue_init(mpqueue_head_t *queue)
6d2010ae 238{
39236c6e 239 DBG("timer_call_queue_init(%p)\n", queue);
6d2010ae
A
240 mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr);
241}
242
243
1c79356b
A
244void
245timer_call_setup(
246 timer_call_t call,
247 timer_call_func_t func,
248 timer_call_param_t param0)
249{
6d2010ae 250 DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
fe8ab488 251 call_entry_setup(TCE(call), func, param0);
6d2010ae
A
252 simple_lock_init(&(call)->lock, 0);
253 call->async_dequeue = FALSE;
1c79356b 254}
6d2010ae
A
255#if TIMER_ASSERT
256static __inline__ mpqueue_head_t *
257timer_call_entry_dequeue(
258 timer_call_t entry)
259{
fe8ab488 260 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
6d2010ae
A
261
262 if (!hw_lock_held((hw_lock_t)&entry->lock))
263 panic("_call_entry_dequeue() "
264 "entry %p is not locked\n", entry);
265 /*
266 * XXX The queue lock is actually a mutex in spin mode
267 * but there's no way to test for it being held
268 * so we pretend it's a spinlock!
269 */
270 if (!hw_lock_held((hw_lock_t)&old_queue->lock_data))
271 panic("_call_entry_dequeue() "
272 "queue %p is not locked\n", old_queue);
273
fe8ab488 274 call_entry_dequeue(TCE(entry));
39236c6e 275 old_queue->count--;
c910b4d9 276
6d2010ae
A
277 return (old_queue);
278}
1c79356b 279
6d2010ae
A
280static __inline__ mpqueue_head_t *
281timer_call_entry_enqueue_deadline(
282 timer_call_t entry,
283 mpqueue_head_t *queue,
284 uint64_t deadline)
285{
fe8ab488 286 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
1c79356b 287
6d2010ae
A
288 if (!hw_lock_held((hw_lock_t)&entry->lock))
289 panic("_call_entry_enqueue_deadline() "
290 "entry %p is not locked\n", entry);
291 /* XXX More lock pretense: */
292 if (!hw_lock_held((hw_lock_t)&queue->lock_data))
293 panic("_call_entry_enqueue_deadline() "
294 "queue %p is not locked\n", queue);
295 if (old_queue != NULL && old_queue != queue)
296 panic("_call_entry_enqueue_deadline() "
297 "old_queue %p != queue", old_queue);
1c79356b 298
fe8ab488 299 call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
1c79356b 300
39236c6e
A
301/* For efficiency, track the earliest soft deadline on the queue, so that
302 * fuzzy decisions can be made without lock acquisitions.
303 */
fe8ab488
A
304 timer_call_t thead = (timer_call_t)queue_first(&queue->head);
305
306 queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
39236c6e
A
307
308 if (old_queue)
309 old_queue->count--;
310 queue->count++;
311
6d2010ae
A
312 return (old_queue);
313}
1c79356b 314
6d2010ae 315#else
1c79356b 316
6d2010ae
A
317static __inline__ mpqueue_head_t *
318timer_call_entry_dequeue(
319 timer_call_t entry)
320{
fe8ab488 321 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
39236c6e 322
fe8ab488 323 call_entry_dequeue(TCE(entry));
39236c6e
A
324 old_queue->count--;
325
326 return old_queue;
6d2010ae 327}
c910b4d9 328
6d2010ae
A
329static __inline__ mpqueue_head_t *
330timer_call_entry_enqueue_deadline(
331 timer_call_t entry,
332 mpqueue_head_t *queue,
333 uint64_t deadline)
334{
fe8ab488 335 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
39236c6e 336
fe8ab488 337 call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
39236c6e
A
338
339 /* For efficiency, track the earliest soft deadline on the queue,
340 * so that fuzzy decisions can be made without lock acquisitions.
341 */
fe8ab488
A
342
343 timer_call_t thead = (timer_call_t)queue_first(&queue->head);
344 queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
39236c6e
A
345
346 if (old_queue)
347 old_queue->count--;
348 queue->count++;
349
350 return old_queue;
1c79356b
A
351}
352
6d2010ae
A
353#endif
354
39236c6e
A
355static __inline__ void
356timer_call_entry_enqueue_tail(
357 timer_call_t entry,
358 mpqueue_head_t *queue)
359{
fe8ab488 360 call_entry_enqueue_tail(TCE(entry), QUEUE(queue));
39236c6e
A
361 queue->count++;
362 return;
363}
364
365/*
366 * Remove timer entry from its queue but don't change the queue pointer
367 * and set the async_dequeue flag. This is locking case 2b.
368 */
369static __inline__ void
370timer_call_entry_dequeue_async(
371 timer_call_t entry)
372{
fe8ab488 373 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
39236c6e
A
374 if (old_queue) {
375 old_queue->count--;
376 (void) remque(qe(entry));
377 entry->async_dequeue = TRUE;
378 }
379 return;
380}
381
6d2010ae
A
382#if TIMER_ASSERT
383unsigned timer_call_enqueue_deadline_unlocked_async1;
384unsigned timer_call_enqueue_deadline_unlocked_async2;
385#endif
386/*
387 * Assumes call_entry and queues unlocked, interrupts disabled.
388 */
389__inline__ mpqueue_head_t *
390timer_call_enqueue_deadline_unlocked(
391 timer_call_t call,
392 mpqueue_head_t *queue,
fe8ab488
A
393 uint64_t deadline,
394 uint64_t soft_deadline,
395 uint64_t ttd,
396 timer_call_param_t param1,
397 uint32_t callout_flags)
1c79356b 398{
fe8ab488 399 call_entry_t entry = TCE(call);
6d2010ae 400 mpqueue_head_t *old_queue;
1c79356b 401
6d2010ae 402 DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);
1c79356b 403
6d2010ae 404 simple_lock(&call->lock);
fe8ab488 405
6d2010ae 406 old_queue = MPQUEUE(entry->queue);
fe8ab488 407
6d2010ae 408 if (old_queue != NULL) {
39236c6e 409 timer_queue_lock_spin(old_queue);
6d2010ae 410 if (call->async_dequeue) {
39236c6e 411 /* collision (1c): timer already dequeued, clear flag */
6d2010ae 412#if TIMER_ASSERT
39236c6e
A
413 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
414 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
415 call,
416 call->async_dequeue,
fe8ab488 417 TCE(call)->queue,
39236c6e 418 0x1c, 0);
6d2010ae
A
419 timer_call_enqueue_deadline_unlocked_async1++;
420#endif
39236c6e 421 call->async_dequeue = FALSE;
6d2010ae 422 entry->queue = NULL;
39236c6e
A
423 } else if (old_queue != queue) {
424 timer_call_entry_dequeue(call);
6d2010ae
A
425#if TIMER_ASSERT
426 timer_call_enqueue_deadline_unlocked_async2++;
427#endif
428 }
39236c6e
A
429 if (old_queue == timer_longterm_queue)
430 timer_longterm_dequeued_locked(call);
6d2010ae 431 if (old_queue != queue) {
39236c6e
A
432 timer_queue_unlock(old_queue);
433 timer_queue_lock_spin(queue);
6d2010ae
A
434 }
435 } else {
39236c6e 436 timer_queue_lock_spin(queue);
6d2010ae 437 }
1c79356b 438
fe8ab488
A
439 call->soft_deadline = soft_deadline;
440 call->flags = callout_flags;
441 TCE(call)->param1 = param1;
442 call->ttd = ttd;
443
6d2010ae 444 timer_call_entry_enqueue_deadline(call, queue, deadline);
39236c6e 445 timer_queue_unlock(queue);
6d2010ae 446 simple_unlock(&call->lock);
1c79356b 447
c910b4d9
A
448 return (old_queue);
449}
1c79356b 450
6d2010ae
A
451#if TIMER_ASSERT
452unsigned timer_call_dequeue_unlocked_async1;
453unsigned timer_call_dequeue_unlocked_async2;
454#endif
455mpqueue_head_t *
456timer_call_dequeue_unlocked(
457 timer_call_t call)
c910b4d9 458{
fe8ab488 459 call_entry_t entry = TCE(call);
6d2010ae 460 mpqueue_head_t *old_queue;
1c79356b 461
6d2010ae 462 DBG("timer_call_dequeue_unlocked(%p)\n", call);
1c79356b 463
6d2010ae
A
464 simple_lock(&call->lock);
465 old_queue = MPQUEUE(entry->queue);
39236c6e
A
466#if TIMER_ASSERT
467 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
468 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
469 call,
470 call->async_dequeue,
fe8ab488 471 TCE(call)->queue,
39236c6e
A
472 0, 0);
473#endif
6d2010ae 474 if (old_queue != NULL) {
39236c6e 475 timer_queue_lock_spin(old_queue);
6d2010ae 476 if (call->async_dequeue) {
39236c6e 477 /* collision (1c): timer already dequeued, clear flag */
6d2010ae 478#if TIMER_ASSERT
39236c6e
A
479 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
480 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
481 call,
482 call->async_dequeue,
fe8ab488 483 TCE(call)->queue,
39236c6e 484 0x1c, 0);
6d2010ae
A
485 timer_call_dequeue_unlocked_async1++;
486#endif
39236c6e
A
487 call->async_dequeue = FALSE;
488 entry->queue = NULL;
6d2010ae 489 } else {
39236c6e 490 timer_call_entry_dequeue(call);
6d2010ae 491 }
39236c6e
A
492 if (old_queue == timer_longterm_queue)
493 timer_longterm_dequeued_locked(call);
494 timer_queue_unlock(old_queue);
6d2010ae
A
495 }
496 simple_unlock(&call->lock);
c910b4d9 497 return (old_queue);
1c79356b
A
498}
499
fe8ab488
A
500
501/*
502 * Timer call entry locking model
503 * ==============================
504 *
505 * Timer call entries are linked on per-cpu timer queues which are protected
506 * by the queue lock and the call entry lock. The locking protocol is:
507 *
508 * 0) The canonical locking order is timer call entry followed by queue.
509 *
510 * 1) With only the entry lock held, entry.queue is valid:
511 * 1a) NULL: the entry is not queued, or
512 * 1b) non-NULL: this queue must be locked before the entry is modified.
513 * After locking the queue, the call.async_dequeue flag must be checked:
514 * 1c) TRUE: the entry was removed from the queue by another thread
515 * and we must NULL the entry.queue and reset this flag, or
516 * 1d) FALSE: (ie. queued), the entry can be manipulated.
517 *
518 * 2) If a queue lock is obtained first, the queue is stable:
519 * 2a) If a try-lock of a queued entry succeeds, the call can be operated on
520 * and dequeued.
521 * 2b) If a try-lock fails, it indicates that another thread is attempting
522 * to change the entry and move it to a different position in this queue
 523 * or to a different queue. The entry can be dequeued but it should not be
524 * operated upon since it is being changed. Furthermore, we don't null
525 * the entry.queue pointer (protected by the entry lock we don't own).
526 * Instead, we set the async_dequeue flag -- see (1c).
527 * 2c) Same as 2b but occurring when a longterm timer is matured.
528 * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
529 * should be manipulated with the appropriate timer queue lock held,
530 * to prevent queue traversal observations from observing inconsistent
531 * updates to an in-flight callout.
532 */
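/*
 * Editor's sketch (illustrative only, not part of the original source):
 * the "case (2b)" handling above as it appears at its call sites.  With
 * the queue lock already held, a failed try-lock on the entry means its
 * owner may be repositioning it, so a scanner only unlinks the entry and
 * records that fact in async_dequeue; the owner later observes the flag
 * under case (1c) and clears entry.queue itself.  The function name is
 * hypothetical.
 */
static void __unused
timer_queue_scan_step_sketch(mpqueue_head_t *queue)
{
	timer_call_t	call;

	if (queue_empty(&queue->head))
		return;

	call = TIMER_CALL(queue_first(&queue->head));
	if (!simple_lock_try(&call->lock)) {
		/* case (2b): dequeue only, leave entry.queue untouched */
		timer_call_entry_dequeue_async(call);
		return;
	}
	/* case (2a): the entry is locked and stable; operate on and dequeue it */
	timer_call_entry_dequeue(call);
	simple_unlock(&call->lock);
}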
533
534/*
535 * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline()
536 * cast between pointer types (mpqueue_head_t *) and (queue_t) so that
537 * we can use the call_entry_dequeue() and call_entry_enqueue_deadline()
538 * methods to operate on timer_call structs as if they are call_entry structs.
539 * These structures are identical except for their queue head pointer fields.
540 *
541 * In the debug case, we assert that the timer call locking protocol
542 * is being obeyed.
543 */
544
6d2010ae
A
545static boolean_t
546timer_call_enter_internal(
547 timer_call_t call,
548 timer_call_param_t param1,
549 uint64_t deadline,
39236c6e
A
550 uint64_t leeway,
551 uint32_t flags,
552 boolean_t ratelimited)
1c79356b 553{
39236c6e 554 mpqueue_head_t *queue = NULL;
6d2010ae 555 mpqueue_head_t *old_queue;
1c79356b 556 spl_t s;
39236c6e
A
557 uint64_t slop;
558 uint32_t urgency;
fe8ab488 559 uint64_t sdeadline, ttd;
1c79356b
A
560
561 s = splclock();
6d2010ae 562
fe8ab488 563 sdeadline = deadline;
39236c6e
A
564 uint64_t ctime = mach_absolute_time();
565
566 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
567 DECR_TIMER_ENTER | DBG_FUNC_START,
568 call,
569 param1, deadline, flags, 0);
570
571 urgency = (flags & TIMER_CALL_URGENCY_MASK);
572
573 boolean_t slop_ratelimited = FALSE;
574 slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);
575
576 if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop)
577 slop = leeway;
578
579 if (UINT64_MAX - deadline <= slop) {
580 deadline = UINT64_MAX;
581 } else {
6d2010ae
A
582 deadline += slop;
583 }
1c79356b 584
316670eb
A
585 if (__improbable(deadline < ctime)) {
586 uint64_t delta = (ctime - deadline);
587
588 past_deadline_timers++;
589 past_deadline_deltas += delta;
590 if (delta > past_deadline_longest)
 591 past_deadline_longest = delta;
592 if (delta < past_deadline_shortest)
593 past_deadline_shortest = delta;
594
595 deadline = ctime + past_deadline_timer_adjustment;
fe8ab488 596 sdeadline = deadline;
316670eb 597 }
39236c6e 598
39236c6e 599 if (ratelimited || slop_ratelimited) {
fe8ab488 600 flags |= TIMER_CALL_RATELIMITED;
39236c6e 601 } else {
fe8ab488 602 flags &= ~TIMER_CALL_RATELIMITED;
39236c6e
A
603 }
604
fe8ab488 605 ttd = sdeadline - ctime;
4b17d6b6 606#if CONFIG_DTRACE
fe8ab488
A
607 DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
608 timer_call_param_t, TCE(call)->param0, uint32_t, flags,
609 (deadline - sdeadline),
610 (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
4b17d6b6
A
611#endif
612
fe8ab488
A
613 /* Program timer callout parameters under the appropriate per-CPU or
614 * longterm queue lock. The callout may have been previously enqueued
615 * and in-flight on this or another timer queue.
616 */
39236c6e 617 if (!ratelimited && !slop_ratelimited) {
fe8ab488 618 queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
39236c6e 619 }
1c79356b 620
39236c6e
A
621 if (queue == NULL) {
622 queue = timer_queue_assign(deadline);
fe8ab488 623 old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
39236c6e 624 }
1c79356b 625
39236c6e 626#if TIMER_TRACE
fe8ab488 627 TCE(call)->entry_time = ctime;
39236c6e
A
628#endif
629
630 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
631 DECR_TIMER_ENTER | DBG_FUNC_END,
632 call,
fe8ab488 633 (old_queue != NULL), deadline, queue->count, 0);
1c79356b 634
1c79356b
A
635 splx(s);
636
c910b4d9 637 return (old_queue != NULL);
1c79356b
A
638}
639
39236c6e
A
640/*
641 * timer_call_*()
642 * return boolean indicating whether the call was previously queued.
643 */
6d2010ae
A
644boolean_t
645timer_call_enter(
646 timer_call_t call,
647 uint64_t deadline,
648 uint32_t flags)
649{
39236c6e 650 return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE);
6d2010ae
A
651}
652
1c79356b 653boolean_t
c910b4d9
A
654timer_call_enter1(
655 timer_call_t call,
656 timer_call_param_t param1,
6d2010ae
A
657 uint64_t deadline,
658 uint32_t flags)
1c79356b 659{
39236c6e
A
660 return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE);
661}
662
663boolean_t
664timer_call_enter_with_leeway(
665 timer_call_t call,
666 timer_call_param_t param1,
667 uint64_t deadline,
668 uint64_t leeway,
669 uint32_t flags,
670 boolean_t ratelimited)
671{
672 return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
1c79356b
A
673}
674
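/*
 * Editor's sketch (illustrative only, not part of the original source):
 * typical client usage of the enter interfaces above.  The callout
 * function, its state, the 100 ms period and the urgency/leeway flag
 * names (taken to be declared in <kern/timer_call.h>) are assumptions
 * made for the example.
 */
static timer_call_data_t example_tcall;	/* hypothetical client state */

static void
example_timer_expired(timer_call_param_t p0, __unused timer_call_param_t p1)
{
	/* invoked from timer callout context with param0/param1 */
	(void) p0;
}

static void __unused
example_timer_arm(void)
{
	uint64_t	interval, leeway, deadline;

	timer_call_setup(&example_tcall, example_timer_expired, NULL);

	nanoseconds_to_absolutetime(100 * NSEC_PER_MSEC, &interval);
	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &leeway);
	deadline = mach_absolute_time() + interval;

	/* returns TRUE if the call was already queued */
	(void) timer_call_enter_with_leeway(&example_tcall, NULL,
		deadline, leeway,
		TIMER_CALL_SYS_NORMAL | TIMER_CALL_LEEWAY, FALSE);
}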
675boolean_t
c910b4d9
A
676timer_call_cancel(
677 timer_call_t call)
1c79356b 678{
6d2010ae 679 mpqueue_head_t *old_queue;
1c79356b
A
680 spl_t s;
681
682 s = splclock();
1c79356b 683
39236c6e
A
684 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
685 DECR_TIMER_CANCEL | DBG_FUNC_START,
686 call,
fe8ab488 687 TCE(call)->deadline, call->soft_deadline, call->flags, 0);
39236c6e 688
6d2010ae 689 old_queue = timer_call_dequeue_unlocked(call);
c910b4d9
A
690
691 if (old_queue != NULL) {
39236c6e
A
692 timer_queue_lock_spin(old_queue);
693 if (!queue_empty(&old_queue->head)) {
fe8ab488
A
694 timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline);
695 timer_call_t thead = (timer_call_t)queue_first(&old_queue->head);
696 old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
39236c6e
A
697 }
698 else {
fe8ab488 699 timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX);
39236c6e
A
700 old_queue->earliest_soft_deadline = UINT64_MAX;
701 }
702 timer_queue_unlock(old_queue);
1c79356b 703 }
39236c6e
A
704 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
705 DECR_TIMER_CANCEL | DBG_FUNC_END,
706 call,
707 old_queue,
fe8ab488
A
708 TCE(call)->deadline - mach_absolute_time(),
709 TCE(call)->deadline - TCE(call)->entry_time, 0);
1c79356b
A
710 splx(s);
711
4b17d6b6 712#if CONFIG_DTRACE
fe8ab488
A
713 DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
714 timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0,
4b17d6b6
A
715 (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
716#endif
717
c910b4d9 718 return (old_queue != NULL);
1c79356b
A
719}
720
fe8ab488
A
721static uint32_t timer_queue_shutdown_lock_skips;
722static uint32_t timer_queue_shutdown_discarded;
723
9bccf70c 724void
c910b4d9 725timer_queue_shutdown(
6d2010ae 726 mpqueue_head_t *queue)
9bccf70c 727{
6d2010ae
A
728 timer_call_t call;
729 mpqueue_head_t *new_queue;
c910b4d9 730 spl_t s;
9bccf70c 731
fe8ab488 732
6d2010ae
A
733 DBG("timer_queue_shutdown(%p)\n", queue);
734
c910b4d9 735 s = splclock();
9bccf70c 736
6d2010ae 737 /* Note comma operator in while expression re-locking each iteration */
39236c6e 738 while (timer_queue_lock_spin(queue), !queue_empty(&queue->head)) {
6d2010ae 739 call = TIMER_CALL(queue_first(&queue->head));
fe8ab488 740
6d2010ae
A
741 if (!simple_lock_try(&call->lock)) {
742 /*
743 * case (2b) lock order inversion, dequeue and skip
744 * Don't change the call_entry queue back-pointer
745 * but set the async_dequeue field.
746 */
747 timer_queue_shutdown_lock_skips++;
39236c6e
A
748 timer_call_entry_dequeue_async(call);
749#if TIMER_ASSERT
750 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
751 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
752 call,
753 call->async_dequeue,
fe8ab488 754 TCE(call)->queue,
39236c6e
A
755 0x2b, 0);
756#endif
757 timer_queue_unlock(queue);
6d2010ae
A
758 continue;
759 }
9bccf70c 760
fe8ab488
A
761 boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0);
762
6d2010ae
A
763 /* remove entry from old queue */
764 timer_call_entry_dequeue(call);
39236c6e 765 timer_queue_unlock(queue);
9bccf70c 766
fe8ab488
A
767 if (call_local == FALSE) {
768 /* and queue it on new, discarding LOCAL timers */
769 new_queue = timer_queue_assign(TCE(call)->deadline);
770 timer_queue_lock_spin(new_queue);
771 timer_call_entry_enqueue_deadline(
772 call, new_queue, TCE(call)->deadline);
773 timer_queue_unlock(new_queue);
774 } else {
775 timer_queue_shutdown_discarded++;
776 }
777
778 /* The only lingering LOCAL timer should be this thread's
779 * quantum expiration timer.
780 */
781 assert((call_local == FALSE) ||
782 (TCE(call)->func == thread_quantum_expire));
9bccf70c 783
6d2010ae 784 simple_unlock(&call->lock);
9bccf70c
A
785 }
786
39236c6e 787 timer_queue_unlock(queue);
c910b4d9 788 splx(s);
9bccf70c
A
789}
790
fe8ab488 791static uint32_t timer_queue_expire_lock_skips;
c910b4d9 792uint64_t
39236c6e 793timer_queue_expire_with_options(
6d2010ae 794 mpqueue_head_t *queue,
39236c6e
A
795 uint64_t deadline,
796 boolean_t rescan)
1c79356b 797{
39236c6e
A
798 timer_call_t call = NULL;
799 uint32_t tc_iterations = 0;
6d2010ae
A
800 DBG("timer_queue_expire(%p,)\n", queue);
801
39236c6e
A
802 uint64_t cur_deadline = deadline;
803 timer_queue_lock_spin(queue);
1c79356b 804
6d2010ae 805 while (!queue_empty(&queue->head)) {
39236c6e
A
806 /* Upon processing one or more timer calls, refresh the
807 * deadline to account for time elapsed in the callout
808 */
809 if (++tc_iterations > 1)
810 cur_deadline = mach_absolute_time();
811
812 if (call == NULL)
813 call = TIMER_CALL(queue_first(&queue->head));
1c79356b 814
39236c6e 815 if (call->soft_deadline <= cur_deadline) {
1c79356b
A
816 timer_call_func_t func;
817 timer_call_param_t param0, param1;
818
39236c6e
A
819 TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0);
820 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
821 DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
822 call,
823 call->soft_deadline,
fe8ab488
A
824 TCE(call)->deadline,
825 TCE(call)->entry_time, 0);
39236c6e 826
fe8ab488
A
827 if ((call->flags & TIMER_CALL_RATELIMITED) &&
828 (TCE(call)->deadline > cur_deadline)) {
39236c6e
A
829 if (rescan == FALSE)
830 break;
831 }
832
6d2010ae
A
833 if (!simple_lock_try(&call->lock)) {
834 /* case (2b) lock inversion, dequeue and skip */
835 timer_queue_expire_lock_skips++;
39236c6e
A
836 timer_call_entry_dequeue_async(call);
837 call = NULL;
6d2010ae
A
838 continue;
839 }
840
841 timer_call_entry_dequeue(call);
1c79356b 842
fe8ab488
A
843 func = TCE(call)->func;
844 param0 = TCE(call)->param0;
845 param1 = TCE(call)->param1;
1c79356b 846
6d2010ae 847 simple_unlock(&call->lock);
39236c6e 848 timer_queue_unlock(queue);
1c79356b 849
39236c6e 850 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
316670eb 851 DECR_TIMER_CALLOUT | DBG_FUNC_START,
39236c6e 852 call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
2d21ac55 853
4b17d6b6 854#if CONFIG_DTRACE
39236c6e 855 DTRACE_TMR7(callout__start, timer_call_func_t, func,
4b17d6b6
A
856 timer_call_param_t, param0, unsigned, call->flags,
857 0, (call->ttd >> 32),
39236c6e 858 (unsigned) (call->ttd & 0xFFFFFFFF), call);
2d21ac55 859#endif
4b17d6b6
A
860 /* Maintain time-to-deadline in per-processor data
861 * structure for thread wakeup deadline statistics.
862 */
863 uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd));
864 *ttdp = call->ttd;
1c79356b 865 (*func)(param0, param1);
4b17d6b6 866 *ttdp = 0;
4b17d6b6 867#if CONFIG_DTRACE
39236c6e
A
868 DTRACE_TMR4(callout__end, timer_call_func_t, func,
869 param0, param1, call);
2d21ac55
A
870#endif
871
39236c6e 872 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
316670eb 873 DECR_TIMER_CALLOUT | DBG_FUNC_END,
39236c6e
A
874 call, VM_KERNEL_UNSLIDE(func), param0, param1, 0);
875 call = NULL;
876 timer_queue_lock_spin(queue);
877 } else {
878 if (__probable(rescan == FALSE)) {
879 break;
880 } else {
fe8ab488
A
881 int64_t skew = TCE(call)->deadline - call->soft_deadline;
882 assert(TCE(call)->deadline >= call->soft_deadline);
39236c6e
A
883
884 /* DRK: On a latency quality-of-service level change,
885 * re-sort potentially rate-limited timers. The platform
886 * layer determines which timers require
887 * this. In the absence of the per-callout
888 * synchronization requirement, a global resort could
889 * be more efficient. The re-sort effectively
890 * annuls all timer adjustments, i.e. the "soft
891 * deadline" is the sort key.
892 */
893
894 if (timer_resort_threshold(skew)) {
895 if (__probable(simple_lock_try(&call->lock))) {
896 timer_call_entry_dequeue(call);
897 timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline);
898 simple_unlock(&call->lock);
899 call = NULL;
900 }
901 }
902 if (call) {
903 call = TIMER_CALL(queue_next(qe(call)));
904 if (queue_end(&queue->head, qe(call)))
905 break;
906 }
907 }
c910b4d9 908 }
1c79356b
A
909 }
910
39236c6e
A
911 if (!queue_empty(&queue->head)) {
912 call = TIMER_CALL(queue_first(&queue->head));
fe8ab488
A
913 cur_deadline = TCE(call)->deadline;
914 queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline: call->soft_deadline;
39236c6e
A
915 } else {
916 queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
917 }
1c79356b 918
39236c6e 919 timer_queue_unlock(queue);
c910b4d9 920
39236c6e 921 return (cur_deadline);
1c79356b 922}
6d2010ae 923
39236c6e
A
924uint64_t
925timer_queue_expire(
926 mpqueue_head_t *queue,
927 uint64_t deadline)
928{
929 return timer_queue_expire_with_options(queue, deadline, FALSE);
930}
6d2010ae
A
931
932extern int serverperfmode;
fe8ab488 933static uint32_t timer_queue_migrate_lock_skips;
6d2010ae 934/*
39236c6e 935 * timer_queue_migrate() is called by timer_queue_migrate_cpu()
6d2010ae
A
936 * to move timer requests from the local processor (queue_from)
 937 * to a target processor's queue (queue_to).
938 */
939int
940timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
941{
942 timer_call_t call;
943 timer_call_t head_to;
944 int timers_migrated = 0;
945
946 DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);
947
948 assert(!ml_get_interrupts_enabled());
949 assert(queue_from != queue_to);
950
951 if (serverperfmode) {
952 /*
953 * if we're running a high end server
954 * avoid migrations... they add latency
955 * and don't save us power under typical
956 * server workloads
957 */
958 return -4;
959 }
960
961 /*
962 * Take both local (from) and target (to) timer queue locks while
963 * moving the timers from the local queue to the target processor.
964 * We assume that the target is always the boot processor.
965 * But only move if all of the following is true:
966 * - the target queue is non-empty
967 * - the local queue is non-empty
968 * - the local queue's first deadline is later than the target's
 969 * - the local queue contains no non-migratable "local" call
970 * so that we need not have the target resync.
971 */
972
39236c6e 973 timer_queue_lock_spin(queue_to);
6d2010ae
A
974
975 head_to = TIMER_CALL(queue_first(&queue_to->head));
976 if (queue_empty(&queue_to->head)) {
977 timers_migrated = -1;
978 goto abort1;
979 }
980
39236c6e 981 timer_queue_lock_spin(queue_from);
6d2010ae
A
982
983 if (queue_empty(&queue_from->head)) {
984 timers_migrated = -2;
985 goto abort2;
986 }
987
988 call = TIMER_CALL(queue_first(&queue_from->head));
fe8ab488 989 if (TCE(call)->deadline < TCE(head_to)->deadline) {
6d2010ae
A
990 timers_migrated = 0;
991 goto abort2;
992 }
993
994 /* perform scan for non-migratable timers */
995 do {
996 if (call->flags & TIMER_CALL_LOCAL) {
997 timers_migrated = -3;
998 goto abort2;
999 }
1000 call = TIMER_CALL(queue_next(qe(call)));
1001 } while (!queue_end(&queue_from->head, qe(call)));
1002
1003 /* migration loop itself -- both queues are locked */
1004 while (!queue_empty(&queue_from->head)) {
1005 call = TIMER_CALL(queue_first(&queue_from->head));
1006 if (!simple_lock_try(&call->lock)) {
1007 /* case (2b) lock order inversion, dequeue only */
39236c6e
A
1008#ifdef TIMER_ASSERT
1009 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1010 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
1011 call,
fe8ab488 1012 TCE(call)->queue,
39236c6e
A
1013 call->lock.interlock.lock_data,
1014 0x2b, 0);
1015#endif
6d2010ae 1016 timer_queue_migrate_lock_skips++;
39236c6e 1017 timer_call_entry_dequeue_async(call);
6d2010ae
A
1018 continue;
1019 }
1020 timer_call_entry_dequeue(call);
1021 timer_call_entry_enqueue_deadline(
fe8ab488 1022 call, queue_to, TCE(call)->deadline);
6d2010ae
A
1023 timers_migrated++;
1024 simple_unlock(&call->lock);
1025 }
39236c6e 1026 queue_from->earliest_soft_deadline = UINT64_MAX;
6d2010ae 1027abort2:
39236c6e 1028 timer_queue_unlock(queue_from);
6d2010ae 1029abort1:
39236c6e 1030 timer_queue_unlock(queue_to);
6d2010ae
A
1031
1032 return timers_migrated;
1033}
39236c6e
A
1034
1035void
1036timer_queue_trace_cpu(int ncpu)
1037{
1038 timer_call_nosync_cpu(
1039 ncpu,
1040 (void(*)())timer_queue_trace,
1041 (void*) timer_queue_cpu(ncpu));
1042}
1043
1044void
1045timer_queue_trace(
1046 mpqueue_head_t *queue)
1047{
1048 timer_call_t call;
1049 spl_t s;
1050
1051 if (!kdebug_enable)
1052 return;
1053
1054 s = splclock();
1055 timer_queue_lock_spin(queue);
1056
1057 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1058 DECR_TIMER_QUEUE | DBG_FUNC_START,
1059 queue->count, mach_absolute_time(), 0, 0, 0);
1060
1061 if (!queue_empty(&queue->head)) {
1062 call = TIMER_CALL(queue_first(&queue->head));
1063 do {
1064 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1065 DECR_TIMER_QUEUE | DBG_FUNC_NONE,
1066 call->soft_deadline,
fe8ab488
A
1067 TCE(call)->deadline,
1068 TCE(call)->entry_time,
1069 TCE(call)->func,
39236c6e
A
1070 0);
1071 call = TIMER_CALL(queue_next(qe(call)));
1072 } while (!queue_end(&queue->head, qe(call)));
1073 }
1074
1075 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1076 DECR_TIMER_QUEUE | DBG_FUNC_END,
1077 queue->count, mach_absolute_time(), 0, 0, 0);
1078
1079 timer_queue_unlock(queue);
1080 splx(s);
1081}
1082
1083void
1084timer_longterm_dequeued_locked(timer_call_t call)
1085{
1086 timer_longterm_t *tlp = &timer_longterm;
1087
1088 tlp->dequeues++;
1089 if (call == tlp->threshold.call)
1090 tlp->threshold.call = NULL;
1091}
1092
1093/*
1094 * Place a timer call in the longterm list
1095 * and adjust the next timer callout deadline if the new timer is first.
1096 */
1097mpqueue_head_t *
1098timer_longterm_enqueue_unlocked(timer_call_t call,
1099 uint64_t now,
1100 uint64_t deadline,
fe8ab488
A
1101 mpqueue_head_t **old_queue,
1102 uint64_t soft_deadline,
1103 uint64_t ttd,
1104 timer_call_param_t param1,
1105 uint32_t callout_flags)
39236c6e
A
1106{
1107 timer_longterm_t *tlp = &timer_longterm;
1108 boolean_t update_required = FALSE;
1109 uint64_t longterm_threshold;
1110
1111 longterm_threshold = now + tlp->threshold.interval;
1112
1113 /*
1114 * Return NULL without doing anything if:
1115 * - this timer is local, or
1116 * - the longterm mechanism is disabled, or
1117 * - this deadline is too short.
1118 */
fe8ab488 1119 if ((callout_flags & TIMER_CALL_LOCAL) != 0 ||
39236c6e 1120 (tlp->threshold.interval == TIMER_LONGTERM_NONE) ||
fe8ab488 1121 (deadline <= longterm_threshold))
39236c6e
A
1122 return NULL;
1123
1124 /*
1125 * Remove timer from its current queue, if any.
1126 */
1127 *old_queue = timer_call_dequeue_unlocked(call);
1128
1129 /*
1130 * Lock the longterm queue, queue timer and determine
1131 * whether an update is necessary.
1132 */
1133 assert(!ml_get_interrupts_enabled());
1134 simple_lock(&call->lock);
1135 timer_queue_lock_spin(timer_longterm_queue);
fe8ab488
A
1136 TCE(call)->deadline = deadline;
1137 TCE(call)->param1 = param1;
1138 call->ttd = ttd;
1139 call->soft_deadline = soft_deadline;
1140 call->flags = callout_flags;
39236c6e 1141 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
39236c6e
A
1142
1143 tlp->enqueues++;
1144
1145 /*
1146 * We'll need to update the currently set threshold timer
1147 * if the new deadline is sooner and no sooner update is in flight.
1148 */
1149 if (deadline < tlp->threshold.deadline &&
1150 deadline < tlp->threshold.preempted) {
1151 tlp->threshold.preempted = deadline;
1152 tlp->threshold.call = call;
1153 update_required = TRUE;
1154 }
1155 timer_queue_unlock(timer_longterm_queue);
1156 simple_unlock(&call->lock);
1157
1158 if (update_required) {
fe8ab488
A
1159 /*
1160 * Note: this call expects that calling the master cpu
1161 * alone does not involve locking the topo lock.
1162 */
39236c6e
A
1163 timer_call_nosync_cpu(
1164 master_cpu,
1165 (void (*)(void *)) timer_longterm_update,
1166 (void *)tlp);
1167 }
1168
1169 return timer_longterm_queue;
1170}
1171
1172/*
1173 * Scan for timers below the longterm threshold.
1174 * Move these to the local timer queue (of the boot processor on which the
1175 * calling thread is running).
1176 * Both the local (boot) queue and the longterm queue are locked.
1177 * The scan is similar to the timer migrate sequence but is performed by
1178 * successively examining each timer on the longterm queue:
1179 * - if within the short-term threshold
1180 * - enter on the local queue (unless being deleted),
1181 * - otherwise:
1182 * - if sooner, deadline becomes the next threshold deadline.
1183 */
1184void
1185timer_longterm_scan(timer_longterm_t *tlp,
1186 uint64_t now)
1187{
1188 queue_entry_t qe;
1189 timer_call_t call;
1190 uint64_t threshold;
1191 uint64_t deadline;
1192 mpqueue_head_t *timer_master_queue;
1193
1194 assert(!ml_get_interrupts_enabled());
1195 assert(cpu_number() == master_cpu);
1196
1197 if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
1198 threshold = now + tlp->threshold.interval;
1199 else
1200 threshold = TIMER_LONGTERM_NONE;
1201
1202 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1203 tlp->threshold.call = NULL;
1204
1205 if (queue_empty(&timer_longterm_queue->head))
1206 return;
1207
1208 timer_master_queue = timer_queue_cpu(master_cpu);
1209 timer_queue_lock_spin(timer_master_queue);
1210
1211 qe = queue_first(&timer_longterm_queue->head);
1212 while (!queue_end(&timer_longterm_queue->head, qe)) {
1213 call = TIMER_CALL(qe);
1214 deadline = call->soft_deadline;
1215 qe = queue_next(qe);
1216 if (!simple_lock_try(&call->lock)) {
1217 /* case (2c) lock order inversion, dequeue only */
1218#ifdef TIMER_ASSERT
1219 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1220 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
1221 call,
fe8ab488 1222 TCE(call)->queue,
39236c6e
A
1223 call->lock.interlock.lock_data,
1224 0x2c, 0);
1225#endif
1226 timer_call_entry_dequeue_async(call);
1227 continue;
1228 }
1229 if (deadline < threshold) {
1230 /*
1231 * This timer needs moving (escalating)
1232 * to the local (boot) processor's queue.
1233 */
1234#ifdef TIMER_ASSERT
1235 if (deadline < now)
1236 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1237 DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
1238 call,
1239 deadline,
1240 now,
1241 threshold,
1242 0);
1243#endif
1244 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1245 DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
1246 call,
fe8ab488
A
1247 TCE(call)->deadline,
1248 TCE(call)->entry_time,
1249 TCE(call)->func,
39236c6e
A
1250 0);
1251 tlp->escalates++;
1252 timer_call_entry_dequeue(call);
1253 timer_call_entry_enqueue_deadline(
fe8ab488 1254 call, timer_master_queue, TCE(call)->deadline);
39236c6e
A
1255 /*
1256 * A side-effect of the following call is to update
1257 * the actual hardware deadline if required.
1258 */
1259 (void) timer_queue_assign(deadline);
1260 } else {
1261 if (deadline < tlp->threshold.deadline) {
1262 tlp->threshold.deadline = deadline;
1263 tlp->threshold.call = call;
1264 }
1265 }
1266 simple_unlock(&call->lock);
1267 }
1268
1269 timer_queue_unlock(timer_master_queue);
1270}
1271
1272void
1273timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
1274{
1275 timer_longterm_t *tlp = (timer_longterm_t *) p0;
1276
1277 timer_longterm_update(tlp);
1278}
1279
1280void
1281timer_longterm_update_locked(timer_longterm_t *tlp)
1282{
1283 uint64_t latency;
1284
1285 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1286 DECR_TIMER_UPDATE | DBG_FUNC_START,
1287 &tlp->queue,
1288 tlp->threshold.deadline,
1289 tlp->threshold.preempted,
1290 tlp->queue.count, 0);
1291
1292 tlp->scan_time = mach_absolute_time();
1293 if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
1294 tlp->threshold.preempts++;
1295 tlp->threshold.deadline = tlp->threshold.preempted;
1296 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1297 /*
1298 * Note: in the unlikely event that a pre-empted timer has
1299 * itself been cancelled, we'll simply re-scan later at the
1300 * time of the preempted/cancelled timer.
1301 */
1302 } else {
1303 tlp->threshold.scans++;
1304
1305 /*
1306 * Maintain a moving average of our wakeup latency.
 1307 * Clamp latency to 0 and ignore values above the threshold interval.
1308 */
1309 if (tlp->scan_time > tlp->threshold.deadline_set)
1310 latency = tlp->scan_time - tlp->threshold.deadline_set;
1311 else
1312 latency = 0;
1313 if (latency < tlp->threshold.interval) {
1314 tlp->threshold.latency_min =
1315 MIN(tlp->threshold.latency_min, latency);
1316 tlp->threshold.latency_max =
1317 MAX(tlp->threshold.latency_max, latency);
1318 tlp->threshold.latency =
1319 (tlp->threshold.latency*99 + latency) / 100;
1320 }
1321
1322 timer_longterm_scan(tlp, tlp->scan_time);
1323 }
1324
1325 tlp->threshold.deadline_set = tlp->threshold.deadline;
1326 /* The next deadline timer to be set is adjusted */
1327 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
1328 tlp->threshold.deadline_set -= tlp->threshold.margin;
1329 tlp->threshold.deadline_set -= tlp->threshold.latency;
1330 }
1331
1332 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1333 DECR_TIMER_UPDATE | DBG_FUNC_END,
1334 &tlp->queue,
1335 tlp->threshold.deadline,
1336 tlp->threshold.scans,
1337 tlp->queue.count, 0);
1338}
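/*
 * Editor's note (not part of the original source): the moving average
 * above weights history 99:1.  For example, a running average of 100 us
 * and a new sample of 300 us give (100 * 99 + 300) / 100 = 102 us, so a
 * single outlier moves the estimate only slightly.
 */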
1339
1340void
1341timer_longterm_update(timer_longterm_t *tlp)
1342{
1343 spl_t s = splclock();
1344
1345 timer_queue_lock_spin(timer_longterm_queue);
1346
1347 if (cpu_number() != master_cpu)
 1348 panic("timer_longterm_update() on non-boot cpu");
1349
1350 timer_longterm_update_locked(tlp);
1351
1352 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE)
1353 timer_call_enter(
1354 &tlp->threshold.timer,
1355 tlp->threshold.deadline_set,
1356 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1357
1358 timer_queue_unlock(timer_longterm_queue);
1359 splx(s);
1360}
1361
1362void
1363timer_longterm_init(void)
1364{
1365 uint32_t longterm;
1366 timer_longterm_t *tlp = &timer_longterm;
1367
1368 DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue);
1369
1370 /*
15129b1c
A
 1371 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD,
 1372 * or to TIMER_LONGTERM_NONE (disabled) when in server performance mode;
 1373 * either can be overridden by the "longterm" boot-arg.
39236c6e 1374 */
15129b1c
A
1375 tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE
1376 : TIMER_LONGTERM_THRESHOLD;
39236c6e
A
1377 if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) {
1378 tlp->threshold.interval = (longterm == 0) ?
1379 TIMER_LONGTERM_NONE :
1380 longterm * NSEC_PER_MSEC;
1381 }
1382 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
1383 printf("Longterm timer threshold: %llu ms\n",
1384 tlp->threshold.interval / NSEC_PER_MSEC);
1385 kprintf("Longterm timer threshold: %llu ms\n",
1386 tlp->threshold.interval / NSEC_PER_MSEC);
1387 nanoseconds_to_absolutetime(tlp->threshold.interval,
1388 &tlp->threshold.interval);
1389 tlp->threshold.margin = tlp->threshold.interval / 10;
1390 tlp->threshold.latency_min = EndOfAllTime;
1391 tlp->threshold.latency_max = 0;
1392 }
1393
1394 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1395 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1396
1397 lck_attr_setdefault(&timer_longterm_lck_attr);
1398 lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr);
1399 lck_grp_init(&timer_longterm_lck_grp,
1400 "timer_longterm", &timer_longterm_lck_grp_attr);
1401 mpqueue_init(&tlp->queue,
1402 &timer_longterm_lck_grp, &timer_longterm_lck_attr);
1403
1404 timer_call_setup(&tlp->threshold.timer,
1405 timer_longterm_callout, (timer_call_param_t) tlp);
1406
1407 timer_longterm_queue = &tlp->queue;
1408}
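/*
 * Editor's note (not part of the original source): worked example of the
 * boot-arg handling above.  Booting with longterm=0 disables the longterm
 * queue entirely, while longterm=5000 sets a 5000 ms threshold
 * (5000 * NSEC_PER_MSEC, subsequently converted to absolute time, with a
 * 10% margin of 500 ms).
 */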
1409
1410enum {
1411 THRESHOLD, QCOUNT,
1412 ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
1413 LATENCY, LATENCY_MIN, LATENCY_MAX
1414};
1415uint64_t
1416timer_sysctl_get(int oid)
1417{
1418 timer_longterm_t *tlp = &timer_longterm;
1419
1420 switch (oid) {
1421 case THRESHOLD:
1422 return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
1423 0 : tlp->threshold.interval / NSEC_PER_MSEC;
1424 case QCOUNT:
1425 return tlp->queue.count;
1426 case ENQUEUES:
1427 return tlp->enqueues;
1428 case DEQUEUES:
1429 return tlp->dequeues;
1430 case ESCALATES:
1431 return tlp->escalates;
1432 case SCANS:
1433 return tlp->threshold.scans;
1434 case PREEMPTS:
1435 return tlp->threshold.preempts;
1436 case LATENCY:
1437 return tlp->threshold.latency;
1438 case LATENCY_MIN:
1439 return tlp->threshold.latency_min;
1440 case LATENCY_MAX:
1441 return tlp->threshold.latency_max;
1442 default:
1443 return 0;
1444 }
1445}
1446
1447/*
1448 * timer_master_scan() is the inverse of timer_longterm_scan()
1449 * since it un-escalates timers to the longterm queue.
1450 */
1451static void
1452timer_master_scan(timer_longterm_t *tlp,
1453 uint64_t now)
1454{
1455 queue_entry_t qe;
1456 timer_call_t call;
1457 uint64_t threshold;
1458 uint64_t deadline;
1459 mpqueue_head_t *timer_master_queue;
1460
1461 if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
1462 threshold = now + tlp->threshold.interval;
1463 else
1464 threshold = TIMER_LONGTERM_NONE;
1465
1466 timer_master_queue = timer_queue_cpu(master_cpu);
1467 timer_queue_lock_spin(timer_master_queue);
1468
1469 qe = queue_first(&timer_master_queue->head);
1470 while (!queue_end(&timer_master_queue->head, qe)) {
1471 call = TIMER_CALL(qe);
fe8ab488 1472 deadline = TCE(call)->deadline;
39236c6e
A
1473 qe = queue_next(qe);
1474 if ((call->flags & TIMER_CALL_LOCAL) != 0)
1475 continue;
1476 if (!simple_lock_try(&call->lock)) {
1477 /* case (2c) lock order inversion, dequeue only */
1478 timer_call_entry_dequeue_async(call);
1479 continue;
1480 }
1481 if (deadline > threshold) {
1482 /* move from master to longterm */
1483 timer_call_entry_dequeue(call);
1484 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1485 if (deadline < tlp->threshold.deadline) {
1486 tlp->threshold.deadline = deadline;
1487 tlp->threshold.call = call;
1488 }
1489 }
1490 simple_unlock(&call->lock);
1491 }
1492 timer_queue_unlock(timer_master_queue);
1493}
1494
1495static void
1496timer_sysctl_set_threshold(uint64_t value)
1497{
1498 timer_longterm_t *tlp = &timer_longterm;
1499 spl_t s = splclock();
1500 boolean_t threshold_increase;
1501
1502 timer_queue_lock_spin(timer_longterm_queue);
1503
1504 timer_call_cancel(&tlp->threshold.timer);
1505
1506 /*
 1507 * Set the new threshold and note whether it's increasing.
1508 */
1509 if (value == 0) {
1510 tlp->threshold.interval = TIMER_LONGTERM_NONE;
1511 threshold_increase = TRUE;
1512 timer_call_cancel(&tlp->threshold.timer);
1513 } else {
1514 uint64_t old_interval = tlp->threshold.interval;
1515 tlp->threshold.interval = value * NSEC_PER_MSEC;
1516 nanoseconds_to_absolutetime(tlp->threshold.interval,
1517 &tlp->threshold.interval);
1518 tlp->threshold.margin = tlp->threshold.interval / 10;
1519 if (old_interval == TIMER_LONGTERM_NONE)
1520 threshold_increase = FALSE;
1521 else
1522 threshold_increase = (tlp->threshold.interval > old_interval);
1523 }
1524
1525 if (threshold_increase /* or removal */) {
1526 /* Escalate timers from the longterm queue */
1527 timer_longterm_scan(tlp, mach_absolute_time());
1528 } else /* decrease or addition */ {
1529 /*
 1530 * We scan the local/master queue for timers that are now longterm.
1531 * To be strictly correct, we should scan all processor queues
1532 * but timer migration results in most timers gravitating to the
1533 * master processor in any case.
1534 */
1535 timer_master_scan(tlp, mach_absolute_time());
1536 }
1537
1538 /* Set new timer accordingly */
1539 tlp->threshold.deadline_set = tlp->threshold.deadline;
1540 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
1541 tlp->threshold.deadline_set -= tlp->threshold.margin;
1542 tlp->threshold.deadline_set -= tlp->threshold.latency;
1543 timer_call_enter(
1544 &tlp->threshold.timer,
1545 tlp->threshold.deadline_set,
1546 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1547 }
1548
1549 /* Reset stats */
1550 tlp->enqueues = 0;
1551 tlp->dequeues = 0;
1552 tlp->escalates = 0;
1553 tlp->threshold.scans = 0;
1554 tlp->threshold.preempts = 0;
1555 tlp->threshold.latency = 0;
1556 tlp->threshold.latency_min = EndOfAllTime;
1557 tlp->threshold.latency_max = 0;
1558
1559 timer_queue_unlock(timer_longterm_queue);
1560 splx(s);
1561}
1562
1563int
1564timer_sysctl_set(int oid, uint64_t value)
1565{
1566 switch (oid) {
1567 case THRESHOLD:
1568 timer_call_cpu(
1569 master_cpu,
1570 (void (*)(void *)) timer_sysctl_set_threshold,
1571 (void *) value);
1572 return KERN_SUCCESS;
1573 default:
1574 return KERN_INVALID_ARGUMENT;
1575 }
1576}
fe8ab488
A
1577
1578
1579/* Select timer coalescing window based on per-task quality-of-service hints */
1580static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) {
1581 uint32_t latency_qos;
1582 boolean_t adjusted = FALSE;
1583 task_t ctask = t->task;
1584
1585 if (ctask) {
1586 latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS);
1587
1588 assert(latency_qos <= NUM_LATENCY_QOS_TIERS);
1589
1590 if (latency_qos) {
1591 *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
1592 *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1];
1593 *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
1594 adjusted = TRUE;
1595 }
1596 }
1597 return adjusted;
1598}
1599
1600
1601/* Adjust timer deadlines based on priority of the thread and the
1602 * urgency value provided at timeout establishment. With this mechanism,
1603 * timers are no longer necessarily sorted in order of soft deadline
1604 * on a given timer queue, i.e. they may be differentially skewed.
1605 * In the current scheme, this could lead to fewer pending timers
1606 * processed than is technically possible when the HW deadline arrives.
1607 */
1608static void
1609timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) {
1610 int16_t tpri = cthread->sched_pri;
1611 if ((urgency & TIMER_CALL_USER_MASK) != 0) {
1612 if (tpri >= BASEPRI_RTQUEUES ||
1613 urgency == TIMER_CALL_USER_CRITICAL) {
1614 *tshift = tcoal_prio_params.timer_coalesce_rt_shift;
1615 *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max;
1616 TCOAL_PRIO_STAT(rt_tcl);
1617 } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) ||
1618 (urgency == TIMER_CALL_USER_BACKGROUND)) {
1619 /* Determine if timer should be subjected to a lower QoS */
1620 if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1621 if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) {
1622 return;
1623 } else {
1624 *pratelimited = FALSE;
1625 }
1626 }
1627 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1628 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1629 TCOAL_PRIO_STAT(bg_tcl);
1630 } else if (tpri >= MINPRI_KERNEL) {
1631 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1632 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1633 TCOAL_PRIO_STAT(kt_tcl);
1634 } else if (cthread->sched_mode == TH_MODE_FIXED) {
1635 *tshift = tcoal_prio_params.timer_coalesce_fp_shift;
1636 *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max;
1637 TCOAL_PRIO_STAT(fp_tcl);
1638 } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1639 TCOAL_PRIO_STAT(qos_tcl);
1640 } else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
1641 *tshift = tcoal_prio_params.timer_coalesce_ts_shift;
1642 *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
1643 TCOAL_PRIO_STAT(ts_tcl);
1644 } else {
1645 TCOAL_PRIO_STAT(nc_tcl);
1646 }
1647 } else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
1648 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1649 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1650 TCOAL_PRIO_STAT(bg_tcl);
1651 } else {
1652 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1653 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1654 TCOAL_PRIO_STAT(kt_tcl);
1655 }
1656}
1657
1658
1659int timer_user_idle_level;
1660
1661uint64_t
1662timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
1663{
1664 int32_t tcs_shift = 0;
1665 uint64_t tcs_max_abstime = 0;
1666 uint64_t adjval;
1667 uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);
1668
1669 if (mach_timer_coalescing_enabled &&
1670 (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
1671 timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited);
1672
1673 if (tcs_shift >= 0)
1674 adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime);
1675 else
1676 adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime);
1677 /* Apply adjustments derived from "user idle level" heuristic */
1678 adjval += (adjval * timer_user_idle_level) >> 7;
1679 return adjval;
1680 } else {
1681 return 0;
1682 }
1683}
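/*
 * Editor's note (not part of the original source): worked example of the
 * computation above, under assumed inputs.  For a tier whose parameters
 * give tcs_shift == 10 and a deadline 100 ms out, the raw adjustment is
 * (100 ms >> 10), roughly 98 us, clamped to tcs_max_abstime.  With
 * timer_user_idle_level == 64, the idle heuristic then adds
 * (adjval * 64) >> 7, i.e. another 50%, for roughly 146 us of slop.
 */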
1684
1685int
1686timer_get_user_idle_level(void) {
1687 return timer_user_idle_level;
1688}
1689
1690kern_return_t timer_set_user_idle_level(int ilevel) {
1691 boolean_t do_reeval = FALSE;
1692
1693 if ((ilevel < 0) || (ilevel > 128))
1694 return KERN_INVALID_ARGUMENT;
1695
1696 if (ilevel < timer_user_idle_level) {
1697 do_reeval = TRUE;
1698 }
1699
1700 timer_user_idle_level = ilevel;
1701
1702 if (do_reeval)
1703 ml_timer_evaluate();
1704
1705 return KERN_SUCCESS;
1706}