/*
 * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Timer interrupt callout module.
 */

#include <mach/mach_types.h>

#include <kern/clock.h>
#include <kern/smp.h>
#include <kern/processor.h>
#include <kern/timer_call.h>
#include <kern/timer_queue.h>
#include <kern/call_entry.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <sys/kdebug.h>

#if CONFIG_DTRACE
#include <mach/sdt.h>
#endif


#if DEBUG
#define TIMER_ASSERT	1
#endif

//#define TIMER_ASSERT	1
//#define TIMER_DBG	1

#if TIMER_DBG
#define DBG(x...) kprintf("DBG: " x);
#else
#define DBG(x...)
#endif

#if TIMER_TRACE
#define TIMER_KDEBUG_TRACE	KERNEL_DEBUG_CONSTANT_IST
#else
#define TIMER_KDEBUG_TRACE(x...)
#endif


lck_grp_t		timer_call_lck_grp;
lck_attr_t		timer_call_lck_attr;
lck_grp_attr_t		timer_call_lck_grp_attr;

lck_grp_t		timer_longterm_lck_grp;
lck_attr_t		timer_longterm_lck_attr;
lck_grp_attr_t		timer_longterm_lck_grp_attr;

/* Timer queue lock must be acquired with interrupts disabled (under splclock()) */
#if __SMP__
#define timer_queue_lock_spin(queue)		\
	lck_mtx_lock_spin_always(&queue->lock_data)

#define timer_queue_unlock(queue)		\
	lck_mtx_unlock_always(&queue->lock_data)
#else
#define timer_queue_lock_spin(queue)	(void)1
#define timer_queue_unlock(queue)	(void)1
#endif

#define QUEUE(x)	((queue_t)(x))
#define MPQUEUE(x)	((mpqueue_head_t *)(x))
#define TIMER_CALL(x)	((timer_call_t)(x))
#define TCE(x)		(&(x->call_entry))
/*
 * The longterm timer object is a global structure holding all timers
 * beyond the short-term, local timer queue threshold. The boot processor
 * is responsible for moving each timer to its local timer queue
 * if and when that timer becomes due within the threshold.
 */
#define TIMER_LONGTERM_NONE		EndOfAllTime
#if defined(__x86_64__)
#define TIMER_LONGTERM_THRESHOLD	(1ULL * NSEC_PER_SEC)
#else
#define TIMER_LONGTERM_THRESHOLD	TIMER_LONGTERM_NONE
#endif
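
/*
 * For example: with the default 1-second threshold on x86_64, a timer call
 * due 5 seconds from now is parked on the global longterm queue and is
 * escalated to the boot processor's local queue only once its deadline
 * falls within the threshold. A threshold of TIMER_LONGTERM_NONE
 * (EndOfAllTime) effectively disables the longterm queue.
 */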

typedef struct {
	uint64_t	interval;	/* longterm timer interval */
	uint64_t	margin;		/* fudge factor (10% of interval) */
	uint64_t	deadline;	/* first/soonest longterm deadline */
	uint64_t	preempted;	/* sooner timer has pre-empted */
	timer_call_t	call;		/* first/soonest longterm timer call */
	uint64_t	deadline_set;	/* next timer set */
	timer_call_data_t timer;	/* timer used by threshold management */
					/* Stats: */
	uint64_t	scans;		/* num threshold timer scans */
	uint64_t	preempts;	/* num threshold reductions */
	uint64_t	latency;	/* average threshold latency */
	uint64_t	latency_min;	/* minimum threshold latency */
	uint64_t	latency_max;	/* maximum threshold latency */
} threshold_t;

typedef struct {
	mpqueue_head_t	queue;		/* longterm timer list */
	uint64_t	enqueues;	/* num timers queued */
	uint64_t	dequeues;	/* num timers dequeued */
	uint64_t	escalates;	/* num timers becoming shortterm */
	uint64_t	scan_time;	/* last time the list was scanned */
	threshold_t	threshold;	/* longterm timer threshold */
} timer_longterm_t;

timer_longterm_t		timer_longterm;

static mpqueue_head_t		*timer_longterm_queue = NULL;

static void			timer_longterm_init(void);
static void			timer_longterm_callout(
					timer_call_param_t	p0,
					timer_call_param_t	p1);
extern void			timer_longterm_scan(
					timer_longterm_t	*tlp,
					uint64_t		now);
static void			timer_longterm_update(
					timer_longterm_t	*tlp);
static void			timer_longterm_update_locked(
					timer_longterm_t	*tlp);
static mpqueue_head_t *		timer_longterm_enqueue_unlocked(
					timer_call_t		call,
					uint64_t		now,
					uint64_t		deadline,
					mpqueue_head_t		**old_queue,
					uint64_t		soft_deadline,
					uint64_t		ttd,
					timer_call_param_t	param1,
					uint32_t		callout_flags);
static void			timer_longterm_dequeued_locked(
					timer_call_t		call);

uint64_t past_deadline_timers;
uint64_t past_deadline_deltas;
uint64_t past_deadline_longest;
uint64_t past_deadline_shortest = ~0ULL;
enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};

uint64_t past_deadline_timer_adjustment;

static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited);
boolean_t	mach_timer_coalescing_enabled = TRUE;

mpqueue_head_t	*timer_call_enqueue_deadline_unlocked(
			timer_call_t		call,
			mpqueue_head_t		*queue,
			uint64_t		deadline,
			uint64_t		soft_deadline,
			uint64_t		ttd,
			timer_call_param_t	param1,
			uint32_t		flags);

mpqueue_head_t	*timer_call_dequeue_unlocked(
			timer_call_t		call);

timer_coalescing_priority_params_t tcoal_prio_params;

#if TCOAL_PRIO_STATS
int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
#define TCOAL_PRIO_STAT(x) (x++)
#else
#define TCOAL_PRIO_STAT(x)
#endif

static void
timer_call_init_abstime(void)
{
	int i;
	uint64_t result;
	timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params();
	nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result);
	tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
	nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result);
	tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result);
	tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
	tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
	tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
	tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
	tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
	tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;

	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
	    &tcoal_prio_params.timer_coalesce_rt_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
	    &tcoal_prio_params.timer_coalesce_bg_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
	    &tcoal_prio_params.timer_coalesce_kt_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
	    &tcoal_prio_params.timer_coalesce_fp_abstime_max);
	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
	    &tcoal_prio_params.timer_coalesce_ts_abstime_max);

	for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) {
		tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
		nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
		    &tcoal_prio_params.latency_qos_abstime_max[i]);
		tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
	}
}


void
timer_call_init(void)
{
	lck_attr_setdefault(&timer_call_lck_attr);
	lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
	lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);

	timer_longterm_init();
	timer_call_init_abstime();
}


void
timer_call_queue_init(mpqueue_head_t *queue)
{
	DBG("timer_call_queue_init(%p)\n", queue);
	mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr);
}


void
timer_call_setup(
	timer_call_t		call,
	timer_call_func_t	func,
	timer_call_param_t	param0)
{
	DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
	call_entry_setup(TCE(call), func, param0);
	simple_lock_init(&(call)->lock, 0);
	call->async_dequeue = FALSE;
}
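
/*
 * Illustrative usage sketch (my_timer, my_func, my_param0/1 are placeholder
 * names, and the 50 ms interval is only an example):
 *
 *	static timer_call_data_t my_timer;
 *	uint64_t interval, deadline;
 *
 *	timer_call_setup(&my_timer, my_func, my_param0);	// once, at init
 *	nanoseconds_to_absolutetime(50 * NSEC_PER_MSEC, &interval);
 *	deadline = mach_absolute_time() + interval;
 *	timer_call_enter1(&my_timer, my_param1, deadline, TIMER_CALL_SYS_CRITICAL);
 *	...
 *	timer_call_cancel(&my_timer);	// returns TRUE if it was still queued
 */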
#if TIMER_ASSERT
static __inline__ mpqueue_head_t *
timer_call_entry_dequeue(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	if (!hw_lock_held((hw_lock_t)&entry->lock))
		panic("_call_entry_dequeue() "
			"entry %p is not locked\n", entry);
	/*
	 * XXX The queue lock is actually a mutex in spin mode
	 *     but there's no way to test for it being held
	 *     so we pretend it's a spinlock!
	 */
	if (!hw_lock_held((hw_lock_t)&old_queue->lock_data))
		panic("_call_entry_dequeue() "
			"queue %p is not locked\n", old_queue);

	call_entry_dequeue(TCE(entry));
	old_queue->count--;

	return (old_queue);
}

static __inline__ mpqueue_head_t *
timer_call_entry_enqueue_deadline(
	timer_call_t		entry,
	mpqueue_head_t		*queue,
	uint64_t		deadline)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	if (!hw_lock_held((hw_lock_t)&entry->lock))
		panic("_call_entry_enqueue_deadline() "
			"entry %p is not locked\n", entry);
	/* XXX More lock pretense: */
	if (!hw_lock_held((hw_lock_t)&queue->lock_data))
		panic("_call_entry_enqueue_deadline() "
			"queue %p is not locked\n", queue);
	if (old_queue != NULL && old_queue != queue)
		panic("_call_entry_enqueue_deadline() "
			"old_queue %p != queue", old_queue);

	call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);

	/* For efficiency, track the earliest soft deadline on the queue, so that
	 * fuzzy decisions can be made without lock acquisitions.
	 */
	timer_call_t thead = (timer_call_t)queue_first(&queue->head);

	queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;

	if (old_queue)
		old_queue->count--;
	queue->count++;

	return (old_queue);
}

#else

static __inline__ mpqueue_head_t *
timer_call_entry_dequeue(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	call_entry_dequeue(TCE(entry));
	old_queue->count--;

	return old_queue;
}

static __inline__ mpqueue_head_t *
timer_call_entry_enqueue_deadline(
	timer_call_t		entry,
	mpqueue_head_t		*queue,
	uint64_t		deadline)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);

	call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);

	/* For efficiency, track the earliest soft deadline on the queue,
	 * so that fuzzy decisions can be made without lock acquisitions.
	 */

	timer_call_t thead = (timer_call_t)queue_first(&queue->head);
	queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;

	if (old_queue)
		old_queue->count--;
	queue->count++;

	return old_queue;
}

#endif

static __inline__ void
timer_call_entry_enqueue_tail(
	timer_call_t		entry,
	mpqueue_head_t		*queue)
{
	call_entry_enqueue_tail(TCE(entry), QUEUE(queue));
	queue->count++;
	return;
}

/*
 * Remove timer entry from its queue but don't change the queue pointer
 * and set the async_dequeue flag. This is locking case 2b.
 */
static __inline__ void
timer_call_entry_dequeue_async(
	timer_call_t		entry)
{
	mpqueue_head_t	*old_queue = MPQUEUE(TCE(entry)->queue);
	if (old_queue) {
		old_queue->count--;
		(void) remque(qe(entry));
		entry->async_dequeue = TRUE;
	}
	return;
}

#if TIMER_ASSERT
unsigned timer_call_enqueue_deadline_unlocked_async1;
unsigned timer_call_enqueue_deadline_unlocked_async2;
#endif
/*
 * Assumes call_entry and queues unlocked, interrupts disabled.
 */
__inline__ mpqueue_head_t *
timer_call_enqueue_deadline_unlocked(
	timer_call_t		call,
	mpqueue_head_t		*queue,
	uint64_t		deadline,
	uint64_t		soft_deadline,
	uint64_t		ttd,
	timer_call_param_t	param1,
	uint32_t		callout_flags)
{
	call_entry_t	entry = TCE(call);
	mpqueue_head_t	*old_queue;

	DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);

	simple_lock(&call->lock);

	old_queue = MPQUEUE(entry->queue);

	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (call->async_dequeue) {
			/* collision (1c): timer already dequeued, clear flag */
#if TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE_OR_PERM(call),
				call->async_dequeue,
				VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
				0x1c, 0);
			timer_call_enqueue_deadline_unlocked_async1++;
#endif
			call->async_dequeue = FALSE;
			entry->queue = NULL;
		} else if (old_queue != queue) {
			timer_call_entry_dequeue(call);
#if TIMER_ASSERT
			timer_call_enqueue_deadline_unlocked_async2++;
#endif
		}
		if (old_queue == timer_longterm_queue)
			timer_longterm_dequeued_locked(call);
		if (old_queue != queue) {
			timer_queue_unlock(old_queue);
			timer_queue_lock_spin(queue);
		}
	} else {
		timer_queue_lock_spin(queue);
	}

	call->soft_deadline = soft_deadline;
	call->flags = callout_flags;
	TCE(call)->param1 = param1;
	call->ttd = ttd;

	timer_call_entry_enqueue_deadline(call, queue, deadline);
	timer_queue_unlock(queue);
	simple_unlock(&call->lock);

	return (old_queue);
}

#if TIMER_ASSERT
unsigned timer_call_dequeue_unlocked_async1;
unsigned timer_call_dequeue_unlocked_async2;
#endif
mpqueue_head_t *
timer_call_dequeue_unlocked(
	timer_call_t		call)
{
	call_entry_t	entry = TCE(call);
	mpqueue_head_t	*old_queue;

	DBG("timer_call_dequeue_unlocked(%p)\n", call);

	simple_lock(&call->lock);
	old_queue = MPQUEUE(entry->queue);
#if TIMER_ASSERT
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
		VM_KERNEL_UNSLIDE_OR_PERM(call),
		call->async_dequeue,
		VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
		0, 0);
#endif
	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (call->async_dequeue) {
			/* collision (1c): timer already dequeued, clear flag */
#if TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE_OR_PERM(call),
				call->async_dequeue,
				VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
				0x1c, 0);
			timer_call_dequeue_unlocked_async1++;
#endif
			call->async_dequeue = FALSE;
			entry->queue = NULL;
		} else {
			timer_call_entry_dequeue(call);
		}
		if (old_queue == timer_longterm_queue)
			timer_longterm_dequeued_locked(call);
		timer_queue_unlock(old_queue);
	}
	simple_unlock(&call->lock);
	return (old_queue);
}


/*
 * Timer call entry locking model
 * ==============================
 *
 * Timer call entries are linked on per-cpu timer queues which are protected
 * by the queue lock and the call entry lock. The locking protocol is:
 *
 *  0) The canonical locking order is timer call entry followed by queue.
 *
 *  1) With only the entry lock held, entry.queue is valid:
 *    1a) NULL: the entry is not queued, or
 *    1b) non-NULL: this queue must be locked before the entry is modified.
 *        After locking the queue, the call.async_dequeue flag must be checked:
 *    1c) TRUE: the entry was removed from the queue by another thread
 *        and we must NULL the entry.queue and reset this flag, or
 *    1d) FALSE: (ie. queued), the entry can be manipulated.
 *
 *  2) If a queue lock is obtained first, the queue is stable:
 *    2a) If a try-lock of a queued entry succeeds, the call can be operated on
 *        and dequeued.
 *    2b) If a try-lock fails, it indicates that another thread is attempting
 *        to change the entry and move it to a different position in this queue
 *        or to a different queue. The entry can be dequeued but it should not
 *        be operated upon since it is being changed. Furthermore, we don't
 *        null the entry.queue pointer (protected by the entry lock we don't
 *        own). Instead, we set the async_dequeue flag -- see (1c).
 *    2c) Same as 2b but occurring when a longterm timer is matured.
 *  3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
 *     should be manipulated with the appropriate timer queue lock held,
 *     to prevent queue traversals from observing inconsistent updates to an
 *     in-flight callout.
 */

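/*
 * Sketch of how a queue-locked walker applies case (2b) above; this is a
 * condensed illustration of the pattern used by the shutdown/expire/migrate
 * loops below, not additional logic:
 *
 *	timer_queue_lock_spin(queue);
 *	call = TIMER_CALL(queue_first(&queue->head));
 *	if (!simple_lock_try(&call->lock)) {
 *		timer_call_entry_dequeue_async(call);	// dequeue, set async_dequeue
 *		continue;				// owner will see (1c) and clean up
 *	}
 *	// entry lock held: safe to dequeue and operate on the call
 */
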
/*
 * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline()
 * cast between pointer types (mpqueue_head_t *) and (queue_t) so that
 * we can use the call_entry_dequeue() and call_entry_enqueue_deadline()
 * methods to operate on timer_call structs as if they are call_entry structs.
 * These structures are identical except for their queue head pointer fields.
 *
 * In the debug case, we assert that the timer call locking protocol
 * is being obeyed.
 */

static boolean_t
timer_call_enter_internal(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint64_t		leeway,
	uint32_t		flags,
	boolean_t		ratelimited)
{
	mpqueue_head_t	*queue = NULL;
	mpqueue_head_t	*old_queue;
	spl_t		s;
	uint64_t	slop;
	uint32_t	urgency;
	uint64_t	sdeadline, ttd;

	assert(call->call_entry.func != NULL);
	s = splclock();

	sdeadline = deadline;
	uint64_t ctime = mach_absolute_time();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ENTER | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE_OR_PERM(call),
		VM_KERNEL_UNSLIDE_OR_PERM(param1), deadline, flags, 0);

	urgency = (flags & TIMER_CALL_URGENCY_MASK);

	boolean_t slop_ratelimited = FALSE;
	slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);

	if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop)
		slop = leeway;

	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (__improbable(deadline < ctime)) {
		uint64_t delta = (ctime - deadline);

		past_deadline_timers++;
		past_deadline_deltas += delta;
		if (delta > past_deadline_longest)
			past_deadline_longest = delta;
		if (delta < past_deadline_shortest)
			past_deadline_shortest = delta;

		deadline = ctime + past_deadline_timer_adjustment;
		sdeadline = deadline;
	}

	if (ratelimited || slop_ratelimited) {
		flags |= TIMER_CALL_RATELIMITED;
	} else {
		flags &= ~TIMER_CALL_RATELIMITED;
	}

	ttd = sdeadline - ctime;
#if CONFIG_DTRACE
	DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
	    timer_call_param_t, TCE(call)->param0, uint32_t, flags,
	    (deadline - sdeadline),
	    (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
#endif

	/* Program timer callout parameters under the appropriate per-CPU or
	 * longterm queue lock. The callout may have been previously enqueued
	 * and in-flight on this or another timer queue.
	 */
	if (!ratelimited && !slop_ratelimited) {
		queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
	}

	if (queue == NULL) {
		queue = timer_queue_assign(deadline);
		old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
	}

#if TIMER_TRACE
	TCE(call)->entry_time = ctime;
#endif

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ENTER | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE_OR_PERM(call),
		(old_queue != NULL), deadline, queue->count, 0);

	splx(s);

	return (old_queue != NULL);
}

/*
 * timer_call_*()
 *	return boolean indicating whether the call was previously queued.
 */
boolean_t
timer_call_enter(
	timer_call_t		call,
	uint64_t		deadline,
	uint32_t		flags)
{
	return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE);
}

boolean_t
timer_call_enter1(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint32_t		flags)
{
	return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE);
}

boolean_t
timer_call_enter_with_leeway(
	timer_call_t		call,
	timer_call_param_t	param1,
	uint64_t		deadline,
	uint64_t		leeway,
	uint32_t		flags,
	boolean_t		ratelimited)
{
	return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
}

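/*
 * Example (illustrative only): arming a previously set up timer_call 100 ms
 * out with 10 ms of leeway, letting the coalescing logic above stretch the
 * hard deadline. The interval, leeway and urgency flag shown here are
 * example choices, not values taken from this file:
 *
 *	uint64_t now, period, leeway, deadline;
 *
 *	now = mach_absolute_time();
 *	nanoseconds_to_absolutetime(100 * NSEC_PER_MSEC, &period);
 *	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &leeway);
 *	deadline = now + period;
 *	timer_call_enter_with_leeway(&my_timer, NULL, deadline, leeway,
 *	    TIMER_CALL_SYS_NORMAL | TIMER_CALL_LEEWAY, FALSE);
 */
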
boolean_t
timer_call_cancel(
	timer_call_t		call)
{
	mpqueue_head_t	*old_queue;
	spl_t		s;

	s = splclock();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_CANCEL | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE_OR_PERM(call),
		TCE(call)->deadline, call->soft_deadline, call->flags, 0);

	old_queue = timer_call_dequeue_unlocked(call);

	if (old_queue != NULL) {
		timer_queue_lock_spin(old_queue);
		if (!queue_empty(&old_queue->head)) {
			timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline);
			timer_call_t thead = (timer_call_t)queue_first(&old_queue->head);
			old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
		}
		else {
			timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX);
			old_queue->earliest_soft_deadline = UINT64_MAX;
		}
		timer_queue_unlock(old_queue);
	}
	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_CANCEL | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE_OR_PERM(call),
		VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
		TCE(call)->deadline - mach_absolute_time(),
		TCE(call)->deadline - TCE(call)->entry_time, 0);
	splx(s);

#if CONFIG_DTRACE
	DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
	    timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0,
	    (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
#endif

	return (old_queue != NULL);
}

static uint32_t	timer_queue_shutdown_lock_skips;
static uint32_t timer_queue_shutdown_discarded;

void
timer_queue_shutdown(
	mpqueue_head_t		*queue)
{
	timer_call_t	call;
	mpqueue_head_t	*new_queue;
	spl_t		s;


	DBG("timer_queue_shutdown(%p)\n", queue);

	s = splclock();

	/* Note comma operator in while expression re-locking each iteration */
	while ((void)timer_queue_lock_spin(queue), !queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));

		if (!simple_lock_try(&call->lock)) {
			/*
			 * case (2b) lock order inversion, dequeue and skip
			 * Don't change the call_entry queue back-pointer
			 * but set the async_dequeue field.
			 */
			timer_queue_shutdown_lock_skips++;
			timer_call_entry_dequeue_async(call);
#if TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE_OR_PERM(call),
				call->async_dequeue,
				VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
				0x2b, 0);
#endif
			timer_queue_unlock(queue);
			continue;
		}

		boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0);

		/* remove entry from old queue */
		timer_call_entry_dequeue(call);
		timer_queue_unlock(queue);

		if (call_local == FALSE) {
			/* and queue it on new, discarding LOCAL timers */
			new_queue = timer_queue_assign(TCE(call)->deadline);
			timer_queue_lock_spin(new_queue);
			timer_call_entry_enqueue_deadline(
				call, new_queue, TCE(call)->deadline);
			timer_queue_unlock(new_queue);
		} else {
			timer_queue_shutdown_discarded++;
		}

		/* The only lingering LOCAL timer should be this thread's
		 * quantum expiration timer.
		 */
		assert((call_local == FALSE) ||
		    (TCE(call)->func == thread_quantum_expire));

		simple_unlock(&call->lock);
	}

	timer_queue_unlock(queue);
	splx(s);
}

static uint32_t	timer_queue_expire_lock_skips;
uint64_t
timer_queue_expire_with_options(
	mpqueue_head_t		*queue,
	uint64_t		deadline,
	boolean_t		rescan)
{
	timer_call_t	call = NULL;
	uint32_t	tc_iterations = 0;
	DBG("timer_queue_expire(%p,)\n", queue);

	uint64_t cur_deadline = deadline;
	timer_queue_lock_spin(queue);

	while (!queue_empty(&queue->head)) {
		/* Upon processing one or more timer calls, refresh the
		 * deadline to account for time elapsed in the callout
		 */
		if (++tc_iterations > 1)
			cur_deadline = mach_absolute_time();

		if (call == NULL)
			call = TIMER_CALL(queue_first(&queue->head));

		if (call->soft_deadline <= cur_deadline) {
			timer_call_func_t	func;
			timer_call_param_t	param0, param1;

			TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0);
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE_OR_PERM(call),
				call->soft_deadline,
				TCE(call)->deadline,
				TCE(call)->entry_time, 0);

			if ((call->flags & TIMER_CALL_RATELIMITED) &&
			    (TCE(call)->deadline > cur_deadline)) {
				if (rescan == FALSE)
					break;
			}

			if (!simple_lock_try(&call->lock)) {
				/* case (2b) lock inversion, dequeue and skip */
				timer_queue_expire_lock_skips++;
				timer_call_entry_dequeue_async(call);
				call = NULL;
				continue;
			}

			timer_call_entry_dequeue(call);

			func = TCE(call)->func;
			param0 = TCE(call)->param0;
			param1 = TCE(call)->param1;

			simple_unlock(&call->lock);
			timer_queue_unlock(queue);

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_CALLOUT | DBG_FUNC_START,
				VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
				VM_KERNEL_UNSLIDE_OR_PERM(param0),
				VM_KERNEL_UNSLIDE_OR_PERM(param1),
				0);

#if CONFIG_DTRACE
			DTRACE_TMR7(callout__start, timer_call_func_t, func,
			    timer_call_param_t, param0, unsigned, call->flags,
			    0, (call->ttd >> 32),
			    (unsigned) (call->ttd & 0xFFFFFFFF), call);
#endif
			/* Maintain time-to-deadline in per-processor data
			 * structure for thread wakeup deadline statistics.
			 */
			uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd));
			*ttdp = call->ttd;
			(*func)(param0, param1);
			*ttdp = 0;
#if CONFIG_DTRACE
			DTRACE_TMR4(callout__end, timer_call_func_t, func,
			    param0, param1, call);
#endif

			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_CALLOUT | DBG_FUNC_END,
				VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
				VM_KERNEL_UNSLIDE_OR_PERM(param0),
				VM_KERNEL_UNSLIDE_OR_PERM(param1),
				0);
			call = NULL;
			timer_queue_lock_spin(queue);
		} else {
			if (__probable(rescan == FALSE)) {
				break;
			} else {
				int64_t skew = TCE(call)->deadline - call->soft_deadline;
				assert(TCE(call)->deadline >= call->soft_deadline);

				/* DRK: On a latency quality-of-service level change,
				 * re-sort potentially rate-limited timers. The platform
				 * layer determines which timers require
				 * this. In the absence of the per-callout
				 * synchronization requirement, a global resort could
				 * be more efficient. The re-sort effectively
				 * annuls all timer adjustments, i.e. the "soft
				 * deadline" is the sort key.
				 */

				if (timer_resort_threshold(skew)) {
					if (__probable(simple_lock_try(&call->lock))) {
						timer_call_entry_dequeue(call);
						timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline);
						simple_unlock(&call->lock);
						call = NULL;
					}
				}
				if (call) {
					call = TIMER_CALL(queue_next(qe(call)));
					if (queue_end(&queue->head, qe(call)))
						break;
				}
			}
		}
	}

	if (!queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));
		cur_deadline = TCE(call)->deadline;
		queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline: call->soft_deadline;
	} else {
		queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
	}

	timer_queue_unlock(queue);

	return (cur_deadline);
}

uint64_t
timer_queue_expire(
	mpqueue_head_t		*queue,
	uint64_t		deadline)
{
	return timer_queue_expire_with_options(queue, deadline, FALSE);
}

extern int serverperfmode;
static uint32_t	timer_queue_migrate_lock_skips;
/*
 * timer_queue_migrate() is called by timer_queue_migrate_cpu()
 * to move timer requests from the local processor (queue_from)
 * to a target processor's (queue_to).
 */
int
timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
{
	timer_call_t	call;
	timer_call_t	head_to;
	int		timers_migrated = 0;

	DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);

	assert(!ml_get_interrupts_enabled());
	assert(queue_from != queue_to);

	if (serverperfmode) {
		/*
		 * if we're running a high end server
		 * avoid migrations... they add latency
		 * and don't save us power under typical
		 * server workloads
		 */
		return -4;
	}

	/*
	 * Take both local (from) and target (to) timer queue locks while
	 * moving the timers from the local queue to the target processor.
	 * We assume that the target is always the boot processor.
	 * But only move if all of the following is true:
	 *  - the target queue is non-empty
	 *  - the local queue is non-empty
	 *  - the local queue's first deadline is later than the target's
	 *  - the local queue contains no non-migrateable "local" call
	 * so that we need not have the target resync.
	 */

	timer_queue_lock_spin(queue_to);

	head_to = TIMER_CALL(queue_first(&queue_to->head));
	if (queue_empty(&queue_to->head)) {
		timers_migrated = -1;
		goto abort1;
	}

	timer_queue_lock_spin(queue_from);

	if (queue_empty(&queue_from->head)) {
		timers_migrated = -2;
		goto abort2;
	}

	call = TIMER_CALL(queue_first(&queue_from->head));
	if (TCE(call)->deadline < TCE(head_to)->deadline) {
		timers_migrated = 0;
		goto abort2;
	}

	/* perform scan for non-migratable timers */
	do {
		if (call->flags & TIMER_CALL_LOCAL) {
			timers_migrated = -3;
			goto abort2;
		}
		call = TIMER_CALL(queue_next(qe(call)));
	} while (!queue_end(&queue_from->head, qe(call)));

	/* migration loop itself -- both queues are locked */
	while (!queue_empty(&queue_from->head)) {
		call = TIMER_CALL(queue_first(&queue_from->head));
		if (!simple_lock_try(&call->lock)) {
			/* case (2b) lock order inversion, dequeue only */
#ifdef TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE_OR_PERM(call),
				VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
				VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
				0x2b, 0);
#endif
			timer_queue_migrate_lock_skips++;
			timer_call_entry_dequeue_async(call);
			continue;
		}
		timer_call_entry_dequeue(call);
		timer_call_entry_enqueue_deadline(
			call, queue_to, TCE(call)->deadline);
		timers_migrated++;
		simple_unlock(&call->lock);
	}
	queue_from->earliest_soft_deadline = UINT64_MAX;
abort2:
	timer_queue_unlock(queue_from);
abort1:
	timer_queue_unlock(queue_to);

	return timers_migrated;
}

void
timer_queue_trace_cpu(int ncpu)
{
	timer_call_nosync_cpu(
		ncpu,
		(void(*)())timer_queue_trace,
		(void*) timer_queue_cpu(ncpu));
}

void
timer_queue_trace(
	mpqueue_head_t		*queue)
{
	timer_call_t	call;
	spl_t		s;

	if (!kdebug_enable)
		return;

	s = splclock();
	timer_queue_lock_spin(queue);

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_QUEUE | DBG_FUNC_START,
		queue->count, mach_absolute_time(), 0, 0, 0);

	if (!queue_empty(&queue->head)) {
		call = TIMER_CALL(queue_first(&queue->head));
		do {
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_QUEUE | DBG_FUNC_NONE,
				call->soft_deadline,
				TCE(call)->deadline,
				TCE(call)->entry_time,
				VM_KERNEL_UNSLIDE(TCE(call)->func),
				0);
			call = TIMER_CALL(queue_next(qe(call)));
		} while (!queue_end(&queue->head, qe(call)));
	}

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_QUEUE | DBG_FUNC_END,
		queue->count, mach_absolute_time(), 0, 0, 0);

	timer_queue_unlock(queue);
	splx(s);
}

void
timer_longterm_dequeued_locked(timer_call_t call)
{
	timer_longterm_t	*tlp = &timer_longterm;

	tlp->dequeues++;
	if (call == tlp->threshold.call)
		tlp->threshold.call = NULL;
}

/*
 * Place a timer call in the longterm list
 * and adjust the next timer callout deadline if the new timer is first.
 */
mpqueue_head_t *
timer_longterm_enqueue_unlocked(timer_call_t	call,
				uint64_t	now,
				uint64_t	deadline,
				mpqueue_head_t	**old_queue,
				uint64_t	soft_deadline,
				uint64_t	ttd,
				timer_call_param_t	param1,
				uint32_t	callout_flags)
{
	timer_longterm_t	*tlp = &timer_longterm;
	boolean_t		update_required = FALSE;
	uint64_t		longterm_threshold;

	longterm_threshold = now + tlp->threshold.interval;

	/*
	 * Return NULL without doing anything if:
	 *  - this timer is local, or
	 *  - the longterm mechanism is disabled, or
	 *  - this deadline is too short.
	 */
	if ((callout_flags & TIMER_CALL_LOCAL) != 0 ||
	    (tlp->threshold.interval == TIMER_LONGTERM_NONE) ||
	    (deadline <= longterm_threshold))
		return NULL;

	/*
	 * Remove timer from its current queue, if any.
	 */
	*old_queue = timer_call_dequeue_unlocked(call);

	/*
	 * Lock the longterm queue, queue timer and determine
	 * whether an update is necessary.
	 */
	assert(!ml_get_interrupts_enabled());
	simple_lock(&call->lock);
	timer_queue_lock_spin(timer_longterm_queue);
	TCE(call)->deadline = deadline;
	TCE(call)->param1 = param1;
	call->ttd = ttd;
	call->soft_deadline = soft_deadline;
	call->flags = callout_flags;
	timer_call_entry_enqueue_tail(call, timer_longterm_queue);

	tlp->enqueues++;

	/*
	 * We'll need to update the currently set threshold timer
	 * if the new deadline is sooner and no sooner update is in flight.
	 */
	if (deadline < tlp->threshold.deadline &&
	    deadline < tlp->threshold.preempted) {
		tlp->threshold.preempted = deadline;
		tlp->threshold.call = call;
		update_required = TRUE;
	}
	timer_queue_unlock(timer_longterm_queue);
	simple_unlock(&call->lock);

	if (update_required) {
		/*
		 * Note: this call expects that calling the master cpu
		 * alone does not involve locking the topo lock.
		 */
		timer_call_nosync_cpu(
			master_cpu,
			(void (*)(void *)) timer_longterm_update,
			(void *)tlp);
	}

	return timer_longterm_queue;
}

/*
 * Scan for timers below the longterm threshold.
 * Move these to the local timer queue (of the boot processor on which the
 * calling thread is running).
 * Both the local (boot) queue and the longterm queue are locked.
 * The scan is similar to the timer migrate sequence but is performed by
 * successively examining each timer on the longterm queue:
 *  - if within the short-term threshold
 *    - enter on the local queue (unless being deleted),
 *  - otherwise:
 *    - if sooner, deadline becomes the next threshold deadline.
 */
void
timer_longterm_scan(timer_longterm_t	*tlp,
		    uint64_t		now)
{
	queue_entry_t	qe;
	timer_call_t	call;
	uint64_t	threshold;
	uint64_t	deadline;
	mpqueue_head_t	*timer_master_queue;

	assert(!ml_get_interrupts_enabled());
	assert(cpu_number() == master_cpu);

	if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
		threshold = now + tlp->threshold.interval;
	else
		threshold = TIMER_LONGTERM_NONE;

	tlp->threshold.deadline = TIMER_LONGTERM_NONE;
	tlp->threshold.call = NULL;

	if (queue_empty(&timer_longterm_queue->head))
		return;

	timer_master_queue = timer_queue_cpu(master_cpu);
	timer_queue_lock_spin(timer_master_queue);

	qe = queue_first(&timer_longterm_queue->head);
	while (!queue_end(&timer_longterm_queue->head, qe)) {
		call = TIMER_CALL(qe);
		deadline = call->soft_deadline;
		qe = queue_next(qe);
		if (!simple_lock_try(&call->lock)) {
			/* case (2c) lock order inversion, dequeue only */
#ifdef TIMER_ASSERT
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE_OR_PERM(call),
				VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
				VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
				0x2c, 0);
#endif
			timer_call_entry_dequeue_async(call);
			continue;
		}
		if (deadline < threshold) {
			/*
			 * This timer needs moving (escalating)
			 * to the local (boot) processor's queue.
			 */
#ifdef TIMER_ASSERT
			if (deadline < now)
				TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
					DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
					VM_KERNEL_UNSLIDE_OR_PERM(call),
					deadline,
					now,
					threshold,
					0);
#endif
			TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
				DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE_OR_PERM(call),
				TCE(call)->deadline,
				TCE(call)->entry_time,
				VM_KERNEL_UNSLIDE(TCE(call)->func),
				0);
			tlp->escalates++;
			timer_call_entry_dequeue(call);
			timer_call_entry_enqueue_deadline(
				call, timer_master_queue, TCE(call)->deadline);
			/*
			 * A side-effect of the following call is to update
			 * the actual hardware deadline if required.
			 */
			(void) timer_queue_assign(deadline);
		} else {
			if (deadline < tlp->threshold.deadline) {
				tlp->threshold.deadline = deadline;
				tlp->threshold.call = call;
			}
		}
		simple_unlock(&call->lock);
	}

	timer_queue_unlock(timer_master_queue);
}

void
timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
{
	timer_longterm_t	*tlp = (timer_longterm_t *) p0;

	timer_longterm_update(tlp);
}

void
timer_longterm_update_locked(timer_longterm_t *tlp)
{
	uint64_t	latency;

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_UPDATE | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
		tlp->threshold.deadline,
		tlp->threshold.preempted,
		tlp->queue.count, 0);

	tlp->scan_time = mach_absolute_time();
	if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
		tlp->threshold.preempts++;
		tlp->threshold.deadline = tlp->threshold.preempted;
		tlp->threshold.preempted = TIMER_LONGTERM_NONE;
		/*
		 * Note: in the unlikely event that a pre-empted timer has
		 * itself been cancelled, we'll simply re-scan later at the
		 * time of the preempted/cancelled timer.
		 */
	} else {
		tlp->threshold.scans++;

		/*
		 * Maintain a moving average of our wakeup latency.
		 * Clamp latency to 0 and ignore above threshold interval.
		 */
		if (tlp->scan_time > tlp->threshold.deadline_set)
			latency = tlp->scan_time - tlp->threshold.deadline_set;
		else
			latency = 0;
		if (latency < tlp->threshold.interval) {
			tlp->threshold.latency_min =
				MIN(tlp->threshold.latency_min, latency);
			tlp->threshold.latency_max =
				MAX(tlp->threshold.latency_max, latency);
			tlp->threshold.latency =
				(tlp->threshold.latency*99 + latency) / 100;
		}

		timer_longterm_scan(tlp, tlp->scan_time);
	}

	tlp->threshold.deadline_set = tlp->threshold.deadline;
	/* The next deadline timer to be set is adjusted */
	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
		tlp->threshold.deadline_set -= tlp->threshold.margin;
		tlp->threshold.deadline_set -= tlp->threshold.latency;
	}

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_UPDATE | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
		tlp->threshold.deadline,
		tlp->threshold.scans,
		tlp->queue.count, 0);
}

void
timer_longterm_update(timer_longterm_t *tlp)
{
	spl_t s = splclock();

	timer_queue_lock_spin(timer_longterm_queue);

	if (cpu_number() != master_cpu)
		panic("timer_longterm_update_master() on non-boot cpu");

	timer_longterm_update_locked(tlp);

	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE)
		timer_call_enter(
			&tlp->threshold.timer,
			tlp->threshold.deadline_set,
			TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);

	timer_queue_unlock(timer_longterm_queue);
	splx(s);
}

1375 | void | |
1376 | timer_longterm_init(void) | |
1377 | { | |
1378 | uint32_t longterm; | |
1379 | timer_longterm_t *tlp = &timer_longterm; | |
1380 | ||
1381 | DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue); | |
1382 | ||
1383 | /* | |
15129b1c A |
1384 | * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD |
1385 | * or TIMER_LONGTERM_NONE (disabled) for server; | |
1386 | * overridden longterm boot-arg | |
39236c6e | 1387 | */ |
15129b1c A |
1388 | tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE |
1389 | : TIMER_LONGTERM_THRESHOLD; | |
39236c6e A |
1390 | if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) { |
1391 | tlp->threshold.interval = (longterm == 0) ? | |
1392 | TIMER_LONGTERM_NONE : | |
1393 | longterm * NSEC_PER_MSEC; | |
1394 | } | |
1395 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) { | |
1396 | printf("Longterm timer threshold: %llu ms\n", | |
1397 | tlp->threshold.interval / NSEC_PER_MSEC); | |
1398 | kprintf("Longterm timer threshold: %llu ms\n", | |
1399 | tlp->threshold.interval / NSEC_PER_MSEC); | |
1400 | nanoseconds_to_absolutetime(tlp->threshold.interval, | |
1401 | &tlp->threshold.interval); | |
1402 | tlp->threshold.margin = tlp->threshold.interval / 10; | |
1403 | tlp->threshold.latency_min = EndOfAllTime; | |
1404 | tlp->threshold.latency_max = 0; | |
1405 | } | |
1406 | ||
1407 | tlp->threshold.preempted = TIMER_LONGTERM_NONE; | |
1408 | tlp->threshold.deadline = TIMER_LONGTERM_NONE; | |
1409 | ||
1410 | lck_attr_setdefault(&timer_longterm_lck_attr); | |
1411 | lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr); | |
1412 | lck_grp_init(&timer_longterm_lck_grp, | |
1413 | "timer_longterm", &timer_longterm_lck_grp_attr); | |
1414 | mpqueue_init(&tlp->queue, | |
1415 | &timer_longterm_lck_grp, &timer_longterm_lck_attr); | |
1416 | ||
1417 | timer_call_setup(&tlp->threshold.timer, | |
1418 | timer_longterm_callout, (timer_call_param_t) tlp); | |
1419 | ||
1420 | timer_longterm_queue = &tlp->queue; | |
1421 | } | |
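/*
 * Illustration (editor's note): the "longterm" boot-arg is interpreted in
 * milliseconds, with 0 disabling the longterm queue outright.  For example,
 * booting with:
 *
 *	longterm=5000
 *
 * yields a 5 s threshold with a 500 ms margin, whereas serverperfmode
 * leaves the mechanism disabled unless the boot-arg overrides it.
 */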
1422 | ||
1423 | enum { | |
1424 | THRESHOLD, QCOUNT, | |
1425 | ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS, | |
1426 | LATENCY, LATENCY_MIN, LATENCY_MAX | |
1427 | }; | |
1428 | uint64_t | |
1429 | timer_sysctl_get(int oid) | |
1430 | { | |
1431 | timer_longterm_t *tlp = &timer_longterm; | |
1432 | ||
1433 | switch (oid) { | |
1434 | case THRESHOLD: | |
1435 | return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ? | |
1436 | 0 : tlp->threshold.interval / NSEC_PER_MSEC; | |
1437 | case QCOUNT: | |
1438 | return tlp->queue.count; | |
1439 | case ENQUEUES: | |
1440 | return tlp->enqueues; | |
1441 | case DEQUEUES: | |
1442 | return tlp->dequeues; | |
1443 | case ESCALATES: | |
1444 | return tlp->escalates; | |
1445 | case SCANS: | |
1446 | return tlp->threshold.scans; | |
1447 | case PREEMPTS: | |
1448 | return tlp->threshold.preempts; | |
1449 | case LATENCY: | |
1450 | return tlp->threshold.latency; | |
1451 | case LATENCY_MIN: | |
1452 | return tlp->threshold.latency_min; | |
1453 | case LATENCY_MAX: | |
1454 | return tlp->threshold.latency_max; | |
1455 | default: | |
1456 | return 0; | |
1457 | } | |
1458 | } | |
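/*
 * Usage sketch (editor's note, not part of the original source): a
 * read-only sysctl or debug handler elsewhere in the kernel could surface
 * these counters, e.g.:
 *
 *	uint64_t qcount  = timer_sysctl_get(QCOUNT);
 *	uint64_t latency = timer_sysctl_get(LATENCY);
 *
 * THRESHOLD is scaled down by NSEC_PER_MSEC on the way out (0 when the
 * longterm queue is disabled); the latency values are returned in raw
 * absolute-time units as maintained above.
 */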
1459 | ||
1460 | /* | |
1461 | * timer_master_scan() is the inverse of timer_longterm_scan() | |
1462 | * since it un-escalates timers to the longterm queue. | |
1463 | */ | |
1464 | static void | |
1465 | timer_master_scan(timer_longterm_t *tlp, | |
1466 | uint64_t now) | |
1467 | { | |
1468 | queue_entry_t qe; | |
1469 | timer_call_t call; | |
1470 | uint64_t threshold; | |
1471 | uint64_t deadline; | |
1472 | mpqueue_head_t *timer_master_queue; | |
1473 | ||
1474 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) | |
1475 | threshold = now + tlp->threshold.interval; | |
1476 | else | |
1477 | threshold = TIMER_LONGTERM_NONE; | |
1478 | ||
1479 | timer_master_queue = timer_queue_cpu(master_cpu); | |
1480 | timer_queue_lock_spin(timer_master_queue); | |
1481 | ||
1482 | qe = queue_first(&timer_master_queue->head); | |
1483 | while (!queue_end(&timer_master_queue->head, qe)) { | |
1484 | call = TIMER_CALL(qe); | |
1485 | deadline = TCE(call)->deadline; | |
1486 | qe = queue_next(qe); |
1487 | if ((call->flags & TIMER_CALL_LOCAL) != 0) | |
1488 | continue; | |
1489 | if (!simple_lock_try(&call->lock)) { | |
1490 | /* case (2c) lock order inversion, dequeue only */ | |
1491 | timer_call_entry_dequeue_async(call); | |
1492 | continue; | |
1493 | } | |
1494 | if (deadline > threshold) { | |
1495 | /* move from master to longterm */ | |
1496 | timer_call_entry_dequeue(call); | |
1497 | timer_call_entry_enqueue_tail(call, timer_longterm_queue); | |
1498 | if (deadline < tlp->threshold.deadline) { | |
1499 | tlp->threshold.deadline = deadline; | |
1500 | tlp->threshold.call = call; | |
1501 | } | |
1502 | } | |
1503 | simple_unlock(&call->lock); | |
1504 | } | |
1505 | timer_queue_unlock(timer_master_queue); | |
1506 | } | |
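/*
 * Illustration (editor's note): with a 1 s threshold interval and now = T,
 * any non-local timer on the master queue due later than T + 1 s is moved
 * back (un-escalated) to the longterm queue, and tlp->threshold.deadline
 * is pulled down to the earliest such deadline so the threshold timer can
 * be re-armed accordingly by the caller.
 */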
1507 | ||
1508 | static void | |
1509 | timer_sysctl_set_threshold(uint64_t value) | |
1510 | { | |
1511 | timer_longterm_t *tlp = &timer_longterm; | |
1512 | spl_t s = splclock(); | |
1513 | boolean_t threshold_increase; | |
1514 | ||
1515 | timer_queue_lock_spin(timer_longterm_queue); | |
1516 | ||
1517 | timer_call_cancel(&tlp->threshold.timer); | |
1518 | ||
1519 | /* | |
1520 | * Set the new threshold and note whether it's increasing. | |
1521 | */ | |
1522 | if (value == 0) { | |
1523 | tlp->threshold.interval = TIMER_LONGTERM_NONE; | |
1524 | threshold_increase = TRUE; | |
1525 | timer_call_cancel(&tlp->threshold.timer); | |
1526 | } else { | |
1527 | uint64_t old_interval = tlp->threshold.interval; | |
1528 | tlp->threshold.interval = value * NSEC_PER_MSEC; | |
1529 | nanoseconds_to_absolutetime(tlp->threshold.interval, | |
1530 | &tlp->threshold.interval); | |
1531 | tlp->threshold.margin = tlp->threshold.interval / 10; | |
1532 | if (old_interval == TIMER_LONGTERM_NONE) | |
1533 | threshold_increase = FALSE; | |
1534 | else | |
1535 | threshold_increase = (tlp->threshold.interval > old_interval); | |
1536 | } | |
1537 | ||
1538 | if (threshold_increase /* or removal */) { | |
1539 | /* Escalate timers from the longterm queue */ | |
1540 | timer_longterm_scan(tlp, mach_absolute_time()); | |
1541 | } else /* decrease or addition */ { | |
1542 | /* | |
1543 | * We scan the local/master queue for timers that are now longterm. | |
1544 | * To be strictly correct, we should scan all processor queues, | |
1545 | * but timer migration results in most timers gravitating to the | |
1546 | * master processor in any case. | |
1547 | */ | |
1548 | timer_master_scan(tlp, mach_absolute_time()); | |
1549 | } | |
1550 | ||
1551 | /* Set new timer accordingly */ | |
1552 | tlp->threshold.deadline_set = tlp->threshold.deadline; | |
1553 | if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { | |
1554 | tlp->threshold.deadline_set -= tlp->threshold.margin; | |
1555 | tlp->threshold.deadline_set -= tlp->threshold.latency; | |
1556 | timer_call_enter( | |
1557 | &tlp->threshold.timer, | |
1558 | tlp->threshold.deadline_set, | |
1559 | TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); | |
1560 | } | |
1561 | ||
1562 | /* Reset stats */ | |
1563 | tlp->enqueues = 0; | |
1564 | tlp->dequeues = 0; | |
1565 | tlp->escalates = 0; | |
1566 | tlp->threshold.scans = 0; | |
1567 | tlp->threshold.preempts = 0; | |
1568 | tlp->threshold.latency = 0; | |
1569 | tlp->threshold.latency_min = EndOfAllTime; | |
1570 | tlp->threshold.latency_max = 0; | |
1571 | ||
1572 | timer_queue_unlock(timer_longterm_queue); | |
1573 | splx(s); | |
1574 | } | |
1575 | ||
1576 | int | |
1577 | timer_sysctl_set(int oid, uint64_t value) | |
1578 | { | |
1579 | switch (oid) { | |
1580 | case THRESHOLD: | |
1581 | timer_call_cpu( | |
1582 | master_cpu, | |
1583 | (void (*)(void *)) timer_sysctl_set_threshold, | |
1584 | (void *) value); | |
1585 | return KERN_SUCCESS; | |
1586 | default: | |
1587 | return KERN_INVALID_ARGUMENT; | |
1588 | } | |
1589 | } | |
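/*
 * Usage sketch (editor's note, not part of the original source): threshold
 * changes are funneled to the boot processor via timer_call_cpu(), so a
 * writable sysctl handler would simply pass a millisecond value:
 *
 *	(void) timer_sysctl_set(THRESHOLD, 5000);	-- 5 s threshold
 *	(void) timer_sysctl_set(THRESHOLD, 0);		-- disable longterm queue
 *
 * All other oids are read-only and yield KERN_INVALID_ARGUMENT here.
 */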
1590 | |
1591 | ||
1592 | /* Select timer coalescing window based on per-task quality-of-service hints */ | |
1593 | static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { | |
1594 | uint32_t latency_qos; | |
1595 | boolean_t adjusted = FALSE; | |
1596 | task_t ctask = t->task; | |
1597 | ||
1598 | if (ctask) { | |
1599 | latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS); | |
1600 | ||
1601 | assert(latency_qos <= NUM_LATENCY_QOS_TIERS); | |
1602 | ||
1603 | if (latency_qos) { | |
1604 | *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1]; | |
1605 | *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1]; | |
1606 | *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1]; | |
1607 | adjusted = TRUE; | |
1608 | } | |
1609 | } | |
1610 | return adjusted; | |
1611 | } | |
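/*
 * Illustration (editor's note; the tier values below are assumptions, the
 * real tables in tcoal_prio_params are populated elsewhere in this file):
 * assuming a latency-QoS tier with scale (shift) 3 and an abstime ceiling
 * equivalent to 75 ms, a timer 40 ms out would be granted
 *
 *	leeway = MIN(40 ms >> 3, 75 ms) = 5 ms
 *
 * by timer_call_slop() below.
 */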
1612 | ||
1613 | ||
1614 | /* Adjust timer deadlines based on the priority of the thread and the | |
1615 | * urgency value provided at timeout establishment. With this mechanism, | |
1616 | * timers are no longer necessarily sorted in order of soft deadline | |
1617 | * on a given timer queue, i.e. they may be differentially skewed. | |
1618 | * In the current scheme, this could lead to fewer pending timers | |
1619 | * processed than is technically possible when the HW deadline arrives. | |
1620 | */ | |
1621 | static void | |
1622 | timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { | |
1623 | int16_t tpri = cthread->sched_pri; | |
1624 | if ((urgency & TIMER_CALL_USER_MASK) != 0) { | |
1625 | if (tpri >= BASEPRI_RTQUEUES || | |
1626 | urgency == TIMER_CALL_USER_CRITICAL) { | |
1627 | *tshift = tcoal_prio_params.timer_coalesce_rt_shift; | |
1628 | *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max; | |
1629 | TCOAL_PRIO_STAT(rt_tcl); | |
1630 | } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) || | |
1631 | (urgency == TIMER_CALL_USER_BACKGROUND)) { | |
1632 | /* Determine if timer should be subjected to a lower QoS */ | |
1633 | if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { | |
1634 | if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) { | |
1635 | return; | |
1636 | } else { | |
1637 | *pratelimited = FALSE; | |
1638 | } | |
1639 | } | |
1640 | *tshift = tcoal_prio_params.timer_coalesce_bg_shift; | |
1641 | *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; | |
1642 | TCOAL_PRIO_STAT(bg_tcl); | |
1643 | } else if (tpri >= MINPRI_KERNEL) { | |
1644 | *tshift = tcoal_prio_params.timer_coalesce_kt_shift; | |
1645 | *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; | |
1646 | TCOAL_PRIO_STAT(kt_tcl); | |
1647 | } else if (cthread->sched_mode == TH_MODE_FIXED) { | |
1648 | *tshift = tcoal_prio_params.timer_coalesce_fp_shift; | |
1649 | *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max; | |
1650 | TCOAL_PRIO_STAT(fp_tcl); | |
1651 | } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { | |
1652 | TCOAL_PRIO_STAT(qos_tcl); | |
1653 | } else if (cthread->sched_mode == TH_MODE_TIMESHARE) { | |
1654 | *tshift = tcoal_prio_params.timer_coalesce_ts_shift; | |
1655 | *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max; | |
1656 | TCOAL_PRIO_STAT(ts_tcl); | |
1657 | } else { | |
1658 | TCOAL_PRIO_STAT(nc_tcl); | |
1659 | } | |
1660 | } else if (urgency == TIMER_CALL_SYS_BACKGROUND) { | |
1661 | *tshift = tcoal_prio_params.timer_coalesce_bg_shift; | |
1662 | *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; | |
1663 | TCOAL_PRIO_STAT(bg_tcl); | |
1664 | } else { | |
1665 | *tshift = tcoal_prio_params.timer_coalesce_kt_shift; | |
1666 | *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; | |
1667 | TCOAL_PRIO_STAT(kt_tcl); | |
1668 | } | |
1669 | } | |
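/*
 * Editor's note (summary of the ladder above, not original source text):
 * for user timers, a realtime thread or TIMER_CALL_USER_CRITICAL urgency
 * always takes the realtime window; a background thread keeps a task
 * latency-QoS window only when it is wider than the background ceiling,
 * otherwise the background shift/ceiling apply and rate-limiting is
 * cleared.  Kernel, fixed-priority, QoS-tagged and timeshare threads
 * follow, in that order of precedence.
 */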
1670 | ||
1671 | ||
1672 | int timer_user_idle_level; | |
1673 | ||
1674 | uint64_t | |
1675 | timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited) | |
1676 | { | |
1677 | int32_t tcs_shift = 0; | |
1678 | uint64_t tcs_max_abstime = 0; | |
1679 | uint64_t adjval; | |
1680 | uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK); | |
1681 | ||
1682 | if (mach_timer_coalescing_enabled && | |
1683 | (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) { | |
1684 | timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited); | |
1685 | ||
1686 | if (tcs_shift >= 0) | |
1687 | adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime); | |
1688 | else | |
1689 | adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime); | |
1690 | /* Apply adjustments derived from "user idle level" heuristic */ | |
1691 | adjval += (adjval * timer_user_idle_level) >> 7; | |
1692 | return adjval; | |
1693 | } else { | |
1694 | return 0; | |
1695 | } | |
1696 | } | |
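/*
 * Worked example (editor's note; shift and ceiling values are illustrative,
 * and absolute time is taken as ~1 ns per unit for simplicity): for a
 * timeshare thread with shift = 10 and a 100 ms ceiling, a user timer due
 * 100 ms out receives
 *
 *	adjval = MIN(100 ms >> 10, 100 ms) ~= 98 us
 *
 * and at timer_user_idle_level = 64 the window widens by 50%:
 *
 *	adjval += (adjval * 64) >> 7;	-- adjval ~= 146 us
 *
 * TIMER_CALL_SYS_CRITICAL timers always get zero slop.
 */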
1697 | ||
1698 | int | |
1699 | timer_get_user_idle_level(void) { | |
1700 | return timer_user_idle_level; | |
1701 | } | |
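/*
 * Editor's note: timer_user_idle_level acts as a 0..128 scale factor on
 * coalescing windows; a level of 128 doubles every computed slop value,
 * while 0 leaves it unchanged.  Lowering the level via the setter below
 * triggers ml_timer_evaluate() so already-coalesced timers are re-examined
 * under the tighter policy.
 */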
1702 | ||
1703 | kern_return_t timer_set_user_idle_level(int ilevel) { | |
1704 | boolean_t do_reeval = FALSE; | |
1705 | ||
1706 | if ((ilevel < 0) || (ilevel > 128)) | |
1707 | return KERN_INVALID_ARGUMENT; | |
1708 | ||
1709 | if (ilevel < timer_user_idle_level) { | |
1710 | do_reeval = TRUE; | |
1711 | } | |
1712 | ||
1713 | timer_user_idle_level = ilevel; | |
1714 | ||
1715 | if (do_reeval) | |
1716 | ml_timer_evaluate(); | |
1717 | ||
1718 | return KERN_SUCCESS; | |
1719 | } |