Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
c910b4d9 | 2 | * Copyright (c) 1993-2008 Apple Inc. All rights reserved. |
1c79356b | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * Timer interrupt callout module. | |
1c79356b A |
30 | */ |
31 | ||
32 | #include <mach/mach_types.h> | |
33 | ||
34 | #include <kern/clock.h> | |
3e170ce0 | 35 | #include <kern/smp.h> |
9bccf70c | 36 | #include <kern/processor.h> |
1c79356b | 37 | #include <kern/timer_call.h> |
c910b4d9 | 38 | #include <kern/timer_queue.h> |
1c79356b | 39 | #include <kern/call_entry.h> |
39236c6e | 40 | #include <kern/thread.h> |
1c79356b | 41 | |
0c530ab8 A |
42 | #include <sys/kdebug.h> |
43 | ||
4b17d6b6 | 44 | #if CONFIG_DTRACE |
2d21ac55 A |
45 | #include <mach/sdt.h> |
46 | #endif | |
1c79356b | 47 | |
1c79356b | 48 | |
6d2010ae A |
49 | #if DEBUG |
50 | #define TIMER_ASSERT 1 | |
51 | #endif | |
52 | ||
53 | //#define TIMER_ASSERT 1 | |
54 | //#define TIMER_DBG 1 | |
55 | ||
56 | #if TIMER_DBG | |
57 | #define DBG(x...) kprintf("DBG: " x); | |
58 | #else | |
59 | #define DBG(x...) | |
60 | #endif | |
61 | ||
39236c6e A |
62 | #if TIMER_TRACE |
63 | #define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST | |
64 | #else | |
65 | #define TIMER_KDEBUG_TRACE(x...) | |
66 | #endif | |
67 | ||
68 | ||
6d2010ae A |
69 | lck_grp_t timer_call_lck_grp; |
70 | lck_attr_t timer_call_lck_attr; | |
71 | lck_grp_attr_t timer_call_lck_grp_attr; | |
72 | ||
39236c6e A |
73 | lck_grp_t timer_longterm_lck_grp; |
74 | lck_attr_t timer_longterm_lck_attr; | |
75 | lck_grp_attr_t timer_longterm_lck_grp_attr; | |
76 | ||
3e170ce0 A |
77 | /* Timer queue lock must be acquired with interrupts disabled (under splclock()) */ |
78 | #if __SMP__ | |
39236c6e | 79 | #define timer_queue_lock_spin(queue) \ |
6d2010ae A |
80 | lck_mtx_lock_spin_always(&queue->lock_data) |
81 | ||
39236c6e | 82 | #define timer_queue_unlock(queue) \ |
6d2010ae | 83 | lck_mtx_unlock_always(&queue->lock_data) |
3e170ce0 A |
84 | #else |
85 | #define timer_queue_lock_spin(queue) (void)1 | |
86 | #define timer_queue_unlock(queue) (void)1 | |
87 | #endif | |
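The rule stated in the comment above is followed throughout this file. As a minimal sketch (illustrative only — the `example_` routine does not exist in xnu), the pattern pairs the queue lock with splclock()/splx():

```c
/*
 * Illustrative sketch only: the discipline noted above, as used by
 * timer_queue_shutdown() and timer_queue_trace() later in this file.
 * Interrupts are disabled via splclock() before the queue lock is taken
 * and restored only after the lock is dropped.
 */
static __inline__ void
example_with_timer_queue_locked(mpqueue_head_t *queue)
{
	spl_t s = splclock();		/* interrupts off first */

	timer_queue_lock_spin(queue);
	/* ... examine or modify the queue here ... */
	timer_queue_unlock(queue);

	splx(s);			/* restore previous interrupt level */
}
```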
6d2010ae A |
88 | |
89 | #define QUEUE(x) ((queue_t)(x)) | |
90 | #define MPQUEUE(x) ((mpqueue_head_t *)(x)) | |
91 | #define TIMER_CALL(x) ((timer_call_t)(x)) | |
fe8ab488 | 92 | #define TCE(x) (&(x->call_entry)) |
39236c6e A |
93 | /* |
94 | * The longterm timer object is a global structure holding all timers | |
95 | * beyond the short-term, local timer queue threshold. The boot processor | |
96 | * is responsible for moving each timer to its local timer queue | |
97 | * if and when that timer becomes due within the threshold. | |
98 | */ | |
99 | #define TIMER_LONGTERM_NONE EndOfAllTime | |
100 | #if defined(__x86_64__) | |
101 | #define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC) | |
102 | #else | |
103 | #define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE | |
104 | #endif | |
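To make the role of these thresholds concrete, here is a hypothetical predicate (illustration only; the `example_` helper is not part of xnu, and the real test is performed at the top of timer_longterm_enqueue_unlocked() further below): a timer is a long-term candidate only when the mechanism is enabled and its deadline lies beyond now plus the threshold interval.

```c
/*
 * Hypothetical helper, for illustration only.  Compare the checks at the
 * start of timer_longterm_enqueue_unlocked().
 */
static __inline__ boolean_t
example_is_longterm(uint64_t now, uint64_t deadline, uint64_t threshold_interval)
{
	if (threshold_interval == TIMER_LONGTERM_NONE)
		return FALSE;			/* long-term handling disabled */
	return (deadline > now + threshold_interval) ? TRUE : FALSE;
}
```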
105 | ||
106 | typedef struct { | |
107 | uint64_t interval; /* longterm timer interval */ | |
108 | uint64_t margin; /* fudge factor (10% of interval) */ | |
109 | uint64_t deadline; /* first/soonest longterm deadline */ | |
110 | uint64_t preempted; /* sooner timer has pre-empted */ | |
111 | timer_call_t call; /* first/soonest longterm timer call */ | |
112 | uint64_t deadline_set; /* next timer set */ | |
113 | timer_call_data_t timer; /* timer used by threshold management */ | |
114 | /* Stats: */ | |
115 | uint64_t scans; /* num threshold timer scans */ | |
116 | uint64_t preempts; /* num threshold reductions */ | |
117 | uint64_t latency; /* average threshold latency */ | |
118 | uint64_t latency_min; /* minimum threshold latency */ | |
119 | uint64_t latency_max; /* maximum threshold latency */ | |
120 | } threshold_t; | |
121 | ||
122 | typedef struct { | |
123 | mpqueue_head_t queue; /* longterm timer list */ | |
124 | uint64_t enqueues; /* num timers queued */ | |
125 | uint64_t dequeues; /* num timers dequeued */ | |
126 | uint64_t escalates; /* num timers becoming shortterm */ | |
127 | uint64_t scan_time; /* last time the list was scanned */ | |
128 | threshold_t threshold; /* longterm timer threshold */ | |
129 | } timer_longterm_t; | |
130 | ||
131 | timer_longterm_t timer_longterm; | |
132 | ||
133 | static mpqueue_head_t *timer_longterm_queue = NULL; | |
134 | ||
135 | static void timer_longterm_init(void); | |
136 | static void timer_longterm_callout( | |
137 | timer_call_param_t p0, | |
138 | timer_call_param_t p1); | |
139 | extern void timer_longterm_scan( | |
140 | timer_longterm_t *tlp, | |
141 | uint64_t now); | |
142 | static void timer_longterm_update( | |
143 | timer_longterm_t *tlp); | |
144 | static void timer_longterm_update_locked( | |
145 | timer_longterm_t *tlp); | |
146 | static mpqueue_head_t * timer_longterm_enqueue_unlocked( | |
147 | timer_call_t call, | |
148 | uint64_t now, | |
149 | uint64_t deadline, | |
fe8ab488 A |
150 | mpqueue_head_t ** old_queue, |
151 | uint64_t soft_deadline, | |
152 | uint64_t ttd, | |
153 | timer_call_param_t param1, | |
154 | uint32_t callout_flags); | |
39236c6e A |
155 | static void timer_longterm_dequeued_locked( |
156 | timer_call_t call); | |
316670eb A |
157 | |
158 | uint64_t past_deadline_timers; | |
159 | uint64_t past_deadline_deltas; | |
160 | uint64_t past_deadline_longest; | |
161 | uint64_t past_deadline_shortest = ~0ULL; | |
162 | enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000}; | |
163 | ||
164 | uint64_t past_deadline_timer_adjustment; | |
165 | ||
39236c6e | 166 | static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited); |
6d2010ae A |
167 | boolean_t mach_timer_coalescing_enabled = TRUE; |
168 | ||
169 | mpqueue_head_t *timer_call_enqueue_deadline_unlocked( | |
170 | timer_call_t call, | |
171 | mpqueue_head_t *queue, | |
fe8ab488 A |
172 | uint64_t deadline, |
173 | uint64_t soft_deadline, | |
174 | uint64_t ttd, | |
175 | timer_call_param_t param1, | |
176 | uint32_t flags); | |
6d2010ae A |
177 | |
178 | mpqueue_head_t *timer_call_dequeue_unlocked( | |
179 | timer_call_t call); | |
180 | ||
fe8ab488 A |
181 | timer_coalescing_priority_params_t tcoal_prio_params; |
182 | ||
183 | #if TCOAL_PRIO_STATS | |
184 | int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl; | |
185 | #define TCOAL_PRIO_STAT(x) (x++) | |
186 | #else | |
187 | #define TCOAL_PRIO_STAT(x) | |
188 | #endif | |
189 | ||
190 | static void | |
191 | timer_call_init_abstime(void) | |
192 | { | |
193 | int i; | |
194 | uint64_t result; | |
195 | timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params(); | |
196 | nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment); | |
197 | nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result); | |
198 | tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result; | |
199 | nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result); | |
200 | tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result; | |
201 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result); | |
202 | tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result; | |
203 | tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift; | |
204 | tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift; | |
205 | tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift; | |
206 | tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift; | |
207 | tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift; | |
208 | ||
209 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max, | |
210 | &tcoal_prio_params.timer_coalesce_rt_abstime_max); | |
211 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max, | |
212 | &tcoal_prio_params.timer_coalesce_bg_abstime_max); | |
213 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max, | |
214 | &tcoal_prio_params.timer_coalesce_kt_abstime_max); | |
215 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max, | |
216 | &tcoal_prio_params.timer_coalesce_fp_abstime_max); | |
217 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max, | |
218 | &tcoal_prio_params.timer_coalesce_ts_abstime_max); | |
219 | ||
220 | for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) { | |
221 | tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i]; | |
222 | nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i], | |
223 | &tcoal_prio_params.latency_qos_abstime_max[i]); | |
224 | tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i]; | |
225 | } | |
226 | } | |
227 | ||
1c79356b A |
228 | |
229 | void | |
39236c6e | 230 | timer_call_init(void) |
1c79356b | 231 | { |
6d2010ae A |
232 | lck_attr_setdefault(&timer_call_lck_attr); |
233 | lck_grp_attr_setdefault(&timer_call_lck_grp_attr); | |
234 | lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr); | |
39236c6e A |
235 | |
236 | timer_longterm_init(); | |
fe8ab488 | 237 | timer_call_init_abstime(); |
1c79356b A |
238 | } |
239 | ||
6d2010ae A |
240 | |
241 | void | |
39236c6e | 242 | timer_call_queue_init(mpqueue_head_t *queue) |
6d2010ae | 243 | { |
39236c6e | 244 | DBG("timer_call_queue_init(%p)\n", queue); |
6d2010ae A |
245 | mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr); |
246 | } | |
247 | ||
248 | ||
1c79356b A |
249 | void |
250 | timer_call_setup( | |
251 | timer_call_t call, | |
252 | timer_call_func_t func, | |
253 | timer_call_param_t param0) | |
254 | { | |
6d2010ae | 255 | DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0); |
fe8ab488 | 256 | call_entry_setup(TCE(call), func, param0); |
6d2010ae A |
257 | simple_lock_init(&(call)->lock, 0); |
258 | call->async_dequeue = FALSE; | |
1c79356b | 259 | } |
6d2010ae A |
260 | #if TIMER_ASSERT |
261 | static __inline__ mpqueue_head_t * | |
262 | timer_call_entry_dequeue( | |
263 | timer_call_t entry) | |
264 | { | |
fe8ab488 | 265 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
6d2010ae A |
266 | |
267 | if (!hw_lock_held((hw_lock_t)&entry->lock)) | |
268 | panic("_call_entry_dequeue() " | |
269 | "entry %p is not locked\n", entry); | |
270 | /* | |
271 | * XXX The queue lock is actually a mutex in spin mode | |
272 | * but there's no way to test for it being held | |
273 | * so we pretend it's a spinlock! | |
274 | */ | |
275 | if (!hw_lock_held((hw_lock_t)&old_queue->lock_data)) | |
276 | panic("_call_entry_dequeue() " | |
277 | "queue %p is not locked\n", old_queue); | |
278 | ||
fe8ab488 | 279 | call_entry_dequeue(TCE(entry)); |
39236c6e | 280 | old_queue->count--; |
c910b4d9 | 281 | |
6d2010ae A |
282 | return (old_queue); |
283 | } | |
1c79356b | 284 | |
6d2010ae A |
285 | static __inline__ mpqueue_head_t * |
286 | timer_call_entry_enqueue_deadline( | |
287 | timer_call_t entry, | |
288 | mpqueue_head_t *queue, | |
289 | uint64_t deadline) | |
290 | { | |
fe8ab488 | 291 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
1c79356b | 292 | |
6d2010ae A |
293 | if (!hw_lock_held((hw_lock_t)&entry->lock)) |
294 | panic("_call_entry_enqueue_deadline() " | |
295 | "entry %p is not locked\n", entry); | |
296 | /* XXX More lock pretense: */ | |
297 | if (!hw_lock_held((hw_lock_t)&queue->lock_data)) | |
298 | panic("_call_entry_enqueue_deadline() " | |
299 | "queue %p is not locked\n", queue); | |
300 | if (old_queue != NULL && old_queue != queue) | |
301 | panic("_call_entry_enqueue_deadline() " | |
302 | "old_queue %p != queue", old_queue); | |
1c79356b | 303 | |
fe8ab488 | 304 | call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline); |
1c79356b | 305 | |
39236c6e A |
306 | /* For efficiency, track the earliest soft deadline on the queue, so that |
307 | * fuzzy decisions can be made without lock acquisitions. | |
308 | */ | |
fe8ab488 A |
309 | timer_call_t thead = (timer_call_t)queue_first(&queue->head); |
310 | ||
311 | queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; | |
39236c6e A |
312 | |
313 | if (old_queue) | |
314 | old_queue->count--; | |
315 | queue->count++; | |
316 | ||
6d2010ae A |
317 | return (old_queue); |
318 | } | |
1c79356b | 319 | |
6d2010ae | 320 | #else |
1c79356b | 321 | |
6d2010ae A |
322 | static __inline__ mpqueue_head_t * |
323 | timer_call_entry_dequeue( | |
324 | timer_call_t entry) | |
325 | { | |
fe8ab488 | 326 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
39236c6e | 327 | |
fe8ab488 | 328 | call_entry_dequeue(TCE(entry)); |
39236c6e A |
329 | old_queue->count--; |
330 | ||
331 | return old_queue; | |
6d2010ae | 332 | } |
c910b4d9 | 333 | |
6d2010ae A |
334 | static __inline__ mpqueue_head_t * |
335 | timer_call_entry_enqueue_deadline( | |
336 | timer_call_t entry, | |
337 | mpqueue_head_t *queue, | |
338 | uint64_t deadline) | |
339 | { | |
fe8ab488 | 340 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
39236c6e | 341 | |
fe8ab488 | 342 | call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline); |
39236c6e A |
343 | |
344 | /* For efficiency, track the earliest soft deadline on the queue, | |
345 | * so that fuzzy decisions can be made without lock acquisitions. | |
346 | */ | |
fe8ab488 A |
347 | |
348 | timer_call_t thead = (timer_call_t)queue_first(&queue->head); | |
349 | queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; | |
39236c6e A |
350 | |
351 | if (old_queue) | |
352 | old_queue->count--; | |
353 | queue->count++; | |
354 | ||
355 | return old_queue; | |
1c79356b A |
356 | } |
357 | ||
6d2010ae A |
358 | #endif |
359 | ||
39236c6e A |
360 | static __inline__ void |
361 | timer_call_entry_enqueue_tail( | |
362 | timer_call_t entry, | |
363 | mpqueue_head_t *queue) | |
364 | { | |
fe8ab488 | 365 | call_entry_enqueue_tail(TCE(entry), QUEUE(queue)); |
39236c6e A |
366 | queue->count++; |
367 | return; | |
368 | } | |
369 | ||
370 | /* | |
371 | * Remove the timer entry from its queue, but don't change the queue pointer; | |
372 | * instead, set the async_dequeue flag. This is locking case 2b. | |
373 | */ | |
374 | static __inline__ void | |
375 | timer_call_entry_dequeue_async( | |
376 | timer_call_t entry) | |
377 | { | |
fe8ab488 | 378 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
39236c6e A |
379 | if (old_queue) { |
380 | old_queue->count--; | |
381 | (void) remque(qe(entry)); | |
382 | entry->async_dequeue = TRUE; | |
383 | } | |
384 | return; | |
385 | } | |
386 | ||
6d2010ae A |
387 | #if TIMER_ASSERT |
388 | unsigned timer_call_enqueue_deadline_unlocked_async1; | |
389 | unsigned timer_call_enqueue_deadline_unlocked_async2; | |
390 | #endif | |
391 | /* | |
392 | * Assumes call_entry and queues unlocked, interrupts disabled. | |
393 | */ | |
394 | __inline__ mpqueue_head_t * | |
395 | timer_call_enqueue_deadline_unlocked( | |
396 | timer_call_t call, | |
397 | mpqueue_head_t *queue, | |
fe8ab488 A |
398 | uint64_t deadline, |
399 | uint64_t soft_deadline, | |
400 | uint64_t ttd, | |
401 | timer_call_param_t param1, | |
402 | uint32_t callout_flags) | |
1c79356b | 403 | { |
fe8ab488 | 404 | call_entry_t entry = TCE(call); |
6d2010ae | 405 | mpqueue_head_t *old_queue; |
1c79356b | 406 | |
6d2010ae | 407 | DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue); |
1c79356b | 408 | |
6d2010ae | 409 | simple_lock(&call->lock); |
fe8ab488 | 410 | |
6d2010ae | 411 | old_queue = MPQUEUE(entry->queue); |
fe8ab488 | 412 | |
6d2010ae | 413 | if (old_queue != NULL) { |
39236c6e | 414 | timer_queue_lock_spin(old_queue); |
6d2010ae | 415 | if (call->async_dequeue) { |
39236c6e | 416 | /* collision (1c): timer already dequeued, clear flag */ |
6d2010ae | 417 | #if TIMER_ASSERT |
39236c6e A |
418 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
419 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
4bd07ac2 | 420 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
39236c6e | 421 | call->async_dequeue, |
4bd07ac2 | 422 | VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue), |
39236c6e | 423 | 0x1c, 0); |
6d2010ae A |
424 | timer_call_enqueue_deadline_unlocked_async1++; |
425 | #endif | |
39236c6e | 426 | call->async_dequeue = FALSE; |
6d2010ae | 427 | entry->queue = NULL; |
39236c6e A |
428 | } else if (old_queue != queue) { |
429 | timer_call_entry_dequeue(call); | |
6d2010ae A |
430 | #if TIMER_ASSERT |
431 | timer_call_enqueue_deadline_unlocked_async2++; | |
432 | #endif | |
433 | } | |
39236c6e A |
434 | if (old_queue == timer_longterm_queue) |
435 | timer_longterm_dequeued_locked(call); | |
6d2010ae | 436 | if (old_queue != queue) { |
39236c6e A |
437 | timer_queue_unlock(old_queue); |
438 | timer_queue_lock_spin(queue); | |
6d2010ae A |
439 | } |
440 | } else { | |
39236c6e | 441 | timer_queue_lock_spin(queue); |
6d2010ae | 442 | } |
1c79356b | 443 | |
fe8ab488 A |
444 | call->soft_deadline = soft_deadline; |
445 | call->flags = callout_flags; | |
446 | TCE(call)->param1 = param1; | |
447 | call->ttd = ttd; | |
448 | ||
6d2010ae | 449 | timer_call_entry_enqueue_deadline(call, queue, deadline); |
39236c6e | 450 | timer_queue_unlock(queue); |
6d2010ae | 451 | simple_unlock(&call->lock); |
1c79356b | 452 | |
c910b4d9 A |
453 | return (old_queue); |
454 | } | |
1c79356b | 455 | |
6d2010ae A |
456 | #if TIMER_ASSERT |
457 | unsigned timer_call_dequeue_unlocked_async1; | |
458 | unsigned timer_call_dequeue_unlocked_async2; | |
459 | #endif | |
460 | mpqueue_head_t * | |
461 | timer_call_dequeue_unlocked( | |
462 | timer_call_t call) | |
c910b4d9 | 463 | { |
fe8ab488 | 464 | call_entry_t entry = TCE(call); |
6d2010ae | 465 | mpqueue_head_t *old_queue; |
1c79356b | 466 | |
6d2010ae | 467 | DBG("timer_call_dequeue_unlocked(%p)\n", call); |
1c79356b | 468 | |
6d2010ae A |
469 | simple_lock(&call->lock); |
470 | old_queue = MPQUEUE(entry->queue); | |
39236c6e A |
471 | #if TIMER_ASSERT |
472 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
473 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
4bd07ac2 | 474 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
39236c6e | 475 | call->async_dequeue, |
4bd07ac2 | 476 | VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue), |
39236c6e A |
477 | 0, 0); |
478 | #endif | |
6d2010ae | 479 | if (old_queue != NULL) { |
39236c6e | 480 | timer_queue_lock_spin(old_queue); |
6d2010ae | 481 | if (call->async_dequeue) { |
39236c6e | 482 | /* collision (1c): timer already dequeued, clear flag */ |
6d2010ae | 483 | #if TIMER_ASSERT |
39236c6e A |
484 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
485 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
4bd07ac2 | 486 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
39236c6e | 487 | call->async_dequeue, |
4bd07ac2 | 488 | VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue), |
39236c6e | 489 | 0x1c, 0); |
6d2010ae A |
490 | timer_call_dequeue_unlocked_async1++; |
491 | #endif | |
39236c6e A |
492 | call->async_dequeue = FALSE; |
493 | entry->queue = NULL; | |
6d2010ae | 494 | } else { |
39236c6e | 495 | timer_call_entry_dequeue(call); |
6d2010ae | 496 | } |
39236c6e A |
497 | if (old_queue == timer_longterm_queue) |
498 | timer_longterm_dequeued_locked(call); | |
499 | timer_queue_unlock(old_queue); | |
6d2010ae A |
500 | } |
501 | simple_unlock(&call->lock); | |
c910b4d9 | 502 | return (old_queue); |
1c79356b A |
503 | } |
504 | ||
fe8ab488 A |
505 | |
506 | /* | |
507 | * Timer call entry locking model | |
508 | * ============================== | |
509 | * | |
510 | * Timer call entries are linked on per-cpu timer queues which are protected | |
511 | * by the queue lock and the call entry lock. The locking protocol is: | |
512 | * | |
513 | * 0) The canonical locking order is timer call entry followed by queue. | |
514 | * | |
515 | * 1) With only the entry lock held, entry.queue is valid: | |
516 | * 1a) NULL: the entry is not queued, or | |
517 | * 1b) non-NULL: this queue must be locked before the entry is modified. | |
518 | * After locking the queue, the call.async_dequeue flag must be checked: | |
519 | * 1c) TRUE: the entry was removed from the queue by another thread | |
520 | * and we must NULL the entry.queue and reset this flag, or | |
521 | * 1d) FALSE: (ie. queued), the entry can be manipulated. | |
522 | * | |
523 | * 2) If a queue lock is obtained first, the queue is stable: | |
524 | * 2a) If a try-lock of a queued entry succeeds, the call can be operated on | |
525 | * and dequeued. | |
526 | * 2b) If a try-lock fails, it indicates that another thread is attempting | |
527 | * to change the entry and move it to a different position in this queue | |
528 | * or to a different queue. The entry can be dequeued but it should not be | |
529 | * operated upon since it is being changed. Furthermore, we don't null | |
530 | * the entry.queue pointer (protected by the entry lock we don't own). | |
531 | * Instead, we set the async_dequeue flag -- see (1c). | |
532 | * 2c) Same as 2b but occurring when a longterm timer is matured. | |
533 | * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.) | |
534 | * should be manipulated with the appropriate timer queue lock held, | |
535 | * to prevent queue traversals from observing inconsistent | |
536 | * updates to an in-flight callout. | |
537 | */ | |
538 | ||
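A condensed sketch of cases 1b-1d above (illustrative only; the `example_` routine is not part of xnu, and the real code paths are timer_call_enqueue_deadline_unlocked() and timer_call_dequeue_unlocked() earlier in this file):

```c
static void
example_entry_then_queue(timer_call_t call)
{
	simple_lock(&call->lock);			/* rule 0: entry lock before queue lock */

	mpqueue_head_t *queue = MPQUEUE(TCE(call)->queue);
	if (queue != NULL) {				/* case 1b: entry appears to be queued */
		timer_queue_lock_spin(queue);
		if (call->async_dequeue) {		/* case 1c: another thread already removed it */
			call->async_dequeue = FALSE;
			TCE(call)->queue = NULL;
		} else {
			/* case 1d: still queued; safe to manipulate the entry here */
		}
		timer_queue_unlock(queue);
	}

	simple_unlock(&call->lock);
}
```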
539 | /* | |
540 | * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline() | |
541 | * cast between pointer types (mpqueue_head_t *) and (queue_t) so that | |
542 | * we can use the call_entry_dequeue() and call_entry_enqueue_deadline() | |
543 | * methods to operate on timer_call structs as if they are call_entry structs. | |
544 | * These structures are identical except for their queue head pointer fields. | |
545 | * | |
546 | * In the debug case, we assert that the timer call locking protocol | |
547 | * is being obeyed. | |
548 | */ | |
549 | ||
6d2010ae A |
550 | static boolean_t |
551 | timer_call_enter_internal( | |
552 | timer_call_t call, | |
553 | timer_call_param_t param1, | |
554 | uint64_t deadline, | |
39236c6e A |
555 | uint64_t leeway, |
556 | uint32_t flags, | |
557 | boolean_t ratelimited) | |
1c79356b | 558 | { |
39236c6e | 559 | mpqueue_head_t *queue = NULL; |
6d2010ae | 560 | mpqueue_head_t *old_queue; |
1c79356b | 561 | spl_t s; |
39236c6e A |
562 | uint64_t slop; |
563 | uint32_t urgency; | |
fe8ab488 | 564 | uint64_t sdeadline, ttd; |
1c79356b A |
565 | |
566 | s = splclock(); | |
6d2010ae | 567 | |
fe8ab488 | 568 | sdeadline = deadline; |
39236c6e A |
569 | uint64_t ctime = mach_absolute_time(); |
570 | ||
571 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
572 | DECR_TIMER_ENTER | DBG_FUNC_START, | |
4bd07ac2 A |
573 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
574 | VM_KERNEL_UNSLIDE_OR_PERM(param1), deadline, flags, 0); | |
39236c6e A |
575 | |
576 | urgency = (flags & TIMER_CALL_URGENCY_MASK); | |
577 | ||
578 | boolean_t slop_ratelimited = FALSE; | |
579 | slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited); | |
580 | ||
581 | if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) | |
582 | slop = leeway; | |
583 | ||
584 | if (UINT64_MAX - deadline <= slop) { | |
585 | deadline = UINT64_MAX; | |
586 | } else { | |
6d2010ae A |
587 | deadline += slop; |
588 | } | |
1c79356b | 589 | |
316670eb A |
590 | if (__improbable(deadline < ctime)) { |
591 | uint64_t delta = (ctime - deadline); | |
592 | ||
593 | past_deadline_timers++; | |
594 | past_deadline_deltas += delta; | |
595 | if (delta > past_deadline_longest) | |
596 | past_deadline_longest = delta; | |
597 | if (delta < past_deadline_shortest) | |
598 | past_deadline_shortest = delta; | |
599 | ||
600 | deadline = ctime + past_deadline_timer_adjustment; | |
fe8ab488 | 601 | sdeadline = deadline; |
316670eb | 602 | } |
39236c6e | 603 | |
39236c6e | 604 | if (ratelimited || slop_ratelimited) { |
fe8ab488 | 605 | flags |= TIMER_CALL_RATELIMITED; |
39236c6e | 606 | } else { |
fe8ab488 | 607 | flags &= ~TIMER_CALL_RATELIMITED; |
39236c6e A |
608 | } |
609 | ||
fe8ab488 | 610 | ttd = sdeadline - ctime; |
4b17d6b6 | 611 | #if CONFIG_DTRACE |
fe8ab488 A |
612 | DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func, |
613 | timer_call_param_t, TCE(call)->param0, uint32_t, flags, | |
614 | (deadline - sdeadline), | |
615 | (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call); | |
4b17d6b6 A |
616 | #endif |
617 | ||
fe8ab488 A |
618 | /* Program timer callout parameters under the appropriate per-CPU or |
619 | * longterm queue lock. The callout may have been previously enqueued | |
620 | * and in-flight on this or another timer queue. | |
621 | */ | |
39236c6e | 622 | if (!ratelimited && !slop_ratelimited) { |
fe8ab488 | 623 | queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags); |
39236c6e | 624 | } |
1c79356b | 625 | |
39236c6e A |
626 | if (queue == NULL) { |
627 | queue = timer_queue_assign(deadline); | |
fe8ab488 | 628 | old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags); |
39236c6e | 629 | } |
1c79356b | 630 | |
39236c6e | 631 | #if TIMER_TRACE |
fe8ab488 | 632 | TCE(call)->entry_time = ctime; |
39236c6e A |
633 | #endif |
634 | ||
635 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
636 | DECR_TIMER_ENTER | DBG_FUNC_END, | |
4bd07ac2 | 637 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
fe8ab488 | 638 | (old_queue != NULL), deadline, queue->count, 0); |
1c79356b | 639 | |
1c79356b A |
640 | splx(s); |
641 | ||
c910b4d9 | 642 | return (old_queue != NULL); |
1c79356b A |
643 | } |
644 | ||
39236c6e A |
645 | /* |
646 | * timer_call_*() | |
647 | * return boolean indicating whether the call was previously queued. | |
648 | */ | |
6d2010ae A |
649 | boolean_t |
650 | timer_call_enter( | |
651 | timer_call_t call, | |
652 | uint64_t deadline, | |
653 | uint32_t flags) | |
654 | { | |
39236c6e | 655 | return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE); |
6d2010ae A |
656 | } |
657 | ||
1c79356b | 658 | boolean_t |
c910b4d9 A |
659 | timer_call_enter1( |
660 | timer_call_t call, | |
661 | timer_call_param_t param1, | |
6d2010ae A |
662 | uint64_t deadline, |
663 | uint32_t flags) | |
1c79356b | 664 | { |
39236c6e A |
665 | return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE); |
666 | } | |
667 | ||
668 | boolean_t | |
669 | timer_call_enter_with_leeway( | |
670 | timer_call_t call, | |
671 | timer_call_param_t param1, | |
672 | uint64_t deadline, | |
673 | uint64_t leeway, | |
674 | uint32_t flags, | |
675 | boolean_t ratelimited) | |
676 | { | |
677 | return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited); | |
1c79356b A |
678 | } |
679 | ||
680 | boolean_t | |
c910b4d9 A |
681 | timer_call_cancel( |
682 | timer_call_t call) | |
1c79356b | 683 | { |
6d2010ae | 684 | mpqueue_head_t *old_queue; |
1c79356b A |
685 | spl_t s; |
686 | ||
687 | s = splclock(); | |
1c79356b | 688 | |
39236c6e A |
689 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
690 | DECR_TIMER_CANCEL | DBG_FUNC_START, | |
4bd07ac2 | 691 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
fe8ab488 | 692 | TCE(call)->deadline, call->soft_deadline, call->flags, 0); |
39236c6e | 693 | |
6d2010ae | 694 | old_queue = timer_call_dequeue_unlocked(call); |
c910b4d9 A |
695 | |
696 | if (old_queue != NULL) { | |
39236c6e A |
697 | timer_queue_lock_spin(old_queue); |
698 | if (!queue_empty(&old_queue->head)) { | |
fe8ab488 A |
699 | timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline); |
700 | timer_call_t thead = (timer_call_t)queue_first(&old_queue->head); | |
701 | old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; | |
39236c6e A |
702 | } |
703 | else { | |
fe8ab488 | 704 | timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX); |
39236c6e A |
705 | old_queue->earliest_soft_deadline = UINT64_MAX; |
706 | } | |
707 | timer_queue_unlock(old_queue); | |
1c79356b | 708 | } |
39236c6e A |
709 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
710 | DECR_TIMER_CANCEL | DBG_FUNC_END, | |
4bd07ac2 A |
711 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
712 | VM_KERNEL_UNSLIDE_OR_PERM(old_queue), | |
fe8ab488 A |
713 | TCE(call)->deadline - mach_absolute_time(), |
714 | TCE(call)->deadline - TCE(call)->entry_time, 0); | |
1c79356b A |
715 | splx(s); |
716 | ||
4b17d6b6 | 717 | #if CONFIG_DTRACE |
fe8ab488 A |
718 | DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func, |
719 | timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0, | |
4b17d6b6 A |
720 | (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); |
721 | #endif | |
722 | ||
c910b4d9 | 723 | return (old_queue != NULL); |
1c79356b A |
724 | } |
725 | ||
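Taken together, timer_call_setup(), the timer_call_enter*() variants and timer_call_cancel() form the client-facing interface. A hypothetical client (a sketch only; everything prefixed `example_` is invented for illustration) might arm a one-shot callout 100 ms out with 10 ms of leeway and later cancel it:

```c
static timer_call_data_t example_call;

static void
example_expire(__unused timer_call_param_t p0, __unused timer_call_param_t p1)
{
	/* Runs from timer_queue_expire() with interrupts disabled; keep it short. */
}

static void
example_arm_and_cancel(void)
{
	uint64_t deadline, leeway;

	timer_call_setup(&example_call, example_expire, NULL);
	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &leeway);
	(void) timer_call_enter_with_leeway(&example_call, NULL, deadline, leeway,
	    TIMER_CALL_SYS_NORMAL | TIMER_CALL_LEEWAY, FALSE);

	/* ... later, if the callout is no longer wanted: */
	(void) timer_call_cancel(&example_call);
}
```

As the comment above notes, the boolean returned by the enter and cancel calls reports whether the callout was already queued at the time of the request.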
fe8ab488 A |
726 | static uint32_t timer_queue_shutdown_lock_skips; |
727 | static uint32_t timer_queue_shutdown_discarded; | |
728 | ||
9bccf70c | 729 | void |
c910b4d9 | 730 | timer_queue_shutdown( |
6d2010ae | 731 | mpqueue_head_t *queue) |
9bccf70c | 732 | { |
6d2010ae A |
733 | timer_call_t call; |
734 | mpqueue_head_t *new_queue; | |
c910b4d9 | 735 | spl_t s; |
9bccf70c | 736 | |
fe8ab488 | 737 | |
6d2010ae A |
738 | DBG("timer_queue_shutdown(%p)\n", queue); |
739 | ||
c910b4d9 | 740 | s = splclock(); |
9bccf70c | 741 | |
6d2010ae | 742 | /* Note comma operator in while expression re-locking each iteration */ |
39236c6e | 743 | while (timer_queue_lock_spin(queue), !queue_empty(&queue->head)) { |
6d2010ae | 744 | call = TIMER_CALL(queue_first(&queue->head)); |
fe8ab488 | 745 | |
6d2010ae A |
746 | if (!simple_lock_try(&call->lock)) { |
747 | /* | |
748 | * case (2b) lock order inversion, dequeue and skip | |
749 | * Don't change the call_entry queue back-pointer | |
750 | * but set the async_dequeue field. | |
751 | */ | |
752 | timer_queue_shutdown_lock_skips++; | |
39236c6e A |
753 | timer_call_entry_dequeue_async(call); |
754 | #if TIMER_ASSERT | |
755 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
756 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
4bd07ac2 | 757 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
39236c6e | 758 | call->async_dequeue, |
4bd07ac2 | 759 | VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue), |
39236c6e A |
760 | 0x2b, 0); |
761 | #endif | |
762 | timer_queue_unlock(queue); | |
6d2010ae A |
763 | continue; |
764 | } | |
9bccf70c | 765 | |
fe8ab488 A |
766 | boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0); |
767 | ||
6d2010ae A |
768 | /* remove entry from old queue */ |
769 | timer_call_entry_dequeue(call); | |
39236c6e | 770 | timer_queue_unlock(queue); |
9bccf70c | 771 | |
fe8ab488 A |
772 | if (call_local == FALSE) { |
773 | /* and queue it on new, discarding LOCAL timers */ | |
774 | new_queue = timer_queue_assign(TCE(call)->deadline); | |
775 | timer_queue_lock_spin(new_queue); | |
776 | timer_call_entry_enqueue_deadline( | |
777 | call, new_queue, TCE(call)->deadline); | |
778 | timer_queue_unlock(new_queue); | |
779 | } else { | |
780 | timer_queue_shutdown_discarded++; | |
781 | } | |
782 | ||
783 | /* The only lingering LOCAL timer should be this thread's | |
784 | * quantum expiration timer. | |
785 | */ | |
786 | assert((call_local == FALSE) || | |
787 | (TCE(call)->func == thread_quantum_expire)); | |
9bccf70c | 788 | |
6d2010ae | 789 | simple_unlock(&call->lock); |
9bccf70c A |
790 | } |
791 | ||
39236c6e | 792 | timer_queue_unlock(queue); |
c910b4d9 | 793 | splx(s); |
9bccf70c A |
794 | } |
795 | ||
fe8ab488 | 796 | static uint32_t timer_queue_expire_lock_skips; |
c910b4d9 | 797 | uint64_t |
39236c6e | 798 | timer_queue_expire_with_options( |
6d2010ae | 799 | mpqueue_head_t *queue, |
39236c6e A |
800 | uint64_t deadline, |
801 | boolean_t rescan) | |
1c79356b | 802 | { |
39236c6e A |
803 | timer_call_t call = NULL; |
804 | uint32_t tc_iterations = 0; | |
6d2010ae A |
805 | DBG("timer_queue_expire(%p,)\n", queue); |
806 | ||
39236c6e A |
807 | uint64_t cur_deadline = deadline; |
808 | timer_queue_lock_spin(queue); | |
1c79356b | 809 | |
6d2010ae | 810 | while (!queue_empty(&queue->head)) { |
39236c6e A |
811 | /* Upon processing one or more timer calls, refresh the |
812 | * deadline to account for time elapsed in the callout | |
813 | */ | |
814 | if (++tc_iterations > 1) | |
815 | cur_deadline = mach_absolute_time(); | |
816 | ||
817 | if (call == NULL) | |
818 | call = TIMER_CALL(queue_first(&queue->head)); | |
1c79356b | 819 | |
39236c6e | 820 | if (call->soft_deadline <= cur_deadline) { |
1c79356b A |
821 | timer_call_func_t func; |
822 | timer_call_param_t param0, param1; | |
823 | ||
39236c6e A |
824 | TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0); |
825 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
826 | DECR_TIMER_EXPIRE | DBG_FUNC_NONE, | |
4bd07ac2 | 827 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
39236c6e | 828 | call->soft_deadline, |
fe8ab488 A |
829 | TCE(call)->deadline, |
830 | TCE(call)->entry_time, 0); | |
39236c6e | 831 | |
fe8ab488 A |
832 | if ((call->flags & TIMER_CALL_RATELIMITED) && |
833 | (TCE(call)->deadline > cur_deadline)) { | |
39236c6e A |
834 | if (rescan == FALSE) |
835 | break; | |
836 | } | |
837 | ||
6d2010ae A |
838 | if (!simple_lock_try(&call->lock)) { |
839 | /* case (2b) lock inversion, dequeue and skip */ | |
840 | timer_queue_expire_lock_skips++; | |
39236c6e A |
841 | timer_call_entry_dequeue_async(call); |
842 | call = NULL; | |
6d2010ae A |
843 | continue; |
844 | } | |
845 | ||
846 | timer_call_entry_dequeue(call); | |
1c79356b | 847 | |
fe8ab488 A |
848 | func = TCE(call)->func; |
849 | param0 = TCE(call)->param0; | |
850 | param1 = TCE(call)->param1; | |
1c79356b | 851 | |
6d2010ae | 852 | simple_unlock(&call->lock); |
39236c6e | 853 | timer_queue_unlock(queue); |
1c79356b | 854 | |
39236c6e | 855 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
316670eb | 856 | DECR_TIMER_CALLOUT | DBG_FUNC_START, |
4bd07ac2 A |
857 | VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func), |
858 | VM_KERNEL_UNSLIDE_OR_PERM(param0), | |
859 | VM_KERNEL_UNSLIDE_OR_PERM(param1), | |
860 | 0); | |
2d21ac55 | 861 | |
4b17d6b6 | 862 | #if CONFIG_DTRACE |
39236c6e | 863 | DTRACE_TMR7(callout__start, timer_call_func_t, func, |
4b17d6b6 A |
864 | timer_call_param_t, param0, unsigned, call->flags, |
865 | 0, (call->ttd >> 32), | |
39236c6e | 866 | (unsigned) (call->ttd & 0xFFFFFFFF), call); |
2d21ac55 | 867 | #endif |
4b17d6b6 A |
868 | /* Maintain time-to-deadline in per-processor data |
869 | * structure for thread wakeup deadline statistics. | |
870 | */ | |
871 | uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd)); | |
872 | *ttdp = call->ttd; | |
1c79356b | 873 | (*func)(param0, param1); |
4b17d6b6 | 874 | *ttdp = 0; |
4b17d6b6 | 875 | #if CONFIG_DTRACE |
39236c6e A |
876 | DTRACE_TMR4(callout__end, timer_call_func_t, func, |
877 | param0, param1, call); | |
2d21ac55 A |
878 | #endif |
879 | ||
39236c6e | 880 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
316670eb | 881 | DECR_TIMER_CALLOUT | DBG_FUNC_END, |
4bd07ac2 A |
882 | VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func), |
883 | VM_KERNEL_UNSLIDE_OR_PERM(param0), | |
884 | VM_KERNEL_UNSLIDE_OR_PERM(param1), | |
885 | 0); | |
39236c6e A |
886 | call = NULL; |
887 | timer_queue_lock_spin(queue); | |
888 | } else { | |
889 | if (__probable(rescan == FALSE)) { | |
890 | break; | |
891 | } else { | |
fe8ab488 A |
892 | int64_t skew = TCE(call)->deadline - call->soft_deadline; |
893 | assert(TCE(call)->deadline >= call->soft_deadline); | |
39236c6e A |
894 | |
895 | /* DRK: On a latency quality-of-service level change, | |
896 | * re-sort potentially rate-limited timers. The platform | |
897 | * layer determines which timers require | |
898 | * this. In the absence of the per-callout | |
899 | * synchronization requirement, a global resort could | |
900 | * be more efficient. The re-sort effectively | |
901 | * annuls all timer adjustments, i.e. the "soft | |
902 | * deadline" is the sort key. | |
903 | */ | |
904 | ||
905 | if (timer_resort_threshold(skew)) { | |
906 | if (__probable(simple_lock_try(&call->lock))) { | |
907 | timer_call_entry_dequeue(call); | |
908 | timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline); | |
909 | simple_unlock(&call->lock); | |
910 | call = NULL; | |
911 | } | |
912 | } | |
913 | if (call) { | |
914 | call = TIMER_CALL(queue_next(qe(call))); | |
915 | if (queue_end(&queue->head, qe(call))) | |
916 | break; | |
917 | } | |
918 | } | |
c910b4d9 | 919 | } |
1c79356b A |
920 | } |
921 | ||
39236c6e A |
922 | if (!queue_empty(&queue->head)) { |
923 | call = TIMER_CALL(queue_first(&queue->head)); | |
fe8ab488 A |
924 | cur_deadline = TCE(call)->deadline; |
925 | queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline: call->soft_deadline; | |
39236c6e A |
926 | } else { |
927 | queue->earliest_soft_deadline = cur_deadline = UINT64_MAX; | |
928 | } | |
1c79356b | 929 | |
39236c6e | 930 | timer_queue_unlock(queue); |
c910b4d9 | 931 | |
39236c6e | 932 | return (cur_deadline); |
1c79356b | 933 | } |
6d2010ae | 934 | |
39236c6e A |
935 | uint64_t |
936 | timer_queue_expire( | |
937 | mpqueue_head_t *queue, | |
938 | uint64_t deadline) | |
939 | { | |
940 | return timer_queue_expire_with_options(queue, deadline, FALSE); | |
941 | } | |
6d2010ae A |
942 | |
943 | extern int serverperfmode; | |
fe8ab488 | 944 | static uint32_t timer_queue_migrate_lock_skips; |
6d2010ae | 945 | /* |
39236c6e | 946 | * timer_queue_migrate() is called by timer_queue_migrate_cpu() |
6d2010ae A |
947 | * to move timer requests from the local processor (queue_from) |
948 | * to a target processor's queue (queue_to). | |
949 | */ | |
950 | int | |
951 | timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) | |
952 | { | |
953 | timer_call_t call; | |
954 | timer_call_t head_to; | |
955 | int timers_migrated = 0; | |
956 | ||
957 | DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to); | |
958 | ||
959 | assert(!ml_get_interrupts_enabled()); | |
960 | assert(queue_from != queue_to); | |
961 | ||
962 | if (serverperfmode) { | |
963 | /* | |
964 | * if we're running a high end server | |
965 | * avoid migrations... they add latency | |
966 | * and don't save us power under typical | |
967 | * server workloads | |
968 | */ | |
969 | return -4; | |
970 | } | |
971 | ||
972 | /* | |
973 | * Take both local (from) and target (to) timer queue locks while | |
974 | * moving the timers from the local queue to the target processor. | |
975 | * We assume that the target is always the boot processor. | |
976 | * But only move if all of the following is true: | |
977 | * - the target queue is non-empty | |
978 | * - the local queue is non-empty | |
979 | * - the local queue's first deadline is later than the target's | |
980 | * - the local queue contains no non-migrateable "local" call | |
981 | * so that we need not have the target resync. | |
982 | */ | |
983 | ||
39236c6e | 984 | timer_queue_lock_spin(queue_to); |
6d2010ae A |
985 | |
986 | head_to = TIMER_CALL(queue_first(&queue_to->head)); | |
987 | if (queue_empty(&queue_to->head)) { | |
988 | timers_migrated = -1; | |
989 | goto abort1; | |
990 | } | |
991 | ||
39236c6e | 992 | timer_queue_lock_spin(queue_from); |
6d2010ae A |
993 | |
994 | if (queue_empty(&queue_from->head)) { | |
995 | timers_migrated = -2; | |
996 | goto abort2; | |
997 | } | |
998 | ||
999 | call = TIMER_CALL(queue_first(&queue_from->head)); | |
fe8ab488 | 1000 | if (TCE(call)->deadline < TCE(head_to)->deadline) { |
6d2010ae A |
1001 | timers_migrated = 0; |
1002 | goto abort2; | |
1003 | } | |
1004 | ||
1005 | /* perform scan for non-migratable timers */ | |
1006 | do { | |
1007 | if (call->flags & TIMER_CALL_LOCAL) { | |
1008 | timers_migrated = -3; | |
1009 | goto abort2; | |
1010 | } | |
1011 | call = TIMER_CALL(queue_next(qe(call))); | |
1012 | } while (!queue_end(&queue_from->head, qe(call))); | |
1013 | ||
1014 | /* migration loop itself -- both queues are locked */ | |
1015 | while (!queue_empty(&queue_from->head)) { | |
1016 | call = TIMER_CALL(queue_first(&queue_from->head)); | |
1017 | if (!simple_lock_try(&call->lock)) { | |
1018 | /* case (2b) lock order inversion, dequeue only */ | |
39236c6e A |
1019 | #ifdef TIMER_ASSERT |
1020 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1021 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
4bd07ac2 A |
1022 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
1023 | VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue), | |
1024 | VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data), | |
39236c6e A |
1025 | 0x2b, 0); |
1026 | #endif | |
6d2010ae | 1027 | timer_queue_migrate_lock_skips++; |
39236c6e | 1028 | timer_call_entry_dequeue_async(call); |
6d2010ae A |
1029 | continue; |
1030 | } | |
1031 | timer_call_entry_dequeue(call); | |
1032 | timer_call_entry_enqueue_deadline( | |
fe8ab488 | 1033 | call, queue_to, TCE(call)->deadline); |
6d2010ae A |
1034 | timers_migrated++; |
1035 | simple_unlock(&call->lock); | |
1036 | } | |
39236c6e | 1037 | queue_from->earliest_soft_deadline = UINT64_MAX; |
6d2010ae | 1038 | abort2: |
39236c6e | 1039 | timer_queue_unlock(queue_from); |
6d2010ae | 1040 | abort1: |
39236c6e | 1041 | timer_queue_unlock(queue_to); |
6d2010ae A |
1042 | |
1043 | return timers_migrated; | |
1044 | } | |
39236c6e A |
1045 | |
1046 | void | |
1047 | timer_queue_trace_cpu(int ncpu) | |
1048 | { | |
1049 | timer_call_nosync_cpu( | |
1050 | ncpu, | |
1051 | (void(*)())timer_queue_trace, | |
1052 | (void*) timer_queue_cpu(ncpu)); | |
1053 | } | |
1054 | ||
1055 | void | |
1056 | timer_queue_trace( | |
1057 | mpqueue_head_t *queue) | |
1058 | { | |
1059 | timer_call_t call; | |
1060 | spl_t s; | |
1061 | ||
1062 | if (!kdebug_enable) | |
1063 | return; | |
1064 | ||
1065 | s = splclock(); | |
1066 | timer_queue_lock_spin(queue); | |
1067 | ||
1068 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1069 | DECR_TIMER_QUEUE | DBG_FUNC_START, | |
1070 | queue->count, mach_absolute_time(), 0, 0, 0); | |
1071 | ||
1072 | if (!queue_empty(&queue->head)) { | |
1073 | call = TIMER_CALL(queue_first(&queue->head)); | |
1074 | do { | |
1075 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1076 | DECR_TIMER_QUEUE | DBG_FUNC_NONE, | |
1077 | call->soft_deadline, | |
fe8ab488 A |
1078 | TCE(call)->deadline, |
1079 | TCE(call)->entry_time, | |
4bd07ac2 | 1080 | VM_KERNEL_UNSLIDE(TCE(call)->func), |
39236c6e A |
1081 | 0); |
1082 | call = TIMER_CALL(queue_next(qe(call))); | |
1083 | } while (!queue_end(&queue->head, qe(call))); | |
1084 | } | |
1085 | ||
1086 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1087 | DECR_TIMER_QUEUE | DBG_FUNC_END, | |
1088 | queue->count, mach_absolute_time(), 0, 0, 0); | |
1089 | ||
1090 | timer_queue_unlock(queue); | |
1091 | splx(s); | |
1092 | } | |
1093 | ||
1094 | void | |
1095 | timer_longterm_dequeued_locked(timer_call_t call) | |
1096 | { | |
1097 | timer_longterm_t *tlp = &timer_longterm; | |
1098 | ||
1099 | tlp->dequeues++; | |
1100 | if (call == tlp->threshold.call) | |
1101 | tlp->threshold.call = NULL; | |
1102 | } | |
1103 | ||
1104 | /* | |
1105 | * Place a timer call in the longterm list | |
1106 | * and adjust the next timer callout deadline if the new timer is first. | |
1107 | */ | |
1108 | mpqueue_head_t * | |
1109 | timer_longterm_enqueue_unlocked(timer_call_t call, | |
1110 | uint64_t now, | |
1111 | uint64_t deadline, | |
fe8ab488 A |
1112 | mpqueue_head_t **old_queue, |
1113 | uint64_t soft_deadline, | |
1114 | uint64_t ttd, | |
1115 | timer_call_param_t param1, | |
1116 | uint32_t callout_flags) | |
39236c6e A |
1117 | { |
1118 | timer_longterm_t *tlp = &timer_longterm; | |
1119 | boolean_t update_required = FALSE; | |
1120 | uint64_t longterm_threshold; | |
1121 | ||
1122 | longterm_threshold = now + tlp->threshold.interval; | |
1123 | ||
1124 | /* | |
1125 | * Return NULL without doing anything if: | |
1126 | * - this timer is local, or | |
1127 | * - the longterm mechanism is disabled, or | |
1128 | * - this deadline is too short. | |
1129 | */ | |
fe8ab488 | 1130 | if ((callout_flags & TIMER_CALL_LOCAL) != 0 || |
39236c6e | 1131 | (tlp->threshold.interval == TIMER_LONGTERM_NONE) || |
fe8ab488 | 1132 | (deadline <= longterm_threshold)) |
39236c6e A |
1133 | return NULL; |
1134 | ||
1135 | /* | |
1136 | * Remove timer from its current queue, if any. | |
1137 | */ | |
1138 | *old_queue = timer_call_dequeue_unlocked(call); | |
1139 | ||
1140 | /* | |
1141 | * Lock the longterm queue, queue timer and determine | |
1142 | * whether an update is necessary. | |
1143 | */ | |
1144 | assert(!ml_get_interrupts_enabled()); | |
1145 | simple_lock(&call->lock); | |
1146 | timer_queue_lock_spin(timer_longterm_queue); | |
fe8ab488 A |
1147 | TCE(call)->deadline = deadline; |
1148 | TCE(call)->param1 = param1; | |
1149 | call->ttd = ttd; | |
1150 | call->soft_deadline = soft_deadline; | |
1151 | call->flags = callout_flags; | |
39236c6e | 1152 | timer_call_entry_enqueue_tail(call, timer_longterm_queue); |
39236c6e A |
1153 | |
1154 | tlp->enqueues++; | |
1155 | ||
1156 | /* | |
1157 | * We'll need to update the currently set threshold timer | |
1158 | * if the new deadline is sooner and no sooner update is in flight. | |
1159 | */ | |
1160 | if (deadline < tlp->threshold.deadline && | |
1161 | deadline < tlp->threshold.preempted) { | |
1162 | tlp->threshold.preempted = deadline; | |
1163 | tlp->threshold.call = call; | |
1164 | update_required = TRUE; | |
1165 | } | |
1166 | timer_queue_unlock(timer_longterm_queue); | |
1167 | simple_unlock(&call->lock); | |
1168 | ||
1169 | if (update_required) { | |
fe8ab488 A |
1170 | /* |
1171 | * Note: this call expects that calling the master cpu | |
1172 | * alone does not involve locking the topo lock. | |
1173 | */ | |
39236c6e A |
1174 | timer_call_nosync_cpu( |
1175 | master_cpu, | |
1176 | (void (*)(void *)) timer_longterm_update, | |
1177 | (void *)tlp); | |
1178 | } | |
1179 | ||
1180 | return timer_longterm_queue; | |
1181 | } | |
1182 | ||
1183 | /* | |
1184 | * Scan for timers below the longterm threshold. | |
1185 | * Move these to the local timer queue (of the boot processor on which the | |
1186 | * calling thread is running). | |
1187 | * Both the local (boot) queue and the longterm queue are locked. | |
1188 | * The scan is similar to the timer migrate sequence but is performed by | |
1189 | * successively examining each timer on the longterm queue: | |
1190 | * - if within the short-term threshold | |
1191 | * - enter on the local queue (unless being deleted), | |
1192 | * - otherwise: | |
1193 | * - if sooner, deadline becomes the next threshold deadline. | |
1194 | */ | |
1195 | void | |
1196 | timer_longterm_scan(timer_longterm_t *tlp, | |
1197 | uint64_t now) | |
1198 | { | |
1199 | queue_entry_t qe; | |
1200 | timer_call_t call; | |
1201 | uint64_t threshold; | |
1202 | uint64_t deadline; | |
1203 | mpqueue_head_t *timer_master_queue; | |
1204 | ||
1205 | assert(!ml_get_interrupts_enabled()); | |
1206 | assert(cpu_number() == master_cpu); | |
1207 | ||
1208 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) | |
1209 | threshold = now + tlp->threshold.interval; | |
1210 | else | |
1211 | threshold = TIMER_LONGTERM_NONE; | |
1212 | ||
1213 | tlp->threshold.deadline = TIMER_LONGTERM_NONE; | |
1214 | tlp->threshold.call = NULL; | |
1215 | ||
1216 | if (queue_empty(&timer_longterm_queue->head)) | |
1217 | return; | |
1218 | ||
1219 | timer_master_queue = timer_queue_cpu(master_cpu); | |
1220 | timer_queue_lock_spin(timer_master_queue); | |
1221 | ||
1222 | qe = queue_first(&timer_longterm_queue->head); | |
1223 | while (!queue_end(&timer_longterm_queue->head, qe)) { | |
1224 | call = TIMER_CALL(qe); | |
1225 | deadline = call->soft_deadline; | |
1226 | qe = queue_next(qe); | |
1227 | if (!simple_lock_try(&call->lock)) { | |
1228 | /* case (2c) lock order inversion, dequeue only */ | |
1229 | #ifdef TIMER_ASSERT | |
1230 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1231 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
4bd07ac2 A |
1232 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
1233 | VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue), | |
1234 | VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data), | |
39236c6e A |
1235 | 0x2c, 0); |
1236 | #endif | |
1237 | timer_call_entry_dequeue_async(call); | |
1238 | continue; | |
1239 | } | |
1240 | if (deadline < threshold) { | |
1241 | /* | |
1242 | * This timer needs moving (escalating) | |
1243 | * to the local (boot) processor's queue. | |
1244 | */ | |
1245 | #ifdef TIMER_ASSERT | |
1246 | if (deadline < now) | |
1247 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1248 | DECR_TIMER_OVERDUE | DBG_FUNC_NONE, | |
4bd07ac2 | 1249 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
39236c6e A |
1250 | deadline, |
1251 | now, | |
1252 | threshold, | |
1253 | 0); | |
1254 | #endif | |
1255 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1256 | DECR_TIMER_ESCALATE | DBG_FUNC_NONE, | |
4bd07ac2 | 1257 | VM_KERNEL_UNSLIDE_OR_PERM(call), |
fe8ab488 A |
1258 | TCE(call)->deadline, |
1259 | TCE(call)->entry_time, | |
4bd07ac2 | 1260 | VM_KERNEL_UNSLIDE(TCE(call)->func), |
39236c6e A |
1261 | 0); |
1262 | tlp->escalates++; | |
1263 | timer_call_entry_dequeue(call); | |
1264 | timer_call_entry_enqueue_deadline( | |
fe8ab488 | 1265 | call, timer_master_queue, TCE(call)->deadline); |
39236c6e A |
1266 | /* |
1267 | * A side-effect of the following call is to update | |
1268 | * the actual hardware deadline if required. | |
1269 | */ | |
1270 | (void) timer_queue_assign(deadline); | |
1271 | } else { | |
1272 | if (deadline < tlp->threshold.deadline) { | |
1273 | tlp->threshold.deadline = deadline; | |
1274 | tlp->threshold.call = call; | |
1275 | } | |
1276 | } | |
1277 | simple_unlock(&call->lock); | |
1278 | } | |
1279 | ||
1280 | timer_queue_unlock(timer_master_queue); | |
1281 | } | |
1282 | ||
1283 | void | |
1284 | timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1) | |
1285 | { | |
1286 | timer_longterm_t *tlp = (timer_longterm_t *) p0; | |
1287 | ||
1288 | timer_longterm_update(tlp); | |
1289 | } | |
1290 | ||
1291 | void | |
1292 | timer_longterm_update_locked(timer_longterm_t *tlp) | |
1293 | { | |
1294 | uint64_t latency; | |
1295 | ||
1296 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1297 | DECR_TIMER_UPDATE | DBG_FUNC_START, | |
4bd07ac2 | 1298 | VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue), |
39236c6e A |
1299 | tlp->threshold.deadline, |
1300 | tlp->threshold.preempted, | |
1301 | tlp->queue.count, 0); | |
1302 | ||
1303 | tlp->scan_time = mach_absolute_time(); | |
1304 | if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) { | |
1305 | tlp->threshold.preempts++; | |
1306 | tlp->threshold.deadline = tlp->threshold.preempted; | |
1307 | tlp->threshold.preempted = TIMER_LONGTERM_NONE; | |
1308 | /* | |
1309 | * Note: in the unlikely event that a pre-empted timer has | |
1310 | * itself been cancelled, we'll simply re-scan later at the | |
1311 | * time of the preempted/cancelled timer. | |
1312 | */ | |
1313 | } else { | |
1314 | tlp->threshold.scans++; | |
1315 | ||
1316 | /* | |
1317 | * Maintain a moving average of our wakeup latency. | |
1318 | * Clamp latency to 0 and ignore samples above the threshold interval. | |
1319 | */ | |
1320 | if (tlp->scan_time > tlp->threshold.deadline_set) | |
1321 | latency = tlp->scan_time - tlp->threshold.deadline_set; | |
1322 | else | |
1323 | latency = 0; | |
1324 | if (latency < tlp->threshold.interval) { | |
1325 | tlp->threshold.latency_min = | |
1326 | MIN(tlp->threshold.latency_min, latency); | |
1327 | tlp->threshold.latency_max = | |
1328 | MAX(tlp->threshold.latency_max, latency); | |
1329 | tlp->threshold.latency = | |
1330 | (tlp->threshold.latency*99 + latency) / 100; | |
1331 | } | |
1332 | ||
1333 | timer_longterm_scan(tlp, tlp->scan_time); | |
1334 | } | |
1335 | ||
1336 | tlp->threshold.deadline_set = tlp->threshold.deadline; | |
1337 | /* The next deadline timer to be set is adjusted */ | |
1338 | if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { | |
1339 | tlp->threshold.deadline_set -= tlp->threshold.margin; | |
1340 | tlp->threshold.deadline_set -= tlp->threshold.latency; | |
1341 | } | |
1342 | ||
1343 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1344 | DECR_TIMER_UPDATE | DBG_FUNC_END, | |
4bd07ac2 | 1345 | VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue), |
39236c6e A |
1346 | tlp->threshold.deadline, |
1347 | tlp->threshold.scans, | |
1348 | tlp->queue.count, 0); | |
1349 | } | |
1350 | ||
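The latency filter in timer_longterm_update_locked() above is a simple exponential moving average with a per-sample weight of 1/100; restated as a standalone helper (illustration only, not used by the kernel code):

```c
static __inline__ uint64_t
example_ewma_update(uint64_t average, uint64_t sample)
{
	/* e.g. average = 0us, sample = 1000us  ->  new average = 10us */
	return (average * 99 + sample) / 100;
}
```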
1351 | void | |
1352 | timer_longterm_update(timer_longterm_t *tlp) | |
1353 | { | |
1354 | spl_t s = splclock(); | |
1355 | ||
1356 | timer_queue_lock_spin(timer_longterm_queue); | |
1357 | ||
1358 | if (cpu_number() != master_cpu) | |
1359 | panic("timer_longterm_update_master() on non-boot cpu"); | |
1360 | ||
1361 | timer_longterm_update_locked(tlp); | |
1362 | ||
1363 | if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) | |
1364 | timer_call_enter( | |
1365 | &tlp->threshold.timer, | |
1366 | tlp->threshold.deadline_set, | |
1367 | TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); | |
1368 | ||
1369 | timer_queue_unlock(timer_longterm_queue); | |
1370 | splx(s); | |
1371 | } | |
1372 | ||
1373 | void | |
1374 | timer_longterm_init(void) | |
1375 | { | |
1376 | uint32_t longterm; | |
1377 | timer_longterm_t *tlp = &timer_longterm; | |
1378 | ||
1379 | DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue); | |
1380 | ||
1381 | /* | |
15129b1c A |
1382 | * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD, | |
1383 | * or TIMER_LONGTERM_NONE (disabled) in server performance mode; | |
1384 | * overridden by the "longterm" boot-arg. | |
39236c6e | 1385 | */ |
15129b1c A |
1386 | tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE |
1387 | : TIMER_LONGTERM_THRESHOLD; | |
39236c6e A |
1388 | if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) { |
1389 | tlp->threshold.interval = (longterm == 0) ? | |
1390 | TIMER_LONGTERM_NONE : | |
1391 | longterm * NSEC_PER_MSEC; | |
1392 | } | |
1393 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) { | |
1394 | printf("Longterm timer threshold: %llu ms\n", | |
1395 | tlp->threshold.interval / NSEC_PER_MSEC); | |
1396 | kprintf("Longterm timer threshold: %llu ms\n", | |
1397 | tlp->threshold.interval / NSEC_PER_MSEC); | |
1398 | nanoseconds_to_absolutetime(tlp->threshold.interval, | |
1399 | &tlp->threshold.interval); | |
1400 | tlp->threshold.margin = tlp->threshold.interval / 10; | |
1401 | tlp->threshold.latency_min = EndOfAllTime; | |
1402 | tlp->threshold.latency_max = 0; | |
1403 | } | |
1404 | ||
1405 | tlp->threshold.preempted = TIMER_LONGTERM_NONE; | |
1406 | tlp->threshold.deadline = TIMER_LONGTERM_NONE; | |
1407 | ||
1408 | lck_attr_setdefault(&timer_longterm_lck_attr); | |
1409 | lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr); | |
1410 | lck_grp_init(&timer_longterm_lck_grp, | |
1411 | "timer_longterm", &timer_longterm_lck_grp_attr); | |
1412 | mpqueue_init(&tlp->queue, | |
1413 | &timer_longterm_lck_grp, &timer_longterm_lck_attr); | |
1414 | ||
1415 | timer_call_setup(&tlp->threshold.timer, | |
1416 | timer_longterm_callout, (timer_call_param_t) tlp); | |
1417 | ||
1418 | timer_longterm_queue = &tlp->queue; | |
1419 | } | |
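/*
 * Hypothetical worked example of the "longterm" boot-arg handling above:
 * booting with longterm=5000 yields a 5 s threshold (5000 * NSEC_PER_MSEC
 * == 5,000,000,000 ns before conversion to absolute time) with a margin of
 * one tenth of that, i.e. 500 ms; longterm=0 disables the longterm queue
 * (TIMER_LONGTERM_NONE).  The function below exists only for illustration.
 */
static void
longterm_bootarg_example(void)
{
        uint64_t interval_ns = 5000ULL * NSEC_PER_MSEC;        /* 5 s */
        uint64_t margin_ns = interval_ns / 10;                 /* 500 ms */

        assert(margin_ns == 500 * NSEC_PER_MSEC);
}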
1420 | ||
1421 | enum { | |
1422 | THRESHOLD, QCOUNT, | |
1423 | ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS, | |
1424 | LATENCY, LATENCY_MIN, LATENCY_MAX | |
1425 | }; | |
1426 | uint64_t | |
1427 | timer_sysctl_get(int oid) | |
1428 | { | |
1429 | timer_longterm_t *tlp = &timer_longterm; | |
1430 | ||
1431 | switch (oid) { | |
1432 | case THRESHOLD: | |
1433 | return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ? | |
1434 | 0 : tlp->threshold.interval / NSEC_PER_MSEC; | |
1435 | case QCOUNT: | |
1436 | return tlp->queue.count; | |
1437 | case ENQUEUES: | |
1438 | return tlp->enqueues; | |
1439 | case DEQUEUES: | |
1440 | return tlp->dequeues; | |
1441 | case ESCALATES: | |
1442 | return tlp->escalates; | |
1443 | case SCANS: | |
1444 | return tlp->threshold.scans; | |
1445 | case PREEMPTS: | |
1446 | return tlp->threshold.preempts; | |
1447 | case LATENCY: | |
1448 | return tlp->threshold.latency; | |
1449 | case LATENCY_MIN: | |
1450 | return tlp->threshold.latency_min; | |
1451 | case LATENCY_MAX: | |
1452 | return tlp->threshold.latency_max; | |
1453 | default: | |
1454 | return 0; | |
1455 | } | |
1456 | } | |
1457 | ||
1458 | /* | |
1459 | * timer_master_scan() is the inverse of timer_longterm_scan() | |
1460 | * since it un-escalates timers to the longterm queue. | |
1461 | */ | |
1462 | static void | |
1463 | timer_master_scan(timer_longterm_t *tlp, | |
1464 | uint64_t now) | |
1465 | { | |
1466 | queue_entry_t qe; | |
1467 | timer_call_t call; | |
1468 | uint64_t threshold; | |
1469 | uint64_t deadline; | |
1470 | mpqueue_head_t *timer_master_queue; | |
1471 | ||
1472 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) | |
1473 | threshold = now + tlp->threshold.interval; | |
1474 | else | |
1475 | threshold = TIMER_LONGTERM_NONE; | |
1476 | ||
1477 | timer_master_queue = timer_queue_cpu(master_cpu); | |
1478 | timer_queue_lock_spin(timer_master_queue); | |
1479 | ||
1480 | qe = queue_first(&timer_master_queue->head); | |
1481 | while (!queue_end(&timer_master_queue->head, qe)) { | |
1482 | call = TIMER_CALL(qe); | |
fe8ab488 | 1483 | deadline = TCE(call)->deadline; |
39236c6e A |
1484 | qe = queue_next(qe); |
1485 | if ((call->flags & TIMER_CALL_LOCAL) != 0) | |
1486 | continue; | |
1487 | if (!simple_lock_try(&call->lock)) { | |
1488 | /* case (2c) lock order inversion, dequeue only */ | |
1489 | timer_call_entry_dequeue_async(call); | |
1490 | continue; | |
1491 | } | |
1492 | if (deadline > threshold) { | |
1493 | /* move from master to longterm */ | |
1494 | timer_call_entry_dequeue(call); | |
1495 | timer_call_entry_enqueue_tail(call, timer_longterm_queue); | |
1496 | if (deadline < tlp->threshold.deadline) { | |
1497 | tlp->threshold.deadline = deadline; | |
1498 | tlp->threshold.call = call; | |
1499 | } | |
1500 | } | |
1501 | simple_unlock(&call->lock); | |
1502 | } | |
1503 | timer_queue_unlock(timer_master_queue); | |
1504 | } | |
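/*
 * Minimal sketch of the un-escalation test applied in the loop above,
 * assuming a 1 s threshold interval: with now == T, a non-local timer whose
 * deadline lies beyond T + 1 s no longer needs the per-cpu master queue and
 * is moved back to the longterm queue.  The helper name is hypothetical.
 */
static boolean_t
timer_should_unescalate_sketch(uint64_t deadline, uint64_t now, uint64_t interval)
{
        if (interval == TIMER_LONGTERM_NONE)
                return FALSE;
        return (deadline > now + interval) ? TRUE : FALSE;
}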
1505 | ||
1506 | static void | |
1507 | timer_sysctl_set_threshold(uint64_t value) | |
1508 | { | |
1509 | timer_longterm_t *tlp = &timer_longterm; | |
1510 | spl_t s = splclock(); | |
1511 | boolean_t threshold_increase; | |
1512 | ||
1513 | timer_queue_lock_spin(timer_longterm_queue); | |
1514 | ||
1515 | timer_call_cancel(&tlp->threshold.timer); | |
1516 | ||
1517 | /* | |
1518 | * Set the new threshold and note whether it's increasing. | |
1519 | */ | |
1520 | if (value == 0) { | |
1521 | tlp->threshold.interval = TIMER_LONGTERM_NONE; | |
1522 | threshold_increase = TRUE; | |
1523 | timer_call_cancel(&tlp->threshold.timer); | |
1524 | } else { | |
1525 | uint64_t old_interval = tlp->threshold.interval; | |
1526 | tlp->threshold.interval = value * NSEC_PER_MSEC; | |
1527 | nanoseconds_to_absolutetime(tlp->threshold.interval, | |
1528 | &tlp->threshold.interval); | |
1529 | tlp->threshold.margin = tlp->threshold.interval / 10; | |
1530 | if (old_interval == TIMER_LONGTERM_NONE) | |
1531 | threshold_increase = FALSE; | |
1532 | else | |
1533 | threshold_increase = (tlp->threshold.interval > old_interval); | |
1534 | } | |
1535 | ||
1536 | if (threshold_increase /* or removal */) { | |
1537 | /* Escalate timers from the longterm queue */ | |
1538 | timer_longterm_scan(tlp, mach_absolute_time()); | |
1539 | } else /* decrease or addition */ { | |
1540 | /* | |
1541 | * We scan the local/master queue for timers now longterm. | |
1542 | * To be strictly correct, we should scan all processor queues | |
1543 | * but timer migration results in most timers gravitating to the | |
1544 | * master processor in any case. | |
1545 | */ | |
1546 | timer_master_scan(tlp, mach_absolute_time()); | |
1547 | } | |
1548 | ||
1549 | /* Set new timer accordingly */ | |
1550 | tlp->threshold.deadline_set = tlp->threshold.deadline; | |
1551 | if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { | |
1552 | tlp->threshold.deadline_set -= tlp->threshold.margin; | |
1553 | tlp->threshold.deadline_set -= tlp->threshold.latency; | |
1554 | timer_call_enter( | |
1555 | &tlp->threshold.timer, | |
1556 | tlp->threshold.deadline_set, | |
1557 | TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); | |
1558 | } | |
1559 | ||
1560 | /* Reset stats */ | |
1561 | tlp->enqueues = 0; | |
1562 | tlp->dequeues = 0; | |
1563 | tlp->escalates = 0; | |
1564 | tlp->threshold.scans = 0; | |
1565 | tlp->threshold.preempts = 0; | |
1566 | tlp->threshold.latency = 0; | |
1567 | tlp->threshold.latency_min = EndOfAllTime; | |
1568 | tlp->threshold.latency_max = 0; | |
1569 | ||
1570 | timer_queue_unlock(timer_longterm_queue); | |
1571 | splx(s); | |
1572 | } | |
1573 | ||
1574 | int | |
1575 | timer_sysctl_set(int oid, uint64_t value) | |
1576 | { | |
1577 | switch (oid) { | |
1578 | case THRESHOLD: | |
1579 | timer_call_cpu( | |
1580 | master_cpu, | |
1581 | (void (*)(void *)) timer_sysctl_set_threshold, | |
1582 | (void *) value); | |
1583 | return KERN_SUCCESS; | |
1584 | default: | |
1585 | return KERN_INVALID_ARGUMENT; | |
1586 | } | |
1587 | } | |
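/*
 * Hedged usage sketch for the sysctl setter above: writing 1000 to the
 * THRESHOLD oid runs timer_sysctl_set_threshold(1000) on the boot processor,
 * i.e. establishes a 1 s longterm threshold, while writing 0 disables the
 * longterm queue; every other oid is read-only through timer_sysctl_get().
 * The calling context shown here is hypothetical.
 */
static void
timer_threshold_sysctl_example(void)
{
        int ret;

        ret = timer_sysctl_set(THRESHOLD, 1000);        /* 1000 ms threshold */
        assert(ret == KERN_SUCCESS);

        ret = timer_sysctl_set(QCOUNT, 0);              /* not settable */
        assert(ret == KERN_INVALID_ARGUMENT);
}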
fe8ab488 A |
1588 | |
1589 | ||
1590 | /* Select timer coalescing window based on per-task quality-of-service hints */ | |
1591 | static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { | |
1592 | uint32_t latency_qos; | |
1593 | boolean_t adjusted = FALSE; | |
1594 | task_t ctask = t->task; | |
1595 | ||
1596 | if (ctask) { | |
1597 | latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS); | |
1598 | ||
1599 | assert(latency_qos <= NUM_LATENCY_QOS_TIERS); | |
1600 | ||
1601 | if (latency_qos) { | |
1602 | *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1]; | |
1603 | *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1]; | |
1604 | *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1]; | |
1605 | adjusted = TRUE; | |
1606 | } | |
1607 | } | |
1608 | return adjusted; | |
1609 | } | |
1610 | ||
1611 | ||
1612 | /* Adjust timer deadlines based on priority of the thread and the | |
1613 | * urgency value provided at timeout establishment. With this mechanism, | |
1614 | * timers are no longer necessarily sorted in order of soft deadline | |
1615 | * on a given timer queue, i.e. they may be differentially skewed. | |
1616 | * In the current scheme, this could lead to fewer pending timers | |
1617 | * processed than is technically possible when the HW deadline arrives. | |
1618 | */ | |
1619 | static void | |
1620 | timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { | |
1621 | int16_t tpri = cthread->sched_pri; | |
1622 | if ((urgency & TIMER_CALL_USER_MASK) != 0) { | |
1623 | if (tpri >= BASEPRI_RTQUEUES || | |
1624 | urgency == TIMER_CALL_USER_CRITICAL) { | |
1625 | *tshift = tcoal_prio_params.timer_coalesce_rt_shift; | |
1626 | *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max; | |
1627 | TCOAL_PRIO_STAT(rt_tcl); | |
1628 | } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) || | |
1629 | (urgency == TIMER_CALL_USER_BACKGROUND)) { | |
1630 | /* Determine if timer should be subjected to a lower QoS */ | |
1631 | if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { | |
1632 | if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) { | |
1633 | return; | |
1634 | } else { | |
1635 | *pratelimited = FALSE; | |
1636 | } | |
1637 | } | |
1638 | *tshift = tcoal_prio_params.timer_coalesce_bg_shift; | |
1639 | *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; | |
1640 | TCOAL_PRIO_STAT(bg_tcl); | |
1641 | } else if (tpri >= MINPRI_KERNEL) { | |
1642 | *tshift = tcoal_prio_params.timer_coalesce_kt_shift; | |
1643 | *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; | |
1644 | TCOAL_PRIO_STAT(kt_tcl); | |
1645 | } else if (cthread->sched_mode == TH_MODE_FIXED) { | |
1646 | *tshift = tcoal_prio_params.timer_coalesce_fp_shift; | |
1647 | *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max; | |
1648 | TCOAL_PRIO_STAT(fp_tcl); | |
1649 | } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { | |
1650 | TCOAL_PRIO_STAT(qos_tcl); | |
1651 | } else if (cthread->sched_mode == TH_MODE_TIMESHARE) { | |
1652 | *tshift = tcoal_prio_params.timer_coalesce_ts_shift; | |
1653 | *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max; | |
1654 | TCOAL_PRIO_STAT(ts_tcl); | |
1655 | } else { | |
1656 | TCOAL_PRIO_STAT(nc_tcl); | |
1657 | } | |
1658 | } else if (urgency == TIMER_CALL_SYS_BACKGROUND) { | |
1659 | *tshift = tcoal_prio_params.timer_coalesce_bg_shift; | |
1660 | *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; | |
1661 | TCOAL_PRIO_STAT(bg_tcl); | |
1662 | } else { | |
1663 | *tshift = tcoal_prio_params.timer_coalesce_kt_shift; | |
1664 | *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; | |
1665 | TCOAL_PRIO_STAT(kt_tcl); | |
1666 | } | |
1667 | } | |
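/*
 * Hedged summary of the selection ladder above for user-urgency timers,
 * written as a sketch; the enum and helper are hypothetical.  Evaluation
 * order is: realtime or user-critical, then background (which may defer to
 * tighter per-task latency-QoS limits), then kernel priority, then fixed
 * priority, then per-task latency QoS, then timeshare, else no coalescing
 * parameters are applied.
 */
typedef enum {
        TCOAL_SKETCH_RT, TCOAL_SKETCH_BG, TCOAL_SKETCH_KT, TCOAL_SKETCH_FP,
        TCOAL_SKETCH_QOS, TCOAL_SKETCH_TS, TCOAL_SKETCH_NONE
} tcoal_class_sketch_t;

static tcoal_class_sketch_t
tcoal_classify_sketch(boolean_t rt_or_critical, boolean_t background,
    boolean_t kernel_pri, boolean_t fixed_pri, boolean_t has_qos_hint,
    boolean_t timeshare)
{
        if (rt_or_critical)     return TCOAL_SKETCH_RT;
        if (background)         return TCOAL_SKETCH_BG;
        if (kernel_pri)         return TCOAL_SKETCH_KT;
        if (fixed_pri)          return TCOAL_SKETCH_FP;
        if (has_qos_hint)       return TCOAL_SKETCH_QOS;
        if (timeshare)          return TCOAL_SKETCH_TS;
        return TCOAL_SKETCH_NONE;
}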
1668 | ||
1669 | ||
1670 | int timer_user_idle_level; | |
1671 | ||
1672 | uint64_t | |
1673 | timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited) | |
1674 | { | |
1675 | int32_t tcs_shift = 0; | |
1676 | uint64_t tcs_max_abstime = 0; | |
1677 | uint64_t adjval; | |
1678 | uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK); | |
1679 | ||
1680 | if (mach_timer_coalescing_enabled && | |
1681 | (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) { | |
1682 | timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited); | |
1683 | ||
1684 | if (tcs_shift >= 0) | |
1685 | adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime); | |
1686 | else | |
1687 | adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime); | |
1688 | /* Apply adjustments derived from "user idle level" heuristic */ | |
1689 | adjval += (adjval * timer_user_idle_level) >> 7; | |
1690 | return adjval; | |
1691 | } else { | |
1692 | return 0; | |
1693 | } | |
1694 | } | |
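/*
 * Worked sketch of the slop arithmetic above, with hypothetical numbers:
 * for a timer 100 ms out with a coalescing shift of 3 and a cap larger than
 * the result, the base leeway is 100 ms >> 3 == 12.5 ms.  A user idle level
 * of 64 then adds (leeway * 64) >> 7, i.e. +50%, giving 18.75 ms; at the
 * maximum level of 128 the leeway exactly doubles.  A negative shift widens
 * rather than narrows the window (left shift instead of right).
 */
static uint64_t
timer_call_slop_sketch(uint64_t delta, int32_t shift, uint64_t cap, int idle_level)
{
        uint64_t adj;

        if (shift >= 0)
                adj = MIN(delta >> shift, cap);
        else
                adj = MIN(delta << (-shift), cap);

        adj += (adj * idle_level) >> 7;         /* idle level 0..128 scales leeway up to 2x */
        return adj;
}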
1695 | ||
1696 | int | |
1697 | timer_get_user_idle_level(void) { | |
1698 | return timer_user_idle_level; | |
1699 | } | |
1700 | ||
1701 | kern_return_t timer_set_user_idle_level(int ilevel) { | |
1702 | boolean_t do_reeval = FALSE; | |
1703 | ||
1704 | if ((ilevel < 0) || (ilevel > 128)) | |
1705 | return KERN_INVALID_ARGUMENT; | |
1706 | ||
1707 | if (ilevel < timer_user_idle_level) { | |
1708 | do_reeval = TRUE; | |
1709 | } | |
1710 | ||
1711 | timer_user_idle_level = ilevel; | |
1712 | ||
1713 | if (do_reeval) | |
1714 | ml_timer_evaluate(); | |
1715 | ||
1716 | return KERN_SUCCESS; | |
1717 | } |
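/*
 * Hedged usage sketch for the idle-level interface above: levels run from 0
 * (not idle, no extra coalescing) to 128 (maximum, doubling the computed
 * leeway), and lowering the level triggers ml_timer_evaluate() so deadlines
 * that were coalesced under the old level can be re-tightened.  The calling
 * context below is hypothetical.
 */
static void
timer_user_idle_level_example(void)
{
        assert(timer_set_user_idle_level(64) == KERN_SUCCESS);   /* +50% leeway scaling */
        assert(timer_set_user_idle_level(0) == KERN_SUCCESS);    /* lower level: forces re-evaluation */
        assert(timer_set_user_idle_level(200) == KERN_INVALID_ARGUMENT);
}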