Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
c910b4d9 | 2 | * Copyright (c) 1993-2008 Apple Inc. All rights reserved. |
1c79356b | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A | 6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A | 15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A | 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A | 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A | 27 | */ |
28 | /* | |
29 | * Timer interrupt callout module. | |
1c79356b A | 30 | */ |
31 | ||
32 | #include <mach/mach_types.h> | |
33 | ||
34 | #include <kern/clock.h> | |
9bccf70c | 35 | #include <kern/processor.h> |
1c79356b | 36 | #include <kern/timer_call.h> |
c910b4d9 | 37 | #include <kern/timer_queue.h> |
1c79356b | 38 | #include <kern/call_entry.h> |
39236c6e | 39 | #include <kern/thread.h> |
1c79356b | 40 | |
0c530ab8 A |
41 | #include <sys/kdebug.h> |
42 | ||
4b17d6b6 | 43 | #if CONFIG_DTRACE |
2d21ac55 A |
44 | #include <mach/sdt.h> |
45 | #endif | |
1c79356b | 46 | |
1c79356b | 47 | |
6d2010ae A |
48 | #if DEBUG |
49 | #define TIMER_ASSERT 1 | |
50 | #endif | |
51 | ||
52 | //#define TIMER_ASSERT 1 | |
53 | //#define TIMER_DBG 1 | |
54 | ||
55 | #if TIMER_DBG | |
56 | #define DBG(x...) kprintf("DBG: " x); | |
57 | #else | |
58 | #define DBG(x...) | |
59 | #endif | |
60 | ||
39236c6e A |
61 | #if TIMER_TRACE |
62 | #define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST | |
63 | #else | |
64 | #define TIMER_KDEBUG_TRACE(x...) | |
65 | #endif | |
66 | ||
67 | ||
6d2010ae A |
68 | lck_grp_t timer_call_lck_grp; |
69 | lck_attr_t timer_call_lck_attr; | |
70 | lck_grp_attr_t timer_call_lck_grp_attr; | |
71 | ||
39236c6e A |
72 | lck_grp_t timer_longterm_lck_grp; |
73 | lck_attr_t timer_longterm_lck_attr; | |
74 | lck_grp_attr_t timer_longterm_lck_grp_attr; | |
75 | ||
6d2010ae | 76 | |
39236c6e | 77 | #define timer_queue_lock_spin(queue) \ |
6d2010ae A |
78 | lck_mtx_lock_spin_always(&queue->lock_data) |
79 | ||
39236c6e | 80 | #define timer_queue_unlock(queue) \ |
6d2010ae A |
81 | lck_mtx_unlock_always(&queue->lock_data) |
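/*
 * Note: the per-queue lock wrapped by these macros is an lck_mtx taken in
 * spin mode (lck_mtx_lock_spin_always), not a true spinlock; the TIMER_ASSERT
 * checks further down therefore "pretend" it is a spinlock when testing
 * whether it is held.
 */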
82 | ||
83 | ||
84 | #define QUEUE(x) ((queue_t)(x)) | |
85 | #define MPQUEUE(x) ((mpqueue_head_t *)(x)) | |
86 | #define TIMER_CALL(x) ((timer_call_t)(x)) | |
fe8ab488 | 87 | #define TCE(x) (&(x->call_entry)) |
39236c6e A |
88 | /* |
89 | * The longterm timer object is a global structure holding all timers | |
90 | * beyond the short-term, local timer queue threshold. The boot processor | |
91 | * is responsible for moving each timer to its local timer queue | |
92 | * if and when that timer becomes due within the threshold. | |
93 | */ | |
94 | #define TIMER_LONGTERM_NONE EndOfAllTime | |
95 | #if defined(__x86_64__) | |
96 | #define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC) | |
97 | #else | |
98 | #define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE | |
99 | #endif | |
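/*
 * Hypothetical illustration (not from the original source): with the 1-second
 * threshold on x86_64, a callout armed, say, 5 seconds out is parked on the
 * global longterm queue at enqueue time and is escalated onto the boot
 * processor's local queue by the threshold timer once it comes due within
 * the threshold. Where the threshold is TIMER_LONGTERM_NONE, every callout
 * stays on the per-CPU queues.
 */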
100 | ||
101 | typedef struct { | |
102 | uint64_t interval; /* longterm timer interval */ | |
103 | uint64_t margin; /* fudge factor (10% of interval) */ |
104 | uint64_t deadline; /* first/soonest longterm deadline */ | |
105 | uint64_t preempted; /* sooner timer has pre-empted */ | |
106 | timer_call_t call; /* first/soonest longterm timer call */ | |
107 | uint64_t deadline_set; /* next timer set */ | |
108 | timer_call_data_t timer; /* timer used by threshold management */ | |
109 | /* Stats: */ | |
110 | uint64_t scans; /* num threshold timer scans */ | |
111 | uint64_t preempts; /* num threshold reductions */ | |
112 | uint64_t latency; /* average threshold latency */ | |
113 | uint64_t latency_min; /* minimum threshold latency */ | |
114 | uint64_t latency_max; /* maximum threshold latency */ | |
115 | } threshold_t; | |
116 | ||
117 | typedef struct { | |
118 | mpqueue_head_t queue; /* longterm timer list */ | |
119 | uint64_t enqueues; /* num timers queued */ | |
120 | uint64_t dequeues; /* num timers dequeued */ | |
121 | uint64_t escalates; /* num timers becoming shortterm */ | |
122 | uint64_t scan_time; /* last time the list was scanned */ | |
123 | threshold_t threshold; /* longterm timer threshold */ | |
124 | } timer_longterm_t; | |
125 | ||
126 | timer_longterm_t timer_longterm; | |
127 | ||
128 | static mpqueue_head_t *timer_longterm_queue = NULL; | |
129 | ||
130 | static void timer_longterm_init(void); | |
131 | static void timer_longterm_callout( | |
132 | timer_call_param_t p0, | |
133 | timer_call_param_t p1); | |
134 | extern void timer_longterm_scan( | |
135 | timer_longterm_t *tlp, | |
136 | uint64_t now); | |
137 | static void timer_longterm_update( | |
138 | timer_longterm_t *tlp); | |
139 | static void timer_longterm_update_locked( | |
140 | timer_longterm_t *tlp); | |
141 | static mpqueue_head_t * timer_longterm_enqueue_unlocked( | |
142 | timer_call_t call, | |
143 | uint64_t now, | |
144 | uint64_t deadline, | |
fe8ab488 A |
145 | mpqueue_head_t ** old_queue, |
146 | uint64_t soft_deadline, | |
147 | uint64_t ttd, | |
148 | timer_call_param_t param1, | |
149 | uint32_t callout_flags); | |
39236c6e A |
150 | static void timer_longterm_dequeued_locked( |
151 | timer_call_t call); | |
316670eb A |
152 | |
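/*
 * Statistics for callouts entered with deadlines already in the past (see
 * timer_call_enter_internal()); such callouts are re-armed a small, fixed
 * interval into the future rather than being queued with a stale deadline.
 */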
153 | uint64_t past_deadline_timers; | |
154 | uint64_t past_deadline_deltas; | |
155 | uint64_t past_deadline_longest; | |
156 | uint64_t past_deadline_shortest = ~0ULL; | |
157 | enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000}; | |
158 | ||
159 | uint64_t past_deadline_timer_adjustment; | |
160 | ||
39236c6e | 161 | static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited); |
6d2010ae A |
162 | boolean_t mach_timer_coalescing_enabled = TRUE; |
163 | ||
164 | mpqueue_head_t *timer_call_enqueue_deadline_unlocked( | |
165 | timer_call_t call, | |
166 | mpqueue_head_t *queue, | |
fe8ab488 A |
167 | uint64_t deadline, |
168 | uint64_t soft_deadline, | |
169 | uint64_t ttd, | |
170 | timer_call_param_t param1, | |
171 | uint32_t flags); | |
6d2010ae A |
172 | |
173 | mpqueue_head_t *timer_call_dequeue_unlocked( | |
174 | timer_call_t call); | |
175 | ||
fe8ab488 A |
176 | timer_coalescing_priority_params_t tcoal_prio_params; |
177 | ||
178 | #if TCOAL_PRIO_STATS | |
179 | int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl; | |
180 | #define TCOAL_PRIO_STAT(x) (x++) | |
181 | #else | |
182 | #define TCOAL_PRIO_STAT(x) | |
183 | #endif | |
184 | ||
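/*
 * Convert the platform-supplied nanosecond coalescing tunables
 * (timer_call_get_priority_params()) into mach absolute-time units;
 * called from timer_call_init().
 */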
185 | static void | |
186 | timer_call_init_abstime(void) | |
187 | { | |
188 | int i; | |
189 | uint64_t result; | |
190 | timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params(); | |
191 | nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment); | |
192 | nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result); | |
193 | tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result; | |
194 | nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result); | |
195 | tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result; | |
196 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result); | |
197 | tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result; | |
198 | tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift; | |
199 | tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift; | |
200 | tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift; | |
201 | tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift; | |
202 | tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift; | |
203 | ||
204 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max, | |
205 | &tcoal_prio_params.timer_coalesce_rt_abstime_max); | |
206 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max, | |
207 | &tcoal_prio_params.timer_coalesce_bg_abstime_max); | |
208 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max, | |
209 | &tcoal_prio_params.timer_coalesce_kt_abstime_max); | |
210 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max, | |
211 | &tcoal_prio_params.timer_coalesce_fp_abstime_max); | |
212 | nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max, | |
213 | &tcoal_prio_params.timer_coalesce_ts_abstime_max); | |
214 | ||
215 | for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) { | |
216 | tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i]; | |
217 | nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i], | |
218 | &tcoal_prio_params.latency_qos_abstime_max[i]); | |
219 | tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i]; | |
220 | } | |
221 | } | |
222 | ||
1c79356b A |
223 | |
224 | void | |
39236c6e | 225 | timer_call_init(void) |
1c79356b | 226 | { |
6d2010ae A |
227 | lck_attr_setdefault(&timer_call_lck_attr); |
228 | lck_grp_attr_setdefault(&timer_call_lck_grp_attr); | |
229 | lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr); | |
39236c6e A |
230 | |
231 | timer_longterm_init(); | |
fe8ab488 | 232 | timer_call_init_abstime(); |
1c79356b A |
233 | } |
234 | ||
6d2010ae A |
235 | |
236 | void | |
39236c6e | 237 | timer_call_queue_init(mpqueue_head_t *queue) |
6d2010ae | 238 | { |
39236c6e | 239 | DBG("timer_call_queue_init(%p)\n", queue); |
6d2010ae A |
240 | mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr); |
241 | } | |
242 | ||
243 | ||
1c79356b A |
244 | void |
245 | timer_call_setup( | |
246 | timer_call_t call, | |
247 | timer_call_func_t func, | |
248 | timer_call_param_t param0) | |
249 | { | |
6d2010ae | 250 | DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0); |
fe8ab488 | 251 | call_entry_setup(TCE(call), func, param0); |
6d2010ae A |
252 | simple_lock_init(&(call)->lock, 0); |
253 | call->async_dequeue = FALSE; | |
1c79356b | 254 | } |
6d2010ae A |
255 | #if TIMER_ASSERT |
256 | static __inline__ mpqueue_head_t * | |
257 | timer_call_entry_dequeue( | |
258 | timer_call_t entry) | |
259 | { | |
fe8ab488 | 260 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
6d2010ae A |
261 | |
262 | if (!hw_lock_held((hw_lock_t)&entry->lock)) | |
263 | panic("_call_entry_dequeue() " | |
264 | "entry %p is not locked\n", entry); | |
265 | /* | |
266 | * XXX The queue lock is actually a mutex in spin mode | |
267 | * but there's no way to test for it being held | |
268 | * so we pretend it's a spinlock! | |
269 | */ | |
270 | if (!hw_lock_held((hw_lock_t)&old_queue->lock_data)) | |
271 | panic("_call_entry_dequeue() " | |
272 | "queue %p is not locked\n", old_queue); | |
273 | ||
fe8ab488 | 274 | call_entry_dequeue(TCE(entry)); |
39236c6e | 275 | old_queue->count--; |
c910b4d9 | 276 | |
6d2010ae A |
277 | return (old_queue); |
278 | } | |
1c79356b | 279 | |
6d2010ae A |
280 | static __inline__ mpqueue_head_t * |
281 | timer_call_entry_enqueue_deadline( | |
282 | timer_call_t entry, | |
283 | mpqueue_head_t *queue, | |
284 | uint64_t deadline) | |
285 | { | |
fe8ab488 | 286 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
1c79356b | 287 | |
6d2010ae A |
288 | if (!hw_lock_held((hw_lock_t)&entry->lock)) |
289 | panic("_call_entry_enqueue_deadline() " | |
290 | "entry %p is not locked\n", entry); | |
291 | /* XXX More lock pretense: */ | |
292 | if (!hw_lock_held((hw_lock_t)&queue->lock_data)) | |
293 | panic("_call_entry_enqueue_deadline() " | |
294 | "queue %p is not locked\n", queue); | |
295 | if (old_queue != NULL && old_queue != queue) | |
296 | panic("_call_entry_enqueue_deadline() " | |
297 | "old_queue %p != queue", old_queue); | |
1c79356b | 298 | |
fe8ab488 | 299 | call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline); |
1c79356b | 300 | |
39236c6e A |
301 | /* For efficiency, track the earliest soft deadline on the queue, so that |
302 | * fuzzy decisions can be made without lock acquisitions. | |
303 | */ | |
fe8ab488 A |
304 | timer_call_t thead = (timer_call_t)queue_first(&queue->head); |
305 | ||
306 | queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; | |
39236c6e A |
307 | |
308 | if (old_queue) | |
309 | old_queue->count--; | |
310 | queue->count++; | |
311 | ||
6d2010ae A |
312 | return (old_queue); |
313 | } | |
1c79356b | 314 | |
6d2010ae | 315 | #else |
1c79356b | 316 | |
6d2010ae A |
317 | static __inline__ mpqueue_head_t * |
318 | timer_call_entry_dequeue( | |
319 | timer_call_t entry) | |
320 | { | |
fe8ab488 | 321 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
39236c6e | 322 | |
fe8ab488 | 323 | call_entry_dequeue(TCE(entry)); |
39236c6e A |
324 | old_queue->count--; |
325 | ||
326 | return old_queue; | |
6d2010ae | 327 | } |
c910b4d9 | 328 | |
6d2010ae A |
329 | static __inline__ mpqueue_head_t * |
330 | timer_call_entry_enqueue_deadline( | |
331 | timer_call_t entry, | |
332 | mpqueue_head_t *queue, | |
333 | uint64_t deadline) | |
334 | { | |
fe8ab488 | 335 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
39236c6e | 336 | |
fe8ab488 | 337 | call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline); |
39236c6e A |
338 | |
339 | /* For efficiency, track the earliest soft deadline on the queue, | |
340 | * so that fuzzy decisions can be made without lock acquisitions. | |
341 | */ | |
fe8ab488 A |
342 | |
343 | timer_call_t thead = (timer_call_t)queue_first(&queue->head); | |
344 | queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; | |
39236c6e A |
345 | |
346 | if (old_queue) | |
347 | old_queue->count--; | |
348 | queue->count++; | |
349 | ||
350 | return old_queue; | |
1c79356b A |
351 | } |
352 | ||
6d2010ae A |
353 | #endif |
354 | ||
39236c6e A |
355 | static __inline__ void |
356 | timer_call_entry_enqueue_tail( | |
357 | timer_call_t entry, | |
358 | mpqueue_head_t *queue) | |
359 | { | |
fe8ab488 | 360 | call_entry_enqueue_tail(TCE(entry), QUEUE(queue)); |
39236c6e A |
361 | queue->count++; |
362 | return; | |
363 | } | |
364 | ||
365 | /* | |
366 | * Remove timer entry from its queue but don't change the queue pointer | |
367 | * and set the async_dequeue flag. This is locking case 2b. | |
368 | */ | |
369 | static __inline__ void | |
370 | timer_call_entry_dequeue_async( | |
371 | timer_call_t entry) | |
372 | { | |
fe8ab488 | 373 | mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue); |
39236c6e A |
374 | if (old_queue) { |
375 | old_queue->count--; | |
376 | (void) remque(qe(entry)); | |
377 | entry->async_dequeue = TRUE; | |
378 | } | |
379 | return; | |
380 | } | |
381 | ||
6d2010ae A |
382 | #if TIMER_ASSERT |
383 | unsigned timer_call_enqueue_deadline_unlocked_async1; | |
384 | unsigned timer_call_enqueue_deadline_unlocked_async2; | |
385 | #endif | |
386 | /* | |
387 | * Assumes call_entry and queues unlocked, interrupts disabled. | |
388 | */ | |
389 | __inline__ mpqueue_head_t * | |
390 | timer_call_enqueue_deadline_unlocked( | |
391 | timer_call_t call, | |
392 | mpqueue_head_t *queue, | |
fe8ab488 A |
393 | uint64_t deadline, |
394 | uint64_t soft_deadline, | |
395 | uint64_t ttd, | |
396 | timer_call_param_t param1, | |
397 | uint32_t callout_flags) | |
1c79356b | 398 | { |
fe8ab488 | 399 | call_entry_t entry = TCE(call); |
6d2010ae | 400 | mpqueue_head_t *old_queue; |
1c79356b | 401 | |
6d2010ae | 402 | DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue); |
1c79356b | 403 | |
6d2010ae | 404 | simple_lock(&call->lock); |
fe8ab488 | 405 | |
6d2010ae | 406 | old_queue = MPQUEUE(entry->queue); |
fe8ab488 | 407 | |
6d2010ae | 408 | if (old_queue != NULL) { |
39236c6e | 409 | timer_queue_lock_spin(old_queue); |
6d2010ae | 410 | if (call->async_dequeue) { |
39236c6e | 411 | /* collision (1c): timer already dequeued, clear flag */ |
6d2010ae | 412 | #if TIMER_ASSERT |
39236c6e A |
413 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
414 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
415 | call, | |
416 | call->async_dequeue, | |
fe8ab488 | 417 | TCE(call)->queue, |
39236c6e | 418 | 0x1c, 0); |
6d2010ae A |
419 | timer_call_enqueue_deadline_unlocked_async1++; |
420 | #endif | |
39236c6e | 421 | call->async_dequeue = FALSE; |
6d2010ae | 422 | entry->queue = NULL; |
39236c6e A |
423 | } else if (old_queue != queue) { |
424 | timer_call_entry_dequeue(call); | |
6d2010ae A |
425 | #if TIMER_ASSERT |
426 | timer_call_enqueue_deadline_unlocked_async2++; | |
427 | #endif | |
428 | } | |
39236c6e A |
429 | if (old_queue == timer_longterm_queue) |
430 | timer_longterm_dequeued_locked(call); | |
6d2010ae | 431 | if (old_queue != queue) { |
39236c6e A |
432 | timer_queue_unlock(old_queue); |
433 | timer_queue_lock_spin(queue); | |
6d2010ae A |
434 | } |
435 | } else { | |
39236c6e | 436 | timer_queue_lock_spin(queue); |
6d2010ae | 437 | } |
1c79356b | 438 | |
fe8ab488 A |
439 | call->soft_deadline = soft_deadline; |
440 | call->flags = callout_flags; | |
441 | TCE(call)->param1 = param1; | |
442 | call->ttd = ttd; | |
443 | ||
6d2010ae | 444 | timer_call_entry_enqueue_deadline(call, queue, deadline); |
39236c6e | 445 | timer_queue_unlock(queue); |
6d2010ae | 446 | simple_unlock(&call->lock); |
1c79356b | 447 | |
c910b4d9 A |
448 | return (old_queue); |
449 | } | |
1c79356b | 450 | |
6d2010ae A |
451 | #if TIMER_ASSERT |
452 | unsigned timer_call_dequeue_unlocked_async1; | |
453 | unsigned timer_call_dequeue_unlocked_async2; | |
454 | #endif | |
455 | mpqueue_head_t * | |
456 | timer_call_dequeue_unlocked( | |
457 | timer_call_t call) | |
c910b4d9 | 458 | { |
fe8ab488 | 459 | call_entry_t entry = TCE(call); |
6d2010ae | 460 | mpqueue_head_t *old_queue; |
1c79356b | 461 | |
6d2010ae | 462 | DBG("timer_call_dequeue_unlocked(%p)\n", call); |
1c79356b | 463 | |
6d2010ae A |
464 | simple_lock(&call->lock); |
465 | old_queue = MPQUEUE(entry->queue); | |
39236c6e A |
466 | #if TIMER_ASSERT |
467 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
468 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
469 | call, | |
470 | call->async_dequeue, | |
fe8ab488 | 471 | TCE(call)->queue, |
39236c6e A |
472 | 0, 0); |
473 | #endif | |
6d2010ae | 474 | if (old_queue != NULL) { |
39236c6e | 475 | timer_queue_lock_spin(old_queue); |
6d2010ae | 476 | if (call->async_dequeue) { |
39236c6e | 477 | /* collision (1c): timer already dequeued, clear flag */ |
6d2010ae | 478 | #if TIMER_ASSERT |
39236c6e A |
479 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
480 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
481 | call, | |
482 | call->async_dequeue, | |
fe8ab488 | 483 | TCE(call)->queue, |
39236c6e | 484 | 0x1c, 0); |
6d2010ae A |
485 | timer_call_dequeue_unlocked_async1++; |
486 | #endif | |
39236c6e A |
487 | call->async_dequeue = FALSE; |
488 | entry->queue = NULL; | |
6d2010ae | 489 | } else { |
39236c6e | 490 | timer_call_entry_dequeue(call); |
6d2010ae | 491 | } |
39236c6e A |
492 | if (old_queue == timer_longterm_queue) |
493 | timer_longterm_dequeued_locked(call); | |
494 | timer_queue_unlock(old_queue); | |
6d2010ae A |
495 | } |
496 | simple_unlock(&call->lock); | |
c910b4d9 | 497 | return (old_queue); |
1c79356b A |
498 | } |
499 | ||
fe8ab488 A |
500 | |
501 | /* | |
502 | * Timer call entry locking model | |
503 | * ============================== | |
504 | * | |
505 | * Timer call entries are linked on per-cpu timer queues which are protected | |
506 | * by the queue lock and the call entry lock. The locking protocol is: | |
507 | * | |
508 | * 0) The canonical locking order is timer call entry followed by queue. | |
509 | * | |
510 | * 1) With only the entry lock held, entry.queue is valid: | |
511 | * 1a) NULL: the entry is not queued, or | |
512 | * 1b) non-NULL: this queue must be locked before the entry is modified. | |
513 | * After locking the queue, the call.async_dequeue flag must be checked: | |
514 | * 1c) TRUE: the entry was removed from the queue by another thread | |
515 | * and we must NULL the entry.queue and reset this flag, or | |
516 | * 1d) FALSE: (ie. queued), the entry can be manipulated. | |
517 | * | |
518 | * 2) If a queue lock is obtained first, the queue is stable: | |
519 | * 2a) If a try-lock of a queued entry succeeds, the call can be operated on | |
520 | * and dequeued. | |
521 | * 2b) If a try-lock fails, it indicates that another thread is attempting | |
522 | * to change the entry and move it to a different position in this queue | |
523 | * or to a different queue. The entry can be dequeued but it should not be |
524 | * operated upon since it is being changed. Furthermore, we don't null | |
525 | * the entry.queue pointer (protected by the entry lock we don't own). | |
526 | * Instead, we set the async_dequeue flag -- see (1c). | |
527 | * 2c) Same as 2b but occurring when a longterm timer is matured. | |
528 | * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.) | |
529 | * should be manipulated with the appropriate timer queue lock held, | |
530 | * to prevent queue traversals from observing inconsistent |
531 | * updates to an in-flight callout. | |
532 | */ | |
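/*
 * For a concrete instance of case (2b): timer_queue_shutdown() and
 * timer_queue_expire_with_options() below take the queue lock first and use
 * simple_lock_try(&call->lock); when the try-lock fails they remove the entry
 * with timer_call_entry_dequeue_async(), leaving entry.queue untouched and
 * setting async_dequeue, exactly as described above.
 */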
533 | ||
534 | /* | |
535 | * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline() | |
536 | * cast between pointer types (mpqueue_head_t *) and (queue_t) so that | |
537 | * we can use the call_entry_dequeue() and call_entry_enqueue_deadline() | |
538 | * methods to operate on timer_call structs as if they are call_entry structs. | |
539 | * These structures are identical except for their queue head pointer fields. | |
540 | * | |
541 | * In the debug case, we assert that the timer call locking protocol | |
542 | * is being obeyed. | |
543 | */ | |
544 | ||
6d2010ae A |
545 | static boolean_t |
546 | timer_call_enter_internal( | |
547 | timer_call_t call, | |
548 | timer_call_param_t param1, | |
549 | uint64_t deadline, | |
39236c6e A |
550 | uint64_t leeway, |
551 | uint32_t flags, | |
552 | boolean_t ratelimited) | |
1c79356b | 553 | { |
39236c6e | 554 | mpqueue_head_t *queue = NULL; |
6d2010ae | 555 | mpqueue_head_t *old_queue; |
1c79356b | 556 | spl_t s; |
39236c6e A |
557 | uint64_t slop; |
558 | uint32_t urgency; | |
fe8ab488 | 559 | uint64_t sdeadline, ttd; |
1c79356b A |
560 | |
561 | s = splclock(); | |
6d2010ae | 562 | |
fe8ab488 | 563 | sdeadline = deadline; |
39236c6e A |
564 | uint64_t ctime = mach_absolute_time(); |
565 | ||
566 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
567 | DECR_TIMER_ENTER | DBG_FUNC_START, | |
568 | call, | |
569 | param1, deadline, flags, 0); | |
570 | ||
571 | urgency = (flags & TIMER_CALL_URGENCY_MASK); | |
572 | ||
573 | boolean_t slop_ratelimited = FALSE; | |
574 | slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited); | |
575 | ||
576 | if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) | |
577 | slop = leeway; | |
578 | ||
579 | if (UINT64_MAX - deadline <= slop) { | |
580 | deadline = UINT64_MAX; | |
581 | } else { | |
6d2010ae A |
582 | deadline += slop; |
583 | } | |
1c79356b | 584 | |
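/* If the (coalesced) deadline has already passed, the callout is re-armed
 * PAST_DEADLINE_TIMER_ADJUSTMENT_NS (10 * 1000 ns, i.e. roughly 10
 * microseconds) into the future and the past-deadline statistics above
 * are updated.
 */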
316670eb A |
585 | if (__improbable(deadline < ctime)) { |
586 | uint64_t delta = (ctime - deadline); | |
587 | ||
588 | past_deadline_timers++; | |
589 | past_deadline_deltas += delta; | |
590 | if (delta > past_deadline_longest) | |
591 | past_deadline_longest = delta; |
592 | if (delta < past_deadline_shortest) | |
593 | past_deadline_shortest = delta; | |
594 | ||
595 | deadline = ctime + past_deadline_timer_adjustment; | |
fe8ab488 | 596 | sdeadline = deadline; |
316670eb | 597 | } |
39236c6e | 598 | |
39236c6e | 599 | if (ratelimited || slop_ratelimited) { |
fe8ab488 | 600 | flags |= TIMER_CALL_RATELIMITED; |
39236c6e | 601 | } else { |
fe8ab488 | 602 | flags &= ~TIMER_CALL_RATELIMITED; |
39236c6e A |
603 | } |
604 | ||
fe8ab488 | 605 | ttd = sdeadline - ctime; |
4b17d6b6 | 606 | #if CONFIG_DTRACE |
fe8ab488 A |
607 | DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func, |
608 | timer_call_param_t, TCE(call)->param0, uint32_t, flags, | |
609 | (deadline - sdeadline), | |
610 | (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call); | |
4b17d6b6 A |
611 | #endif |
612 | ||
fe8ab488 A |
613 | /* Program timer callout parameters under the appropriate per-CPU or |
614 | * longterm queue lock. The callout may have been previously enqueued | |
615 | * and in-flight on this or another timer queue. | |
616 | */ | |
39236c6e | 617 | if (!ratelimited && !slop_ratelimited) { |
fe8ab488 | 618 | queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags); |
39236c6e | 619 | } |
1c79356b | 620 | |
39236c6e A |
621 | if (queue == NULL) { |
622 | queue = timer_queue_assign(deadline); | |
fe8ab488 | 623 | old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags); |
39236c6e | 624 | } |
1c79356b | 625 | |
39236c6e | 626 | #if TIMER_TRACE |
fe8ab488 | 627 | TCE(call)->entry_time = ctime; |
39236c6e A |
628 | #endif |
629 | ||
630 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
631 | DECR_TIMER_ENTER | DBG_FUNC_END, | |
632 | call, | |
fe8ab488 | 633 | (old_queue != NULL), deadline, queue->count, 0); |
1c79356b | 634 | |
1c79356b A |
635 | splx(s); |
636 | ||
c910b4d9 | 637 | return (old_queue != NULL); |
1c79356b A |
638 | } |
639 | ||
39236c6e A |
640 | /* |
641 | * timer_call_*() | |
642 | * return boolean indicating whether the call was previously queued. | |
643 | */ | |
6d2010ae A |
644 | boolean_t |
645 | timer_call_enter( | |
646 | timer_call_t call, | |
647 | uint64_t deadline, | |
648 | uint32_t flags) | |
649 | { | |
39236c6e | 650 | return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE); |
6d2010ae A |
651 | } |
652 | ||
1c79356b | 653 | boolean_t |
c910b4d9 A |
654 | timer_call_enter1( |
655 | timer_call_t call, | |
656 | timer_call_param_t param1, | |
6d2010ae A |
657 | uint64_t deadline, |
658 | uint32_t flags) | |
1c79356b | 659 | { |
39236c6e A |
660 | return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE); |
661 | } | |
662 | ||
663 | boolean_t | |
664 | timer_call_enter_with_leeway( | |
665 | timer_call_t call, | |
666 | timer_call_param_t param1, | |
667 | uint64_t deadline, | |
668 | uint64_t leeway, | |
669 | uint32_t flags, | |
670 | boolean_t ratelimited) | |
671 | { | |
672 | return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited); | |
1c79356b A |
673 | } |
674 | ||
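/*
 * Illustrative usage sketch (hypothetical client code, not part of this file;
 * my_func, my_arg0, my_arg1 and the flag/interval choices are placeholders):
 *
 *	static timer_call_data_t my_timer;
 *
 *	timer_call_setup(&my_timer, my_func, my_arg0);	// bind func + param0 once
 *	uint64_t deadline;
 *	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
 *	timer_call_enter1(&my_timer, my_arg1, deadline, TIMER_CALL_SYS_NORMAL);
 *	...
 *	if (timer_call_cancel(&my_timer)) {
 *		// TRUE: the callout was still queued and has been removed
 *	}
 */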
675 | boolean_t | |
c910b4d9 A |
676 | timer_call_cancel( |
677 | timer_call_t call) | |
1c79356b | 678 | { |
6d2010ae | 679 | mpqueue_head_t *old_queue; |
1c79356b A |
680 | spl_t s; |
681 | ||
682 | s = splclock(); | |
1c79356b | 683 | |
39236c6e A |
684 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
685 | DECR_TIMER_CANCEL | DBG_FUNC_START, | |
686 | call, | |
fe8ab488 | 687 | TCE(call)->deadline, call->soft_deadline, call->flags, 0); |
39236c6e | 688 | |
6d2010ae | 689 | old_queue = timer_call_dequeue_unlocked(call); |
c910b4d9 A |
690 | |
691 | if (old_queue != NULL) { | |
39236c6e A |
692 | timer_queue_lock_spin(old_queue); |
693 | if (!queue_empty(&old_queue->head)) { | |
fe8ab488 A |
694 | timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline); |
695 | timer_call_t thead = (timer_call_t)queue_first(&old_queue->head); | |
696 | old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline; | |
39236c6e A |
697 | } |
698 | else { | |
fe8ab488 | 699 | timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX); |
39236c6e A |
700 | old_queue->earliest_soft_deadline = UINT64_MAX; |
701 | } | |
702 | timer_queue_unlock(old_queue); | |
1c79356b | 703 | } |
39236c6e A |
704 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
705 | DECR_TIMER_CANCEL | DBG_FUNC_END, | |
706 | call, | |
707 | old_queue, | |
fe8ab488 A |
708 | TCE(call)->deadline - mach_absolute_time(), |
709 | TCE(call)->deadline - TCE(call)->entry_time, 0); | |
1c79356b A |
710 | splx(s); |
711 | ||
4b17d6b6 | 712 | #if CONFIG_DTRACE |
fe8ab488 A |
713 | DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func, |
714 | timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0, | |
4b17d6b6 A |
715 | (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); |
716 | #endif | |
717 | ||
c910b4d9 | 718 | return (old_queue != NULL); |
1c79356b A |
719 | } |
720 | ||
fe8ab488 A |
721 | static uint32_t timer_queue_shutdown_lock_skips; |
722 | static uint32_t timer_queue_shutdown_discarded; | |
723 | ||
9bccf70c | 724 | void |
c910b4d9 | 725 | timer_queue_shutdown( |
6d2010ae | 726 | mpqueue_head_t *queue) |
9bccf70c | 727 | { |
6d2010ae A |
728 | timer_call_t call; |
729 | mpqueue_head_t *new_queue; | |
c910b4d9 | 730 | spl_t s; |
9bccf70c | 731 | |
fe8ab488 | 732 | |
6d2010ae A |
733 | DBG("timer_queue_shutdown(%p)\n", queue); |
734 | ||
c910b4d9 | 735 | s = splclock(); |
9bccf70c | 736 | |
6d2010ae | 737 | /* Note comma operator in while expression re-locking each iteration */ |
39236c6e | 738 | while (timer_queue_lock_spin(queue), !queue_empty(&queue->head)) { |
6d2010ae | 739 | call = TIMER_CALL(queue_first(&queue->head)); |
fe8ab488 | 740 | |
6d2010ae A |
741 | if (!simple_lock_try(&call->lock)) { |
742 | /* | |
743 | * case (2b) lock order inversion, dequeue and skip | |
744 | * Don't change the call_entry queue back-pointer | |
745 | * but set the async_dequeue field. | |
746 | */ | |
747 | timer_queue_shutdown_lock_skips++; | |
39236c6e A |
748 | timer_call_entry_dequeue_async(call); |
749 | #if TIMER_ASSERT | |
750 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
751 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
752 | call, | |
753 | call->async_dequeue, | |
fe8ab488 | 754 | TCE(call)->queue, |
39236c6e A |
755 | 0x2b, 0); |
756 | #endif | |
757 | timer_queue_unlock(queue); | |
6d2010ae A |
758 | continue; |
759 | } | |
9bccf70c | 760 | |
fe8ab488 A |
761 | boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0); |
762 | ||
6d2010ae A |
763 | /* remove entry from old queue */ |
764 | timer_call_entry_dequeue(call); | |
39236c6e | 765 | timer_queue_unlock(queue); |
9bccf70c | 766 | |
fe8ab488 A |
767 | if (call_local == FALSE) { |
768 | /* and queue it on new, discarding LOCAL timers */ | |
769 | new_queue = timer_queue_assign(TCE(call)->deadline); | |
770 | timer_queue_lock_spin(new_queue); | |
771 | timer_call_entry_enqueue_deadline( | |
772 | call, new_queue, TCE(call)->deadline); | |
773 | timer_queue_unlock(new_queue); | |
774 | } else { | |
775 | timer_queue_shutdown_discarded++; | |
776 | } | |
777 | ||
778 | /* The only lingering LOCAL timer should be this thread's | |
779 | * quantum expiration timer. | |
780 | */ | |
781 | assert((call_local == FALSE) || | |
782 | (TCE(call)->func == thread_quantum_expire)); | |
9bccf70c | 783 | |
6d2010ae | 784 | simple_unlock(&call->lock); |
9bccf70c A |
785 | } |
786 | ||
39236c6e | 787 | timer_queue_unlock(queue); |
c910b4d9 | 788 | splx(s); |
9bccf70c A |
789 | } |
790 | ||
fe8ab488 | 791 | static uint32_t timer_queue_expire_lock_skips; |
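/*
 * Process `queue', invoking callouts whose deadlines have passed relative to
 * `deadline' (refreshed from mach_absolute_time() once callouts start being
 * handled). Rate-limited entries whose hard deadline is still in the future
 * are deferred unless rescan is TRUE; a rescan additionally walks the
 * not-yet-due entries and re-sorts those with a large deadline skew by their
 * soft deadline. Returns the deadline of the earliest remaining entry, or
 * UINT64_MAX if the queue is empty.
 */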
c910b4d9 | 792 | uint64_t |
39236c6e | 793 | timer_queue_expire_with_options( |
6d2010ae | 794 | mpqueue_head_t *queue, |
39236c6e A |
795 | uint64_t deadline, |
796 | boolean_t rescan) | |
1c79356b | 797 | { |
39236c6e A |
798 | timer_call_t call = NULL; |
799 | uint32_t tc_iterations = 0; | |
6d2010ae A |
800 | DBG("timer_queue_expire(%p,)\n", queue); |
801 | ||
39236c6e A |
802 | uint64_t cur_deadline = deadline; |
803 | timer_queue_lock_spin(queue); | |
1c79356b | 804 | |
6d2010ae | 805 | while (!queue_empty(&queue->head)) { |
39236c6e A |
806 | /* Upon processing one or more timer calls, refresh the |
807 | * deadline to account for time elapsed in the callout | |
808 | */ | |
809 | if (++tc_iterations > 1) | |
810 | cur_deadline = mach_absolute_time(); | |
811 | ||
812 | if (call == NULL) | |
813 | call = TIMER_CALL(queue_first(&queue->head)); | |
1c79356b | 814 | |
39236c6e | 815 | if (call->soft_deadline <= cur_deadline) { |
1c79356b A |
816 | timer_call_func_t func; |
817 | timer_call_param_t param0, param1; | |
818 | ||
39236c6e A |
819 | TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0); |
820 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
821 | DECR_TIMER_EXPIRE | DBG_FUNC_NONE, | |
822 | call, | |
823 | call->soft_deadline, | |
fe8ab488 A |
824 | TCE(call)->deadline, |
825 | TCE(call)->entry_time, 0); | |
39236c6e | 826 | |
fe8ab488 A |
827 | if ((call->flags & TIMER_CALL_RATELIMITED) && |
828 | (TCE(call)->deadline > cur_deadline)) { | |
39236c6e A |
829 | if (rescan == FALSE) |
830 | break; | |
831 | } | |
832 | ||
6d2010ae A |
833 | if (!simple_lock_try(&call->lock)) { |
834 | /* case (2b) lock inversion, dequeue and skip */ | |
835 | timer_queue_expire_lock_skips++; | |
39236c6e A |
836 | timer_call_entry_dequeue_async(call); |
837 | call = NULL; | |
6d2010ae A |
838 | continue; |
839 | } | |
840 | ||
841 | timer_call_entry_dequeue(call); | |
1c79356b | 842 | |
fe8ab488 A |
843 | func = TCE(call)->func; |
844 | param0 = TCE(call)->param0; | |
845 | param1 = TCE(call)->param1; | |
1c79356b | 846 | |
6d2010ae | 847 | simple_unlock(&call->lock); |
39236c6e | 848 | timer_queue_unlock(queue); |
1c79356b | 849 | |
39236c6e | 850 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
316670eb | 851 | DECR_TIMER_CALLOUT | DBG_FUNC_START, |
39236c6e | 852 | call, VM_KERNEL_UNSLIDE(func), param0, param1, 0); |
2d21ac55 | 853 | |
4b17d6b6 | 854 | #if CONFIG_DTRACE |
39236c6e | 855 | DTRACE_TMR7(callout__start, timer_call_func_t, func, |
4b17d6b6 A |
856 | timer_call_param_t, param0, unsigned, call->flags, |
857 | 0, (call->ttd >> 32), | |
39236c6e | 858 | (unsigned) (call->ttd & 0xFFFFFFFF), call); |
2d21ac55 | 859 | #endif |
4b17d6b6 A |
860 | /* Maintain time-to-deadline in per-processor data |
861 | * structure for thread wakeup deadline statistics. | |
862 | */ | |
863 | uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd)); | |
864 | *ttdp = call->ttd; | |
1c79356b | 865 | (*func)(param0, param1); |
4b17d6b6 | 866 | *ttdp = 0; |
4b17d6b6 | 867 | #if CONFIG_DTRACE |
39236c6e A |
868 | DTRACE_TMR4(callout__end, timer_call_func_t, func, |
869 | param0, param1, call); | |
2d21ac55 A |
870 | #endif |
871 | ||
39236c6e | 872 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, |
316670eb | 873 | DECR_TIMER_CALLOUT | DBG_FUNC_END, |
39236c6e A |
874 | call, VM_KERNEL_UNSLIDE(func), param0, param1, 0); |
875 | call = NULL; | |
876 | timer_queue_lock_spin(queue); | |
877 | } else { | |
878 | if (__probable(rescan == FALSE)) { | |
879 | break; | |
880 | } else { | |
fe8ab488 A |
881 | int64_t skew = TCE(call)->deadline - call->soft_deadline; |
882 | assert(TCE(call)->deadline >= call->soft_deadline); | |
39236c6e A |
883 | |
884 | /* DRK: On a latency quality-of-service level change, | |
885 | * re-sort potentially rate-limited timers. The platform | |
886 | * layer determines which timers require | |
887 | * this. In the absence of the per-callout | |
888 | * synchronization requirement, a global resort could | |
889 | * be more efficient. The re-sort effectively | |
890 | * annuls all timer adjustments, i.e. the "soft | |
891 | * deadline" is the sort key. | |
892 | */ | |
893 | ||
894 | if (timer_resort_threshold(skew)) { | |
895 | if (__probable(simple_lock_try(&call->lock))) { | |
896 | timer_call_entry_dequeue(call); | |
897 | timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline); | |
898 | simple_unlock(&call->lock); | |
899 | call = NULL; | |
900 | } | |
901 | } | |
902 | if (call) { | |
903 | call = TIMER_CALL(queue_next(qe(call))); | |
904 | if (queue_end(&queue->head, qe(call))) | |
905 | break; | |
906 | } | |
907 | } | |
c910b4d9 | 908 | } |
1c79356b A |
909 | } |
910 | ||
39236c6e A |
911 | if (!queue_empty(&queue->head)) { |
912 | call = TIMER_CALL(queue_first(&queue->head)); | |
fe8ab488 A |
913 | cur_deadline = TCE(call)->deadline; |
914 | queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline: call->soft_deadline; | |
39236c6e A |
915 | } else { |
916 | queue->earliest_soft_deadline = cur_deadline = UINT64_MAX; | |
917 | } | |
1c79356b | 918 | |
39236c6e | 919 | timer_queue_unlock(queue); |
c910b4d9 | 920 | |
39236c6e | 921 | return (cur_deadline); |
1c79356b | 922 | } |
6d2010ae | 923 | |
39236c6e A |
924 | uint64_t |
925 | timer_queue_expire( | |
926 | mpqueue_head_t *queue, | |
927 | uint64_t deadline) | |
928 | { | |
929 | return timer_queue_expire_with_options(queue, deadline, FALSE); | |
930 | } | |
6d2010ae A |
931 | |
932 | extern int serverperfmode; | |
fe8ab488 | 933 | static uint32_t timer_queue_migrate_lock_skips; |
6d2010ae | 934 | /* |
39236c6e | 935 | * timer_queue_migrate() is called by timer_queue_migrate_cpu() |
6d2010ae A |
936 | * to move timer requests from the local processor (queue_from) |
937 | * to a target processor's (queue_to). | |
938 | */ | |
939 | int | |
940 | timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) | |
941 | { | |
942 | timer_call_t call; | |
943 | timer_call_t head_to; | |
944 | int timers_migrated = 0; | |
945 | ||
946 | DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to); | |
947 | ||
948 | assert(!ml_get_interrupts_enabled()); | |
949 | assert(queue_from != queue_to); | |
950 | ||
951 | if (serverperfmode) { | |
952 | /* | |
953 | * if we're running a high end server | |
954 | * avoid migrations... they add latency | |
955 | * and don't save us power under typical | |
956 | * server workloads | |
957 | */ | |
958 | return -4; | |
959 | } | |
960 | ||
961 | /* | |
962 | * Take both local (from) and target (to) timer queue locks while | |
963 | * moving the timers from the local queue to the target processor. | |
964 | * We assume that the target is always the boot processor. | |
965 | * But only move if all of the following is true: | |
966 | * - the target queue is non-empty | |
967 | * - the local queue is non-empty | |
968 | * - the local queue's first deadline is later than the target's | |
969 | * - the local queue contains no non-migrateable "local" call | |
970 | * so that we need not have the target resync. | |
971 | */ | |
972 | ||
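/*
 * Return value summary, derived from the checks below: -4 server perf mode,
 * -1 target queue empty, -2 local queue empty, -3 a TIMER_CALL_LOCAL entry
 * present, 0 local first deadline earlier than the target's; otherwise the
 * (positive) number of timers migrated.
 */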
39236c6e | 973 | timer_queue_lock_spin(queue_to); |
6d2010ae A |
974 | |
975 | head_to = TIMER_CALL(queue_first(&queue_to->head)); | |
976 | if (queue_empty(&queue_to->head)) { | |
977 | timers_migrated = -1; | |
978 | goto abort1; | |
979 | } | |
980 | ||
39236c6e | 981 | timer_queue_lock_spin(queue_from); |
6d2010ae A |
982 | |
983 | if (queue_empty(&queue_from->head)) { | |
984 | timers_migrated = -2; | |
985 | goto abort2; | |
986 | } | |
987 | ||
988 | call = TIMER_CALL(queue_first(&queue_from->head)); | |
fe8ab488 | 989 | if (TCE(call)->deadline < TCE(head_to)->deadline) { |
6d2010ae A |
990 | timers_migrated = 0; |
991 | goto abort2; | |
992 | } | |
993 | ||
994 | /* perform scan for non-migratable timers */ | |
995 | do { | |
996 | if (call->flags & TIMER_CALL_LOCAL) { | |
997 | timers_migrated = -3; | |
998 | goto abort2; | |
999 | } | |
1000 | call = TIMER_CALL(queue_next(qe(call))); | |
1001 | } while (!queue_end(&queue_from->head, qe(call))); | |
1002 | ||
1003 | /* migration loop itself -- both queues are locked */ | |
1004 | while (!queue_empty(&queue_from->head)) { | |
1005 | call = TIMER_CALL(queue_first(&queue_from->head)); | |
1006 | if (!simple_lock_try(&call->lock)) { | |
1007 | /* case (2b) lock order inversion, dequeue only */ | |
39236c6e A |
1008 | #ifdef TIMER_ASSERT |
1009 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1010 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
1011 | call, | |
fe8ab488 | 1012 | TCE(call)->queue, |
39236c6e A |
1013 | call->lock.interlock.lock_data, |
1014 | 0x2b, 0); | |
1015 | #endif | |
6d2010ae | 1016 | timer_queue_migrate_lock_skips++; |
39236c6e | 1017 | timer_call_entry_dequeue_async(call); |
6d2010ae A |
1018 | continue; |
1019 | } | |
1020 | timer_call_entry_dequeue(call); | |
1021 | timer_call_entry_enqueue_deadline( | |
fe8ab488 | 1022 | call, queue_to, TCE(call)->deadline); |
6d2010ae A |
1023 | timers_migrated++; |
1024 | simple_unlock(&call->lock); | |
1025 | } | |
39236c6e | 1026 | queue_from->earliest_soft_deadline = UINT64_MAX; |
6d2010ae | 1027 | abort2: |
39236c6e | 1028 | timer_queue_unlock(queue_from); |
6d2010ae | 1029 | abort1: |
39236c6e | 1030 | timer_queue_unlock(queue_to); |
6d2010ae A |
1031 | |
1032 | return timers_migrated; | |
1033 | } | |
39236c6e A |
1034 | |
1035 | void | |
1036 | timer_queue_trace_cpu(int ncpu) | |
1037 | { | |
1038 | timer_call_nosync_cpu( | |
1039 | ncpu, | |
1040 | (void(*)())timer_queue_trace, | |
1041 | (void*) timer_queue_cpu(ncpu)); | |
1042 | } | |
1043 | ||
1044 | void | |
1045 | timer_queue_trace( | |
1046 | mpqueue_head_t *queue) | |
1047 | { | |
1048 | timer_call_t call; | |
1049 | spl_t s; | |
1050 | ||
1051 | if (!kdebug_enable) | |
1052 | return; | |
1053 | ||
1054 | s = splclock(); | |
1055 | timer_queue_lock_spin(queue); | |
1056 | ||
1057 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1058 | DECR_TIMER_QUEUE | DBG_FUNC_START, | |
1059 | queue->count, mach_absolute_time(), 0, 0, 0); | |
1060 | ||
1061 | if (!queue_empty(&queue->head)) { | |
1062 | call = TIMER_CALL(queue_first(&queue->head)); | |
1063 | do { | |
1064 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1065 | DECR_TIMER_QUEUE | DBG_FUNC_NONE, | |
1066 | call->soft_deadline, | |
fe8ab488 A |
1067 | TCE(call)->deadline, |
1068 | TCE(call)->entry_time, | |
1069 | TCE(call)->func, | |
39236c6e A |
1070 | 0); |
1071 | call = TIMER_CALL(queue_next(qe(call))); | |
1072 | } while (!queue_end(&queue->head, qe(call))); | |
1073 | } | |
1074 | ||
1075 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1076 | DECR_TIMER_QUEUE | DBG_FUNC_END, | |
1077 | queue->count, mach_absolute_time(), 0, 0, 0); | |
1078 | ||
1079 | timer_queue_unlock(queue); | |
1080 | splx(s); | |
1081 | } | |
1082 | ||
1083 | void | |
1084 | timer_longterm_dequeued_locked(timer_call_t call) | |
1085 | { | |
1086 | timer_longterm_t *tlp = &timer_longterm; | |
1087 | ||
1088 | tlp->dequeues++; | |
1089 | if (call == tlp->threshold.call) | |
1090 | tlp->threshold.call = NULL; | |
1091 | } | |
1092 | ||
1093 | /* | |
1094 | * Place a timer call in the longterm list | |
1095 | * and adjust the next timer callout deadline if the new timer is first. | |
1096 | */ | |
1097 | mpqueue_head_t * | |
1098 | timer_longterm_enqueue_unlocked(timer_call_t call, | |
1099 | uint64_t now, | |
1100 | uint64_t deadline, | |
fe8ab488 A |
1101 | mpqueue_head_t **old_queue, |
1102 | uint64_t soft_deadline, | |
1103 | uint64_t ttd, | |
1104 | timer_call_param_t param1, | |
1105 | uint32_t callout_flags) | |
39236c6e A |
1106 | { |
1107 | timer_longterm_t *tlp = &timer_longterm; | |
1108 | boolean_t update_required = FALSE; | |
1109 | uint64_t longterm_threshold; | |
1110 | ||
1111 | longterm_threshold = now + tlp->threshold.interval; | |
1112 | ||
1113 | /* | |
1114 | * Return NULL without doing anything if: | |
1115 | * - this timer is local, or | |
1116 | * - the longterm mechanism is disabled, or | |
1117 | * - this deadline is too short. | |
1118 | */ | |
fe8ab488 | 1119 | if ((callout_flags & TIMER_CALL_LOCAL) != 0 || |
39236c6e | 1120 | (tlp->threshold.interval == TIMER_LONGTERM_NONE) || |
fe8ab488 | 1121 | (deadline <= longterm_threshold)) |
39236c6e A |
1122 | return NULL; |
1123 | ||
1124 | /* | |
1125 | * Remove timer from its current queue, if any. | |
1126 | */ | |
1127 | *old_queue = timer_call_dequeue_unlocked(call); | |
1128 | ||
1129 | /* | |
1130 | * Lock the longterm queue, queue timer and determine | |
1131 | * whether an update is necessary. | |
1132 | */ | |
1133 | assert(!ml_get_interrupts_enabled()); | |
1134 | simple_lock(&call->lock); | |
1135 | timer_queue_lock_spin(timer_longterm_queue); | |
fe8ab488 A |
1136 | TCE(call)->deadline = deadline; |
1137 | TCE(call)->param1 = param1; | |
1138 | call->ttd = ttd; | |
1139 | call->soft_deadline = soft_deadline; | |
1140 | call->flags = callout_flags; | |
39236c6e | 1141 | timer_call_entry_enqueue_tail(call, timer_longterm_queue); |
39236c6e A |
1142 | |
1143 | tlp->enqueues++; | |
1144 | ||
1145 | /* | |
1146 | * We'll need to update the currently set threshold timer | |
1147 | * if the new deadline is sooner and no sooner update is in flight. | |
1148 | */ | |
1149 | if (deadline < tlp->threshold.deadline && | |
1150 | deadline < tlp->threshold.preempted) { | |
1151 | tlp->threshold.preempted = deadline; | |
1152 | tlp->threshold.call = call; | |
1153 | update_required = TRUE; | |
1154 | } | |
1155 | timer_queue_unlock(timer_longterm_queue); | |
1156 | simple_unlock(&call->lock); | |
1157 | ||
1158 | if (update_required) { | |
fe8ab488 A |
1159 | /* |
1160 | * Note: this call expects that calling the master cpu | |
1161 | * alone does not involve locking the topo lock. | |
1162 | */ | |
39236c6e A |
1163 | timer_call_nosync_cpu( |
1164 | master_cpu, | |
1165 | (void (*)(void *)) timer_longterm_update, | |
1166 | (void *)tlp); | |
1167 | } | |
1168 | ||
1169 | return timer_longterm_queue; | |
1170 | } | |
1171 | ||
1172 | /* | |
1173 | * Scan for timers below the longterm threshold. | |
1174 | * Move these to the local timer queue (of the boot processor on which the | |
1175 | * calling thread is running). | |
1176 | * Both the local (boot) queue and the longterm queue are locked. | |
1177 | * The scan is similar to the timer migrate sequence but is performed by | |
1178 | * successively examining each timer on the longterm queue: | |
1179 | * - if within the short-term threshold | |
1180 | * - enter on the local queue (unless being deleted), | |
1181 | * - otherwise: | |
1182 | * - if sooner, deadline becomes the next threshold deadline. | |
1183 | */ | |
1184 | void | |
1185 | timer_longterm_scan(timer_longterm_t *tlp, | |
1186 | uint64_t now) | |
1187 | { | |
1188 | queue_entry_t qe; | |
1189 | timer_call_t call; | |
1190 | uint64_t threshold; | |
1191 | uint64_t deadline; | |
1192 | mpqueue_head_t *timer_master_queue; | |
1193 | ||
1194 | assert(!ml_get_interrupts_enabled()); | |
1195 | assert(cpu_number() == master_cpu); | |
1196 | ||
1197 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) | |
1198 | threshold = now + tlp->threshold.interval; | |
1199 | else | |
1200 | threshold = TIMER_LONGTERM_NONE; | |
1201 | ||
1202 | tlp->threshold.deadline = TIMER_LONGTERM_NONE; | |
1203 | tlp->threshold.call = NULL; | |
1204 | ||
1205 | if (queue_empty(&timer_longterm_queue->head)) | |
1206 | return; | |
1207 | ||
1208 | timer_master_queue = timer_queue_cpu(master_cpu); | |
1209 | timer_queue_lock_spin(timer_master_queue); | |
1210 | ||
1211 | qe = queue_first(&timer_longterm_queue->head); | |
1212 | while (!queue_end(&timer_longterm_queue->head, qe)) { | |
1213 | call = TIMER_CALL(qe); | |
1214 | deadline = call->soft_deadline; | |
1215 | qe = queue_next(qe); | |
1216 | if (!simple_lock_try(&call->lock)) { | |
1217 | /* case (2c) lock order inversion, dequeue only */ | |
1218 | #ifdef TIMER_ASSERT | |
1219 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1220 | DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, | |
1221 | call, | |
fe8ab488 | 1222 | TCE(call)->queue, |
39236c6e A |
1223 | call->lock.interlock.lock_data, |
1224 | 0x2c, 0); | |
1225 | #endif | |
1226 | timer_call_entry_dequeue_async(call); | |
1227 | continue; | |
1228 | } | |
1229 | if (deadline < threshold) { | |
1230 | /* | |
1231 | * This timer needs moving (escalating) | |
1232 | * to the local (boot) processor's queue. | |
1233 | */ | |
1234 | #ifdef TIMER_ASSERT | |
1235 | if (deadline < now) | |
1236 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1237 | DECR_TIMER_OVERDUE | DBG_FUNC_NONE, | |
1238 | call, | |
1239 | deadline, | |
1240 | now, | |
1241 | threshold, | |
1242 | 0); | |
1243 | #endif | |
1244 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1245 | DECR_TIMER_ESCALATE | DBG_FUNC_NONE, | |
1246 | call, | |
fe8ab488 A |
1247 | TCE(call)->deadline, |
1248 | TCE(call)->entry_time, | |
1249 | TCE(call)->func, | |
39236c6e A |
1250 | 0); |
1251 | tlp->escalates++; | |
1252 | timer_call_entry_dequeue(call); | |
1253 | timer_call_entry_enqueue_deadline( | |
fe8ab488 | 1254 | call, timer_master_queue, TCE(call)->deadline); |
39236c6e A |
1255 | /* |
1256 | * A side-effect of the following call is to update | |
1257 | * the actual hardware deadline if required. | |
1258 | */ | |
1259 | (void) timer_queue_assign(deadline); | |
1260 | } else { | |
1261 | if (deadline < tlp->threshold.deadline) { | |
1262 | tlp->threshold.deadline = deadline; | |
1263 | tlp->threshold.call = call; | |
1264 | } | |
1265 | } | |
1266 | simple_unlock(&call->lock); | |
1267 | } | |
1268 | ||
1269 | timer_queue_unlock(timer_master_queue); | |
1270 | } | |
1271 | ||
1272 | void | |
1273 | timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1) | |
1274 | { | |
1275 | timer_longterm_t *tlp = (timer_longterm_t *) p0; | |
1276 | ||
1277 | timer_longterm_update(tlp); | |
1278 | } | |
1279 | ||
1280 | void | |
1281 | timer_longterm_update_locked(timer_longterm_t *tlp) | |
1282 | { | |
1283 | uint64_t latency; | |
1284 | ||
1285 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1286 | DECR_TIMER_UPDATE | DBG_FUNC_START, | |
1287 | &tlp->queue, | |
1288 | tlp->threshold.deadline, | |
1289 | tlp->threshold.preempted, | |
1290 | tlp->queue.count, 0); | |
1291 | ||
1292 | tlp->scan_time = mach_absolute_time(); | |
1293 | if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) { | |
1294 | tlp->threshold.preempts++; | |
1295 | tlp->threshold.deadline = tlp->threshold.preempted; | |
1296 | tlp->threshold.preempted = TIMER_LONGTERM_NONE; | |
1297 | /* | |
1298 | * Note: in the unlikely event that a pre-empted timer has | |
1299 | * itself been cancelled, we'll simply re-scan later at the | |
1300 | * time of the preempted/cancelled timer. | |
1301 | */ | |
1302 | } else { | |
1303 | tlp->threshold.scans++; | |
1304 | ||
1305 | /* | |
1306 | * Maintain a moving average of our wakeup latency. | |
1307 | * Clamp latency to 0 and ignore above threshold interval. | |
1308 | */ | |
1309 | if (tlp->scan_time > tlp->threshold.deadline_set) | |
1310 | latency = tlp->scan_time - tlp->threshold.deadline_set; | |
1311 | else | |
1312 | latency = 0; | |
1313 | if (latency < tlp->threshold.interval) { | |
1314 | tlp->threshold.latency_min = | |
1315 | MIN(tlp->threshold.latency_min, latency); | |
1316 | tlp->threshold.latency_max = | |
1317 | MAX(tlp->threshold.latency_max, latency); | |
1318 | tlp->threshold.latency = | |
1319 | (tlp->threshold.latency*99 + latency) / 100; | |
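/* i.e. an exponentially weighted moving average placing a 1% weight
 * on each new sample: latency = (99 * latency + sample) / 100.
 */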
1320 | } | |
1321 | ||
1322 | timer_longterm_scan(tlp, tlp->scan_time); | |
1323 | } | |
1324 | ||
1325 | tlp->threshold.deadline_set = tlp->threshold.deadline; | |
1326 | /* The next deadline timer to be set is adjusted */ | |
1327 | if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { | |
1328 | tlp->threshold.deadline_set -= tlp->threshold.margin; | |
1329 | tlp->threshold.deadline_set -= tlp->threshold.latency; | |
1330 | } | |
1331 | ||
1332 | TIMER_KDEBUG_TRACE(KDEBUG_TRACE, | |
1333 | DECR_TIMER_UPDATE | DBG_FUNC_END, | |
1334 | &tlp->queue, | |
1335 | tlp->threshold.deadline, | |
1336 | tlp->threshold.scans, | |
1337 | tlp->queue.count, 0); | |
1338 | } | |
1339 | ||
1340 | void | |
1341 | timer_longterm_update(timer_longterm_t *tlp) | |
1342 | { | |
1343 | spl_t s = splclock(); | |
1344 | ||
1345 | timer_queue_lock_spin(timer_longterm_queue); | |
1346 | ||
1347 | if (cpu_number() != master_cpu) | |
1348 | panic("timer_longterm_update_master() on non-boot cpu"); | |
1349 | ||
1350 | timer_longterm_update_locked(tlp); | |
1351 | ||
1352 | if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) | |
1353 | timer_call_enter( | |
1354 | &tlp->threshold.timer, | |
1355 | tlp->threshold.deadline_set, | |
1356 | TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); | |
1357 | ||
1358 | timer_queue_unlock(timer_longterm_queue); | |
1359 | splx(s); | |
1360 | } | |
1361 | ||
1362 | void | |
1363 | timer_longterm_init(void) | |
1364 | { | |
1365 | uint32_t longterm; | |
1366 | timer_longterm_t *tlp = &timer_longterm; | |
1367 | ||
1368 | DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue); | |
1369 | ||
1370 | /* | |
15129b1c A |
1371 | * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD |
1372 | * or TIMER_LONGTERM_NONE (disabled) for server; | |
1373 | * overridden by the "longterm" boot-arg. |
39236c6e | 1374 | */ |
15129b1c A |
1375 | tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE |
1376 | : TIMER_LONGTERM_THRESHOLD; | |
39236c6e A |
1377 | if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) { |
1378 | tlp->threshold.interval = (longterm == 0) ? | |
1379 | TIMER_LONGTERM_NONE : | |
1380 | longterm * NSEC_PER_MSEC; | |
1381 | } | |
1382 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) { | |
1383 | printf("Longterm timer threshold: %llu ms\n", | |
1384 | tlp->threshold.interval / NSEC_PER_MSEC); | |
1385 | kprintf("Longterm timer threshold: %llu ms\n", | |
1386 | tlp->threshold.interval / NSEC_PER_MSEC); | |
1387 | nanoseconds_to_absolutetime(tlp->threshold.interval, | |
1388 | &tlp->threshold.interval); | |
1389 | tlp->threshold.margin = tlp->threshold.interval / 10; | |
1390 | tlp->threshold.latency_min = EndOfAllTime; | |
1391 | tlp->threshold.latency_max = 0; | |
1392 | } | |
1393 | ||
1394 | tlp->threshold.preempted = TIMER_LONGTERM_NONE; | |
1395 | tlp->threshold.deadline = TIMER_LONGTERM_NONE; | |
1396 | ||
1397 | lck_attr_setdefault(&timer_longterm_lck_attr); | |
1398 | lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr); | |
1399 | lck_grp_init(&timer_longterm_lck_grp, | |
1400 | "timer_longterm", &timer_longterm_lck_grp_attr); | |
1401 | mpqueue_init(&tlp->queue, | |
1402 | &timer_longterm_lck_grp, &timer_longterm_lck_attr); | |
1403 | ||
1404 | timer_call_setup(&tlp->threshold.timer, | |
1405 | timer_longterm_callout, (timer_call_param_t) tlp); | |
1406 | ||
1407 | timer_longterm_queue = &tlp->queue; | |
1408 | } | |
1409 | ||
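/*
 * Sysctl plumbing: the enum below names the values reported by
 * timer_sysctl_get() -- the longterm threshold (in ms), the longterm queue
 * count, and the enqueue/dequeue/escalate/scan/preempt/latency statistics
 * maintained above. The corresponding sysctl handler is assumed to live
 * outside this file.
 */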
1410 | enum { | |
1411 | THRESHOLD, QCOUNT, | |
1412 | ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS, | |
1413 | LATENCY, LATENCY_MIN, LATENCY_MAX | |
1414 | }; | |
1415 | uint64_t | |
1416 | timer_sysctl_get(int oid) | |
1417 | { | |
1418 | timer_longterm_t *tlp = &timer_longterm; | |
1419 | ||
1420 | switch (oid) { | |
1421 | case THRESHOLD: | |
1422 | return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ? | |
1423 | 0 : tlp->threshold.interval / NSEC_PER_MSEC; | |
1424 | case QCOUNT: | |
1425 | return tlp->queue.count; | |
1426 | case ENQUEUES: | |
1427 | return tlp->enqueues; | |
1428 | case DEQUEUES: | |
1429 | return tlp->dequeues; | |
1430 | case ESCALATES: | |
1431 | return tlp->escalates; | |
1432 | case SCANS: | |
1433 | return tlp->threshold.scans; | |
1434 | case PREEMPTS: | |
1435 | return tlp->threshold.preempts; | |
1436 | case LATENCY: | |
1437 | return tlp->threshold.latency; | |
1438 | case LATENCY_MIN: | |
1439 | return tlp->threshold.latency_min; | |
1440 | case LATENCY_MAX: | |
1441 | return tlp->threshold.latency_max; | |
1442 | default: | |
1443 | return 0; | |
1444 | } | |
1445 | } | |
1446 | ||
1447 | /* | |
1448 | * timer_master_scan() is the inverse of timer_longterm_scan() | |
1449 | * since it un-escalates timers to the longterm queue. | |
1450 | */ | |
1451 | static void | |
1452 | timer_master_scan(timer_longterm_t *tlp, | |
1453 | uint64_t now) | |
1454 | { | |
1455 | queue_entry_t qe; | |
1456 | timer_call_t call; | |
1457 | uint64_t threshold; | |
1458 | uint64_t deadline; | |
1459 | mpqueue_head_t *timer_master_queue; | |
1460 | ||
1461 | if (tlp->threshold.interval != TIMER_LONGTERM_NONE) | |
1462 | threshold = now + tlp->threshold.interval; | |
1463 | else | |
1464 | threshold = TIMER_LONGTERM_NONE; | |
1465 | ||
1466 | timer_master_queue = timer_queue_cpu(master_cpu); | |
1467 | timer_queue_lock_spin(timer_master_queue); | |
1468 | ||
1469 | qe = queue_first(&timer_master_queue->head); | |
1470 | while (!queue_end(&timer_master_queue->head, qe)) { | |
1471 | call = TIMER_CALL(qe); | |
fe8ab488 | 1472 | deadline = TCE(call)->deadline; |
39236c6e A |
1473 | qe = queue_next(qe); |
1474 | if ((call->flags & TIMER_CALL_LOCAL) != 0) | |
1475 | continue; | |
1476 | if (!simple_lock_try(&call->lock)) { | |
1477 | /* case (2c) lock order inversion, dequeue only */ | |
1478 | timer_call_entry_dequeue_async(call); | |
1479 | continue; | |
1480 | } | |
1481 | if (deadline > threshold) { | |
1482 | /* move from master to longterm */ | |
1483 | timer_call_entry_dequeue(call); | |
1484 | timer_call_entry_enqueue_tail(call, timer_longterm_queue); | |
1485 | if (deadline < tlp->threshold.deadline) { | |
1486 | tlp->threshold.deadline = deadline; | |
1487 | tlp->threshold.call = call; | |
1488 | } | |
1489 | } | |
1490 | simple_unlock(&call->lock); | |
1491 | } | |
1492 | timer_queue_unlock(timer_master_queue); | |
1493 | } | |
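/*
 * Illustrative sketch (editor's addition): the de-escalation test applied in
 * the loop above, pulled out as a hypothetical predicate.  A timer moves from
 * the master queue to the long-term queue only if it is not CPU-local and its
 * deadline lies beyond the long-term threshold measured from "now".
 */
static boolean_t
example_is_longterm_candidate(timer_call_t call, uint64_t now, timer_longterm_t *tlp)
{
	if (tlp->threshold.interval == TIMER_LONGTERM_NONE)
		return FALSE;
	if ((call->flags & TIMER_CALL_LOCAL) != 0)
		return FALSE;
	return (TCE(call)->deadline > now + tlp->threshold.interval) ? TRUE : FALSE;
}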
1494 | ||
1495 | static void | |
1496 | timer_sysctl_set_threshold(uint64_t value) | |
1497 | { | |
1498 | timer_longterm_t *tlp = &timer_longterm; | |
1499 | spl_t s = splclock(); | |
1500 | boolean_t threshold_increase; | |
1501 | ||
1502 | timer_queue_lock_spin(timer_longterm_queue); | |
1503 | ||
1504 | timer_call_cancel(&tlp->threshold.timer); | |
1505 | ||
1506 | /* | |
1507 | * Set the new threshold and note whether it's increasing. | 
1508 | */ | |
1509 | if (value == 0) { | |
1510 | tlp->threshold.interval = TIMER_LONGTERM_NONE; | |
1511 | threshold_increase = TRUE; | |
1512 | timer_call_cancel(&tlp->threshold.timer); | |
1513 | } else { | |
1514 | uint64_t old_interval = tlp->threshold.interval; | |
1515 | tlp->threshold.interval = value * NSEC_PER_MSEC; | |
1516 | nanoseconds_to_absolutetime(tlp->threshold.interval, | |
1517 | &tlp->threshold.interval); | |
1518 | tlp->threshold.margin = tlp->threshold.interval / 10; | |
1519 | if (old_interval == TIMER_LONGTERM_NONE) | |
1520 | threshold_increase = FALSE; | |
1521 | else | |
1522 | threshold_increase = (tlp->threshold.interval > old_interval); | |
1523 | } | |
1524 | ||
1525 | if (threshold_increase /* or removal */) { | |
1526 | /* Escalate timers from the longterm queue */ | |
1527 | timer_longterm_scan(tlp, mach_absolute_time()); | |
1528 | } else /* decrease or addition */ { | |
1529 | /* | |
1530 | * We scan the local/master queue for timers that are now longterm. | 
1531 | * To be strictly correct, we should scan all processor queues, | 
1532 | * but timer migration results in most timers gravitating to the | 
1533 | * master processor in any case. | 
1534 | */ | |
1535 | timer_master_scan(tlp, mach_absolute_time()); | |
1536 | } | |
1537 | ||
1538 | /* Set new timer accordingly */ | |
1539 | tlp->threshold.deadline_set = tlp->threshold.deadline; | |
1540 | if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { | |
1541 | tlp->threshold.deadline_set -= tlp->threshold.margin; | |
1542 | tlp->threshold.deadline_set -= tlp->threshold.latency; | |
1543 | timer_call_enter( | |
1544 | &tlp->threshold.timer, | |
1545 | tlp->threshold.deadline_set, | |
1546 | TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); | |
1547 | } | |
1548 | ||
1549 | /* Reset stats */ | |
1550 | tlp->enqueues = 0; | |
1551 | tlp->dequeues = 0; | |
1552 | tlp->escalates = 0; | |
1553 | tlp->threshold.scans = 0; | |
1554 | tlp->threshold.preempts = 0; | |
1555 | tlp->threshold.latency = 0; | |
1556 | tlp->threshold.latency_min = EndOfAllTime; | |
1557 | tlp->threshold.latency_max = 0; | |
1558 | ||
1559 | timer_queue_unlock(timer_longterm_queue); | |
1560 | splx(s); | |
1561 | } | |
1562 | ||
1563 | int | |
1564 | timer_sysctl_set(int oid, uint64_t value) | |
1565 | { | |
1566 | switch (oid) { | |
1567 | case THRESHOLD: | |
1568 | timer_call_cpu( | |
1569 | master_cpu, | |
1570 | (void (*)(void *)) timer_sysctl_set_threshold, | |
1571 | (void *) value); | |
1572 | return KERN_SUCCESS; | |
1573 | default: | |
1574 | return KERN_INVALID_ARGUMENT; | |
1575 | } | |
1576 | } | |
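/*
 * Usage sketch (editor's addition): setting a new threshold is funneled
 * through timer_call_cpu() so that timer_sysctl_set_threshold() runs on the
 * master CPU, which owns the master and long-term queues.  A hypothetical
 * caller lowering the threshold to 500 ms:
 *
 *	(void) timer_sysctl_set(THRESHOLD, 500);
 *
 * A decrease triggers timer_master_scan() to move newly-qualifying timers to
 * the long-term queue; an increase (or passing 0 to disable) triggers
 * timer_longterm_scan() to escalate timers back to the master queue.
 */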
fe8ab488 A |
1577 | |
1578 | ||
1579 | /* Select timer coalescing window based on per-task quality-of-service hints */ | |
1580 | static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { | |
1581 | uint32_t latency_qos; | |
1582 | boolean_t adjusted = FALSE; | |
1583 | task_t ctask = t->task; | |
1584 | ||
1585 | if (ctask) { | |
1586 | latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS); | |
1587 | ||
1588 | assert(latency_qos <= NUM_LATENCY_QOS_TIERS); | |
1589 | ||
1590 | if (latency_qos) { | |
1591 | *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1]; | |
1592 | *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1]; | |
1593 | *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1]; | |
1594 | adjusted = TRUE; | |
1595 | } | |
1596 | } | |
1597 | return adjusted; | |
1598 | } | |
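/*
 * Usage sketch (editor's addition, hypothetical caller): tcoal_qos_adjust()
 * writes through its out-parameters only when the thread's task carries a
 * nonzero latency QoS tier, so a caller can fall back to another coalescing
 * tier when it returns FALSE.
 */
static void
example_apply_qos_window(thread_t t)
{
	int32_t shift = 0;
	uint64_t max_abstime = 0;
	boolean_t ratelimited = FALSE;

	if (!tcoal_qos_adjust(t, &shift, &max_abstime, &ratelimited)) {
		/* No latency QoS hint: fall back to, e.g., the timeshare tier */
		shift = tcoal_prio_params.timer_coalesce_ts_shift;
		max_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
	}
	/* shift and max_abstime now bound the coalescing window for this thread */
}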
1599 | ||
1600 | ||
1601 | /* Adjust timer deadlines based on priority of the thread and the | |
1602 | * urgency value provided at timeout establishment. With this mechanism, | |
1603 | * timers are no longer necessarily sorted in order of soft deadline | |
1604 | * on a given timer queue, i.e. they may be differentially skewed. | |
1605 | * In the current scheme, this could lead to fewer pending timers | |
1606 | * processed than is technically possible when the HW deadline arrives. | |
1607 | */ | |
1608 | static void | |
1609 | timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) { | |
1610 | int16_t tpri = cthread->sched_pri; | |
1611 | if ((urgency & TIMER_CALL_USER_MASK) != 0) { | |
1612 | if (tpri >= BASEPRI_RTQUEUES || | |
1613 | urgency == TIMER_CALL_USER_CRITICAL) { | |
1614 | *tshift = tcoal_prio_params.timer_coalesce_rt_shift; | |
1615 | *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max; | |
1616 | TCOAL_PRIO_STAT(rt_tcl); | |
1617 | } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) || | |
1618 | (urgency == TIMER_CALL_USER_BACKGROUND)) { | |
1619 | /* Determine if timer should be subjected to a lower QoS */ | |
1620 | if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { | |
1621 | if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) { | |
1622 | return; | |
1623 | } else { | |
1624 | *pratelimited = FALSE; | |
1625 | } | |
1626 | } | |
1627 | *tshift = tcoal_prio_params.timer_coalesce_bg_shift; | |
1628 | *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; | |
1629 | TCOAL_PRIO_STAT(bg_tcl); | |
1630 | } else if (tpri >= MINPRI_KERNEL) { | |
1631 | *tshift = tcoal_prio_params.timer_coalesce_kt_shift; | |
1632 | *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; | |
1633 | TCOAL_PRIO_STAT(kt_tcl); | |
1634 | } else if (cthread->sched_mode == TH_MODE_FIXED) { | |
1635 | *tshift = tcoal_prio_params.timer_coalesce_fp_shift; | |
1636 | *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max; | |
1637 | TCOAL_PRIO_STAT(fp_tcl); | |
1638 | } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) { | |
1639 | TCOAL_PRIO_STAT(qos_tcl); | |
1640 | } else if (cthread->sched_mode == TH_MODE_TIMESHARE) { | |
1641 | *tshift = tcoal_prio_params.timer_coalesce_ts_shift; | |
1642 | *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max; | |
1643 | TCOAL_PRIO_STAT(ts_tcl); | |
1644 | } else { | |
1645 | TCOAL_PRIO_STAT(nc_tcl); | |
1646 | } | |
1647 | } else if (urgency == TIMER_CALL_SYS_BACKGROUND) { | |
1648 | *tshift = tcoal_prio_params.timer_coalesce_bg_shift; | |
1649 | *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max; | |
1650 | TCOAL_PRIO_STAT(bg_tcl); | |
1651 | } else { | |
1652 | *tshift = tcoal_prio_params.timer_coalesce_kt_shift; | |
1653 | *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max; | |
1654 | TCOAL_PRIO_STAT(kt_tcl); | |
1655 | } | |
1656 | } | |
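/*
 * Worked example (editor's addition, hypothetical tunables): the differential
 * skew described above can reorder timers relative to their soft deadlines.
 * Suppose timer A is armed by a realtime thread 10 ms out with a shift of 10
 * (leeway ~= 9.8 us), while timer B is armed by a background thread 9 ms out
 * with a shift of 2 (leeway ~= 2.25 ms).  A's skewed deadline (~10.01 ms)
 * then precedes B's (~11.25 ms) even though B's soft deadline is earlier, so
 * B may still be pending when the hardware deadline for A fires.
 */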
1657 | ||
1658 | ||
1659 | int timer_user_idle_level; | |
1660 | ||
1661 | uint64_t | |
1662 | timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited) | |
1663 | { | |
1664 | int32_t tcs_shift = 0; | |
1665 | uint64_t tcs_max_abstime = 0; | |
1666 | uint64_t adjval; | |
1667 | uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK); | |
1668 | ||
1669 | if (mach_timer_coalescing_enabled && | |
1670 | (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) { | |
1671 | timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited); | |
1672 | ||
1673 | if (tcs_shift >= 0) | |
1674 | adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime); | |
1675 | else | |
1676 | adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime); | |
1677 | /* Apply adjustments derived from "user idle level" heuristic */ | |
1678 | adjval += (adjval * timer_user_idle_level) >> 7; | |
1679 | return adjval; | |
1680 | } else { | |
1681 | return 0; | |
1682 | } | |
1683 | } | |
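/*
 * Worked example (editor's addition, hypothetical numbers): with a positive
 * shift of 3 and a wait of 100 ms, the raw leeway is 100 ms >> 3 = 12.5 ms,
 * clamped to the tier's abstime max; a negative shift multiplies the wait
 * instead, still subject to the same clamp.  The user-idle heuristic then
 * scales the result by (1 + timer_user_idle_level/128): a level of 0 leaves
 * it unchanged, 64 adds 50% (18.75 ms), and the maximum level of 128 doubles
 * it to 25 ms.
 */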
1684 | ||
1685 | int | |
1686 | timer_get_user_idle_level(void) { | |
1687 | return timer_user_idle_level; | |
1688 | } | |
1689 | ||
1690 | kern_return_t timer_set_user_idle_level(int ilevel) { | |
1691 | boolean_t do_reeval = FALSE; | |
1692 | ||
1693 | if ((ilevel < 0) || (ilevel > 128)) | |
1694 | return KERN_INVALID_ARGUMENT; | |
1695 | ||
1696 | if (ilevel < timer_user_idle_level) { | |
1697 | do_reeval = TRUE; | |
1698 | } | |
1699 | ||
1700 | timer_user_idle_level = ilevel; | |
1701 | ||
1702 | if (do_reeval) | |
1703 | ml_timer_evaluate(); | |
1704 | ||
1705 | return KERN_SUCCESS; | |
1706 | } |
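/*
 * Usage sketch (editor's addition): the idle level is a 0..128 dial consumed
 * by timer_call_slop() above.  Lowering it shrinks coalescing windows, so a
 * hypothetical caller dropping back to 0 forces an immediate re-evaluation of
 * armed timers via ml_timer_evaluate():
 *
 *	kern_return_t kr = timer_set_user_idle_level(0);
 *	assert(kr == KERN_SUCCESS);
 */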