/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * @APPLE_FREE_COPYRIGHT@
 */
/*
 *	File:		timer.c
 *	Purpose:	Routines for handling the machine independent timer.
 */

#include <mach/mach_types.h>

#include <kern/timer_queue.h>
#include <kern/timer_call.h>
#include <kern/clock.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/macro_help.h>
#include <kern/spl.h>
#include <kern/pms.h>

#include <machine/commpage.h>
#include <machine/machine_routines.h>

#include <sys/kdebug.h>
#include <i386/cpu_data.h>
#include <i386/cpu_topology.h>
#include <i386/cpu_threads.h>

uint32_t spurious_timers;

/*
 * Event timer interrupt.
 *
 * XXX a drawback of this implementation is that events serviced earlier must not set deadlines
 *     that occur before the entire chain completes.
 *
 * XXX a better implementation would use a set of generic callouts and iterate over them
 */
void
timer_intr(int		user_mode,
	   uint64_t	rip)
{
	uint64_t	abstime;
	rtclock_timer_t	*mytimer;
	cpu_data_t	*pp;
	int64_t		latency;
	uint64_t	pmdeadline;
	boolean_t	timer_processed = FALSE;

	pp = current_cpu_datap();

	SCHED_STATS_TIMER_POP(current_processor());

	abstime = mach_absolute_time();		/* Get the time now */

	/* has a pending clock timer expired? */
	mytimer = &pp->rtclock_timer;		/* Point to the event timer */

	if ((timer_processed = ((mytimer->deadline <= abstime) ||
	    (abstime >= (mytimer->queue.earliest_soft_deadline))))) {
		/*
		 * Log interrupt service latency (-ve value expected by tool);
		 * a non-PM event is expected next.
		 * The requested deadline may be earlier than when it was set
		 * - use MAX to avoid reporting bogus latencies.
		 */
		latency = (int64_t) (abstime - MAX(mytimer->deadline,
						   mytimer->when_set));
		/* Log zero timer latencies when opportunistically processing
		 * coalesced timers.
		 */
		if (latency < 0) {
			TCOAL_DEBUG(0xEEEE0000, abstime, mytimer->queue.earliest_soft_deadline, abstime - mytimer->queue.earliest_soft_deadline, 0, 0);
			latency = 0;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_TRAP_LATENCY | DBG_FUNC_NONE,
			-latency,
			((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)),
			user_mode, 0, 0);

		mytimer->has_expired = TRUE;	/* Remember that we popped */
		mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
		mytimer->has_expired = FALSE;

		/* Get the time again since we ran a bit */
		abstime = mach_absolute_time();
		mytimer->when_set = abstime;
	}

	/* is it time for a power management state change? */
	if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_PM_DEADLINE | DBG_FUNC_START,
			0, 0, 0, 0, 0);
		pmCPUDeadline(pp);
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_PM_DEADLINE | DBG_FUNC_END,
			0, 0, 0, 0, 0);
		timer_processed = TRUE;
	}

	/* schedule our next deadline */
	x86_lcpu()->rtcDeadline = EndOfAllTime;
	timer_resync_deadlines();

	if (__improbable(timer_processed == FALSE))
		spurious_timers++;
}
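/*
 * Note: the cached rtcDeadline is invalidated (set to EndOfAllTime) before
 * resyncing, presumably so that timer_resync_deadlines() reprograms the
 * hardware pop from scratch rather than comparing against a stale value.
 */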

/*
 * Set the clock deadline.
 */
void timer_set_deadline(uint64_t deadline)
{
	rtclock_timer_t	*mytimer;
	spl_t		s;
	cpu_data_t	*pp;

	s = splclock();				/* no interruptions */
	pp = current_cpu_datap();

	mytimer = &pp->rtclock_timer;		/* Point to the timer itself */
	mytimer->deadline = deadline;		/* Set new expiration time */
	mytimer->when_set = mach_absolute_time();

	timer_resync_deadlines();

	splx(s);
}

/*
 * Re-evaluate the outstanding deadlines and select the most proximate.
 *
 * Should be called at splclock.
 */
void
timer_resync_deadlines(void)
{
	uint64_t	deadline = EndOfAllTime;
	uint64_t	pmdeadline;
	rtclock_timer_t	*mytimer;
	spl_t		s = splclock();
	cpu_data_t	*pp;
	uint32_t	decr;

	pp = current_cpu_datap();
	if (!pp->cpu_running)
		/* There's really nothing to do if this processor is down */
		return;

	/*
	 * If we have a clock timer set, pick that.
	 */
	mytimer = &pp->rtclock_timer;
	if (!mytimer->has_expired &&
	    0 < mytimer->deadline && mytimer->deadline < EndOfAllTime)
		deadline = mytimer->deadline;

	/*
	 * If we have a power management deadline, see if that's earlier.
	 */
	pmdeadline = pmCPUGetDeadline(pp);
	if (0 < pmdeadline && pmdeadline < deadline)
		deadline = pmdeadline;

	/*
	 * Go and set the "pop" event.
	 */
	decr = (uint32_t) setPop(deadline);

	/* Record non-PM deadline for latency tool */
	if (decr != 0 && deadline != pmdeadline) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			DECR_SET_DEADLINE | DBG_FUNC_NONE,
			decr, 2,
			deadline,
			mytimer->queue.count, 0);
	}
	splx(s);
}

void
timer_queue_expire_local(
	__unused void		*arg)
{
	rtclock_timer_t	*mytimer;
	uint64_t	abstime;
	cpu_data_t	*pp;

	pp = current_cpu_datap();

	mytimer = &pp->rtclock_timer;
	abstime = mach_absolute_time();

	mytimer->has_expired = TRUE;
	mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
	mytimer->has_expired = FALSE;
	mytimer->when_set = mach_absolute_time();

	timer_resync_deadlines();
}

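/*
 * As timer_queue_expire_local(), but passes TRUE to
 * timer_queue_expire_with_options(), presumably requesting a rescan pass
 * that also picks up entries whose soft deadlines have been skewed by
 * coalescing. Must be called with interrupts disabled (see the assert).
 */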
void
timer_queue_expire_rescan(
	__unused void		*arg)
{
	rtclock_timer_t	*mytimer;
	uint64_t	abstime;
	cpu_data_t	*pp;

	assert(ml_get_interrupts_enabled() == FALSE);
	pp = current_cpu_datap();

	mytimer = &pp->rtclock_timer;
	abstime = mach_absolute_time();

	mytimer->has_expired = TRUE;
	mytimer->deadline = timer_queue_expire_with_options(&mytimer->queue, abstime, TRUE);
	mytimer->has_expired = FALSE;
	mytimer->when_set = mach_absolute_time();

	timer_resync_deadlines();
}

/* N.B.: Max leeway values assume a 1GHz timebase */
timer_coalescing_priority_params_t tcoal_prio_params =
{
	/* Deadline scale values for each thread attribute */
	0, -5, 3, 3, 3,
	/* Maximum leeway in abstime for each thread attribute */
	0ULL, 100*NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC,
	/* Deadline scale values for each latency QoS tier */
	{3, 2, 1, -2, -15, -15},
	/* Maximum leeway in abstime for each latency QoS tier */
	{1*NSEC_PER_MSEC, 5*NSEC_PER_MSEC, 20*NSEC_PER_MSEC, 75*NSEC_PER_MSEC,
	 10*NSEC_PER_SEC, 10*NSEC_PER_SEC},
	/* Signifies that the tier requires rate-limiting */
	{FALSE, FALSE, FALSE, FALSE, TRUE, TRUE}
};
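
/*
 * Note on how these parameters are consumed (see timer_call_slop() below):
 * each scale value is a bit shift applied to the time remaining until the
 * deadline, and the corresponding max value caps the resulting leeway, so a
 * negative shift widens the coalescing window. For example, a timer in the
 * first latency QoS tier (scale 3, max 1 ms) that is due in 80 ms may be
 * deferred by min(80 ms >> 3, 1 ms) = 1 ms.
 */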
#define TIMER_RESORT_THRESHOLD_ABSTIME (50 * NSEC_PER_MSEC)

#if TCOAL_PRIO_STATS
int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
#define TCOAL_PRIO_STAT(x) (x++)
#else
#define TCOAL_PRIO_STAT(x)
#endif

/* Select timer coalescing window based on per-task quality-of-service hints */
static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited) {
	uint32_t latency_qos;
	boolean_t adjusted = FALSE;
	task_t ctask = t->task;

	if (ctask) {
		latency_qos = proc_get_effective_task_policy(ctask, TASK_POLICY_LATENCY_QOS);

		assert(latency_qos <= NUM_LATENCY_QOS_TIERS);

		if (latency_qos) {
			*tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
			*tmax = tcoal_prio_params.latency_qos_ns_max[latency_qos - 1];
			*pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
			adjusted = TRUE;
		}
	}
	return adjusted;
}
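/*
 * Note: a latency_qos value of zero means "unspecified", so the tier arrays
 * above are indexed with latency_qos - 1; the assert bounds the value at
 * NUM_LATENCY_QOS_TIERS.
 */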

/* Adjust timer deadlines based on the priority of the thread and the
 * urgency value provided at timeout establishment. With this mechanism,
 * timers are no longer necessarily sorted in order of soft deadline
 * on a given timer queue, i.e. they may be differentially skewed.
 * In the current scheme, this could lead to fewer pending timers
 * processed than is technically possible when the HW deadline arrives.
 */
static void
timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited) {
	int16_t tpri = cthread->sched_pri;

	if ((urgency & TIMER_CALL_USER_MASK) != 0) {
		if (tpri >= BASEPRI_RTQUEUES ||
		    urgency == TIMER_CALL_USER_CRITICAL) {
			*tshift = tcoal_prio_params.timer_coalesce_rt_shift;
			*tmax = tcoal_prio_params.timer_coalesce_rt_ns_max;
			TCOAL_PRIO_STAT(rt_tcl);
		} else if ((urgency == TIMER_CALL_USER_BACKGROUND) ||
		    proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG)) {
			/* Determine if timer should be subjected to a lower QoS */
			if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) {
				if (*tmax > tcoal_prio_params.timer_coalesce_bg_ns_max) {
					return;
				} else {
					*pratelimited = FALSE;
				}
			}
			*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
			*tmax = tcoal_prio_params.timer_coalesce_bg_ns_max;
			TCOAL_PRIO_STAT(bg_tcl);
		} else if (tpri >= MINPRI_KERNEL) {
			*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
			*tmax = tcoal_prio_params.timer_coalesce_kt_ns_max;
			TCOAL_PRIO_STAT(kt_tcl);
		} else if (cthread->sched_mode == TH_MODE_FIXED) {
			*tshift = tcoal_prio_params.timer_coalesce_fp_shift;
			*tmax = tcoal_prio_params.timer_coalesce_fp_ns_max;
			TCOAL_PRIO_STAT(fp_tcl);
		} else if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) {
			TCOAL_PRIO_STAT(qos_tcl);
		} else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
			*tshift = tcoal_prio_params.timer_coalesce_ts_shift;
			*tmax = tcoal_prio_params.timer_coalesce_ts_ns_max;
			TCOAL_PRIO_STAT(ts_tcl);
		} else {
			TCOAL_PRIO_STAT(nc_tcl);
		}
	} else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
		*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
		*tmax = tcoal_prio_params.timer_coalesce_bg_ns_max;
		TCOAL_PRIO_STAT(bg_tcl);
	} else {
		*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
		*tmax = tcoal_prio_params.timer_coalesce_kt_ns_max;
		TCOAL_PRIO_STAT(kt_tcl);
	}
}
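
/*
 * The classification order for user timers above is, roughly: realtime or
 * user-critical, then background (explicit urgency or DARWIN_BG policy),
 * then kernel-priority threads, fixed-priority threads, threads in a
 * latency-QoS-tagged task, timeshare threads, and finally no coalescing.
 * Kernel-originated timers use either the background or the kernel
 * parameters, depending on urgency.
 */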

int timer_user_idle_level;

uint64_t
timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
{
	int32_t tcs_shift = 0;
	uint64_t tcs_ns_max = 0;
	uint64_t adjval;
	uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);

	if (mach_timer_coalescing_enabled &&
	    (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
		timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_ns_max, pratelimited);

		if (tcs_shift >= 0)
			adjval = MIN((deadline - now) >> tcs_shift, tcs_ns_max);
		else
			adjval = MIN((deadline - now) << (-tcs_shift), tcs_ns_max);
		/* Apply adjustments derived from "user idle level" heuristic */
		adjval += (adjval * timer_user_idle_level) >> 7;
		return adjval;
	} else {
		return 0;
	}
}
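
/*
 * Illustrative (hypothetical) caller-side sketch; the variable names here
 * are for exposition only and do not appear elsewhere in this file:
 *
 *	boolean_t ratelimited = FALSE;
 *	uint64_t now = mach_absolute_time();
 *	uint64_t leeway = timer_call_slop(deadline, now, flags,
 *	    current_thread(), &ratelimited);
 *
 * The returned leeway is the amount by which the hard deadline may be
 * deferred past the requested deadline to allow coalescing with nearby
 * timers; TIMER_CALL_SYS_CRITICAL timers always get zero leeway.
 */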

boolean_t
timer_resort_threshold(uint64_t skew) {
	if (skew >= TIMER_RESORT_THRESHOLD_ABSTIME)
		return TRUE;
	else
		return FALSE;
}

int
ml_timer_get_user_idle_level(void) {
	return timer_user_idle_level;
}

kern_return_t ml_timer_set_user_idle_level(int ilevel) {
	boolean_t do_reeval = FALSE;

	if ((ilevel < 0) || (ilevel > 128))
		return KERN_INVALID_ARGUMENT;

	if (ilevel < timer_user_idle_level) {
		do_reeval = TRUE;
	}

	timer_user_idle_level = ilevel;

	if (do_reeval)
		ml_timer_evaluate();

	return KERN_SUCCESS;
}
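
/*
 * The user idle level ranges from 0 to 128 and scales the coalescing leeway
 * in timer_call_slop(): adjval += (adjval * level) >> 7, so level 128
 * roughly doubles the window. Lowering the level triggers
 * ml_timer_evaluate(), presumably so that timers coalesced under the
 * previous, more permissive setting are re-examined promptly.
 */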

/*
 * Return the local timer queue for a running processor;
 * otherwise return the boot processor's timer queue.
 */
mpqueue_head_t *
timer_queue_assign(
	uint64_t	deadline)
{
	cpu_data_t	*cdp = current_cpu_datap();
	mpqueue_head_t	*queue;

	if (cdp->cpu_running) {
		queue = &cdp->rtclock_timer.queue;

		if (deadline < cdp->rtclock_timer.deadline)
			timer_set_deadline(deadline);
	}
	else
		queue = &cpu_datap(master_cpu)->rtclock_timer.queue;

	return (queue);
}

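/*
 * Presumably called when a timer is removed from a queue: if the queue is
 * this CPU's local queue and the cancelled deadline was earlier than the
 * new earliest deadline, the programmed pop is pushed out accordingly.
 */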
void
timer_queue_cancel(
	mpqueue_head_t	*queue,
	uint64_t	deadline,
	uint64_t	new_deadline)
{
	if (queue == &current_cpu_datap()->rtclock_timer.queue) {
		if (deadline < new_deadline)
			timer_set_deadline(new_deadline);
	}
}

/*
 * timer_queue_migrate_cpu() is called from the Power-Management kext
 * when a logical processor goes idle (in a deep C-state) with a distant
 * deadline so that its timer queue can be moved to another processor.
 * This target processor should be the least idle (most busy) --
 * currently this is the primary processor for the calling thread's package.
 * Locking restrictions demand that the target cpu must be the boot cpu.
 */
uint32_t
timer_queue_migrate_cpu(int target_cpu)
{
	cpu_data_t	*target_cdp = cpu_datap(target_cpu);
	cpu_data_t	*cdp = current_cpu_datap();
	int		ntimers_moved;

	assert(!ml_get_interrupts_enabled());
	assert(target_cpu != cdp->cpu_number);
	assert(target_cpu == master_cpu);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		DECR_TIMER_MIGRATE | DBG_FUNC_START,
		target_cpu,
		cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >> 32),
		0, 0);

	/*
	 * Move timer requests from the local queue to the target processor's.
	 * The return value is the number of requests moved. If this is 0,
	 * it indicates that the first (i.e. earliest) timer is earlier than
	 * the earliest for the target processor. Since this would force a
	 * resync, the move of this and all later requests is aborted.
	 */
	ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue,
					    &target_cdp->rtclock_timer.queue);

	/*
	 * Assuming we moved stuff, clear the local deadline.
	 */
	if (ntimers_moved > 0) {
		cdp->rtclock_timer.deadline = EndOfAllTime;
		setPop(EndOfAllTime);
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		DECR_TIMER_MIGRATE | DBG_FUNC_END,
		target_cpu, ntimers_moved, 0, 0, 0);

	return ntimers_moved;
}

mpqueue_head_t *
timer_queue_cpu(int cpu)
{
	return &cpu_datap(cpu)->rtclock_timer.queue;
}

void
timer_call_cpu(int cpu, void (*fn)(void *), void *arg)
{
	mp_cpus_call(cpu_to_cpumask(cpu), SYNC, fn, arg);
}

void
timer_call_nosync_cpu(int cpu, void (*fn)(void *), void *arg)
{
	/* XXX Needs error checking and retry */
	mp_cpus_call(cpu_to_cpumask(cpu), NOSYNC, fn, arg);
}