]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/i386_timer.c
xnu-2422.100.13.tar.gz
[apple/xnu.git] / osfmk / i386 / i386_timer.c
CommitLineData
39236c6e
A
1/*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * @APPLE_FREE_COPYRIGHT@
33 */
34/*
35 * File: timer.c
36 * Purpose: Routines for handling the machine independent timer.
37 */
38
39#include <mach/mach_types.h>
40
41#include <kern/timer_queue.h>
42#include <kern/timer_call.h>
43#include <kern/clock.h>
44#include <kern/thread.h>
45#include <kern/processor.h>
46#include <kern/macro_help.h>
47#include <kern/spl.h>
48#include <kern/timer_queue.h>
49#include <kern/pms.h>
50
51#include <machine/commpage.h>
52#include <machine/machine_routines.h>
53
54#include <sys/kdebug.h>
55#include <i386/cpu_data.h>
56#include <i386/cpu_topology.h>
57#include <i386/cpu_threads.h>
58
59uint32_t spurious_timers;
60
61/*
62 * Event timer interrupt.
63 *
64 * XXX a drawback of this implementation is that events serviced earlier must not set deadlines
65 * that occur before the entire chain completes.
66 *
67 * XXX a better implementation would use a set of generic callouts and iterate over them
68 */
69void
70timer_intr(int user_mode,
71 uint64_t rip)
72{
73 uint64_t abstime;
74 rtclock_timer_t *mytimer;
75 cpu_data_t *pp;
76 int64_t latency;
77 uint64_t pmdeadline;
78 boolean_t timer_processed = FALSE;
79
80 pp = current_cpu_datap();
81
82 SCHED_STATS_TIMER_POP(current_processor());
83
84 abstime = mach_absolute_time(); /* Get the time now */
85
86 /* has a pending clock timer expired? */
87 mytimer = &pp->rtclock_timer; /* Point to the event timer */
88
89 if ((timer_processed = ((mytimer->deadline <= abstime) ||
90 (abstime >= (mytimer->queue.earliest_soft_deadline))))) {
91 /*
92 * Log interrupt service latency (-ve value expected by tool)
93 * a non-PM event is expected next.
94 * The requested deadline may be earlier than when it was set
95 * - use MAX to avoid reporting bogus latencies.
96 */
97 latency = (int64_t) (abstime - MAX(mytimer->deadline,
98 mytimer->when_set));
99 /* Log zero timer latencies when opportunistically processing
100 * coalesced timers.
101 */
102 if (latency < 0) {
103 TCOAL_DEBUG(0xEEEE0000, abstime, mytimer->queue.earliest_soft_deadline, abstime - mytimer->queue.earliest_soft_deadline, 0, 0);
104 latency = 0;
105 }
106
107 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
108 DECR_TRAP_LATENCY | DBG_FUNC_NONE,
109 -latency,
110 ((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)),
111 user_mode, 0, 0);
112
113 mytimer->has_expired = TRUE; /* Remember that we popped */
114 mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
115 mytimer->has_expired = FALSE;
116
117 /* Get the time again since we ran a bit */
118 abstime = mach_absolute_time();
119 mytimer->when_set = abstime;
120 }
121
122 /* is it time for power management state change? */
123 if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
124 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
125 DECR_PM_DEADLINE | DBG_FUNC_START,
126 0, 0, 0, 0, 0);
127 pmCPUDeadline(pp);
128 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
129 DECR_PM_DEADLINE | DBG_FUNC_END,
130 0, 0, 0, 0, 0);
131 timer_processed = TRUE;
132 }
133
134 /* schedule our next deadline */
135 x86_lcpu()->rtcDeadline = EndOfAllTime;
136 timer_resync_deadlines();
137
138 if (__improbable(timer_processed == FALSE))
139 spurious_timers++;
140}
141
142/*
143 * Set the clock deadline.
144 */
145void timer_set_deadline(uint64_t deadline)
146{
147 rtclock_timer_t *mytimer;
148 spl_t s;
149 cpu_data_t *pp;
150
151 s = splclock(); /* no interruptions */
152 pp = current_cpu_datap();
153
154 mytimer = &pp->rtclock_timer; /* Point to the timer itself */
155 mytimer->deadline = deadline; /* Set new expiration time */
156 mytimer->when_set = mach_absolute_time();
157
158 timer_resync_deadlines();
159
160 splx(s);
161}
162
163/*
164 * Re-evaluate the outstanding deadlines and select the most proximate.
165 *
166 * Should be called at splclock.
167 */
168void
169timer_resync_deadlines(void)
170{
171 uint64_t deadline = EndOfAllTime;
172 uint64_t pmdeadline;
173 rtclock_timer_t *mytimer;
174 spl_t s = splclock();
175 cpu_data_t *pp;
176 uint32_t decr;
177
178 pp = current_cpu_datap();
179 if (!pp->cpu_running)
180 /* There's really nothing to do if this processor is down */
181 return;
182
183 /*
184 * If we have a clock timer set, pick that.
185 */
186 mytimer = &pp->rtclock_timer;
187 if (!mytimer->has_expired &&
188 0 < mytimer->deadline && mytimer->deadline < EndOfAllTime)
189 deadline = mytimer->deadline;
190
191 /*
192 * If we have a power management deadline, see if that's earlier.
193 */
194 pmdeadline = pmCPUGetDeadline(pp);
195 if (0 < pmdeadline && pmdeadline < deadline)
196 deadline = pmdeadline;
197
198 /*
199 * Go and set the "pop" event.
200 */
201 decr = (uint32_t) setPop(deadline);
202
203 /* Record non-PM deadline for latency tool */
204 if (decr != 0 && deadline != pmdeadline) {
205 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
206 DECR_SET_DEADLINE | DBG_FUNC_NONE,
207 decr, 2,
208 deadline,
209 mytimer->queue.count, 0);
210 }
211 splx(s);
212}
213
214void
215timer_queue_expire_local(
216__unused void *arg)
217{
218 rtclock_timer_t *mytimer;
219 uint64_t abstime;
220 cpu_data_t *pp;
221
222 pp = current_cpu_datap();
223
224 mytimer = &pp->rtclock_timer;
225 abstime = mach_absolute_time();
226
227 mytimer->has_expired = TRUE;
228 mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
229 mytimer->has_expired = FALSE;
230 mytimer->when_set = mach_absolute_time();
231
232 timer_resync_deadlines();
233}
234
235void
236timer_queue_expire_rescan(
237__unused void *arg)
238{
239 rtclock_timer_t *mytimer;
240 uint64_t abstime;
241 cpu_data_t *pp;
242
243 assert(ml_get_interrupts_enabled() == FALSE);
244 pp = current_cpu_datap();
245
246 mytimer = &pp->rtclock_timer;
247 abstime = mach_absolute_time();
248
249 mytimer->has_expired = TRUE;
250 mytimer->deadline = timer_queue_expire_with_options(&mytimer->queue, abstime, TRUE);
251 mytimer->has_expired = FALSE;
252 mytimer->when_set = mach_absolute_time();
253
254 timer_resync_deadlines();
255}
256
257/* N.B.: Max leeway values assume 1GHz timebase */
258timer_coalescing_priority_params_t tcoal_prio_params =
259{
260 /* Deadline scale values for each thread attribute */
261 0, -5, 3, 3, 3,
262 /* Maximum leeway in abstime for each thread attribute */
263 0ULL, 100*NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC,
264 /* Deadline scale values for each latency QoS tier */
265 {3, 2, 1, -2, -15, -15},
266 /* Maximum leeway in abstime for each latency QoS Tier*/
267 {1*NSEC_PER_MSEC, 5*NSEC_PER_MSEC, 20*NSEC_PER_MSEC, 75*NSEC_PER_MSEC,
268 10*NSEC_PER_SEC, 10*NSEC_PER_SEC},
269 /* Signifies that the tier requires rate-limiting */
270 {FALSE, FALSE, FALSE, FALSE, TRUE, TRUE}
271};
272#define TIMER_RESORT_THRESHOLD_ABSTIME (50 * NSEC_PER_MSEC)
273
274#if TCOAL_PRIO_STATS
275int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
276#define TCOAL_PRIO_STAT(x) (x++)
277#else
278#define TCOAL_PRIO_STAT(x)
279#endif
280
281/* Select timer coalescing window based on per-task quality-of-service hints */
282static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited) {
283 uint32_t latency_qos;
284 boolean_t adjusted = FALSE;
285 task_t ctask = t->task;
286
287 if (ctask) {
288 latency_qos = proc_get_effective_task_policy(ctask, TASK_POLICY_LATENCY_QOS);
289
290 assert(latency_qos <= NUM_LATENCY_QOS_TIERS);
291
292 if (latency_qos) {
293 *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
294 *tmax = tcoal_prio_params.latency_qos_ns_max[latency_qos - 1];
295 *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
296 adjusted = TRUE;
297 }
298 }
299 return adjusted;
300}
301
302/* Adjust timer deadlines based on priority of the thread and the
303 * urgency value provided at timeout establishment. With this mechanism,
304 * timers are no longer necessarily sorted in order of soft deadline
305 * on a given timer queue, i.e. they may be differentially skewed.
306 * In the current scheme, this could lead to fewer pending timers
307 * processed than is technically possible when the HW deadline arrives.
308 */
309static void
310timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited) {
311 int16_t tpri = cthread->sched_pri;
312
313 if ((urgency & TIMER_CALL_USER_MASK) != 0) {
314 if (tpri >= BASEPRI_RTQUEUES ||
315 urgency == TIMER_CALL_USER_CRITICAL) {
316 *tshift = tcoal_prio_params.timer_coalesce_rt_shift;
317 *tmax = tcoal_prio_params.timer_coalesce_rt_ns_max;
318 TCOAL_PRIO_STAT(rt_tcl);
319 } else if ((urgency == TIMER_CALL_USER_BACKGROUND) ||
320 proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG)) {
321 /* Determine if timer should be subjected to a lower QoS */
322 if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) {
323 if (*tmax > tcoal_prio_params.timer_coalesce_bg_ns_max) {
324 return;
325 } else {
326 *pratelimited = FALSE;
327 }
328 }
329 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
330 *tmax = tcoal_prio_params.timer_coalesce_bg_ns_max;
331 TCOAL_PRIO_STAT(bg_tcl);
332 } else if (tpri >= MINPRI_KERNEL) {
333 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
334 *tmax = tcoal_prio_params.timer_coalesce_kt_ns_max;
335 TCOAL_PRIO_STAT(kt_tcl);
336 } else if (cthread->sched_mode == TH_MODE_FIXED) {
337 *tshift = tcoal_prio_params.timer_coalesce_fp_shift;
338 *tmax = tcoal_prio_params.timer_coalesce_fp_ns_max;
339 TCOAL_PRIO_STAT(fp_tcl);
340 } else if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) {
341 TCOAL_PRIO_STAT(qos_tcl);
342 } else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
343 *tshift = tcoal_prio_params.timer_coalesce_ts_shift;
344 *tmax = tcoal_prio_params.timer_coalesce_ts_ns_max;
345 TCOAL_PRIO_STAT(ts_tcl);
346 } else {
347 TCOAL_PRIO_STAT(nc_tcl);
348 }
349 } else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
350 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
351 *tmax = tcoal_prio_params.timer_coalesce_bg_ns_max;
352 TCOAL_PRIO_STAT(bg_tcl);
353 } else {
354 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
355 *tmax = tcoal_prio_params.timer_coalesce_kt_ns_max;
356 TCOAL_PRIO_STAT(kt_tcl);
357 }
358}
359
360int timer_user_idle_level;
361
362uint64_t
363timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
364{
365 int32_t tcs_shift = 0;
366 uint64_t tcs_ns_max = 0;
367 uint64_t adjval;
368 uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);
369
370 if (mach_timer_coalescing_enabled &&
371 (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
372 timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_ns_max, pratelimited);
373
374 if (tcs_shift >= 0)
375 adjval = MIN((deadline - now) >> tcs_shift, tcs_ns_max);
376 else
377 adjval = MIN((deadline - now) << (-tcs_shift), tcs_ns_max);
378 /* Apply adjustments derived from "user idle level" heuristic */
379 adjval += (adjval * timer_user_idle_level) >> 7;
380 return adjval;
381 } else {
382 return 0;
383 }
384}
385
386boolean_t
387timer_resort_threshold(uint64_t skew) {
388 if (skew >= TIMER_RESORT_THRESHOLD_ABSTIME)
389 return TRUE;
390 else
391 return FALSE;
392}
393
394int
395ml_timer_get_user_idle_level(void) {
396 return timer_user_idle_level;
397}
398
399kern_return_t ml_timer_set_user_idle_level(int ilevel) {
400 boolean_t do_reeval = FALSE;
401
402 if ((ilevel < 0) || (ilevel > 128))
403 return KERN_INVALID_ARGUMENT;
404
405 if (ilevel < timer_user_idle_level) {
406 do_reeval = TRUE;
407 }
408
409 timer_user_idle_level = ilevel;
410
411 if (do_reeval)
412 ml_timer_evaluate();
413
414 return KERN_SUCCESS;
415}
416
417/*
418 * Return the local timer queue for a running processor
419 * else return the boot processor's timer queue.
420 */
421mpqueue_head_t *
422timer_queue_assign(
423 uint64_t deadline)
424{
425 cpu_data_t *cdp = current_cpu_datap();
426 mpqueue_head_t *queue;
427
428 if (cdp->cpu_running) {
429 queue = &cdp->rtclock_timer.queue;
430
431 if (deadline < cdp->rtclock_timer.deadline)
432 timer_set_deadline(deadline);
433 }
434 else
435 queue = &cpu_datap(master_cpu)->rtclock_timer.queue;
436
437 return (queue);
438}
439
440void
441timer_queue_cancel(
442 mpqueue_head_t *queue,
443 uint64_t deadline,
444 uint64_t new_deadline)
445{
446 if (queue == &current_cpu_datap()->rtclock_timer.queue) {
447 if (deadline < new_deadline)
448 timer_set_deadline(new_deadline);
449 }
450}
451
452/*
453 * timer_queue_migrate_cpu() is called from the Power-Management kext
454 * when a logical processor goes idle (in a deep C-state) with a distant
455 * deadline so that it's timer queue can be moved to another processor.
456 * This target processor should be the least idle (most busy) --
457 * currently this is the primary processor for the calling thread's package.
458 * Locking restrictions demand that the target cpu must be the boot cpu.
459 */
460uint32_t
461timer_queue_migrate_cpu(int target_cpu)
462{
463 cpu_data_t *target_cdp = cpu_datap(target_cpu);
464 cpu_data_t *cdp = current_cpu_datap();
465 int ntimers_moved;
466
467 assert(!ml_get_interrupts_enabled());
468 assert(target_cpu != cdp->cpu_number);
469 assert(target_cpu == master_cpu);
470
471 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
472 DECR_TIMER_MIGRATE | DBG_FUNC_START,
473 target_cpu,
474 cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >>32),
475 0, 0);
476
477 /*
478 * Move timer requests from the local queue to the target processor's.
479 * The return value is the number of requests moved. If this is 0,
480 * it indicates that the first (i.e. earliest) timer is earlier than
481 * the earliest for the target processor. Since this would force a
482 * resync, the move of this and all later requests is aborted.
483 */
484 ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue,
485 &target_cdp->rtclock_timer.queue);
486
487 /*
488 * Assuming we moved stuff, clear local deadline.
489 */
490 if (ntimers_moved > 0) {
491 cdp->rtclock_timer.deadline = EndOfAllTime;
492 setPop(EndOfAllTime);
493 }
494
495 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
496 DECR_TIMER_MIGRATE | DBG_FUNC_END,
497 target_cpu, ntimers_moved, 0, 0, 0);
498
499 return ntimers_moved;
500}
501
502mpqueue_head_t *
503timer_queue_cpu(int cpu)
504{
505 return &cpu_datap(cpu)->rtclock_timer.queue;
506}
507
508void
509timer_call_cpu(int cpu, void (*fn)(void *), void *arg)
510{
511 mp_cpus_call(cpu_to_cpumask(cpu), SYNC, fn, arg);
512}
513
514void
515timer_call_nosync_cpu(int cpu, void (*fn)(void *), void *arg)
516{
517 /* XXX Needs error checking and retry */
518 mp_cpus_call(cpu_to_cpumask(cpu), NOSYNC, fn, arg);
519}
520