]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/pmCPU.c
a4b8c62e61a7e39bc6734d63341a9e66ea49d665
[apple/xnu.git] / osfmk / i386 / pmCPU.c
1 /*
2 * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * CPU-specific power management support.
31 *
32 * Implements the "wrappers" to the KEXT.
33 */
34 #include <i386/asm.h>
35 #include <i386/machine_cpu.h>
36 #include <i386/mp.h>
37 #include <i386/machine_routines.h>
38 #include <i386/proc_reg.h>
39 #include <i386/pmap.h>
40 #include <i386/misc_protos.h>
41 #include <kern/machine.h>
42 #include <kern/pms.h>
43 #include <kern/processor.h>
44 #include <kern/timer_queue.h>
45 #include <i386/cpu_threads.h>
46 #include <i386/pmCPU.h>
47 #include <i386/cpuid.h>
48 #include <i386/rtclock_protos.h>
49 #include <kern/sched_prim.h>
50 #include <i386/lapic.h>
51 #include <i386/pal_routines.h>
52 #include <sys/kdebug.h>
53 #include <i386/tsc.h>
54
55 extern int disableConsoleOutput;
56
57 #define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL
58
59 uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16* NSEC_PER_USEC, 32* NSEC_PER_USEC, 64* NSEC_PER_USEC, 128* NSEC_PER_USEC, 256* NSEC_PER_USEC, 512* NSEC_PER_USEC, 1024* NSEC_PER_USEC, 2048* NSEC_PER_USEC, 4096* NSEC_PER_USEC, 8192* NSEC_PER_USEC, 16384* NSEC_PER_USEC, 32768* NSEC_PER_USEC};
60 uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];
61
62 /*
63 * The following is set when the KEXT loads and initializes.
64 */
65 pmDispatch_t *pmDispatch = NULL;
66
67 uint32_t pmInitDone = 0;
68 static boolean_t earlyTopology = FALSE;
69 static uint64_t earlyMaxBusDelay = DELAY_UNSET;
70 static uint64_t earlyMaxIntDelay = DELAY_UNSET;
71
72 /*
73 * Initialize the Cstate change code.
74 */
75 void
76 power_management_init(void)
77 {
78 if (pmDispatch != NULL && pmDispatch->cstateInit != NULL)
79 (*pmDispatch->cstateInit)();
80 }
81
/*
 * Histogram helper: bump the counter of the first bin whose boundary is
 * strictly greater than 'interval'.
 *
 *   interval  value to classify
 *   bins      counters, one per bin (updated in place)
 *   binvals   exclusive upper bound of each bin
 *   nbins     number of entries in both arrays
 *
 * An interval that is not below any boundary is not counted at all.
 */
static inline void machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins) {
    uint32_t slot = 0;

    /* Skip every bin whose boundary the interval has reached. */
    while (slot < nbins && interval >= binvals[slot])
        slot++;

    if (slot < nbins)
        bins[slot]++;
}
91
92 uint64_t idle_pending_timers_processed;
93 uint32_t idle_entry_timer_processing_hdeadline_threshold = 5000000;
94
95 /*
96 * Called when the CPU is idle. It calls into the power management kext
97 * to determine the best way to idle the CPU.
98 */
99 void
100 machine_idle(void)
101 {
102 cpu_data_t *my_cpu = current_cpu_datap();
103 __unused uint32_t cnum = my_cpu->cpu_number;
104 uint64_t ctime, rtime, itime;
105 #if CST_DEMOTION_DEBUG
106 processor_t cproc = my_cpu->cpu_processor;
107 uint64_t cwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
108 #endif /* CST_DEMOTION_DEBUG */
109 uint64_t esdeadline, ehdeadline;
110 boolean_t do_process_pending_timers = FALSE;
111
112 ctime = mach_absolute_time();
113 esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
114 ehdeadline = my_cpu->rtclock_timer.deadline;
115 /* Determine if pending timers exist */
116 if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
117 ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
118 idle_pending_timers_processed++;
119 do_process_pending_timers = TRUE;
120 goto machine_idle_exit;
121 } else {
122 TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
123 }
124
125 my_cpu->lcpu.state = LCPU_IDLE;
126 DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
127 MARK_CPU_IDLE(cnum);
128
129 rtime = ctime - my_cpu->cpu_ixtime;
130
131 my_cpu->cpu_rtime_total += rtime;
132 machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
133 #if CST_DEMOTION_DEBUG
134 uint32_t cl = 0, ch = 0;
135 uint64_t c3res, c6res, c7res;
136 rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
137 c3res = ((uint64_t)ch << 32) | cl;
138 rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
139 c6res = ((uint64_t)ch << 32) | cl;
140 rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
141 c7res = ((uint64_t)ch << 32) | cl;
142 #endif
143
144 if (pmInitDone) {
145 /*
146 * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
147 * were called prior to the CPU PM kext being registered. We do
148 * this here since we know at this point the values will be first
149 * used since idle is where the decisions using these values is made.
150 */
151 if (earlyMaxBusDelay != DELAY_UNSET)
152 ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
153 if (earlyMaxIntDelay != DELAY_UNSET)
154 ml_set_maxintdelay(earlyMaxIntDelay);
155 }
156
157 if (pmInitDone
158 && pmDispatch != NULL
159 && pmDispatch->MachineIdle != NULL)
160 (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
161 else {
162 /*
163 * If no power management, re-enable interrupts and halt.
164 * This will keep the CPU from spinning through the scheduler
165 * and will allow at least some minimal power savings (but it
166 * cause problems in some MP configurations w.r.t. the APIC
167 * stopping during a GV3 transition).
168 */
169 pal_hlt();
170 /* Once woken, re-disable interrupts. */
171 pal_cli();
172 }
173
174 /*
175 * Mark the CPU as running again.
176 */
177 MARK_CPU_ACTIVE(cnum);
178 DBGLOG(cpu_handle, cnum, MP_UNIDLE);
179 my_cpu->lcpu.state = LCPU_RUN;
180 uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
181 itime = ixtime - ctime;
182 my_cpu->cpu_idle_exits++;
183 my_cpu->cpu_itime_total += itime;
184 machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
185 #if CST_DEMOTION_DEBUG
186 cl = ch = 0;
187 rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
188 c3res = (((uint64_t)ch << 32) | cl) - c3res;
189 rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
190 c6res = (((uint64_t)ch << 32) | cl) - c6res;
191 rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
192 c7res = (((uint64_t)ch << 32) | cl) - c7res;
193
194 uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
195 KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
196 if ((itime > 1000000) && (ndelta > 250000))
197 KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
198 #endif
199
200 machine_idle_exit:
201 /*
202 * Re-enable interrupts.
203 */
204
205 pal_sti();
206
207 if (do_process_pending_timers) {
208 TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);
209
210 /* Adjust to reflect that this isn't truly a package idle exit */
211 __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
212 lapic_timer_swi(); /* Trigger software timer interrupt */
213 __sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
214
215 TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
216 }
217 #if CST_DEMOTION_DEBUG
218 uint64_t nwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
219
220 if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
221 KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
222 }
223 #endif
224 }
225
226 /*
227 * Called when the CPU is to be halted. It will choose the best C-State
228 * to be in.
229 */
230 void
231 pmCPUHalt(uint32_t reason)
232 {
233 cpu_data_t *cpup = current_cpu_datap();
234
235 switch (reason) {
236 case PM_HALT_DEBUG:
237 cpup->lcpu.state = LCPU_PAUSE;
238 pal_stop_cpu(FALSE);
239 break;
240
241 case PM_HALT_PANIC:
242 cpup->lcpu.state = LCPU_PAUSE;
243 pal_stop_cpu(TRUE);
244 break;
245
246 case PM_HALT_NORMAL:
247 case PM_HALT_SLEEP:
248 default:
249 pal_cli();
250
251 if (pmInitDone
252 && pmDispatch != NULL
253 && pmDispatch->pmCPUHalt != NULL) {
254 /*
255 * Halt the CPU (and put it in a low power state.
256 */
257 (*pmDispatch->pmCPUHalt)();
258
259 /*
260 * We've exited halt, so get the CPU schedulable again.
261 * - by calling the fast init routine for a slave, or
262 * - by returning if we're the master processor.
263 */
264 if (cpup->cpu_number != master_cpu) {
265 i386_init_slave_fast();
266 panic("init_slave_fast returned");
267 }
268 } else
269 {
270 /*
271 * If no power managment and a processor is taken off-line,
272 * then invalidate the cache and halt it (it will not be able
273 * to be brought back on-line without resetting the CPU).
274 */
275 __asm__ volatile ("wbinvd");
276 cpup->lcpu.state = LCPU_HALT;
277 pal_stop_cpu(FALSE);
278
279 panic("back from Halt");
280 }
281
282 break;
283 }
284 }
285
286 void
287 pmMarkAllCPUsOff(void)
288 {
289 if (pmInitDone
290 && pmDispatch != NULL
291 && pmDispatch->markAllCPUsOff != NULL)
292 (*pmDispatch->markAllCPUsOff)();
293 }
294
295 static void
296 pmInitComplete(void)
297 {
298 if (earlyTopology
299 && pmDispatch != NULL
300 && pmDispatch->pmCPUStateInit != NULL) {
301 (*pmDispatch->pmCPUStateInit)();
302 earlyTopology = FALSE;
303 }
304 pmInitDone = 1;
305 }
306
307 x86_lcpu_t *
308 pmGetLogicalCPU(int cpu)
309 {
310 return(cpu_to_lcpu(cpu));
311 }
312
313 x86_lcpu_t *
314 pmGetMyLogicalCPU(void)
315 {
316 cpu_data_t *cpup = current_cpu_datap();
317
318 return(&cpup->lcpu);
319 }
320
321 static x86_core_t *
322 pmGetCore(int cpu)
323 {
324 return(cpu_to_core(cpu));
325 }
326
327 static x86_core_t *
328 pmGetMyCore(void)
329 {
330 cpu_data_t *cpup = current_cpu_datap();
331
332 return(cpup->lcpu.core);
333 }
334
335 static x86_die_t *
336 pmGetDie(int cpu)
337 {
338 return(cpu_to_die(cpu));
339 }
340
341 static x86_die_t *
342 pmGetMyDie(void)
343 {
344 cpu_data_t *cpup = current_cpu_datap();
345
346 return(cpup->lcpu.die);
347 }
348
349 static x86_pkg_t *
350 pmGetPackage(int cpu)
351 {
352 return(cpu_to_package(cpu));
353 }
354
355 static x86_pkg_t *
356 pmGetMyPackage(void)
357 {
358 cpu_data_t *cpup = current_cpu_datap();
359
360 return(cpup->lcpu.package);
361 }
362
363 static void
364 pmLockCPUTopology(int lock)
365 {
366 if (lock) {
367 simple_lock(&x86_topo_lock);
368 } else {
369 simple_unlock(&x86_topo_lock);
370 }
371 }
372
373 /*
374 * Called to get the next deadline that has been set by the
375 * power management code.
376 * Note: a return of 0 from AICPM and this routine signifies
377 * that no deadline is set.
378 */
379 uint64_t
380 pmCPUGetDeadline(cpu_data_t *cpu)
381 {
382 uint64_t deadline = 0;
383
384 if (pmInitDone
385 && pmDispatch != NULL
386 && pmDispatch->GetDeadline != NULL)
387 deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
388
389 return(deadline);
390 }
391
392 /*
393 * Called to determine if the supplied deadline or the power management
394 * deadline is sooner. Returns which ever one is first.
395 */
396
397 uint64_t
398 pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
399 {
400 if (pmInitDone
401 && pmDispatch != NULL
402 && pmDispatch->SetDeadline != NULL)
403 deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
404
405 return(deadline);
406 }
407
408 /*
409 * Called when a power management deadline expires.
410 */
411 void
412 pmCPUDeadline(cpu_data_t *cpu)
413 {
414 if (pmInitDone
415 && pmDispatch != NULL
416 && pmDispatch->Deadline != NULL)
417 (*pmDispatch->Deadline)(&cpu->lcpu);
418 }
419
420 /*
421 * Called to get a CPU out of idle.
422 */
423 boolean_t
424 pmCPUExitIdle(cpu_data_t *cpu)
425 {
426 boolean_t do_ipi;
427
428 if (pmInitDone
429 && pmDispatch != NULL
430 && pmDispatch->exitIdle != NULL)
431 do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu);
432 else
433 do_ipi = TRUE;
434
435 return(do_ipi);
436 }
437
438 kern_return_t
439 pmCPUExitHalt(int cpu)
440 {
441 kern_return_t rc = KERN_INVALID_ARGUMENT;
442
443 if (pmInitDone
444 && pmDispatch != NULL
445 && pmDispatch->exitHalt != NULL)
446 rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
447
448 return(rc);
449 }
450
451 kern_return_t
452 pmCPUExitHaltToOff(int cpu)
453 {
454 kern_return_t rc = KERN_SUCCESS;
455
456 if (pmInitDone
457 && pmDispatch != NULL
458 && pmDispatch->exitHaltToOff != NULL)
459 rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
460
461 return(rc);
462 }
463
464 /*
465 * Called to initialize the power management structures for the CPUs.
466 */
467 void
468 pmCPUStateInit(void)
469 {
470 if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
471 (*pmDispatch->pmCPUStateInit)();
472 else
473 earlyTopology = TRUE;
474 }
475
476 /*
477 * Called when a CPU is being restarted after being powered off (as in S3).
478 */
479 void
480 pmCPUMarkRunning(cpu_data_t *cpu)
481 {
482 cpu_data_t *cpup = current_cpu_datap();
483
484 if (pmInitDone
485 && pmDispatch != NULL
486 && pmDispatch->markCPURunning != NULL)
487 (*pmDispatch->markCPURunning)(&cpu->lcpu);
488 else
489 cpup->lcpu.state = LCPU_RUN;
490 }
491
492 /*
493 * Called to get/set CPU power management state.
494 */
495 int
496 pmCPUControl(uint32_t cmd, void *datap)
497 {
498 int rc = -1;
499
500 if (pmDispatch != NULL
501 && pmDispatch->pmCPUControl != NULL)
502 rc = (*pmDispatch->pmCPUControl)(cmd, datap);
503
504 return(rc);
505 }
506
507 /*
508 * Called to save the timer state used by power management prior
509 * to "sleeping".
510 */
511 void
512 pmTimerSave(void)
513 {
514 if (pmDispatch != NULL
515 && pmDispatch->pmTimerStateSave != NULL)
516 (*pmDispatch->pmTimerStateSave)();
517 }
518
519 /*
520 * Called to restore the timer state used by power management after
521 * waking from "sleep".
522 */
523 void
524 pmTimerRestore(void)
525 {
526 if (pmDispatch != NULL
527 && pmDispatch->pmTimerStateRestore != NULL)
528 (*pmDispatch->pmTimerStateRestore)();
529 }
530
531 /*
532 * Set the worst-case time for the C4 to C2 transition.
533 * No longer does anything.
534 */
535 void
536 ml_set_maxsnoop(__unused uint32_t maxdelay)
537 {
538 }
539
540
541 /*
542 * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds.
543 */
544 unsigned
545 ml_get_maxsnoop(void)
546 {
547 uint64_t max_snoop = 0;
548
549 if (pmInitDone
550 && pmDispatch != NULL
551 && pmDispatch->getMaxSnoop != NULL)
552 max_snoop = pmDispatch->getMaxSnoop();
553
554 return((unsigned)(max_snoop & 0xffffffff));
555 }
556
557
558 uint32_t
559 ml_get_maxbusdelay(void)
560 {
561 uint64_t max_delay = 0;
562
563 if (pmInitDone
564 && pmDispatch != NULL
565 && pmDispatch->getMaxBusDelay != NULL)
566 max_delay = pmDispatch->getMaxBusDelay();
567
568 return((uint32_t)(max_delay & 0xffffffff));
569 }
570
571 /*
572 * Set the maximum delay time allowed for snoop on the bus.
573 *
574 * Note that this value will be compared to the amount of time that it takes
575 * to transition from a non-snooping power state (C4) to a snooping state (C2).
576 * If maxBusDelay is less than C4C2SnoopDelay,
577 * we will not enter the lowest power state.
578 */
579 void
580 ml_set_maxbusdelay(uint32_t mdelay)
581 {
582 uint64_t maxdelay = mdelay;
583
584 if (pmDispatch != NULL
585 && pmDispatch->setMaxBusDelay != NULL) {
586 earlyMaxBusDelay = DELAY_UNSET;
587 pmDispatch->setMaxBusDelay(maxdelay);
588 } else
589 earlyMaxBusDelay = maxdelay;
590 }
591
592 uint64_t
593 ml_get_maxintdelay(void)
594 {
595 uint64_t max_delay = 0;
596
597 if (pmDispatch != NULL
598 && pmDispatch->getMaxIntDelay != NULL)
599 max_delay = pmDispatch->getMaxIntDelay();
600
601 return(max_delay);
602 }
603
604 /*
605 * Set the maximum delay allowed for an interrupt.
606 */
607 void
608 ml_set_maxintdelay(uint64_t mdelay)
609 {
610 if (pmDispatch != NULL
611 && pmDispatch->setMaxIntDelay != NULL) {
612 earlyMaxIntDelay = DELAY_UNSET;
613 pmDispatch->setMaxIntDelay(mdelay);
614 } else
615 earlyMaxIntDelay = mdelay;
616 }
617
618 boolean_t
619 ml_get_interrupt_prewake_applicable()
620 {
621 boolean_t applicable = FALSE;
622
623 if (pmInitDone
624 && pmDispatch != NULL
625 && pmDispatch->pmInterruptPrewakeApplicable != NULL)
626 applicable = pmDispatch->pmInterruptPrewakeApplicable();
627
628 return applicable;
629 }
630
631 /*
632 * Put a CPU into "safe" mode with respect to power.
633 *
634 * Some systems cannot operate at a continuous "normal" speed without
635 * exceeding the thermal design. This is called per-CPU to place the
636 * CPUs into a "safe" operating mode.
637 */
638 void
639 pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
640 {
641 if (pmDispatch != NULL
642 && pmDispatch->pmCPUSafeMode != NULL)
643 pmDispatch->pmCPUSafeMode(lcpu, flags);
644 else {
645 /*
646 * Do something reasonable if the KEXT isn't present.
647 *
648 * We only look at the PAUSE and RESUME flags. The other flag(s)
649 * will not make any sense without the KEXT, so just ignore them.
650 *
651 * We set the CPU's state to indicate that it's halted. If this
652 * is the CPU we're currently running on, then spin until the
653 * state becomes non-halted.
654 */
655 if (flags & PM_SAFE_FL_PAUSE) {
656 lcpu->state = LCPU_PAUSE;
657 if (lcpu == x86_lcpu()) {
658 while (lcpu->state == LCPU_PAUSE)
659 cpu_pause();
660 }
661 }
662
663 /*
664 * Clear the halted flag for the specified CPU, that will
665 * get it out of it's spin loop.
666 */
667 if (flags & PM_SAFE_FL_RESUME) {
668 lcpu->state = LCPU_RUN;
669 }
670 }
671 }
672
673 static uint32_t saved_run_count = 0;
674
675 void
676 machine_run_count(uint32_t count)
677 {
678 if (pmDispatch != NULL
679 && pmDispatch->pmSetRunCount != NULL)
680 pmDispatch->pmSetRunCount(count);
681 else
682 saved_run_count = count;
683 }
684
685 boolean_t
686 machine_processor_is_inactive(processor_t processor)
687 {
688 int cpu = processor->cpu_id;
689
690 if (pmDispatch != NULL
691 && pmDispatch->pmIsCPUUnAvailable != NULL)
692 return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
693 else
694 return(FALSE);
695 }
696
697 processor_t
698 machine_choose_processor(processor_set_t pset,
699 processor_t preferred)
700 {
701 int startCPU;
702 int endCPU;
703 int preferredCPU;
704 int chosenCPU;
705
706 if (!pmInitDone)
707 return(preferred);
708
709 if (pset == NULL) {
710 startCPU = -1;
711 endCPU = -1;
712 } else {
713 startCPU = pset->cpu_set_low;
714 endCPU = pset->cpu_set_hi;
715 }
716
717 if (preferred == NULL)
718 preferredCPU = -1;
719 else
720 preferredCPU = preferred->cpu_id;
721
722 if (pmDispatch != NULL
723 && pmDispatch->pmChooseCPU != NULL) {
724 chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
725
726 if (chosenCPU == -1)
727 return(NULL);
728 return(cpu_datap(chosenCPU)->cpu_processor);
729 }
730
731 return(preferred);
732 }
733
734 static int
735 pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
736 {
737 int urgency;
738 uint64_t arg1, arg2;
739
740 urgency = thread_get_urgency(current_processor()->next_thread, &arg1, &arg2);
741
742 if (urgency == THREAD_URGENCY_REAL_TIME) {
743 if (rt_period != NULL)
744 *rt_period = arg1;
745
746 if (rt_deadline != NULL)
747 *rt_deadline = arg2;
748 }
749
750 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), urgency, arg1, arg2, 0, 0);
751
752 return(urgency);
753 }
754
755 #if DEBUG
756 uint32_t urgency_stats[64][THREAD_URGENCY_MAX];
757 #endif
758
759 #define URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
760 uint64_t urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
761
762 void
763 thread_tell_urgency(int urgency,
764 uint64_t rt_period,
765 uint64_t rt_deadline,
766 thread_t nthread)
767 {
768 uint64_t urgency_notification_time_start, delta;
769 boolean_t urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
770 assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
771 #if DEBUG
772 urgency_stats[cpu_number() % 64][urgency]++;
773 #endif
774 if (!pmInitDone
775 || pmDispatch == NULL
776 || pmDispatch->pmThreadTellUrgency == NULL)
777 return;
778
779 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0);
780
781 if (__improbable((urgency_assert == TRUE)))
782 urgency_notification_time_start = mach_absolute_time();
783
784 current_cpu_datap()->cpu_nthread = nthread;
785 pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
786
787 if (__improbable((urgency_assert == TRUE))) {
788 delta = mach_absolute_time() - urgency_notification_time_start;
789
790 if (__improbable(delta > urgency_notification_max_recorded)) {
791 /* This is not synchronized, but it doesn't matter
792 * if we (rarely) miss an event, as it is statistically
793 * unlikely that it will never recur.
794 */
795 urgency_notification_max_recorded = delta;
796
797 if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
798 panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
799 }
800 }
801
802 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
803 }
804
805 void
806 active_rt_threads(boolean_t active)
807 {
808 if (!pmInitDone
809 || pmDispatch == NULL
810 || pmDispatch->pmActiveRTThreads == NULL)
811 return;
812
813 pmDispatch->pmActiveRTThreads(active);
814 }
815
816 static uint32_t
817 pmGetSavedRunCount(void)
818 {
819 return(saved_run_count);
820 }
821
822 /*
823 * Returns the root of the package tree.
824 */
825 x86_pkg_t *
826 pmGetPkgRoot(void)
827 {
828 return(x86_pkgs);
829 }
830
831 static boolean_t
832 pmCPUGetHibernate(int cpu)
833 {
834 return(cpu_datap(cpu)->cpu_hibernate);
835 }
836
837 processor_t
838 pmLCPUtoProcessor(int lcpu)
839 {
840 return(cpu_datap(lcpu)->cpu_processor);
841 }
842
843 static void
844 pmReSyncDeadlines(int cpu)
845 {
846 static boolean_t registered = FALSE;
847
848 if (!registered) {
849 PM_interrupt_register(&timer_resync_deadlines);
850 registered = TRUE;
851 }
852
853 if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num)
854 timer_resync_deadlines();
855 else
856 cpu_PM_interrupt(cpu);
857 }
858
859 static void
860 pmSendIPI(int cpu)
861 {
862 lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
863 }
864
865 static void
866 pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
867 {
868 /*
869 * Make sure that nanotime didn't change while we were reading it.
870 */
871 do {
872 rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
873 rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
874 rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
875 rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
876 rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
877 } while(pal_rtc_nanotime_info.generation != 0
878 && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
879 }
880
881 uint32_t
882 pmTimerQueueMigrate(int target_cpu)
883 {
884 /* Call the etimer code to do this. */
885 return (target_cpu != cpu_number())
886 ? timer_queue_migrate_cpu(target_cpu)
887 : 0;
888 }
889
890
891 /*
892 * Called by the power management kext to register itself and to get the
893 * callbacks it might need into other kernel functions. This interface
894 * is versioned to allow for slight mis-matches between the kext and the
895 * kernel.
896 */
897 void
898 pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
899 pmCallBacks_t *callbacks)
900 {
901 if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
902 callbacks->setRTCPop = setPop;
903 callbacks->resyncDeadlines = pmReSyncDeadlines;
904 callbacks->initComplete = pmInitComplete;
905 callbacks->GetLCPU = pmGetLogicalCPU;
906 callbacks->GetCore = pmGetCore;
907 callbacks->GetDie = pmGetDie;
908 callbacks->GetPackage = pmGetPackage;
909 callbacks->GetMyLCPU = pmGetMyLogicalCPU;
910 callbacks->GetMyCore = pmGetMyCore;
911 callbacks->GetMyDie = pmGetMyDie;
912 callbacks->GetMyPackage = pmGetMyPackage;
913 callbacks->GetPkgRoot = pmGetPkgRoot;
914 callbacks->LockCPUTopology = pmLockCPUTopology;
915 callbacks->GetHibernate = pmCPUGetHibernate;
916 callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
917 callbacks->ThreadBind = thread_bind;
918 callbacks->GetSavedRunCount = pmGetSavedRunCount;
919 callbacks->GetNanotimeInfo = pmGetNanotimeInfo;
920 callbacks->ThreadGetUrgency = pmThreadGetUrgency;
921 callbacks->RTCClockAdjust = rtc_clock_adjust;
922 callbacks->timerQueueMigrate = pmTimerQueueMigrate;
923 callbacks->topoParms = &topoParms;
924 callbacks->pmSendIPI = pmSendIPI;
925 callbacks->InterruptPending = lapic_is_interrupt_pending;
926 callbacks->IsInterrupting = lapic_is_interrupting;
927 callbacks->InterruptStats = lapic_interrupt_counts;
928 callbacks->DisableApicTimer = lapic_disable_timer;
929 } else {
930 panic("Version mis-match between Kernel and CPU PM");
931 }
932
933 if (cpuFuncs != NULL) {
934 if (pmDispatch) {
935 panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
936 }
937
938 pmDispatch = cpuFuncs;
939
940 if (earlyTopology
941 && pmDispatch->pmCPUStateInit != NULL) {
942 (*pmDispatch->pmCPUStateInit)();
943 earlyTopology = FALSE;
944 }
945
946 if (pmDispatch->pmIPIHandler != NULL) {
947 lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
948 }
949 }
950 }
951
952 /*
953 * Unregisters the power management functions from the kext.
954 */
955 void
956 pmUnRegister(pmDispatch_t *cpuFuncs)
957 {
958 if (cpuFuncs != NULL && pmDispatch == cpuFuncs) {
959 pmDispatch = NULL;
960 }
961 }
962
963 void machine_track_platform_idle(boolean_t entry) {
964 cpu_data_t *my_cpu = current_cpu_datap();
965
966 if (entry) {
967 (void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
968 }
969 else {
970 uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
971 if (nidle == topoParms.nLThreadsPerPackage) {
972 my_cpu->lcpu.package->package_idle_exits++;
973 }
974 }
975 }