]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2004-2011 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | /* | |
30 | * CPU-specific power management support. | |
31 | * | |
32 | * Implements the "wrappers" to the KEXT. | |
33 | */ | |
34 | #include <i386/asm.h> | |
35 | #include <i386/machine_cpu.h> | |
36 | #include <i386/mp.h> | |
37 | #include <i386/machine_routines.h> | |
38 | #include <i386/proc_reg.h> | |
39 | #include <i386/pmap.h> | |
40 | #include <i386/misc_protos.h> | |
41 | #include <kern/machine.h> | |
42 | #include <kern/pms.h> | |
43 | #include <kern/processor.h> | |
44 | #include <kern/timer_queue.h> | |
45 | #include <i386/cpu_threads.h> | |
46 | #include <i386/pmCPU.h> | |
47 | #include <i386/cpuid.h> | |
48 | #include <i386/rtclock_protos.h> | |
49 | #include <kern/sched_prim.h> | |
50 | #include <i386/lapic.h> | |
51 | #include <i386/pal_routines.h> | |
52 | #include <sys/kdebug.h> | |
53 | #include <i386/tsc.h> | |
54 | ||
55 | #include <kern/sched_urgency.h> | |
56 | ||
57 | extern int disableConsoleOutput; | |
58 | ||
59 | #define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL | |
60 | ||
61 | uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16 * NSEC_PER_USEC, 32 * NSEC_PER_USEC, 64 * NSEC_PER_USEC, 128 * NSEC_PER_USEC, 256 * NSEC_PER_USEC, 512 * NSEC_PER_USEC, 1024 * NSEC_PER_USEC, 2048 * NSEC_PER_USEC, 4096 * NSEC_PER_USEC, 8192 * NSEC_PER_USEC, 16384 * NSEC_PER_USEC, 32768 * NSEC_PER_USEC}; | |
62 | uint64_t *cpu_rtime_bins = &cpu_itime_bins[0]; | |
63 | ||
64 | /* | |
65 | * The following is set when the KEXT loads and initializes. | |
66 | */ | |
67 | pmDispatch_t *pmDispatch = NULL; | |
68 | ||
69 | uint32_t pmInitDone = 0; | |
70 | static boolean_t earlyTopology = FALSE; | |
71 | static uint64_t earlyMaxBusDelay = DELAY_UNSET; | |
72 | static uint64_t earlyMaxIntDelay = DELAY_UNSET; | |
73 | ||
74 | /* | |
75 | * Initialize the Cstate change code. | |
76 | */ | |
77 | void | |
78 | power_management_init(void) | |
79 | { | |
80 | if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) { | |
81 | (*pmDispatch->cstateInit)(); | |
82 | } | |
83 | } | |
84 | ||
/*
 * Count `interval` in a histogram.
 *
 * interval: value to classify.
 * bins:     nbins counters; exactly one is incremented, or none.
 * binvals:  nbins exclusive upper edges, scanned from index 0 upward.
 * nbins:    number of entries in both arrays.
 *
 * The counter of the first edge strictly greater than `interval` is
 * incremented. An interval that is >= every edge is not counted at all.
 */
static inline void
machine_classify_interval(uint64_t interval, uint64_t *bins, const uint64_t *binvals, uint32_t nbins)
{
	for (uint32_t i = 0; i < nbins; i++) {
		if (interval < binvals[i]) {
			bins[i]++;
			return;
		}
	}
}
96 | ||
/* Number of machine_idle() calls that took the pending-timer path instead of idling. */
uint64_t idle_pending_timers_processed;
/*
 * machine_idle() takes the pending-timer path only when the hard deadline
 * is closer than this many mach_absolute_time() units.
 */
uint32_t idle_entry_timer_processing_hdeadline_threshold = 5000000;

/*
 * Called when the CPU is idle. It calls into the power management kext
 * to determine the best way to idle the CPU.
 *
 * If the earliest soft timer deadline has already passed and the hard
 * deadline is imminent, the CPU does not idle: it jumps straight to the
 * exit path, re-enables interrupts and triggers a software timer interrupt.
 *
 * Otherwise the time since the previous idle exit and the time spent idle
 * are added to the per-CPU totals and histograms. Interrupts are re-enabled
 * with pal_sti() on every path before returning.
 */
void
machine_idle(void)
{
	cpu_data_t *my_cpu = current_cpu_datap();
	__unused uint32_t cnum = my_cpu->cpu_number;
	uint64_t ctime, rtime, itime;
#if CST_DEMOTION_DEBUG
	processor_t cproc = my_cpu->cpu_processor;
	uint64_t cwakeups = my_cpu->cpu_wakeups_issued_total;
#endif /* CST_DEMOTION_DEBUG */
	uint64_t esdeadline, ehdeadline;
	boolean_t do_process_pending_timers = FALSE;

	/* Snapshot the current time and this CPU's soft and hard timer deadlines. */
	ctime = mach_absolute_time();
	esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
	ehdeadline = my_cpu->rtclock_timer.deadline;
	/*
	 * Determine if pending timers exist: the soft deadline has passed, the
	 * hard deadline has not, and the hard deadline is within the threshold.
	 */
	if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
	    ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
		idle_pending_timers_processed++;
		do_process_pending_timers = TRUE;
		/* Skip idling and all idle accounting. */
		goto machine_idle_exit;
	} else {
		TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
	}

	my_cpu->lcpu.state = LCPU_IDLE;
	DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
	MARK_CPU_IDLE(cnum);

	/* Time since this CPU last left idle; cpu_ixtime is stamped on idle exit below. */
	rtime = ctime - my_cpu->cpu_ixtime;

	my_cpu->cpu_rtime_total += rtime;
	machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
#if CST_DEMOTION_DEBUG
	/* Record the C3/C6/C7 residency MSR values before idling. */
	uint32_t cl = 0, ch = 0;
	uint64_t c3res, c6res, c7res;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = ((uint64_t)ch << 32) | cl;
#endif

	if (pmInitDone) {
		/*
		 * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
		 * were called prior to the CPU PM kext being registered. We do
		 * this here since we know at this point the values will be first
		 * used since idle is where the decisions using these values is made.
		 */
		if (earlyMaxBusDelay != DELAY_UNSET) {
			ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
		}
		if (earlyMaxIntDelay != DELAY_UNSET) {
			ml_set_maxintdelay(earlyMaxIntDelay);
		}
	}

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->MachineIdle != NULL) {
		/* NOTE(review): the argument looks like a "no limit" maximum — confirm against the KEXT interface. */
		(*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
	} else {
		/*
		 * If no power management, re-enable interrupts and halt.
		 * This will keep the CPU from spinning through the scheduler
		 * and will allow at least some minimal power savings (but it
		 * can cause problems in some MP configurations w.r.t. the APIC
		 * stopping during a GV3 transition).
		 */
		pal_hlt();
		/* Once woken, re-disable interrupts. */
		pal_cli();
	}

	/*
	 * Mark the CPU as running again.
	 */
	MARK_CPU_ACTIVE(cnum);
	DBGLOG(cpu_handle, cnum, MP_UNIDLE);
	my_cpu->lcpu.state = LCPU_RUN;
	/* Stamp the idle-exit time and account for the time spent idle. */
	uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
	itime = ixtime - ctime;
	my_cpu->cpu_idle_exits++;
	my_cpu->cpu_itime_total += itime;
	machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
#if CST_DEMOTION_DEBUG
	/* Turn the saved residency readings into deltas across this idle period. */
	cl = ch = 0;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = (((uint64_t)ch << 32) | cl) - c3res;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = (((uint64_t)ch << 32) | cl) - c6res;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = (((uint64_t)ch << 32) | cl) - c7res;

	/* Idle time not covered by the converted C3 + C6 + C7 residency. */
	uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
	KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
	if ((itime > 1000000) && (ndelta > 250000)) {
		KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
	}
#endif

machine_idle_exit:
	/*
	 * Re-enable interrupts.
	 */

	pal_sti();

	if (do_process_pending_timers) {
		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);

		/* Adjust to reflect that this isn't truly a package idle exit */
		__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		lapic_timer_swi(); /* Trigger software timer interrupt */
		__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);

		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
	}
#if CST_DEMOTION_DEBUG
	uint64_t nwakeups = my_cpu->cpu_wakeups_issued_total;

	/* Trace when this CPU issued no wakeups during the call and the package's num_idle equals its thread count. */
	if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
		KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
	}
#endif
}
233 | ||
234 | /* | |
235 | * Called when the CPU is to be halted. It will choose the best C-State | |
236 | * to be in. | |
237 | */ | |
238 | void | |
239 | pmCPUHalt(uint32_t reason) | |
240 | { | |
241 | cpu_data_t *cpup = current_cpu_datap(); | |
242 | ||
243 | switch (reason) { | |
244 | case PM_HALT_DEBUG: | |
245 | cpup->lcpu.state = LCPU_PAUSE; | |
246 | pal_stop_cpu(FALSE); | |
247 | break; | |
248 | ||
249 | case PM_HALT_PANIC: | |
250 | cpup->lcpu.state = LCPU_PAUSE; | |
251 | pal_stop_cpu(TRUE); | |
252 | break; | |
253 | ||
254 | case PM_HALT_NORMAL: | |
255 | case PM_HALT_SLEEP: | |
256 | default: | |
257 | pal_cli(); | |
258 | ||
259 | if (pmInitDone | |
260 | && pmDispatch != NULL | |
261 | && pmDispatch->pmCPUHalt != NULL) { | |
262 | /* | |
263 | * Halt the CPU (and put it in a low power state. | |
264 | */ | |
265 | (*pmDispatch->pmCPUHalt)(); | |
266 | ||
267 | /* | |
268 | * We've exited halt, so get the CPU schedulable again. | |
269 | * - by calling the fast init routine for a slave, or | |
270 | * - by returning if we're the master processor. | |
271 | */ | |
272 | if (cpup->cpu_number != master_cpu) { | |
273 | i386_init_slave_fast(); | |
274 | panic("init_slave_fast returned"); | |
275 | } | |
276 | } else { | |
277 | /* | |
278 | * If no power managment and a processor is taken off-line, | |
279 | * then invalidate the cache and halt it (it will not be able | |
280 | * to be brought back on-line without resetting the CPU). | |
281 | */ | |
282 | __asm__ volatile ("wbinvd"); | |
283 | cpup->lcpu.state = LCPU_HALT; | |
284 | pal_stop_cpu(FALSE); | |
285 | ||
286 | panic("back from Halt"); | |
287 | } | |
288 | ||
289 | break; | |
290 | } | |
291 | } | |
292 | ||
293 | void | |
294 | pmMarkAllCPUsOff(void) | |
295 | { | |
296 | if (pmInitDone | |
297 | && pmDispatch != NULL | |
298 | && pmDispatch->markAllCPUsOff != NULL) { | |
299 | (*pmDispatch->markAllCPUsOff)(); | |
300 | } | |
301 | } | |
302 | ||
303 | static void | |
304 | pmInitComplete(void) | |
305 | { | |
306 | if (earlyTopology | |
307 | && pmDispatch != NULL | |
308 | && pmDispatch->pmCPUStateInit != NULL) { | |
309 | (*pmDispatch->pmCPUStateInit)(); | |
310 | earlyTopology = FALSE; | |
311 | } | |
312 | pmInitDone = 1; | |
313 | } | |
314 | ||
315 | x86_lcpu_t * | |
316 | pmGetLogicalCPU(int cpu) | |
317 | { | |
318 | return cpu_to_lcpu(cpu); | |
319 | } | |
320 | ||
321 | x86_lcpu_t * | |
322 | pmGetMyLogicalCPU(void) | |
323 | { | |
324 | cpu_data_t *cpup = current_cpu_datap(); | |
325 | ||
326 | return &cpup->lcpu; | |
327 | } | |
328 | ||
329 | static x86_core_t * | |
330 | pmGetCore(int cpu) | |
331 | { | |
332 | return cpu_to_core(cpu); | |
333 | } | |
334 | ||
335 | static x86_core_t * | |
336 | pmGetMyCore(void) | |
337 | { | |
338 | cpu_data_t *cpup = current_cpu_datap(); | |
339 | ||
340 | return cpup->lcpu.core; | |
341 | } | |
342 | ||
343 | static x86_die_t * | |
344 | pmGetDie(int cpu) | |
345 | { | |
346 | return cpu_to_die(cpu); | |
347 | } | |
348 | ||
349 | static x86_die_t * | |
350 | pmGetMyDie(void) | |
351 | { | |
352 | cpu_data_t *cpup = current_cpu_datap(); | |
353 | ||
354 | return cpup->lcpu.die; | |
355 | } | |
356 | ||
357 | static x86_pkg_t * | |
358 | pmGetPackage(int cpu) | |
359 | { | |
360 | return cpu_to_package(cpu); | |
361 | } | |
362 | ||
363 | static x86_pkg_t * | |
364 | pmGetMyPackage(void) | |
365 | { | |
366 | cpu_data_t *cpup = current_cpu_datap(); | |
367 | ||
368 | return cpup->lcpu.package; | |
369 | } | |
370 | ||
371 | static void | |
372 | pmLockCPUTopology(int lock) | |
373 | { | |
374 | if (lock) { | |
375 | mp_safe_spin_lock(&x86_topo_lock); | |
376 | } else { | |
377 | simple_unlock(&x86_topo_lock); | |
378 | } | |
379 | } | |
380 | ||
381 | /* | |
382 | * Called to get the next deadline that has been set by the | |
383 | * power management code. | |
384 | * Note: a return of 0 from AICPM and this routine signifies | |
385 | * that no deadline is set. | |
386 | */ | |
387 | uint64_t | |
388 | pmCPUGetDeadline(cpu_data_t *cpu) | |
389 | { | |
390 | uint64_t deadline = 0; | |
391 | ||
392 | if (pmInitDone | |
393 | && pmDispatch != NULL | |
394 | && pmDispatch->GetDeadline != NULL) { | |
395 | deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu); | |
396 | } | |
397 | ||
398 | return deadline; | |
399 | } | |
400 | ||
401 | /* | |
402 | * Called to determine if the supplied deadline or the power management | |
403 | * deadline is sooner. Returns which ever one is first. | |
404 | */ | |
405 | ||
406 | uint64_t | |
407 | pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline) | |
408 | { | |
409 | if (pmInitDone | |
410 | && pmDispatch != NULL | |
411 | && pmDispatch->SetDeadline != NULL) { | |
412 | deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline); | |
413 | } | |
414 | ||
415 | return deadline; | |
416 | } | |
417 | ||
418 | /* | |
419 | * Called when a power management deadline expires. | |
420 | */ | |
421 | void | |
422 | pmCPUDeadline(cpu_data_t *cpu) | |
423 | { | |
424 | if (pmInitDone | |
425 | && pmDispatch != NULL | |
426 | && pmDispatch->Deadline != NULL) { | |
427 | (*pmDispatch->Deadline)(&cpu->lcpu); | |
428 | } | |
429 | } | |
430 | ||
431 | /* | |
432 | * Called to get a CPU out of idle. | |
433 | */ | |
434 | boolean_t | |
435 | pmCPUExitIdle(cpu_data_t *cpu) | |
436 | { | |
437 | boolean_t do_ipi; | |
438 | ||
439 | if (pmInitDone | |
440 | && pmDispatch != NULL | |
441 | && pmDispatch->exitIdle != NULL) { | |
442 | do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu); | |
443 | } else { | |
444 | do_ipi = TRUE; | |
445 | } | |
446 | ||
447 | return do_ipi; | |
448 | } | |
449 | ||
450 | kern_return_t | |
451 | pmCPUExitHalt(int cpu) | |
452 | { | |
453 | kern_return_t rc = KERN_INVALID_ARGUMENT; | |
454 | ||
455 | if (pmInitDone | |
456 | && pmDispatch != NULL | |
457 | && pmDispatch->exitHalt != NULL) { | |
458 | rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu)); | |
459 | } | |
460 | ||
461 | return rc; | |
462 | } | |
463 | ||
464 | kern_return_t | |
465 | pmCPUExitHaltToOff(int cpu) | |
466 | { | |
467 | kern_return_t rc = KERN_SUCCESS; | |
468 | ||
469 | if (pmInitDone | |
470 | && pmDispatch != NULL | |
471 | && pmDispatch->exitHaltToOff != NULL) { | |
472 | rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu)); | |
473 | } | |
474 | ||
475 | return rc; | |
476 | } | |
477 | ||
478 | /* | |
479 | * Called to initialize the power management structures for the CPUs. | |
480 | */ | |
481 | void | |
482 | pmCPUStateInit(void) | |
483 | { | |
484 | if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) { | |
485 | (*pmDispatch->pmCPUStateInit)(); | |
486 | } else { | |
487 | earlyTopology = TRUE; | |
488 | } | |
489 | } | |
490 | ||
491 | /* | |
492 | * Called when a CPU is being restarted after being powered off (as in S3). | |
493 | */ | |
494 | void | |
495 | pmCPUMarkRunning(cpu_data_t *cpu) | |
496 | { | |
497 | cpu_data_t *cpup = current_cpu_datap(); | |
498 | ||
499 | if (pmInitDone | |
500 | && pmDispatch != NULL | |
501 | && pmDispatch->markCPURunning != NULL) { | |
502 | (*pmDispatch->markCPURunning)(&cpu->lcpu); | |
503 | } else { | |
504 | cpup->lcpu.state = LCPU_RUN; | |
505 | } | |
506 | } | |
507 | ||
508 | /* | |
509 | * Called to get/set CPU power management state. | |
510 | */ | |
511 | int | |
512 | pmCPUControl(uint32_t cmd, void *datap) | |
513 | { | |
514 | int rc = -1; | |
515 | ||
516 | if (pmDispatch != NULL | |
517 | && pmDispatch->pmCPUControl != NULL) { | |
518 | rc = (*pmDispatch->pmCPUControl)(cmd, datap); | |
519 | } | |
520 | ||
521 | return rc; | |
522 | } | |
523 | ||
524 | /* | |
525 | * Called to save the timer state used by power management prior | |
526 | * to "sleeping". | |
527 | */ | |
528 | void | |
529 | pmTimerSave(void) | |
530 | { | |
531 | if (pmDispatch != NULL | |
532 | && pmDispatch->pmTimerStateSave != NULL) { | |
533 | (*pmDispatch->pmTimerStateSave)(); | |
534 | } | |
535 | } | |
536 | ||
537 | /* | |
538 | * Called to restore the timer state used by power management after | |
539 | * waking from "sleep". | |
540 | */ | |
541 | void | |
542 | pmTimerRestore(void) | |
543 | { | |
544 | if (pmDispatch != NULL | |
545 | && pmDispatch->pmTimerStateRestore != NULL) { | |
546 | (*pmDispatch->pmTimerStateRestore)(); | |
547 | } | |
548 | } | |
549 | ||
/*
 * Set the worst-case time for the C4 to C2 transition.
 * No longer does anything; the argument is ignored.
 */
void
ml_set_maxsnoop(uint32_t maxdelay)
{
	(void)maxdelay;
}
558 | ||
559 | ||
560 | /* | |
561 | * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds. | |
562 | */ | |
563 | unsigned | |
564 | ml_get_maxsnoop(void) | |
565 | { | |
566 | uint64_t max_snoop = 0; | |
567 | ||
568 | if (pmInitDone | |
569 | && pmDispatch != NULL | |
570 | && pmDispatch->getMaxSnoop != NULL) { | |
571 | max_snoop = pmDispatch->getMaxSnoop(); | |
572 | } | |
573 | ||
574 | return (unsigned)(max_snoop & 0xffffffff); | |
575 | } | |
576 | ||
577 | ||
578 | uint32_t | |
579 | ml_get_maxbusdelay(void) | |
580 | { | |
581 | uint64_t max_delay = 0; | |
582 | ||
583 | if (pmInitDone | |
584 | && pmDispatch != NULL | |
585 | && pmDispatch->getMaxBusDelay != NULL) { | |
586 | max_delay = pmDispatch->getMaxBusDelay(); | |
587 | } | |
588 | ||
589 | return (uint32_t)(max_delay & 0xffffffff); | |
590 | } | |
591 | ||
592 | /* | |
593 | * Advertise a memory access latency tolerance of "mdelay" ns | |
594 | */ | |
595 | void | |
596 | ml_set_maxbusdelay(uint32_t mdelay) | |
597 | { | |
598 | uint64_t maxdelay = mdelay; | |
599 | ||
600 | if (pmDispatch != NULL | |
601 | && pmDispatch->setMaxBusDelay != NULL) { | |
602 | earlyMaxBusDelay = DELAY_UNSET; | |
603 | pmDispatch->setMaxBusDelay(maxdelay); | |
604 | } else { | |
605 | earlyMaxBusDelay = maxdelay; | |
606 | } | |
607 | } | |
608 | ||
609 | uint64_t | |
610 | ml_get_maxintdelay(void) | |
611 | { | |
612 | uint64_t max_delay = 0; | |
613 | ||
614 | if (pmDispatch != NULL | |
615 | && pmDispatch->getMaxIntDelay != NULL) { | |
616 | max_delay = pmDispatch->getMaxIntDelay(); | |
617 | } | |
618 | ||
619 | return max_delay; | |
620 | } | |
621 | ||
622 | /* | |
623 | * Set the maximum delay allowed for an interrupt. | |
624 | */ | |
625 | void | |
626 | ml_set_maxintdelay(uint64_t mdelay) | |
627 | { | |
628 | if (pmDispatch != NULL | |
629 | && pmDispatch->setMaxIntDelay != NULL) { | |
630 | earlyMaxIntDelay = DELAY_UNSET; | |
631 | pmDispatch->setMaxIntDelay(mdelay); | |
632 | } else { | |
633 | earlyMaxIntDelay = mdelay; | |
634 | } | |
635 | } | |
636 | ||
637 | boolean_t | |
638 | ml_get_interrupt_prewake_applicable() | |
639 | { | |
640 | boolean_t applicable = FALSE; | |
641 | ||
642 | if (pmInitDone | |
643 | && pmDispatch != NULL | |
644 | && pmDispatch->pmInterruptPrewakeApplicable != NULL) { | |
645 | applicable = pmDispatch->pmInterruptPrewakeApplicable(); | |
646 | } | |
647 | ||
648 | return applicable; | |
649 | } | |
650 | ||
651 | /* | |
652 | * Put a CPU into "safe" mode with respect to power. | |
653 | * | |
654 | * Some systems cannot operate at a continuous "normal" speed without | |
655 | * exceeding the thermal design. This is called per-CPU to place the | |
656 | * CPUs into a "safe" operating mode. | |
657 | */ | |
658 | void | |
659 | pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags) | |
660 | { | |
661 | if (pmDispatch != NULL | |
662 | && pmDispatch->pmCPUSafeMode != NULL) { | |
663 | pmDispatch->pmCPUSafeMode(lcpu, flags); | |
664 | } else { | |
665 | /* | |
666 | * Do something reasonable if the KEXT isn't present. | |
667 | * | |
668 | * We only look at the PAUSE and RESUME flags. The other flag(s) | |
669 | * will not make any sense without the KEXT, so just ignore them. | |
670 | * | |
671 | * We set the CPU's state to indicate that it's halted. If this | |
672 | * is the CPU we're currently running on, then spin until the | |
673 | * state becomes non-halted. | |
674 | */ | |
675 | if (flags & PM_SAFE_FL_PAUSE) { | |
676 | lcpu->state = LCPU_PAUSE; | |
677 | if (lcpu == x86_lcpu()) { | |
678 | while (lcpu->state == LCPU_PAUSE) { | |
679 | cpu_pause(); | |
680 | } | |
681 | } | |
682 | } | |
683 | ||
684 | /* | |
685 | * Clear the halted flag for the specified CPU, that will | |
686 | * get it out of it's spin loop. | |
687 | */ | |
688 | if (flags & PM_SAFE_FL_RESUME) { | |
689 | lcpu->state = LCPU_RUN; | |
690 | } | |
691 | } | |
692 | } | |
693 | ||
694 | static uint32_t saved_run_count = 0; | |
695 | ||
696 | void | |
697 | machine_run_count(uint32_t count) | |
698 | { | |
699 | if (pmDispatch != NULL | |
700 | && pmDispatch->pmSetRunCount != NULL) { | |
701 | pmDispatch->pmSetRunCount(count); | |
702 | } else { | |
703 | saved_run_count = count; | |
704 | } | |
705 | } | |
706 | ||
707 | processor_t | |
708 | machine_choose_processor(processor_set_t pset, | |
709 | processor_t preferred) | |
710 | { | |
711 | int startCPU; | |
712 | int endCPU; | |
713 | int preferredCPU; | |
714 | int chosenCPU; | |
715 | ||
716 | if (!pmInitDone) { | |
717 | return preferred; | |
718 | } | |
719 | ||
720 | if (pset == NULL) { | |
721 | startCPU = -1; | |
722 | endCPU = -1; | |
723 | } else { | |
724 | startCPU = pset->cpu_set_low; | |
725 | endCPU = pset->cpu_set_hi; | |
726 | } | |
727 | ||
728 | if (preferred == NULL) { | |
729 | preferredCPU = -1; | |
730 | } else { | |
731 | preferredCPU = preferred->cpu_id; | |
732 | } | |
733 | ||
734 | if (pmDispatch != NULL | |
735 | && pmDispatch->pmChooseCPU != NULL) { | |
736 | chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU); | |
737 | ||
738 | if (chosenCPU == -1) { | |
739 | return NULL; | |
740 | } | |
741 | return cpu_datap(chosenCPU)->cpu_processor; | |
742 | } | |
743 | ||
744 | return preferred; | |
745 | } | |
746 | ||
747 | static int | |
748 | pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline) | |
749 | { | |
750 | thread_urgency_t urgency; | |
751 | uint64_t arg1, arg2; | |
752 | ||
753 | urgency = thread_get_urgency(THREAD_NULL, &arg1, &arg2); | |
754 | ||
755 | if (urgency == THREAD_URGENCY_REAL_TIME) { | |
756 | if (rt_period != NULL) { | |
757 | *rt_period = arg1; | |
758 | } | |
759 | ||
760 | if (rt_deadline != NULL) { | |
761 | *rt_deadline = arg2; | |
762 | } | |
763 | } | |
764 | ||
765 | return (int)urgency; | |
766 | } | |
767 | ||
768 | #if DEBUG | |
769 | uint32_t urgency_stats[64][THREAD_URGENCY_MAX]; | |
770 | #endif | |
771 | ||
772 | #define URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000) | |
773 | uint64_t urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded; | |
774 | ||
775 | void | |
776 | thread_tell_urgency(thread_urgency_t urgency, | |
777 | uint64_t rt_period, | |
778 | uint64_t rt_deadline, | |
779 | uint64_t sched_latency, | |
780 | thread_t nthread) | |
781 | { | |
782 | uint64_t urgency_notification_time_start = 0, delta; | |
783 | boolean_t urgency_assert = (urgency_notification_assert_abstime_threshold != 0); | |
784 | assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE); | |
785 | #if DEBUG | |
786 | urgency_stats[cpu_number() % 64][urgency]++; | |
787 | #endif | |
788 | if (!pmInitDone | |
789 | || pmDispatch == NULL | |
790 | || pmDispatch->pmThreadTellUrgency == NULL) { | |
791 | return; | |
792 | } | |
793 | ||
794 | SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0); | |
795 | ||
796 | if (__improbable((urgency_assert == TRUE))) { | |
797 | urgency_notification_time_start = mach_absolute_time(); | |
798 | } | |
799 | ||
800 | current_cpu_datap()->cpu_nthread = nthread; | |
801 | pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline); | |
802 | ||
803 | if (__improbable((urgency_assert == TRUE))) { | |
804 | delta = mach_absolute_time() - urgency_notification_time_start; | |
805 | ||
806 | if (__improbable(delta > urgency_notification_max_recorded)) { | |
807 | /* This is not synchronized, but it doesn't matter | |
808 | * if we (rarely) miss an event, as it is statistically | |
809 | * unlikely that it will never recur. | |
810 | */ | |
811 | urgency_notification_max_recorded = delta; | |
812 | ||
813 | if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended())) { | |
814 | panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta); | |
815 | } | |
816 | } | |
817 | } | |
818 | ||
819 | SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0); | |
820 | } | |
821 | ||
822 | void | |
823 | machine_thread_going_on_core(__unused thread_t new_thread, | |
824 | __unused thread_urgency_t urgency, | |
825 | __unused uint64_t sched_latency, | |
826 | __unused uint64_t same_pri_latency, | |
827 | __unused uint64_t dispatch_time) | |
828 | { | |
829 | } | |
830 | ||
831 | void | |
832 | machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating, | |
833 | uint64_t last_dispatch, boolean_t thread_runnable) | |
834 | { | |
835 | if (!pmInitDone | |
836 | || pmDispatch == NULL | |
837 | || pmDispatch->pmThreadGoingOffCore == NULL) { | |
838 | return; | |
839 | } | |
840 | ||
841 | pmDispatch->pmThreadGoingOffCore(old_thread, thread_terminating, | |
842 | last_dispatch, thread_runnable); | |
843 | } | |
844 | ||
/*
 * No-op in this file: all three latency arguments are ignored.
 */
void
machine_max_runnable_latency(uint64_t bg_max_latency,
    uint64_t default_max_latency,
    uint64_t realtime_max_latency)
{
	(void)bg_max_latency;
	(void)default_max_latency;
	(void)realtime_max_latency;
}
851 | ||
852 | void | |
853 | machine_work_interval_notify(__unused thread_t thread, | |
854 | __unused struct kern_work_interval_args* kwi_args) | |
855 | { | |
856 | } | |
857 | ||
858 | ||
859 | void | |
860 | machine_switch_perfcontrol_context(__unused perfcontrol_event event, | |
861 | __unused uint64_t timestamp, | |
862 | __unused uint32_t flags, | |
863 | __unused uint64_t new_thread_same_pri_latency, | |
864 | __unused thread_t old, | |
865 | __unused thread_t new) | |
866 | { | |
867 | } | |
868 | ||
869 | void | |
870 | machine_switch_perfcontrol_state_update(__unused perfcontrol_event event, | |
871 | __unused uint64_t timestamp, | |
872 | __unused uint32_t flags, | |
873 | __unused thread_t thread) | |
874 | { | |
875 | } | |
876 | ||
877 | void | |
878 | active_rt_threads(boolean_t active) | |
879 | { | |
880 | if (!pmInitDone | |
881 | || pmDispatch == NULL | |
882 | || pmDispatch->pmActiveRTThreads == NULL) { | |
883 | return; | |
884 | } | |
885 | ||
886 | pmDispatch->pmActiveRTThreads(active); | |
887 | } | |
888 | ||
889 | static uint32_t | |
890 | pmGetSavedRunCount(void) | |
891 | { | |
892 | return saved_run_count; | |
893 | } | |
894 | ||
895 | /* | |
896 | * Returns the root of the package tree. | |
897 | */ | |
898 | x86_pkg_t * | |
899 | pmGetPkgRoot(void) | |
900 | { | |
901 | return x86_pkgs; | |
902 | } | |
903 | ||
904 | static boolean_t | |
905 | pmCPUGetHibernate(int cpu) | |
906 | { | |
907 | return cpu_datap(cpu)->cpu_hibernate; | |
908 | } | |
909 | ||
910 | processor_t | |
911 | pmLCPUtoProcessor(int lcpu) | |
912 | { | |
913 | return cpu_datap(lcpu)->cpu_processor; | |
914 | } | |
915 | ||
916 | static void | |
917 | pmReSyncDeadlines(int cpu) | |
918 | { | |
919 | static boolean_t registered = FALSE; | |
920 | ||
921 | if (!registered) { | |
922 | PM_interrupt_register(&timer_resync_deadlines); | |
923 | registered = TRUE; | |
924 | } | |
925 | ||
926 | if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num) { | |
927 | timer_resync_deadlines(); | |
928 | } else { | |
929 | cpu_PM_interrupt(cpu); | |
930 | } | |
931 | } | |
932 | ||
/*
 * Callback handed to the KEXT as `pmSendIPI`: sends the
 * LAPIC_PM_INTERRUPT vector to `cpu` through lapic_send_ipi().
 */
static void
pmSendIPI(int cpu)
{
	lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
}
938 | ||
/*
 * Callback handed to the KEXT as `GetNanotimeInfo`: copies generation,
 * tsc_base, ns_base, scale and shift from pal_rtc_nanotime_info into
 * *rtc_nanotime.
 *
 * The copy is repeated while the live generation is non-zero and differs
 * from the generation captured at the start of the pass. The loop also
 * ends when the live generation reads 0.
 */
static void
pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
{
	/*
	 * Make sure that nanotime didn't change while we were reading it.
	 */
	do {
		rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
		rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
		rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
		rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
		rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
	} while (pal_rtc_nanotime_info.generation != 0
	    && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
}
954 | ||
/*
 * Migrate the timer queue to `target_cpu` through the etimer code.
 * Returns timer_queue_migrate_cpu()'s result, or 0 without doing anything
 * when the target is the calling CPU.
 */
uint32_t
pmTimerQueueMigrate(int target_cpu)
{
	if (target_cpu == cpu_number()) {
		return 0;
	}

	return timer_queue_migrate_cpu(target_cpu);
}
963 | ||
964 | ||
965 | /* | |
966 | * Called by the power management kext to register itself and to get the | |
967 | * callbacks it might need into other kernel functions. This interface | |
968 | * is versioned to allow for slight mis-matches between the kext and the | |
969 | * kernel. | |
970 | */ | |
971 | void | |
972 | pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, | |
973 | pmCallBacks_t *callbacks) | |
974 | { | |
975 | if (callbacks != NULL && version == PM_DISPATCH_VERSION) { | |
976 | callbacks->setRTCPop = setPop; | |
977 | callbacks->resyncDeadlines = pmReSyncDeadlines; | |
978 | callbacks->initComplete = pmInitComplete; | |
979 | callbacks->GetLCPU = pmGetLogicalCPU; | |
980 | callbacks->GetCore = pmGetCore; | |
981 | callbacks->GetDie = pmGetDie; | |
982 | callbacks->GetPackage = pmGetPackage; | |
983 | callbacks->GetMyLCPU = pmGetMyLogicalCPU; | |
984 | callbacks->GetMyCore = pmGetMyCore; | |
985 | callbacks->GetMyDie = pmGetMyDie; | |
986 | callbacks->GetMyPackage = pmGetMyPackage; | |
987 | callbacks->GetPkgRoot = pmGetPkgRoot; | |
988 | callbacks->LockCPUTopology = pmLockCPUTopology; | |
989 | callbacks->GetHibernate = pmCPUGetHibernate; | |
990 | callbacks->LCPUtoProcessor = pmLCPUtoProcessor; | |
991 | callbacks->ThreadBind = thread_bind; | |
992 | callbacks->GetSavedRunCount = pmGetSavedRunCount; | |
993 | callbacks->GetNanotimeInfo = pmGetNanotimeInfo; | |
994 | callbacks->ThreadGetUrgency = pmThreadGetUrgency; | |
995 | callbacks->RTCClockAdjust = rtc_clock_adjust; | |
996 | callbacks->timerQueueMigrate = pmTimerQueueMigrate; | |
997 | callbacks->topoParms = &topoParms; | |
998 | callbacks->pmSendIPI = pmSendIPI; | |
999 | callbacks->InterruptPending = lapic_is_interrupt_pending; | |
1000 | callbacks->IsInterrupting = lapic_is_interrupting; | |
1001 | callbacks->InterruptStats = lapic_interrupt_counts; | |
1002 | callbacks->DisableApicTimer = lapic_disable_timer; | |
1003 | } else { | |
1004 | panic("Version mis-match between Kernel and CPU PM"); | |
1005 | } | |
1006 | ||
1007 | if (cpuFuncs != NULL) { | |
1008 | if (pmDispatch) { | |
1009 | panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs); | |
1010 | } | |
1011 | ||
1012 | pmDispatch = cpuFuncs; | |
1013 | ||
1014 | if (earlyTopology | |
1015 | && pmDispatch->pmCPUStateInit != NULL) { | |
1016 | (*pmDispatch->pmCPUStateInit)(); | |
1017 | earlyTopology = FALSE; | |
1018 | } | |
1019 | ||
1020 | if (pmDispatch->pmIPIHandler != NULL) { | |
1021 | lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler); | |
1022 | } | |
1023 | } | |
1024 | } | |
1025 | ||
1026 | /* | |
1027 | * Unregisters the power management functions from the kext. | |
1028 | */ | |
1029 | void | |
1030 | pmUnRegister(pmDispatch_t *cpuFuncs) | |
1031 | { | |
1032 | if (cpuFuncs != NULL && pmDispatch == cpuFuncs) { | |
1033 | pmDispatch = NULL; | |
1034 | } | |
1035 | } | |
1036 | ||
1037 | void | |
1038 | machine_track_platform_idle(boolean_t entry) | |
1039 | { | |
1040 | cpu_data_t *my_cpu = current_cpu_datap(); | |
1041 | ||
1042 | if (entry) { | |
1043 | (void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1); | |
1044 | } else { | |
1045 | uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1); | |
1046 | if (nidle == topoParms.nLThreadsPerPackage) { | |
1047 | my_cpu->lcpu.package->package_idle_exits++; | |
1048 | } | |
1049 | } | |
1050 | } |