git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2004-2011 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28
	29	/*
	30	* CPU-specific power management support.
	31	*
	32	* Implements the "wrappers" to the KEXT.
	33	*/
	34	#include <i386/asm.h>
	35	#include <i386/machine_cpu.h>
	36	#include <i386/mp.h>
	37	#include <i386/machine_routines.h>
	38	#include <i386/proc_reg.h>
	39	#include <i386/pmap.h>
	40	#include <i386/misc_protos.h>
	41	#include <kern/machine.h>
	42	#include <kern/pms.h>
	43	#include <kern/processor.h>
	44	#include <kern/timer_queue.h>
	45	#include <i386/cpu_threads.h>
	46	#include <i386/pmCPU.h>
	47	#include <i386/cpuid.h>
	48	#include <i386/rtclock_protos.h>
	49	#include <kern/sched_prim.h>
	50	#include <i386/lapic.h>
	51	#include <i386/pal_routines.h>
	52	#include <sys/kdebug.h>
	53	#include <i386/tsc.h>
	54
	55	#include <kern/sched_urgency.h>
	56
	57	extern int disableConsoleOutput;
	58
	59	#define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL
	60
	61	uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16 * NSEC_PER_USEC, 32 * NSEC_PER_USEC, 64 * NSEC_PER_USEC, 128 * NSEC_PER_USEC, 256 * NSEC_PER_USEC, 512 * NSEC_PER_USEC, 1024 * NSEC_PER_USEC, 2048 * NSEC_PER_USEC, 4096 * NSEC_PER_USEC, 8192 * NSEC_PER_USEC, 16384 * NSEC_PER_USEC, 32768 * NSEC_PER_USEC};
	62	uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];
	63
	64	/*
	65	* The following is set when the KEXT loads and initializes.
	66	*/
	67	pmDispatch_t *pmDispatch = NULL;
	68
	69	uint32_t pmInitDone = 0;
	70	static boolean_t earlyTopology = FALSE;
	71	static uint64_t earlyMaxBusDelay = DELAY_UNSET;
	72	static uint64_t earlyMaxIntDelay = DELAY_UNSET;
	73
	74	/*
	75	* Initialize the Cstate change code.
	76	*/
	77	void
	78	power_management_init(void)
	79	{
	80	if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) {
	81	(*pmDispatch->cstateInit)();
	82	}
	83	}
	84
	85	static inline void
	86	machine_classify_interval(uint64_t interval, uint64_t bins, uint64_t binvals, uint32_t nbins)
	87	{
	88	uint32_t i;
	89	for (i = 0; i < nbins; i++) {
	90	if (interval < binvals[i]) {
	91	bins[i]++;
	92	break;
	93	}
	94	}
	95	}
	96
	97	uint64_t idle_pending_timers_processed;
	98	uint32_t idle_entry_timer_processing_hdeadline_threshold = 5000000;
	99
	100	/*
	101	* Called when the CPU is idle. It calls into the power management kext
	102	* to determine the best way to idle the CPU.
	103	*/
	104	void
	105	machine_idle(void)
	106	{
	107	cpu_data_t *my_cpu = current_cpu_datap();
	108	__unused uint32_t cnum = my_cpu->cpu_number;
	109	uint64_t ctime, rtime, itime;
	110	#if CST_DEMOTION_DEBUG
	111	processor_t cproc = my_cpu->cpu_processor;
	112	uint64_t cwakeups = my_cpu->cpu_wakeups_issued_total;
	113	#endif /* CST_DEMOTION_DEBUG */
	114	uint64_t esdeadline, ehdeadline;
	115	boolean_t do_process_pending_timers = FALSE;
	116
	117	ctime = mach_absolute_time();
	118	esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
	119	ehdeadline = my_cpu->rtclock_timer.deadline;
	120	/* Determine if pending timers exist */
	121	if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
	122	((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
	123	idle_pending_timers_processed++;
	124	do_process_pending_timers = TRUE;
	125	goto machine_idle_exit;
	126	} else {
	127	TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
	128	}
	129
	130	my_cpu->lcpu.state = LCPU_IDLE;
	131	DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
	132	MARK_CPU_IDLE(cnum);
	133
	134	rtime = ctime - my_cpu->cpu_ixtime;
	135
	136	my_cpu->cpu_rtime_total += rtime;
	137	machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
	138	#if CST_DEMOTION_DEBUG
	139	uint32_t cl = 0, ch = 0;
	140	uint64_t c3res, c6res, c7res;
	141	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	142	c3res = ((uint64_t)ch << 32) \| cl;
	143	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	144	c6res = ((uint64_t)ch << 32) \| cl;
	145	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	146	c7res = ((uint64_t)ch << 32) \| cl;
	147	#endif
	148
	149	if (pmInitDone) {
	150	/*
	151	* Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
	152	* were called prior to the CPU PM kext being registered. We do
	153	* this here since we know at this point the values will be first
	154	* used since idle is where the decisions using these values is made.
	155	*/
	156	if (earlyMaxBusDelay != DELAY_UNSET) {
	157	ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
	158	}
	159	if (earlyMaxIntDelay != DELAY_UNSET) {
	160	ml_set_maxintdelay(earlyMaxIntDelay);
	161	}
	162	}
	163
	164	if (pmInitDone
	165	&& pmDispatch != NULL
	166	&& pmDispatch->MachineIdle != NULL) {
	167	(*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
	168	} else {
	169	/*
	170	* If no power management, re-enable interrupts and halt.
	171	* This will keep the CPU from spinning through the scheduler
	172	* and will allow at least some minimal power savings (but it
	173	* cause problems in some MP configurations w.r.t. the APIC
	174	* stopping during a GV3 transition).
	175	*/
	176	pal_hlt();
	177	/* Once woken, re-disable interrupts. */
	178	pal_cli();
	179	}
	180
	181	/*
	182	* Mark the CPU as running again.
	183	*/
	184	MARK_CPU_ACTIVE(cnum);
	185	DBGLOG(cpu_handle, cnum, MP_UNIDLE);
	186	my_cpu->lcpu.state = LCPU_RUN;
	187	uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
	188	itime = ixtime - ctime;
	189	my_cpu->cpu_idle_exits++;
	190	my_cpu->cpu_itime_total += itime;
	191	machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
	192	#if CST_DEMOTION_DEBUG
	193	cl = ch = 0;
	194	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	195	c3res = (((uint64_t)ch << 32) \| cl) - c3res;
	196	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	197	c6res = (((uint64_t)ch << 32) \| cl) - c6res;
	198	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	199	c7res = (((uint64_t)ch << 32) \| cl) - c7res;
	200
	201	uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
	202	KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
	203	if ((itime > 1000000) && (ndelta > 250000)) {
	204	KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
	205	}
	206	#endif
	207
	208	machine_idle_exit:
	209	/*
	210	* Re-enable interrupts.
	211	*/
	212
	213	pal_sti();
	214
	215	if (do_process_pending_timers) {
	216	TCOAL_DEBUG(0xBBBB0000 \| DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);
	217
	218	/* Adjust to reflect that this isn't truly a package idle exit */
	219	__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
	220	lapic_timer_swi(); /* Trigger software timer interrupt */
	221	__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
	222
	223	TCOAL_DEBUG(0xBBBB0000 \| DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
	224	}
	225	#if CST_DEMOTION_DEBUG
	226	uint64_t nwakeups = my_cpu->cpu_wakeups_issued_total;
	227
	228	if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
	229	KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
	230	}
	231	#endif
	232	}
	233
	234	/*
	235	* Called when the CPU is to be halted. It will choose the best C-State
	236	* to be in.
	237	*/
	238	void
	239	pmCPUHalt(uint32_t reason)
	240	{
	241	cpu_data_t *cpup = current_cpu_datap();
	242
	243	switch (reason) {
	244	case PM_HALT_DEBUG:
	245	cpup->lcpu.state = LCPU_PAUSE;
	246	pal_stop_cpu(FALSE);
	247	break;
	248
	249	case PM_HALT_PANIC:
	250	cpup->lcpu.state = LCPU_PAUSE;
	251	pal_stop_cpu(TRUE);
	252	break;
	253
	254	case PM_HALT_NORMAL:
	255	case PM_HALT_SLEEP:
	256	default:
	257	pal_cli();
	258
	259	if (pmInitDone
	260	&& pmDispatch != NULL
	261	&& pmDispatch->pmCPUHalt != NULL) {
	262	/*
	263	* Halt the CPU (and put it in a low power state.
	264	*/
	265	(*pmDispatch->pmCPUHalt)();
	266
	267	/*
	268	* We've exited halt, so get the CPU schedulable again.
	269	* - by calling the fast init routine for a slave, or
	270	* - by returning if we're the master processor.
	271	*/
	272	if (cpup->cpu_number != master_cpu) {
	273	i386_init_slave_fast();
	274	panic("init_slave_fast returned");
	275	}
	276	} else {
	277	/*
	278	* If no power managment and a processor is taken off-line,
	279	* then invalidate the cache and halt it (it will not be able
	280	* to be brought back on-line without resetting the CPU).
	281	*/
	282	__asm__ volatile ("wbinvd");
	283	cpup->lcpu.state = LCPU_HALT;
	284	pal_stop_cpu(FALSE);
	285
	286	panic("back from Halt");
	287	}
	288
	289	break;
	290	}
	291	}
	292
	293	void
	294	pmMarkAllCPUsOff(void)
	295	{
	296	if (pmInitDone
	297	&& pmDispatch != NULL
	298	&& pmDispatch->markAllCPUsOff != NULL) {
	299	(*pmDispatch->markAllCPUsOff)();
	300	}
	301	}
	302
	303	static void
	304	pmInitComplete(void)
	305	{
	306	if (earlyTopology
	307	&& pmDispatch != NULL
	308	&& pmDispatch->pmCPUStateInit != NULL) {
	309	(*pmDispatch->pmCPUStateInit)();
	310	earlyTopology = FALSE;
	311	}
	312	pmInitDone = 1;
	313	}
	314
	315	x86_lcpu_t *
	316	pmGetLogicalCPU(int cpu)
	317	{
	318	return cpu_to_lcpu(cpu);
	319	}
	320
	321	x86_lcpu_t *
	322	pmGetMyLogicalCPU(void)
	323	{
	324	cpu_data_t *cpup = current_cpu_datap();
	325
	326	return &cpup->lcpu;
	327	}
	328
	329	static x86_core_t *
	330	pmGetCore(int cpu)
	331	{
	332	return cpu_to_core(cpu);
	333	}
	334
	335	static x86_core_t *
	336	pmGetMyCore(void)
	337	{
	338	cpu_data_t *cpup = current_cpu_datap();
	339
	340	return cpup->lcpu.core;
	341	}
	342
	343	static x86_die_t *
	344	pmGetDie(int cpu)
	345	{
	346	return cpu_to_die(cpu);
	347	}
	348
	349	static x86_die_t *
	350	pmGetMyDie(void)
	351	{
	352	cpu_data_t *cpup = current_cpu_datap();
	353
	354	return cpup->lcpu.die;
	355	}
	356
	357	static x86_pkg_t *
	358	pmGetPackage(int cpu)
	359	{
	360	return cpu_to_package(cpu);
	361	}
	362
	363	static x86_pkg_t *
	364	pmGetMyPackage(void)
	365	{
	366	cpu_data_t *cpup = current_cpu_datap();
	367
	368	return cpup->lcpu.package;
	369	}
	370
	371	static void
	372	pmLockCPUTopology(int lock)
	373	{
	374	if (lock) {
	375	mp_safe_spin_lock(&x86_topo_lock);
	376	} else {
	377	simple_unlock(&x86_topo_lock);
	378	}
	379	}
	380
	381	/*
	382	* Called to get the next deadline that has been set by the
	383	* power management code.
	384	* Note: a return of 0 from AICPM and this routine signifies
	385	* that no deadline is set.
	386	*/
	387	uint64_t
	388	pmCPUGetDeadline(cpu_data_t *cpu)
	389	{
	390	uint64_t deadline = 0;
	391
	392	if (pmInitDone
	393	&& pmDispatch != NULL
	394	&& pmDispatch->GetDeadline != NULL) {
	395	deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
	396	}
	397
	398	return deadline;
	399	}
	400
	401	/*
	402	* Called to determine if the supplied deadline or the power management
	403	* deadline is sooner. Returns which ever one is first.
	404	*/
	405
	406	uint64_t
	407	pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
	408	{
	409	if (pmInitDone
	410	&& pmDispatch != NULL
	411	&& pmDispatch->SetDeadline != NULL) {
	412	deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
	413	}
	414
	415	return deadline;
	416	}
	417
	418	/*
	419	* Called when a power management deadline expires.
	420	*/
	421	void
	422	pmCPUDeadline(cpu_data_t *cpu)
	423	{
	424	if (pmInitDone
	425	&& pmDispatch != NULL
	426	&& pmDispatch->Deadline != NULL) {
	427	(*pmDispatch->Deadline)(&cpu->lcpu);
	428	}
	429	}
	430
	431	/*
	432	* Called to get a CPU out of idle.
	433	*/
	434	boolean_t
	435	pmCPUExitIdle(cpu_data_t *cpu)
	436	{
	437	boolean_t do_ipi;
	438
	439	if (pmInitDone
	440	&& pmDispatch != NULL
	441	&& pmDispatch->exitIdle != NULL) {
	442	do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu);
	443	} else {
	444	do_ipi = TRUE;
	445	}
	446
	447	return do_ipi;
	448	}
	449
	450	kern_return_t
	451	pmCPUExitHalt(int cpu)
	452	{
	453	kern_return_t rc = KERN_INVALID_ARGUMENT;
	454
	455	if (pmInitDone
	456	&& pmDispatch != NULL
	457	&& pmDispatch->exitHalt != NULL) {
	458	rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
	459	}
	460
	461	return rc;
	462	}
	463
	464	kern_return_t
	465	pmCPUExitHaltToOff(int cpu)
	466	{
	467	kern_return_t rc = KERN_SUCCESS;
	468
	469	if (pmInitDone
	470	&& pmDispatch != NULL
	471	&& pmDispatch->exitHaltToOff != NULL) {
	472	rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
	473	}
	474
	475	return rc;
	476	}
	477
	478	/*
	479	* Called to initialize the power management structures for the CPUs.
	480	*/
	481	void
	482	pmCPUStateInit(void)
	483	{
	484	if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) {
	485	(*pmDispatch->pmCPUStateInit)();
	486	} else {
	487	earlyTopology = TRUE;
	488	}
	489	}
	490
	491	/*
	492	* Called when a CPU is being restarted after being powered off (as in S3).
	493	*/
	494	void
	495	pmCPUMarkRunning(cpu_data_t *cpu)
	496	{
	497	cpu_data_t *cpup = current_cpu_datap();
	498
	499	if (pmInitDone
	500	&& pmDispatch != NULL
	501	&& pmDispatch->markCPURunning != NULL) {
	502	(*pmDispatch->markCPURunning)(&cpu->lcpu);
	503	} else {
	504	cpup->lcpu.state = LCPU_RUN;
	505	}
	506	}
	507
	508	/*
	509	* Called to get/set CPU power management state.
	510	*/
	511	int
	512	pmCPUControl(uint32_t cmd, void *datap)
	513	{
	514	int rc = -1;
	515
	516	if (pmDispatch != NULL
	517	&& pmDispatch->pmCPUControl != NULL) {
	518	rc = (*pmDispatch->pmCPUControl)(cmd, datap);
	519	}
	520
	521	return rc;
	522	}
	523
	524	/*
	525	* Called to save the timer state used by power management prior
	526	* to "sleeping".
	527	*/
	528	void
	529	pmTimerSave(void)
	530	{
	531	if (pmDispatch != NULL
	532	&& pmDispatch->pmTimerStateSave != NULL) {
	533	(*pmDispatch->pmTimerStateSave)();
	534	}
	535	}
	536
	537	/*
	538	* Called to restore the timer state used by power management after
	539	* waking from "sleep".
	540	*/
	541	void
	542	pmTimerRestore(void)
	543	{
	544	if (pmDispatch != NULL
	545	&& pmDispatch->pmTimerStateRestore != NULL) {
	546	(*pmDispatch->pmTimerStateRestore)();
	547	}
	548	}
	549
	550	/*
	551	* Set the worst-case time for the C4 to C2 transition.
	552	* No longer does anything.
	553	*/
	554	void
	555	ml_set_maxsnoop(__unused uint32_t maxdelay)
	556	{
	557	}
	558
	559
	560	/*
	561	* Get the worst-case time for the C4 to C2 transition. Returns nanoseconds.
	562	*/
	563	unsigned
	564	ml_get_maxsnoop(void)
	565	{
	566	uint64_t max_snoop = 0;
	567
	568	if (pmInitDone
	569	&& pmDispatch != NULL
	570	&& pmDispatch->getMaxSnoop != NULL) {
	571	max_snoop = pmDispatch->getMaxSnoop();
	572	}
	573
	574	return (unsigned)(max_snoop & 0xffffffff);
	575	}
	576
	577
	578	uint32_t
	579	ml_get_maxbusdelay(void)
	580	{
	581	uint64_t max_delay = 0;
	582
	583	if (pmInitDone
	584	&& pmDispatch != NULL
	585	&& pmDispatch->getMaxBusDelay != NULL) {
	586	max_delay = pmDispatch->getMaxBusDelay();
	587	}
	588
	589	return (uint32_t)(max_delay & 0xffffffff);
	590	}
	591
	592	/*
	593	* Advertise a memory access latency tolerance of "mdelay" ns
	594	*/
	595	void
	596	ml_set_maxbusdelay(uint32_t mdelay)
	597	{
	598	uint64_t maxdelay = mdelay;
	599
	600	if (pmDispatch != NULL
	601	&& pmDispatch->setMaxBusDelay != NULL) {
	602	earlyMaxBusDelay = DELAY_UNSET;
	603	pmDispatch->setMaxBusDelay(maxdelay);
	604	} else {
	605	earlyMaxBusDelay = maxdelay;
	606	}
	607	}
	608
	609	uint64_t
	610	ml_get_maxintdelay(void)
	611	{
	612	uint64_t max_delay = 0;
	613
	614	if (pmDispatch != NULL
	615	&& pmDispatch->getMaxIntDelay != NULL) {
	616	max_delay = pmDispatch->getMaxIntDelay();
	617	}
	618
	619	return max_delay;
	620	}
	621
	622	/*
	623	* Set the maximum delay allowed for an interrupt.
	624	*/
	625	void
	626	ml_set_maxintdelay(uint64_t mdelay)
	627	{
	628	if (pmDispatch != NULL
	629	&& pmDispatch->setMaxIntDelay != NULL) {
	630	earlyMaxIntDelay = DELAY_UNSET;
	631	pmDispatch->setMaxIntDelay(mdelay);
	632	} else {
	633	earlyMaxIntDelay = mdelay;
	634	}
	635	}
	636
	637	boolean_t
	638	ml_get_interrupt_prewake_applicable()
	639	{
	640	boolean_t applicable = FALSE;
	641
	642	if (pmInitDone
	643	&& pmDispatch != NULL
	644	&& pmDispatch->pmInterruptPrewakeApplicable != NULL) {
	645	applicable = pmDispatch->pmInterruptPrewakeApplicable();
	646	}
	647
	648	return applicable;
	649	}
	650
	651	/*
	652	* Put a CPU into "safe" mode with respect to power.
	653	*
	654	* Some systems cannot operate at a continuous "normal" speed without
	655	* exceeding the thermal design. This is called per-CPU to place the
	656	* CPUs into a "safe" operating mode.
	657	*/
	658	void
	659	pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
	660	{
	661	if (pmDispatch != NULL
	662	&& pmDispatch->pmCPUSafeMode != NULL) {
	663	pmDispatch->pmCPUSafeMode(lcpu, flags);
	664	} else {
	665	/*
	666	* Do something reasonable if the KEXT isn't present.
	667	*
	668	* We only look at the PAUSE and RESUME flags. The other flag(s)
	669	* will not make any sense without the KEXT, so just ignore them.
	670	*
	671	* We set the CPU's state to indicate that it's halted. If this
	672	* is the CPU we're currently running on, then spin until the
	673	* state becomes non-halted.
	674	*/
	675	if (flags & PM_SAFE_FL_PAUSE) {
	676	lcpu->state = LCPU_PAUSE;
	677	if (lcpu == x86_lcpu()) {
	678	while (lcpu->state == LCPU_PAUSE) {
	679	cpu_pause();
	680	}
	681	}
	682	}
	683
	684	/*
	685	* Clear the halted flag for the specified CPU, that will
	686	* get it out of it's spin loop.
	687	*/
	688	if (flags & PM_SAFE_FL_RESUME) {
	689	lcpu->state = LCPU_RUN;
	690	}
	691	}
	692	}
	693
	694	static uint32_t saved_run_count = 0;
	695
	696	void
	697	machine_run_count(uint32_t count)
	698	{
	699	if (pmDispatch != NULL
	700	&& pmDispatch->pmSetRunCount != NULL) {
	701	pmDispatch->pmSetRunCount(count);
	702	} else {
	703	saved_run_count = count;
	704	}
	705	}
	706
	707	processor_t
	708	machine_choose_processor(processor_set_t pset,
	709	processor_t preferred)
	710	{
	711	int startCPU;
	712	int endCPU;
	713	int preferredCPU;
	714	int chosenCPU;
	715
	716	if (!pmInitDone) {
	717	return preferred;
	718	}
	719
	720	if (pset == NULL) {
	721	startCPU = -1;
	722	endCPU = -1;
	723	} else {
	724	startCPU = pset->cpu_set_low;
	725	endCPU = pset->cpu_set_hi;
	726	}
	727
	728	if (preferred == NULL) {
	729	preferredCPU = -1;
	730	} else {
	731	preferredCPU = preferred->cpu_id;
	732	}
	733
	734	if (pmDispatch != NULL
	735	&& pmDispatch->pmChooseCPU != NULL) {
	736	chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
	737
	738	if (chosenCPU == -1) {
	739	return NULL;
	740	}
	741	return cpu_datap(chosenCPU)->cpu_processor;
	742	}
	743
	744	return preferred;
	745	}
	746
	747	static int
	748	pmThreadGetUrgency(uint64_t rt_period, uint64_t rt_deadline)
	749	{
	750	thread_urgency_t urgency;
	751	uint64_t arg1, arg2;
	752
	753	urgency = thread_get_urgency(THREAD_NULL, &arg1, &arg2);
	754
	755	if (urgency == THREAD_URGENCY_REAL_TIME) {
	756	if (rt_period != NULL) {
	757	*rt_period = arg1;
	758	}
	759
	760	if (rt_deadline != NULL) {
	761	*rt_deadline = arg2;
	762	}
	763	}
	764
	765	return (int)urgency;
	766	}
	767
	768	#if DEBUG
	769	uint32_t urgency_stats[64][THREAD_URGENCY_MAX];
	770	#endif
	771
	772	#define URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
	773	uint64_t urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
	774
	775	void
	776	thread_tell_urgency(thread_urgency_t urgency,
	777	uint64_t rt_period,
	778	uint64_t rt_deadline,
	779	uint64_t sched_latency,
	780	thread_t nthread)
	781	{
	782	uint64_t urgency_notification_time_start = 0, delta;
	783	boolean_t urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
	784	assert(get_preemption_level() > 0 \|\| ml_get_interrupts_enabled() == FALSE);
	785	#if DEBUG
	786	urgency_stats[cpu_number() % 64][urgency]++;
	787	#endif
	788	if (!pmInitDone
	789	\|\| pmDispatch == NULL
	790	\|\| pmDispatch->pmThreadTellUrgency == NULL) {
	791	return;
	792	}
	793
	794	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) \| DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
	795
	796	if (__improbable((urgency_assert == TRUE))) {
	797	urgency_notification_time_start = mach_absolute_time();
	798	}
	799
	800	current_cpu_datap()->cpu_nthread = nthread;
	801	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
	802
	803	if (__improbable((urgency_assert == TRUE))) {
	804	delta = mach_absolute_time() - urgency_notification_time_start;
	805
	806	if (__improbable(delta > urgency_notification_max_recorded)) {
	807	/* This is not synchronized, but it doesn't matter
	808	* if we (rarely) miss an event, as it is statistically
	809	* unlikely that it will never recur.
	810	*/
	811	urgency_notification_max_recorded = delta;
	812
	813	if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended())) {
	814	panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
	815	}
	816	}
	817	}
	818
	819	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) \| DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
	820	}
	821
	822	void
	823	machine_thread_going_on_core(__unused thread_t new_thread,
	824	__unused thread_urgency_t urgency,
	825	__unused uint64_t sched_latency,
	826	__unused uint64_t same_pri_latency,
	827	__unused uint64_t dispatch_time)
	828	{
	829	}
	830
	831	void
	832	machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
	833	uint64_t last_dispatch, boolean_t thread_runnable)
	834	{
	835	if (!pmInitDone
	836	\|\| pmDispatch == NULL
	837	\|\| pmDispatch->pmThreadGoingOffCore == NULL) {
	838	return;
	839	}
	840
	841	pmDispatch->pmThreadGoingOffCore(old_thread, thread_terminating,
	842	last_dispatch, thread_runnable);
	843	}
	844
	845	void
	846	machine_max_runnable_latency(__unused uint64_t bg_max_latency,
	847	__unused uint64_t default_max_latency,
	848	__unused uint64_t realtime_max_latency)
	849	{
	850	}
	851
	852	void
	853	machine_work_interval_notify(__unused thread_t thread,
	854	__unused struct kern_work_interval_args* kwi_args)
	855	{
	856	}
	857
	858
	859	void
	860	machine_switch_perfcontrol_context(__unused perfcontrol_event event,
	861	__unused uint64_t timestamp,
	862	__unused uint32_t flags,
	863	__unused uint64_t new_thread_same_pri_latency,
	864	__unused thread_t old,
	865	__unused thread_t new)
	866	{
	867	}
	868
	869	void
	870	machine_switch_perfcontrol_state_update(__unused perfcontrol_event event,
	871	__unused uint64_t timestamp,
	872	__unused uint32_t flags,
	873	__unused thread_t thread)
	874	{
	875	}
	876
	877	void
	878	active_rt_threads(boolean_t active)
	879	{
	880	if (!pmInitDone
	881	\|\| pmDispatch == NULL
	882	\|\| pmDispatch->pmActiveRTThreads == NULL) {
	883	return;
	884	}
	885
	886	pmDispatch->pmActiveRTThreads(active);
	887	}
	888
	889	static uint32_t
	890	pmGetSavedRunCount(void)
	891	{
	892	return saved_run_count;
	893	}
	894
	895	/*
	896	* Returns the root of the package tree.
	897	*/
	898	x86_pkg_t *
	899	pmGetPkgRoot(void)
	900	{
	901	return x86_pkgs;
	902	}
	903
	904	static boolean_t
	905	pmCPUGetHibernate(int cpu)
	906	{
	907	return cpu_datap(cpu)->cpu_hibernate;
	908	}
	909
	910	processor_t
	911	pmLCPUtoProcessor(int lcpu)
	912	{
	913	return cpu_datap(lcpu)->cpu_processor;
	914	}
	915
	916	static void
	917	pmReSyncDeadlines(int cpu)
	918	{
	919	static boolean_t registered = FALSE;
	920
	921	if (!registered) {
	922	PM_interrupt_register(&timer_resync_deadlines);
	923	registered = TRUE;
	924	}
	925
	926	if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num) {
	927	timer_resync_deadlines();
	928	} else {
	929	cpu_PM_interrupt(cpu);
	930	}
	931	}
	932
	933	static void
	934	pmSendIPI(int cpu)
	935	{
	936	lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
	937	}
	938
	939	static void
	940	pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
	941	{
	942	/*
	943	* Make sure that nanotime didn't change while we were reading it.
	944	*/
	945	do {
	946	rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
	947	rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
	948	rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
	949	rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
	950	rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
	951	} while (pal_rtc_nanotime_info.generation != 0
	952	&& rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
	953	}
	954
	955	uint32_t
	956	pmTimerQueueMigrate(int target_cpu)
	957	{
	958	/* Call the etimer code to do this. */
	959	return (target_cpu != cpu_number())
	960	? timer_queue_migrate_cpu(target_cpu)
	961	: 0;
	962	}
	963
	964
	965	/*
	966	* Called by the power management kext to register itself and to get the
	967	* callbacks it might need into other kernel functions. This interface
	968	* is versioned to allow for slight mis-matches between the kext and the
	969	* kernel.
	970	*/
	971	void
	972	pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
	973	pmCallBacks_t *callbacks)
	974	{
	975	if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
	976	callbacks->setRTCPop = setPop;
	977	callbacks->resyncDeadlines = pmReSyncDeadlines;
	978	callbacks->initComplete = pmInitComplete;
	979	callbacks->GetLCPU = pmGetLogicalCPU;
	980	callbacks->GetCore = pmGetCore;
	981	callbacks->GetDie = pmGetDie;
	982	callbacks->GetPackage = pmGetPackage;
	983	callbacks->GetMyLCPU = pmGetMyLogicalCPU;
	984	callbacks->GetMyCore = pmGetMyCore;
	985	callbacks->GetMyDie = pmGetMyDie;
	986	callbacks->GetMyPackage = pmGetMyPackage;
	987	callbacks->GetPkgRoot = pmGetPkgRoot;
	988	callbacks->LockCPUTopology = pmLockCPUTopology;
	989	callbacks->GetHibernate = pmCPUGetHibernate;
	990	callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
	991	callbacks->ThreadBind = thread_bind;
	992	callbacks->GetSavedRunCount = pmGetSavedRunCount;
	993	callbacks->GetNanotimeInfo = pmGetNanotimeInfo;
	994	callbacks->ThreadGetUrgency = pmThreadGetUrgency;
	995	callbacks->RTCClockAdjust = rtc_clock_adjust;
	996	callbacks->timerQueueMigrate = pmTimerQueueMigrate;
	997	callbacks->topoParms = &topoParms;
	998	callbacks->pmSendIPI = pmSendIPI;
	999	callbacks->InterruptPending = lapic_is_interrupt_pending;
	1000	callbacks->IsInterrupting = lapic_is_interrupting;
	1001	callbacks->InterruptStats = lapic_interrupt_counts;
	1002	callbacks->DisableApicTimer = lapic_disable_timer;
	1003	} else {
	1004	panic("Version mis-match between Kernel and CPU PM");
	1005	}
	1006
	1007	if (cpuFuncs != NULL) {
	1008	if (pmDispatch) {
	1009	panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
	1010	}
	1011
	1012	pmDispatch = cpuFuncs;
	1013
	1014	if (earlyTopology
	1015	&& pmDispatch->pmCPUStateInit != NULL) {
	1016	(*pmDispatch->pmCPUStateInit)();
	1017	earlyTopology = FALSE;
	1018	}
	1019
	1020	if (pmDispatch->pmIPIHandler != NULL) {
	1021	lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
	1022	}
	1023	}
	1024	}
	1025
	1026	/*
	1027	* Unregisters the power management functions from the kext.
	1028	*/
	1029	void
	1030	pmUnRegister(pmDispatch_t *cpuFuncs)
	1031	{
	1032	if (cpuFuncs != NULL && pmDispatch == cpuFuncs) {
	1033	pmDispatch = NULL;
	1034	}
	1035	}
	1036
	1037	void
	1038	machine_track_platform_idle(boolean_t entry)
	1039	{
	1040	cpu_data_t *my_cpu = current_cpu_datap();
	1041
	1042	if (entry) {
	1043	(void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
	1044	} else {
	1045	uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
	1046	if (nidle == topoParms.nLThreadsPerPackage) {
	1047	my_cpu->lcpu.package->package_idle_exits++;
	1048	}
	1049	}
	1050	}