#include <i386/cpuid.h>
#include <i386/tsc.h>
#include <i386/machine_routines.h>
+#include <i386/pal_routines.h>
#include <i386/ucode.h>
#include <kern/clock.h>
#include <libkern/libkern.h>
#include <i386/lapic.h>
+#include <i386/pmCPU.h>
+
static int
_i386_cpu_info SYSCTL_HANDLER_ARGS
SYSCTL_NODE(_machdep, OID_AUTO, tsc, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "Timestamp counter parameters");
-SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency, CTLFLAG_RD|CTLFLAG_LOCKED, &tscFreq, "");
+SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency,
+ CTLFLAG_RD|CTLFLAG_LOCKED, &tscFreq, "");
+
+extern uint32_t deep_idle_rebase;
+SYSCTL_UINT(_machdep_tsc, OID_AUTO, deep_idle_rebase,
+ CTLFLAG_RW|CTLFLAG_KERN|CTLFLAG_LOCKED, &deep_idle_rebase, 0, "");
+
+SYSCTL_NODE(_machdep_tsc, OID_AUTO, nanotime,
+ CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "TSC to ns conversion");
+SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, tsc_base,
+ CTLFLAG_RD | CTLFLAG_LOCKED,
+ (uint64_t *) &pal_rtc_nanotime_info.tsc_base, "");
+SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, ns_base,
+ CTLFLAG_RD | CTLFLAG_LOCKED,
+ (uint64_t *)&pal_rtc_nanotime_info.ns_base, "");
+SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, scale,
+ CTLFLAG_RD | CTLFLAG_LOCKED,
+ (uint32_t *)&pal_rtc_nanotime_info.scale, 0, "");
+SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, shift,
+ CTLFLAG_RD | CTLFLAG_LOCKED,
+ (uint32_t *)&pal_rtc_nanotime_info.shift, 0, "");
+SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, generation,
+ CTLFLAG_RD | CTLFLAG_LOCKED,
+ (uint32_t *)&pal_rtc_nanotime_info.generation, 0, "");
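
These nanotime fields are the inputs to the TSC-to-nanoseconds conversion; the restore-time logging added later in this change uses the same (delta * scale) >> 32 form. A minimal sketch of that conversion, assuming the shift is applied to the TSC delta before scaling (the names below are illustrative, not kernel symbols):

#include <stdint.h>

/*
 * Illustrative only: converts a raw TSC value to nanoseconds using the
 * machdep.tsc.nanotime.* fields exported above (tsc_base, ns_base, scale,
 * shift). Assumes 'scale' is a 32.32 fixed-point multiplier and that the
 * TSC delta is pre-shifted by 'shift', matching the (delta * scale) >> 32
 * computation used by the hibernate restore-time logging in this change.
 */
static uint64_t
tsc_to_nanoseconds(uint64_t tsc, uint64_t tsc_base, uint64_t ns_base,
    uint32_t scale, uint32_t shift)
{
	uint64_t delta = (tsc - tsc_base) << shift;

	return (ns_base + ((delta * scale) >> 32));
}
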
SYSCTL_NODE(_machdep, OID_AUTO, misc, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
"Miscellaneous x86 kernel parameters");
#include <dev/random/YarrowCoreLib/include/yarrow.h>
#include <libkern/OSByteOrder.h>
+#include <libkern/OSAtomic.h>
#include <mach/mach_time.h>
#include <machine/machine_routines.h>
/* Used to detect whether we've already been initialized */
-static int gRandomInstalled = 0;
+static UInt8 gRandomInstalled = 0;
static PrngRef gPrngRef;
static int gRandomError = 1;
static lck_grp_t *gYarrowGrp;
static lck_attr_t *gYarrowAttr;
static lck_grp_attr_t *gYarrowGrpAttr;
static lck_mtx_t *gYarrowMutex = 0;
+static UInt8 gYarrowInitializationLock = 0;
#define RESEED_TICKS 50 /* how long a reseed operation can take */
{
prng_error_status perr;
+ /* Multiple threads can enter this as a result of an earlier
+ * check of gYarrowMutex. We make sure that only one of them
+ * can enter at a time. If one of them enters and discovers
+ * that gYarrowMutex is no longer NULL, we know that another
+ * thread has initialized the Yarrow state and we can exit.
+ */
+
+ /* The first thread that enters this function will find
+ * gYarrowInitializationLock set to 0. It will atomically
+ * set the value to 1 and, seeing that it was zero, drop
+ * out of the loop. Other threads will see that the value is
+ * 1 and continue to loop until we are initialized.
+ */
+
+ while (OSTestAndSet(0, &gYarrowInitializationLock)); /* serialize access to this function */
+
+ if (gYarrowMutex) {
+ /* we've already been initialized, clear and get out */
+ goto function_exit;
+ }
+
/* create a Yarrow object */
perr = prngInitialize(&gPrngRef);
if (perr != 0) {
char buffer [16];
/* get a little non-deterministic data as an initial seed. */
+ /* On OSX, securityd will add much more entropy as soon as it */
+ /* comes up. On iOS, entropy is added with each system interrupt. */
microtime(&tt);
/*
if (perr != 0) {
/* an error, complain */
printf ("Couldn't seed Yarrow.\n");
- return;
+ goto function_exit;
}
/* turn the data around */
gYarrowMutex = lck_mtx_alloc_init(gYarrowGrp, gYarrowAttr);
fips_initialize ();
+
+function_exit:
+ /* allow other threads to figure out whether or not we have been initialized. */
+ gYarrowInitializationLock = 0;
}
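
The initialization-lock pattern above (spin on a test-and-set byte, re-check the real state, then clear the byte) can be sketched with portable C11 atomics; this is a userspace analog for illustration only, not the kernel code:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_flag init_lock = ATOMIC_FLAG_INIT;  /* plays the role of gYarrowInitializationLock */
static bool initialized;                          /* plays the role of the gYarrowMutex != NULL check */

static void
preliminary_setup_once(void)
{
	/* spin until we own the lock, like the OSTestAndSet() loop above */
	while (atomic_flag_test_and_set(&init_lock))
		;

	if (!initialized) {
		/* exactly one thread performs the expensive setup */
		initialized = true;
	}

	/* let other waiters in to observe that setup is done */
	atomic_flag_clear(&init_lock);
}
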
const Block kKnownAnswer = {0x92, 0xb4, 0x04, 0xe5, 0x56, 0x58, 0x8c, 0xed, 0x6c, 0x1a, 0xcd, 0x4e, 0xbf, 0x05, 0x3f, 0x68, 0x09, 0xf7, 0x3a, 0x93};
{
int ret;
- if (gRandomInstalled)
+ if (OSTestAndSet(0, &gRandomInstalled)) {
+ /* do this atomically so that it works correctly with
+ multiple threads */
return;
-
- /* install us in the file system */
- gRandomInstalled = 1;
-
- /* setup yarrow and the mutex */
- PreliminarySetup();
+ }
ret = cdevsw_add(RANDOM_MAJOR, &random_cdevsw);
if (ret < 0) {
*/
devfs_make_node(makedev (ret, 1), DEVFS_CHAR,
UID_ROOT, GID_WHEEL, 0666, "urandom", 0);
+
+	/* setup yarrow and the mutex if needed */
+ PreliminarySetup();
}
int
(cp->c_flag & C_DELETED) &&
((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
- /* Start a transaction here. We're about to change file sizes */
- if (started_tr == 0) {
- if (hfs_start_transaction(hfsmp) != 0) {
- error = EINVAL;
- goto out;
- }
- else {
- started_tr = 1;
- }
- }
-
/* Truncate away our own fork data. (Case A, B, C above) */
if (VTOF(vp)->ff_blocks != 0) {
-
+
+ /*
+ * SYMLINKS only:
+ *
+ * Encapsulate the entire change (including truncating the link) in
+ * nested transactions if we are modifying a symlink, because we know that its
+ * file length will be at most 4k, and we can fit both the truncation and
+ * any relevant bitmap changes into a single journal transaction. We also want
+ * the kill_block code to execute in the same transaction so that any dirty symlink
+ * blocks will not be written. Otherwise, rely on
+ * hfs_truncate doing its own transactions to ensure that we don't blow up
+ * the journal.
+ */
+ if ((started_tr == 0) && (v_type == VLNK)) {
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ goto out;
+ }
+ else {
+ started_tr = 1;
+ }
+ }
+
/*
* At this point, we have decided that this cnode is
* suitable for full removal. We are about to deallocate
if (hfsmp->jnl && vnode_islnk(vp)) {
buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
}
-
+
/*
- * Since we're already inside a transaction,
- * tell hfs_truncate to skip the ubc_setsize.
- *
* This truncate call (and the one below) is fine from VNOP_RECLAIM's
* context because we're only removing blocks, not zero-filling new
* ones. The C_DELETED check above makes things much simpler.
*/
- error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ctx);
+ error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx);
if (error) {
goto out;
}
truncated = 1;
+
+ /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
+ if (started_tr) {
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
}
/*
* it is the last fork. That means, by definition, the rsrc fork is not in
* core. To avoid bringing a vnode into core for the sole purpose of deleting the
* data in the resource fork, we call cat_lookup directly, then hfs_release_storage
- * to get rid of the resource fork's data.
+ * to get rid of the resource fork's data. Note that because we are holding the
+ * cnode lock, it is impossible for a competing thread to create the resource fork
+ * vnode from underneath us while we do this.
*
* This is invoked via case A above only.
*/
*/
cp->c_blocks = 0;
}
-
- /* End the transaction from the start of the file truncation segment */
- if (started_tr) {
- hfs_end_transaction(hfsmp);
- started_tr = 0;
- }
}
/*
/*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2013 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
(SWAP_BE32 (hotfileinfo.timeleft) > 0) &&
(SWAP_BE32 (hotfileinfo.timebase) > 0)) {
hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt);
- hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ;
hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase);
+ hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ;
/* Fix up any bogus timebase values. */
if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) {
hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION;
if (hfsmp->hfc_stage != HFC_RECORDING)
return (0);
- if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) {
+	/* Only regular files are allowed for hotfile inclusion; symlinks are disallowed */
+ if ((!vnode_isreg(vp)) || vnode_issystem(vp)) {
return (0);
}
/* Skip resource forks for now. */
if (hfsmp->hfc_stage != HFC_RECORDING)
return (0);
- if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) {
+ /* Only regular files can move out of hotfiles */
+ if ((!vnode_isreg(vp)) || vnode_issystem(vp)) {
return (0);
}
static int
hotfiles_collect_callback(struct vnode *vp, __unused void *cargs)
{
- if ((vnode_isreg(vp) || vnode_islnk(vp)) && !vnode_issystem(vp))
+ if ((vnode_isreg(vp)) && !vnode_issystem(vp))
(void) hfs_addhotfile_internal(vp);
return (VNODE_RETURNED);
}
break;
}
- if (!vnode_isreg(vp) && !vnode_islnk(vp)) {
+
+ /* only regular files are eligible */
+ if (!vnode_isreg(vp)) {
printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid);
hfs_unlock(VTOC(vp));
vnode_put(vp);
}
break;
}
- if (!vnode_isreg(vp) && !vnode_islnk(vp)) {
+
+ /* only regular files are eligible */
+ if (!vnode_isreg(vp)) {
printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID);
hfs_unlock(VTOC(vp));
vnode_put(vp);
/*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
enum vtype vnodetype;
vnodetype = vnode_vtype(vp);
- if (vnodetype != VREG && vnodetype != VLNK) {
+ if (vnodetype != VREG) {
+ /* Note symlinks are not allowed to be relocated */
return (EPERM);
}
if (blockHint == 0)
blockHint = hfsmp->nextAllocation;
- if ((fp->ff_size > 0x7fffffff) ||
- ((fp->ff_size > blksize) && vnodetype == VLNK)) {
+	if (fp->ff_size > 0x7fffffff) {
return (EFBIG);
}
}
void
-start_kern_tracing(unsigned int new_nkdbufs) {
+start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map) {
if (!new_nkdbufs)
return;
nkdbufs = kdbg_set_nkdbufs(new_nkdbufs);
kdbg_lock_init();
kdbg_reinit(TRUE);
+ if (need_map == TRUE)
+ kdbg_mapinit();
kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE);
#if defined(__i386__) || defined(__x86_64__)
SYSCTL_INT (_hw, OID_AUTO, cputhreadtype, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, &cputhreadtype, 0, "");
#if defined(__i386__) || defined(__x86_64__)
-int mmx_flag = -1;
-int sse_flag = -1;
-int sse2_flag = -1;
-int sse3_flag = -1;
-int sse4_1_flag = -1;
-int sse4_2_flag = -1;
-int x86_64_flag = -1;
-int supplementalsse3_flag = -1;
-int aes_flag = -1;
-int avx1_0_flag = -1;
-int rdrand_flag = -1;
-int f16c_flag = -1;
-int enfstrg_flag = -1;
-
-SYSCTL_INT(_hw_optional, OID_AUTO, mmx, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &mmx_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse2_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse3_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, supplementalsse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &supplementalsse3_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse4_1, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse4_1_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse4_2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse4_2_flag, 0, "");
+static int
+sysctl_cpu_capability
+(__unused struct sysctl_oid *oidp, void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+ uint64_t mask = (uint64_t) (uintptr_t) arg1;
+ boolean_t is_capable = (_get_cpu_capabilities() & mask) != 0;
+
+ return SYSCTL_OUT(req, &is_capable, sizeof(is_capable));
+
+}
+
+SYSCTL_PROC(_hw_optional, OID_AUTO, mmx, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasMMX, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE2, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE3, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, supplementalsse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSupplementalSSE3, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_1, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_1, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_2, 0, sysctl_cpu_capability, "I", "");
/* "x86_64" is actually a preprocessor symbol on the x86_64 kernel, so we have to hack this */
#undef x86_64
-SYSCTL_INT(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &x86_64_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, aes, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &aes_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, avx1_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &avx1_0_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, rdrand, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &rdrand_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, f16c, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &f16c_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, enfstrg, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &enfstrg_flag, 0, "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) k64Bit, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, aes, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAES, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, avx1_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX1_0, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, rdrand, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRDRAND, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, f16c, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasF16C, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, enfstrg, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasENFSTRG, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, fma, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasFMA, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, avx2_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX2_0, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, bmi1, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI1, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, bmi2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI2, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, rtm, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRTM, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, hle, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasHLE, 0, sysctl_cpu_capability, "I", "");
#else
#error Unsupported arch
#endif /* !__i386__ && !__x86_64 && !__arm__ */
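
With the flags replaced by SYSCTL_PROC handlers, the values are computed on demand from _get_cpu_capabilities() instead of being cached in globals; userspace consumers are unaffected. For illustration, a small hypothetical reader using the standard sysctlbyname(3) interface:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int has_sse4_2 = 0;
	size_t len = sizeof(has_sse4_2);

	/*
	 * hw.optional.sse4_2 is one of the OIDs registered above; the
	 * handler emits a boolean_t (int-sized), so read it as an int.
	 */
	if (sysctlbyname("hw.optional.sse4_2", &has_sse4_2, &len, NULL, 0) == 0)
		printf("sse4_2: %d\n", has_sse4_2);
	else
		perror("sysctlbyname");

	return 0;
}
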
}
#if defined (__i386__) || defined (__x86_64__)
-#define is_capability_set(k) (((_get_cpu_capabilities() & (k)) == (k)) ? 1 : 0)
- mmx_flag = is_capability_set(kHasMMX);
- sse_flag = is_capability_set(kHasSSE);
- sse2_flag = is_capability_set(kHasSSE2);
- sse3_flag = is_capability_set(kHasSSE3);
- supplementalsse3_flag = is_capability_set(kHasSupplementalSSE3);
- sse4_1_flag = is_capability_set(kHasSSE4_1);
- sse4_2_flag = is_capability_set(kHasSSE4_2);
- x86_64_flag = is_capability_set(k64Bit);
- aes_flag = is_capability_set(kHasAES);
- avx1_0_flag = is_capability_set(kHasAVX1_0);
- rdrand_flag = is_capability_set(kHasRDRAND);
- f16c_flag = is_capability_set(kHasF16C);
- enfstrg_flag = is_capability_set(kHasENFSTRG);
-
/* hw.cpufamily */
cpufamily = cpuid_cpufamily();
int isssd = 0;
uint32_t flags = 0;
uint32_t blksize;
- off_t maxiocount, count;
+ off_t maxiocount, count, segcount;
boolean_t locked = FALSE;
int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
maxiocount = count;
error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
+ if (!error)
+ error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
if (error)
- count = 0;
+ count = segcount = 0;
+ count *= segcount;
if (count && (count < maxiocount))
maxiocount = count;
error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
+ if (!error)
+ error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
if (error)
- count = 0;
+ count = segcount = 0;
+ count *= segcount;
if (count && (count < maxiocount))
maxiocount = count;
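
The two hunks above fold a per-segment limit into the maximum I/O size: when both the segment byte count and the segment count are known, a transfer can carry at most their product, and the running maximum is clamped to that product when it is smaller. A hypothetical helper showing the same arithmetic:

#include <stdint.h>

/*
 * Illustrative helper mirroring the clamping above: 'limit' is the
 * largest transfer the device can accept given its per-segment byte
 * count and segment count; a zero limit means "no constraint reported".
 */
static uint64_t
clamp_max_io(uint64_t maxiocount, uint64_t segbytes, uint64_t segcount)
{
	uint64_t limit = segbytes * segcount;

	if (limit != 0 && limit < maxiocount)
		maxiocount = limit;

	return maxiocount;
}
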
0x1400058 MACH_SCHED_REDISPATCH
0x140005C MACH_SCHED_REMOTE_AST
0x1400060 MACH_SCHED_LPA_BROKEN
+0x1400064 MACH_DEEP_IDLE
0x1500000 MACH_MSGID_INVALID
0x1600000 MTX_SLEEP
0x1600004 MTX_SLEEP_DEADLINE
0x1700020 PMAP_flush_TLBS
0x1700024 PMAP_update_interrupt
0x1700028 PMAP_attribute_clear
+0x1900000 MP_TLB_FLUSH
+0x1900004 MP_CPUS_CALL
+0x1900008 MP_CPUS_CALL_LOCAL
+0x190000c MP_CPUS_CALL_ACTION
+0x1900010 MP_CPUS_CALL_NOBUF
+0x1900014 MP_CPU_FAST_START
+0x1900018 MP_CPU_START
+0x190001c MP_CPU_DEACTIVATE
0x2010000 L_IP_In_Beg
0x2010004 L_IP_Out_Beg
0x2010008 L_IP_In_End
0x53101a4 CPUPM_TEST_RUN_INFO
0x53101a8 CPUPM_TEST_SLAVE_INFO
0x53101ac CPUPM_FORCED_IDLE
+0x53101b4 CPUPM_PSTATE_CHOOSE
+0x53101b8 CPUPM_PSTATE_COMMIT
+0x53101bc CPUPM_PSTATE_CHECK
+0x531023C CPUPM_TQM
+0x5310240 CPUPM_QUIESCE
+0x5310244 CPUPM_MBD
+0x5310248 CPUPM_PST_RATELIMIT_QOS
+0x531024C CPUPM_PST_QOS_RATEUNLIMIT
+0x5310250 CPUPM_PST_QOS_SWITCH
+0x5310254 CPUPM_FORCED_IDLE
+0x5320000 CPUPM_PST_RESOLVE
+0x5320004 CPUPM_PST_LOAD_TXFR
+0x5320008 CPUPM_PST_IDLE_EXIT
+0x532000C CPUPM_PST_IDLE_ENTRY
+0x5320010 CPUPM_PST_TIMER
+0x5320014 CPUPM_PST_MAXBUS
+0x5320018 CPUPM_PST_MAXINT
+0x532001C CPUPM_PST_PLIMIT
+0x5320020 CPUPM_PST_SELFSEL
+0x5320024 CPUPM_PST_RATELIMIT
+0x5320028 CPUPM_PST_RATEUNLIMIT
+0x532002C CPUPM_DVFS_PAUSE
+0x5320030 CPUPM_DVFS_RESUME
+0x5320034 CPUPM_DVFS_ADVANCE
+0x5320038 CPUPM_DVFS_TRANSIT
0x5330000 HIBERNATE
0x5330004 HIBERNATE_WRITE_IMAGE
0x5330008 HIBERNATE_MACHINE_INIT
{
#pragma unused(err)
struct sfb_bin_fcentry *fce;
- struct inp_fc_entry *infc;
+ struct inpcb *inp;
for (;;) {
lck_mtx_assert(&ifnet_fclist_lock, LCK_MTX_ASSERT_OWNED);
SLIST_NEXT(fce, fce_link) = NULL;
lck_mtx_unlock(&ifnet_fclist_lock);
- infc = inp_fc_getinp(fce->fce_flowhash);
- if (infc == NULL) {
+ inp = inp_fc_getinp(fce->fce_flowhash, 0);
+ if (inp == NULL) {
ifnet_fce_free(fce);
lck_mtx_lock_spin(&ifnet_fclist_lock);
continue;
}
- VERIFY(infc->infc_inp != NULL);
+ inp_fc_feedback(inp);
- inp_fc_feedback(infc->infc_inp);
-
- inp_fc_entry_free(infc);
ifnet_fce_free(fce);
lck_mtx_lock_spin(&ifnet_fclist_lock);
}
OIGMPSTAT_INC(igps_rcv_tooshort);
return;
}
- VERIFY(IS_P2ALIGNED(igmp, sizeof (u_int32_t)));
+ /* N.B.: we assume the packet was correctly aligned in ip_input. */
/*
* Validate checksum.
OIGMPSTAT_INC(igps_rcv_tooshort);
return;
}
- VERIFY(IS_P2ALIGNED(igmpv3,
- sizeof (u_int32_t)));
+ /*
+ * N.B.: we assume the packet was correctly
+ * aligned in ip_input.
+ */
if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
m_freem(m);
return;
u_int32_t inp_hash_seed = 0;
-static __inline int infc_cmp(const struct inp_fc_entry *,
- const struct inp_fc_entry *);
+static __inline int infc_cmp(const struct inpcb *,
+ const struct inpcb *);
lck_grp_t *inp_lck_grp;
lck_grp_attr_t *inp_lck_grp_attr;
lck_attr_t *inp_lck_attr;
decl_lck_mtx_data(, inp_fc_lck);
-RB_HEAD(inp_fc_tree, inp_fc_entry) inp_fc_tree;
-RB_PROTOTYPE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
+RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
+RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
+RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
-RB_GENERATE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
-
-static unsigned int inp_fcezone_size;
-static struct zone *inp_fcezone;
-#define INP_FCEZONE_NAME "inp_fcezone"
-#define INP_FCEZONE_MAX 32
+/*
+ * Use this inp as a key to find an inp in the flowhash tree.
+ * Accesses to it are protected by inp_fc_lck.
+ */
+struct inpcb key_inp;
/*
* in_pcb.c: manage the Protocol Control Blocks.
inp_lck_attr = lck_attr_alloc_init();
lck_mtx_init(&inp_fc_lck, inp_lck_grp, inp_lck_attr);
+ lck_mtx_lock(&inp_fc_lck);
RB_INIT(&inp_fc_tree);
-
- inp_fcezone_size = P2ROUNDUP(sizeof (struct inp_fc_entry),
- sizeof (u_int64_t));
- inp_fcezone = zinit(inp_fcezone_size,
- INP_FCEZONE_MAX * inp_fcezone_size, 0, INP_FCEZONE_NAME);
- if (inp_fcezone == NULL) {
- panic("%s: failed allocating %s", __func__,
- INP_FCEZONE_NAME);
- /* NOTREACHED */
- }
- zone_change(inp_fcezone, Z_EXPAND, TRUE);
- zone_change(inp_fcezone, Z_CALLERACCT, FALSE);
+ bzero(&key_inp, sizeof(key_inp));
+ lck_mtx_unlock(&inp_fc_lck);
}
/*
void
in_pcbremlists(struct inpcb *inp)
{
- struct inp_fc_entry *infce;
inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
if (inp->inp_lport) {
}
LIST_REMOVE(inp, inp_list);
- infce = inp_fc_getinp(inp->inp_flowhash);
- if (infce != NULL)
- inp_fc_entry_free(infce);
-
+ if (inp->inp_flags2 & INP2_IN_FCTREE) {
+ inp_fc_getinp(inp->inp_flowhash,
+ (INPFC_SOLOCKED|INPFC_REMOVE));
+ VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
+ }
inp->inp_pcbinfo->ipi_count--;
}
{
struct inp_flowhash_key fh __attribute__((aligned(8)));
u_int32_t flowhash = 0;
+ struct inpcb *tmp_inp = NULL;
if (inp_hash_seed == 0)
inp_hash_seed = RandomULong();
goto try_again;
}
- return flowhash;
-}
+ inp->inp_flowhash = flowhash;
-/*
- * Function to compare inp_fc_entries in inp flow control tree
- */
-static inline int
-infc_cmp(const struct inp_fc_entry *fc1, const struct inp_fc_entry *fc2)
-{
- return (fc1->infc_flowhash - fc2->infc_flowhash);
-}
-
-int
-inp_fc_addinp(struct inpcb *inp)
-{
- struct inp_fc_entry keyfc, *infc;
- u_int32_t flowhash = inp->inp_flowhash;
-
- keyfc.infc_flowhash = flowhash;
-
- lck_mtx_lock_spin(&inp_fc_lck);
- infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc);
- if (infc != NULL && infc->infc_inp == inp) {
- /* Entry is already in inp_fc_tree, return */
- lck_mtx_unlock(&inp_fc_lck);
- return (1);
- }
+ /* Insert the inp into inp_fc_tree */
- if (infc != NULL) {
+ lck_mtx_lock(&inp_fc_lck);
+ tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
+ if (tmp_inp != NULL) {
/*
- * There is a different fc entry with the same
- * flow hash but different inp pointer. There
- * can be a collision on flow hash but the
- * probability is low. Let's just avoid
- * adding a second one when there is a collision
+ * There is a different inp with the same flowhash.
+ * There can be a collision on flow hash but the
+ * probability is low. Let's recompute the
+ * flowhash.
*/
lck_mtx_unlock(&inp_fc_lck);
- return (0);
- }
-
- /* become regular mutex */
- lck_mtx_convert_spin(&inp_fc_lck);
-
- infc = zalloc_noblock(inp_fcezone);
- if (infc == NULL) {
- /* memory allocation failed */
- lck_mtx_unlock(&inp_fc_lck);
- return (0);
+ /* recompute hash seed */
+ inp_hash_seed = RandomULong();
+ goto try_again;
}
- bzero(infc, sizeof (*infc));
-
- infc->infc_flowhash = flowhash;
- infc->infc_inp = inp;
-
- RB_INSERT(inp_fc_tree, &inp_fc_tree, infc);
+ RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
+ inp->inp_flags2 |= INP2_IN_FCTREE;
lck_mtx_unlock(&inp_fc_lck);
- return (1);
+
+ return flowhash;
}
-struct inp_fc_entry*
-inp_fc_getinp(u_int32_t flowhash)
+/*
+ * Function to compare inp_fc_entries in inp flow control tree
+ */
+static inline int
+infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
- struct inp_fc_entry keyfc, *infc;
+ return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
+ sizeof(inp1->inp_flowhash)));
+}
- keyfc.infc_flowhash = flowhash;
+struct inpcb *
+inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
+{
+ struct inpcb *inp = NULL;
+ int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
lck_mtx_lock_spin(&inp_fc_lck);
- infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc);
- if (infc == NULL) {
+ key_inp.inp_flowhash = flowhash;
+ inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
+ if (inp == NULL) {
/* inp is not present, return */
lck_mtx_unlock(&inp_fc_lck);
return (NULL);
}
- RB_REMOVE(inp_fc_tree, &inp_fc_tree, infc);
-
- if (in_pcb_checkstate(infc->infc_inp, WNT_ACQUIRE, 0) ==
- WNT_STOPUSING) {
- /* become regular mutex */
- lck_mtx_convert_spin(&inp_fc_lck);
+ if (flags & INPFC_REMOVE) {
+ RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
+ lck_mtx_unlock(&inp_fc_lck);
- /*
- * This inp is going away, just don't process it.
- */
- inp_fc_entry_free(infc);
- infc = NULL;
+ bzero(&(inp->infc_link), sizeof (inp->infc_link));
+ inp->inp_flags2 &= ~INP2_IN_FCTREE;
+ return (NULL);
}
+ if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
+ inp = NULL;
lck_mtx_unlock(&inp_fc_lck);
- return (infc);
-}
-
-void
-inp_fc_entry_free(struct inp_fc_entry *infc)
-{
- zfree(inp_fcezone, infc);
+ return (inp);
}
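
The reworked flow-control tree stores inpcbs directly and looks them up with a mostly-zero key object (key_inp) whose only meaningful field is the flowhash; this works because the comparator never touches any other field. A self-contained sketch of the same keyed-lookup pattern using the BSD <sys/tree.h> red-black macros, with illustrative types rather than the kernel's:

#include <sys/tree.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* stand-in for struct inpcb: only the fields the tree cares about */
struct node {
	RB_ENTRY(node)	link;		/* like infc_link */
	uint32_t	flowhash;	/* like inp_flowhash */
};

/* order by flowhash bytes, as infc_cmp() above does via memcmp() */
static int
node_cmp(struct node *a, struct node *b)
{
	return memcmp(&a->flowhash, &b->flowhash, sizeof (a->flowhash));
}

RB_HEAD(node_tree, node) head = RB_INITIALIZER(&head);
RB_GENERATE(node_tree, node, link, node_cmp)

int
main(void)
{
	struct node n = { .flowhash = 0x1234 };
	struct node key;			/* plays the role of key_inp */

	RB_INSERT(node_tree, &head, &n);

	memset(&key, 0, sizeof (key));		/* zero everything but the key field */
	key.flowhash = 0x1234;
	printf("found: %s\n", (RB_FIND(node_tree, &head, &key) != NULL) ? "yes" : "no");

	return 0;
}
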
void
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
+ struct inpcb *tmp_inp = NULL;
/*
* If there was a feedback from the interface when
* send operation was in progress, we should ignore
return(0);
inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
- if (inp_fc_addinp(inp)) {
+ if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, INPFC_SOLOCKED))
+ != NULL) {
+ if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1)
+ == WNT_STOPUSING)
+ return (0);
+ VERIFY(tmp_inp == inp);
switch (advcode) {
case FADV_FLOW_CONTROLLED:
inp->inp_flags |= INP_FLOW_CONTROLLED;
inp->inp_socket->so_flags |= SOF_SUSPENDED;
break;
}
+ return (1);
}
- return(1);
+ return(0);
}
/*
#endif
struct ifnet;
-#ifdef BSD_KERNEL_PRIVATE
-/* Flow control entry per socket */
-struct inp_fc_entry {
- RB_ENTRY(inp_fc_entry) infc_link;
- u_int32_t infc_flowhash;
- struct inpcb *infc_inp;
-};
-#endif /* BSD_KERNEL_PRIVATE */
-
struct inp_stat {
u_int64_t rxpackets;
u_int64_t rxbytes;
struct socket *inp_socket; /* back pointer to socket */
u_int32_t nat_cookie; /* Cookie stored and returned to NAT */
LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */
+ RB_ENTRY(inpcb) infc_link; /* link for flowhash RB tree */
struct inpcbport *inp_phd; /* head of this list */
inp_gen_t inp_gencnt; /* generation count of this instance */
u_int32_t inp_flags; /* generic IP/datagram flags */
+ u_int32_t inp_flags2; /* generic IP/datagram flags #2 */
u_int32_t inp_flow;
u_char inp_sndinprog_cnt; /* outstanding send operations */
#define IN6P_RECV_ANYIF INP_RECV_ANYIF
#define IN6P_CONTROLOPTS INP_CONTROLOPTS
#define IN6P_NO_IFT_CELLULAR INP_NO_IFT_CELLULAR
+
+/* Overflowed INP flags; use INP2 prefix to avoid misuse */
+#define INP2_IN_FCTREE 0x2 /* in inp_fc_tree */
/*
* socket AF version is {newer than,or include}
* actual datagram AF version
extern int inp_nocellular(struct inpcb *, unsigned int);
extern u_int32_t inp_calc_flowhash(struct inpcb *);
extern void socket_flowadv_init(void);
-extern int inp_fc_addinp(struct inpcb *);
-extern struct inp_fc_entry *inp_fc_getinp(u_int32_t);
-extern void inp_fc_entry_free(struct inp_fc_entry *);
+
+/* Flags used by inp_fc_getinp */
+#define INPFC_SOLOCKED 0x1
+#define INPFC_REMOVE 0x2
+extern struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
extern void inp_fc_feedback(struct inpcb *);
extern void inp_reset_fc_state(struct inpcb *);
extern int inp_set_fc_state(struct inpcb *, int advcode);
#define CP_READ_ACCESS 0x1
#define CP_WRITE_ACCESS 0x2
+/*
+ * Check for this version when deciding to enable features
+ */
#define CONTENT_PROTECTION_XATTR_NAME "com.apple.system.cprotect"
#define CP_NEW_MAJOR_VERS 4
#define CP_PREV_MAJOR_VERS 2
#define MACH_REMOTE_AST 0x17 /* AST signal issued to remote processor */
#define MACH_SCHED_LPA_BROKEN 0x18 /* last_processor affinity broken in choose_processor */
+#define MACH_DEEP_IDLE 0x19 /* deep idle on master processor */
/* Codes for pmap (DBG_MACH_PMAP) */
#define PMAP__CREATE 0x0
#define DBG_DRVSD 19 /* Secure Digital */
#define DBG_DRVNAND 20 /* NAND drivers and layers */
#define DBG_SSD 21 /* SSD */
+#define DBG_DRVSPI 22 /* SPI */
/* Backwards compatibility */
#define DBG_DRVPOINTING DBG_DRVHID /* OBSOLETE: Use DBG_DRVHID instead */
extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4);
extern void kdbg_dump_trace_to_file(const char *);
-void start_kern_tracing(unsigned int);
+void start_kern_tracing(unsigned int, boolean_t);
struct task;
extern void kdbg_get_task_name(char*, int, struct task *task);
void disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags);
-12.4.0
+12.5.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
__ZN22IOInterruptEventSource7warmCPUEy
_acpi_install_wake_handler
_acpi_sleep_kernel
+_acpi_idle_kernel
_add_fsevent
_apic_table
_apply_func_phys
uint32_t sleepTime;
uint32_t compression;
- uint32_t reserved[68]; // make sizeof == 512
+ uint32_t reserved[62]; // make sizeof == 512
+
+ uint64_t restoreTime1 __attribute__ ((packed));
+ uint64_t restoreTime2 __attribute__ ((packed));
+ uint64_t restoreTime3 __attribute__ ((packed));
uint64_t encryptEnd __attribute__ ((packed));
uint64_t deviceBase __attribute__ ((packed));
#include <libkern/c++/OSObject.h>
#define kIOPolledInterfaceSupportKey "IOPolledInterface"
+#define kIOPolledInterfaceActiveKey "IOPolledInterfaceActive"
enum
{
void ParentChangeNotifyInterestedDriversDidChange ( void );
void ParentChangeTellCapabilityDidChange ( void );
void ParentChangeAcknowledgePowerChange ( void );
- void ParentChangeCancelIdleTimer( IOPMPowerStateIndex );
+ void ParentChangeRootChangeDown( void );
void all_done ( void );
void start_ack_timer ( void );
void startSettleTimer( void );
bool checkForDone ( void );
bool responseValid ( uint32_t x, int pid );
- void computeDesiredState ( unsigned long tempDesire = 0 );
+ void computeDesiredState( unsigned long tempDesire, bool computeOnly );
void trackSystemSleepPreventers( IOPMPowerStateIndex, IOPMPowerStateIndex, IOPMPowerChangeFlags );
void tellSystemCapabilityChange( uint32_t nextMS );
+ void restartIdleTimer( void );
static void ack_timer_expired( thread_call_param_t, thread_call_param_t );
static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * );
@constant kIOPMInitialDeviceState
Indicates the initial power state for the device. If <code>initialPowerStateForDomainState()</code> returns a power state with this flag set in the capability field, then the initial power change is performed without calling the driver's <code>setPowerState()</code>.
+
+ @constant kIOPMRootDomainState
+ An indication that the power flags represent the state of the root power
+ domain. This bit must not be set in the IOPMPowerState structure.
+ Power Management may pass this bit to initialPowerStateForDomainState()
+ or powerStateForDomainState() to map from a global system state to the
+ desired device state.
*/
typedef unsigned long IOPMPowerFlags;
enum {
kIOPMRestartCapability = 0x00000080,
kIOPMSleep = 0x00000001,
kIOPMRestart = 0x00000080,
- kIOPMInitialDeviceState = 0x00000100
+ kIOPMInitialDeviceState = 0x00000100,
+ kIOPMRootDomainState = 0x00000200
};
/*
*/
#define kIOPMDestroyFVKeyOnStandbyKey "DestroyFVKeyOnStandby"
+/*******************************************************************************
+ *
+ * Properties that can control power management behavior
+ *
+ ******************************************************************************/
+
+/* kIOPMResetPowerStateOnWakeKey
+ * If an IOService publishes this key with the value of kOSBooleanTrue,
+ * then PM will disregard the influence from changePowerStateToPriv() or
+ * any activity tickles that occurred before system sleep when resolving
+ * the initial device power state on wake. Influences from power children
+ * and changePowerStateTo() are not eliminated. At the earliest opportunity
+ * upon system wake, PM will query the driver for a new power state to be
+ * installed as the initial changePowerStateToPriv() influence, by calling
+ * initialPowerStateForDomainState() with both kIOPMRootDomainState and
+ * kIOPMPowerOn flags set. The default implementation will always return
+ * the lowest power state. Drivers can override this default behavior to
+ * immediately raise the power state when there is work blocked on the
+ * power change and the driver cannot afford to wait until the next
+ * activity tickle.
+ * This property should be statically added to a driver's plist or set at
+ * runtime before calling PMinit().
+ */
+#define kIOPMResetPowerStateOnWakeKey "IOPMResetPowerStateOnWake"
+
/*******************************************************************************
*
* Driver PM Assertions
*/
#define kIOPMUserWakeAlarmScheduledKey "UserWakeAlarmScheduled"
+/* kIOPMDeepIdleSupportedKey
+ * Presence of this key indicates Deep Idle is supported on this platform.
+ * Its value is always kOSBooleanTrue.
+ */
+#define kIOPMDeepIdleSupportedKey "IOPMDeepIdleSupported"
+
/*****************************************************************************
*
* System Sleep Policy
kIOPMSleepTypeHibernate = 4,
kIOPMSleepTypeStandby = 5,
kIOPMSleepTypePowerOff = 6,
- kIOPMSleepTypeLast = 7
+ kIOPMSleepTypeDeepIdle = 7,
+ kIOPMSleepTypeLast = 8
};
// System Sleep Flags
if (kIOReturnSuccess != err)
break;
+ vars->media = part;
+ next = part;
+ while (next)
+ {
+ next->setProperty(kIOPolledInterfaceActiveKey, kOSBooleanTrue);
+ next = next->getParentEntry(gIOServicePlane);
+ }
+
*fileVars = vars;
*fileExtents = extentsData;
static IOReturn
IOHibernateDone(IOHibernateVars * vars)
{
+ IORegistryEntry * next;
+
hibernate_teardown(vars->page_list, vars->page_list_wired, vars->page_list_pal);
if (vars->videoMapping)
IOService::getPMRootDomain()->removeProperty(kIOHibernateGfxStatusKey);
}
-
if (vars->fileVars)
{
+ if ((next = vars->fileVars->media)) do
+ {
+ next->removeProperty(kIOPolledInterfaceActiveKey);
+ next = next->getParentEntry(gIOServicePlane);
+ }
+ while (next);
IOPolledFileClose(vars->fileVars);
}
gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1],
gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]);
+ HIBLOG("restore times %qd, %qd, %qd ms, tsc 0x%qx scale 0x%x\n",
+ (((gIOHibernateCurrentHeader->restoreTime1 * pal_rtc_nanotime_info.scale) >> 32) / 1000000),
+ (((gIOHibernateCurrentHeader->restoreTime2 * pal_rtc_nanotime_info.scale) >> 32) / 1000000),
+ (((gIOHibernateCurrentHeader->restoreTime3 * pal_rtc_nanotime_info.scale) >> 32) / 1000000),
+ gIOHibernateCurrentHeader->restoreTime1, pal_rtc_nanotime_info.scale);
+
if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode)
hibernate_page_list_discard(vars->page_list);
break;
case kIOHibernateHandoffTypeMemoryMap:
+
+ clock_get_uptime(&allTime);
+
hibernate_newruntime_map(data, handoff->bytecount,
gIOHibernateCurrentHeader->systemTableOffset);
+
+ clock_get_uptime(&endTime);
+
+ SUB_ABSOLUTETIME(&endTime, &allTime);
+ absolutetime_to_nanoseconds(endTime, &nsec);
+
+ HIBLOG("hibernate_newruntime_map time: %qd ms, ", nsec / 1000000ULL);
+
break;
case kIOHibernateHandoffTypeDeviceTree:
struct IOPolledFileIOVars
{
struct kern_direct_file_io_ref_t * fileRef;
+ IORegistryEntry * media;
class OSArray * pollers;
IOByteCount blockSize;
uint8_t * buffer;
#include <libkern/WKdm.h>
#include "IOHibernateInternal.h"
-#if defined(__i386__) || defined(__x86_64__)
-#include <i386/pal_hibernate.h>
-#endif
+#include <machine/pal_hibernate.h>
/*
This code is linked into the kernel but part of the "__HIB" section, which means
#if defined(__i386__) || defined(__x86_64__)
+#define rdtsc(lo,hi) \
+ __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi))
+
+static inline uint64_t rdtsc64(void)
+{
+ uint64_t lo, hi;
+ rdtsc(lo, hi);
+ return ((hi) << 32) | (lo);
+}
+
+#else
+
+static inline uint64_t rdtsc64(void)
+{
+ return (0);
+}
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+#if defined(__i386__) || defined(__x86_64__)
+
#define DBGLOG 1
#include <architecture/i386/pio.h>
uint32_t handoffPages;
uint32_t handoffPageCount;
+ uint64_t timeStart, time;
+ timeStart = rdtsc64();
+
C_ASSERT(sizeof(IOHibernateImageHeader) == 512);
headerPhys = ptoa_64(p1);
if (!conflicts)
{
// if (compressedSize)
+ time = rdtsc64();
pageSum = store_one_page(gIOHibernateCurrentHeader->processorFlags,
src, compressedSize, 0, ppnum);
+ gIOHibernateCurrentHeader->restoreTime2 += (rdtsc64() - time);
if (stage != 2)
sum += pageSum;
uncompressedPages++;
// -- copy back conflicts
+ time = rdtsc64();
+
pageListPage = copyPageListHeadPage;
while (pageListPage)
{
pal_hib_patchup();
+ gIOHibernateCurrentHeader->restoreTime3 = (rdtsc64() - time);
+
// -- image has been destroyed...
gIOHibernateCurrentHeader->actualImage1Sum = sum;
gIOHibernateState = kIOHibernateStateWakingFromHibernate;
+ gIOHibernateCurrentHeader->restoreTime1 = (rdtsc64() - timeStart);
+
#if CONFIG_SLEEP
#if defined(__i386__) || defined(__x86_64__)
typedef void (*ResetProc)(void);
reserved->dp.memory = 0;
UNLOCK;
}
-
- if ((kIOMemoryTypePhysical != type) && (kIOMemoryTypePhysical64 != type))
+ if ((kIOMemoryTypePhysical == type) || (kIOMemoryTypePhysical64 == type))
+ {
+ ioGMDData * dataP;
+ if (_memoryEntries && (dataP = getDataP(_memoryEntries)) && dataP->fMappedBase)
+ {
+ dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages);
+ dataP->fMappedBase = 0;
+ }
+ }
+ else
{
- while (_wireCount)
- complete();
+ while (_wireCount) complete();
}
- if (_memoryEntries)
- _memoryEntries->release();
+
+ if (_memoryEntries) _memoryEntries->release();
if (_ranges.v && !(kIOMemoryAsReference & _flags))
{
IOService::updateConsoleUsers(NULL, kIOMessageSystemWillSleep);
- // Notify platform that sleep has begun
- getPlatform()->callPlatformFunction(
- sleepMessagePEFunction, false,
- (void *)(uintptr_t) kIOMessageSystemWillSleep,
- NULL, NULL, NULL);
-
// Two change downs are sent by IOServicePM. Ignore the 2nd.
// But tellClientsWithResponse() must be called for both.
ignoreTellChangeDown = true;
DLOG("sysPowerDownHandler timeout %d s\n", (int) (params->maxWaitForReply / 1000 / 1000));
#endif
+ // Notify platform that sleep has begun, after the early
+ // sleep policy evaluation.
+ getPlatform()->callPlatformFunction(
+ sleepMessagePEFunction, false,
+ (void *)(uintptr_t) kIOMessageSystemWillSleep,
+ NULL, NULL, NULL);
+
if ( !OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) )
{
// Purposely delay the ack and hope that shutdown occurs quickly.
IOPMSystemSleepPolicyEntry entries[];
} __attribute__((packed));
+enum {
+ kIOPMSleepAttributeHibernateSetup = 0x00000001,
+ kIOPMSleepAttributeHibernateSleep = 0x00000002
+};
+
+static uint32_t
+getSleepTypeAttributes( uint32_t sleepType )
+{
+ static const uint32_t sleepTypeAttributes[ kIOPMSleepTypeLast ] =
+ {
+ /* invalid */ 0,
+ /* abort */ 0,
+ /* normal */ 0,
+ /* safesleep */ kIOPMSleepAttributeHibernateSetup,
+ /* hibernate */ kIOPMSleepAttributeHibernateSetup | kIOPMSleepAttributeHibernateSleep,
+ /* standby */ kIOPMSleepAttributeHibernateSetup | kIOPMSleepAttributeHibernateSleep,
+ /* poweroff */ kIOPMSleepAttributeHibernateSetup | kIOPMSleepAttributeHibernateSleep,
+ /* deepidle */ 0
+ };
+
+ if (sleepType >= kIOPMSleepTypeLast)
+ return 0;
+
+ return sleepTypeAttributes[sleepType];
+}
+
bool IOPMrootDomain::evaluateSystemSleepPolicy(
IOPMSystemSleepParameters * params, int sleepPhase, uint32_t * hibMode )
{
goto done;
}
- if ((params->sleepType >= kIOPMSleepTypeSafeSleep) &&
+ if ((getSleepTypeAttributes(params->sleepType) &
+ kIOPMSleepAttributeHibernateSetup) &&
((*hibMode & kIOHibernateModeOn) == 0))
{
*hibMode |= (kIOHibernateModeOn | kIOHibernateModeSleep);
&hibernateMode))
{
if (!hibernateNoDefeat &&
- (gEarlySystemSleepParams.sleepType == kIOPMSleepTypeNormalSleep))
+ ((getSleepTypeAttributes(gEarlySystemSleepParams.sleepType) &
+ kIOPMSleepAttributeHibernateSetup) == 0))
{
- // Disable hibernate setup for normal sleep
+ // skip hibernate setup
hibernateDisabled = true;
}
}
if (evaluateSystemSleepPolicy(¶ms, kIOPMSleepPhase2, &hibernateMode))
{
if ((hibernateDisabled || hibernateAborted) &&
- (params.sleepType != kIOPMSleepTypeNormalSleep))
+ (getSleepTypeAttributes(params.sleepType) &
+ kIOPMSleepAttributeHibernateSetup))
{
// Final evaluation picked a state requiring hibernation,
// but hibernate setup was skipped. Retry using the early
paramsData->release();
}
- if (params.sleepType >= kIOPMSleepTypeHibernate)
+ if (getSleepTypeAttributes(params.sleepType) &
+ kIOPMSleepAttributeHibernateSleep)
{
- // Disable safe sleep to force the hibernate path
+ // Disable sleep to force hibernation
gIOHibernateMode &= ~kIOHibernateModeSleep;
}
}
uint32_t changeFlags = *inOutChangeFlags;
uint32_t currentPowerState = (uint32_t) getPowerState();
- if ((currentPowerState == powerState) ||
- (changeFlags & kIOPMParentInitiated))
+ if (changeFlags & kIOPMParentInitiated)
{
// FIXME: cancel any parent change (unexpected)
// Root parent is permanently pegged at max power,
// Revert device desire from SLEEP->ON.
changePowerStateToPriv(ON_STATE);
}
+ else
+ {
+ // Broadcast power down
+ *inOutChangeFlags |= kIOPMRootChangeDown;
+ }
+ }
+ else if (powerState > currentPowerState)
+ {
+ if ((_currentCapability & kIOPMSystemCapabilityCPU) == 0)
+ {
+ // Broadcast power up when waking from sleep, but not for the
+ // initial power change at boot by checking for cpu capability.
+ *inOutChangeFlags |= kIOPMRootChangeUp;
+ }
}
}
if ( minutesToIdleSleep > minutesToDisplayDim )
minutesDelta = minutesToIdleSleep - minutesToDisplayDim;
- else if( minutesToIdleSleep == minutesToDisplayDim )
+ else if( minutesToIdleSleep <= minutesToDisplayDim )
minutesDelta = 1;
if ((sleepSlider == 0) && (minutesToIdleSleep != 0))
if( matches) {
lockForArbitration();
- if( 0 == (__state[0] & kIOServiceFirstPublishState))
+ if( 0 == (__state[0] & kIOServiceFirstPublishState)) {
+ getMetaClass()->addInstance(this);
deliverNotification( gIOFirstPublishNotification,
kIOServiceFirstPublishState, 0xffffffff );
+ }
LOCKREADNOTIFY();
__state[1] &= ~kIOServiceNeedConfigState;
__state[1] |= kIOServiceConfigState;
}
UNLOCKNOTIFY();
- if (didRegister) {
- getMetaClass()->addInstance(this);
- }
unlockForArbitration();
if (keepGuessing && matches->getCount() && (kIOReturnSuccess == getResources()))
// Globals
//******************************************************************************
-static bool gIOPMInitialized = false;
-static uint32_t gIOPMBusyCount = 0;
-static uint32_t gIOPMWorkCount = 0;
-static IOWorkLoop * gIOPMWorkLoop = 0;
-static IOPMRequestQueue * gIOPMRequestQueue = 0;
-static IOPMRequestQueue * gIOPMReplyQueue = 0;
-static IOPMWorkQueue * gIOPMWorkQueue = 0;
-static IOPMCompletionQueue * gIOPMFreeQueue = 0;
-static IOPMRequest * gIOPMRequest = 0;
-static IOService * gIOPMRootNode = 0;
-static IOPlatformExpert * gPlatform = 0;
+static bool gIOPMInitialized = false;
+static uint32_t gIOPMBusyCount = 0;
+static uint32_t gIOPMWorkCount = 0;
+static uint32_t gIOPMTickleGeneration = 0;
+static IOWorkLoop * gIOPMWorkLoop = 0;
+static IOPMRequestQueue * gIOPMRequestQueue = 0;
+static IOPMRequestQueue * gIOPMReplyQueue = 0;
+static IOPMWorkQueue * gIOPMWorkQueue = 0;
+static IOPMCompletionQueue * gIOPMFreeQueue = 0;
+static IOPMRequest * gIOPMRequest = 0;
+static IOService * gIOPMRootNode = 0;
+static IOPlatformExpert * gPlatform = 0;
static const OSSymbol * gIOPMPowerClientDevice = 0;
static const OSSymbol * gIOPMPowerClientDriver = 0;
gIOPMRootNode = this;
fParentsKnowState = true;
}
+ else if (getProperty(kIOPMResetPowerStateOnWakeKey) == kOSBooleanTrue)
+ {
+ fResetPowerStateOnWake = true;
+ }
fAckTimer = thread_call_allocate(
&IOService::ack_timer_expired, (thread_call_param_t)this);
PM_UNLOCK();
}
- // Tell idleTimerExpired() to ignore idle timer.
+ // Clear idle period to prevent idleTimerExpired() from servicing
+ // idle timer expirations.
+
fIdleTimerPeriod = 0;
if (fIdleTimer && thread_call_cancel(fIdleTimer))
release();
void IOService::adjustPowerState ( uint32_t clamp )
{
PM_ASSERT_IN_GATE();
- computeDesiredState(clamp);
+ computeDesiredState(clamp, false);
if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane))
{
IOPMPowerChangeFlags changeFlags = kIOPMSelfInitiated;
- // Indicate that children desires were ignored, and do not ask
+	// Indicate that children's desires must be ignored, and do not ask
// apps for permission to drop power. This is used by root domain
// for demand sleep.
OSIterator * iter;
OSObject * next;
IOPowerConnection * connection;
- IOPMPowerStateIndex newPowerState;
+ IOPMPowerStateIndex maxPowerState;
IOPMPowerFlags combinedPowerFlags;
bool savedParentsKnowState;
IOReturn result = IOPMAckImplied;
if ( fControllingDriver && !fInitialPowerChange )
{
- newPowerState = fControllingDriver->maxCapabilityForDomainState(
+ maxPowerState = fControllingDriver->maxCapabilityForDomainState(
combinedPowerFlags);
- // Absorb parent's kIOPMSynchronize flag.
+ // Use kIOPMSynchronize below instead of kIOPMRootBroadcastFlags
+ // to avoid propagating the root change flags if any service must
+ // change power state due to root's will-change notification.
+ // Root does not change power state for kIOPMSynchronize.
+
myChangeFlags = kIOPMParentInitiated | kIOPMDomainWillChange |
(parentChangeFlags & kIOPMSynchronize);
result = startPowerChange(
/* flags */ myChangeFlags,
- /* power state */ newPowerState,
+ /* power state */ maxPowerState,
/* domain flags */ combinedPowerFlags,
/* connection */ whichParent,
/* parent flags */ parentPowerFlags);
IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1;
IOPMPowerChangeFlags parentChangeFlags = (IOPMPowerChangeFlags)(uintptr_t) request->fArg2;
IOPMPowerChangeFlags myChangeFlags;
- IOPMPowerStateIndex newPowerState;
- IOPMPowerStateIndex initialDesire;
+ IOPMPowerStateIndex maxPowerState;
+ IOPMPowerStateIndex initialDesire = 0;
+ bool computeDesire = false;
+ bool desireChanged = false;
bool savedParentsKnowState;
IOReturn result = IOPMAckImplied;
if ( fControllingDriver )
{
- newPowerState = fControllingDriver->maxCapabilityForDomainState(
+ maxPowerState = fControllingDriver->maxCapabilityForDomainState(
fParentsCurrentPowerFlags);
if (fInitialPowerChange)
{
+ computeDesire = true;
initialDesire = fControllingDriver->initialPowerStateForDomainState(
- fParentsCurrentPowerFlags);
- computeDesiredState(initialDesire);
+ fParentsCurrentPowerFlags);
}
- else if (fAdvisoryTickleUsed && (newPowerState > 0) &&
- ((parentChangeFlags & kIOPMSynchronize) == 0))
+ else if (parentChangeFlags & kIOPMRootChangeUp)
{
- // re-compute desired state in case advisory tickle was enabled
- computeDesiredState();
+ if (fAdvisoryTickleUsed)
+ {
+ // On system wake, re-compute the desired power state since
+ // gIOPMAdvisoryTickleEnabled will change for a full wake,
+ // which is an input to computeDesiredState(). This is not
+ // necessary for a dark wake because powerChangeDone() will
+ // handle the dark to full wake case, but it does no harm.
+
+ desireChanged = true;
+ }
+
+ if (fResetPowerStateOnWake)
+ {
+ // Query the driver for the desired power state on system wake.
+ // Default implementation returns the lowest power state.
+
+ IOPMPowerStateIndex wakePowerState =
+ fControllingDriver->initialPowerStateForDomainState(
+ kIOPMRootDomainState | kIOPMPowerOn );
+
+ // fDesiredPowerState was adjusted before going to sleep
+ // with fDeviceDesire at min.
+
+ if (wakePowerState > fDesiredPowerState)
+ {
+ // Must schedule a power adjustment if we changed the
+ // device desire. That will update the desired domain
+ // power on the parent power connection and ping the
+ // power parent if necessary.
+
+ updatePowerClient(gIOPMPowerClientDevice, wakePowerState);
+ desireChanged = true;
+ }
+ }
}
- // Absorb parent's kIOPMSynchronize flag.
+ if (computeDesire || desireChanged)
+ computeDesiredState(initialDesire, false);
+
+ // Absorb and propagate parent's broadcast flags
myChangeFlags = kIOPMParentInitiated | kIOPMDomainDidChange |
- (parentChangeFlags & kIOPMSynchronize);
+ (parentChangeFlags & kIOPMRootBroadcastFlags);
result = startPowerChange(
/* flags */ myChangeFlags,
- /* power state */ newPowerState,
+ /* power state */ maxPowerState,
/* domain flags */ fParentsCurrentPowerFlags,
/* connection */ whichParent,
/* parent flags */ 0);
}
// If the parent registers its power driver late, then this is the
- // first opportunity to tell our parent about our desire.
+	// first opportunity to tell our parent about our desire. Also handle
+	// the case where the child's desire changed during a parent change
+	// notification.
- if (!savedParentsKnowState && fParentsKnowState)
+ if ((!savedParentsKnowState && fParentsKnowState) || desireChanged)
{
- PM_LOG1("%s::powerDomainDidChangeTo parentsKnowState = true\n",
- getName());
+ PM_LOG1("%s::powerDomainDidChangeTo parentsKnowState %d\n",
+ getName(), fParentsKnowState);
requestDomainPower( fDesiredPowerState );
}
{
IOPMRequest * cancelRequest;
- cancelRequest = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
+ cancelRequest = acquirePMRequest( getPMRootDomain(), kIOPMRequestTypeIdleCancel );
if (cancelRequest)
{
- getPMRootDomain()->submitPMRequest( cancelRequest );
+ submitPMRequest( cancelRequest );
}
}
#endif
// [private] computeDesiredState
//*********************************************************************************
-void IOService::computeDesiredState ( unsigned long localClamp )
+void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly )
{
OSIterator * iter;
OSObject * next;
if (hasChildren && (client == gIOPMPowerClientChildProxy))
continue;
+	// Advisory tickles are irrelevant unless the system is in full wake
if (client == gIOPMPowerClientAdvisoryTickle &&
!gIOPMAdvisoryTickleEnabled)
continue;
(uint32_t) localClamp, (uint32_t) fTempClampPowerState,
(uint32_t) fCurrentPowerState, newPowerState);
- // Restart idle timer if stopped and device desire has increased.
- // Or if advisory desire exists.
-
- if (fIdleTimerStopped)
+ if (!computeOnly)
{
- if (fDeviceDesire > 0)
- {
- fIdleTimerStopped = false;
- fActivityTickleCount = 0;
- clock_get_uptime(&fIdleTimerStartTime);
- start_PM_idle_timer();
- }
- else if (fHasAdvisoryDesire)
+ // Restart idle timer if possible when device desire has increased.
+ // Or if an advisory desire exists.
+
+ if (fIdleTimerPeriod && fIdleTimerStopped)
{
- fIdleTimerStopped = false;
- start_PM_idle_timer();
+ restartIdleTimer();
}
- }
- // Invalidate cached tickle power state when desires change, and not
- // due to a tickle request. This invalidation must occur before the
- // power state change to minimize races. We want to err on the side
- // of servicing more activity tickles rather than dropping one when
- // the device is in a low power state.
+		// Invalidate the cached tickle power state when desires change, and
+		// not due to a tickle request. This covers the case where the driver
+		// has requested a lower power state while the tickle cache still
+		// holds a higher one, which would cause future tickles to be dropped
+		// until the cached value is lowered or invalidated. The invalidation
+		// must occur before the power transition to avoid dropping a
+		// necessary tickle.
- if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) &&
- (fActivityTicklePowerState != kInvalidTicklePowerState))
- {
- IOLockLock(fActivityLock);
- fActivityTicklePowerState = kInvalidTicklePowerState;
- IOLockUnlock(fActivityLock);
+ if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) &&
+ (fActivityTicklePowerState != kInvalidTicklePowerState))
+ {
+ IOLockLock(fActivityLock);
+ fActivityTicklePowerState = kInvalidTicklePowerState;
+ IOLockUnlock(fActivityLock);
+ }
}
}
{
IOPMRequest * request;
bool noPowerChange = true;
+ uint32_t tickleFlags;
if (!initialized)
return true; // no power change
fActivityTicklePowerState = stateNumber;
noPowerChange = false;
+ tickleFlags = kTickleTypeActivity | kTickleTypePowerRise;
request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
if (request)
{
- request->fArg0 = (void *) stateNumber; // power state
- request->fArg1 = (void *) true; // power rise
- request->fArg2 = (void *) false; // regular tickle
+ request->fArg0 = (void *) stateNumber;
+ request->fArg1 = (void *) tickleFlags;
+ request->fArg2 = (void *) gIOPMTickleGeneration;
submitPMRequest(request);
}
}
fAdvisoryTicklePowerState = stateNumber;
noPowerChange = false;
+ tickleFlags = kTickleTypeAdvisory | kTickleTypePowerRise;
request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
if (request)
{
- request->fArg0 = (void *) stateNumber; // power state
- request->fArg1 = (void *) true; // power rise
- request->fArg2 = (void *) true; // advisory tickle
+ request->fArg0 = (void *) stateNumber;
+ request->fArg1 = (void *) tickleFlags;
+ request->fArg2 = (void *) gIOPMTickleGeneration;
submitPMRequest(request);
}
}
void IOService::handleActivityTickle ( IOPMRequest * request )
{
uint32_t ticklePowerState = (uint32_t)(uintptr_t) request->fArg0;
- bool deviceWasActive = (request->fArg1 == (void *) true);
- bool isRegularTickle = (request->fArg2 == (void *) false);
+ uint32_t tickleFlags = (uint32_t)(uintptr_t) request->fArg1;
+ uint32_t tickleGeneration = (uint32_t)(uintptr_t) request->fArg2;
bool adjustPower = false;
PM_ASSERT_IN_GATE();
- if (isRegularTickle)
+ if (fResetPowerStateOnWake && (tickleGeneration != gIOPMTickleGeneration))
+ {
+ // Drivers that don't want power restored on wake will drop any
+		// tickles that pre-date the current system wake. The model is
+ // that each wake is a fresh start, with power state depressed
+ // until a new tickle or an explicit power up request from the
+ // driver. It is possible for the PM work loop to enter the
+ // system sleep path with tickle requests queued.
+
+ return;
+ }
+
+ if (tickleFlags & kTickleTypeActivity)
{
- if (deviceWasActive)
+ if (tickleFlags & kTickleTypePowerRise)
{
if ((ticklePowerState > fDeviceDesire) &&
(ticklePowerState < fNumberOfPowerStates))
}
else // advisory tickle
{
- if (deviceWasActive)
+ if (tickleFlags & kTickleTypePowerRise)
{
if ((ticklePowerState == fDeviceUsablePowerState) &&
(ticklePowerState < fNumberOfPowerStates))
if (pending) release();
}
+//*********************************************************************************
+// [private] restartIdleTimer
+//*********************************************************************************
+
+void IOService::restartIdleTimer( void )
+{
+ if (fDeviceDesire != 0)
+ {
+ fIdleTimerStopped = false;
+ fActivityTickleCount = 0;
+ clock_get_uptime(&fIdleTimerStartTime);
+ start_PM_idle_timer();
+ }
+ else if (fHasAdvisoryDesire)
+ {
+ fIdleTimerStopped = false;
+ start_PM_idle_timer();
+ }
+ else
+ {
+ fIdleTimerStopped = true;
+ }
+}
+
//*********************************************************************************
// idle_timer_expired
//*********************************************************************************
{
IOPMRequest * request;
bool restartTimer = true;
+ uint32_t tickleFlags;
- if ( !initialized || !fIdleTimerPeriod || fLockedFlags.PMStop )
+ if ( !initialized || !fIdleTimerPeriod || fIdleTimerStopped ||
+ fLockedFlags.PMStop )
return;
IOLockLock(fActivityLock);
if (fActivityTicklePowerState > 0)
fActivityTicklePowerState--;
+ tickleFlags = kTickleTypeActivity | kTickleTypePowerDrop;
request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
if (request)
{
- request->fArg0 = (void *) 0; // power state (irrelevant)
- request->fArg1 = (void *) false; // timer expiration (not tickle)
- request->fArg2 = (void *) false; // regular tickle
+ request->fArg0 = (void *) 0; // irrelevant
+ request->fArg1 = (void *) tickleFlags;
+ request->fArg2 = (void *) gIOPMTickleGeneration;
submitPMRequest( request );
// Do not restart timer until after the tickle request has been
// Want new tickles to turn into pm request after we drop the lock
fAdvisoryTicklePowerState = kInvalidTicklePowerState;
+ tickleFlags = kTickleTypeAdvisory | kTickleTypePowerDrop;
request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
if (request)
{
- request->fArg0 = (void *) 0; // power state (irrelevant)
- request->fArg1 = (void *) false; // timer expiration (not tickle)
- request->fArg2 = (void *) true; // advisory tickle
+ request->fArg0 = (void *) 0; // irrelevant
+ request->fArg1 = (void *) tickleFlags;
+ request->fArg2 = (void *) gIOPMTickleGeneration;
submitPMRequest( request );
// Do not restart timer until after the tickle request has been
}
else if (fAdvisoryTickleUsed)
{
- // Not root domain and advisory tickle target
+ // Not root domain and advisory tickle target.
// Re-adjust power after power tree sync at the 'did' pass
+ // to recompute desire and adjust power state between dark
+ // and full wake transitions. Root domain is responsible
+ // for calling setAdvisoryTickleEnable() before starting
+ // the kIOPMSynchronize power change.
+
if (!fAdjustPowerScheduled &&
(fHeadNoteChangeFlags & kIOPMDomainDidChange))
{
if (fCurrentCapabilityFlags & kIOPMStaticPowerValid)
fCurrentPowerConsumption = powerStatePtr->staticPower;
+ if (fHeadNoteChangeFlags & kIOPMRootChangeDown)
+ {
+ // Bump tickle generation count once the entire tree is down
+ gIOPMTickleGeneration++;
+ }
+
// inform subclass policy-maker
if (fPCDFunctionOverride && fParentsKnowState &&
assertPMDriverCall(&callEntry, kIOPMADC_NoInactiveCheck))
// parent's power change
if ( fHeadNoteChangeFlags & kIOPMParentInitiated)
{
+ if (fHeadNoteChangeFlags & kIOPMRootChangeDown)
+ ParentChangeRootChangeDown();
+
if (((fHeadNoteChangeFlags & kIOPMDomainWillChange) &&
(fCurrentPowerState >= fHeadNotePowerState)) ||
((fHeadNoteChangeFlags & kIOPMDomainDidChange) &&
}
}
+//*********************************************************************************
+// [private] requestDomainPowerApplier
+//
+// Call requestPowerDomainState() on all power parents.
//*********************************************************************************
struct IOPMRequestDomainPowerContext {
//*********************************************************************************
// [private] requestDomainPower
+//
+// Called by a power child to broadcast its desired power state to all parents.
+// If the child self-initiates a power change, it must call this function to
+// allow its parents to adjust power state.
//*********************************************************************************
IOReturn IOService::requestDomainPower(
if (IS_PM_ROOT)
return kIOReturnSuccess;
- // Fetch the input power flags for the requested power state.
+ // Fetch our input power flags for the requested power state.
// Parent request is stated in terms of required power flags.
requestPowerFlags = fPowerStates[ourPowerState].inputPowerFlags;
}
fPreviousRequestPowerFlags = requestPowerFlags;
+ // The results will be collected by fHeadNoteDomainTargetFlags
context.child = this;
context.requestPowerFlags = requestPowerFlags;
fHeadNoteDomainTargetFlags = 0;
maxPowerState = fControllingDriver->maxCapabilityForDomainState(
fHeadNoteDomainTargetFlags );
- if (maxPowerState < fHeadNotePowerState)
+ if (maxPowerState < ourPowerState)
{
PM_LOG1("%s: power desired %u:0x%x got %u:0x%x\n",
getName(),
PM_ASSERT_IN_GATE();
OUR_PMLog( kPMLogStartParentChange, fHeadNotePowerState, fCurrentPowerState );
- // Power domain is lowering power
- if ( fHeadNotePowerState < fCurrentPowerState )
+ // Root power domain has transitioned to its max power state
+ if ((fHeadNoteChangeFlags & (kIOPMDomainDidChange | kIOPMRootChangeUp)) ==
+ (kIOPMDomainDidChange | kIOPMRootChangeUp))
{
- // Piggy-back idle timer cancellation on a parent down
- if (0 == fHeadNotePowerState)
- ParentChangeCancelIdleTimer(fHeadNotePowerState);
-
- // TODO: redundant? See handlePowerDomainWillChangeTo()
- setParentInfo( fHeadNoteParentFlags, fHeadNoteParentConnection, true );
+ // Restart the idle timer stopped by ParentChangeRootChangeDown()
+ if (fIdleTimerPeriod && fIdleTimerStopped)
+ {
+ restartIdleTimer();
+ }
+ }
+ // Power domain is forcing us to lower power
+ if ( fHeadNotePowerState < fCurrentPowerState )
+ {
PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags);
// Tell apps and kernel clients
ParentChangeTellCapabilityWillChange();
return IOPMWillAckLater;
}
- else if (fHeadNoteChangeFlags & kIOPMSynchronize)
+ else if (fHeadNoteChangeFlags & kIOPMRootBroadcastFlags)
{
- // We do not need to change power state, but notify
- // children to propagate tree synchronization.
+ // No need to change power state, but broadcast change
+ // to our children.
fMachineState = kIOPM_SyncNotifyDidChange;
fDriverCallReason = kDriverCallInformPreChange;
notifyChildren();
return IOPMAckImplied;
}
+//******************************************************************************
+// [private] ParentChangeRootChangeDown
+//
+// Root domain has finished the transition to the system sleep state, and all
+// drivers in the power plane should have powered down. Cancel the idle timer,
+// and also reset the device desire for those drivers that don't want power
+// automatically restored on wake.
+//******************************************************************************
+
+void IOService::ParentChangeRootChangeDown( void )
+{
+ // Always stop the idle timer before root power down
+ if (fIdleTimerPeriod && !fIdleTimerStopped)
+ {
+ fIdleTimerStopped = true;
+ if (fIdleTimer && thread_call_cancel(fIdleTimer))
+ release();
+ }
+
+ if (fResetPowerStateOnWake)
+ {
+ // Reset device desire down to the lowest power state.
+ // Advisory tickle desire is intentionally untouched since
+ // it has no effect until system is promoted to full wake.
+
+ if (fDeviceDesire != 0)
+ {
+ updatePowerClient(gIOPMPowerClientDevice, 0);
+ computeDesiredState(0, true);
+ PM_LOG1("%s: tickle desire removed\n", fName);
+ }
+
+ // Invalidate tickle cache so the next tickle will issue a request
+ IOLockLock(fActivityLock);
+ fDeviceWasActive = false;
+ fActivityTicklePowerState = kInvalidTicklePowerState;
+ IOLockUnlock(fActivityLock);
+
+ fIdleTimerMinPowerState = 0;
+ }
+ else if (fAdvisoryTickleUsed)
+ {
+ // A less aggressive mechanism to accelerate idle timer expiration
+ // before system sleep. It may not always allow the driver to wake
+ // up from system sleep in the min power state.
+
+ AbsoluteTime now;
+ uint64_t nsec;
+ bool dropTickleDesire = false;
+
+ if (fIdleTimerPeriod && !fIdleTimerIgnored &&
+ (fIdleTimerMinPowerState == 0) &&
+ (fDeviceDesire != 0))
+ {
+ IOLockLock(fActivityLock);
+
+ if (!fDeviceWasActive)
+ {
+ // No tickles since the last idle timer expiration.
+ // Safe to drop the device desire to zero.
+ dropTickleDesire = true;
+ }
+ else
+ {
+ // Was tickled since the last idle timer expiration,
+ // but not in the last minute.
+ clock_get_uptime(&now);
+ SUB_ABSOLUTETIME(&now, &fDeviceActiveTimestamp);
+ absolutetime_to_nanoseconds(now, &nsec);
+ if (nsec >= kNoTickleCancelWindow)
+ {
+ dropTickleDesire = true;
+ }
+ }
+
+ if (dropTickleDesire)
+ {
+ // Force the next tickle to raise power state
+ fDeviceWasActive = false;
+ fActivityTicklePowerState = kInvalidTicklePowerState;
+ }
+
+ IOLockUnlock(fActivityLock);
+ }
+
+ if (dropTickleDesire)
+ {
+ // Advisory tickle desire is intentionally untouched since
+ // it has no effect until system is promoted to full wake.
+
+ updatePowerClient(gIOPMPowerClientDevice, 0);
+ computeDesiredState(0, true);
+ PM_LOG1("%s: tickle desire dropped\n", fName);
+ }
+ }
+}
+
//*********************************************************************************
// [private] ParentChangeTellPriorityClientsPowerDown
//
nub->release();
}
-void IOService::ParentChangeCancelIdleTimer( IOPMPowerStateIndex newPowerState )
-{
- AbsoluteTime now;
- uint64_t nsec;
- bool cancel = false;
-
- // No ready or idle timer not in use
- if (!initialized || !fIdleTimerPeriod || fLockedFlags.PMStop ||
- !fAdvisoryTickleUsed)
- return;
-
- // Not allowed to induce artifical idle timeout
- if (fIdleTimerIgnored || fIdleTimerMinPowerState)
- goto done;
-
- // Idle timer already has no influence
- if (!fDesiredPowerState || fIdleTimerStopped)
- goto done;
-
- IOLockLock(fActivityLock);
-
- if (!fDeviceWasActive)
- {
- // No tickles since the last idle timer expiration.
- // Safe to drop the device desire to zero.
- cancel = true;
- }
- else
- {
- // Was tickled since the last idle timer expiration,
- // but not in the last minute.
- clock_get_uptime(&now);
- SUB_ABSOLUTETIME(&now, &fDeviceActiveTimestamp);
- absolutetime_to_nanoseconds(now, &nsec);
- if (nsec >= kNoTickleCancelWindow)
- {
- cancel = true;
- }
- }
-
- if (cancel)
- {
- // Force the next tickle to raise power state
- fActivityTicklePowerState = kInvalidTicklePowerState;
- fDeviceWasActive = false;
- }
-
- IOLockUnlock(fActivityLock);
-
- if (cancel)
- {
- // cancel idle timer
- if (fIdleTimer && thread_call_cancel(fIdleTimer))
- release();
-
- updatePowerClient(gIOPMPowerClientDevice, 0);
- computeDesiredState();
-
- fIdleTimerStopped = true;
- }
-
-done:
- OUR_PMLog( kPMLogStartParentChange, fHeadNotePowerState, fCurrentPowerState );
- PM_LOG("%s::%s cancel=%d\n", fName, __FUNCTION__, cancel);
-}
-
// MARK: -
// MARK: Ack and Settle timers
void IOService::startSettleTimer( void )
{
+#if NOT_USEFUL
+ // This function is broken and serves no useful purpose since it never
+ // updates fSettleTimeUS to a non-zero value to stall the state machine,
+ // yet it starts a delay timer. It appears no driver relies on a delay
+ // from settleUpTime and settleDownTime in the power state table.
+
AbsoluteTime deadline;
IOPMPowerStateIndex i;
uint32_t settleTime = 0;
pending = thread_call_enter_delayed(fSettleTimer, deadline);
if (pending) release();
}
+#endif
}
//*********************************************************************************
{
int i;
+ if (fResetPowerStateOnWake && (domainState & kIOPMRootDomainState))
+ {
+ // Return lowest power state for any root power domain changes
+ return 0;
+ }
+
if (fNumberOfPowerStates == 0 )
{
return 0;
// Catch requests created by idleTimerExpired().
if ((request->getType() == kIOPMRequestTypeActivityTickle) &&
- (request->fArg1 == (void *) false))
+ (((uintptr_t) request->fArg1) & kTickleTypePowerDrop) &&
+ fIdleTimerPeriod)
{
- // Idle timer expiration - power drop request completed.
- // Restart the idle timer if deviceDesire can go lower, otherwise set
- // a flag so we know to restart idle timer when fDeviceDesire > 0.
-
- if (fDeviceDesire > 0)
- {
- fActivityTickleCount = 0;
- clock_get_uptime(&fIdleTimerStartTime);
- start_PM_idle_timer();
- }
- else if (fHasAdvisoryDesire)
- {
- start_PM_idle_timer();
- }
- else
- {
- fIdleTimerStopped = true;
- }
+ restartIdleTimer();
}
// If the request is linked, then Work queue has already incremented its
fIsPreChange = false;
if (fHeadNoteChangeFlags & kIOPMParentInitiated)
+ {
fMachineState = kIOPM_SyncFinish;
+ }
else
+ {
+ assert(IS_ROOT_DOMAIN);
fMachineState = kIOPM_SyncTellCapabilityDidChange;
+ }
fDriverCallReason = kDriverCallInformPostChange;
notifyChildren();
case kIOPMRequestTypeSetIdleTimerPeriod:
{
fIdleTimerPeriod = (uintptr_t) request->fArg0;
-
if ((false == fLockedFlags.PMStop) && (fIdleTimerPeriod > 0))
- {
- fActivityTickleCount = 0;
- clock_get_uptime(&fIdleTimerStartTime);
- start_PM_idle_timer();
- }
+ restartIdleTimer();
}
break;
fType = kIOPMRequestTypeInvalid;
+#if NOT_READY
if (fCompletionAction)
{
fCompletionAction(fCompletionTarget, fCompletionParam, fCompletionStatus);
}
+#endif
if (fTarget)
{
fRequestNext = next;
fRequestNext->fWorkWaitCount++;
#if LOG_REQUEST_ATTACH
- kprintf("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
+ PM_LOG("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
this, (uint32_t) fType, fRequestNext,
(uint32_t) fRequestNext->fType,
(uint32_t) fRequestNext->fWorkWaitCount,
if (fRequestNext->fWorkWaitCount)
fRequestNext->fWorkWaitCount--;
#if LOG_REQUEST_ATTACH
- kprintf("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
+ PM_LOG("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
this, (uint32_t) fType, fRequestNext,
(uint32_t) fRequestNext->fType,
(uint32_t) fRequestNext->fWorkWaitCount,
fRequestRoot = root;
fRequestRoot->fFreeWaitCount++;
#if LOG_REQUEST_ATTACH
- kprintf("Attached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
+ PM_LOG("Attached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
this, (uint32_t) fType, fRequestRoot,
(uint32_t) fRequestRoot->fType,
(uint32_t) fRequestRoot->fFreeWaitCount,
if (fRequestRoot->fFreeWaitCount)
fRequestRoot->fFreeWaitCount--;
#if LOG_REQUEST_ATTACH
- kprintf("Detached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
+ PM_LOG("Detached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
this, (uint32_t) fType, fRequestRoot,
(uint32_t) fRequestRoot->fType,
(uint32_t) fRequestRoot->fFreeWaitCount,
// PM state lock.
IOLock * PMLock;
- unsigned int InitialPowerChange:1;
- unsigned int InitialSetPowerState:1;
- unsigned int DeviceOverrideEnabled:1;
- unsigned int DoNotPowerDown:1;
- unsigned int ParentsKnowState:1;
- unsigned int StrictTreeOrder:1;
- unsigned int IdleTimerStopped:1;
- unsigned int AdjustPowerScheduled:1;
- unsigned int IsPreChange:1;
- unsigned int DriverCallBusy:1;
- unsigned int PCDFunctionOverride:1;
- unsigned int IdleTimerIgnored:1;
- unsigned int HasAdvisoryDesire:1;
- unsigned int AdvisoryTickleUsed:1;
+ unsigned int InitialPowerChange :1;
+ unsigned int InitialSetPowerState :1;
+ unsigned int DeviceOverrideEnabled :1;
+ unsigned int DoNotPowerDown :1;
+ unsigned int ParentsKnowState :1;
+ unsigned int StrictTreeOrder :1;
+ unsigned int IdleTimerStopped :1;
+ unsigned int AdjustPowerScheduled :1;
+
+ unsigned int IsPreChange :1;
+ unsigned int DriverCallBusy :1;
+ unsigned int PCDFunctionOverride :1;
+ unsigned int IdleTimerIgnored :1;
+ unsigned int HasAdvisoryDesire :1;
+ unsigned int AdvisoryTickleUsed :1;
+ unsigned int ResetPowerStateOnWake :1;
// Time of last device activity.
AbsoluteTime DeviceActiveTimestamp;
#define fIdleTimerIgnored pwrMgt->IdleTimerIgnored
#define fHasAdvisoryDesire pwrMgt->HasAdvisoryDesire
#define fAdvisoryTickleUsed pwrMgt->AdvisoryTickleUsed
+#define fResetPowerStateOnWake pwrMgt->ResetPowerStateOnWake
#define fDeviceActiveTimestamp pwrMgt->DeviceActiveTimestamp
#define fActivityLock pwrMgt->ActivityLock
#define fIdleTimerPeriod pwrMgt->IdleTimerPeriod
#define kIOPMSyncTellPowerDown 0x0400 // send the ask/will power off messages
#define kIOPMSyncCancelPowerDown 0x0800 // sleep cancel for maintenance wake
#define kIOPMInitialPowerChange 0x1000 // set for initial power change
+#define kIOPMRootChangeUp 0x2000 // Root power domain change up
+#define kIOPMRootChangeDown 0x4000 // Root power domain change down
+
+#define kIOPMRootBroadcastFlags (kIOPMSynchronize | \
+ kIOPMRootChangeUp | kIOPMRootChangeDown)
+
+// Activity tickle request flags
+#define kTickleTypePowerDrop 0x01
+#define kTickleTypePowerRise 0x02
+#define kTickleTypeActivity 0x04
+#define kTickleTypeAdvisory 0x08
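// Illustrative sketch (not part of the patch): how the new flag and
// generation values travel through an activity-tickle request. The helper
// name below is invented; it is shown as if it were an IOService method so
// that acquirePMRequest()/submitPMRequest() resolve as in the code above.
void IOService::exampleQueueTickle( uint32_t tickleFlags )
{
    IOPMRequest * request;

    request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
    if (request)
    {
        request->fArg0 = (void *) 0;                                 // power state (unused here)
        request->fArg1 = (void *)(uintptr_t) tickleFlags;            // kTickleType* bits
        request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration;  // wake generation
        submitPMRequest( request );
    }
}
// The handler recovers the values with the matching casts:
//     uint32_t tickleFlags      = (uint32_t)(uintptr_t) request->fArg1;
//     uint32_t tickleGeneration = (uint32_t)(uintptr_t) request->fArg2;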
enum {
kDriverCallInformPreChange,
set $kgm_actint_framecount = 0
while ($mysp != 0) && (($mysp & $stkmask) == 0) \
&& ($mysp != $prevsp) \
- && ((((unsigned long) $mysp ^ (unsigned long) $prevsp) < 0x2000) \
+ && ((((unsigned long) $mysp - (unsigned long) $prevsp) < 0x4000) \
|| (((unsigned long)$mysp < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \
&& ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack)))) \
&& ($kgm_actint_framecount < 128)
if (superClassLink) {
superClassLink->removeInstance(reserved->instances, true);
}
+ IOLockLock(sAllClassesLock);
reserved->instances->release();
reserved->instances = 0;
+ IOLockUnlock(sAllClassesLock);
}
}
.align 2, 0x90
.globl __get_cpu_capabilities
__get_cpu_capabilities:
- movq $(_COMM_PAGE_CPU_CAPABILITIES), %rax
- movl (%rax), %eax
+ movq $(_COMM_PAGE_CPU_CAPABILITIES64), %rax
+ movq (%rax), %rax
ret
#elif defined(__i386__)
.align 2, 0x90
.globl __get_cpu_capabilities
__get_cpu_capabilities:
- movl _COMM_PAGE_CPU_CAPABILITIES, %eax
+ movl _COMM_PAGE_CPU_CAPABILITIES64, %eax
+ movl _COMM_PAGE_CPU_CAPABILITIES64+4, %edx
ret
#else
osfmk/i386/startup64.c standard
osfmk/x86_64/idt64.s standard
+
machine_conf();
-#if NOTYET
- ml_thrm_init(); /* Start thermal monitoring on this processor */
-#endif
-
/*
* Start the system.
*/
#include <kern/kalloc.h>
#include <sys/kdebug.h>
+#include <i386/machine_cpu.h>
+#include <i386/misc_protos.h>
+#include <i386/cpuid.h>
+
+#define PERMIT_PERMCHECK (0)
+
diagWork dgWork;
uint64_t lastRuptClear = 0ULL;
typedef struct {
uint64_t caperf;
uint64_t cmperf;
- uint64_t ccres[3];
- uint64_t crtimes[4];
- uint64_t citimes[4];
+ uint64_t ccres[6];
+ uint64_t crtimes[CPU_RTIME_BINS];
+ uint64_t citimes[CPU_ITIME_BINS];
uint64_t crtime_total;
uint64_t citime_total;
+ uint64_t cpu_idle_exits;
+ uint64_t cpu_insns;
+ uint64_t cpu_ucc;
+ uint64_t cpu_urc;
} core_energy_stat_t;
typedef struct {
- uint64_t pkg_cres[2][4];
+ uint64_t pkg_cres[2][7];
uint64_t pkg_power_unit;
uint64_t pkg_energy;
+ uint64_t pp0_energy;
+ uint64_t pp1_energy;
+ uint64_t ddr_energy;
+ uint64_t llc_flushed_cycles;
+ uint64_t ring_ratio_instantaneous;
+ uint64_t IA_frequency_clipping_cause;
+ uint64_t GT_frequency_clipping_cause;
+ uint64_t pkg_idle_exits;
+ uint64_t pkg_rtimes[CPU_RTIME_BINS];
+ uint64_t pkg_itimes[CPU_ITIME_BINS];
+ uint64_t mbus_delay_time;
+ uint64_t mint_delay_time;
uint32_t ncpus;
core_energy_stat_t cest[];
} pkg_energy_statistics_t;
int
diagCall64(x86_saved_state_t * state)
{
- uint64_t curpos, i, j;
- uint64_t selector, data;
- uint64_t currNap, durNap;
+ uint64_t curpos, i, j;
+ uint64_t selector, data;
+ uint64_t currNap, durNap;
x86_saved_state64_t *regs;
boolean_t diagflag;
uint32_t rval = 0;
pkes.pkg_cres[0][2] = ((uint64_t)c6h << 32) | c6l;
pkes.pkg_cres[0][3] = ((uint64_t)c7h << 32) | c7l;
+ uint32_t cpumodel = cpuid_info()->cpuid_model;
+ boolean_t c8avail;
+ switch (cpumodel) {
+ case CPUID_MODEL_HASWELL_ULT:
+ c8avail = TRUE;
+ break;
+ default:
+ c8avail = FALSE;
+ break;
+ }
+ uint64_t c8r = ~0ULL, c9r = ~0ULL, c10r = ~0ULL;
+
+ if (c8avail) {
+ rdmsr64_carefully(MSR_IA32_PKG_C8_RESIDENCY, &c8r);
+ rdmsr64_carefully(MSR_IA32_PKG_C9_RESIDENCY, &c9r);
+ rdmsr64_carefully(MSR_IA32_PKG_C10_RESIDENCY, &c10r);
+ }
+
+ pkes.pkg_cres[0][4] = c8r;
+ pkes.pkg_cres[0][5] = c9r;
+ pkes.pkg_cres[0][6] = c10r;
+
+ pkes.ddr_energy = ~0ULL;
+ rdmsr64_carefully(MSR_IA32_DDR_ENERGY_STATUS, &pkes.ddr_energy);
+ pkes.llc_flushed_cycles = ~0ULL;
+ rdmsr64_carefully(MSR_IA32_LLC_FLUSHED_RESIDENCY_TIMER, &pkes.llc_flushed_cycles);
+
+ pkes.ring_ratio_instantaneous = ~0ULL;
+ rdmsr64_carefully(MSR_IA32_RING_PERF_STATUS, &pkes.ring_ratio_instantaneous);
+
+ pkes.IA_frequency_clipping_cause = ~0ULL;
+ rdmsr64_carefully(MSR_IA32_IA_PERF_LIMIT_REASONS, &pkes.IA_frequency_clipping_cause);
+
+ pkes.GT_frequency_clipping_cause = ~0ULL;
+ rdmsr64_carefully(MSR_IA32_GT_PERF_LIMIT_REASONS, &pkes.GT_frequency_clipping_cause);
+
rdmsr_carefully(MSR_IA32_PKG_POWER_SKU_UNIT, &pkg_unit_l, &pkg_unit_h);
rdmsr_carefully(MSR_IA32_PKG_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
-
pkes.pkg_power_unit = ((uint64_t)pkg_unit_h << 32) | pkg_unit_l;
pkes.pkg_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
+ rdmsr_carefully(MSR_IA32_PP0_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
+ pkes.pp0_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
+
+ rdmsr_carefully(MSR_IA32_PP1_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
+ pkes.pp1_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
+
+ pkes.pkg_idle_exits = current_cpu_datap()->lcpu.package->package_idle_exits;
pkes.ncpus = real_ncpus;
(void) ml_set_interrupts_enabled(TRUE);
mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_powerstats, NULL);
for (i = 0; i < real_ncpus; i++) {
+ (void) ml_set_interrupts_enabled(FALSE);
+
cest.caperf = cpu_data_ptr[i]->cpu_aperf;
cest.cmperf = cpu_data_ptr[i]->cpu_mperf;
cest.ccres[0] = cpu_data_ptr[i]->cpu_c3res;
bcopy(&cpu_data_ptr[i]->cpu_rtimes[0], &cest.crtimes[0], sizeof(cest.crtimes));
bcopy(&cpu_data_ptr[i]->cpu_itimes[0], &cest.citimes[0], sizeof(cest.citimes));
+
cest.citime_total = cpu_data_ptr[i]->cpu_itime_total;
cest.crtime_total = cpu_data_ptr[i]->cpu_rtime_total;
+ cest.cpu_idle_exits = cpu_data_ptr[i]->cpu_idle_exits;
+ cest.cpu_insns = cpu_data_ptr[i]->cpu_cur_insns;
+ cest.cpu_ucc = cpu_data_ptr[i]->cpu_cur_ucc;
+ cest.cpu_urc = cpu_data_ptr[i]->cpu_cur_urc;
+ (void) ml_set_interrupts_enabled(TRUE);
copyout(&cest, curpos, sizeof(cest));
curpos += sizeof(cest);
rval = 1;
}
break;
+ case dgEnaPMC:
+ {
+ boolean_t enable = TRUE;
+ mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_pmc_control, &enable);
+ rval = 1;
+ }
+ break;
#if DEBUG
case dgGzallocTest:
kfree(ptr, 1024);
*ptr = 0x42;
}
- break;
+ break;
#endif
-#if defined(__x86_64__)
+#if PERMIT_PERMCHECK
case dgPermCheck:
{
(void) ml_set_interrupts_enabled(TRUE);
rval = pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL);
}
break;
-#endif /* __x86_64__*/
+#endif /* PERMIT_PERMCHECK */
default: /* Handle invalid ones */
rval = 0; /* Return an exception */
void cpu_powerstats(__unused void *arg) {
cpu_data_t *cdp = current_cpu_datap();
- int cnum = cdp->cpu_number;
+ __unused int cnum = cdp->cpu_number;
uint32_t cl = 0, ch = 0, mpl = 0, mph = 0, apl = 0, aph = 0;
rdmsr_carefully(MSR_IA32_MPERF, &mpl, &mph);
cdp->cpu_mperf = ((uint64_t)mph << 32) | mpl;
cdp->cpu_aperf = ((uint64_t)aph << 32) | apl;
- if (cnum & 1)
- return;
+ uint64_t ctime = mach_absolute_time();
+ cdp->cpu_rtime_total += ctime - cdp->cpu_ixtime;
+ cdp->cpu_ixtime = ctime;
rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
cdp->cpu_c3res = ((uint64_t)ch << 32) | cl;
rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
cdp->cpu_c7res = ((uint64_t)ch << 32) | cl;
+
+ uint64_t insns = read_pmc(FIXED_PMC0);
+ uint64_t ucc = read_pmc(FIXED_PMC1);
+ uint64_t urc = read_pmc(FIXED_PMC2);
+ cdp->cpu_cur_insns = insns;
+ cdp->cpu_cur_ucc = ucc;
+ cdp->cpu_cur_urc = urc;
+}
+
+void cpu_pmc_control(void *enablep) {
+ boolean_t enable = *(boolean_t *)enablep;
+ cpu_data_t *cdp = current_cpu_datap();
+
+ if (enable) {
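+ /* IA32_PERF_GLOBAL_CTRL (0x38F): enable fixed counters 0-2 and
+ * general-purpose counters 0-3. IA32_FIXED_CTR_CTRL (0x38D) = 0x333:
+ * count at ring 0 and ring 3 on each fixed counter.
+ * CR4.PCE additionally permits RDPMC from user mode.
+ */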
+ wrmsr64(0x38F, 0x70000000FULL);
+ wrmsr64(0x38D, 0x333);
+ set_cr4(get_cr4() | CR4_PCE);
+
+ } else {
+ wrmsr64(0x38F, 0);
+ wrmsr64(0x38D, 0);
+ set_cr4((get_cr4() & ~CR4_PCE));
+ }
+ cdp->cpu_fixed_pmcs_enabled = enable;
}
#define dgBind 18
#define dgAcntg 20
#define dgKlra 21
-#define dgKfree 22
+#define dgEnaPMC 22
#define dgWar 23
#define dgNapStat 24
#define dgRuptStat 25
extern diagWork dgWork;
-
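+/* With bit 30 of %ecx set, RDPMC reads a fixed-function counter; bits 0-29
+ * then select the counter: 0 = instructions retired, 1 = unhalted core
+ * cycles, 2 = unhalted reference cycles.
+ */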
+#define FIXED_PMC (1 << 30)
+#define FIXED_PMC0 (FIXED_PMC)
+#define FIXED_PMC1 (FIXED_PMC | 1)
+#define FIXED_PMC2 (FIXED_PMC | 2)
+
+static inline uint64_t read_pmc(uint32_t counter)
+{
+ uint32_t lo = 0, hi = 0;
+ __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
+ return ((((uint64_t)hi) << 32) | ((uint64_t)lo));
+}
#endif /* _DIAGNOSTICS_H_ */
#endif /* KERNEL_PRIVATE */
#include <i386/tsc.h>
#include <kern/cpu_data.h>
+#include <kern/etimer.h>
+#include <kern/machine.h>
+#include <kern/timer_queue.h>
#include <console/serial_protos.h>
#include <machine/pal_routines.h>
#include <vm/vm_page.h>
#include <IOKit/IOHibernatePrivate.h>
#endif
#include <IOKit/IOPlatformExpert.h>
-
#include <sys/kdebug.h>
#if CONFIG_SLEEP
extern void acpi_sleep_cpu(acpi_sleep_callback, void * refcon);
-extern void acpi_wake_prot(void);
+extern void acpi_wake_prot(void);
#endif
extern kern_return_t IOCPURunPlatformQuiesceActions(void);
extern kern_return_t IOCPURunPlatformActiveActions(void);
unsigned int save_kdebug_enable = 0;
static uint64_t acpi_sleep_abstime;
+static uint64_t acpi_idle_abstime;
+static uint64_t acpi_wake_abstime;
+boolean_t deep_idle_rebase = TRUE;
#if CONFIG_SLEEP
static void
extern void slave_pstart(void);
+extern unsigned int wake_nkdbufs;
void
acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
if (lapic_probe())
lapic_configure();
+ acpi_wake_abstime = mach_absolute_time();
+
/* let the realtime clock reset */
rtc_sleep_wakeup(acpi_sleep_abstime);
kdebug_enable = save_kdebug_enable;
+ if (kdebug_enable == 0) {
+ if (wake_nkdbufs)
+ start_kern_tracing(wake_nkdbufs, TRUE);
+ }
+
+ /* Reconfigure FP/SIMD unit */
+ init_fpu();
+ clear_ts();
+
IOCPURunPlatformActiveActions();
if (did_hibernate) {
/* Restart timer interrupts */
rtc_timer_start();
- /* Reconfigure FP/SIMD unit */
- init_fpu();
+
#if HIBERNATION
#ifdef __i386__
#endif
}
+/*
+ * acpi_idle_kernel is called by the ACPI Platform kext to request the kernel
+ * to idle the boot processor in the deepest C-state for S0 sleep. All slave
+ * processors are expected to have already been offlined in the deepest C-state.
+ *
+ * The contract with ACPI is that although the kernel is called with interrupts
+ * disabled, interrupts may need to be re-enabled to dismiss any pending timer
+ * interrupt. However, the callback function will be called once this has
+ * occurred and interrupts are guaranteed to be disabled at that time,
+ * and to remain disabled during C-state entry, exit (wake) and return
+ * from acpi_idle_kernel.
+ */
+void
+acpi_idle_kernel(acpi_sleep_callback func, void *refcon)
+{
+ boolean_t istate = ml_get_interrupts_enabled();
+
+ kprintf("acpi_idle_kernel, cpu=%d, interrupts %s\n",
+ cpu_number(), istate ? "enabled" : "disabled");
+
+ assert(cpu_number() == master_cpu);
+
+ /*
+ * Effectively set the boot cpu offline.
+ * This will stop further deadlines being set.
+ */
+ cpu_datap(master_cpu)->cpu_running = FALSE;
+
+ /* Cancel any pending deadline */
+ setPop(0);
+ while (lapic_is_interrupting(LAPIC_TIMER_VECTOR)) {
+ (void) ml_set_interrupts_enabled(TRUE);
+ setPop(0);
+ ml_set_interrupts_enabled(FALSE);
+ }
+
+ /*
+ * Call back to the caller to indicate that interrupts will remain
+ * disabled while we deep idle, wake and return.
+ */
+ func(refcon);
+
+ acpi_idle_abstime = mach_absolute_time();
+
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEEP_IDLE) | DBG_FUNC_START,
+ acpi_idle_abstime, deep_idle_rebase, 0, 0, 0);
+
+ /*
+ * Disable tracing during S0-sleep
+ * unless overridden by sysctl -w machdep.tsc.deep_idle_rebase=0
+ */
+ if (deep_idle_rebase) {
+ save_kdebug_enable = kdebug_enable;
+ kdebug_enable = 0;
+ }
+
+ /*
+ * Call into power-management to enter the lowest C-state.
+ * Note: when called on the boot processor, this routine will
+ * return directly when awoken.
+ */
+ pmCPUHalt(PM_HALT_SLEEP);
+
+ /*
+ * Get wakeup time relative to the TSC which has progressed.
+ * Then rebase nanotime to reflect time not progressing over sleep
+ * - unless overridden so that tracing can occur during deep_idle.
+ */
+ acpi_wake_abstime = mach_absolute_time();
+ if (deep_idle_rebase) {
+ rtc_sleep_wakeup(acpi_idle_abstime);
+ kdebug_enable = save_kdebug_enable;
+ }
+
+ cpu_datap(master_cpu)->cpu_running = TRUE;
+
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEEP_IDLE) | DBG_FUNC_END,
+ acpi_wake_abstime, acpi_wake_abstime - acpi_idle_abstime, 0, 0, 0);
+
+ /* Like S3 sleep, turn on tracing if trace_wake boot-arg is present */
+ if (kdebug_enable == 0) {
+ if (wake_nkdbufs)
+ start_kern_tracing(wake_nkdbufs, TRUE);
+ }
+
+ IOCPURunPlatformActiveActions();
+
+ /* Restart timer interrupts */
+ rtc_timer_start();
+}
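/* Illustrative sketch (not part of the patch): a hypothetical ACPI-side
 * caller, showing the callback contract described above. The callback runs
 * with interrupts disabled, and they stay disabled through C-state entry,
 * wake, and the return from acpi_idle_kernel(). The names below are
 * invented for illustration only.
 */
static void example_idle_prep(void *refcon)
{
	/* Interrupts are guaranteed off here; commit any state needed
	 * before the deep C-state is entered. */
	(void) refcon;
}

static void example_enter_s0_idle(void)
{
	acpi_idle_kernel(example_idle_prep, NULL);
	/* Returns with interrupts still disabled; timer interrupts have
	 * been restarted by rtc_timer_start(). */
}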
+
extern char real_mode_bootstrap_end[];
extern char real_mode_bootstrap_base[];
typedef void (*acpi_sleep_callback)(void * refcon);
extern vm_offset_t acpi_install_wake_handler(void);
extern void acpi_sleep_kernel(acpi_sleep_callback func, void * refcon);
+extern void acpi_idle_kernel(acpi_sleep_callback func, void * refcon);
void install_real_mode_bootstrap(void *prot_entry);
#endif /* ASSEMBLER */
char *commPagePtr32 = NULL; // virtual addr in kernel map of 32-bit commpage
char *commPagePtr64 = NULL; // ...and of 64-bit commpage
-char *commPageTextPtr32 = NULL; // virtual addr in kernel map of 32-bit commpage
-char *commPageTextPtr64 = NULL; // ...and of 64-bit commpage
-uint32_t _cpu_capabilities = 0; // define the capability vector
+char *commPageTextPtr32 = NULL; // virtual addr in kernel map of 32-bit commpage
+char *commPageTextPtr64 = NULL; // ...and of 64-bit commpage
-int noVMX = 0; /* if true, do not set kHasAltivec in ppc _cpu_capabilities */
+uint64_t _cpu_capabilities = 0; // define the capability vector
typedef uint32_t commpage_address_t;
-static commpage_address_t next; // next available address in comm page
-static commpage_address_t cur_routine; // comm page address of "current" routine
-static boolean_t matched; // true if we've found a match for "current" routine
+static commpage_address_t next; // next available address in comm page
static char *commPagePtr; // virtual addr in kernel map of commpage we are working on
static commpage_address_t commPageBaseOffset; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map
static void
commpage_init_cpu_capabilities( void )
{
- uint32_t bits;
+ uint64_t bits;
int cpus;
ml_cpu_info_t cpu_info;
}
cpus = commpage_cpus(); // how many CPUs do we have
- if (cpus == 1)
- bits |= kUP;
-
bits |= (cpus << kNumCPUsShift);
bits |= kFastThreadLocalStorage; // we use %gs for TLS
- if (cpu_mode_is64bit()) // k64Bit means processor is 64-bit capable
- bits |= k64Bit;
-
- if (tscFreq <= SLOW_TSC_THRESHOLD) /* is TSC too slow for _commpage_nanotime? */
- bits |= kSlow;
-
- bits |= (cpuid_features() & CPUID_FEATURE_AES) ? kHasAES : 0;
-
- bits |= (cpuid_features() & CPUID_FEATURE_F16C) ? kHasF16C : 0;
- bits |= (cpuid_features() & CPUID_FEATURE_RDRAND) ? kHasRDRAND : 0;
- bits |= ((cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_ENFSTRG) &&
- (rdmsr64(MSR_IA32_MISC_ENABLE) & 1ULL )) ? kHasENFSTRG : 0;
-
+#define setif(_bits, _bit, _condition) \
+ if (_condition) _bits |= _bit
+
+ setif(bits, kUP, cpus == 1);
+ setif(bits, k64Bit, cpu_mode_is64bit());
+ setif(bits, kSlow, tscFreq <= SLOW_TSC_THRESHOLD);
+
+ setif(bits, kHasAES, cpuid_features() &
+ CPUID_FEATURE_AES);
+ setif(bits, kHasF16C, cpuid_features() &
+ CPUID_FEATURE_F16C);
+ setif(bits, kHasRDRAND, cpuid_features() &
+ CPUID_FEATURE_RDRAND);
+ setif(bits, kHasFMA, cpuid_features() &
+ CPUID_FEATURE_FMA);
+
+ setif(bits, kHasBMI1, cpuid_leaf7_features() &
+ CPUID_LEAF7_FEATURE_BMI1);
+ setif(bits, kHasBMI2, cpuid_leaf7_features() &
+ CPUID_LEAF7_FEATURE_BMI2);
+ setif(bits, kHasRTM, cpuid_leaf7_features() &
+ CPUID_LEAF7_FEATURE_RTM);
+ setif(bits, kHasHLE, cpuid_leaf7_features() &
+ CPUID_LEAF7_FEATURE_HLE);
+ setif(bits, kHasAVX2_0, cpuid_leaf7_features() &
+ CPUID_LEAF7_FEATURE_AVX2);
+
+ uint64_t misc_enable = rdmsr64(MSR_IA32_MISC_ENABLE);
+ setif(bits, kHasENFSTRG, (misc_enable & 1ULL) &&
+ (cpuid_leaf7_features() &
+ CPUID_LEAF7_FEATURE_ENFSTRG));
+
_cpu_capabilities = bits; // set kernel version for use by drivers etc
}
-int
+uint64_t
_get_cpu_capabilities(void)
{
return _cpu_capabilities;
*/
static void
commpage_stuff_routine(
- commpage_descriptor *rd )
+ commpage_descriptor *rd )
{
- uint32_t must,cant;
-
- if (rd->commpage_address != cur_routine) {
- if ((cur_routine!=0) && (matched==0))
- panic("commpage no match for last, next address %08x", rd->commpage_address);
- cur_routine = rd->commpage_address;
- matched = 0;
- }
-
- must = _cpu_capabilities & rd->musthave;
- cant = _cpu_capabilities & rd->canthave;
-
- if ((must == rd->musthave) && (cant == 0)) {
- if (matched)
- panic("commpage multiple matches for address %08x", rd->commpage_address);
- matched = 1;
-
- commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length);
- }
+ commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length);
}
/* Fill in the 32- or 64-bit commpage. Called once for each.
const char* signature, // "commpage 32-bit" or "commpage 64-bit"
vm_prot_t uperm)
{
- uint8_t c1;
- short c2;
- int c4;
- uint64_t c8;
+ uint8_t c1;
+ uint16_t c2;
+ int c4;
+ uint64_t c8;
uint32_t cfamily;
short version = _COMM_PAGE_THIS_VERSION;
next = 0;
- cur_routine = 0;
commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used, uperm );
*kernAddressPtr = commPagePtr; // save address either in commPagePtr32 or 64
commPageBaseOffset = base_offset;
/* Stuff in the constants. We move things into the comm page in strictly
* ascending order, so we can check for overlap and panic if so.
+ * Note: the 32-bit cpu_capabilities vector is retained in addition to
+ * the expanded 64-bit vector.
*/
- commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)strlen(signature));
+ commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)MIN(_COMM_PAGE_SIGNATURELEN, strlen(signature)));
+ commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES64,&_cpu_capabilities,sizeof(_cpu_capabilities));
commpage_stuff(_COMM_PAGE_VERSION,&version,sizeof(short));
- commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int));
+ commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(uint32_t));
c2 = 32; // default
if (_cpu_capabilities & kCache64)
else if (_cpu_capabilities & kCache128)
c2 = 128;
commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2);
-
+
c4 = MP_SPIN_TRIES;
commpage_stuff(_COMM_PAGE_SPIN_COUNT,&c4,4);
void commpage_text_populate( void ){
commpage_descriptor **rd;
- next =0;
- cur_routine=0;
+ next = 0;
commPagePtr = (char *) commpage_allocate(commpage_text32_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
commPageTextPtr32 = commPagePtr;
for (rd = commpage_32_routines; *rd != NULL; rd++) {
commpage_stuff_routine(*rd);
}
- if (!matched)
- panic(" commpage_text no match for last routine ");
#ifndef __LP64__
pmap_commpage32_init((vm_offset_t) commPageTextPtr32, _COMM_PAGE_TEXT_START,
#endif
if (_cpu_capabilities & k64Bit) {
- next =0;
- cur_routine=0;
+ next = 0;
commPagePtr = (char *) commpage_allocate(commpage_text64_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
commPageTextPtr64 = commPagePtr;
#endif
}
- if (!matched)
- panic(" commpage_text no match for last routine ");
-
if (next > _COMM_PAGE_TEXT_END)
panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next, commPagePtr);
}
-/* Update commpage nanotime information. Note that we interleave
- * setting the 32- and 64-bit commpages, in order to keep nanotime more
- * nearly in sync between the two environments.
+/* Update commpage nanotime information.
*
* This routine must be serialized by some external means, ie a lock.
*/
panic("nanotime trouble 1"); /* possibly not serialized */
if ( ns_base < p32->nt_ns_base )
panic("nanotime trouble 2");
- if ((shift != 32) && ((_cpu_capabilities & kSlow)==0) )
+ if ((shift != 0) && ((_cpu_capabilities & kSlow)==0) )
panic("nanotime trouble 3");
next_gen = ++generation;
cp = commPagePtr32;
if ( cp ) {
cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_BASE_ADDRESS);
- ip = (uint32_t*) cp;
+ ip = (uint32_t*) (void *) cp;
*ip = (uint32_t) pressure;
}
cp = commPagePtr64;
if ( cp ) {
cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_START_ADDRESS);
- ip = (uint32_t*) cp;
+ ip = (uint32_t*) (void *) cp;
*ip = (uint32_t) pressure;
}
cp = commPagePtr32;
if ( cp ) {
cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_BASE_ADDRESS);
- ip = (uint32_t*) cp;
+ ip = (uint32_t*) (void *) cp;
*ip = (uint32_t) count;
}
cp = commPagePtr64;
if ( cp ) {
cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_START_ADDRESS);
- ip = (uint32_t*) cp;
+ ip = (uint32_t*) (void *) cp;
*ip = (uint32_t) count;
}
.align alignment, 0x90 ;\
L ## label ## :
-#define COMMPAGE_DESCRIPTOR(label,address,must,cant) \
+#define COMMPAGE_DESCRIPTOR(label,address) \
L ## label ## _end: ;\
.set L ## label ## _size, L ## label ## _end - L ## label ;\
.const_data ;\
COMMPAGE_DESCRIPTOR_FIELD_POINTER L ## label ;\
.long L ## label ## _size ;\
.long address ;\
- .long must ;\
- .long cant ;\
.text
void *code_address; // address of code
uint32_t code_length; // length in bytes
uint32_t commpage_address; // put at this address (_COMM_PAGE_BCOPY etc)
- uint32_t musthave; // _cpu_capability bits we must have
- uint32_t canthave; // _cpu_capability bits we can't have
} commpage_descriptor;
movl $(-58),%eax /* 58 = pfz_exit */
xorl %ebx,%ebx // clear "preemption pending" flag
sysenter
-COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT,0,0)
+COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT)
/* Subroutine to back off if we cannot get the spinlock. Called
cmpl $0,8(%edi) // sniff the lockword
jnz 1b // loop if still taken
ret // lockword is free, so reenter PFZ
-COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF,0,0)
+COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF)
/* Preemption-free-zone routine to FIFO Enqueue:
movl %esi,4(%edi) // new element becomes last in q
movl $0,8(%edi) // unlock spinlock
ret
-COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE)
/* Preemption-free-zone routine to FIFO Dequeue:
4:
movl $0,8(%edi) // unlock spinlock
ret
-COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE)
popq %rcx
popq %rax
ret
-COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT,0,0)
+COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT)
/* Subroutine to back off if we cannot get the spinlock. Called
cmpl $0,16(%rdi) // sniff the lockword
jnz 1b // loop if still taken
ret // lockword is free, so reenter PFZ
-COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF,0,0)
+COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF)
/* Preemption-free-zone routine to FIFO Enqueue:
movq %rsi,8(%rdi) // new element becomes last in q
movl $0,16(%rdi) // unlock spinlock
ret
-COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE)
4:
movl $0,16(%rdi) // unlock spinlock
ret
-COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE)
orl $0x00180000,%eax // copy 24 bytes of arguments in trampoline
xorl %ebx,%ebx // clear preemption flag
sysenter
-COMMPAGE_DESCRIPTOR(pfz_mutex_lock,_COMM_PAGE_PFZ_MUTEX_LOCK,0,0)
+COMMPAGE_DESCRIPTOR(pfz_mutex_lock,_COMM_PAGE_PFZ_MUTEX_LOCK)
movl $PTHRW_STATUS_SYSCALL,%eax // we made syscall
popq %rbp
ret
-COMMPAGE_DESCRIPTOR(pfz_mutex_lock_64,_COMM_PAGE_PFZ_MUTEX_LOCK,0,0)
+COMMPAGE_DESCRIPTOR(pfz_mutex_lock_64,_COMM_PAGE_PFZ_MUTEX_LOCK)
{
cpu_data_t *cdp = current_cpu_datap();
- i386_deactivate_cpu();
-
PE_cpu_machine_quiesce(cdp->cpu_id);
cpu_thread_halt();
#define kSlow 0x00004000 /* tsc < nanosecond */
#define kUP 0x00008000 /* set if (kNumCPUs == 1) */
#define kNumCPUs 0x00FF0000 /* number of CPUs (see _NumCPUs() below) */
+#define kNumCPUsShift 16
#define kHasAVX1_0 0x01000000
#define kHasRDRAND 0x02000000
#define kHasF16C 0x04000000
#define kHasENFSTRG 0x08000000
-#define kNumCPUsShift 16 /* see _NumCPUs() below */
+#define kHasFMA 0x10000000
+#define kHasAVX2_0 0x20000000
+#define kHasBMI1 0x40000000
+#define kHasBMI2 0x80000000
+/* Extending into 64-bits from here: */
+#define kHasRTM 0x0000000100000000ULL
+#define kHasHLE 0x0000000200000000ULL
+
#ifndef __ASSEMBLER__
#include <sys/cdefs.h>
__BEGIN_DECLS
-extern int _get_cpu_capabilities( void );
+extern uint64_t _get_cpu_capabilities( void );
__END_DECLS
inline static
int _NumCPUs( void )
{
- return (_get_cpu_capabilities() & kNumCPUs) >> kNumCPUsShift;
+ return (int) (_get_cpu_capabilities() & kNumCPUs) >> kNumCPUsShift;
}
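/* Illustrative sketch (not part of the patch): with the capability vector
 * now 64 bits wide, callers test the extended bits the same way as the low
 * 32, e.g. the new TSX bits defined above. The helper name is invented.
 */
inline static
int _HasTSX( void )
{
	return (_get_cpu_capabilities() & (kHasRTM | kHasHLE)) != 0;
}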
#endif /* __ASSEMBLER__ */
/* data in the comm page */
-#define _COMM_PAGE_SIGNATURE (_COMM_PAGE_START_ADDRESS+0x000) /* first few bytes are a signature */
+#define _COMM_PAGE_SIGNATURE (_COMM_PAGE_START_ADDRESS+0x000) /* first 16 bytes are a signature */
+#define _COMM_PAGE_SIGNATURELEN (0x10)
+#define _COMM_PAGE_CPU_CAPABILITIES64 (_COMM_PAGE_START_ADDRESS+0x010) /* uint64_t _cpu_capabilities */
+#define _COMM_PAGE_UNUSED (_COMM_PAGE_START_ADDRESS+0x018) /* 6 unused bytes */
#define _COMM_PAGE_VERSION (_COMM_PAGE_START_ADDRESS+0x01E) /* 16-bit version# */
-#define _COMM_PAGE_THIS_VERSION 12 /* version of the commarea format */
+#define _COMM_PAGE_THIS_VERSION 13 /* in ver 13, _COMM_PAGE_NT_SHIFT defaults to 0 (was 32) */
-#define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_START_ADDRESS+0x020) /* uint32_t _cpu_capabilities */
+#define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_START_ADDRESS+0x020) /* uint32_t _cpu_capabilities (retained for compatibility) */
#define _COMM_PAGE_NCPUS (_COMM_PAGE_START_ADDRESS+0x022) /* uint8_t number of configured CPUs (hw.logicalcpu at boot time) */
-#define _COMM_PAGE_UNUSED0 (_COMM_PAGE_START_ADDRESS+0x024) /* 2 unused bytes, reserved for future expansion of cpu_capabilities */
+#define _COMM_PAGE_UNUSED0 (_COMM_PAGE_START_ADDRESS+0x024) /* 2 unused bytes, previously reserved for expansion of cpu_capabilities */
#define _COMM_PAGE_CACHE_LINESIZE (_COMM_PAGE_START_ADDRESS+0x026) /* uint16_t cache line size */
#define _COMM_PAGE_SCHED_GEN (_COMM_PAGE_START_ADDRESS+0x028) /* uint32_t scheduler generation number (count of pre-emptions) */
typedef uint16_t pcid_t;
typedef uint8_t pcid_ref_t;
+
+#define CPU_RTIME_BINS (12)
+#define CPU_ITIME_BINS (CPU_RTIME_BINS)
+
/*
* Per-cpu data.
*
int cpu_prior_signals; /* Last set of events,
* debugging
*/
- int cpu_mcount_off; /* mcount recursion */
ast_t cpu_pending_ast;
- int cpu_type;
- int cpu_subtype;
- int cpu_threadtype;
- int cpu_running;
+ volatile int cpu_running;
+ boolean_t cpu_fixed_pmcs_enabled;
rtclock_timer_t rtclock_timer;
boolean_t cpu_is64bit;
volatile addr64_t cpu_active_cr3 __attribute((aligned(64)));
struct fake_descriptor *cpu_ldtp;
cpu_desc_index_t cpu_desc_index;
int cpu_ldt;
- boolean_t cpu_iflag;
- boolean_t cpu_boot_complete;
- int cpu_hibernate;
#if NCOPY_WINDOWS > 0
vm_offset_t cpu_copywindow_base;
uint64_t *cpu_copywindow_pdp;
vm_offset_t cpu_physwindow_base;
uint64_t *cpu_physwindow_ptep;
#endif
- void *cpu_hi_iss;
#define HWINTCNT_SIZE 256
uint32_t cpu_hwIntCnt[HWINTCNT_SIZE]; /* Interrupt counts */
+ uint64_t cpu_hwIntpexits[HWINTCNT_SIZE];
+ uint64_t cpu_hwIntcexits[HWINTCNT_SIZE];
uint64_t cpu_dr7; /* debug control register */
uint64_t cpu_int_event_time; /* intr entry/exit time */
-#if CONFIG_VMX
- vmx_cpu_t cpu_vmx; /* wonderful world of virtualization */
-#endif
-#if CONFIG_MCA
- struct mca_state *cpu_mca_state; /* State at MC fault */
-#endif
uint64_t cpu_uber_arg_store; /* Double mapped address
* of current thread's
* uu_arg array.
uint64_t cpu_c7res;
uint64_t cpu_itime_total;
uint64_t cpu_rtime_total;
- uint64_t cpu_rtimes[4];
- uint64_t cpu_itimes[4];
uint64_t cpu_ixtime;
+ uint64_t cpu_idle_exits;
+ uint64_t cpu_rtimes[CPU_RTIME_BINS];
+ uint64_t cpu_itimes[CPU_ITIME_BINS];
+ uint64_t cpu_cur_insns;
+ uint64_t cpu_cur_ucc;
+ uint64_t cpu_cur_urc;
uint64_t cpu_max_observed_int_latency;
int cpu_max_observed_int_latency_vector;
uint64_t debugger_entry_time;
+ uint64_t debugger_ipi_time;
volatile boolean_t cpu_NMI_acknowledged;
/* A separate nested interrupt stack flag, to account
* for non-nested interrupts arriving while on the interrupt stack
uint32_t cpu_nested_istack_events;
x86_saved_state64_t *cpu_fatal_trap_state;
x86_saved_state64_t *cpu_post_fatal_trap_state;
+#if CONFIG_VMX
+ vmx_cpu_t cpu_vmx; /* wonderful world of virtualization */
+#endif
+#if CONFIG_MCA
+ struct mca_state *cpu_mca_state; /* State at MC fault */
+#endif
+ int cpu_type;
+ int cpu_subtype;
+ int cpu_threadtype;
+ boolean_t cpu_iflag;
+ boolean_t cpu_boot_complete;
+ int cpu_hibernate;
} cpu_data_t;
extern cpu_data_t *cpu_data_ptr[];
void *pmStats; /* Power Management stats for package*/
void *pmState; /* Power Management state for package*/
struct mca_state *mca_state; /* MCA state for memory errors */
+ uint64_t package_idle_exits;
uint32_t num_idle;
} x86_pkg_t;
{ 0x70, CACHE, TRACE, 8, 12*K, NA },
{ 0x71, CACHE, TRACE, 8, 16*K, NA },
{ 0x72, CACHE, TRACE, 8, 32*K, NA },
+ { 0x76, TLB, INST, NA, BOTH, 8 },
{ 0x78, CACHE, L2, 4, 1*M, 64 },
{ 0x79, CACHE, L2_2LINESECTOR, 8, 128*K, 64 },
{ 0x7A, CACHE, L2_2LINESECTOR, 8, 256*K, 64 },
{ 0xB2, TLB, INST, 4, SMALL, 64 },
{ 0xB3, TLB, DATA, 4, SMALL, 128 },
{ 0xB4, TLB, DATA1, 4, SMALL, 256 },
+ { 0xB5, TLB, DATA1, 8, SMALL, 64 },
+ { 0xB6, TLB, DATA1, 8, SMALL, 128 },
{ 0xBA, TLB, DATA1, 4, BOTH, 64 },
- { 0xCA, STLB, DATA1, 4, BOTH, 512 },
+ { 0xC1, STLB, DATA1, 8, SMALL, 1024},
+ { 0xCA, STLB, DATA1, 4, SMALL, 512 },
{ 0xD0, CACHE, L3, 4, 512*K, 64 },
{ 0xD1, CACHE, L3, 4, 1*M, 64 },
{ 0xD2, CACHE, L3, 4, 2*M, 64 },
ctp->sensor = bitfield32(reg[eax], 0, 0);
ctp->dynamic_acceleration = bitfield32(reg[eax], 1, 1);
ctp->invariant_APIC_timer = bitfield32(reg[eax], 2, 2);
- ctp->core_power_limits = bitfield32(reg[eax], 3, 3);
- ctp->fine_grain_clock_mod = bitfield32(reg[eax], 4, 4);
- ctp->package_thermal_intr = bitfield32(reg[eax], 5, 5);
+ ctp->core_power_limits = bitfield32(reg[eax], 4, 4);
+ ctp->fine_grain_clock_mod = bitfield32(reg[eax], 5, 5);
+ ctp->package_thermal_intr = bitfield32(reg[eax], 6, 6);
ctp->thresholds = bitfield32(reg[ebx], 3, 0);
ctp->ACNT_MCNT = bitfield32(reg[ecx], 0, 0);
ctp->hardware_feedback = bitfield32(reg[ecx], 1, 1);
- ctp->energy_policy = bitfield32(reg[ecx], 2, 2);
+ ctp->energy_policy = bitfield32(reg[ecx], 3, 3);
info_p->cpuid_thermal_leafp = ctp;
DBG(" Thermal/Power Leaf:\n");
DBG(" package_thermal_intr : %d\n", ctp->package_thermal_intr);
DBG(" thresholds : %d\n", ctp->thresholds);
DBG(" ACNT_MCNT : %d\n", ctp->ACNT_MCNT);
- DBG(" hardware_feedback : %d\n", ctp->hardware_feedback);
+ DBG(" ACNT2 : %d\n", ctp->hardware_feedback);
DBG(" energy_policy : %d\n", ctp->energy_policy);
}
DBG(" EDX : 0x%x\n", xsp->extended_state[edx]);
}
- if (info_p->cpuid_model == CPUID_MODEL_IVYBRIDGE) {
+ if (info_p->cpuid_model >= CPUID_MODEL_IVYBRIDGE) {
/*
- * XSAVE Features:
+ * Leaf7 Features:
*/
cpuid_fn(0x7, reg);
info_p->cpuid_leaf7_features = reg[ebx];
case CPUID_MODEL_IVYBRIDGE:
cpufamily = CPUFAMILY_INTEL_IVYBRIDGE;
break;
+ case CPUID_MODEL_HASWELL:
+ case CPUID_MODEL_HASWELL_ULT:
+ case CPUID_MODEL_CRYSTALWELL:
+ cpufamily = CPUFAMILY_INTEL_HASWELL;
+ break;
}
break;
}
info_p->thread_count = bitfield32((uint32_t)msr, 15, 0);
break;
}
+ case CPUFAMILY_INTEL_HASWELL:
case CPUFAMILY_INTEL_IVYBRIDGE:
case CPUFAMILY_INTEL_SANDYBRIDGE:
case CPUFAMILY_INTEL_NEHALEM: {
{CPUID_FEATURE_TM2, "TM2"},
{CPUID_FEATURE_SSSE3, "SSSE3"},
{CPUID_FEATURE_CID, "CID"},
+ {CPUID_FEATURE_FMA, "FMA"},
{CPUID_FEATURE_CX16, "CX16"},
{CPUID_FEATURE_xTPR, "TPR"},
{CPUID_FEATURE_PDCM, "PDCM"},
{CPUID_FEATURE_SSE4_1, "SSE4.1"},
{CPUID_FEATURE_SSE4_2, "SSE4.2"},
- {CPUID_FEATURE_xAPIC, "xAPIC"},
+ {CPUID_FEATURE_x2APIC, "x2APIC"},
{CPUID_FEATURE_MOVBE, "MOVBE"},
{CPUID_FEATURE_POPCNT, "POPCNT"},
{CPUID_FEATURE_AES, "AES"},
},
leaf7_feature_map[] = {
{CPUID_LEAF7_FEATURE_RDWRFSGS, "RDWRFSGS"},
+ {CPUID_LEAF7_FEATURE_TSCOFF, "TSC_THREAD_OFFSET"},
+ {CPUID_LEAF7_FEATURE_BMI1, "BMI1"},
+ {CPUID_LEAF7_FEATURE_HLE, "HLE"},
{CPUID_LEAF7_FEATURE_SMEP, "SMEP"},
+ {CPUID_LEAF7_FEATURE_AVX2, "AVX2"},
+ {CPUID_LEAF7_FEATURE_BMI2, "BMI2"},
{CPUID_LEAF7_FEATURE_ENFSTRG, "ENFSTRG"},
+ {CPUID_LEAF7_FEATURE_INVPCID, "INVPCID"},
+ {CPUID_LEAF7_FEATURE_RTM, "RTM"},
{0, 0}
};
#define CPUID_FEATURE_SSSE3 _HBit(9) /* Supplemental SSE3 instructions */
#define CPUID_FEATURE_CID _HBit(10) /* L1 Context ID */
#define CPUID_FEATURE_SEGLIM64 _HBit(11) /* 64-bit segment limit checking */
+#define CPUID_FEATURE_FMA _HBit(12) /* Fused-Multiply-Add support */
#define CPUID_FEATURE_CX16 _HBit(13) /* CmpXchg16b instruction */
#define CPUID_FEATURE_xTPR _HBit(14) /* Send Task PRiority msgs */
#define CPUID_FEATURE_PDCM _HBit(15) /* Perf/Debug Capability MSR */
#define CPUID_FEATURE_DCA _HBit(18) /* Direct Cache Access */
#define CPUID_FEATURE_SSE4_1 _HBit(19) /* Streaming SIMD extensions 4.1 */
#define CPUID_FEATURE_SSE4_2 _HBit(20) /* Streaming SIMD extensions 4.2 */
-#define CPUID_FEATURE_xAPIC _HBit(21) /* Extended APIC Mode */
+#define CPUID_FEATURE_x2APIC _HBit(21) /* Extended APIC Mode */
#define CPUID_FEATURE_MOVBE _HBit(22) /* MOVBE instruction */
#define CPUID_FEATURE_POPCNT _HBit(23) /* POPCNT instruction */
#define CPUID_FEATURE_TSCTMR _HBit(24) /* TSC deadline timer */
* Bits returned in %ebx to a CPUID request with {%eax,%ecx} of (0x7,0x0}:
*/
#define CPUID_LEAF7_FEATURE_RDWRFSGS _Bit(0) /* FS/GS base read/write */
+#define CPUID_LEAF7_FEATURE_TSCOFF _Bit(1) /* TSC thread offset */
+#define CPUID_LEAF7_FEATURE_BMI1 _Bit(3) /* Bit Manipulation Instrs, set 1 */
+#define CPUID_LEAF7_FEATURE_HLE _Bit(4) /* Hardware Lock Elision */
+#define CPUID_LEAF7_FEATURE_AVX2 _Bit(5) /* AVX2 Instructions */
#define CPUID_LEAF7_FEATURE_SMEP _Bit(7) /* Supervisor Mode Execute Protect */
+#define CPUID_LEAF7_FEATURE_BMI2 _Bit(8) /* Bit Manipulation Instrs, set 2 */
#define CPUID_LEAF7_FEATURE_ENFSTRG _Bit(9) /* ENhanced Fast STRinG copy */
+#define CPUID_LEAF7_FEATURE_INVPCID _Bit(10) /* INVPCID instruction, TBD */
+#define CPUID_LEAF7_FEATURE_RTM _Bit(11) /* TBD */
/*
* The CPUID_EXTFEATURE_XXX values define 64-bit values
#define CPUID_MODEL_SANDYBRIDGE 0x2A
#define CPUID_MODEL_JAKETOWN 0x2D
#define CPUID_MODEL_IVYBRIDGE 0x3A
+#define CPUID_MODEL_HASWELL 0x3C
+#define CPUID_MODEL_HASWELL_SVR 0x3F
+#define CPUID_MODEL_HASWELL_ULT 0x45
+#define CPUID_MODEL_CRYSTALWELL 0x46
#define CPUID_VMM_FAMILY_UNKNOWN 0x0
void
etimer_resync_deadlines(void)
{
- uint64_t deadline;
+ uint64_t deadline = EndOfAllTime;
uint64_t pmdeadline;
rtclock_timer_t *mytimer;
spl_t s = splclock();
uint32_t decr;
pp = current_cpu_datap();
- deadline = EndOfAllTime;
+ if (!pp->cpu_running)
+ /* There's really nothing to do if this processor is down */
+ return;
/*
* If we have a clock timer set, pick that.
if (fp_kind == FP_NO)
return KERN_FAILURE;
+ if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
+ !ml_fpu_avx_enabled())
+ return KERN_FAILURE;
+
state = (x86_float_state64_t *)tstate;
assert(thr_act != THREAD_NULL);
if (fp_kind == FP_NO)
return KERN_FAILURE;
+ if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
+ !ml_fpu_avx_enabled())
+ return KERN_FAILURE;
+
state = (x86_float_state64_t *)tstate;
assert(thr_act != THREAD_NULL);
offsetof(cpu_data_t *,cpu_number));
DECLARE("CPU_RUNNING",
offsetof(cpu_data_t *,cpu_running));
- DECLARE("CPU_MCOUNT_OFF",
- offsetof(cpu_data_t *,cpu_mcount_off));
DECLARE("CPU_PENDING_AST",
offsetof(cpu_data_t *,cpu_pending_ast));
DECLARE("CPU_DESC_TABLEP",
index = (virt >> I386_LPGSHIFT);
virt += (uintptr_t)(phys & I386_LPGMASK);
phys = ((phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+ if (phys == BootPTD[index]) return (virt);
BootPTD[index] = phys;
invlpg(virt);
BootPTD[index + 1] = (phys + I386_LPGBYTES);
tsc_init();
power_management_init();
-
processor_bootstrap();
thread_bootstrap();
mca_cpu_init();
#endif
+ LAPIC_INIT();
lapic_configure();
LAPIC_DUMP();
LAPIC_CPU_MAP_DUMP();
#if CONFIG_MTRR
mtrr_update_cpu();
#endif
+ /* update CPU microcode */
+ ucode_update_wake();
} else
init_param = FAST_SLAVE_INIT;
- /* update CPU microcode */
- ucode_update_wake();
-
#if CONFIG_VMX
/* resume VT operation */
vmx_resume();
/* Base vector for local APIC interrupt sources */
int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
-#define MAX_LAPICIDS (LAPIC_ID_MAX+1)
int lapic_to_cpu[MAX_LAPICIDS];
int cpu_to_lapic[MAX_CPUS];
#define LAPIC_MSR(reg) (LAPIC_MSR_BASE + LAPIC_MSR_OFFSET(reg))
typedef struct {
- void (*init) (void);
- uint32_t (*read) (lapic_register_t);
- void (*write)(lapic_register_t, uint32_t);
+ void (*init) (void);
+ uint32_t (*read) (lapic_register_t);
+ void (*write) (lapic_register_t, uint32_t);
+ uint64_t (*read_icr) (void);
+ void (*write_icr) (uint32_t, uint32_t);
} lapic_ops_table_t;
extern lapic_ops_table_t *lapic_ops;
+#define LAPIC_INIT() lapic_ops->init();
#define LAPIC_WRITE(reg,val) lapic_ops->write(reg, val)
#define LAPIC_READ(reg) lapic_ops->read(reg)
#define LAPIC_READ_OFFSET(reg,off) LAPIC_READ((reg)+(off))
+#define LAPIC_READ_ICR() lapic_ops->read_icr()
+#define LAPIC_WRITE_ICR(dst,cmd) lapic_ops->write_icr(dst, cmd)
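/* Illustrative sketch (not part of the patch): the ICR accessors hide the
 * difference between the legacy MMIO ICR/ICRD pair and the single x2APIC
 * ICR MSR. The function name below is invented; its body mirrors the
 * fixed-vector IPI send that appears later in this patch.
 */
static inline void
example_send_fixed_ipi(int cpu, int vector)
{
	/* Wait for any previous send to complete, then issue the IPI */
	while (LAPIC_READ_ICR() & LAPIC_ICR_DS_PENDING)
		cpu_pause();
	LAPIC_WRITE_ICR(cpu_to_lapic[cpu], vector | LAPIC_ICR_DM_FIXED);
}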
typedef enum {
periodic,
#define LAPIC_PM_INTERRUPT 0x7
#define LAPIC_PMC_SWI_VECTOR (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_PMC_SW_INTERRUPT)
+#define LAPIC_TIMER_VECTOR (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT)
/* The vector field is ignored for NMI interrupts via the LAPIC
* or otherwise, so this is not an offset from the interrupt
extern void lapic_interrupt_counts(uint64_t intrs[256]);
extern void lapic_disable_timer(void);
+#define MAX_LAPICIDS (LAPIC_ID_MAX+1)
#ifdef MP_DEBUG
-extern void lapic_cpu_map_dump(void);
#define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump()
#define LAPIC_DUMP() lapic_dump()
#else
vm_map_offset_t lapic_vbase64;
/* Establish a map to the local apic */
- lapic_vbase64 = (vm_offset_t)vm_map_min(kernel_map);
- result = vm_map_find_space(kernel_map,
- &lapic_vbase64,
- round_page(LAPIC_SIZE), 0,
- VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
- /* Convert 64-bit vm_map_offset_t to "pointer sized" vm_offset_t
- */
- lapic_vbase = (vm_offset_t) lapic_vbase64;
- if (result != KERN_SUCCESS) {
- panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
+ if (lapic_vbase == 0) {
+ lapic_vbase64 = (vm_offset_t)vm_map_min(kernel_map);
+ result = vm_map_find_space(kernel_map,
+ &lapic_vbase64,
+ round_page(LAPIC_SIZE), 0,
+ VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
+ /* Convert 64-bit vm_map_offset_t to "pointer sized" vm_offset_t
+ */
+ lapic_vbase = (vm_offset_t) lapic_vbase64;
+ if (result != KERN_SUCCESS) {
+ panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
+ }
+ vm_map_unlock(kernel_map);
+
+ /*
+ * Map in the local APIC non-cacheable, as recommended by Intel
+ * in section 8.4.1 of the "System Programming Guide".
+ * In fact, this is redundant because EFI will have assigned an
+ * MTRR physical range containing the local APIC's MMIO space as
+ * UC and this will override the default PAT setting.
+ */
+ pmap_enter(pmap_kernel(),
+ lapic_vbase,
+ (ppnum_t) i386_btop(lapic_pbase),
+ VM_PROT_READ|VM_PROT_WRITE,
+ VM_PROT_NONE,
+ VM_WIMG_IO,
+ TRUE);
}
- vm_map_unlock(kernel_map);
/*
- * Map in the local APIC non-cacheable, as recommended by Intel
- * in section 8.4.1 of the "System Programming Guide".
- * In fact, this is redundant because EFI will have assigned an
- * MTRR physical range containing the local APIC's MMIO space as
- * UC and this will override the default PAT setting.
+ * Set flat delivery model, logical processor id
+ * This should already be the default set.
*/
- pmap_enter(pmap_kernel(),
- lapic_vbase,
- (ppnum_t) i386_btop(lapic_pbase),
- VM_PROT_READ|VM_PROT_WRITE,
- VM_PROT_NONE,
- VM_WIMG_IO,
- TRUE);
+ LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
+ LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
}
*LAPIC_MMIO(reg) = value;
}
+static uint64_t
+legacy_read_icr(void)
+{
+ return (((uint64_t)*LAPIC_MMIO(ICRD)) << 32) | ((uint64_t)*LAPIC_MMIO(ICR));
+}
+
+static void
+legacy_write_icr(uint32_t dst, uint32_t cmd)
+{
+ *LAPIC_MMIO(ICRD) = dst << LAPIC_ICRD_DEST_SHIFT;
+ *LAPIC_MMIO(ICR) = cmd;
+}
+
static lapic_ops_table_t legacy_ops = {
legacy_init,
legacy_read,
- legacy_write
+ legacy_write,
+ legacy_read_icr,
+ legacy_write_icr
};
+static boolean_t is_x2apic = FALSE;
+
static void
x2apic_init(void)
{
+ uint32_t lo;
+ uint32_t hi;
+
+ rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+ if ((lo & MSR_IA32_APIC_BASE_EXTENDED) == 0) {
+ lo |= MSR_IA32_APIC_BASE_EXTENDED;
+ wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+ kprintf("x2APIC mode enabled\n");
+ }
}
static uint32_t
wrmsr(LAPIC_MSR(reg), value, 0);
}
+static uint64_t
+x2apic_read_icr(void)
+{
+ return rdmsr64(LAPIC_MSR(ICR));
+}
+
+static void
+x2apic_write_icr(uint32_t dst, uint32_t cmd)
+{
+ wrmsr(LAPIC_MSR(ICR), cmd, dst);
+}
+
static lapic_ops_table_t x2apic_ops = {
x2apic_init,
x2apic_read,
- x2apic_write
+ x2apic_write,
+ x2apic_read_icr,
+ x2apic_write_icr
};
-
void
lapic_init(void)
{
uint32_t hi;
boolean_t is_boot_processor;
boolean_t is_lapic_enabled;
- boolean_t is_x2apic;
/* Examine the local APIC state */
rdmsr(MSR_IA32_APIC_BASE, lo, hi);
if (!is_boot_processor || !is_lapic_enabled)
panic("Unexpected local APIC state\n");
+ /*
+ * If x2APIC is available and not already enabled, enable it.
+ * Unless overridden by boot-arg.
+ */
+ if (!is_x2apic && (cpuid_features() & CPUID_FEATURE_x2APIC)) {
+ PE_parse_boot_argn("-x2apic", &is_x2apic, sizeof(is_x2apic));
+ kprintf("x2APIC supported %s be enabled\n",
+ is_x2apic ? "and will" : "but will not");
+ }
+
lapic_ops = is_x2apic ? &x2apic_ops : &legacy_ops;
- lapic_ops->init();
+ LAPIC_INIT();
+ kprintf("ID: 0x%x LDR: 0x%x\n", LAPIC_READ(ID), LAPIC_READ(LDR));
if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
panic("Local APIC version 0x%x, 0x14 or more expected\n",
(LAPIC_READ(VERSION)&LAPIC_VERSION_MASK));
LAPIC_READ(APR)&LAPIC_APR_MASK,
LAPIC_READ(PPR)&LAPIC_PPR_MASK);
kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
- LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
+ is_x2apic ? 0 : LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
LAPIC_READ(LDR)>>LAPIC_LDR_SHIFT);
kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE),
}
}
- /* Set flat delivery model, logical processor id */
- LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
- LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
-
/* Accept all */
LAPIC_WRITE(TPR, 0);
state = ml_set_interrupts_enabled(FALSE);
/* Wait for pending outgoing send to complete */
- while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
+ while (LAPIC_READ_ICR() & LAPIC_ICR_DS_PENDING) {
cpu_pause();
}
- LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
- LAPIC_WRITE(ICR, vector | LAPIC_ICR_DM_FIXED);
+ LAPIC_WRITE_ICR(cpu_to_lapic[cpu], vector | LAPIC_ICR_DM_FIXED);
(void) ml_set_interrupts_enabled(state);
}
mca_get_availability(void)
{
uint64_t features = cpuid_info()->cpuid_features;
- uint32_t family = cpuid_info()->cpuid_family;
+ uint32_t family = cpuid_info()->cpuid_family;
+ uint32_t model = cpuid_info()->cpuid_model;
+ uint32_t stepping = cpuid_info()->cpuid_stepping;
mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0;
mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0;
mca_family = family;
-
+
+ if ((model == CPUID_MODEL_HASWELL && stepping < 3) ||
+ (model == CPUID_MODEL_HASWELL_ULT && stepping < 1) ||
+ (model == CPUID_MODEL_CRYSTALWELL && stepping < 1))
+ panic("Haswell pre-C0 steppings are not supported");
+
/*
* If MCA, the number of banks etc is reported by the IA32_MCG_CAP MSR.
*/
* instead of spinning for clock_delay_until().
*/
void
-ml_init_delay_spin_threshold(void)
+ml_init_delay_spin_threshold(int threshold_us)
{
- nanoseconds_to_absolutetime(10ULL * NSEC_PER_USEC, &delay_spin_threshold);
+ nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
}
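/*
 * [Editor's note] The boot path below calls ml_init_delay_spin_threshold(10),
 * so ml_delay_should_spin() presumably reports TRUE for requested delays of
 * up to about 10us; clock_delay_until() then spins in machine_delay_until()
 * for those short delays (or whenever preemption or interrupts are disabled)
 * and blocks on a wait queue for anything longer.
 */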
boolean_t
}
/*
- * This is called from the machine-independent routine cpu_up()
+ * This is called from the machine-independent layer
* to perform machine-dependent info updates. Defer to cpu_thread_init().
*/
void
}
/*
- * This is called from the machine-independent routine cpu_down()
+ * This is called from the machine-independent layer
* to perform machine-dependent info updates.
*/
void
ml_cpu_down(void)
{
+ i386_deactivate_cpu();
+
return;
}
void ml_get_timebase(unsigned long long *timestamp);
void ml_init_lock_timeout(void);
-void ml_init_delay_spin_threshold(void);
+void ml_init_delay_spin_threshold(int);
boolean_t ml_delay_should_spin(uint64_t interval);
#define FAST_SLAVE_INIT ((void *)(uintptr_t)1)
uint64_t ml_early_random(void);
+void cpu_pmc_control(void *);
#endif /* _I386_MISC_PROTOS_H_ */
#define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
#define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
#define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
+#define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
+#define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
+#define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
#define ABS(v) (((v) > 0)?(v):-(v))
*/
return(rc);
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
+ slot_num, 0, 0, 0, 0);
+
/*
* Wait until the CPU is back online.
*/
mp_wait_for_cpu_up(slot_num, 30000, 1);
mp_enable_preemption();
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
+ slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
+
/*
* Check to make sure that the CPU is really running. If not,
* go through the slow path.
if (cpu_number() != psip->starter_cpu)
return;
+ DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
+ arg, psip->target_cpu, psip->target_lapic);
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_START | DBG_FUNC_START,
+ psip->target_cpu,
+ psip->target_lapic, 0, 0, 0);
+
i386_start_cpu(psip->target_lapic, psip->target_cpu);
#ifdef POSTCODE_DELAY
/* Wait much longer if postcodes are displayed for a delay period. */
i *= 10000;
#endif
+ DBG("start_cpu(%p) about to wait for cpu %d\n",
+ arg, psip->target_cpu);
+
mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_START | DBG_FUNC_END,
+ psip->target_cpu,
+ cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
+
if (TSC_sync_margin &&
cpu_datap(psip->target_cpu)->cpu_running) {
/*
cpu_data_t *cdp = current_cpu_datap();
assert(!ml_get_interrupts_enabled());
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
+ 0, 0, 0, 0, 0);
simple_lock(&x86_topo_lock);
cdp->cpu_running = FALSE;
simple_unlock(&x86_topo_lock);
+ /*
+ * Move all of this cpu's timers to the master/boot cpu,
+ * and poke it in case there's a sooner deadline for it to schedule.
+ */
timer_queue_shutdown(&cdp->rtclock_timer.queue);
- cdp->rtclock_timer.deadline = EndOfAllTime;
mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);
/*
- * In case a rendezvous/braodcast/call was initiated to this cpu
- * before we cleared cpu_running, we must perform any actions due.
+ * Open an interrupt window
+ * and ensure any pending IPI or timer is serviced
*/
- if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
- mp_rendezvous_action();
- if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
- mp_broadcast_action();
- if (i_bit(MP_CALL, &cdp->cpu_signals))
- mp_cpus_call_action();
- cdp->cpu_signals = 0; /* all clear */
+ mp_disable_preemption();
+ ml_set_interrupts_enabled(TRUE);
+
+ while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
+ cpu_pause();
+ /*
+ * Ensure there's no remaining timer deadline set
+ * - AICPM may have left one active.
+ */
+ setPop(0);
+
+ ml_set_interrupts_enabled(FALSE);
+ mp_enable_preemption();
+
+ KERNEL_DEBUG_CONSTANT(
+ TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
+ 0, 0, 0, 0, 0);
}
int pmsafe_debug = 1;
cpu_NMI_interrupt(cpu);
}
- DBG("mp_kdp_enter() %u processors done %s\n",
+ DBG("mp_kdp_enter() %d processors done %s\n",
(int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
postcode(MP_KDP_ENTER);
DBG("mp_kdp_wait()\n");
/* If an I/O port has been specified as a debugging aid, issue a read */
panic_io_port_read();
-
+ current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
#if CONFIG_MCA
/* If we've trapped due to a machine-check, save MCA registers */
mca_check_save();
clock_init();
cpu_machine_init(); /* Interrupts enabled hereafter */
mp_cpus_call_cpu_init();
+ } else {
+ cpu_machine_init(); /* Interrupts enabled hereafter */
}
}
void
i386_start_cpu(int lapic_id, __unused int cpu_num )
{
- LAPIC_WRITE(ICRD, lapic_id << LAPIC_ICRD_DEST_SHIFT);
- LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
+ LAPIC_WRITE_ICR(lapic_id, LAPIC_ICR_DM_INIT);
delay(100);
-
- LAPIC_WRITE(ICRD, lapic_id << LAPIC_ICRD_DEST_SHIFT);
- LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));
+ LAPIC_WRITE_ICR(lapic_id,
+ LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));
}
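/*
 * [Editor's note] The STARTUP IPI's vector field is the 4KB page number of
 * the real-mode entry point, so the AP begins executing at physical address
 * (vector << 12).  Passing REAL_MODE_BOOTSTRAP_OFFSET >> 12 therefore starts
 * it at REAL_MODE_BOOTSTRAP_OFFSET; e.g. for a hypothetical offset of 0x8000
 * the vector would be 0x8 and the AP would start at physical 0x8000.
 */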
void
{
boolean_t state = ml_set_interrupts_enabled(FALSE);
/* Program the interrupt command register */
- LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
/* The vector is ignored in this case--the target CPU will enter on the
* NMI vector.
*/
- LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
+ LAPIC_WRITE_ICR(cpu_to_lapic[cpu],
+ LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
(void) ml_set_interrupts_enabled(state);
}
volatile uint64_t tsc_base; /* timestamp */
volatile uint64_t ns_base; /* nanoseconds */
uint32_t scale; /* tsc -> nanosec multiplier */
- uint32_t shift; /* tsc -> nanosec shift/div */
- /* shift is overloaded with
- * lower 32bits of tsc_freq
- * on slower machines (SLOW_TSC_THRESHOLD) */
+ uint32_t shift; /* shift is nonzero only on "slow" machines, */
+ /* i.e. where tscFreq <= SLOW_TSC_THRESHOLD */
volatile uint32_t generation; /* 0 == being updated */
uint32_t spare1;
};
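/*
 * [Editor's worked example, not part of the change] With
 * scale = (NSEC_PER_SEC << 32) / (tscFreq << shift):
 *
 *	tscFreq = 2.4GHz              -> shift = 0, scale = 2^32/2.4 ~= 0x6AAAAAAA
 *	tscFreq = 800MHz (hypothetical,
 *	  below SLOW_TSC_THRESHOLD)   -> shift = 1, scale = 2^32/1.6  = 0xA0000000
 *
 * and nanoseconds are then
 * ns_base + ((((tsc - tsc_base) << shift) * scale) >> 32).
 */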
-
/*
* Copyright (c) 2009 Apple Inc. All rights reserved.
*
/* Include a PAL-specific header, too, for xnu-internal overrides */
#include <i386/pal_native.h>
+
extern boolean_t virtualized;
#define PAL_VIRTUALIZED_PROPERTY_VALUE 4
return fpu_set_fxstate(thr_act, tstate, flavor);
}
+ case x86_AVX_STATE:
+ {
+ x86_avx_state_t *state;
+
+ if (count != x86_AVX_STATE_COUNT)
+ return(KERN_INVALID_ARGUMENT);
+
+ state = (x86_avx_state_t *)tstate;
+ if (state->ash.flavor == x86_AVX_STATE64 &&
+ state->ash.count == x86_FLOAT_STATE64_COUNT &&
+ thread_is_64bit(thr_act)) {
+ return fpu_set_fxstate(thr_act,
+ (thread_state_t)&state->ufs.as64,
+ x86_FLOAT_STATE64);
+ }
+ if (state->ash.flavor == x86_FLOAT_STATE32 &&
+ state->ash.count == x86_FLOAT_STATE32_COUNT &&
+ !thread_is_64bit(thr_act)) {
+ return fpu_set_fxstate(thr_act,
+ (thread_state_t)&state->ufs.as32,
+ x86_FLOAT_STATE32);
+ }
+ return(KERN_INVALID_ARGUMENT);
+ }
+
case x86_THREAD_STATE32:
{
if (count != x86_THREAD_STATE32_COUNT)
break;
}
+ case THREAD_STATE_FLAVOR_LIST_10_9:
+ {
+ if (*count < 5)
+ return (KERN_INVALID_ARGUMENT);
+
+ tstate[0] = x86_THREAD_STATE;
+ tstate[1] = x86_FLOAT_STATE;
+ tstate[2] = x86_EXCEPTION_STATE;
+ tstate[3] = x86_DEBUG_STATE;
+ tstate[4] = x86_AVX_STATE;
+
+ *count = 5;
+ break;
+ }
+
case x86_SAVED_STATE32:
{
x86_saved_state32_t *state;
return(kret);
}
- case x86_AVX_STATE32:
- {
+ case x86_AVX_STATE32:
+ {
if (*count != x86_AVX_STATE32_COUNT)
return(KERN_INVALID_ARGUMENT);
*count = x86_AVX_STATE32_COUNT;
return fpu_get_fxstate(thr_act, tstate, flavor);
- }
+ }
- case x86_AVX_STATE64:
- {
+ case x86_AVX_STATE64:
+ {
if (*count != x86_AVX_STATE64_COUNT)
return(KERN_INVALID_ARGUMENT);
*count = x86_AVX_STATE64_COUNT;
return fpu_get_fxstate(thr_act, tstate, flavor);
- }
+ }
+
+ case x86_AVX_STATE:
+ {
+ x86_avx_state_t *state;
+ kern_return_t kret;
+
+ if (*count < x86_AVX_STATE_COUNT)
+ return(KERN_INVALID_ARGUMENT);
+
+ state = (x86_avx_state_t *)tstate;
+
+ bzero((char *)state, sizeof(x86_avx_state_t));
+ if (thread_is_64bit(thr_act)) {
+ state->ash.flavor = x86_AVX_STATE64;
+ state->ash.count = x86_AVX_STATE64_COUNT;
+ kret = fpu_get_fxstate(thr_act,
+ (thread_state_t)&state->ufs.as64,
+ x86_AVX_STATE64);
+ } else {
+ state->ash.flavor = x86_AVX_STATE32;
+ state->ash.count = x86_AVX_STATE32_COUNT;
+ kret = fpu_get_fxstate(thr_act,
+ (thread_state_t)&state->ufs.as32,
+ x86_AVX_STATE32);
+ }
+ *count = x86_AVX_STATE_COUNT;
+
+ return(kret);
+ }
case x86_THREAD_STATE32:
{
#include <kern/sched_prim.h>
#include <i386/lapic.h>
#include <i386/pal_routines.h>
-
#include <sys/kdebug.h>
extern int disableConsoleOutput;
#define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL
+uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16* NSEC_PER_USEC, 32* NSEC_PER_USEC, 64* NSEC_PER_USEC, 128* NSEC_PER_USEC, 256* NSEC_PER_USEC, 512* NSEC_PER_USEC, 1024* NSEC_PER_USEC, 2048* NSEC_PER_USEC, 4096* NSEC_PER_USEC, 8192* NSEC_PER_USEC, 16384* NSEC_PER_USEC, 32768* NSEC_PER_USEC};
+uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];
+
/*
* The following is set when the KEXT loads and initializes.
*/
pmDispatch_t *pmDispatch = NULL;
-static uint32_t pmInitDone = 0;
+uint32_t pmInitDone = 0;
static boolean_t earlyTopology = FALSE;
static uint64_t earlyMaxBusDelay = DELAY_UNSET;
static uint64_t earlyMaxIntDelay = DELAY_UNSET;
(*pmDispatch->cstateInit)();
}
-#define CPU_ACTIVE_STAT_BIN_1 (500000)
-#define CPU_ACTIVE_STAT_BIN_2 (2000000)
-#define CPU_ACTIVE_STAT_BIN_3 (5000000)
-
-#define CPU_IDLE_STAT_BIN_1 (500000)
-#define CPU_IDLE_STAT_BIN_2 (2000000)
-#define CPU_IDLE_STAT_BIN_3 (5000000)
+static inline void machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins) {
+ uint32_t i;
+ for (i = 0; i < nbins; i++) {
+ if (interval < binvals[i]) {
+ bins[i]++;
+ break;
+ }
+ }
+}
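/*
 * [Editor's note] An interval is counted in the first bin whose bound it is
 * strictly below: with the cpu_itime_bins table above, a 100us idle interval
 * increments bins[3] (the "< 128us" bucket), while an interval of 32768us or
 * more falls off the end of the loop and is not counted at all, since there
 * is no catch-all bucket.
 */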
/*
* Called when the CPU is idle. It calls into the power management kext
void
machine_idle(void)
{
- cpu_data_t *my_cpu = current_cpu_datap();
- uint64_t ctime, rtime, itime;
+ cpu_data_t *my_cpu = current_cpu_datap();
+ uint64_t ctime, rtime, itime;
- if (my_cpu == NULL)
- goto out;
+ if (my_cpu == NULL)
+ goto out;
ctime = mach_absolute_time();
- my_cpu->lcpu.state = LCPU_IDLE;
- DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
- MARK_CPU_IDLE(cpu_number());
+ my_cpu->lcpu.state = LCPU_IDLE;
+ DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
+ MARK_CPU_IDLE(cpu_number());
rtime = ctime - my_cpu->cpu_ixtime;
my_cpu->cpu_rtime_total += rtime;
+ machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
+
+ if (pmInitDone) {
+ /*
+ * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
+ * were called prior to the CPU PM kext being registered. We do
+ * this here since we know at this point the values will be first
+ * used since idle is where the decisions using these values is made.
+ */
+ if (earlyMaxBusDelay != DELAY_UNSET)
+ ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
+
+ if (earlyMaxIntDelay != DELAY_UNSET)
+ ml_set_maxintdelay(earlyMaxIntDelay);
+ }
- if (rtime < CPU_ACTIVE_STAT_BIN_1)
- my_cpu->cpu_rtimes[0]++;
- else if (rtime < CPU_ACTIVE_STAT_BIN_2)
- my_cpu->cpu_rtimes[1]++;
- else if (rtime < CPU_ACTIVE_STAT_BIN_3)
- my_cpu->cpu_rtimes[2]++;
- else
- my_cpu->cpu_rtimes[3]++;
-
+ if (pmInitDone
+ && pmDispatch != NULL
+ && pmDispatch->MachineIdle != NULL)
+ (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+ else {
+ /*
+ * If no power management, re-enable interrupts and halt.
+ * This will keep the CPU from spinning through the scheduler
+ * and will allow at least some minimal power savings (but it
+ * may cause problems in some MP configurations w.r.t. the APIC
+ * stopping during a GV3 transition).
+ */
+ pal_hlt();
+
+ /* Once woken, re-disable interrupts. */
+ pal_cli();
+ }
- if (pmInitDone) {
/*
- * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
- * were called prior to the CPU PM kext being registered. We do
- * this here since we know at this point the values will be first
- * used since idle is where the decisions using these values is made.
+ * Mark the CPU as running again.
*/
- if (earlyMaxBusDelay != DELAY_UNSET)
- ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
-
- if (earlyMaxIntDelay != DELAY_UNSET)
- ml_set_maxintdelay(earlyMaxIntDelay);
- }
-
- if (pmInitDone
- && pmDispatch != NULL
- && pmDispatch->MachineIdle != NULL)
- (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
- else {
- /*
- * If no power management, re-enable interrupts and halt.
- * This will keep the CPU from spinning through the scheduler
- * and will allow at least some minimal power savings (but it
- * cause problems in some MP configurations w.r.t. the APIC
- * stopping during a GV3 transition).
- */
- pal_hlt();
-
- /* Once woken, re-disable interrupts. */
- pal_cli();
- }
-
- /*
- * Mark the CPU as running again.
- */
- MARK_CPU_ACTIVE(cpu_number());
- DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
+ MARK_CPU_ACTIVE(cpu_number());
+ DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
- itime = ixtime - ctime;
+ my_cpu->cpu_idle_exits++;
- my_cpu->lcpu.state = LCPU_RUN;
+ itime = ixtime - ctime;
- if (itime < CPU_IDLE_STAT_BIN_1)
- my_cpu->cpu_itimes[0]++;
- else if (itime < CPU_IDLE_STAT_BIN_2)
- my_cpu->cpu_itimes[1]++;
- else if (itime < CPU_IDLE_STAT_BIN_3)
- my_cpu->cpu_itimes[2]++;
- else
- my_cpu->cpu_itimes[3]++;
+ my_cpu->lcpu.state = LCPU_RUN;
+ machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
my_cpu->cpu_itime_total += itime;
- /*
- * Re-enable interrupts.
- */
- out:
- pal_sti();
+ /*
+ * Re-enable interrupts.
+ */
+out:
+ pal_sti();
}
/*
break;
case PM_HALT_NORMAL:
+ case PM_HALT_SLEEP:
default:
pal_cli();
(*pmDispatch->pmCPUHalt)();
/*
- * We've exited halt, so get the the CPU schedulable again.
+ * We've exited halt, so get the CPU schedulable again.
+ * - by calling the fast init routine for a slave, or
+ * - by returning if we're the master processor.
*/
- i386_init_slave_fast();
-
- panic("init_slave_fast returned");
+ if (cpup->cpu_number != master_cpu) {
+ i386_init_slave_fast();
+ panic("init_slave_fast returned");
+ }
} else
{
/*
pmInitDone = 1;
}
-static x86_lcpu_t *
+x86_lcpu_t *
pmGetLogicalCPU(int cpu)
{
return(cpu_to_lcpu(cpu));
}
-static x86_lcpu_t *
+x86_lcpu_t *
pmGetMyLogicalCPU(void)
{
cpu_data_t *cpup = current_cpu_datap();
/*
* Returns the root of the package tree.
*/
-static x86_pkg_t *
+x86_pkg_t *
pmGetPkgRoot(void)
{
return(x86_pkgs);
return(cpu_datap(cpu)->cpu_hibernate);
}
-static processor_t
+processor_t
pmLCPUtoProcessor(int lcpu)
{
return(cpu_datap(lcpu)->cpu_processor);
&& rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
}
-static uint32_t
+uint32_t
pmTimerQueueMigrate(int target_cpu)
{
/* Call the etimer code to do this. */
}
if (cpuFuncs != NULL) {
+ if (pmDispatch) {
+ panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
+ }
+
pmDispatch = cpuFuncs;
if (earlyTopology
if (entry) {
(void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
}
- else {
- (void)__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
- }
+ else {
+ uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
+ if (nidle == topoParms.nLThreadsPerPackage) {
+ my_cpu->lcpu.package->package_idle_exits++;
+ }
+ }
}
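/*
 * [Editor's note] In the else branch above, __sync_fetch_and_sub() returns
 * the value num_idle held *before* the decrement, so nidle equal to
 * topoParms.nLThreadsPerPackage means every logical CPU in the package was
 * idle and this wake-up ends a period of whole-package idleness, which is
 * why package_idle_exits is bumped.
 */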
void pmTimerRestore(void);
kern_return_t pmCPUExitHalt(int cpu);
kern_return_t pmCPUExitHaltToOff(int cpu);
+uint32_t pmTimerQueueMigrate(int);
#define PM_HALT_NORMAL 0 /* normal halt path */
#define PM_HALT_DEBUG 1 /* debug code wants to halt */
#define URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
extern uint64_t urgency_notification_assert_abstime_threshold;
+x86_lcpu_t *
+pmGetLogicalCPU(int cpu);
+x86_lcpu_t *
+pmGetMyLogicalCPU(void);
+processor_t
+pmLCPUtoProcessor(int lcpu);
+x86_pkg_t *
+pmGetPkgRoot(void);
+
+
/******************************************************************************
*
* All of the following are deprecated interfaces and no longer used.
set_cr3_raw(get_cr3_raw());
}
#endif
+extern int rdmsr64_carefully(uint32_t msr, uint64_t *val);
+extern int wrmsr64_carefully(uint32_t msr, uint64_t val);
#endif /* MACH_KERNEL_PRIVATE */
static inline void wbinvd(void)
* The implementation is in locore.s.
*/
extern int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi);
-
__END_DECLS
#endif /* ASSEMBLER */
#define MSR_IA32_MPERF 0xE7
#define MSR_IA32_APERF 0xE8
-#define MSR_PMG_CST_CONFIG_CONTROL 0xe2
-
#define MSR_IA32_BBL_CR_CTL 0x119
#define MSR_IA32_SYSENTER_CS 0x174
#define MSR_IA32_MISC_ENABLE 0x1a0
-#define MSR_IA32_ENERGY_PERFORMANCE_BIAS 0x1b0
#define MSR_IA32_PACKAGE_THERM_STATUS 0x1b1
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x1b2
#define MSR_IA32_PKG_POWER_SKU_UNIT 0x606
#define MSR_IA32_PKG_C2_RESIDENCY 0x60D
#define MSR_IA32_PKG_ENERGY_STATUS 0x611
-#define MSR_IA32_PRIMARY_PLANE_ENERY_STATUS 0x639
-#define MSR_IA32_SECONDARY_PLANE_ENERY_STATUS 0x641
+
+#define MSR_IA32_DDR_ENERGY_STATUS 0x619
+#define MSR_IA32_LLC_FLUSHED_RESIDENCY_TIMER 0x61D
+#define MSR_IA32_RING_PERF_STATUS 0x621
+
+#define MSR_IA32_PKG_C8_RESIDENCY 0x630
+#define MSR_IA32_PKG_C9_RESIDENCY 0x631
+#define MSR_IA32_PKG_C10_RESIDENCY 0x632
+
+#define MSR_IA32_PP0_ENERGY_STATUS 0x639
+#define MSR_IA32_PP1_ENERGY_STATUS 0x641
+#define MSR_IA32_IA_PERF_LIMIT_REASONS 0x690
+#define MSR_IA32_GT_PERF_LIMIT_REASONS 0x6B0
+
#define MSR_IA32_TSC_DEADLINE 0x6e0
#define MSR_IA32_EFER 0xC0000080
etimer_resync_deadlines();
}
-/*
- * tsc_to_nanoseconds:
- *
- * Basic routine to convert a raw 64 bit TSC value to a
- * 64 bit nanosecond value. The conversion is implemented
- * based on the scale factor and an implicit 32 bit shift.
- */
-static inline uint64_t
-_tsc_to_nanoseconds(uint64_t value)
-{
-#if defined(__i386__)
- asm volatile("movl %%edx,%%esi ;"
- "mull %%ecx ;"
- "movl %%edx,%%edi ;"
- "movl %%esi,%%eax ;"
- "mull %%ecx ;"
- "addl %%edi,%%eax ;"
- "adcl $0,%%edx "
- : "+A" (value)
- : "c" (pal_rtc_nanotime_info.scale)
- : "esi", "edi");
-#elif defined(__x86_64__)
- asm volatile("mul %%rcx;"
- "shrq $32, %%rax;"
- "shlq $32, %%rdx;"
- "orq %%rdx, %%rax;"
- : "=a"(value)
- : "a"(value), "c"(pal_rtc_nanotime_info.scale)
- : "rdx", "cc" );
-#else
-#error Unsupported architecture
-#endif
-
- return (value);
-}
-
static inline uint32_t
_absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs)
{
static inline uint64_t
rtc_nanotime_read(void)
{
-
-#if CONFIG_EMBEDDED
- if (gPEClockFrequencyInfo.timebase_frequency_hz > SLOW_TSC_THRESHOLD)
- return _rtc_nanotime_read(&rtc_nanotime_info, 1); /* slow processor */
- else
-#endif
- return _rtc_nanotime_read(&pal_rtc_nanotime_info, 0); /* assume fast processor */
+ return _rtc_nanotime_read(&pal_rtc_nanotime_info);
}
/*
assert(!ml_get_interrupts_enabled());
tsc = rdtsc64();
- oldnsecs = rntp->ns_base + _tsc_to_nanoseconds(tsc - rntp->tsc_base);
- newnsecs = base + _tsc_to_nanoseconds(tsc - tsc_base);
+ oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp);
+ newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp);
/*
* Only update the base values if time using the new base values
* rtc_sleep_wakeup:
*
* Invoked from power management when we have awoken from a sleep (S3)
- * and the TSC has been reset. The nanotime data is updated based on
- * the passed in value.
+ * and the TSC has been reset, or from Deep Idle (S0) sleep when the TSC
+ * has progressed. The nanotime data is updated based on the passed-in value.
*
* The caller must guarantee non-reentrancy.
*/
rtc_timer_init();
clock_timebase_init();
ml_init_lock_timeout();
- ml_init_delay_spin_threshold();
+ ml_init_delay_spin_threshold(10);
}
/* Set fixed configuration for lapic timers */
rtc_set_timescale(uint64_t cycles)
{
pal_rtc_nanotime_t *rntp = &pal_rtc_nanotime_info;
+ uint32_t shift = 0;
+
+ /* the "scale" factor will overflow unless cycles>SLOW_TSC_THRESHOLD */
+
+ while ( cycles <= SLOW_TSC_THRESHOLD) {
+ shift++;
+ cycles <<= 1;
+ }
+
+ if ( shift != 0 )
+ printf("Slow TSC, rtc_nanotime.shift == %d\n", shift);
+
rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles);
-#if CONFIG_EMBEDDED
- if (cycles <= SLOW_TSC_THRESHOLD)
- rntp->shift = (uint32_t)cycles;
- else
-#endif
- rntp->shift = 32;
+ rntp->shift = shift;
if (tsc_rebase_abs_time == 0)
tsc_rebase_abs_time = mach_absolute_time();
void
machine_delay_until(
- uint64_t deadline)
+ uint64_t interval,
+ uint64_t deadline)
{
- uint64_t now;
-
- do {
- cpu_pause();
- now = mach_absolute_time();
- } while (now < deadline);
+ (void)interval;
+ while (mach_absolute_time() < deadline) {
+ cpu_pause();
+ }
}
/*
* Assembly snippet included in exception handlers and rtc_nanotime_read()
+ *
+ *
+ * Warning! There are several copies of this code in the trampolines found in
+ * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
+ * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
+ *
+ * The algorithm we use is:
+ *
+ * ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *
+ * rnt_shift, a constant computed during initialization, is the smallest value for which:
+ *
+ * (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
+ *
+ * Where SLOW_TSC_THRESHOLD is about 10**9. Since most processors' tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant:
+ *
+ * rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
+ *
* %rdi points to nanotime info struct.
* %rax returns nanotime
*/
rdtsc ; \
lfence ; \
shlq $32,%rdx ; \
+ movl RNT_SHIFT(%rdi),%ecx ; \
orq %rdx,%rax /* %rax := tsc */ ; \
subq RNT_TSC_BASE(%rdi),%rax /* tsc - tsc_base */ ; \
- xorq %rcx,%rcx ; \
+ shlq %cl,%rax ; \
movl RNT_SCALE(%rdi),%ecx ; \
mulq %rcx /* delta * scale */ ; \
shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */ ; \
pal_rtc_nanotime_t *dst);
extern uint64_t _rtc_nanotime_read(
- pal_rtc_nanotime_t *rntp,
- int slow);
+ pal_rtc_nanotime_t *rntp);
+
+extern uint64_t _rtc_tsc_to_nanoseconds(
+ uint64_t value,
+ pal_rtc_nanotime_t *rntp);
extern void rtclock_intr(x86_saved_state_t *regs);
#include <mach/i386/syscall_sw.h>
#include <libkern/OSDebug.h>
-
+#include <i386/cpu_threads.h>
#include <machine/pal_routines.h>
extern void throttle_lowpri_io(int);
int ipl;
int cnum = cpu_number();
int itype = 0;
-
+
if (is_saved_state64(state) == TRUE) {
x86_saved_state64_t *state64;
interrupt_num = state32->trapno;
}
+ if (cpu_data_ptr[cnum]->lcpu.package->num_idle == topoParms.nLThreadsPerPackage)
+ cpu_data_ptr[cnum]->cpu_hwIntpexits[interrupt_num]++;
+
if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_INTERPROCESSOR_INTERRUPT))
itype = 1;
else if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT))
busFreq = EFI_FSB_frequency();
switch (cpuid_cpufamily()) {
+ case CPUFAMILY_INTEL_HASWELL:
case CPUFAMILY_INTEL_IVYBRIDGE:
case CPUFAMILY_INTEL_SANDYBRIDGE:
case CPUFAMILY_INTEL_WESTMERE:
}
kprintf(" BUS: Frequency = %6d.%06dMHz, "
- "cvtt2n = %08Xx.%08Xx, cvtn2t = %08Xx.%08Xx\n",
+ "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X\n",
(uint32_t)(busFreq / Mega),
(uint32_t)(busFreq % Mega),
(uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
kprintf(" TSC: Frequency = %6d.%06dMHz, "
- "cvtt2n = %08Xx.%08Xx, cvtn2t = %08Xx.%08Xx, gran = %lld%s\n",
+ "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
(uint32_t)(tscFreq / Mega),
(uint32_t)(tscFreq % Mega),
(uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
#define BASE_NHM_CLOCK_SOURCE 133333333ULL
#define IA32_PERF_STS 0x198
-#define SLOW_TSC_THRESHOLD 1000067800 /* TSC is too slow for regular nanotime() algorithm */
+#define SLOW_TSC_THRESHOLD 1000067800 /* if slower, nonzero shift required in nanotime() algorithm */
#ifndef ASSEMBLER
extern uint64_t busFCvtt2n;
if ( ml_delay_should_spin(interval) ||
get_preemption_level() != 0 ||
ml_get_interrupts_enabled() == FALSE ) {
- machine_delay_until(deadline);
+ machine_delay_until(interval, deadline);
} else {
assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
clock_sec_t *secs,
clock_usec_t *microsecs);
-extern void machine_delay_until(
+extern void machine_delay_until(uint64_t interval,
uint64_t deadline);
extern uint32_t hz_tick_interval;
}
/*
- * Called at splsched.
+ * Called with interrupts disabled.
*/
void
processor_doshutdown(
{
thread_t old_thread, self = current_thread();
processor_t prev;
+ processor_set_t pset;
/*
* Get onto the processor to shutdown
prev = thread_bind(processor);
thread_block(THREAD_CONTINUE_NULL);
-#if HIBERNATION
- if (processor_avail_count < 2)
- hibernate_vm_lock();
-#endif
-
assert(processor->state == PROCESSOR_SHUTDOWN);
+ ml_cpu_down();
+
#if HIBERNATION
- if (processor_avail_count < 2)
+ if (processor_avail_count < 2) {
+ hibernate_vm_lock();
hibernate_vm_unlock();
+ }
#endif
+ pset = processor->processor_set;
+ pset_lock(pset);
+ processor->state = PROCESSOR_OFF_LINE;
+ if (--pset->online_processor_count == 0) {
+ pset_pri_init_hint(pset, PROCESSOR_NULL);
+ pset_count_init_hint(pset, PROCESSOR_NULL);
+ }
+ (void)hw_atomic_sub(&processor_avail_count, 1);
+ commpage_update_active_cpus();
+ SCHED(processor_queue_shutdown)(processor);
+ /* pset lock dropped */
+
/*
* Continue processor shutdown in shutdown context.
*/
}
/*
* Complete the shutdown and place the processor offline.
*
* Called at splsched in the shutdown context.
*/
processor_t processor)
{
thread_t new_thread, old_thread = processor->active_thread;
- processor_set_t pset;
new_thread = processor->idle_thread;
processor->active_thread = new_thread;
PMAP_DEACTIVATE_KERNEL(processor->cpu_id);
- pset = processor->processor_set;
- pset_lock(pset);
- processor->state = PROCESSOR_OFF_LINE;
- if (--pset->online_processor_count == 0) {
- pset_pri_init_hint(pset, PROCESSOR_NULL);
- pset_count_init_hint(pset, PROCESSOR_NULL);
- }
- (void)hw_atomic_sub(&processor_avail_count, 1);
- commpage_update_active_cpus();
- SCHED(processor_queue_shutdown)(processor);
- /* pset lock dropped */
-
- ml_cpu_down();
-
cpu_sleep();
panic("zombie processor");
/*NOTREACHED*/
int cpu_id,
processor_set_t pset)
{
+ spl_t s;
+
if (processor != master_processor) {
/* Scheduler state deferred until sched_init() */
SCHED(processor_init)(processor);
processor_data_init(processor);
processor->processor_list = NULL;
+ s = splsched();
pset_lock(pset);
if (pset->cpu_set_count++ == 0)
pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
}
pset_unlock(pset);
+ splx(s);
simple_lock(&processor_list_lock);
if (processor_list == NULL)
#include <pmc/pmc.h>
#endif
+#include <i386/pmCPU.h>
static void kernel_bootstrap_thread(void);
static void load_context(
/* size of kernel trace buffer, disabled by default */
unsigned int new_nkdbufs = 0;
+unsigned int wake_nkdbufs = 0;
/* mach leak logging */
int log_leaks = 0;
PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs));
+ PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof (wake_nkdbufs));
+
/* i386_vm_init already checks for this; do it again anyway */
if (PE_parse_boot_argn("serverperfmode", &serverperfmode, sizeof (serverperfmode))) {
serverperfmode = 1;
#if (defined(__i386__) || defined(__x86_64__))
if (turn_on_log_leaks && !new_nkdbufs)
new_nkdbufs = 200000;
- start_kern_tracing(new_nkdbufs);
+ start_kern_tracing(new_nkdbufs, FALSE);
if (turn_on_log_leaks)
log_leaks = 1;
#endif
#if (!defined(__i386__) && !defined(__x86_64__))
if (turn_on_log_leaks && !new_nkdbufs)
new_nkdbufs = 200000;
- start_kern_tracing(new_nkdbufs);
+ start_kern_tracing(new_nkdbufs, FALSE);
if (turn_on_log_leaks)
log_leaks = 1;
#endif
#ifndef _MACH_BRANCH_PREDICATES_H
#define _MACH_BRANCH_PREDICATES_H
-#define __probable(x) __builtin_expect((x), 1)
-#define __improbable(x) __builtin_expect((x), 0)
+#define __probable(x) __builtin_expect((long)(x), 1L)
+#define __improbable(x) __builtin_expect((long)(x), 0L)
#endif /* _MACH_BRANCH_PREDICATES_H */
#define x86_DEBUG_STATE64 11
#define x86_DEBUG_STATE 12
#define THREAD_STATE_NONE 13
-/* 15 and 16 are used for the internal x86_SAVED_STATE flavours */
+/* 14 and 15 are used for the internal x86_SAVED_STATE flavours */
#define x86_AVX_STATE32 16
#define x86_AVX_STATE64 17
+#define x86_AVX_STATE 18
/*
(x == x86_DEBUG_STATE) || \
(x == x86_AVX_STATE32) || \
(x == x86_AVX_STATE64) || \
+ (x == x86_AVX_STATE) || \
(x == THREAD_STATE_NONE))
struct x86_state_hdr {
} uds;
};
+struct x86_avx_state {
+ x86_state_hdr_t ash;
+ union {
+ x86_avx_state32_t as32;
+ x86_avx_state64_t as64;
+ } ufs;
+};
+
typedef struct x86_thread_state x86_thread_state_t;
#define x86_THREAD_STATE_COUNT ((mach_msg_type_number_t) \
( sizeof (x86_thread_state_t) / sizeof (int) ))
#define x86_DEBUG_STATE_COUNT ((mach_msg_type_number_t) \
(sizeof(x86_debug_state_t)/sizeof(unsigned int)))
+typedef struct x86_avx_state x86_avx_state_t;
+#define x86_AVX_STATE_COUNT ((mach_msg_type_number_t) \
+ (sizeof(x86_avx_state_t)/sizeof(unsigned int)))
+
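/*
 * [Editor's sketch, user-space usage, not part of the change] The unified
 * flavor lets a caller fetch AVX state without knowing the thread's bitness
 * up front and then dispatch on the returned header:
 *
 *	x86_avx_state_t avx;
 *	mach_msg_type_number_t count = x86_AVX_STATE_COUNT;
 *	kern_return_t kr = thread_get_state(mach_thread_self(), x86_AVX_STATE,
 *	                                    (thread_state_t)&avx, &count);
 *	if (kr == KERN_SUCCESS) {
 *	        if (avx.ash.flavor == x86_AVX_STATE64)
 *	                ... use avx.ufs.as64 ...
 *	        else
 *	                ... use avx.ufs.as32 ...
 *	}
 */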
/*
* Machine-independent way for servers and Mach's exception mechanism to
* choose the most efficient state flavor for exception RPC's:
* task_extmod_info_t (8 64-bit ints)
* task_basic_info_64_2_t
* mach_task_basic_info_t (12 ints)
+ * task_power_info_t (18 ints)
* If other task_info flavors are added, this
* definition may need to be changed. (See
* mach/task_info.h and mach/policy.h) */
* kernel_resource_sizes_t (5 ints)
* host_load_info_t (6 ints)
* vm_statistics32_t (15 ints)
+ * host_expired_task_info uses a task_power_info (18 ints)
*
* If other host_info flavors are added, this definition may
* need to be changed. (See mach/{host_info,vm_statistics}.h)
*/
type host_flavor_t = int;
-type host_info_t = array[*:15] of integer_t;
+type host_info_t = array[*:18] of integer_t;
/*
#define CPUFAMILY_INTEL_WESTMERE 0x573b5eec
#define CPUFAMILY_INTEL_SANDYBRIDGE 0x5490b78c
#define CPUFAMILY_INTEL_IVYBRIDGE 0x1f65e835
+#define CPUFAMILY_INTEL_HASWELL 0x10b282dc
#define CPUFAMILY_ARM_9 0xe73283ae
#define CPUFAMILY_ARM_11 0x8ff620d8
#define CPUFAMILY_ARM_XSCALE 0x53b005f5
#define THREAD_STATE_FLAVOR_LIST 0 /* List of valid flavors */
#define THREAD_STATE_FLAVOR_LIST_NEW 128
+#define THREAD_STATE_FLAVOR_LIST_10_9 129
typedef int thread_state_flavor_t;
typedef thread_state_flavor_t *thread_state_flavor_array_t;
vm_object_t object;
vm_object_offset_t offset;
vm_object_offset_t pg_offset;
- vm_map_entry_t entry;
+ vm_map_entry_t entry = NULL;
vm_map_offset_t map_addr, fill_start;
vm_map_offset_t map_mask;
vm_map_size_t map_size, fill_size;
export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
include $(MakeInc_cmd)
include $(MakeInc_def)
-EXPORT_ONLY_FILES =
-
-INSTALL_MD_DIR = x86_64
-
-INSTALL_MD_LIST =
-
-INSTALL_MD_LCL_LIST =
-
-EXPORT_MD_LIST = ${EXPORT_ONLY_FILES}
-
-EXPORT_MD_DIR = x86_64
include $(MakeInc_rule)
include $(MakeInc_dir)
movq $1, %rax
ret
+/*
+ * int rdmsr64_carefully(uint32_t msr, uint64_t *val);
+ */
+
+ENTRY(rdmsr64_carefully)
+ movl %edi, %ecx
+ RECOVERY_SECTION
+ RECOVER(rdmsr64_carefully_fail)
+ rdmsr
+ movl %eax, (%rsi)
+ movl %edx, 4(%rsi)
+ xorl %eax, %eax
+ ret
+rdmsr64_carefully_fail:
+ movl $1, %eax
+ ret
+/*
+ * int wrmsr64_carefully(uint32_t msr, uint64_t val);
+ */
+
+ENTRY(wrmsr64_carefully)
+ movl %edi, %ecx
+ movl %esi, %eax
+ shr $32, %rsi
+ movl %esi, %edx
+ RECOVERY_SECTION
+ RECOVER(wrmsr_fail)
+ wrmsr
+ xorl %eax, %eax
+ ret
+wrmsr_fail:
+ movl $1, %eax
+ ret
+
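/*
 * [Editor's sketch, not part of the change] A C caller can use these to probe
 * an MSR that may not exist on every part, relying on the RECOVER section to
 * absorb the #GP; for instance (MSR chosen purely as an example):
 *
 *	uint64_t residency;
 *	if (rdmsr64_carefully(MSR_IA32_PKG_C8_RESIDENCY, &residency) == 0) {
 *	        ... MSR present, value in residency ...
 *	} else {
 *	        ... faulted and recovered: treat the MSR as unimplemented ...
 *	}
 */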
.globl EXT(thread_exception_return)
.globl EXT(thread_bootstrap_return)
LEXT(thread_bootstrap_return)
ret
/*
- * unint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
+ * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
*
* This is the same as the commpage nanotime routine, except that it uses the
* kernel internal "rtc_nanotime_info" data instead of the commpage data.
* These two copies of data are kept in sync by rtc_clock_napped().
*
- * Warning! There is another copy of this code in osfmk/x86_64/idt64.s.
- * These are kept in sync by both using the RTC_NANOTIME_READ() macro.
+ * Warning! There are several copies of this code in the trampolines found in
+ * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
+ * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
*
- * There are two versions of this algorithm, for "slow" and "fast" processors.
- * The more common "fast" algorithm is:
+ * The algorithm we use is:
*
- * ns = (((rdtsc - rnt_tsc_base)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ * ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
*
- * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant
- * computed during initialization:
+ * rnt_shift, a constant computed during initialization, is the smallest value for which:
*
- * rnt_tsc_scale = (10e9 * 2**32) / tscFreq;
+ * (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
*
- * The "slow" algorithm uses long division:
+ * Where SLOW_TSC_THRESHOLD is about 10**9. Since most processors' tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant:
*
- * ns = (((rdtsc - rnt_tsc_base) * 10e9) / tscFreq) + rnt_ns_base;
+ * rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
+ *
+ * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit
+ * multiply of rdtsc by tscFCvtt2n:
+ *
+ * ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
+ *
+ * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
+ * When U32 goes away, we should reconsider.
*
* Since this routine is not synchronized and can be called in any context,
* we use a generation count to guard against seeing partially updated data.
* the generation is zero.
*
* uint64_t _rtc_nanotime_read(
- * rtc_nanotime_t *rntp, // %rdi
- * int slow); // %rsi
+ * rtc_nanotime_t *rntp); // %rdi
*
*/
ENTRY(_rtc_nanotime_read)
- test %rsi,%rsi
- jnz Lslow
-
- /*
- * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD
- */
+
PAL_RTC_NANOTIME_READ_FAST()
ret
+
+/*
+ * extern uint64_t _rtc_tsc_to_nanoseconds(
+ * uint64_t value, // %rdi
+ * pal_rtc_nanotime_t *rntp); // %rsi
+ *
+ * Converts TSC units to nanoseconds, using an abbreviated form of the above
+ * algorithm. Note that while we could have simply used tmrCvt(value,tscFCvtt2n),
+ * which would avoid the need for this asm, doing so is a bit more risky since
+ * we'd be using a different algorithm with possibly different rounding etc.
+ */
- /*
- * Processor whose TSC frequency is not faster than SLOW_TSC_THRESHOLD
- * But K64 doesn't support this...
- */
-Lslow:
- lea 1f(%rip),%rdi
- xorb %al,%al
- call EXT(panic)
- hlt
- .data
-1: String "_rtc_nanotime_read() - slow algorithm not supported"
- .text
+ENTRY(_rtc_tsc_to_nanoseconds)
+ movq %rdi,%rax /* copy value (in TSC units) to convert */
+ movl RNT_SHIFT(%rsi),%ecx
+ movl RNT_SCALE(%rsi),%edx
+ shlq %cl,%rax /* tscUnits << shift */
+ mulq %rdx /* (tscUnits << shift) * scale */
+ shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */
+ ret
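/*
 * [Editor's cross-check, not part of the change] In C, the routine above is
 * equivalent to the truncating 128-bit computation
 *
 *	static uint64_t
 *	tsc_delta_to_ns_sketch(uint64_t delta, const pal_rtc_nanotime_t *rntp)
 *	{
 *		return (uint64_t)((((unsigned __int128)(delta << rntp->shift))
 *		                   * rntp->scale) >> 32);
 *	}
 *
 * i.e. (delta << shift) is multiplied by the 32-bit scale into a 128-bit
 * product (mulq) and bits 95:32 of that product are returned (shrdq $32).
 */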
+
+
Entry(call_continuation)
movq %rdi,%rcx /* get continuation */
void
pmap_cpu_init(void)
{
+ cpu_data_t *cdp = current_cpu_datap();
/*
* Here early in the life of a processor (from cpu_mode_init()).
* Ensure global page feature is disabled at this point.
/*
* Initialize the per-cpu, TLB-related fields.
*/
- current_cpu_datap()->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
- current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
- current_cpu_datap()->cpu_tlb_invalid = FALSE;
- current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;
+ cdp->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
+ cdp->cpu_active_cr3 = kernel_pmap->pm_cr3;
+ cdp->cpu_tlb_invalid = FALSE;
+ cdp->cpu_task_map = TASK_MAP_64BIT;
pmap_pcid_configure();
if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_SMEP) {
boolean_t nsmep;
pmap_smep_enabled = TRUE;
}
}
+
+ if (cdp->cpu_fixed_pmcs_enabled) {
+ boolean_t enable = TRUE;
+ cpu_pmc_control(&enable);
+ }
}
void serial_putc( char c )
{
uart_putc(c);
- if (c == '\n') uart_putc('\r');
}
int serial_getc( void )