git.saurik.com Git - apple/xnu.git/commitdiff
xnu-2050.48.11.tar.gz mac-os-x-1085 v2050.48.11
author Apple <opensource@apple.com>
Wed, 9 Oct 2013 15:39:01 +0000 (15:39 +0000)
committer Apple <opensource@apple.com>
Wed, 9 Oct 2013 15:39:01 +0000 (15:39 +0000)
91 files changed:
bsd/dev/i386/sysctl.c
bsd/dev/random/randomdev.c
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_hotfiles.c
bsd/hfs/hfs_readwrite.c
bsd/kern/kdebug.c
bsd/kern/kern_mib.c
bsd/kern/kern_symfile.c
bsd/kern/trace.codes
bsd/net/dlil.c
bsd/netinet/igmp.c
bsd/netinet/in_pcb.c
bsd/netinet/in_pcb.h
bsd/sys/cprotect.h
bsd/sys/kdebug.h
config/MasterVersion
config/Private.x86_64.exports
iokit/IOKit/IOHibernatePrivate.h
iokit/IOKit/IOPolledInterface.h
iokit/IOKit/IOService.h
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/IOPMPrivate.h
iokit/Kernel/IOHibernateIO.cpp
iokit/Kernel/IOHibernateInternal.h
iokit/Kernel/IOHibernateRestoreKernel.c
iokit/Kernel/IOMemoryDescriptor.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOService.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/IOServicePMPrivate.h
kgmacros
libkern/c++/OSMetaClass.cpp
libsyscall/wrappers/__get_cpu_capabilities.s
osfmk/conf/files.x86_64
osfmk/i386/AT386/model_dep.c
osfmk/i386/Diagnostics.c
osfmk/i386/Diagnostics.h
osfmk/i386/acpi.c
osfmk/i386/acpi.h
osfmk/i386/commpage/commpage.c
osfmk/i386/commpage/commpage.h
osfmk/i386/commpage/fifo_queues.s
osfmk/i386/commpage/pthreads.s
osfmk/i386/cpu.c
osfmk/i386/cpu_capabilities.h
osfmk/i386/cpu_data.h
osfmk/i386/cpu_topology.h
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/etimer.c
osfmk/i386/fpu.c
osfmk/i386/genassym.c
osfmk/i386/hibernate_restore.c
osfmk/i386/i386_init.c
osfmk/i386/lapic.c
osfmk/i386/lapic.h
osfmk/i386/lapic_native.c
osfmk/i386/machine_check.c
osfmk/i386/machine_routines.c
osfmk/i386/machine_routines.h
osfmk/i386/misc_protos.h
osfmk/i386/mp.c
osfmk/i386/mp_native.c
osfmk/i386/pal_native.h
osfmk/i386/pal_routines.h
osfmk/i386/pcb.c
osfmk/i386/pmCPU.c
osfmk/i386/pmCPU.h
osfmk/i386/proc_reg.h
osfmk/i386/rtclock.c
osfmk/i386/rtclock_asm_native.h
osfmk/i386/rtclock_protos.h
osfmk/i386/trap.c
osfmk/i386/tsc.c
osfmk/i386/tsc.h
osfmk/kern/clock.c
osfmk/kern/clock.h
osfmk/kern/machine.c
osfmk/kern/processor.c
osfmk/kern/startup.c
osfmk/mach/branch_predicates.h
osfmk/mach/i386/thread_status.h
osfmk/mach/mach_types.defs
osfmk/mach/machine.h
osfmk/mach/thread_status.h
osfmk/vm/vm_kern.c
osfmk/x86_64/Makefile
osfmk/x86_64/locore.s
osfmk/x86_64/machine_routines_asm.s
osfmk/x86_64/pmap.c
pexpert/i386/pe_serial.c

diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c
index ddc9503e3e9420a6499954209586ba15e2f32fe3..f971730756e2460f084c997f587b9543164c1b3a 100644 (file)
 #include <i386/cpuid.h>
 #include <i386/tsc.h>
 #include <i386/machine_routines.h>
+#include <i386/pal_routines.h>
 #include <i386/ucode.h>
 #include <kern/clock.h>
 #include <libkern/libkern.h>
 #include <i386/lapic.h>
+#include <i386/pmCPU.h>
+
 
 static int
 _i386_cpu_info SYSCTL_HANDLER_ARGS
@@ -730,7 +733,30 @@ SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Other, CTLFLAG_RD|CTLFLAG_LOCKED, &firmwa
 
 SYSCTL_NODE(_machdep, OID_AUTO, tsc, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "Timestamp counter parameters");
 
-SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency, CTLFLAG_RD|CTLFLAG_LOCKED, &tscFreq, "");
+SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency,
+       CTLFLAG_RD|CTLFLAG_LOCKED, &tscFreq, "");
+
+extern uint32_t deep_idle_rebase;
+SYSCTL_UINT(_machdep_tsc, OID_AUTO, deep_idle_rebase,
+       CTLFLAG_RW|CTLFLAG_KERN|CTLFLAG_LOCKED, &deep_idle_rebase, 0, "");
+
+SYSCTL_NODE(_machdep_tsc, OID_AUTO, nanotime,
+       CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "TSC to ns conversion");
+SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, tsc_base,
+       CTLFLAG_RD | CTLFLAG_LOCKED,
+       (uint64_t *) &pal_rtc_nanotime_info.tsc_base, "");
+SYSCTL_QUAD(_machdep_tsc_nanotime, OID_AUTO, ns_base,
+       CTLFLAG_RD | CTLFLAG_LOCKED,
+       (uint64_t *)&pal_rtc_nanotime_info.ns_base, "");
+SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, scale,
+       CTLFLAG_RD | CTLFLAG_LOCKED,
+       (uint32_t *)&pal_rtc_nanotime_info.scale, 0, "");
+SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, shift,
+       CTLFLAG_RD | CTLFLAG_LOCKED,
+       (uint32_t *)&pal_rtc_nanotime_info.shift, 0, "");
+SYSCTL_UINT(_machdep_tsc_nanotime, OID_AUTO, generation,
+       CTLFLAG_RD | CTLFLAG_LOCKED,
+       (uint32_t *)&pal_rtc_nanotime_info.generation, 0, "");
 
 SYSCTL_NODE(_machdep, OID_AUTO, misc, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "Miscellaneous x86 kernel parameters");
diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c
index c081bd20904ca64b7dc49b091f1cb734eac98ad6..c29e9f877fec0e9672e4ef05787cbc08ffff21b6 100644 (file)
@@ -56,6 +56,7 @@
 #include <dev/random/YarrowCoreLib/include/yarrow.h>
 
 #include <libkern/OSByteOrder.h>
+#include <libkern/OSAtomic.h>
 
 #include <mach/mach_time.h>
 #include <machine/machine_routines.h>
@@ -101,13 +102,14 @@ static struct cdevsw random_cdevsw =
 
 
 /* Used to detect whether we've already been initialized */
-static int gRandomInstalled = 0;
+static UInt8 gRandomInstalled = 0;
 static PrngRef gPrngRef;
 static int gRandomError = 1;
 static lck_grp_t *gYarrowGrp;
 static lck_attr_t *gYarrowAttr;
 static lck_grp_attr_t *gYarrowGrpAttr;
 static lck_mtx_t *gYarrowMutex = 0;
+static UInt8 gYarrowInitializationLock = 0;
 
 #define RESEED_TICKS 50 /* how long a reseed operation can take */
 
@@ -307,6 +309,27 @@ PreliminarySetup(void)
 {
     prng_error_status perr;
 
+       /* Multiple threads can enter this as a result of an earlier
+        * check of gYarrowMutex.  We make sure that only one of them
+        * can enter at a time.  If one of them enters and discovers
+        * that gYarrowMutex is no longer NULL, we know that another
+        * thread has initialized the Yarrow state and we can exit.
+        */
+       
+       /* The first thread that enters this function will find
+        * gYarrowInitializationLock set to 0.  It will atomically
+        * set the value to 1 and, seeing that it was zero, drop
+        * out of the loop.  Other threads will see that the value is
+        * 1 and continue to loop until we are initialized.
+     */
+
+       while (OSTestAndSet(0, &gYarrowInitializationLock)); /* serialize access to this function */
+       
+       if (gYarrowMutex) {
+               /*  we've already been initialized, clear and get out */
+               goto function_exit;
+       }
+
     /* create a Yarrow object */
     perr = prngInitialize(&gPrngRef);
     if (perr != 0) {
@@ -321,6 +344,8 @@ PreliminarySetup(void)
     char buffer [16];
 
     /* get a little non-deterministic data as an initial seed. */
+       /* On OSX, securityd will add much more entropy as soon as it */
+       /* comes up.  On iOS, entropy is added with each system interrupt. */
     microtime(&tt);
 
     /*
@@ -334,7 +359,7 @@ PreliminarySetup(void)
     if (perr != 0) {
         /* an error, complain */
         printf ("Couldn't seed Yarrow.\n");
-        return;
+        goto function_exit;
     }
     
     /* turn the data around */
@@ -350,6 +375,10 @@ PreliminarySetup(void)
     gYarrowMutex   = lck_mtx_alloc_init(gYarrowGrp, gYarrowAttr);
        
        fips_initialize ();
+
+function_exit:
+       /* allow other threads to figure out whether or not we have been initialized. */
+       gYarrowInitializationLock = 0;
 }
 
 const Block kKnownAnswer = {0x92, 0xb4, 0x04, 0xe5, 0x56, 0x58, 0x8c, 0xed, 0x6c, 0x1a, 0xcd, 0x4e, 0xbf, 0x05, 0x3f, 0x68, 0x09, 0xf7, 0x3a, 0x93};
@@ -384,14 +413,11 @@ random_init(void)
 {
        int ret;
 
-       if (gRandomInstalled)
+       if (OSTestAndSet(0, &gRandomInstalled)) {
+               /* do this atomically so that it works correctly with
+                multiple threads */
                return;
-
-       /* install us in the file system */
-       gRandomInstalled = 1;
-
-       /* setup yarrow and the mutex */
-       PreliminarySetup();
+       }
 
        ret = cdevsw_add(RANDOM_MAJOR, &random_cdevsw);
        if (ret < 0) {
@@ -409,6 +435,9 @@ random_init(void)
         */
        devfs_make_node(makedev (ret, 1), DEVFS_CHAR,
                UID_ROOT, GID_WHEEL, 0666, "urandom", 0);
+
+       /* setup yarrow and the mutex if needed */
+       PreliminarySetup();
 }
 
 int
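
OSTestAndSet() atomically sets a bit and returns its previous value, so the while loop above admits only the thread that flipped gYarrowInitializationLock from 0 to 1; the plain store of 0 at function_exit releases it. A condensed sketch of the pattern (names here are illustrative, not from this file):

    #include <libkern/OSAtomic.h>

    static UInt8 gInitFlag = 0;     /* hypothetical one-byte spin flag */

    static void
    run_once(void (*init_fn)(void))
    {
        /* spin until this thread flips bit 0 from clear to set */
        while (OSTestAndSet(0, &gInitFlag))
            ;
        init_fn();                  /* serialized critical section */
        gInitFlag = 0;              /* release; later callers recheck state */
    }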
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index d507342861faa4bcc5e98446b92aa1f0134dd50d..65f2825d058fda86300b9b80c076fbf42b26ab0f 100644 (file)
@@ -312,20 +312,31 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
                (cp->c_flag & C_DELETED) &&
                ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
 
-               /* Start a transaction here.  We're about to change file sizes */
-               if (started_tr == 0) {
-                       if (hfs_start_transaction(hfsmp) != 0) {
-                               error = EINVAL;
-                               goto out;
-                       }
-                       else {
-                               started_tr = 1;
-                       }
-               }
-       
                /* Truncate away our own fork data. (Case A, B, C above) */
                if (VTOF(vp)->ff_blocks != 0) {
-                       
+
+                       /* 
+                        * SYMLINKS only:
+                        *
+                        * Encapsulate the entire change (including truncating the link) in 
+                        * nested transactions if we are modifying a symlink, because we know that its
+                        * file length will be at most 4k, and we can fit both the truncation and 
+                        * any relevant bitmap changes into a single journal transaction.  We also want
+                        * the kill_block code to execute in the same transaction so that any dirty symlink
+                        * blocks will not be written. Otherwise, rely on
+                        * hfs_truncate doing its own transactions to ensure that we don't blow up
+                        * the journal.
+                        */ 
+                       if ((started_tr == 0) && (v_type == VLNK)) {
+                               if (hfs_start_transaction(hfsmp) != 0) {
+                                       error = EINVAL;
+                                       goto out;
+                               }
+                               else {
+                                       started_tr = 1;
+                               }
+                       }
+
                        /*
                         * At this point, we have decided that this cnode is
                         * suitable for full removal.  We are about to deallocate
@@ -348,20 +359,23 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
                        if (hfsmp->jnl && vnode_islnk(vp)) {
                                buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
                        }
-
+       
                        /*
-                        * Since we're already inside a transaction,
-                        * tell hfs_truncate to skip the ubc_setsize.
-                        *
                         * This truncate call (and the one below) is fine from VNOP_RECLAIM's 
                         * context because we're only removing blocks, not zero-filling new 
                         * ones.  The C_DELETED check above makes things much simpler. 
                         */
-                       error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ctx);
+                       error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx);
                        if (error) {
                                goto out;
                        }
                        truncated = 1;
+
+                       /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
+                       if (started_tr) {
+                               hfs_end_transaction(hfsmp);
+                               started_tr = 0;
+                       }
                }
                
                /* 
@@ -369,7 +383,9 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
                 * it is the last fork.  That means, by definition, the rsrc fork is not in 
                 * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
                 * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
-                * to get rid of the resource fork's data. 
+                * to get rid of the resource fork's data. Note that because we are holding the 
+                * cnode lock, it is impossible for a competing thread to create the resource fork
+                * vnode from underneath us while we do this.
                 * 
                 * This is invoked via case A above only.
                 */
@@ -441,12 +457,6 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
                         */
                        cp->c_blocks = 0;
                }
-
-               /* End the transaction from the start of the file truncation segment */
-               if (started_tr) {
-                       hfs_end_transaction(hfsmp);
-                       started_tr = 0;
-               }
        }
        
        /*
diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c
index 7ebb82bc6a569f88cc22f98a585eb97d3a970796..50a29e2237043c65ef6550acf33a1e38feac277c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -207,8 +207,8 @@ hfs_recording_start(struct hfsmount *hfsmp)
                    (SWAP_BE32 (hotfileinfo.timeleft) > 0) &&
                    (SWAP_BE32 (hotfileinfo.timebase) > 0)) {
                        hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt);
-                       hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ;
                        hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase);
+                       hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ;
                        /* Fix up any bogus timebase values. */
                        if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) {
                                hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION;
@@ -792,7 +792,8 @@ hfs_addhotfile_internal(struct vnode *vp)
        if (hfsmp->hfc_stage != HFC_RECORDING)
                return (0);
 
-       if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) {
+       /* Only regular files are allowed for hotfile inclusion; symlinks disallowed */
+       if ((!vnode_isreg(vp)) || vnode_issystem(vp)) {
                return (0);
        }
        /* Skip resource forks for now. */
@@ -862,7 +863,8 @@ hfs_removehotfile(struct vnode *vp)
        if (hfsmp->hfc_stage != HFC_RECORDING)
                return (0);
 
-       if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) {
+       /* Only regular files can move out of hotfiles */
+       if ((!vnode_isreg(vp)) || vnode_issystem(vp)) {
                return (0);
        }
 
@@ -904,7 +906,7 @@ out:
 static int
 hotfiles_collect_callback(struct vnode *vp, __unused void *cargs)
 {
-        if ((vnode_isreg(vp) || vnode_islnk(vp)) && !vnode_issystem(vp))
+        if ((vnode_isreg(vp)) && !vnode_issystem(vp))
                (void) hfs_addhotfile_internal(vp);
 
        return (VNODE_RETURNED);
@@ -1138,7 +1140,9 @@ hotfiles_adopt(struct hfsmount *hfsmp)
                        }
                        break;
                }
-               if (!vnode_isreg(vp) && !vnode_islnk(vp)) {
+
+               /* only regular files are eligible */
+               if (!vnode_isreg(vp)) { 
                        printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid);
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
@@ -1361,7 +1365,9 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
                        }
                        break;
                }
-               if (!vnode_isreg(vp) && !vnode_islnk(vp)) {
+
+               /* only regular files are eligible */
+               if (!vnode_isreg(vp)) {
                        printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID);
                        hfs_unlock(VTOC(vp));
                        vnode_put(vp);
diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c
index f0b91b94a76f464fa9e8a2f2a573686caa217bbb..b9bcdd03624b00d3ef34914a7f78328eeaa3390a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -4391,7 +4391,8 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
        enum vtype vnodetype;
 
        vnodetype = vnode_vtype(vp);
-       if (vnodetype != VREG && vnodetype != VLNK) {
+       if (vnodetype != VREG) {
+               /* Note symlinks are not allowed to be relocated */
                return (EPERM);
        }
        
@@ -4424,8 +4425,7 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
        if (blockHint == 0)
                blockHint = hfsmp->nextAllocation;
 
-       if ((fp->ff_size > 0x7fffffff) ||
-           ((fp->ff_size > blksize) && vnodetype == VLNK)) {
+       if ((fp->ff_size > 0x7fffffff)) {
                return (EFBIG);
        }
 
diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c
index b25b3f9d477570d3e7abd3fd4b8972be0eba2480..79896dbbe87b5bed869cd7129585408b9ae6a45f 100644 (file)
@@ -2459,13 +2459,15 @@ error_exit:
 }
 
 void
-start_kern_tracing(unsigned int new_nkdbufs) {
+start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map) {
 
        if (!new_nkdbufs)
                return;
        nkdbufs = kdbg_set_nkdbufs(new_nkdbufs);
        kdbg_lock_init();
        kdbg_reinit(TRUE);
+    if (need_map == TRUE)
+       kdbg_mapinit();
        kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE);
 
 #if defined(__i386__) || defined(__x86_64__)
diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c
index 7c27eb16dd9fd4aa6fb117acd1e32dfed7ba9941..497ab5a440bc9235ae49c40b181aa937c86785c1 100644 (file)
@@ -406,35 +406,38 @@ SYSCTL_PROC(_hw, HW_L3SETTINGS,   l3settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG
 SYSCTL_INT (_hw, OID_AUTO, cputhreadtype, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, &cputhreadtype, 0, "");
 
 #if defined(__i386__) || defined(__x86_64__)
-int mmx_flag = -1;
-int sse_flag = -1;
-int sse2_flag = -1;
-int sse3_flag = -1;
-int sse4_1_flag = -1;
-int sse4_2_flag = -1;
-int x86_64_flag = -1;
-int supplementalsse3_flag = -1;
-int aes_flag = -1;
-int avx1_0_flag = -1;
-int rdrand_flag = -1;
-int f16c_flag = -1;
-int enfstrg_flag = -1;
-
-SYSCTL_INT(_hw_optional, OID_AUTO, mmx, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &mmx_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse2_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse3_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, supplementalsse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &supplementalsse3_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse4_1, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse4_1_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse4_2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse4_2_flag, 0, "");
+static int
+sysctl_cpu_capability
+(__unused struct sysctl_oid *oidp, void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       uint64_t        mask = (uint64_t) (uintptr_t) arg1;
+       boolean_t       is_capable = (_get_cpu_capabilities() & mask) != 0;
+       return SYSCTL_OUT(req, &is_capable, sizeof(is_capable));
+
+}
+
+SYSCTL_PROC(_hw_optional, OID_AUTO, mmx,       CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasMMX, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse,       CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse2,      CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE2, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse3,      CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE3, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, supplementalsse3,  CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSupplementalSSE3, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_1,    CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_1, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, sse4_2,    CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSSE4_2, 0, sysctl_cpu_capability, "I", "");
 /* "x86_64" is actually a preprocessor symbol on the x86_64 kernel, so we have to hack this */
 #undef x86_64
-SYSCTL_INT(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &x86_64_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, aes, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &aes_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, avx1_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &avx1_0_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, rdrand, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &rdrand_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, f16c, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &f16c_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, enfstrg, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &enfstrg_flag, 0, "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, x86_64,    CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) k64Bit, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, aes,       CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAES, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, avx1_0,    CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX1_0, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, rdrand,    CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRDRAND, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, f16c,      CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasF16C, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, enfstrg,   CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasENFSTRG, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, fma,       CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasFMA, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, avx2_0,    CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX2_0, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, bmi1,      CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI1, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, bmi2,      CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasBMI2, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, rtm,       CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasRTM, 0, sysctl_cpu_capability, "I", "");
+SYSCTL_PROC(_hw_optional, OID_AUTO, hle,       CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasHLE, 0, sysctl_cpu_capability, "I", "");
 #else
 #error Unsupported arch
 #endif /* !__i386__ && !__x86_64 && !__arm__ */
@@ -498,21 +501,6 @@ sysctl_mib_init(void)
        }
 
 #if defined (__i386__) || defined (__x86_64__)
-#define is_capability_set(k) (((_get_cpu_capabilities() & (k)) == (k)) ? 1 : 0)
-       mmx_flag                = is_capability_set(kHasMMX);
-       sse_flag                = is_capability_set(kHasSSE);
-       sse2_flag               = is_capability_set(kHasSSE2);
-       sse3_flag               = is_capability_set(kHasSSE3);
-       supplementalsse3_flag   = is_capability_set(kHasSupplementalSSE3);
-       sse4_1_flag             = is_capability_set(kHasSSE4_1);
-       sse4_2_flag             = is_capability_set(kHasSSE4_2);
-       x86_64_flag             = is_capability_set(k64Bit);
-       aes_flag                = is_capability_set(kHasAES);
-       avx1_0_flag             = is_capability_set(kHasAVX1_0);
-       rdrand_flag             = is_capability_set(kHasRDRAND);
-       f16c_flag               = is_capability_set(kHasF16C);
-       enfstrg_flag            = is_capability_set(kHasENFSTRG);
-
        /* hw.cpufamily */
        cpufamily = cpuid_cpufamily();
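
With the boot-time flags gone, each hw.optional.* OID now evaluates _get_cpu_capabilities() on demand in sysctl_cpu_capability(), which is what lets the new avx2_0/bmi1/bmi2/rtm/hle entries appear without extra init code. User space reads them as before; a minimal check (feature name chosen for illustration):

    #include <stdio.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        int has = 0;
        size_t len = sizeof(has);

        if (sysctlbyname("hw.optional.avx2_0", &has, &len, NULL, 0) != 0)
            has = 0;    /* OID absent on kernels without this change */
        printf("AVX2: %s\n", has ? "yes" : "no");
        return 0;
    }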
 
diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c
index 1636b4ab58b9414986fc19e36a933318c9df5a3b..2e1965dfd0685a2da4a5d60b55394a27f8f0ef10 100644 (file)
@@ -203,7 +203,7 @@ kern_open_file_for_direct_io(const char * name,
     int                                isssd = 0;
     uint32_t                    flags = 0;
     uint32_t                   blksize;
-    off_t                      maxiocount, count;
+    off_t                      maxiocount, count, segcount;
     boolean_t                   locked = FALSE;
 
     int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
@@ -406,14 +406,20 @@ kern_open_file_for_direct_io(const char * name,
         maxiocount = count;
 
     error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
+    if (!error)
+       error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
     if (error)
-        count = 0;
+        count = segcount = 0;
+    count *= segcount;
     if (count && (count < maxiocount))
         maxiocount = count;
 
     error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
+    if (!error)
+       error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
     if (error)
-        count = 0;
+        count = segcount = 0;
+    count *= segcount;
     if (count && (count < maxiocount))
         maxiocount = count;
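
With the added DKIOCGETMAXSEGMENTCOUNT{READ,WRITE} probes, each direction's limit becomes bytes-per-segment times segment count rather than segment bytes alone, and a failed probe zeroes both values so it cannot clamp. The fold reduces to this hypothetical helper:

    #include <sys/types.h>

    /* fold one probed limit into the running minimum; 0 means "no limit known" */
    static off_t
    clamp_maxio(off_t maxiocount, off_t seg_bytes, off_t seg_count)
    {
        off_t count = seg_bytes * seg_count;

        if (count && (count < maxiocount))
            maxiocount = count;
        return (maxiocount);
    }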
 
diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes
index af5a27b19a90a6b12868f274960f158f11428bdc..7bfcacf18519dccd3836cf47120bb2612a540dc8 100644 (file)
 0x1400058      MACH_SCHED_REDISPATCH
 0x140005C      MACH_SCHED_REMOTE_AST
 0x1400060      MACH_SCHED_LPA_BROKEN
+0x1400064      MACH_DEEP_IDLE
 0x1500000      MACH_MSGID_INVALID
 0x1600000      MTX_SLEEP
 0x1600004      MTX_SLEEP_DEADLINE
 0x1700020      PMAP_flush_TLBS
 0x1700024      PMAP_update_interrupt
 0x1700028      PMAP_attribute_clear
+0x1900000      MP_TLB_FLUSH
+0x1900004      MP_CPUS_CALL
+0x1900008      MP_CPUS_CALL_LOCAL
+0x190000c      MP_CPUS_CALL_ACTION
+0x1900010      MP_CPUS_CALL_NOBUF
+0x1900014      MP_CPU_FAST_START
+0x1900018      MP_CPU_START
+0x190001c      MP_CPU_DEACTIVATE
 0x2010000      L_IP_In_Beg
 0x2010004      L_IP_Out_Beg
 0x2010008      L_IP_In_End
 0x53101a4      CPUPM_TEST_RUN_INFO
 0x53101a8      CPUPM_TEST_SLAVE_INFO
 0x53101ac      CPUPM_FORCED_IDLE
+0x53101b4      CPUPM_PSTATE_CHOOSE
+0x53101b8      CPUPM_PSTATE_COMMIT
+0x53101bc      CPUPM_PSTATE_CHECK
+0x531023C      CPUPM_TQM       
+0x5310240      CPUPM_QUIESCE
+0x5310244      CPUPM_MBD
+0x5310248      CPUPM_PST_RATELIMIT_QOS
+0x531024C      CPUPM_PST_QOS_RATEUNLIMIT
+0x5310250      CPUPM_PST_QOS_SWITCH
+0x5310254      CPUPM_FORCED_IDLE
+0x5320000      CPUPM_PST_RESOLVE
+0x5320004      CPUPM_PST_LOAD_TXFR
+0x5320008      CPUPM_PST_IDLE_EXIT
+0x532000C      CPUPM_PST_IDLE_ENTRY
+0x5320010      CPUPM_PST_TIMER
+0x5320014      CPUPM_PST_MAXBUS
+0x5320018      CPUPM_PST_MAXINT
+0x532001C      CPUPM_PST_PLIMIT
+0x5320020      CPUPM_PST_SELFSEL
+0x5320024      CPUPM_PST_RATELIMIT
+0x5320028      CPUPM_PST_RATEUNLIMIT
+0x532002C      CPUPM_DVFS_PAUSE
+0x5320030      CPUPM_DVFS_RESUME
+0x5320034      CPUPM_DVFS_ADVANCE
+0x5320038      CPUPM_DVFS_TRANSIT
 0x5330000      HIBERNATE
 0x5330004      HIBERNATE_WRITE_IMAGE
 0x5330008      HIBERNATE_MACHINE_INIT
diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index 8889263189d6ea1a3b6a02d27d89371a8a291e49..af80a4a40c803b5a1a1e9cf3b1a4ba7fd438c6ec 100644 (file)
@@ -5797,7 +5797,7 @@ ifnet_fc_thread_cont(int err)
 {
 #pragma unused(err)
        struct sfb_bin_fcentry *fce;
-       struct inp_fc_entry *infc;
+       struct inpcb *inp;
 
        for (;;) {
                lck_mtx_assert(&ifnet_fclist_lock, LCK_MTX_ASSERT_OWNED);
@@ -5813,17 +5813,14 @@ ifnet_fc_thread_cont(int err)
                SLIST_NEXT(fce, fce_link) = NULL;
                lck_mtx_unlock(&ifnet_fclist_lock);
 
-               infc = inp_fc_getinp(fce->fce_flowhash);
-               if (infc == NULL) {
+               inp = inp_fc_getinp(fce->fce_flowhash, 0);
+               if (inp == NULL) {
                        ifnet_fce_free(fce);
                        lck_mtx_lock_spin(&ifnet_fclist_lock);
                        continue;
                }
-               VERIFY(infc->infc_inp != NULL);
+               inp_fc_feedback(inp);
 
-               inp_fc_feedback(infc->infc_inp);
-
-               inp_fc_entry_free(infc);
                ifnet_fce_free(fce);
                lck_mtx_lock_spin(&ifnet_fclist_lock);
        }
diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c
index 7e0cd82e92d01d7975f81e20e0180d4951b14c01..e8442d135ea95b23e85d10eb57d264375c942989 100644 (file)
@@ -1604,7 +1604,7 @@ igmp_input(struct mbuf *m, int off)
                OIGMPSTAT_INC(igps_rcv_tooshort);
                return;
        }
-       VERIFY(IS_P2ALIGNED(igmp, sizeof (u_int32_t)));
+       /* N.B.: we assume the packet was correctly aligned in ip_input. */
 
        /*
         * Validate checksum.
@@ -1701,8 +1701,10 @@ igmp_input(struct mbuf *m, int off)
                                        OIGMPSTAT_INC(igps_rcv_tooshort);
                                        return;
                                }
-                               VERIFY(IS_P2ALIGNED(igmpv3,
-                                   sizeof (u_int32_t)));
+                               /* 
+                                * N.B.: we assume the packet was correctly
+                                * aligned in ip_input.
+                                */
                                if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
                                        m_freem(m);
                                        return;
diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c
index 043057a59529fb39f9db0aef3860a9db048c2434..485e8dbcd0c3f691b21ac565358a35031db0708d 100644 (file)
@@ -201,22 +201,22 @@ struct inp_flowhash_key {
 
 u_int32_t inp_hash_seed = 0;
 
-static __inline int infc_cmp(const struct inp_fc_entry *,
-    const struct inp_fc_entry *);
+static __inline int infc_cmp(const struct inpcb *,
+    const struct inpcb *);
 lck_grp_t *inp_lck_grp;
 lck_grp_attr_t *inp_lck_grp_attr;
 lck_attr_t *inp_lck_attr;
 decl_lck_mtx_data(, inp_fc_lck);
 
-RB_HEAD(inp_fc_tree, inp_fc_entry) inp_fc_tree;
-RB_PROTOTYPE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
+RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
+RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
+RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
 
-RB_GENERATE(inp_fc_tree, inp_fc_entry, infc_link, infc_cmp);
-
-static unsigned int inp_fcezone_size;
-static struct zone *inp_fcezone;
-#define INP_FCEZONE_NAME "inp_fcezone"
-#define INP_FCEZONE_MAX 32
+/*
+ * Use this inp as a key to find an inp in the flowhash tree.
+ * Accesses to it are protected by inp_fc_lck.
+ */
+struct inpcb key_inp;
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
@@ -235,19 +235,10 @@ socket_flowadv_init(void)
        inp_lck_attr = lck_attr_alloc_init();
        lck_mtx_init(&inp_fc_lck, inp_lck_grp, inp_lck_attr);
 
+       lck_mtx_lock(&inp_fc_lck);
        RB_INIT(&inp_fc_tree);
-
-       inp_fcezone_size = P2ROUNDUP(sizeof (struct inp_fc_entry),
-           sizeof (u_int64_t));
-       inp_fcezone = zinit(inp_fcezone_size,
-           INP_FCEZONE_MAX * inp_fcezone_size, 0, INP_FCEZONE_NAME);
-       if (inp_fcezone == NULL) {
-               panic("%s: failed allocating %s", __func__,
-                   INP_FCEZONE_NAME);
-               /* NOTREACHED */
-       }
-       zone_change(inp_fcezone, Z_EXPAND, TRUE);
-       zone_change(inp_fcezone, Z_CALLERACCT, FALSE);
+       bzero(&key_inp, sizeof(key_inp));
+       lck_mtx_unlock(&inp_fc_lck);
 }
 
 /*
@@ -1721,7 +1712,6 @@ in_pcbrehash(struct inpcb *inp)
 void
 in_pcbremlists(struct inpcb *inp)
 {
-       struct inp_fc_entry *infce;
        inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
 
        if (inp->inp_lport) {
@@ -1736,10 +1726,11 @@ in_pcbremlists(struct inpcb *inp)
        }
        LIST_REMOVE(inp, inp_list);
 
-       infce = inp_fc_getinp(inp->inp_flowhash);
-       if (infce != NULL)
-               inp_fc_entry_free(infce);
-
+       if (inp->inp_flags2 & INP2_IN_FCTREE) {
+               inp_fc_getinp(inp->inp_flowhash,
+                       (INPFC_SOLOCKED|INPFC_REMOVE));
+               VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
+       }
        inp->inp_pcbinfo->ipi_count--;
 }
 
@@ -2031,6 +2022,7 @@ inp_calc_flowhash(struct inpcb *inp)
 {
        struct inp_flowhash_key fh __attribute__((aligned(8)));
        u_int32_t flowhash = 0;
+       struct inpcb *tmp_inp = NULL;
 
        if (inp_hash_seed == 0)
                inp_hash_seed = RandomULong();
@@ -2055,102 +2047,69 @@ try_again:
                goto try_again;
        }
 
-       return flowhash;
-}
+       inp->inp_flowhash = flowhash;
 
-/*
- * Function to compare inp_fc_entries in inp flow control tree
- */
-static inline int
-infc_cmp(const struct inp_fc_entry *fc1, const struct inp_fc_entry *fc2)
-{
-       return (fc1->infc_flowhash - fc2->infc_flowhash);
-}
-
-int
-inp_fc_addinp(struct inpcb *inp)
-{
-       struct inp_fc_entry keyfc, *infc;
-       u_int32_t flowhash = inp->inp_flowhash;
-
-       keyfc.infc_flowhash = flowhash;
-
-       lck_mtx_lock_spin(&inp_fc_lck);
-       infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc);
-       if (infc != NULL && infc->infc_inp == inp) {
-               /* Entry is already in inp_fc_tree, return */
-               lck_mtx_unlock(&inp_fc_lck);
-               return (1);
-       }
+       /* Insert the inp into inp_fc_tree */
 
-       if (infc != NULL) {
+       lck_mtx_lock(&inp_fc_lck);
+       tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
+       if (tmp_inp != NULL) {
                /*
-                * There is a different fc entry with the same
-                * flow hash but different inp pointer. There
-                * can be a collision on flow hash but the
-                * probability is low. Let's just avoid
-                * adding a second one when there is a collision
+                * There is a different inp with the same flowhash.
+                * There can be a collision on flow hash but the
+                * probability is low. Let's recompute the
+                * flowhash.
                 */
                lck_mtx_unlock(&inp_fc_lck);
-               return (0);
-       }
-
-       /* become regular mutex */
-       lck_mtx_convert_spin(&inp_fc_lck);
-
-       infc = zalloc_noblock(inp_fcezone);
-       if (infc == NULL) {
-               /* memory allocation failed */
-               lck_mtx_unlock(&inp_fc_lck);
-               return (0);
+               /* recompute hash seed */
+               inp_hash_seed = RandomULong();
+               goto try_again;
        }
-       bzero(infc, sizeof (*infc));
-
-       infc->infc_flowhash = flowhash;
-       infc->infc_inp = inp;
-
-       RB_INSERT(inp_fc_tree, &inp_fc_tree, infc);
+       RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
+       inp->inp_flags2 |= INP2_IN_FCTREE;
        lck_mtx_unlock(&inp_fc_lck);
-       return (1);
+
+       return flowhash;
 }
 
-struct inp_fc_entry*
-inp_fc_getinp(u_int32_t flowhash)
+/*
+ * Function to compare inpcbs in the inp flow control tree
+ */
+static inline int
+infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
 {
-       struct inp_fc_entry keyfc, *infc;
+       return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
+               sizeof(inp1->inp_flowhash)));
+}
 
-       keyfc.infc_flowhash = flowhash;
+struct inpcb *
+inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
+{
+       struct inpcb *inp = NULL;
+       int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
 
        lck_mtx_lock_spin(&inp_fc_lck);
-       infc = RB_FIND(inp_fc_tree, &inp_fc_tree, &keyfc);
-       if (infc == NULL) {
+       key_inp.inp_flowhash = flowhash;
+       inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
+       if (inp == NULL) {
                /* inp is not present, return */
                lck_mtx_unlock(&inp_fc_lck);
                return (NULL);
        }
 
-       RB_REMOVE(inp_fc_tree, &inp_fc_tree, infc);
-
-       if (in_pcb_checkstate(infc->infc_inp, WNT_ACQUIRE, 0) ==
-           WNT_STOPUSING) {
-               /* become regular mutex */
-               lck_mtx_convert_spin(&inp_fc_lck);
+       if (flags & INPFC_REMOVE) {
+               RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
+               lck_mtx_unlock(&inp_fc_lck);
 
-               /*
-                * This inp is going away, just don't process it.
-                */
-               inp_fc_entry_free(infc);
-               infc = NULL;
+               bzero(&(inp->infc_link), sizeof (inp->infc_link));
+               inp->inp_flags2 &= ~INP2_IN_FCTREE;
+               return (NULL);
        }
+       if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING)
+               inp = NULL;
        lck_mtx_unlock(&inp_fc_lck);
 
-       return (infc);
-}
-
-void
-inp_fc_entry_free(struct inp_fc_entry *infc)
-{
-       zfree(inp_fcezone, infc);
+       return (inp);
 }
 
 void
@@ -2209,6 +2168,7 @@ inp_reset_fc_state(struct inpcb *inp)
 int
 inp_set_fc_state(struct inpcb *inp, int advcode)
 {
+       struct inpcb *tmp_inp = NULL;
        /*
         * If there was a feedback from the interface when 
         * send operation was in progress, we should ignore
@@ -2220,7 +2180,12 @@ inp_set_fc_state(struct inpcb *inp, int advcode)
                return(0);
 
        inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
-       if (inp_fc_addinp(inp)) {
+       if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, INPFC_SOLOCKED)) 
+               != NULL) {
+               if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1)
+                       == WNT_STOPUSING)
+                       return (0);
+               VERIFY(tmp_inp == inp);
                switch (advcode) {
                case FADV_FLOW_CONTROLLED:
                        inp->inp_flags |= INP_FLOW_CONTROLLED;
@@ -2234,8 +2199,9 @@ inp_set_fc_state(struct inpcb *inp, int advcode)
                        inp->inp_socket->so_flags |= SOF_SUSPENDED;
                        break;
                }
+               return (1);
        }
-       return(1);
+       return(0);
 }
 
 /*
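
The refactor drops the separate inp_fc_entry zone and embeds the red-black linkage (infc_link) in struct inpcb itself, with one static key_inp reused for lookups under inp_fc_lck. A self-contained sketch of that embedded-node pattern using the same BSD <sys/tree.h> macros (names here are illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <sys/tree.h>

    struct node {
        RB_ENTRY(node) link;     /* linkage lives inside the object */
        unsigned int   hash;     /* key, like inp_flowhash */
    };

    static int
    node_cmp(struct node *a, struct node *b)
    {
        /* same idea as infc_cmp: order by the raw key bytes */
        return memcmp(&a->hash, &b->hash, sizeof(a->hash));
    }

    static RB_HEAD(node_tree, node) head = RB_INITIALIZER(&head);
    RB_GENERATE(node_tree, node, link, node_cmp)

    int main(void)
    {
        static struct node n = { .hash = 42 }, key;

        RB_INSERT(node_tree, &head, &n);
        key.hash = 42;           /* static key object, like key_inp above */
        printf("found: %s\n", RB_FIND(node_tree, &head, &key) ? "yes" : "no");
        return 0;
    }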
diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h
index 63dddb8fd7a0dbb819b529d8cf9fe5d26c051236..acd942f63506655e543e02b248d56dfd88dc99b3 100644 (file)
@@ -124,15 +124,6 @@ struct     label;
 #endif
 struct ifnet;
 
-#ifdef BSD_KERNEL_PRIVATE
-/* Flow control entry per socket */
-struct inp_fc_entry {
-       RB_ENTRY(inp_fc_entry) infc_link;
-       u_int32_t infc_flowhash;
-       struct inpcb *infc_inp;
-};
-#endif /* BSD_KERNEL_PRIVATE */
-
 struct inp_stat {
        u_int64_t       rxpackets;
        u_int64_t       rxbytes;
@@ -153,9 +144,11 @@ struct inpcb {
        struct  socket *inp_socket;     /* back pointer to socket */
        u_int32_t nat_cookie;           /* Cookie stored and returned to NAT */
        LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */
+       RB_ENTRY(inpcb) infc_link;      /* link for flowhash RB tree */
        struct  inpcbport *inp_phd;     /* head of this list */
        inp_gen_t inp_gencnt;           /* generation count of this instance */
        u_int32_t inp_flags;            /* generic IP/datagram flags */
+       u_int32_t inp_flags2;           /* generic IP/datagram flags #2 */
        u_int32_t inp_flow;
 
        u_char  inp_sndinprog_cnt;      /* outstanding send operations */
@@ -611,6 +604,9 @@ struct inpcbinfo {          /* XXX documentation, prefixes */
 #define        IN6P_RECV_ANYIF         INP_RECV_ANYIF
 #define        IN6P_CONTROLOPTS INP_CONTROLOPTS
 #define        IN6P_NO_IFT_CELLULAR    INP_NO_IFT_CELLULAR
+
+/* Overflowed INP flags; use INP2 prefix to avoid misuse */
+#define INP2_IN_FCTREE         0x2     /* in inp_fc_tree */
        /*
         * socket AF version is {newer than,or include}
         * actual datagram AF version
@@ -702,9 +698,11 @@ extern int inp_bindif(struct inpcb *, unsigned int);
 extern int     inp_nocellular(struct inpcb *, unsigned int);
 extern u_int32_t inp_calc_flowhash(struct inpcb *);
 extern void    socket_flowadv_init(void);
-extern int     inp_fc_addinp(struct inpcb *);
-extern struct inp_fc_entry *inp_fc_getinp(u_int32_t);
-extern void    inp_fc_entry_free(struct inp_fc_entry *);
+
+/* Flags used by inp_fc_getinp */
+#define INPFC_SOLOCKED 0x1
+#define INPFC_REMOVE   0x2
+extern struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
 extern void    inp_fc_feedback(struct inpcb *);
 extern void    inp_reset_fc_state(struct inpcb *);
 extern int     inp_set_fc_state(struct inpcb *, int advcode);
diff --git a/bsd/sys/cprotect.h b/bsd/sys/cprotect.h
index 0dda075ac9e7ec31c1a60f7c4990558ca6aed8ea..eb0a134fdff7fca6d8ec54373325449543adf2eb 100644 (file)
@@ -65,6 +65,9 @@ extern "C" {
 #define CP_READ_ACCESS         0x1
 #define CP_WRITE_ACCESS 0x2
 
+/* 
+ * Check for this version when deciding to enable features
+ */
 #define CONTENT_PROTECTION_XATTR_NAME  "com.apple.system.cprotect"
 #define CP_NEW_MAJOR_VERS 4
 #define CP_PREV_MAJOR_VERS 2
diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h
index ae8970aa43feced0288deb80dae14b1bff3e1ced..9a76fbc8011f8b43a6e1629ff6b206f04b91624b 100644 (file)
@@ -150,6 +150,7 @@ __BEGIN_DECLS
 #define        MACH_REMOTE_AST         0x17    /* AST signal issued to remote processor */
 
 #define        MACH_SCHED_LPA_BROKEN   0x18    /* last_processor affinity broken in choose_processor */
+#define MACH_DEEP_IDLE          0x19   /* deep idle on master processor */
 
 /* Codes for pmap (DBG_MACH_PMAP) */     
 #define PMAP__CREATE           0x0
@@ -250,6 +251,7 @@ __BEGIN_DECLS
 #define DBG_DRVSD              19      /* Secure Digital */
 #define DBG_DRVNAND            20      /* NAND drivers and layers */
 #define DBG_SSD                        21      /* SSD */
+#define DBG_DRVSPI             22      /* SPI */
 
 /* Backwards compatibility */
 #define        DBG_DRVPOINTING         DBG_DRVHID              /* OBSOLETE: Use DBG_DRVHID instead */
@@ -539,7 +541,7 @@ extern void kdbg_trace_data(struct proc *proc, long *arg_pid);
 extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4);
 
 extern void kdbg_dump_trace_to_file(const char *);
-void start_kern_tracing(unsigned int);
+void start_kern_tracing(unsigned int, boolean_t);
 struct task;
 extern void kdbg_get_task_name(char*, int, struct task *task);
 void disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags);
diff --git a/config/MasterVersion b/config/MasterVersion
index 80a99e0df34efe60ffb4b01e60a1c1c4eac022c4..bf00b8e5c609398173ab79aba61756f544388a91 100644 (file)
@@ -1,4 +1,4 @@
-12.4.0
+12.5.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
diff --git a/config/Private.x86_64.exports b/config/Private.x86_64.exports
index 73963f2c23ed7c028f06f5a3539b8fbc37739d27..62dbcc51cd6c86eb149c189e262190d64a903c1b 100644 (file)
@@ -5,6 +5,7 @@ _SHA256_Update
 __ZN22IOInterruptEventSource7warmCPUEy
 _acpi_install_wake_handler
 _acpi_sleep_kernel
+_acpi_idle_kernel
 _add_fsevent
 _apic_table
 _apply_func_phys
diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h
index da180376c113f5ec9f81a759beb9d70146303f03..36d98707c7096f2cd9871620c372ae2d233afe8e 100644 (file)
@@ -102,7 +102,11 @@ struct IOHibernateImageHeader
     uint32_t   sleepTime;
     uint32_t    compression;
 
-    uint32_t   reserved[68];           // make sizeof == 512
+    uint32_t   reserved[62];           // make sizeof == 512
+
+    uint64_t   restoreTime1 __attribute__ ((packed));
+    uint64_t   restoreTime2 __attribute__ ((packed));
+    uint64_t   restoreTime3 __attribute__ ((packed));
 
     uint64_t   encryptEnd __attribute__ ((packed));
     uint64_t   deviceBase __attribute__ ((packed));
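
The three packed 64-bit timestamps occupy exactly the space carved out of the reserved area (68 - 62 = 6 uint32_t = 24 bytes = 3 x 8 bytes), so sizeof(IOHibernateImageHeader) stays 512 and the offsets of encryptEnd and the fields after it do not move.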
diff --git a/iokit/IOKit/IOPolledInterface.h b/iokit/IOKit/IOPolledInterface.h
index 09ef735d919c3a13e2767e45e98f909832621f26..ec500eae29ed8e48ac274214d13b5c7b7d944cad 100644 (file)
@@ -32,6 +32,7 @@
 #include <libkern/c++/OSObject.h>
 
 #define kIOPolledInterfaceSupportKey "IOPolledInterface"
+#define kIOPolledInterfaceActiveKey  "IOPolledInterfaceActive"
 
 enum
 {
diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h
index f3d1bed4a817ca5885b3739f0bdb95d5fa04e712..94a230651663c2a57c8f40d3fbf5d74eef165c41 100644 (file)
@@ -1785,7 +1785,7 @@ private:
     void ParentChangeNotifyInterestedDriversDidChange ( void );
     void ParentChangeTellCapabilityDidChange ( void );
     void ParentChangeAcknowledgePowerChange ( void );
-    void ParentChangeCancelIdleTimer( IOPMPowerStateIndex );
+    void ParentChangeRootChangeDown( void );
 
     void all_done ( void );
     void start_ack_timer ( void );
@@ -1793,9 +1793,10 @@ private:
     void startSettleTimer( void );
     bool checkForDone ( void );
     bool responseValid ( uint32_t x, int pid );
-    void computeDesiredState ( unsigned long tempDesire = 0 );
+    void computeDesiredState( unsigned long tempDesire, bool computeOnly );
     void trackSystemSleepPreventers( IOPMPowerStateIndex, IOPMPowerStateIndex, IOPMPowerChangeFlags );
     void tellSystemCapabilityChange( uint32_t nextMS );
+    void restartIdleTimer( void );
 
        static void ack_timer_expired( thread_call_param_t, thread_call_param_t );
        static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * );
diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h
index c2003ef4716ae1a41e87888f679cd415a03533b0..f01c84178bb10ae60492d59e7462db15e4630949 100644 (file)
@@ -90,6 +90,13 @@ enum {
 
     @constant kIOPMInitialDeviceState
     Indicates the initial power state for the device. If <code>initialPowerStateForDomainState()</code> returns a power state with this flag set in the capability field, then the initial power change is performed without calling the driver's <code>setPowerState()</code>.
+
+    @constant kIOPMRootDomainState
+    An indication that the power flags represent the state of the root power
+    domain. This bit must not be set in the IOPMPowerState structure.
+    Power Management may pass this bit to initialPowerStateForDomainState()
+    or powerStateForDomainState() to map from a global system state to the
+    desired device state.
 */
 typedef unsigned long IOPMPowerFlags;
 enum {
@@ -101,7 +108,8 @@ enum {
     kIOPMRestartCapability          = 0x00000080,
     kIOPMSleep                      = 0x00000001,
     kIOPMRestart                    = 0x00000080,
-    kIOPMInitialDeviceState         = 0x00000100
+    kIOPMInitialDeviceState         = 0x00000100,
+    kIOPMRootDomainState            = 0x00000200
 };
 
 /*
@@ -247,6 +255,30 @@ enum {
  */
 #define kIOPMDestroyFVKeyOnStandbyKey       "DestroyFVKeyOnStandby"
 
+/*******************************************************************************
+ *
+ * Properties that can control power management behavior
+ *
+ ******************************************************************************/
+
+/* kIOPMResetPowerStateOnWakeKey
+ * If an IOService publishes this key with the value of kOSBooleanTrue,
+ * then PM will disregard the influence from changePowerStateToPriv() or
+ * any activity tickles that occurred before system sleep when resolving
+ * the initial device power state on wake. Influences from power children
+ * and changePowerStateTo() are not eliminated. At the earliest opportunity
+ * upon system wake, PM will query the driver for a new power state to be
+ * installed as the initial changePowerStateToPriv() influence, by calling
+ * initialPowerStateForDomainState() with both kIOPMRootDomainState and
+ * kIOPMPowerOn flags set. The default implementation will always return
+ * the lowest power state. Drivers can override this default behavior to
+ * immediately raise the power state when there are work blocked on the
+ * power change, and cannot afford to wait until the next activity tickle.
+ * This property should be statically added to a driver's plist or set at
+ * runtime before calling PMinit().
+ */
+#define kIOPMResetPowerStateOnWakeKey       "IOPMResetPowerStateOnWake"
+
 /*******************************************************************************
  *
  * Driver PM Assertions
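
A hypothetical driver opting in at runtime would set the property before calling PMinit(); the class name, power-state table, and superclass handling below are illustrative only:

    bool com_example_Driver::start(IOService * provider)
    {
        if (!super::start(provider))
            return false;

        /* must be set before PMinit() for PM to honor it on the next wake */
        setProperty(kIOPMResetPowerStateOnWakeKey, kOSBooleanTrue);

        PMinit();
        provider->joinPMtree(this);
        registerPowerDriver(this, ourPowerStates, kNumPowerStates);
        return true;
    }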
diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h
index 6ff802dde6c480250449aa6cb7086fa15cfda878..55f86b4755ebff900662225a97a96eff6fae7d32 100644 (file)
@@ -660,6 +660,12 @@ enum {
  */
 #define kIOPMUserWakeAlarmScheduledKey      "UserWakeAlarmScheduled"
 
+/* kIOPMDeepIdleSupportedKey
+ * Presence of this key indicates Deep Idle is supported on this platform.
+ * Key will always refer to a value of kOSBooleanTrue.
+ */
+#define kIOPMDeepIdleSupportedKey           "IOPMDeepIdleSupported"
+
 /*****************************************************************************
  *
  * System Sleep Policy
@@ -743,7 +749,8 @@ enum {
     kIOPMSleepTypeHibernate                 = 4,
     kIOPMSleepTypeStandby                   = 5,
     kIOPMSleepTypePowerOff                  = 6,
-    kIOPMSleepTypeLast                      = 7
+    kIOPMSleepTypeDeepIdle                  = 7,
+    kIOPMSleepTypeLast                      = 8
 };
 
 // System Sleep Flags
diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp
index 74b18022eda9786f3d8cb1c76a4c8c3140d9dd58..034ca65ca33b9a786521a81624763a0629dc1d3e 100644 (file)
@@ -774,6 +774,14 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize,
        if (kIOReturnSuccess != err)
            break;
 
+       vars->media = part;
+        next = part;
+       while (next)
+       {
+           next->setProperty(kIOPolledInterfaceActiveKey, kOSBooleanTrue);
+           next = next->getParentEntry(gIOServicePlane);
+       }
+
        *fileVars    = vars;
        *fileExtents = extentsData;
     
@@ -1781,6 +1789,8 @@ IOHibernateSystemWake(void)
 static IOReturn
 IOHibernateDone(IOHibernateVars * vars)
 {
+    IORegistryEntry * next;
+
     hibernate_teardown(vars->page_list, vars->page_list_wired, vars->page_list_pal);
 
     if (vars->videoMapping)
@@ -1821,9 +1831,14 @@ IOHibernateDone(IOHibernateVars * vars)
         IOService::getPMRootDomain()->removeProperty(kIOHibernateGfxStatusKey);
     }
 
-
     if (vars->fileVars)
     {
+       if ((next = vars->fileVars->media)) do
+       {
+           next->removeProperty(kIOPolledInterfaceActiveKey);
+           next = next->getParentEntry(gIOServicePlane);
+       }
+       while (next);
        IOPolledFileClose(vars->fileVars);
     }
 
@@ -2716,6 +2731,12 @@ hibernate_machine_init(void)
            gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1], 
            gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]);
 
+    HIBLOG("restore times %qd, %qd, %qd ms, tsc 0x%qx scale 0x%x\n", 
+       (((gIOHibernateCurrentHeader->restoreTime1 * pal_rtc_nanotime_info.scale) >> 32) / 1000000),
+       (((gIOHibernateCurrentHeader->restoreTime2 * pal_rtc_nanotime_info.scale) >> 32) / 1000000),
+       (((gIOHibernateCurrentHeader->restoreTime3 * pal_rtc_nanotime_info.scale) >> 32) / 1000000),
+       gIOHibernateCurrentHeader->restoreTime1, pal_rtc_nanotime_info.scale);
+
     if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode)
         hibernate_page_list_discard(vars->page_list);
 
@@ -2756,8 +2777,19 @@ hibernate_machine_init(void)
                break;
 
            case kIOHibernateHandoffTypeMemoryMap:
+
+               clock_get_uptime(&allTime);
+
                hibernate_newruntime_map(data, handoff->bytecount, 
                                         gIOHibernateCurrentHeader->systemTableOffset);
+
+               clock_get_uptime(&endTime);
+           
+               SUB_ABSOLUTETIME(&endTime, &allTime);
+               absolutetime_to_nanoseconds(endTime, &nsec);
+           
+               HIBLOG("hibernate_newruntime_map time: %qd ms, ", nsec / 1000000ULL);
+
                break;
 
            case kIOHibernateHandoffTypeDeviceTree:
diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h
index 6e044eb5749a34b9f230cf1a94f5d3e2385ee464..23f875d18e5428b467ef326ec3205f67f3eb1074 100644 (file)
@@ -64,6 +64,7 @@ typedef struct IOHibernateVars IOHibernateVars;
 struct IOPolledFileIOVars
 {
     struct kern_direct_file_io_ref_t * fileRef;
+    IORegistryEntry *                   media;
     class OSArray *                    pollers;
     IOByteCount                                blockSize;
     uint8_t *                                  buffer;
diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c
index 10bd705f509f94fbbae7490dbc61920d9eb91bea..b45b2acd3bf14cf80c22a55d575c654dc3d97e74 100644 (file)
@@ -37,9 +37,7 @@
 #include <libkern/WKdm.h>
 #include "IOHibernateInternal.h"
 
-#if defined(__i386__) || defined(__x86_64__)
-#include <i386/pal_hibernate.h>
-#endif
+#include <machine/pal_hibernate.h>
 
 /*
 This code is linked into the kernel but part of the "__HIB" section, which means
@@ -76,6 +74,27 @@ extern void acpi_wake_prot_entry(void);
 
 #if defined(__i386__) || defined(__x86_64__)
 
+#define rdtsc(lo,hi) \
+    __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi))
+
+static inline uint64_t rdtsc64(void)
+{
+    uint64_t lo, hi;
+    rdtsc(lo, hi);
+    return ((hi) << 32) | (lo);
+}
+
+#else
+
+static inline uint64_t rdtsc64(void)
+{
+    return (0);
+}
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+#if defined(__i386__) || defined(__x86_64__)
+
 #define DBGLOG 1
 
 #include <architecture/i386/pio.h>
@@ -441,6 +460,9 @@ hibernate_kernel_entrypoint(uint32_t p1,
     uint32_t handoffPages;
     uint32_t handoffPageCount;
 
+    uint64_t timeStart, time;
+    timeStart = rdtsc64();
+
     C_ASSERT(sizeof(IOHibernateImageHeader) == 512);
 
     headerPhys = ptoa_64(p1);
@@ -604,8 +626,10 @@ hibernate_kernel_entrypoint(uint32_t p1,
            if (!conflicts)
            {
 //              if (compressedSize)
+               time = rdtsc64();
                pageSum = store_one_page(gIOHibernateCurrentHeader->processorFlags,
                                         src, compressedSize, 0, ppnum);
+                gIOHibernateCurrentHeader->restoreTime2 += (rdtsc64() - time);
                if (stage != 2)
                    sum += pageSum;
                uncompressedPages++;
@@ -658,6 +682,8 @@ hibernate_kernel_entrypoint(uint32_t p1,
 
     // -- copy back conflicts
 
+    time = rdtsc64();
+
     pageListPage = copyPageListHeadPage;
     while (pageListPage)
     {
@@ -681,6 +707,8 @@ hibernate_kernel_entrypoint(uint32_t p1,
 
     pal_hib_patchup();
 
+    gIOHibernateCurrentHeader->restoreTime3 = (rdtsc64() - time);
+
     // -- image has been destroyed...
 
     gIOHibernateCurrentHeader->actualImage1Sum         = sum;
@@ -690,6 +718,8 @@ hibernate_kernel_entrypoint(uint32_t p1,
 
     gIOHibernateState = kIOHibernateStateWakingFromHibernate;
 
+    gIOHibernateCurrentHeader->restoreTime1 = (rdtsc64() - timeStart);
+
 #if CONFIG_SLEEP
 #if defined(__i386__) || defined(__x86_64__)
     typedef void (*ResetProc)(void);
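
The restoreTime1/2/3 counters written here hold raw TSC deltas (whole restore, store_one_page time, and conflict copy-back respectively); hibernate_machine_init() in IOHibernateIO.cpp converts them to milliseconds with the nanotime scale shown earlier, i.e. ms = ((tsc * scale) >> 32) / 1000000.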
diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp
index aa24637cb4485a632b6f6a1c43da37c13e2a6909..8d49aeebe03389fd55f04874300387ff19068fa2 100644 (file)
@@ -906,14 +906,21 @@ void IOGeneralMemoryDescriptor::free()
        reserved->dp.memory = 0;
        UNLOCK;
     }
-
-    if ((kIOMemoryTypePhysical != type) && (kIOMemoryTypePhysical64 != type))
+    if ((kIOMemoryTypePhysical == type) || (kIOMemoryTypePhysical64 == type))
+    {
+       ioGMDData * dataP;
+       if (_memoryEntries && (dataP = getDataP(_memoryEntries)) && dataP->fMappedBase)
+       {
+           dataP->fMapper->iovmFree(atop_64(dataP->fMappedBase), _pages);
+           dataP->fMappedBase = 0;
+       }
+    }
+    else
     {
-       while (_wireCount)
-           complete();
+       while (_wireCount) complete();
     }
-    if (_memoryEntries)
-        _memoryEntries->release();
+
+    if (_memoryEntries) _memoryEntries->release();
 
     if (_ranges.v && !(kIOMemoryAsReference & _flags))
     {
diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp
index b3d6e1f44ca63de9a97a031d3221c3932f101cc3..742b1df588e799179f730d5900c0f56af8f20d3b 100644 (file)
@@ -2535,12 +2535,6 @@ bool IOPMrootDomain::tellChangeDown( unsigned long stateNum )
 
         IOService::updateConsoleUsers(NULL, kIOMessageSystemWillSleep);
 
-        // Notify platform that sleep has begun
-        getPlatform()->callPlatformFunction(
-                        sleepMessagePEFunction, false,
-                        (void *)(uintptr_t) kIOMessageSystemWillSleep,
-                        NULL, NULL, NULL);
-
         // Two change downs are sent by IOServicePM. Ignore the 2nd.
         // But tellClientsWithResponse() must be called for both.
         ignoreTellChangeDown = true;
@@ -2746,6 +2740,13 @@ IOReturn IOPMrootDomain::sysPowerDownHandler(
             DLOG("sysPowerDownHandler timeout %d s\n", (int) (params->maxWaitForReply / 1000 / 1000));
 #endif
 
+            // Notify platform that sleep has begun, after the early
+            // sleep policy evaluation.
+            getPlatform()->callPlatformFunction(
+                            sleepMessagePEFunction, false,
+                            (void *)(uintptr_t) kIOMessageSystemWillSleep,
+                            NULL, NULL, NULL);
+
             if ( !OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) )
             {
                 // Purposely delay the ack and hope that shutdown occurs quickly.
@@ -3683,6 +3684,32 @@ struct IOPMSystemSleepPolicyTable
     IOPMSystemSleepPolicyEntry  entries[];
 } __attribute__((packed));
 
+enum {
+    kIOPMSleepAttributeHibernateSetup   = 0x00000001,
+    kIOPMSleepAttributeHibernateSleep   = 0x00000002
+};
+
+static uint32_t
+getSleepTypeAttributes( uint32_t sleepType )
+{
+    static const uint32_t sleepTypeAttributes[ kIOPMSleepTypeLast ] =
+    {
+    /* invalid   */ 0,
+    /* abort     */ 0,
+    /* normal    */ 0,
+    /* safesleep */ kIOPMSleepAttributeHibernateSetup,
+    /* hibernate */ kIOPMSleepAttributeHibernateSetup | kIOPMSleepAttributeHibernateSleep,
+    /* standby   */ kIOPMSleepAttributeHibernateSetup | kIOPMSleepAttributeHibernateSleep,
+    /* poweroff  */ kIOPMSleepAttributeHibernateSetup | kIOPMSleepAttributeHibernateSleep,
+    /* deepidle  */ 0
+    };
+
+    if (sleepType >= kIOPMSleepTypeLast)
+        return 0;
+
+    return sleepTypeAttributes[sleepType];
+}
+
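This lookup table replaces ordinal comparisons on the sleep type in the callers below; ordering-based tests such as sleepType >= kIOPMSleepTypeSafeSleep would wrongly sweep in kIOPMSleepTypeDeepIdle, which sorts last in the enum but needs no hibernate setup. Illustrative use of the names above:

    if (getSleepTypeAttributes(params->sleepType) &
        kIOPMSleepAttributeHibernateSetup)
    {
        /* this sleep type requires the hibernate image to be set up */
    }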
 bool IOPMrootDomain::evaluateSystemSleepPolicy(
     IOPMSystemSleepParameters * params, int sleepPhase, uint32_t * hibMode )
 {
@@ -3829,7 +3856,8 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy(
             goto done;
         }
 
-        if ((params->sleepType >= kIOPMSleepTypeSafeSleep) &&
+        if ((getSleepTypeAttributes(params->sleepType) &
+             kIOPMSleepAttributeHibernateSetup) &&
             ((*hibMode & kIOHibernateModeOn) == 0))
         {
             *hibMode |= (kIOHibernateModeOn | kIOHibernateModeSleep);
@@ -3951,9 +3979,10 @@ void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void )
                                   &hibernateMode))
     {
         if (!hibernateNoDefeat &&
-            (gEarlySystemSleepParams.sleepType == kIOPMSleepTypeNormalSleep))
+            ((getSleepTypeAttributes(gEarlySystemSleepParams.sleepType) &
+              kIOPMSleepAttributeHibernateSetup) == 0))
         {
-            // Disable hibernate setup for normal sleep
+            // Skip hibernate setup
             hibernateDisabled = true;
         }
     }
@@ -3991,7 +4020,8 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
     if (evaluateSystemSleepPolicy(&params, kIOPMSleepPhase2, &hibernateMode))
     {
         if ((hibernateDisabled || hibernateAborted) &&
-            (params.sleepType != kIOPMSleepTypeNormalSleep))
+            (getSleepTypeAttributes(params.sleepType) &
+             kIOPMSleepAttributeHibernateSetup))
         {
             // Final evaluation picked a state requiring hibernation,
             // but hibernate setup was skipped. Retry using the early
@@ -4016,9 +4046,10 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void )
             paramsData->release();
         }
 
-        if (params.sleepType >= kIOPMSleepTypeHibernate)
+        if (getSleepTypeAttributes(params.sleepType) &
+            kIOPMSleepAttributeHibernateSleep)
         {
-            // Disable safe sleep to force the hibernate path
+            // Disable sleep to force hibernation
             gIOHibernateMode &= ~kIOHibernateModeSleep;
         }
     }
@@ -4410,8 +4441,7 @@ void IOPMrootDomain::overrideOurPowerChange(
     uint32_t    changeFlags = *inOutChangeFlags;
     uint32_t    currentPowerState = (uint32_t) getPowerState();
 
-    if ((currentPowerState == powerState) ||
-        (changeFlags & kIOPMParentInitiated))
+    if (changeFlags & kIOPMParentInitiated)
     {
         // FIXME: cancel any parent change (unexpected)
         // Root parent is permanently pegged at max power,
@@ -4453,6 +4483,20 @@ void IOPMrootDomain::overrideOurPowerChange(
             // Revert device desire from SLEEP->ON.
             changePowerStateToPriv(ON_STATE);
         }
+        else
+        {
+            // Broadcast power down
+            *inOutChangeFlags |= kIOPMRootChangeDown;
+        }
+    }
+    else if (powerState > currentPowerState)
+    {
+        if ((_currentCapability & kIOPMSystemCapabilityCPU) == 0)
+        {
+            // Broadcast power up when waking from sleep, but not for the
+            // initial power change at boot by checking for cpu capability.
+            *inOutChangeFlags |= kIOPMRootChangeUp;
+        }
     }
 }
 
@@ -6079,7 +6123,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
 
             if ( minutesToIdleSleep > minutesToDisplayDim )
                 minutesDelta = minutesToIdleSleep - minutesToDisplayDim;
-            else if( minutesToIdleSleep == minutesToDisplayDim )
+            else if( minutesToIdleSleep <= minutesToDisplayDim )
                 minutesDelta = 1;
 
             if ((sleepSlider == 0) && (minutesToIdleSleep != 0))
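The relaxed comparison matters whenever the idle sleep timeout is at or below the display dim timeout: the old equality test produced the one-minute delta only when the two matched exactly. Illustrative values (hypothetical settings):

    uint32_t minutesToDisplayDim = 10, minutesToIdleSleep = 5, minutesDelta = 0;

    if (minutesToIdleSleep > minutesToDisplayDim)
        minutesDelta = minutesToIdleSleep - minutesToDisplayDim;
    else if (minutesToIdleSleep <= minutesToDisplayDim)
        minutesDelta = 1;   /* with '==' this 5 < 10 case left minutesDelta at 0 */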
index 0eabf83f9d70ebd488ce9ff04b5d4ebd2964723c..f35c6b6d81739a2199c196d212c01889a45ee900 100644 (file)
@@ -3108,9 +3108,11 @@ void IOService::doServiceMatch( IOOptionBits options )
         if( matches) {
 
             lockForArbitration();
-            if( 0 == (__state[0] & kIOServiceFirstPublishState))
+            if( 0 == (__state[0] & kIOServiceFirstPublishState)) {
+               getMetaClass()->addInstance(this);
                 deliverNotification( gIOFirstPublishNotification,
                                      kIOServiceFirstPublishState, 0xffffffff );
+            }
            LOCKREADNOTIFY();
             __state[1] &= ~kIOServiceNeedConfigState;
             __state[1] |= kIOServiceConfigState;
@@ -3134,9 +3136,6 @@ void IOService::doServiceMatch( IOOptionBits options )
             }
 
            UNLOCKNOTIFY();
-           if (didRegister) {
-               getMetaClass()->addInstance(this);
-           }
             unlockForArbitration();
 
             if (keepGuessing && matches->getCount() && (kIOReturnSuccess == getResources()))
index 97711e1c8d28683e2e58d025290ceb51f9a9d0e2..4d35ed45bbf100807893416aaa16e58a92e842e3 100644 (file)
@@ -83,17 +83,18 @@ OSDefineMetaClassAndStructors( PMEventDetails, OSObject );
 // Globals
 //******************************************************************************
 
-static bool                  gIOPMInitialized   = false;
-static uint32_t              gIOPMBusyCount     = 0;
-static uint32_t              gIOPMWorkCount     = 0;
-static IOWorkLoop *          gIOPMWorkLoop      = 0;
-static IOPMRequestQueue *    gIOPMRequestQueue  = 0;
-static IOPMRequestQueue *    gIOPMReplyQueue    = 0;
-static IOPMWorkQueue *       gIOPMWorkQueue     = 0;
-static IOPMCompletionQueue * gIOPMFreeQueue     = 0;
-static IOPMRequest *         gIOPMRequest       = 0;
-static IOService *           gIOPMRootNode      = 0;
-static IOPlatformExpert *    gPlatform          = 0;
+static bool                  gIOPMInitialized       = false;
+static uint32_t              gIOPMBusyCount         = 0;
+static uint32_t              gIOPMWorkCount         = 0;
+static uint32_t              gIOPMTickleGeneration  = 0;
+static IOWorkLoop *          gIOPMWorkLoop          = 0;
+static IOPMRequestQueue *    gIOPMRequestQueue      = 0;
+static IOPMRequestQueue *    gIOPMReplyQueue        = 0;
+static IOPMWorkQueue *       gIOPMWorkQueue         = 0;
+static IOPMCompletionQueue * gIOPMFreeQueue         = 0;
+static IOPMRequest *         gIOPMRequest           = 0;
+static IOService *           gIOPMRootNode          = 0;
+static IOPlatformExpert *    gPlatform              = 0;
 
 static const OSSymbol *      gIOPMPowerClientDevice     = 0;
 static const OSSymbol *      gIOPMPowerClientDriver     = 0;
@@ -555,6 +556,10 @@ void IOService::PMinit ( void )
             gIOPMRootNode = this;
             fParentsKnowState = true;
         }
+        else if (getProperty(kIOPMResetPowerStateOnWakeKey) == kOSBooleanTrue)
+        {
+            fResetPowerStateOnWake = true;
+        }
 
         fAckTimer = thread_call_allocate(
                        &IOService::ack_timer_expired, (thread_call_param_t)this);
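PMinit() samples kIOPMResetPowerStateOnWakeKey from the registry, so a driver must publish the property before calling it. A minimal opt-in sketch (hypothetical IOService subclass; myPowerStates and kMyNumPowerStates are placeholders):

    bool MyDriver::start( IOService * provider )
    {
        if (!super::start(provider))
            return false;

        // Opt in: PM resets this service to its lowest power state on wake
        setProperty(kIOPMResetPowerStateOnWakeKey, kOSBooleanTrue);

        PMinit();                       // reads the property, see above
        provider->joinPMtree(this);
        // myPowerStates/kMyNumPowerStates: placeholder power state table
        registerPowerDriver(this, myPowerStates, kMyNumPowerStates);
        return true;
    }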
@@ -852,7 +857,9 @@ void IOService::handlePMstop ( IOPMRequest * request )
         PM_UNLOCK();
     }
 
-    // Tell idleTimerExpired() to ignore idle timer.
+    // Clear idle period to prevent idleTimerExpired() from servicing
+    // idle timer expirations.
+
     fIdleTimerPeriod = 0;
     if (fIdleTimer && thread_call_cancel(fIdleTimer))
         release();
@@ -1667,12 +1674,12 @@ IOReturn IOService::acknowledgeSetPowerState ( void )
 void IOService::adjustPowerState ( uint32_t clamp )
 {
        PM_ASSERT_IN_GATE();
-       computeDesiredState(clamp);
+       computeDesiredState(clamp, false);
        if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane))
        {
         IOPMPowerChangeFlags changeFlags = kIOPMSelfInitiated;
 
-        // Indicate that children desires were ignored, and do not ask
+        // Indicate that children desires must be ignored, and do not ask
         // apps for permission to drop power. This is used by root domain
         // for demand sleep.
 
@@ -1793,7 +1800,7 @@ void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request )
     OSIterator *                iter;
     OSObject *                  next;
     IOPowerConnection *         connection;
-    IOPMPowerStateIndex  newPowerState;
+    IOPMPowerStateIndex  maxPowerState;
     IOPMPowerFlags              combinedPowerFlags;
        bool                             savedParentsKnowState;
        IOReturn                         result = IOPMAckImplied;
@@ -1834,16 +1841,20 @@ void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request )
 
     if ( fControllingDriver && !fInitialPowerChange )
     {
-               newPowerState = fControllingDriver->maxCapabilityForDomainState(
+               maxPowerState = fControllingDriver->maxCapabilityForDomainState(
                                                        combinedPowerFlags);
 
-        // Absorb parent's kIOPMSynchronize flag.
+        // Use kIOPMSynchronize below instead of kIOPMRootBroadcastFlags
+        // so that the root change flags are not propagated when a service
+        // must change power state due to root's will-change notification.
+        // Root does not change power state for kIOPMSynchronize.
+
         myChangeFlags = kIOPMParentInitiated | kIOPMDomainWillChange |
                         (parentChangeFlags & kIOPMSynchronize);
 
                result = startPowerChange(
                  /* flags        */    myChangeFlags,
-                 /* power state  */    newPowerState,
+                 /* power state  */    maxPowerState,
                                 /* domain flags */     combinedPowerFlags,
                                 /* connection   */     whichParent,
                                 /* parent flags */     parentPowerFlags);
@@ -1909,8 +1920,10 @@ void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request )
        IOPowerConnection *      whichParent = (IOPowerConnection *) request->fArg1;
     IOPMPowerChangeFlags parentChangeFlags = (IOPMPowerChangeFlags)(uintptr_t) request->fArg2;
     IOPMPowerChangeFlags myChangeFlags;
-    IOPMPowerStateIndex  newPowerState;
-    IOPMPowerStateIndex  initialDesire;
+    IOPMPowerStateIndex  maxPowerState;
+    IOPMPowerStateIndex  initialDesire = 0;
+    bool                 computeDesire = false;
+    bool                 desireChanged = false;
        bool                             savedParentsKnowState;
        IOReturn                         result = IOPMAckImplied;
 
@@ -1929,29 +1942,63 @@ void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request )
 
     if ( fControllingDriver )
        {
-               newPowerState = fControllingDriver->maxCapabilityForDomainState(
+               maxPowerState = fControllingDriver->maxCapabilityForDomainState(
                                                        fParentsCurrentPowerFlags);
 
         if (fInitialPowerChange)
         {
+            computeDesire = true;
             initialDesire = fControllingDriver->initialPowerStateForDomainState(
-                            fParentsCurrentPowerFlags);
-            computeDesiredState(initialDesire);
+                                fParentsCurrentPowerFlags);
         }
-        else if (fAdvisoryTickleUsed && (newPowerState > 0) &&
-                 ((parentChangeFlags & kIOPMSynchronize) == 0))
+        else if (parentChangeFlags & kIOPMRootChangeUp)
         {
-            // re-compute desired state in case advisory tickle was enabled
-            computeDesiredState();
+            if (fAdvisoryTickleUsed)
+            {
+                // On system wake, re-compute the desired power state since
+                // gIOPMAdvisoryTickleEnabled, an input to computeDesiredState(),
+                // will change for a full wake. This is not necessary for a
+                // dark wake because powerChangeDone() will handle the dark
+                // to full wake case, but it does no harm.
+
+                desireChanged = true;
+            }
+
+            if (fResetPowerStateOnWake)
+            {
+                // Query the driver for the desired power state on system wake.
+                // Default implementation returns the lowest power state.
+
+                IOPMPowerStateIndex wakePowerState =
+                    fControllingDriver->initialPowerStateForDomainState(
+                        kIOPMRootDomainState | kIOPMPowerOn );
+
+                // fDesiredPowerState was adjusted before going to sleep
+                // with fDeviceDesire at min.
+
+                if (wakePowerState > fDesiredPowerState)
+                {
+                    // Must schedule a power adjustment if we changed the
+                    // device desire. That will update the desired domain
+                    // power on the parent power connection and ping the
+                    // power parent if necessary.
+
+                    updatePowerClient(gIOPMPowerClientDevice, wakePowerState);
+                    desireChanged = true;
+                }
+            }
         }
 
-        // Absorb parent's kIOPMSynchronize flag.
+        if (computeDesire || desireChanged)
+            computeDesiredState(initialDesire, false);
+
+        // Absorb and propagate parent's broadcast flags
         myChangeFlags = kIOPMParentInitiated | kIOPMDomainDidChange |
-                        (parentChangeFlags & kIOPMSynchronize);
+                        (parentChangeFlags & kIOPMRootBroadcastFlags);
 
                result = startPowerChange(
                                 /* flags        */     myChangeFlags,
-                 /* power state  */    newPowerState,
+                 /* power state  */    maxPowerState,
                                 /* domain flags */     fParentsCurrentPowerFlags,
                                 /* connection   */     whichParent,
                                 /* parent flags */     0);
@@ -1974,12 +2021,13 @@ void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request )
        }
 
        // If the parent registers its power driver late, then this is the
-       // first opportunity to tell our parent about our desire. 
+       // first opportunity to tell our parent about our desire. Likewise if
+       // the child's desire changed during a parent change notification.
 
-       if (!savedParentsKnowState && fParentsKnowState)
+       if ((!savedParentsKnowState && fParentsKnowState) || desireChanged)
        {
-               PM_LOG1("%s::powerDomainDidChangeTo parentsKnowState = true\n",
-                       getName());
+               PM_LOG1("%s::powerDomainDidChangeTo parentsKnowState %d\n",
+                       getName(), fParentsKnowState);
                requestDomainPower( fDesiredPowerState );
        }
 
@@ -2057,10 +2105,10 @@ void IOService::trackSystemSleepPreventers(
         {
             IOPMRequest *   cancelRequest;
 
-            cancelRequest = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
+            cancelRequest = acquirePMRequest( getPMRootDomain(), kIOPMRequestTypeIdleCancel );
             if (cancelRequest)
             {
-                getPMRootDomain()->submitPMRequest( cancelRequest );
+                submitPMRequest( cancelRequest );
             }
         }
 #endif
@@ -2538,7 +2586,7 @@ void IOService::handlePowerOverrideChanged ( IOPMRequest * request )
 // [private] computeDesiredState
 //*********************************************************************************
 
-void IOService::computeDesiredState ( unsigned long localClamp )
+void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly )
 {
     OSIterator *               iter;
     OSObject *                 next;
@@ -2603,6 +2651,7 @@ void IOService::computeDesiredState ( unsigned long localClamp )
             if (hasChildren && (client == gIOPMPowerClientChildProxy))
                 continue;
 
+            // Advisory tickles are irrelevant unless system is in full wake
             if (client == gIOPMPowerClientAdvisoryTickle &&
                 !gIOPMAdvisoryTickleEnabled)
                 continue;
@@ -2640,37 +2689,30 @@ void IOService::computeDesiredState ( unsigned long localClamp )
         (uint32_t) localClamp, (uint32_t) fTempClampPowerState,
                (uint32_t) fCurrentPowerState, newPowerState);
 
-    // Restart idle timer if stopped and device desire has increased.
-    // Or if advisory desire exists.
-    
-    if (fIdleTimerStopped)
+    if (!computeOnly)
     {
-        if (fDeviceDesire > 0)
-        {
-            fIdleTimerStopped = false;
-            fActivityTickleCount = 0;
-            clock_get_uptime(&fIdleTimerStartTime);
-            start_PM_idle_timer();
-        }
-        else if (fHasAdvisoryDesire)
+        // Restart idle timer if possible when device desire has increased.
+        // Or if an advisory desire exists.
+
+        if (fIdleTimerPeriod && fIdleTimerStopped)
         {
-            fIdleTimerStopped = false;
-            start_PM_idle_timer();
+            restartIdleTimer();
         }
-    }
 
-    // Invalidate cached tickle power state when desires change, and not
-    // due to a tickle request.  This invalidation must occur before the
-    // power state change to minimize races.  We want to err on the side
-    // of servicing more activity tickles rather than dropping one when
-    // the device is in a low power state.
+        // Invalidate the cached tickle power state when desires change, unless
+        // the change came from a tickle request. Otherwise the driver may have
+        // requested a lower power state while the cache still holds a higher
+        // one, which would drop future tickles until the cached value is
+        // lowered or invalidated. The invalidation must occur before the power
+        // transition to avoid dropping a necessary tickle.
 
-    if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) &&
-        (fActivityTicklePowerState != kInvalidTicklePowerState))
-    {
-        IOLockLock(fActivityLock);
-        fActivityTicklePowerState = kInvalidTicklePowerState;
-        IOLockUnlock(fActivityLock);
+        if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) &&
+            (fActivityTicklePowerState != kInvalidTicklePowerState))
+        {
+            IOLockLock(fActivityLock);
+            fActivityTicklePowerState = kInvalidTicklePowerState;
+            IOLockUnlock(fActivityLock);
+        }
     }
 }
 
@@ -2795,6 +2837,7 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber )
 {
        IOPMRequest *   request;
        bool                    noPowerChange = true;
+    uint32_t        tickleFlags;
 
     if (!initialized)
         return true;    // no power change
@@ -2820,12 +2863,13 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber )
                        fActivityTicklePowerState = stateNumber;
                        noPowerChange = false;
 
+            tickleFlags = kTickleTypeActivity | kTickleTypePowerRise;
                        request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
                        if (request)
                        {
-                               request->fArg0 = (void *) stateNumber;  // power state
-                               request->fArg1 = (void *) true;         // power rise
-                request->fArg2 = (void *) false;        // regular tickle
+                               request->fArg0 = (void *) stateNumber;
+                               request->fArg1 = (void *) tickleFlags;
+                request->fArg2 = (void *) gIOPMTickleGeneration;
                                submitPMRequest(request);
                        }
                }
@@ -2845,12 +2889,13 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber )
                        fAdvisoryTicklePowerState = stateNumber;
                        noPowerChange = false;
 
+            tickleFlags = kTickleTypeAdvisory | kTickleTypePowerRise;
                        request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
                        if (request)
                        {
-                               request->fArg0 = (void *) stateNumber;  // power state
-                               request->fArg1 = (void *) true;         // power rise
-                request->fArg2 = (void *) true;         // advisory tickle
+                               request->fArg0 = (void *) stateNumber;
+                               request->fArg1 = (void *) tickleFlags;
+                request->fArg2 = (void *) gIOPMTickleGeneration;
                                submitPMRequest(request);
                        }
                }
@@ -2871,14 +2916,26 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber )
 void IOService::handleActivityTickle ( IOPMRequest * request )
 {
        uint32_t ticklePowerState   = (uint32_t)(uintptr_t) request->fArg0;
-    bool     deviceWasActive    = (request->fArg1 == (void *) true);
-    bool     isRegularTickle    = (request->fArg2 == (void *) false);
+    uint32_t tickleFlags        = (uint32_t)(uintptr_t) request->fArg1;
+    uint32_t tickleGeneration   = (uint32_t)(uintptr_t) request->fArg2;
     bool     adjustPower        = false;
     
        PM_ASSERT_IN_GATE();
-    if (isRegularTickle)
+    if (fResetPowerStateOnWake && (tickleGeneration != gIOPMTickleGeneration))
+    {
+        // Drivers that don't want power restored on wake will drop any
+        // tickles that pre-date the current system wake. The model is
+        // that each wake is a fresh start, with power state depressed
+        // until a new tickle or an explicit power up request from the
+        // driver. It is possible for the PM work loop to enter the
+        // system sleep path with tickle requests queued.
+
+        return;
+    }
+
+    if (tickleFlags & kTickleTypeActivity)
     {
-        if (deviceWasActive)
+        if (tickleFlags & kTickleTypePowerRise)
         {
             if ((ticklePowerState > fDeviceDesire) &&
                 (ticklePowerState < fNumberOfPowerStates))
@@ -2904,7 +2961,7 @@ void IOService::handleActivityTickle ( IOPMRequest * request )
     }
     else    // advisory tickle
     {
-        if (deviceWasActive)
+        if (tickleFlags & kTickleTypePowerRise)
         {
             if ((ticklePowerState == fDeviceUsablePowerState) &&
                 (ticklePowerState < fNumberOfPowerStates))
@@ -3054,6 +3111,30 @@ void IOService::start_PM_idle_timer ( void )
     if (pending) release();
 }
 
+//*********************************************************************************
+// [private] restartIdleTimer
+//*********************************************************************************
+
+void IOService::restartIdleTimer( void )
+{
+    if (fDeviceDesire != 0)
+    {
+        fIdleTimerStopped = false;
+        fActivityTickleCount = 0;
+        clock_get_uptime(&fIdleTimerStartTime);
+        start_PM_idle_timer();
+    }
+    else if (fHasAdvisoryDesire)
+    {
+        fIdleTimerStopped = false;
+        start_PM_idle_timer();
+    }
+    else
+    {
+        fIdleTimerStopped = true;
+    }
+}
+
 //*********************************************************************************
 // idle_timer_expired
 //*********************************************************************************
@@ -3085,8 +3166,10 @@ void IOService::idleTimerExpired( void )
 {
        IOPMRequest *   request;
        bool                    restartTimer = true;
+    uint32_t        tickleFlags;
 
-    if ( !initialized || !fIdleTimerPeriod || fLockedFlags.PMStop )
+    if ( !initialized || !fIdleTimerPeriod || fIdleTimerStopped ||
+         fLockedFlags.PMStop )
         return;
 
        IOLockLock(fActivityLock);
@@ -3108,12 +3191,13 @@ void IOService::idleTimerExpired( void )
                if (fActivityTicklePowerState > 0)
                        fActivityTicklePowerState--;
 
+        tickleFlags = kTickleTypeActivity | kTickleTypePowerDrop;
                request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
                if (request)
                {
-                       request->fArg0 = (void *) 0;            // power state (irrelevant)
-                       request->fArg1 = (void *) false;        // timer expiration (not tickle)
-            request->fArg2 = (void *) false;    // regular tickle
+                       request->fArg0 = (void *) 0;    // irrelevant
+                       request->fArg1 = (void *) tickleFlags;
+            request->fArg2 = (void *) gIOPMTickleGeneration;
                        submitPMRequest( request );
 
                        // Do not restart timer until after the tickle request has been
@@ -3132,12 +3216,13 @@ void IOService::idleTimerExpired( void )
         // Want new tickles to turn into pm request after we drop the lock
         fAdvisoryTicklePowerState = kInvalidTicklePowerState;
 
+        tickleFlags = kTickleTypeAdvisory | kTickleTypePowerDrop;
                request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle );
                if (request)
                {
-                       request->fArg0 = (void *) 0;            // power state (irrelevant)
-                       request->fArg1 = (void *) false;        // timer expiration (not tickle)
-            request->fArg2 = (void *) true;     // advisory tickle
+                       request->fArg0 = (void *) 0;    // irrelevant
+                       request->fArg1 = (void *) tickleFlags;
+            request->fArg2 = (void *) gIOPMTickleGeneration;
                        submitPMRequest( request );
 
                        // Do not restart timer until after the tickle request has been
@@ -4098,8 +4183,13 @@ void IOService::all_done ( void )
         }
         else if (fAdvisoryTickleUsed)
         {
-            // Not root domain and advisory tickle target
+            // Not root domain and advisory tickle target.
             // Re-adjust power after power tree sync at the 'did' pass
+            // to recompute desire and adjust power state between dark
+            // and full wake transitions. Root domain is responsible
+            // for calling setAdvisoryTickleEnable() before starting
+            // the kIOPMSynchronize power change.
+
             if (!fAdjustPowerScheduled &&
                 (fHeadNoteChangeFlags & kIOPMDomainDidChange))
             {
@@ -4150,6 +4240,12 @@ void IOService::all_done ( void )
             if (fCurrentCapabilityFlags & kIOPMStaticPowerValid)
                 fCurrentPowerConsumption = powerStatePtr->staticPower;
 
+            if (fHeadNoteChangeFlags & kIOPMRootChangeDown)
+            {
+                // Bump tickle generation count once the entire tree is down
+                gIOPMTickleGeneration++;
+            }
+
             // inform subclass policy-maker
             if (fPCDFunctionOverride && fParentsKnowState &&
                 assertPMDriverCall(&callEntry, kIOPMADC_NoInactiveCheck))
@@ -4168,6 +4264,9 @@ void IOService::all_done ( void )
     // parent's power change
     if ( fHeadNoteChangeFlags & kIOPMParentInitiated)
     {
+        if (fHeadNoteChangeFlags & kIOPMRootChangeDown)
+            ParentChangeRootChangeDown();
+    
         if (((fHeadNoteChangeFlags & kIOPMDomainWillChange) &&
              (fCurrentPowerState >= fHeadNotePowerState))   ||
                          ((fHeadNoteChangeFlags & kIOPMDomainDidChange)  &&
@@ -4307,6 +4406,10 @@ void IOService::OurChangeStart ( void )
     }
 }
 
+//*********************************************************************************
+// [private] requestDomainPowerApplier
+//
+// Call requestPowerDomainState() on all power parents.
 //*********************************************************************************
 
 struct IOPMRequestDomainPowerContext {
@@ -4345,6 +4448,10 @@ requestDomainPowerApplier(
 
 //*********************************************************************************
 // [private] requestDomainPower
+//
+// Called by a power child to broadcast its desired power state to all parents.
+// If the child self-initiates a power change, it must call this function to
+// allow its parents to adjust power state.
 //*********************************************************************************
 
 IOReturn IOService::requestDomainPower(
@@ -4362,7 +4469,7 @@ IOReturn IOService::requestDomainPower(
     if (IS_PM_ROOT)
         return kIOReturnSuccess;
 
-    // Fetch the input power flags for the requested power state.
+    // Fetch our input power flags for the requested power state.
     // Parent request is stated in terms of required power flags.
 
        requestPowerFlags = fPowerStates[ourPowerState].inputPowerFlags;
@@ -4377,6 +4484,7 @@ IOReturn IOService::requestDomainPower(
     }
     fPreviousRequestPowerFlags = requestPowerFlags;
 
+    // The results will be collected in fHeadNoteDomainTargetFlags
     context.child              = this;
     context.requestPowerFlags  = requestPowerFlags;
     fHeadNoteDomainTargetFlags = 0;
@@ -4387,7 +4495,7 @@ IOReturn IOService::requestDomainPower(
         maxPowerState = fControllingDriver->maxCapabilityForDomainState(
                             fHeadNoteDomainTargetFlags );
 
-        if (maxPowerState < fHeadNotePowerState)
+        if (maxPowerState < ourPowerState)
         {
             PM_LOG1("%s: power desired %u:0x%x got %u:0x%x\n",
                 getName(),
@@ -4600,16 +4708,20 @@ IOReturn IOService::ParentChangeStart ( void )
        PM_ASSERT_IN_GATE();
     OUR_PMLog( kPMLogStartParentChange, fHeadNotePowerState, fCurrentPowerState );
 
-    // Power domain is lowering power
-    if ( fHeadNotePowerState < fCurrentPowerState )
+    // Root power domain has transitioned to its max power state
+    if ((fHeadNoteChangeFlags & (kIOPMDomainDidChange | kIOPMRootChangeUp)) ==
+                                (kIOPMDomainDidChange | kIOPMRootChangeUp))
     {
-        // Piggy-back idle timer cancellation on a parent down
-        if (0 == fHeadNotePowerState)
-            ParentChangeCancelIdleTimer(fHeadNotePowerState);
-    
-               // TODO: redundant? See handlePowerDomainWillChangeTo()
-               setParentInfo( fHeadNoteParentFlags, fHeadNoteParentConnection, true );
+        // Restart the idle timer stopped by ParentChangeRootChangeDown()
+        if (fIdleTimerPeriod && fIdleTimerStopped)
+        {
+            restartIdleTimer();
+        }
+    }
 
+    // Power domain is forcing us to lower power
+    if ( fHeadNotePowerState < fCurrentPowerState )
+    {
         PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags);
 
        // Tell apps and kernel clients
@@ -4651,10 +4763,10 @@ IOReturn IOService::ParentChangeStart ( void )
             ParentChangeTellCapabilityWillChange();
             return IOPMWillAckLater;
         }
-        else if (fHeadNoteChangeFlags & kIOPMSynchronize)
+        else if (fHeadNoteChangeFlags & kIOPMRootBroadcastFlags)
         {
-            // We do not need to change power state, but notify
-            // children to propagate tree synchronization.
+            // No need to change power state, but broadcast change
+            // to our children.
             fMachineState     = kIOPM_SyncNotifyDidChange;
             fDriverCallReason = kDriverCallInformPreChange;
             notifyChildren();
@@ -4666,6 +4778,103 @@ IOReturn IOService::ParentChangeStart ( void )
     return IOPMAckImplied;
 }
 
+//******************************************************************************
+// [private] ParentChangeRootChangeDown
+//
+// Root domain has finished the transition to the system sleep state, and
+// all drivers in the power plane should have powered down. Cancel the idle
+// timer, and also reset the device desire for those drivers that don't want
+// power automatically restored on wake.
+//******************************************************************************
+
+void IOService::ParentChangeRootChangeDown( void )
+{
+    // Always stop the idle timer before root power down
+    if (fIdleTimerPeriod && !fIdleTimerStopped)
+    {
+        fIdleTimerStopped = true;
+        if (fIdleTimer && thread_call_cancel(fIdleTimer))
+            release();
+    }
+
+    if (fResetPowerStateOnWake)
+    {
+        // Reset device desire down to the lowest power state.
+        // Advisory tickle desire is intentionally untouched since
+        // it has no effect until system is promoted to full wake.
+
+        if (fDeviceDesire != 0)
+        {
+            updatePowerClient(gIOPMPowerClientDevice, 0);
+            computeDesiredState(0, true);
+            PM_LOG1("%s: tickle desire removed\n", fName);
+        }
+
+        // Invalidate tickle cache so the next tickle will issue a request
+        IOLockLock(fActivityLock);
+        fDeviceWasActive = false;
+        fActivityTicklePowerState = kInvalidTicklePowerState;
+        IOLockUnlock(fActivityLock);
+
+        fIdleTimerMinPowerState = 0;
+    }
+    else if (fAdvisoryTickleUsed)
+    {
+        // A less aggressive mechanism to accelerate idle timer expiration
+        // before system sleep. It may not always allow the driver to wake
+        // from system sleep in the minimum power state.
+
+        AbsoluteTime    now;
+        uint64_t        nsec;
+        bool            dropTickleDesire = false;
+
+        if (fIdleTimerPeriod && !fIdleTimerIgnored &&
+            (fIdleTimerMinPowerState == 0) &&
+            (fDeviceDesire != 0))
+        {
+            IOLockLock(fActivityLock);
+
+            if (!fDeviceWasActive)
+            {
+                // No tickles since the last idle timer expiration.
+                // Safe to drop the device desire to zero.
+                dropTickleDesire = true;
+            }
+            else
+            {
+                // Was tickled since the last idle timer expiration,
+                // but not in the last minute.
+                clock_get_uptime(&now);
+                SUB_ABSOLUTETIME(&now, &fDeviceActiveTimestamp);
+                absolutetime_to_nanoseconds(now, &nsec);
+                if (nsec >= kNoTickleCancelWindow)
+                {
+                    dropTickleDesire = true;
+                }
+            }
+
+            if (dropTickleDesire)
+            {
+                // Force the next tickle to raise power state
+                fDeviceWasActive = false;
+                fActivityTicklePowerState = kInvalidTicklePowerState;
+            }
+
+            IOLockUnlock(fActivityLock);
+        }
+
+        if (dropTickleDesire)
+        {
+            // Advisory tickle desire is intentionally untouched since
+            // it has no effect until system is promoted to full wake.
+
+            updatePowerClient(gIOPMPowerClientDevice, 0);
+            computeDesiredState(0, true);
+            PM_LOG1("%s: tickle desire dropped\n", fName);
+        }
+    }
+}
+
 //*********************************************************************************
 // [private] ParentChangeTellPriorityClientsPowerDown
 //
@@ -4785,72 +4994,6 @@ void IOService::ParentChangeAcknowledgePowerChange ( void )
     nub->release();
 }
 
-void IOService::ParentChangeCancelIdleTimer( IOPMPowerStateIndex newPowerState )
-{
-    AbsoluteTime    now;
-    uint64_t        nsec;
-    bool            cancel = false;
-
-    // No ready or idle timer not in use
-    if (!initialized || !fIdleTimerPeriod || fLockedFlags.PMStop ||
-        !fAdvisoryTickleUsed)
-        return;
-
-    // Not allowed to induce artifical idle timeout
-    if (fIdleTimerIgnored || fIdleTimerMinPowerState)
-        goto done;
-
-    // Idle timer already has no influence
-    if (!fDesiredPowerState || fIdleTimerStopped)
-        goto done;
-
-       IOLockLock(fActivityLock);
-
-    if (!fDeviceWasActive)
-    {
-        // No tickles since the last idle timer expiration.
-        // Safe to drop the device desire to zero.
-        cancel = true;
-    }
-    else
-    {
-        // Was tickled since the last idle timer expiration,
-        // but not in the last minute.
-        clock_get_uptime(&now);
-        SUB_ABSOLUTETIME(&now, &fDeviceActiveTimestamp);
-        absolutetime_to_nanoseconds(now, &nsec);
-        if (nsec >= kNoTickleCancelWindow)
-        {
-            cancel = true;
-        }
-    }
-
-    if (cancel)
-    {
-        // Force the next tickle to raise power state
-               fActivityTicklePowerState = kInvalidTicklePowerState;
-        fDeviceWasActive = false;
-    }
-
-       IOLockUnlock(fActivityLock);
-
-    if (cancel)
-    {
-        // cancel idle timer
-        if (fIdleTimer && thread_call_cancel(fIdleTimer))
-            release();
-
-        updatePowerClient(gIOPMPowerClientDevice, 0);
-        computeDesiredState();
-
-        fIdleTimerStopped = true;
-    }
-
-done:
-    OUR_PMLog( kPMLogStartParentChange, fHeadNotePowerState, fCurrentPowerState );
-    PM_LOG("%s::%s cancel=%d\n", fName, __FUNCTION__, cancel);
-}
-
 // MARK: -
 // MARK: Ack and Settle timers
 
@@ -4895,6 +5038,12 @@ settle_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 )
 
 void IOService::startSettleTimer( void )
 {
+#if NOT_USEFUL
+    // This function is broken and serves no useful purpose since it never
+    // updates fSettleTimeUS to a non-zero value to stall the state machine,
+    // yet it starts a delay timer. It appears no driver relies on a delay
+    // from settleUpTime and settleDownTime in the power state table.
+
     AbsoluteTime        deadline;
     IOPMPowerStateIndex i;
     uint32_t            settleTime = 0;
@@ -4931,6 +5080,7 @@ void IOService::startSettleTimer( void )
         pending = thread_call_enter_delayed(fSettleTimer, deadline);
         if (pending) release();
     }
+#endif
 }
 
 //*********************************************************************************
@@ -6337,6 +6487,12 @@ unsigned long IOService::initialPowerStateForDomainState ( IOPMPowerFlags domain
 {
     int i;
 
+    if (fResetPowerStateOnWake && (domainState & kIOPMRootDomainState))
+    {
+        // Return lowest power state for any root power domain changes
+        return 0;
+    }
+
     if (fNumberOfPowerStates == 0 )
     {
         return 0;
@@ -6606,26 +6762,10 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
        // Catch requests created by idleTimerExpired().
 
        if ((request->getType() == kIOPMRequestTypeActivityTickle) &&
-           (request->fArg1     == (void *) false))
+           (((uintptr_t) request->fArg1) & kTickleTypePowerDrop)  &&
+        fIdleTimerPeriod)
        {
-               // Idle timer expiration - power drop request completed.
-               // Restart the idle timer if deviceDesire can go lower, otherwise set
-               // a flag so we know to restart idle timer when fDeviceDesire > 0.
-
-               if (fDeviceDesire > 0)
-               {
-            fActivityTickleCount = 0;
-                       clock_get_uptime(&fIdleTimerStartTime);
-                       start_PM_idle_timer();
-               }
-        else if (fHasAdvisoryDesire)
-        {
-                       start_PM_idle_timer();
-        }
-               else
-        {
-                       fIdleTimerStopped = true;
-        }
+        restartIdleTimer();
     }
 
     // If the request is linked, then Work queue has already incremented its
@@ -6946,9 +7086,14 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 fIsPreChange = false;
 
                 if (fHeadNoteChangeFlags & kIOPMParentInitiated)
+                {
                     fMachineState = kIOPM_SyncFinish;
+                }
                 else
+                {
+                    assert(IS_ROOT_DOMAIN);
                     fMachineState = kIOPM_SyncTellCapabilityDidChange;
+                }
 
                 fDriverCallReason = kDriverCallInformPostChange;
                 notifyChildren();
@@ -7068,13 +7213,8 @@ void IOService::executePMRequest( IOPMRequest * request )
         case kIOPMRequestTypeSetIdleTimerPeriod:
             {
                 fIdleTimerPeriod = (uintptr_t) request->fArg0;
-
                 if ((false == fLockedFlags.PMStop) && (fIdleTimerPeriod > 0))
-                {
-                    fActivityTickleCount = 0;
-                    clock_get_uptime(&fIdleTimerStartTime);
-                    start_PM_idle_timer();
-                }
+                    restartIdleTimer();
             }
             break;
 
@@ -7425,10 +7565,12 @@ void IOPMRequest::reset( void )
 
        fType = kIOPMRequestTypeInvalid;
 
+#if NOT_READY
        if (fCompletionAction)
        {
         fCompletionAction(fCompletionTarget, fCompletionParam, fCompletionStatus);
     }
+#endif
 
        if (fTarget)
        {
@@ -7448,7 +7590,7 @@ bool IOPMRequest::attachNextRequest( IOPMRequest * next )
         fRequestNext = next;
         fRequestNext->fWorkWaitCount++;
 #if LOG_REQUEST_ATTACH
-        kprintf("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
+        PM_LOG("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
             this, (uint32_t) fType, fRequestNext,
             (uint32_t) fRequestNext->fType,
             (uint32_t) fRequestNext->fWorkWaitCount,
@@ -7469,7 +7611,7 @@ bool IOPMRequest::detachNextRequest( void )
         if (fRequestNext->fWorkWaitCount)
             fRequestNext->fWorkWaitCount--;
 #if LOG_REQUEST_ATTACH
-        kprintf("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
+        PM_LOG("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
             this, (uint32_t) fType, fRequestNext,
             (uint32_t) fRequestNext->fType,
             (uint32_t) fRequestNext->fWorkWaitCount,
@@ -7492,7 +7634,7 @@ bool IOPMRequest::attachRootRequest( IOPMRequest * root )
         fRequestRoot = root;
         fRequestRoot->fFreeWaitCount++;
 #if LOG_REQUEST_ATTACH
-        kprintf("Attached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
+        PM_LOG("Attached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
             this, (uint32_t) fType, fRequestRoot,
             (uint32_t) fRequestRoot->fType,
             (uint32_t) fRequestRoot->fFreeWaitCount,
@@ -7513,7 +7655,7 @@ bool IOPMRequest::detachRootRequest( void )
         if (fRequestRoot->fFreeWaitCount)
             fRequestRoot->fFreeWaitCount--;
 #if LOG_REQUEST_ATTACH
-        kprintf("Detached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
+        PM_LOG("Detached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
             this, (uint32_t) fType, fRequestRoot,
             (uint32_t) fRequestRoot->fType,
             (uint32_t) fRequestRoot->fFreeWaitCount,
index 47f99ea4501b83e98ccf953d65165afda5e40858..8651e6af887a76aaa452e8665fa632a18c399211 100644 (file)
@@ -229,20 +229,22 @@ private:
     // PM state lock.
     IOLock *                PMLock;
 
-    unsigned int            InitialPowerChange:1;
-    unsigned int            InitialSetPowerState:1;
-    unsigned int            DeviceOverrideEnabled:1;
-    unsigned int            DoNotPowerDown:1;
-    unsigned int            ParentsKnowState:1;
-    unsigned int            StrictTreeOrder:1;
-    unsigned int            IdleTimerStopped:1;
-    unsigned int            AdjustPowerScheduled:1;
-    unsigned int            IsPreChange:1;
-    unsigned int            DriverCallBusy:1;
-    unsigned int            PCDFunctionOverride:1;
-    unsigned int            IdleTimerIgnored:1;
-    unsigned int            HasAdvisoryDesire:1;
-    unsigned int            AdvisoryTickleUsed:1;
+    unsigned int            InitialPowerChange          :1;
+    unsigned int            InitialSetPowerState        :1;
+    unsigned int            DeviceOverrideEnabled       :1;
+    unsigned int            DoNotPowerDown              :1;
+    unsigned int            ParentsKnowState            :1;
+    unsigned int            StrictTreeOrder             :1;
+    unsigned int            IdleTimerStopped            :1;
+    unsigned int            AdjustPowerScheduled        :1;
+    
+    unsigned int            IsPreChange                 :1;
+    unsigned int            DriverCallBusy              :1;
+    unsigned int            PCDFunctionOverride         :1;
+    unsigned int            IdleTimerIgnored            :1;
+    unsigned int            HasAdvisoryDesire           :1;
+    unsigned int            AdvisoryTickleUsed          :1;
+    unsigned int            ResetPowerStateOnWake       :1;
 
     // Time of last device activity.
     AbsoluteTime            DeviceActiveTimestamp;
@@ -384,6 +386,7 @@ private:
 #define fIdleTimerIgnored           pwrMgt->IdleTimerIgnored
 #define fHasAdvisoryDesire          pwrMgt->HasAdvisoryDesire
 #define fAdvisoryTickleUsed         pwrMgt->AdvisoryTickleUsed
+#define fResetPowerStateOnWake      pwrMgt->ResetPowerStateOnWake
 #define fDeviceActiveTimestamp      pwrMgt->DeviceActiveTimestamp
 #define fActivityLock               pwrMgt->ActivityLock
 #define fIdleTimerPeriod            pwrMgt->IdleTimerPeriod
@@ -464,6 +467,17 @@ the ack timer is ticking every tenth of a second.
 #define kIOPMSyncTellPowerDown      0x0400  // send the ask/will power off messages
 #define kIOPMSyncCancelPowerDown    0x0800  // sleep cancel for maintenance wake
 #define kIOPMInitialPowerChange     0x1000  // set for initial power change
+#define kIOPMRootChangeUp           0x2000  // Root power domain change up
+#define kIOPMRootChangeDown         0x4000  // Root power domain change down
+
+#define kIOPMRootBroadcastFlags     (kIOPMSynchronize  | \
+                                     kIOPMRootChangeUp | kIOPMRootChangeDown)
+
+// Activity tickle request flags
+#define kTickleTypePowerDrop        0x01
+#define kTickleTypePowerRise        0x02
+#define kTickleTypeActivity         0x04
+#define kTickleTypeAdvisory         0x08
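These bits travel in fArg1 of an activity-tickle request, with the power state in fArg0 and the tickle generation in fArg2. A sketch of the decode, mirroring handleActivityTickle() in IOServicePM.cpp above:

    uint32_t tickleFlags = (uint32_t)(uintptr_t) request->fArg1;

    if (tickleFlags & kTickleTypeActivity) {
        if (tickleFlags & kTickleTypePowerRise) {
            /* activityTickle(): raise device desire */
        } else {
            /* kTickleTypePowerDrop: idle timer expiration */
        }
    } else {
        /* kTickleTypeAdvisory: usable-power hint */
    }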
 
 enum {
     kDriverCallInformPreChange,
index 0f5dcbc2b024b06664f345f150c930228e8eb633..6a12bb11cee59600f00a8d4c00f1c7b762829254 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -807,7 +807,7 @@ define showactint
                set $kgm_actint_framecount = 0
                while ($mysp != 0) && (($mysp & $stkmask) == 0) \
                      && ($mysp != $prevsp) \
-                     && ((((unsigned long) $mysp ^ (unsigned long) $prevsp) < 0x2000) \
+                     && ((((unsigned long) $mysp - (unsigned long) $prevsp) < 0x4000) \
                      || (((unsigned long)$mysp < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \
                      && ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack)))) \
                      && ($kgm_actint_framecount < 128)
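The XOR test was a cheap closeness check that breaks when two nearby stack pointers straddle a power-of-two boundary; unsigned subtraction measures the actual distance. A worked example with illustrative addresses:

    #include <stdio.h>

    int main(void)
    {
        unsigned long prevsp = 0x1fff8, mysp = 0x20008;   /* 0x10 bytes apart */

        printf("xor %#lx sub %#lx\n",
               mysp ^ prevsp,   /* 0x3fff0: fails a '< 0x2000' closeness test */
               mysp - prevsp);  /* 0x10:    passes a '< 0x4000' distance test */
        return 0;
    }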
index 8bd5fa52202a5f3d95536af1e601e36c142547d6..fafd4547cad57307733461274f957cbee7be4af5 100644 (file)
@@ -813,8 +813,10 @@ OSMetaClass::removeInstance(const OSObject * instance, bool super) const
            if (superClassLink) {
                superClassLink->removeInstance(reserved->instances, true);
            }
+           IOLockLock(sAllClassesLock);
            reserved->instances->release();
            reserved->instances = 0;
+           IOLockUnlock(sAllClassesLock);
        }
     }
 
index f03e44420ea375f9149f5fb022a09c7151271304..de177986ac276d069a6800cc7113c69152005d16 100644 (file)
@@ -33,8 +33,8 @@
        .align 2, 0x90
        .globl __get_cpu_capabilities
 __get_cpu_capabilities:
-       movq    $(_COMM_PAGE_CPU_CAPABILITIES), %rax
-       movl    (%rax), %eax
+       movq    $(_COMM_PAGE_CPU_CAPABILITIES64), %rax
+       movq    (%rax), %rax
        ret
 
 #elif defined(__i386__)
@@ -43,7 +43,8 @@ __get_cpu_capabilities:
        .align 2, 0x90
        .globl __get_cpu_capabilities
 __get_cpu_capabilities:
-       movl    _COMM_PAGE_CPU_CAPABILITIES, %eax
+       movl    _COMM_PAGE_CPU_CAPABILITIES64, %eax
+       movl    _COMM_PAGE_CPU_CAPABILITIES64+4, %edx
        ret
 
 #else
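Both variants now return the widened 64-bit capabilities word (in %rax, or %edx:%eax on i386). A C-level sketch of the same read, assuming _COMM_PAGE_CPU_CAPABILITIES64 is the fixed commpage address the assembly loads from:

    /* Sketch only: the commpage is mapped at a fixed address in every
       process, so the read is a plain dereference. */
    static uint64_t get_cpu_capabilities_sketch(void)
    {
        return *(volatile const uint64_t *)(uintptr_t) _COMM_PAGE_CPU_CAPABILITIES64;
    }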
index 02f3cadf8d1b464bcd75ef262ea4be5f280d081a..50345face11373dedc74b29d42fc51e923f34796 100644 (file)
@@ -134,3 +134,4 @@ osfmk/kperf/x86_64/kperf_mp.c   optional kperf
 
 osfmk/i386/startup64.c         standard
 osfmk/x86_64/idt64.s           standard
+
index 6315fc5e9be83f3aa55be6e90aaa3e0082ef62bf..8d01210e9c26ce48008c7e739884ead4ad025893 100644 (file)
@@ -217,10 +217,6 @@ machine_startup(void)
 
        machine_conf();
 
-#if NOTYET
-       ml_thrm_init();         /* Start thermal monitoring on this processor */
-#endif
-
        /*
         * Start the system.
         */
index 0921ad5759efc3632ef730b8b10f414c37fe7733..3023aeef9cd98eab84a68bbd90658f59924b82cc 100644 (file)
 #include <kern/kalloc.h>
 #include <sys/kdebug.h>
 
+#include <i386/machine_cpu.h>
+#include <i386/misc_protos.h>
+#include <i386/cpuid.h>
+
+#define PERMIT_PERMCHECK (0)
+
 diagWork        dgWork;
 uint64_t        lastRuptClear = 0ULL;
 
@@ -80,17 +86,33 @@ void cpu_powerstats(void *);
 typedef struct {
        uint64_t caperf;
        uint64_t cmperf;
-       uint64_t ccres[3];
-       uint64_t crtimes[4];
-       uint64_t citimes[4];
+       uint64_t ccres[6];
+       uint64_t crtimes[CPU_RTIME_BINS];
+       uint64_t citimes[CPU_ITIME_BINS];
        uint64_t crtime_total;
        uint64_t citime_total;
+       uint64_t cpu_idle_exits;
+       uint64_t cpu_insns;
+       uint64_t cpu_ucc;
+       uint64_t cpu_urc;
 } core_energy_stat_t;
 
 typedef struct {
-       uint64_t pkg_cres[2][4];
+       uint64_t pkg_cres[2][7];
        uint64_t pkg_power_unit;
        uint64_t pkg_energy;
+       uint64_t pp0_energy;
+       uint64_t pp1_energy;
+       uint64_t ddr_energy;
+       uint64_t llc_flushed_cycles;
+       uint64_t ring_ratio_instantaneous;
+       uint64_t IA_frequency_clipping_cause;
+       uint64_t GT_frequency_clipping_cause;
+       uint64_t pkg_idle_exits;
+       uint64_t pkg_rtimes[CPU_RTIME_BINS];
+       uint64_t pkg_itimes[CPU_ITIME_BINS];
+       uint64_t mbus_delay_time;
+       uint64_t mint_delay_time;
        uint32_t ncpus;
        core_energy_stat_t cest[];
 } pkg_energy_statistics_t;
@@ -99,9 +121,9 @@ typedef struct {
 int 
 diagCall64(x86_saved_state_t * state)
 {
-       uint64_t        curpos, i, j;
-       uint64_t        selector, data;
-       uint64_t        currNap, durNap;
+       uint64_t        curpos, i, j;
+       uint64_t        selector, data;
+       uint64_t        currNap, durNap;
        x86_saved_state64_t     *regs;
        boolean_t       diagflag;
        uint32_t        rval = 0;
@@ -175,12 +197,54 @@ diagCall64(x86_saved_state_t * state)
                pkes.pkg_cres[0][2] = ((uint64_t)c6h << 32) | c6l;
                pkes.pkg_cres[0][3] = ((uint64_t)c7h << 32) | c7l;
 
+               uint32_t cpumodel = cpuid_info()->cpuid_model;
+               boolean_t c8avail;
+               switch (cpumodel) {
+               case CPUID_MODEL_HASWELL_ULT:
+                       c8avail = TRUE;
+                       break;
+               default:
+                       c8avail = FALSE;
+                       break;
+               }
+               uint64_t c8r = ~0ULL, c9r = ~0ULL, c10r = ~0ULL;
+
+               if (c8avail) {
+                       rdmsr64_carefully(MSR_IA32_PKG_C8_RESIDENCY, &c8r);
+                       rdmsr64_carefully(MSR_IA32_PKG_C9_RESIDENCY, &c9r);
+                       rdmsr64_carefully(MSR_IA32_PKG_C10_RESIDENCY, &c10r);
+               }
+
+               pkes.pkg_cres[0][4] = c8r;
+               pkes.pkg_cres[0][5] = c9r;
+               pkes.pkg_cres[0][6] = c10r;
+
+               pkes.ddr_energy = ~0ULL;
+               rdmsr64_carefully(MSR_IA32_DDR_ENERGY_STATUS, &pkes.ddr_energy);
+               pkes.llc_flushed_cycles = ~0ULL;
+               rdmsr64_carefully(MSR_IA32_LLC_FLUSHED_RESIDENCY_TIMER, &pkes.llc_flushed_cycles);
+
+               pkes.ring_ratio_instantaneous = ~0ULL;
+               rdmsr64_carefully(MSR_IA32_RING_PERF_STATUS, &pkes.ring_ratio_instantaneous);
+
+               pkes.IA_frequency_clipping_cause = ~0ULL;
+               rdmsr64_carefully(MSR_IA32_IA_PERF_LIMIT_REASONS, &pkes.IA_frequency_clipping_cause);
+
+               pkes.GT_frequency_clipping_cause = ~0ULL;
+               rdmsr64_carefully(MSR_IA32_GT_PERF_LIMIT_REASONS, &pkes.GT_frequency_clipping_cause);
+
                rdmsr_carefully(MSR_IA32_PKG_POWER_SKU_UNIT, &pkg_unit_l, &pkg_unit_h);
                rdmsr_carefully(MSR_IA32_PKG_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
-
                pkes.pkg_power_unit = ((uint64_t)pkg_unit_h << 32) | pkg_unit_l;
                pkes.pkg_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
 
+               rdmsr_carefully(MSR_IA32_PP0_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
+               pkes.pp0_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
+
+               rdmsr_carefully(MSR_IA32_PP1_ENERGY_STATUS, &pkg_ecl, &pkg_ech);
+               pkes.pp1_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl;
+
+               pkes.pkg_idle_exits = current_cpu_datap()->lcpu.package->package_idle_exits;
                pkes.ncpus = real_ncpus;
 
                (void) ml_set_interrupts_enabled(TRUE);
@@ -191,6 +255,8 @@ diagCall64(x86_saved_state_t * state)
                mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_powerstats, NULL);
                
                for (i = 0; i < real_ncpus; i++) {
+                       (void) ml_set_interrupts_enabled(FALSE);
+
                        cest.caperf = cpu_data_ptr[i]->cpu_aperf;
                        cest.cmperf = cpu_data_ptr[i]->cpu_mperf;
                        cest.ccres[0] = cpu_data_ptr[i]->cpu_c3res;
@@ -199,8 +265,14 @@ diagCall64(x86_saved_state_t * state)
 
                        bcopy(&cpu_data_ptr[i]->cpu_rtimes[0], &cest.crtimes[0], sizeof(cest.crtimes));
                        bcopy(&cpu_data_ptr[i]->cpu_itimes[0], &cest.citimes[0], sizeof(cest.citimes));
+
                        cest.citime_total = cpu_data_ptr[i]->cpu_itime_total;
                        cest.crtime_total = cpu_data_ptr[i]->cpu_rtime_total;
+                       cest.cpu_idle_exits = cpu_data_ptr[i]->cpu_idle_exits;
+                       cest.cpu_insns = cpu_data_ptr[i]->cpu_cur_insns;
+                       cest.cpu_ucc = cpu_data_ptr[i]->cpu_cur_ucc;
+                       cest.cpu_urc = cpu_data_ptr[i]->cpu_cur_urc;
+                       (void) ml_set_interrupts_enabled(TRUE);
 
                        copyout(&cest, curpos, sizeof(cest));
                        curpos += sizeof(cest);
@@ -208,6 +280,13 @@ diagCall64(x86_saved_state_t * state)
                rval = 1;
        }
                break;
+       case dgEnaPMC:
+       {
+               boolean_t enable = TRUE;
+               mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_pmc_control, &enable);
+               rval = 1;
+       }
+       break;
 
 #if    DEBUG
        case dgGzallocTest:
@@ -220,10 +299,10 @@ diagCall64(x86_saved_state_t * state)
                kfree(ptr, 1024);
                *ptr = 0x42;
        }
-               break;
+       break;
 #endif
 
-#if    defined(__x86_64__)             
+#if PERMIT_PERMCHECK   
        case    dgPermCheck:
        {
                (void) ml_set_interrupts_enabled(TRUE);
@@ -233,7 +312,7 @@ diagCall64(x86_saved_state_t * state)
                rval = pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL);
        }
                break;
-#endif /* __x86_64__*/
+#endif /* PERMIT_PERMCHECK */
 
        default:                /* Handle invalid ones */
                rval = 0;       /* Return an exception */
@@ -246,7 +325,7 @@ diagCall64(x86_saved_state_t * state)
 
 void cpu_powerstats(__unused void *arg) {
        cpu_data_t *cdp = current_cpu_datap();
-       int cnum = cdp->cpu_number;
+       __unused int cnum = cdp->cpu_number;
        uint32_t cl = 0, ch = 0, mpl = 0, mph = 0, apl = 0, aph = 0;
 
        rdmsr_carefully(MSR_IA32_MPERF, &mpl, &mph);
@@ -255,8 +334,9 @@ void cpu_powerstats(__unused void *arg) {
        cdp->cpu_mperf = ((uint64_t)mph << 32) | mpl;
        cdp->cpu_aperf = ((uint64_t)aph << 32) | apl;
 
-       if (cnum & 1)
-               return;
+       uint64_t ctime = mach_absolute_time();
+       cdp->cpu_rtime_total += ctime - cdp->cpu_ixtime;
+       cdp->cpu_ixtime = ctime;
 
        rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
        cdp->cpu_c3res = ((uint64_t)ch << 32) | cl;
@@ -266,4 +346,28 @@ void cpu_powerstats(__unused void *arg) {
 
        rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
        cdp->cpu_c7res = ((uint64_t)ch << 32) | cl;
+       
+       uint64_t insns = read_pmc(FIXED_PMC0);
+       uint64_t ucc = read_pmc(FIXED_PMC1);
+       uint64_t urc = read_pmc(FIXED_PMC2);
+       cdp->cpu_cur_insns = insns;
+       cdp->cpu_cur_ucc = ucc;
+       cdp->cpu_cur_urc = urc;
+}
+
+void cpu_pmc_control(void *enablep) {
+       boolean_t enable = *(boolean_t *)enablep;
+       cpu_data_t      *cdp = current_cpu_datap();
+
+       if (enable) {
+               wrmsr64(0x38F, 0x70000000FULL);
+               wrmsr64(0x38D, 0x333);
+               set_cr4(get_cr4() | CR4_PCE);
+
+       } else {
+               wrmsr64(0x38F, 0);
+               wrmsr64(0x38D, 0);
+               set_cr4((get_cr4() & ~CR4_PCE));
+       }
+       cdp->cpu_fixed_pmcs_enabled = enable;
 }
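The magic constants written by cpu_pmc_control() above are the architectural performance-monitoring control MSRs. A commented sketch of the enable path, assuming the Intel SDM names for the registers (the macro names below are illustrative; only the raw 0x38D/0x38F numbers appear in the source):

	#define IA32_FIXED_CTR_CTRL     0x38D   /* per-fixed-counter enables */
	#define IA32_PERF_GLOBAL_CTRL   0x38F   /* global counter enables */

	/* 0x70000000F: bits 32-34 enable fixed counters 0-2,
	 * bits 0-3 enable general-purpose counters 0-3.
	 */
	wrmsr64(IA32_PERF_GLOBAL_CTRL, 0x70000000FULL);
	/* 0x333: OS+USR enable pair (0b0011) in each fixed
	 * counter's 4-bit control field.
	 */
	wrmsr64(IA32_FIXED_CTR_CTRL, 0x333);
	/* CR4.PCE additionally permits RDPMC from user mode */
	set_cr4(get_cr4() | CR4_PCE);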
index 2ce145e27d4c1417c5baa055ed81032f0b2840b2..4e37eea2bb6c451cee45050a5563fb2d105e5b11 100644 (file)
@@ -70,7 +70,7 @@ int diagCall64(x86_saved_state_t *regs);
 #define dgBind 18
 #define dgAcntg 20
 #define dgKlra 21
-#define dgKfree 22
+#define dgEnaPMC 22
 #define        dgWar 23
 #define dgNapStat 24
 #define dgRuptStat 25
@@ -100,7 +100,17 @@ typedef struct diagWork {                  /* Diagnostic work area */
 
 extern diagWork dgWork;
 
-
+#define FIXED_PMC (1 << 30)
+#define FIXED_PMC0 (FIXED_PMC)
+#define FIXED_PMC1 (FIXED_PMC | 1)
+#define FIXED_PMC2 (FIXED_PMC | 2)
+static inline uint64_t read_pmc(uint32_t counter)
+{
+       uint32_t lo = 0, hi = 0;
+       __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
+       return ((((uint64_t)hi) << 32) | ((uint64_t)lo));
+}
 #endif /* _DIAGNOSTICS_H_ */
 
 #endif /* KERNEL_PRIVATE */
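A short usage sketch of read_pmc(): bit 30 of the RDPMC selector chooses the fixed-counter bank, so FIXED_PMC0/1/2 read the architectural instructions-retired, unhalted-core-cycle and unhalted-reference-cycle counters. This assumes the counters were first enabled (e.g. via the dgEnaPMC diagnostic above); do_work() is a hypothetical workload:

	uint64_t insns_before, insns_after;

	insns_before = read_pmc(FIXED_PMC0);    /* INST_RETIRED.ANY */
	do_work();                              /* hypothetical */
	insns_after  = read_pmc(FIXED_PMC0);

	kprintf("retired %llu instructions\n",
	        insns_after - insns_before);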
index 4106c92831deea8b2f6b514224094ca3fe7320c8..69a45836a16ded0be92bd16583a080da88ff5485 100644 (file)
@@ -53,6 +53,9 @@
 #include <i386/tsc.h>
 
 #include <kern/cpu_data.h>
+#include <kern/etimer.h>
+#include <kern/machine.h>
+#include <kern/timer_queue.h>
 #include <console/serial_protos.h>
 #include <machine/pal_routines.h>
 #include <vm/vm_page.h>
 #include <IOKit/IOHibernatePrivate.h>
 #endif
 #include <IOKit/IOPlatformExpert.h>
-
 #include <sys/kdebug.h>
 
 #if CONFIG_SLEEP
 extern void    acpi_sleep_cpu(acpi_sleep_callback, void * refcon);
-extern void acpi_wake_prot(void);
+extern void    acpi_wake_prot(void);
 #endif
 extern kern_return_t IOCPURunPlatformQuiesceActions(void);
 extern kern_return_t IOCPURunPlatformActiveActions(void);
@@ -93,6 +95,9 @@ typedef struct acpi_hibernate_callback_data acpi_hibernate_callback_data_t;
 
 unsigned int           save_kdebug_enable = 0;
 static uint64_t                acpi_sleep_abstime;
+static uint64_t                acpi_idle_abstime;
+static uint64_t                acpi_wake_abstime;
+boolean_t              deep_idle_rebase = TRUE;
 
 #if CONFIG_SLEEP
 static void
@@ -153,6 +158,7 @@ acpi_hibernate(void *refcon)
 
 extern void                    slave_pstart(void);
 
+extern unsigned int            wake_nkdbufs;
 
 void
 acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
@@ -302,11 +308,22 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        if (lapic_probe())
                lapic_configure();
 
+       acpi_wake_abstime = mach_absolute_time();
+
        /* let the realtime clock reset */
        rtc_sleep_wakeup(acpi_sleep_abstime);
 
        kdebug_enable = save_kdebug_enable;
 
+       if (kdebug_enable == 0) {
+               if (wake_nkdbufs)
+                       start_kern_tracing(wake_nkdbufs, TRUE);
+       }
+
+       /* Reconfigure FP/SIMD unit */
+       init_fpu();
+       clear_ts();
+
        IOCPURunPlatformActiveActions();
 
        if (did_hibernate) {
@@ -334,8 +351,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        /* Restart timer interrupts */
        rtc_timer_start();
 
-       /* Reconfigure FP/SIMD unit */
-       init_fpu();
+
 
 #if HIBERNATION
 #ifdef __i386__
@@ -358,6 +374,99 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 #endif
 }
 
+/*
+ * acpi_idle_kernel is called by the ACPI Platform kext to request the kernel
+ * to idle the boot processor in the deepest C-state for S0 sleep. All slave
+ * processors are expected to have already been offlined in the deepest C-state.
+ *
+ * The contract with ACPI is that although the kernel is called with interrupts
+ * disabled, interrupts may need to be re-enabled to dismiss any pending timer
+ * interrupt. However, the callback function will be invoked once this has
+ * occurred; interrupts are guaranteed to be disabled at that time, and
+ * to remain disabled through C-state entry, exit (wake) and the return
+ * from acpi_idle_kernel.
+ */
+void
+acpi_idle_kernel(acpi_sleep_callback func, void *refcon)
+{
+       boolean_t       istate = ml_get_interrupts_enabled();
+       
+       kprintf("acpi_idle_kernel, cpu=%d, interrupts %s\n",
+               cpu_number(), istate ? "enabled" : "disabled");
+
+       assert(cpu_number() == master_cpu);
+
+       /*
+        * Effectively set the boot cpu offline.
+        * This will stop further deadlines being set.
+        */
+       cpu_datap(master_cpu)->cpu_running = FALSE;
+
+       /* Cancel any pending deadline */
+       setPop(0);
+       while (lapic_is_interrupting(LAPIC_TIMER_VECTOR)) {
+               (void) ml_set_interrupts_enabled(TRUE);
+               setPop(0);
+               ml_set_interrupts_enabled(FALSE);
+       }
+
+       /*
+        * Call back to caller to indicate that interrupts will remain
+        * disabled while we deep idle, wake and return.
+        */ 
+       func(refcon);
+
+       acpi_idle_abstime = mach_absolute_time();
+
+       KERNEL_DEBUG_CONSTANT(
+               MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEEP_IDLE) | DBG_FUNC_START,
+               acpi_idle_abstime, deep_idle_rebase, 0, 0, 0);
+
+       /*
+        * Disable tracing during S0-sleep
+        * unless overridden by sysctl -w machdep.tsc.deep_idle_rebase=0
+        */
+       if (deep_idle_rebase) {
+               save_kdebug_enable = kdebug_enable;
+               kdebug_enable = 0;
+       }
+
+       /*
+        * Call into power-management to enter the lowest C-state.
+        * Note that, when called on the boot processor, this routine
+        * returns directly when awoken.
+        */
+       pmCPUHalt(PM_HALT_SLEEP);
+
+       /*
+        * Get wakeup time relative to the TSC which has progressed.
+        * Then rebase nanotime to reflect time not progressing over sleep
+        * - unless overridden so that tracing can occur during deep_idle.
+        */ 
+       acpi_wake_abstime = mach_absolute_time();
+       if (deep_idle_rebase) {
+               rtc_sleep_wakeup(acpi_idle_abstime);
+               kdebug_enable = save_kdebug_enable;
+       }
+
+       cpu_datap(master_cpu)->cpu_running = TRUE;
+
+       KERNEL_DEBUG_CONSTANT(
+               MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEEP_IDLE) | DBG_FUNC_END,
+               acpi_wake_abstime, acpi_wake_abstime - acpi_idle_abstime, 0, 0, 0);
+       /* Like S3 sleep, turn on tracing if trace_wake boot-arg is present */ 
+       if (kdebug_enable == 0) {
+               if (wake_nkdbufs)
+                       start_kern_tracing(wake_nkdbufs, TRUE);
+       }
+
+       IOCPURunPlatformActiveActions();
+
+       /* Restart timer interrupts */
+       rtc_timer_start();
+}
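A minimal caller sketch for this entry point, under the contract documented above. The callback and its helper are hypothetical names; only acpi_idle_kernel() and the acpi_sleep_callback type come from this diff:

	/* Hypothetical callback: runs with interrupts guaranteed disabled */
	static void
	platform_idle_ready(void *refcon)
	{
		arm_platform_wake_event(refcon);        /* hypothetical */
	}

	static void
	platform_enter_s0_idle(void *ctx)
	{
		/* Does not return until wake; interrupts stay disabled */
		acpi_idle_kernel(platform_idle_ready, ctx);
	}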
+
 extern char real_mode_bootstrap_end[];
 extern char real_mode_bootstrap_base[];
 
index a64e8127ce749b2cd9ea56f08e6c9f37d03dce38..ed8fdd08e5ec098a7f16be6c575fcbda0fb2aaaf 100644 (file)
@@ -46,6 +46,7 @@
 typedef void (*acpi_sleep_callback)(void * refcon);
 extern vm_offset_t acpi_install_wake_handler(void);
 extern void       acpi_sleep_kernel(acpi_sleep_callback func, void * refcon);
+extern void       acpi_idle_kernel(acpi_sleep_callback func, void * refcon);
 void install_real_mode_bootstrap(void *prot_entry);
 #endif /* ASSEMBLER */
 
index 7076ff53376932e7731cb771ae1c5e79ccbfdc6e..30ea3e10ff57ad206f3751ff154eb4c36d46295c 100644 (file)
@@ -80,17 +80,14 @@ extern vm_map_t     commpage_text64_map;    // the shared submap, set up in vm init
 
 char   *commPagePtr32 = NULL;          // virtual addr in kernel map of 32-bit commpage
 char   *commPagePtr64 = NULL;          // ...and of 64-bit commpage
-char   *commPageTextPtr32 = NULL;              // virtual addr in kernel map of 32-bit commpage
-char   *commPageTextPtr64 = NULL;              // ...and of 64-bit commpage
-uint32_t     _cpu_capabilities = 0;          // define the capability vector
+char   *commPageTextPtr32 = NULL;      // virtual addr in kernel map of 32-bit commpage
+char   *commPageTextPtr64 = NULL;      // ...and of 64-bit commpage
 
-int    noVMX = 0;              /* if true, do not set kHasAltivec in ppc _cpu_capabilities */
+uint64_t     _cpu_capabilities = 0;     // define the capability vector
 
 typedef uint32_t commpage_address_t;
 
-static commpage_address_t      next;                   // next available address in comm page
-static commpage_address_t      cur_routine;            // comm page address of "current" routine
-static boolean_t               matched;                // true if we've found a match for "current" routine
+static commpage_address_t      next;   // next available address in comm page
 
 static char    *commPagePtr;           // virtual addr in kernel map of commpage we are working on
 static commpage_address_t      commPageBaseOffset; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map
@@ -205,7 +202,7 @@ commpage_cpus( void )
 static void
 commpage_init_cpu_capabilities( void )
 {
-       uint32_t bits;
+       uint64_t bits;
        int cpus;
        ml_cpu_info_t cpu_info;
 
@@ -254,30 +251,46 @@ commpage_init_cpu_capabilities( void )
        }
        cpus = commpage_cpus();                 // how many CPUs do we have
 
-       if (cpus == 1)
-               bits |= kUP;
-
        bits |= (cpus << kNumCPUsShift);
 
        bits |= kFastThreadLocalStorage;        // we use %gs for TLS
 
-       if (cpu_mode_is64bit())                 // k64Bit means processor is 64-bit capable
-               bits |= k64Bit;
-
-       if (tscFreq <= SLOW_TSC_THRESHOLD)      /* is TSC too slow for _commpage_nanotime?  */
-               bits |= kSlow;
-
-       bits |= (cpuid_features() & CPUID_FEATURE_AES) ? kHasAES : 0;
-
-       bits |= (cpuid_features() & CPUID_FEATURE_F16C) ? kHasF16C : 0;
-       bits |= (cpuid_features() & CPUID_FEATURE_RDRAND) ? kHasRDRAND : 0;
-       bits |= ((cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_ENFSTRG) &&
-                (rdmsr64(MSR_IA32_MISC_ENABLE) & 1ULL )) ? kHasENFSTRG : 0;
-
+#define setif(_bits, _bit, _condition) \
+       if (_condition) _bits |= _bit
+
+       setif(bits, kUP,         cpus == 1);
+       setif(bits, k64Bit,      cpu_mode_is64bit());
+       setif(bits, kSlow,       tscFreq <= SLOW_TSC_THRESHOLD);
+
+       setif(bits, kHasAES,     cpuid_features() &
+                                       CPUID_FEATURE_AES);
+       setif(bits, kHasF16C,    cpuid_features() &
+                                       CPUID_FEATURE_F16C);
+       setif(bits, kHasRDRAND,  cpuid_features() &
+                                       CPUID_FEATURE_RDRAND);
+       setif(bits, kHasFMA,     cpuid_features() &
+                                       CPUID_FEATURE_FMA);
+
+       setif(bits, kHasBMI1,    cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_BMI1);
+       setif(bits, kHasBMI2,    cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_BMI2);
+       setif(bits, kHasRTM,     cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_RTM);
+       setif(bits, kHasHLE,     cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_HLE);
+       setif(bits, kHasAVX2_0,  cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_AVX2);
+       
+       uint64_t misc_enable = rdmsr64(MSR_IA32_MISC_ENABLE);
+       setif(bits, kHasENFSTRG, (misc_enable & 1ULL) &&
+                                (cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_ENFSTRG));
+       
        _cpu_capabilities = bits;               // set kernel version for use by drivers etc
 }
 
-int
+uint64_t
 _get_cpu_capabilities(void)
 {
        return _cpu_capabilities;
@@ -305,27 +318,9 @@ commpage_stuff(
  */
 static void
 commpage_stuff_routine(
-    commpage_descriptor        *rd     )
+    commpage_descriptor *rd     )
 {
-    uint32_t           must,cant;
-    
-    if (rd->commpage_address != cur_routine) {
-        if ((cur_routine!=0) && (matched==0))
-            panic("commpage no match for last, next address %08x", rd->commpage_address);
-        cur_routine = rd->commpage_address;
-        matched = 0;
-    }
-    
-    must = _cpu_capabilities & rd->musthave;
-    cant = _cpu_capabilities & rd->canthave;
-    
-    if ((must == rd->musthave) && (cant == 0)) {
-        if (matched)
-            panic("commpage multiple matches for address %08x", rd->commpage_address);
-        matched = 1;
-        
-        commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length);
-       }
+       commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length);
 }
 
 /* Fill in the 32- or 64-bit commpage.  Called once for each.
@@ -341,15 +336,14 @@ commpage_populate_one(
        const char*     signature,      // "commpage 32-bit" or "commpage 64-bit"
        vm_prot_t       uperm)
 {
-       uint8_t c1;
-       short   c2;
-       int         c4;
-       uint64_t c8;
+       uint8_t         c1;
+       uint16_t        c2;
+       int             c4;
+       uint64_t        c8;
        uint32_t        cfamily;
        short   version = _COMM_PAGE_THIS_VERSION;
 
        next = 0;
-       cur_routine = 0;
        commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used, uperm );
        *kernAddressPtr = commPagePtr;                          // save address either in commPagePtr32 or 64
        commPageBaseOffset = base_offset;
@@ -358,10 +352,13 @@ commpage_populate_one(
 
        /* Stuff in the constants.  We move things into the comm page in strictly
        * ascending order, so we can check for overlap and panic if so.
+       * Note: the 32-bit cpu_capabilities vector is retained in addition to
+       * the expanded 64-bit vector.
        */
-       commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)strlen(signature));
+       commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)MIN(_COMM_PAGE_SIGNATURELEN, strlen(signature)));
+       commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES64,&_cpu_capabilities,sizeof(_cpu_capabilities));
        commpage_stuff(_COMM_PAGE_VERSION,&version,sizeof(short));
-       commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int));
+       commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(uint32_t));
 
        c2 = 32;  // default
        if (_cpu_capabilities & kCache64)
@@ -369,7 +366,7 @@ commpage_populate_one(
        else if (_cpu_capabilities & kCache128)
                c2 = 128;
        commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2);
-       
+
        c4 = MP_SPIN_TRIES;
        commpage_stuff(_COMM_PAGE_SPIN_COUNT,&c4,4);
 
@@ -442,8 +439,7 @@ commpage_populate( void )
 void commpage_text_populate( void ){
        commpage_descriptor **rd;
        
-       next =0;
-       cur_routine=0;
+       next = 0;
        commPagePtr = (char *) commpage_allocate(commpage_text32_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
        commPageTextPtr32 = commPagePtr;
        
@@ -457,8 +453,6 @@ void commpage_text_populate( void ){
        for (rd = commpage_32_routines; *rd != NULL; rd++) {
                commpage_stuff_routine(*rd);
        }
-       if (!matched)
-               panic(" commpage_text no match for last routine ");
 
 #ifndef __LP64__
        pmap_commpage32_init((vm_offset_t) commPageTextPtr32, _COMM_PAGE_TEXT_START, 
@@ -466,8 +460,7 @@ void commpage_text_populate( void ){
 #endif 
 
        if (_cpu_capabilities & k64Bit) {
-               next =0;
-               cur_routine=0;
+               next = 0;
                commPagePtr = (char *) commpage_allocate(commpage_text64_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
                commPageTextPtr64 = commPagePtr;
 
@@ -486,17 +479,12 @@ void commpage_text_populate( void ){
 #endif 
        }
 
-       if (!matched)
-               panic(" commpage_text no match for last routine ");
-
        if (next > _COMM_PAGE_TEXT_END) 
                panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next, commPagePtr); 
 
 }
 
-/* Update commpage nanotime information.  Note that we interleave
- * setting the 32- and 64-bit commpages, in order to keep nanotime more
- * nearly in sync between the two environments.
+/* Update commpage nanotime information.
  *
  * This routine must be serialized by some external means, ie a lock.
  */
@@ -520,7 +508,7 @@ commpage_set_nanotime(
                panic("nanotime trouble 1");    /* possibly not serialized */
        if ( ns_base < p32->nt_ns_base )
                panic("nanotime trouble 2");
-       if ((shift != 32) && ((_cpu_capabilities & kSlow)==0) )
+       if ((shift != 0) && ((_cpu_capabilities & kSlow)==0) )
                panic("nanotime trouble 3");
                
        next_gen = ++generation;
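The generation counter bumped above is one side of a seqlock-style protocol that lets user code read the nanotime data without locks; a sketch of the matching reader (the real reader is commpage assembly, so treat the struct/field spellings here as assumptions apart from nt_ns_base, which appears above):

	volatile commpage_time_data *p = /* p32 or p64, as above */;
	uint32_t gen;
	uint64_t tsc_base, ns_base;

	do {
		gen      = p->nt_generation;    /* 0 => update in progress */
		tsc_base = p->nt_tsc_base;
		ns_base  = p->nt_ns_base;
		/* ... read nt_scale / nt_shift similarly ... */
	} while (gen == 0 || gen != p->nt_generation);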
@@ -604,14 +592,14 @@ commpage_set_memory_pressure(
        cp = commPagePtr32;
        if ( cp ) {
                cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_BASE_ADDRESS);
-               ip = (uint32_t*) cp;
+               ip = (uint32_t*) (void *) cp;
                *ip = (uint32_t) pressure;
        }
        
        cp = commPagePtr64;
        if ( cp ) {
                cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_START_ADDRESS);
-               ip = (uint32_t*) cp;
+               ip = (uint32_t*) (void *) cp;
                *ip = (uint32_t) pressure;
        }
 
@@ -633,14 +621,14 @@ commpage_set_spin_count(
        cp = commPagePtr32;
        if ( cp ) {
                cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_BASE_ADDRESS);
-               ip = (uint32_t*) cp;
+               ip = (uint32_t*) (void *) cp;
                *ip = (uint32_t) count;
        }
        
        cp = commPagePtr64;
        if ( cp ) {
                cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_START_ADDRESS);
-               ip = (uint32_t*) cp;
+               ip = (uint32_t*) (void *) cp;
                *ip = (uint32_t) count;
        }
 
index c8369d78d7f543edfc426740f41d68cefda16842..030d294af0b6d0ff5e29d77cd535d42e2462b0d6 100644 (file)
@@ -81,7 +81,7 @@
 .align alignment, 0x90                                         ;\
 L ## label ## :
 
-#define        COMMPAGE_DESCRIPTOR(label,address,must,cant)    \
+#define        COMMPAGE_DESCRIPTOR(label,address)                      \
 L ## label ## _end:                                            ;\
 .set L ## label ## _size, L ## label ## _end - L ## label      ;\
 .const_data                                                    ;\
@@ -90,8 +90,6 @@ COMMPAGE_DESCRIPTOR_NAME(label) ## :                          ;\
     COMMPAGE_DESCRIPTOR_FIELD_POINTER  L ## label              ;\
     .long                              L ## label ## _size     ;\
     .long                              address                 ;\
-    .long                              must                    ;\
-    .long                              cant                    ;\
 .text
 
 
@@ -131,8 +129,6 @@ typedef     struct  commpage_descriptor     {
     void               *code_address;                          // address of code
     uint32_t           code_length;                            // length in bytes
     uint32_t           commpage_address;                       // put at this address (_COMM_PAGE_BCOPY etc)
-    uint32_t           musthave;                               // _cpu_capability bits we must have
-    uint32_t           canthave;                               // _cpu_capability bits we can't have
 } commpage_descriptor;
 
 
index e994ae94569d66f993c4197557a72a5e6bfdf852..81c041ae406480509ab808d5d8a99df86144b87b 100644 (file)
@@ -83,7 +83,7 @@ COMMPAGE_FUNCTION_START(preempt, 32, 4)
        movl    $(-58),%eax     /* 58 = pfz_exit */
        xorl    %ebx,%ebx       // clear "preemption pending" flag
        sysenter
-COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT,0,0)
+COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT)
 
 
 /* Subroutine to back off if we cannot get the spinlock.  Called
@@ -107,7 +107,7 @@ COMMPAGE_FUNCTION_START(backoff, 32, 4)
        cmpl    $0,8(%edi)      // sniff the lockword
        jnz     1b              // loop if still taken
        ret                     // lockword is free, so reenter PFZ
-COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF,0,0)
+COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF)
 
 
 /* Preemption-free-zone routine to FIFO Enqueue:
@@ -152,7 +152,7 @@ COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4)
        movl        %esi,4(%edi)    // new element becomes last in q
        movl        $0,8(%edi)      // unlock spinlock
        ret
-COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE)
 
 
 /* Preemption-free-zone routine to FIFO Dequeue:
@@ -198,7 +198,7 @@ COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4)
 4:
        movl        $0,8(%edi)      // unlock spinlock
        ret
-COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE)
 
 
 
@@ -233,7 +233,7 @@ COMMPAGE_FUNCTION_START(preempt_64, 64, 4)
        popq    %rcx
        popq    %rax
        ret
-COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT,0,0)
+COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT)
 
 
 /* Subroutine to back off if we cannot get the spinlock.  Called
@@ -252,7 +252,7 @@ COMMPAGE_FUNCTION_START(backoff_64, 64, 4)
        cmpl    $0,16(%rdi)     // sniff the lockword
        jnz     1b              // loop if still taken
        ret                     // lockword is free, so reenter PFZ
-COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF,0,0)
+COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF)
 
 
 /* Preemption-free-zone routine to FIFO Enqueue:
@@ -297,7 +297,7 @@ COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4)
        movq        %rsi,8(%rdi)    // new element becomes last in q
        movl        $0,16(%rdi)     // unlock spinlock
        ret
-COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE)
 
 
 
@@ -344,4 +344,4 @@ COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4)
 4:
        movl        $0,16(%rdi)     // unlock spinlock
        ret
-COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE,0,0)
+COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE)
index a7226180bc88af4947c097f5b77bd46b65406ef4..c62094de1e657deae3eae0381e9ee82bcc16e9c1 100644 (file)
@@ -155,7 +155,7 @@ COMMPAGE_FUNCTION_START(pfz_mutex_lock, 32, 4)
        orl     $0x00180000,%eax            // copy 24 bytes of arguments in trampoline
        xorl    %ebx,%ebx                   // clear preemption flag
        sysenter
-COMMPAGE_DESCRIPTOR(pfz_mutex_lock,_COMM_PAGE_PFZ_MUTEX_LOCK,0,0)
+COMMPAGE_DESCRIPTOR(pfz_mutex_lock,_COMM_PAGE_PFZ_MUTEX_LOCK)
 
 
 
@@ -224,5 +224,5 @@ COMMPAGE_FUNCTION_START(pfz_mutex_lock_64, 64, 4)
        movl    $PTHRW_STATUS_SYSCALL,%eax  // we made syscall
        popq    %rbp
        ret
-COMMPAGE_DESCRIPTOR(pfz_mutex_lock_64,_COMM_PAGE_PFZ_MUTEX_LOCK,0,0)
+COMMPAGE_DESCRIPTOR(pfz_mutex_lock_64,_COMM_PAGE_PFZ_MUTEX_LOCK)
 
index 4cdeed647742736794cfc1ba549d394084be3a86..23b38e3db33940a4b0fe8190dc239dbbf76728e2 100644 (file)
@@ -91,8 +91,6 @@ cpu_sleep(void)
 {
        cpu_data_t      *cdp = current_cpu_datap();
 
-       i386_deactivate_cpu();
-
        PE_cpu_machine_quiesce(cdp->cpu_id);
 
        cpu_thread_halt();
index 3cf464e3414fb2a878c35fe889053867eebfebce..b8d5027af7673f0ad41005303d0de215602b493a 100644 (file)
 #define        kSlow                           0x00004000      /* tsc < nanosecond */
 #define        kUP                             0x00008000      /* set if (kNumCPUs == 1) */
 #define        kNumCPUs                        0x00FF0000      /* number of CPUs (see _NumCPUs() below) */
+#define        kNumCPUsShift                   16
 #define        kHasAVX1_0                      0x01000000
 #define        kHasRDRAND                      0x02000000
 #define        kHasF16C                        0x04000000
 #define        kHasENFSTRG                     0x08000000
-#define        kNumCPUsShift                   16              /* see _NumCPUs() below */
+#define        kHasFMA                         0x10000000
+#define        kHasAVX2_0                      0x20000000
+#define        kHasBMI1                        0x40000000
+#define        kHasBMI2                        0x80000000
+/* Extending into 64 bits from here: */
+#define        kHasRTM                 0x0000000100000000ULL
+#define        kHasHLE                 0x0000000200000000ULL
+
 
 #ifndef        __ASSEMBLER__
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
-extern int  _get_cpu_capabilities( void );
+extern uint64_t  _get_cpu_capabilities( void );
 __END_DECLS
 
 inline static
 int _NumCPUs( void )
 {
-       return (_get_cpu_capabilities() & kNumCPUs) >> kNumCPUsShift;
+       return (int) ((_get_cpu_capabilities() & kNumCPUs) >> kNumCPUsShift);
 }
 
 #endif /* __ASSEMBLER__ */
@@ -151,13 +159,16 @@ int _NumCPUs( void )
 
 /* data in the comm page */
  
-#define _COMM_PAGE_SIGNATURE           (_COMM_PAGE_START_ADDRESS+0x000)        /* first few bytes are a signature */
+#define _COMM_PAGE_SIGNATURE           (_COMM_PAGE_START_ADDRESS+0x000)        /* first 16 bytes are a signature */
+#define _COMM_PAGE_SIGNATURELEN                (0x10)
+#define _COMM_PAGE_CPU_CAPABILITIES64  (_COMM_PAGE_START_ADDRESS+0x010)        /* uint64_t _cpu_capabilities */
+#define _COMM_PAGE_UNUSED              (_COMM_PAGE_START_ADDRESS+0x018)        /* 6 unused bytes */
 #define _COMM_PAGE_VERSION             (_COMM_PAGE_START_ADDRESS+0x01E)        /* 16-bit version# */
-#define _COMM_PAGE_THIS_VERSION                12                                      /* version of the commarea format */
+#define _COMM_PAGE_THIS_VERSION                13                                      /* in ver 13, _COMM_PAGE_NT_SHIFT defaults to 0 (was 32) */
   
-#define _COMM_PAGE_CPU_CAPABILITIES    (_COMM_PAGE_START_ADDRESS+0x020)        /* uint32_t _cpu_capabilities */
+#define _COMM_PAGE_CPU_CAPABILITIES    (_COMM_PAGE_START_ADDRESS+0x020)        /* uint32_t _cpu_capabilities (retained for compatibility) */
 #define _COMM_PAGE_NCPUS               (_COMM_PAGE_START_ADDRESS+0x022)        /* uint8_t number of configured CPUs (hw.logicalcpu at boot time) */
-#define _COMM_PAGE_UNUSED0                     (_COMM_PAGE_START_ADDRESS+0x024)        /* 2 unused bytes, reserved for future expansion of cpu_capabilities */
+#define _COMM_PAGE_UNUSED0             (_COMM_PAGE_START_ADDRESS+0x024)        /* 2 unused bytes, previously reserved for expansion of cpu_capabilities */
 #define _COMM_PAGE_CACHE_LINESIZE      (_COMM_PAGE_START_ADDRESS+0x026)        /* uint16_t cache line size */
 
 #define _COMM_PAGE_SCHED_GEN           (_COMM_PAGE_START_ADDRESS+0x028)        /* uint32_t scheduler generation number (count of pre-emptions) */
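With version 13 of the commpage, a 64-bit process can in principle read the widened vector directly; a hedged sketch (libsyscall's __get_cpu_capabilities, updated elsewhere in this commit, is the supported interface; the direct load below is for illustration only):

	/* _COMM_PAGE_CPU_CAPABILITIES64 resolves to a fixed address in
	 * every 64-bit process; the commpage is mapped read-only.
	 */
	uint64_t caps = *(volatile uint64_t *)_COMM_PAGE_CPU_CAPABILITIES64;

	if (caps & kHasAVX2_0) {
		/* AVX2 code path is safe here */
	}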
index a501be695463c62018672e0884ea3344ea62a3a3..620ba6a29e8c024954f429cce352ba0f9ec26238 100644 (file)
@@ -125,6 +125,10 @@ typedef struct {
 
 typedef        uint16_t        pcid_t;
 typedef        uint8_t         pcid_ref_t;
+
+#define CPU_RTIME_BINS (12)
+#define CPU_ITIME_BINS (CPU_RTIME_BINS)
+
 /*
  * Per-cpu data.
  *
@@ -157,12 +161,9 @@ typedef struct cpu_data
        int                     cpu_prior_signals;      /* Last set of events,
                                                         * debugging
                                                         */
-       int                     cpu_mcount_off;         /* mcount recursion */
        ast_t                   cpu_pending_ast;
-       int                     cpu_type;
-       int                     cpu_subtype;
-       int                     cpu_threadtype;
-       int                     cpu_running;
+       volatile int            cpu_running;
+       boolean_t               cpu_fixed_pmcs_enabled;
        rtclock_timer_t         rtclock_timer;
        boolean_t               cpu_is64bit;
        volatile addr64_t       cpu_active_cr3 __attribute((aligned(64)));
@@ -188,9 +189,6 @@ typedef struct cpu_data
        struct fake_descriptor  *cpu_ldtp;
        cpu_desc_index_t        cpu_desc_index;
        int                     cpu_ldt;
-       boolean_t               cpu_iflag;
-       boolean_t               cpu_boot_complete;
-       int                     cpu_hibernate;
 #if NCOPY_WINDOWS > 0
        vm_offset_t             cpu_copywindow_base;
        uint64_t                *cpu_copywindow_pdp;
@@ -198,18 +196,13 @@ typedef struct cpu_data
        vm_offset_t             cpu_physwindow_base;
        uint64_t                *cpu_physwindow_ptep;
 #endif
-       void                    *cpu_hi_iss;
 
 #define HWINTCNT_SIZE 256
        uint32_t                cpu_hwIntCnt[HWINTCNT_SIZE];    /* Interrupt counts */
+       uint64_t                cpu_hwIntpexits[HWINTCNT_SIZE];
+       uint64_t                cpu_hwIntcexits[HWINTCNT_SIZE];
        uint64_t                cpu_dr7; /* debug control register */
        uint64_t                cpu_int_event_time;     /* intr entry/exit time */
-#if CONFIG_VMX
-       vmx_cpu_t               cpu_vmx;                /* wonderful world of virtualization */
-#endif
-#if CONFIG_MCA
-       struct mca_state        *cpu_mca_state;         /* State at MC fault */
-#endif
        uint64_t                cpu_uber_arg_store;     /* Double mapped address
                                                         * of current thread's
                                                         * uu_arg array.
@@ -246,12 +239,17 @@ typedef struct cpu_data
        uint64_t                cpu_c7res;
        uint64_t                cpu_itime_total;
        uint64_t                cpu_rtime_total;
-       uint64_t                cpu_rtimes[4];
-       uint64_t                cpu_itimes[4];
        uint64_t                cpu_ixtime;
+       uint64_t                cpu_idle_exits;
+       uint64_t                cpu_rtimes[CPU_RTIME_BINS];
+       uint64_t                cpu_itimes[CPU_ITIME_BINS];
+       uint64_t                cpu_cur_insns;
+       uint64_t                cpu_cur_ucc;
+       uint64_t                cpu_cur_urc;
        uint64_t                cpu_max_observed_int_latency;
        int                     cpu_max_observed_int_latency_vector;
        uint64_t                debugger_entry_time;
+       uint64_t                debugger_ipi_time;
        volatile boolean_t      cpu_NMI_acknowledged;
        /* A separate nested interrupt stack flag, to account
         * for non-nested interrupts arriving while on the interrupt stack
@@ -262,6 +260,18 @@ typedef struct cpu_data
        uint32_t                cpu_nested_istack_events;
        x86_saved_state64_t     *cpu_fatal_trap_state;
        x86_saved_state64_t     *cpu_post_fatal_trap_state;
+#if CONFIG_VMX
+       vmx_cpu_t               cpu_vmx;                /* wonderful world of virtualization */
+#endif
+#if CONFIG_MCA
+       struct mca_state        *cpu_mca_state;         /* State at MC fault */
+#endif
+       int                     cpu_type;
+       int                     cpu_subtype;
+       int                     cpu_threadtype;
+       boolean_t               cpu_iflag;
+       boolean_t               cpu_boot_complete;
+       int                     cpu_hibernate;
 } cpu_data_t;
 
 extern cpu_data_t      *cpu_data_ptr[];  
index ff109f927ec23efa3db25e3df467d39a40c97239..715a25420fe33b4ee6a3e52cab9934552b0fced1 100644 (file)
@@ -207,6 +207,7 @@ typedef struct x86_pkg
     void               *pmStats;       /* Power Management stats for package*/
     void               *pmState;       /* Power Management state for package*/
     struct mca_state   *mca_state;     /* MCA state for memory errors */
+    uint64_t           package_idle_exits;
     uint32_t           num_idle;
 } x86_pkg_t;
 
index 46061d43a16d038ccc6fe0c7afbedf2f5abfeae8..3ca38be8b6d071f3d3b0d723224c12098dbafcb2 100644 (file)
@@ -162,6 +162,7 @@ static cpuid_cache_descriptor_t intel_cpuid_leaf2_descriptor_table[] = {
        { 0x70, CACHE,  TRACE,          8,      12*K,   NA  },
        { 0x71, CACHE,  TRACE,          8,      16*K,   NA  },
        { 0x72, CACHE,  TRACE,          8,      32*K,   NA  },
+       { 0x76, TLB,    INST,           NA,     BOTH,   8   },
        { 0x78, CACHE,  L2,             4,      1*M,    64  },
        { 0x79, CACHE,  L2_2LINESECTOR, 8,      128*K,  64  },
        { 0x7A, CACHE,  L2_2LINESECTOR, 8,      256*K,  64  },
@@ -181,8 +182,11 @@ static cpuid_cache_descriptor_t intel_cpuid_leaf2_descriptor_table[] = {
        { 0xB2, TLB,    INST,           4,      SMALL,  64  },  
        { 0xB3, TLB,    DATA,           4,      SMALL,  128 },  
        { 0xB4, TLB,    DATA1,          4,      SMALL,  256 },  
+       { 0xB5, TLB,    DATA1,          8,      SMALL,  64  },  
+       { 0xB6, TLB,    DATA1,          8,      SMALL,  128 },  
        { 0xBA, TLB,    DATA1,          4,      BOTH,   64  },  
-       { 0xCA, STLB,   DATA1,          4,      BOTH,   512 },  
+       { 0xC1, STLB,   DATA1,          8,      SMALL,  1024},  
+       { 0xCA, STLB,   DATA1,          4,      SMALL,  512 },  
        { 0xD0, CACHE,  L3,             4,      512*K,  64  },  
        { 0xD1, CACHE,  L3,             4,      1*M,    64  },  
        { 0xD2, CACHE,  L3,             4,      2*M,    64  },  
@@ -663,13 +667,13 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                ctp->sensor               = bitfield32(reg[eax], 0, 0);
                ctp->dynamic_acceleration = bitfield32(reg[eax], 1, 1);
                ctp->invariant_APIC_timer = bitfield32(reg[eax], 2, 2);
-               ctp->core_power_limits    = bitfield32(reg[eax], 3, 3);
-               ctp->fine_grain_clock_mod = bitfield32(reg[eax], 4, 4);
-               ctp->package_thermal_intr = bitfield32(reg[eax], 5, 5);
+               ctp->core_power_limits    = bitfield32(reg[eax], 4, 4);
+               ctp->fine_grain_clock_mod = bitfield32(reg[eax], 5, 5);
+               ctp->package_thermal_intr = bitfield32(reg[eax], 6, 6);
                ctp->thresholds           = bitfield32(reg[ebx], 3, 0);
                ctp->ACNT_MCNT            = bitfield32(reg[ecx], 0, 0);
                ctp->hardware_feedback    = bitfield32(reg[ecx], 1, 1);
-               ctp->energy_policy        = bitfield32(reg[ecx], 2, 2);
+               ctp->energy_policy        = bitfield32(reg[ecx], 3, 3);
                info_p->cpuid_thermal_leafp = ctp;
 
                DBG(" Thermal/Power Leaf:\n");
@@ -681,7 +685,7 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                DBG("  package_thermal_intr : %d\n", ctp->package_thermal_intr);
                DBG("  thresholds           : %d\n", ctp->thresholds);
                DBG("  ACNT_MCNT            : %d\n", ctp->ACNT_MCNT);
-               DBG("  hardware_feedback    : %d\n", ctp->hardware_feedback);
+               DBG("  ACNT2                : %d\n", ctp->hardware_feedback);
                DBG("  energy_policy        : %d\n", ctp->energy_policy);
        }
 
@@ -726,9 +730,9 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                DBG("  EDX           : 0x%x\n", xsp->extended_state[edx]);
        }
 
-       if (info_p->cpuid_model == CPUID_MODEL_IVYBRIDGE) {
+       if (info_p->cpuid_model >= CPUID_MODEL_IVYBRIDGE) {
                /*
-                * XSAVE Features:
+                * Leaf7 Features:
                 */
                cpuid_fn(0x7, reg);
                info_p->cpuid_leaf7_features = reg[ebx];
@@ -777,6 +781,11 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p)
                case CPUID_MODEL_IVYBRIDGE:
                        cpufamily = CPUFAMILY_INTEL_IVYBRIDGE;
                        break;
+               case CPUID_MODEL_HASWELL:
+               case CPUID_MODEL_HASWELL_ULT:
+               case CPUID_MODEL_CRYSTALWELL:
+                       cpufamily = CPUFAMILY_INTEL_HASWELL;
+                       break;
                }
                break;
        }
@@ -823,6 +832,7 @@ cpuid_set_info(void)
                info_p->thread_count = bitfield32((uint32_t)msr, 15,  0);
                break;
                }
+       case CPUFAMILY_INTEL_HASWELL:
        case CPUFAMILY_INTEL_IVYBRIDGE:
        case CPUFAMILY_INTEL_SANDYBRIDGE:
        case CPUFAMILY_INTEL_NEHALEM: {
@@ -887,12 +897,13 @@ static struct table {
        {CPUID_FEATURE_TM2,       "TM2"},
        {CPUID_FEATURE_SSSE3,     "SSSE3"},
        {CPUID_FEATURE_CID,       "CID"},
+       {CPUID_FEATURE_FMA,       "FMA"},
        {CPUID_FEATURE_CX16,      "CX16"},
        {CPUID_FEATURE_xTPR,      "TPR"},
        {CPUID_FEATURE_PDCM,      "PDCM"},
        {CPUID_FEATURE_SSE4_1,    "SSE4.1"},
        {CPUID_FEATURE_SSE4_2,    "SSE4.2"},
-       {CPUID_FEATURE_xAPIC,     "xAPIC"},
+       {CPUID_FEATURE_x2APIC,    "x2APIC"},
        {CPUID_FEATURE_MOVBE,     "MOVBE"},
        {CPUID_FEATURE_POPCNT,    "POPCNT"},
        {CPUID_FEATURE_AES,       "AES"},
@@ -920,8 +931,15 @@ extfeature_map[] = {
 },
 leaf7_feature_map[] = {
        {CPUID_LEAF7_FEATURE_RDWRFSGS, "RDWRFSGS"},
+       {CPUID_LEAF7_FEATURE_TSCOFF,   "TSC_THREAD_OFFSET"},
+       {CPUID_LEAF7_FEATURE_BMI1,     "BMI1"},
+       {CPUID_LEAF7_FEATURE_HLE,      "HLE"},
        {CPUID_LEAF7_FEATURE_SMEP,     "SMEP"},
+       {CPUID_LEAF7_FEATURE_AVX2,     "AVX2"},
+       {CPUID_LEAF7_FEATURE_BMI2,     "BMI2"},
        {CPUID_LEAF7_FEATURE_ENFSTRG,  "ENFSTRG"},
+       {CPUID_LEAF7_FEATURE_INVPCID,  "INVPCID"},
+       {CPUID_LEAF7_FEATURE_RTM,      "RTM"},
        {0, 0}
 };
 
index 1bc3e29277f82362e5a4528ed9729fa2cc1a89b3..7597bc6539cc84b54172103206bbb234745e08d7 100644 (file)
@@ -97,6 +97,7 @@
 #define CPUID_FEATURE_SSSE3     _HBit(9)  /* Supplemental SSE3 instructions */
 #define CPUID_FEATURE_CID       _HBit(10) /* L1 Context ID */
 #define CPUID_FEATURE_SEGLIM64  _HBit(11) /* 64-bit segment limit checking */
+#define CPUID_FEATURE_FMA       _HBit(12) /* Fused-Multiply-Add support */
 #define CPUID_FEATURE_CX16      _HBit(13) /* CmpXchg16b instruction */
 #define CPUID_FEATURE_xTPR      _HBit(14) /* Send Task PRiority msgs */
 #define CPUID_FEATURE_PDCM      _HBit(15) /* Perf/Debug Capability MSR */
 #define CPUID_FEATURE_DCA       _HBit(18) /* Direct Cache Access */
 #define CPUID_FEATURE_SSE4_1    _HBit(19) /* Streaming SIMD extensions 4.1 */
 #define CPUID_FEATURE_SSE4_2    _HBit(20) /* Streaming SIMD extensions 4.2 */
-#define CPUID_FEATURE_xAPIC     _HBit(21) /* Extended APIC Mode */
+#define CPUID_FEATURE_x2APIC    _HBit(21) /* Extended APIC Mode */
 #define CPUID_FEATURE_MOVBE     _HBit(22) /* MOVBE instruction */
 #define CPUID_FEATURE_POPCNT    _HBit(23) /* POPCNT instruction */
 #define CPUID_FEATURE_TSCTMR    _HBit(24) /* TSC deadline timer */
  * Bits returned in %ebx to a CPUID request with {%eax,%ecx} of (0x7,0x0}:
  */
 #define CPUID_LEAF7_FEATURE_RDWRFSGS _Bit(0)   /* FS/GS base read/write */
+#define CPUID_LEAF7_FEATURE_TSCOFF   _Bit(1)   /* TSC thread offset */
+#define CPUID_LEAF7_FEATURE_BMI1     _Bit(3)   /* Bit Manipulation Instrs, set 1 */
+#define CPUID_LEAF7_FEATURE_HLE      _Bit(4)   /* Hardware Lock Elision */
+#define CPUID_LEAF7_FEATURE_AVX2     _Bit(5)   /* AVX2 Instructions */
 #define CPUID_LEAF7_FEATURE_SMEP     _Bit(7)   /* Supervisor Mode Execute Protect */
+#define CPUID_LEAF7_FEATURE_BMI2     _Bit(8)   /* Bit Manipulation Instrs, set 2 */
 #define CPUID_LEAF7_FEATURE_ENFSTRG  _Bit(9)   /* ENhanced Fast STRinG copy */
+#define CPUID_LEAF7_FEATURE_INVPCID  _Bit(10)  /* INVPCID instruction */
+#define CPUID_LEAF7_FEATURE_RTM      _Bit(11)  /* Restricted Transactional Memory */
 
 /*
  * The CPUID_EXTFEATURE_XXX values define 64-bit values
 #define CPUID_MODEL_SANDYBRIDGE        0x2A
 #define CPUID_MODEL_JAKETOWN   0x2D
 #define CPUID_MODEL_IVYBRIDGE  0x3A
+#define CPUID_MODEL_HASWELL    0x3C
+#define CPUID_MODEL_HASWELL_SVR        0x3F
+#define CPUID_MODEL_HASWELL_ULT        0x45
+#define CPUID_MODEL_CRYSTALWELL        0x46
 
 
 #define CPUID_VMM_FAMILY_UNKNOWN       0x0
index c834962ef86b39b60b3cabec10bab3eeed815f8a..3e03db1e9cf4173cf24996da04c79fda59deead1 100644 (file)
@@ -150,7 +150,7 @@ void etimer_set_deadline(uint64_t deadline)
 void
 etimer_resync_deadlines(void)
 {
-       uint64_t                deadline;
+       uint64_t                deadline = EndOfAllTime;
        uint64_t                pmdeadline;
        rtclock_timer_t         *mytimer;
        spl_t                   s = splclock();
@@ -158,7 +158,9 @@ etimer_resync_deadlines(void)
        uint32_t                decr;
 
        pp = current_cpu_datap();
-       deadline = EndOfAllTime;
+       if (!pp->cpu_running)
+               /* There's really nothing to do if this processor is down */
+               return;
 
        /*
         * If we have a clock timer set, pick that.
index 1119a0a7335d4ec9dc2a34a1d06678153288151e..879851b8addadd3633026b716281b708988e044b 100644 (file)
@@ -488,6 +488,10 @@ fpu_set_fxstate(
        if (fp_kind == FP_NO)
            return KERN_FAILURE;
 
+       if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
+           !ml_fpu_avx_enabled())
+           return KERN_FAILURE;
+
        state = (x86_float_state64_t *)tstate;
 
        assert(thr_act != THREAD_NULL);
@@ -607,6 +611,10 @@ fpu_get_fxstate(
        if (fp_kind == FP_NO)
                return KERN_FAILURE;
 
+       if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
+           !ml_fpu_avx_enabled())
+               return KERN_FAILURE;
+
        state = (x86_float_state64_t *)tstate;
 
        assert(thr_act != THREAD_NULL);
index 0c7f1f59562ad026e63b52bf9aee9153727a4102..8595b8bb0369ccb8d287eab36ccc2b292ba5e2bc 100644 (file)
@@ -419,8 +419,6 @@ main(
                offsetof(cpu_data_t *,cpu_number));
         DECLARE("CPU_RUNNING",
                offsetof(cpu_data_t *,cpu_running));
-        DECLARE("CPU_MCOUNT_OFF",
-               offsetof(cpu_data_t *,cpu_mcount_off));
        DECLARE("CPU_PENDING_AST",
                offsetof(cpu_data_t *,cpu_pending_ast));
        DECLARE("CPU_DESC_TABLEP",
index f04a56c4afa57e940763b867014ff75465160c69..ba8704298087fe9d3cd9267e0776a876b8972d73 100644 (file)
@@ -82,6 +82,7 @@ pal_hib_map(uintptr_t virt, uint64_t phys)
     index = (virt >> I386_LPGSHIFT);
     virt += (uintptr_t)(phys & I386_LPGMASK);
     phys  = ((phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS  | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+    if (phys == BootPTD[index]) return (virt);
     BootPTD[index] = phys;
     invlpg(virt);
     BootPTD[index + 1] = (phys + I386_LPGBYTES);
index 39102c926d377a5ca424f03bc8f4707f82dab733..910194f59b9e30376e84ea655d1153ccbc47f86d 100644 (file)
@@ -578,7 +578,6 @@ i386_init(void)
 
        tsc_init();
        power_management_init();
-
        processor_bootstrap();
        thread_bootstrap();
 
@@ -608,6 +607,7 @@ do_init_slave(boolean_t fast_restart)
                mca_cpu_init();
 #endif
   
+               LAPIC_INIT();
                lapic_configure();
                LAPIC_DUMP();
                LAPIC_CPU_MAP_DUMP();
@@ -617,12 +617,11 @@ do_init_slave(boolean_t fast_restart)
 #if CONFIG_MTRR
                mtrr_update_cpu();
 #endif
+               /* update CPU microcode */
+               ucode_update_wake();
        } else
            init_param = FAST_SLAVE_INIT;
 
-       /* update CPU microcode */
-       ucode_update_wake();
-
 #if CONFIG_VMX
        /* resume VT operation */
        vmx_resume();
index b365d6070a2cef8e6f910ec1201e12dc9945fae9..0e74dd06ecdfe65de528f68de11b3badd90411a4 100644 (file)
@@ -48,7 +48,6 @@
 /* Base vector for local APIC interrupt sources */
 int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
 
-#define                MAX_LAPICIDS    (LAPIC_ID_MAX+1)
 int            lapic_to_cpu[MAX_LAPICIDS];
 int            cpu_to_lapic[MAX_CPUS];
 
index 378f4d7ebf6aad444c5b7999f654c571ba02537d..219332ae4d9d8815f7b7f5344937daf6ab580553 100644 (file)
@@ -174,15 +174,20 @@ typedef enum {
 #define        LAPIC_MSR(reg)          (LAPIC_MSR_BASE + LAPIC_MSR_OFFSET(reg))
 
 typedef struct {
-       void            (*init) (void);
-       uint32_t        (*read) (lapic_register_t);
-       void            (*write)(lapic_register_t, uint32_t);
+       void            (*init)         (void);
+       uint32_t        (*read)         (lapic_register_t);
+       void            (*write)        (lapic_register_t, uint32_t);
+       uint64_t        (*read_icr)     (void);
+       void            (*write_icr)    (uint32_t, uint32_t);
 } lapic_ops_table_t;
 extern  lapic_ops_table_t *lapic_ops;
 
+#define LAPIC_INIT()                   lapic_ops->init();
 #define LAPIC_WRITE(reg,val)           lapic_ops->write(reg, val)
 #define LAPIC_READ(reg)                        lapic_ops->read(reg)
 #define LAPIC_READ_OFFSET(reg,off)     LAPIC_READ((reg)+(off))
+#define LAPIC_READ_ICR()               lapic_ops->read_icr()
+#define LAPIC_WRITE_ICR(dst,cmd)       lapic_ops->write_icr(dst, cmd)
 
 typedef enum {
        periodic,
@@ -225,6 +230,7 @@ typedef uint32_t lapic_timer_count_t;
 #define LAPIC_PM_INTERRUPT             0x7
 
 #define LAPIC_PMC_SWI_VECTOR           (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_PMC_SW_INTERRUPT)
+#define LAPIC_TIMER_VECTOR             (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT)
 
 /* The vector field is ignored for NMI interrupts via the LAPIC
  * or otherwise, so this is not an offset from the interrupt
@@ -314,8 +320,8 @@ extern boolean_t    lapic_is_interrupting(uint8_t vector);
 extern void            lapic_interrupt_counts(uint64_t intrs[256]);
 extern void            lapic_disable_timer(void);
 
+#define        MAX_LAPICIDS    (LAPIC_ID_MAX+1)
 #ifdef MP_DEBUG
-extern void            lapic_cpu_map_dump(void);
 #define LAPIC_CPU_MAP_DUMP()   lapic_cpu_map_dump()
 #define LAPIC_DUMP()           lapic_dump()
 #else
index 3e699197423e1c9107f53a6dc73bccb2e9d2dbc6..347b9e969597f862f2069c69aba1fa14b817ca67 100644 (file)
@@ -116,33 +116,42 @@ legacy_init(void)
        vm_map_offset_t lapic_vbase64;
        /* Establish a map to the local apic */
 
-       lapic_vbase64 = (vm_offset_t)vm_map_min(kernel_map);
-       result = vm_map_find_space(kernel_map,
-                                  &lapic_vbase64,
-                                  round_page(LAPIC_SIZE), 0,
-                                  VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
-       /* Convert 64-bit vm_map_offset_t to "pointer sized" vm_offset_t
-        */
-       lapic_vbase = (vm_offset_t) lapic_vbase64;
-       if (result != KERN_SUCCESS) {
-               panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
+       if (lapic_vbase == 0) {
+               lapic_vbase64 = (vm_offset_t)vm_map_min(kernel_map);
+               result = vm_map_find_space(kernel_map,
+                                          &lapic_vbase64,
+                                          round_page(LAPIC_SIZE), 0,
+                                          VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
+               /* Convert 64-bit vm_map_offset_t to "pointer sized" vm_offset_t
+                */
+               lapic_vbase = (vm_offset_t) lapic_vbase64;
+               if (result != KERN_SUCCESS) {
+                       panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
+               }
+               vm_map_unlock(kernel_map);
+
+               /*
+                * Map in the local APIC non-cacheable, as recommended by Intel
+                * in section 8.4.1 of the "System Programming Guide".
+                * In fact, this is redundant because EFI will have assigned an
+                * MTRR physical range containing the local APIC's MMIO space as
+                * UC and this will override the default PAT setting.
+                */
+               pmap_enter(pmap_kernel(),
+                               lapic_vbase,
+                               (ppnum_t) i386_btop(lapic_pbase),
+                               VM_PROT_READ|VM_PROT_WRITE,
+                               VM_PROT_NONE,
+                               VM_WIMG_IO,
+                               TRUE);
        }
-       vm_map_unlock(kernel_map);
 
        /*
-        * Map in the local APIC non-cacheable, as recommended by Intel
-        * in section 8.4.1 of the "System Programming Guide".
-        * In fact, this is redundant because EFI will have assigned an
-        * MTRR physical range containing the local APIC's MMIO space as
-        * UC and this will override the default PAT setting.
+        * Set flat delivery model, logical processor id.
+        * This should already be the default setting.
         */
-       pmap_enter(pmap_kernel(),
-                       lapic_vbase,
-                       (ppnum_t) i386_btop(lapic_pbase),
-                       VM_PROT_READ|VM_PROT_WRITE,
-                       VM_PROT_NONE,
-                       VM_WIMG_IO,
-                       TRUE);
+       LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
+       LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
 }
 
 
@@ -158,15 +167,41 @@ legacy_write(lapic_register_t reg, uint32_t value)
        *LAPIC_MMIO(reg) = value;
 }
 
+static uint64_t
+legacy_read_icr(void)
+{
+       return (((uint64_t)*LAPIC_MMIO(ICRD)) << 32) | ((uint64_t)*LAPIC_MMIO(ICR));
+}
+
+static void
+legacy_write_icr(uint32_t dst, uint32_t cmd)
+{
+       *LAPIC_MMIO(ICRD) = dst << LAPIC_ICRD_DEST_SHIFT;
+       *LAPIC_MMIO(ICR) = cmd;
+}
+
 static lapic_ops_table_t legacy_ops = {
        legacy_init,
        legacy_read,
-       legacy_write
+       legacy_write,
+       legacy_read_icr,
+       legacy_write_icr
 };
 
+static boolean_t is_x2apic = FALSE;
+
 static void
 x2apic_init(void)
 {
+       uint32_t        lo;
+       uint32_t        hi;
+
+       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+       if ((lo & MSR_IA32_APIC_BASE_EXTENDED) == 0)  {
+               lo |= MSR_IA32_APIC_BASE_EXTENDED;
+               wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+               kprintf("x2APIC mode enabled\n");
+       }
 }
 
 static uint32_t
@@ -185,13 +220,26 @@ x2apic_write(lapic_register_t reg, uint32_t value)
        wrmsr(LAPIC_MSR(reg), value, 0);
 }
 
+static uint64_t
+x2apic_read_icr(void)
+{
+       return rdmsr64(LAPIC_MSR(ICR));
+}
+
+static void
+x2apic_write_icr(uint32_t dst, uint32_t cmd)
+{
+       wrmsr(LAPIC_MSR(ICR), cmd, dst);
+}
+
 static lapic_ops_table_t x2apic_ops = {
        x2apic_init,
        x2apic_read,
-       x2apic_write
+       x2apic_write,
+       x2apic_read_icr,
+       x2apic_write_icr
 };
 
-
 void
 lapic_init(void)
 {
@@ -199,7 +247,6 @@ lapic_init(void)
        uint32_t        hi;
        boolean_t       is_boot_processor;
        boolean_t       is_lapic_enabled;
-       boolean_t       is_x2apic;
 
        /* Examine the local APIC state */
        rdmsr(MSR_IA32_APIC_BASE, lo, hi);
@@ -214,10 +261,21 @@ lapic_init(void)
        if (!is_boot_processor || !is_lapic_enabled)
                panic("Unexpected local APIC state\n");
 
+       /*
+        * If x2APIC is available and not already enabled, enable it
+        * when requested via the -x2apic boot-arg.
+        */
+       if (!is_x2apic && (cpuid_features() & CPUID_FEATURE_x2APIC)) {
+               PE_parse_boot_argn("-x2apic", &is_x2apic, sizeof(is_x2apic));
+               kprintf("x2APIC supported %s be enabled\n",
+                       is_x2apic ? "and will" : "but will not");
+       }
+
        lapic_ops = is_x2apic ? &x2apic_ops : &legacy_ops;
 
-       lapic_ops->init();
+       LAPIC_INIT();
 
+       kprintf("ID: 0x%x LDR: 0x%x\n", LAPIC_READ(ID), LAPIC_READ(LDR));
        if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
                panic("Local APIC version 0x%x, 0x14 or more expected\n",
                        (LAPIC_READ(VERSION)&LAPIC_VERSION_MASK));
@@ -290,7 +348,7 @@ lapic_dump(void)
                LAPIC_READ(APR)&LAPIC_APR_MASK,
                LAPIC_READ(PPR)&LAPIC_PPR_MASK);
        kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
-               LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
+               is_x2apic ? 0 : LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
                LAPIC_READ(LDR)>>LAPIC_LDR_SHIFT);
        kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
                BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE),
@@ -449,10 +507,6 @@ lapic_configure(void)
                }
        }
 
-       /* Set flat delivery model, logical processor id */
-       LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
-       LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
-
        /* Accept all */
        LAPIC_WRITE(TPR, 0);
 
@@ -801,12 +855,11 @@ lapic_send_ipi(int cpu, int vector)
        state = ml_set_interrupts_enabled(FALSE);
 
        /* Wait for pending outgoing send to complete */
-       while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
+       while (LAPIC_READ_ICR() & LAPIC_ICR_DS_PENDING) {
                cpu_pause();
        }
 
-       LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
-       LAPIC_WRITE(ICR, vector | LAPIC_ICR_DM_FIXED);
+       LAPIC_WRITE_ICR(cpu_to_lapic[cpu], vector | LAPIC_ICR_DM_FIXED);
 
        (void) ml_set_interrupts_enabled(state);
 }
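The read_icr/write_icr ops added above hide a real programming-model difference that lapic_send_ipi() relies on: legacy xAPIC programs the ICR as two 32-bit MMIO stores (destination first, because the low-half store dispatches the IPI), while x2APIC takes a single 64-bit WRMSR. Side by side, using only names from the hunks above:

	/* Legacy xAPIC: ICRD (destination) must precede ICR (command) */
	*LAPIC_MMIO(ICRD) = cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT;
	*LAPIC_MMIO(ICR)  = vector | LAPIC_ICR_DM_FIXED;

	/* x2APIC: one MSR write -- destination in %edx (high half),
	 * command in %eax (low half)
	 */
	wrmsr(LAPIC_MSR(ICR), vector | LAPIC_ICR_DM_FIXED,
	      cpu_to_lapic[cpu]);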
index 64b0c9b8d41b8dd295c4102cd866ef4a4aeab9a8..c70168b3c7529a38dd64f64f14e7a99bebef64d9 100644 (file)
@@ -89,12 +89,19 @@ static void
 mca_get_availability(void)
 {
        uint64_t        features = cpuid_info()->cpuid_features;
-       uint32_t        family =  cpuid_info()->cpuid_family;
+       uint32_t        family =   cpuid_info()->cpuid_family;
+       uint32_t        model =    cpuid_info()->cpuid_model;
+       uint32_t        stepping = cpuid_info()->cpuid_stepping;
 
        mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0;
        mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0;
        mca_family = family;
-       
+
+       if ((model == CPUID_MODEL_HASWELL     && stepping < 3) ||
+           (model == CPUID_MODEL_HASWELL_ULT && stepping < 1) ||
+           (model == CPUID_MODEL_CRYSTALWELL && stepping < 1))
+               panic("Haswell pre-C0 steppings are not supported");
+
        /*
         * If MCA, the number of banks etc is reported by the IA32_MCG_CAP MSR.
         */
index e008a9a612b502df51ecd7a52e958dfac6bb644a..7218060478352682e0ada3b05174f9d26c0c63e0 100644 (file)
@@ -567,9 +567,9 @@ ml_init_lock_timeout(void)
  * instead of spinning for clock_delay_until().
  */
 void
-ml_init_delay_spin_threshold(void)
+ml_init_delay_spin_threshold(int threshold_us)
 {
-       nanoseconds_to_absolutetime(10ULL * NSEC_PER_USEC, &delay_spin_threshold);
+       nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
 }
 
 boolean_t
@@ -579,7 +579,7 @@ ml_delay_should_spin(uint64_t interval)
 }
 
 /*
- * This is called from the machine-independent routine cpu_up()
+ * This is called from the machine-independent layer
  * to perform machine-dependent info updates. Defer to cpu_thread_init().
  */
 void
@@ -589,12 +589,14 @@ ml_cpu_up(void)
 }
 
 /*
- * This is called from the machine-independent routine cpu_down()
+ * This is called from the machine-independent layer
  * to perform machine-dependent info updates.
  */
 void
 ml_cpu_down(void)
 {
+       i386_deactivate_cpu();
+
        return;
 }
 
index d800625f75cc41946e4f9b1d00c5d7a7e3b07fee..270ddcfb56cf4b3908c5a8f3c17d9378c8d7e7e9 100644 (file)
@@ -80,7 +80,7 @@ void ml_install_interrupt_handler(
 
 void ml_get_timebase(unsigned long long *timestamp);
 void ml_init_lock_timeout(void); 
-void ml_init_delay_spin_threshold(void);
+void ml_init_delay_spin_threshold(int);
 
 boolean_t ml_delay_should_spin(uint64_t interval);
 
index 0007b3321f59324eba4b284f1613722d8eabee81..4a2ed207d4f067992b5b5838e5ab5408b01ce107 100644 (file)
@@ -173,4 +173,5 @@ extern void act_machine_switch_pcb(thread_t old, thread_t new);
 #define FAST_SLAVE_INIT        ((void *)(uintptr_t)1)
 
 uint64_t ml_early_random(void);
+void cpu_pmc_control(void *);
 #endif /* _I386_MISC_PROTOS_H_ */
index b66399d2d7e2d66bd76703d4f505792bb9349874..286b822aa2bc4e172e870e5876e05cc206fa06b4 100644 (file)
@@ -93,6 +93,9 @@
 #define        TRACE_MP_CPUS_CALL_LOCAL        MACHDBG_CODE(DBG_MACH_MP, 2)
 #define        TRACE_MP_CPUS_CALL_ACTION       MACHDBG_CODE(DBG_MACH_MP, 3)
 #define        TRACE_MP_CPUS_CALL_NOBUF        MACHDBG_CODE(DBG_MACH_MP, 4)
+#define        TRACE_MP_CPU_FAST_START         MACHDBG_CODE(DBG_MACH_MP, 5)
+#define        TRACE_MP_CPU_START              MACHDBG_CODE(DBG_MACH_MP, 6)
+#define        TRACE_MP_CPU_DEACTIVATE         MACHDBG_CODE(DBG_MACH_MP, 7)
 
 #define ABS(v)         (((v) > 0)?(v):-(v))
 
@@ -287,6 +290,10 @@ intel_startCPU_fast(int slot_num)
                 */
                return(rc);
 
+       KERNEL_DEBUG_CONSTANT(
+               TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
+               slot_num, 0, 0, 0, 0);
+
        /*
         * Wait until the CPU is back online.
         */
@@ -301,6 +308,10 @@ intel_startCPU_fast(int slot_num)
        mp_wait_for_cpu_up(slot_num, 30000, 1);
        mp_enable_preemption();
 
+       KERNEL_DEBUG_CONSTANT(
+               TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
+               slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
+
        /*
         * Check to make sure that the CPU is really running.  If not,
         * go through the slow path.
@@ -341,13 +352,30 @@ start_cpu(void *arg)
        if (cpu_number() != psip->starter_cpu)
                return;
 
+       DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
+               arg, psip->target_cpu, psip->target_lapic);
+
+       KERNEL_DEBUG_CONSTANT(
+               TRACE_MP_CPU_START | DBG_FUNC_START,
+               psip->target_cpu,
+               psip->target_lapic, 0, 0, 0);
+
        i386_start_cpu(psip->target_lapic, psip->target_cpu);
 
 #ifdef POSTCODE_DELAY
        /* Wait much longer if postcodes are displayed for a delay period. */
        i *= 10000;
 #endif
+       DBG("start_cpu(%p) about to wait for cpu %d\n",
+               arg, psip->target_cpu);
+
        mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
+
+       KERNEL_DEBUG_CONSTANT(
+               TRACE_MP_CPU_START | DBG_FUNC_END,
+               psip->target_cpu,
+               cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
+
        if (TSC_sync_margin &&
            cpu_datap(psip->target_cpu)->cpu_running) {
                /*
@@ -1293,26 +1321,43 @@ i386_deactivate_cpu(void)
        cpu_data_t      *cdp = current_cpu_datap();
 
        assert(!ml_get_interrupts_enabled());
+       KERNEL_DEBUG_CONSTANT(
+               TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
+               0, 0, 0, 0, 0);
 
        simple_lock(&x86_topo_lock);
        cdp->cpu_running = FALSE;
        simple_unlock(&x86_topo_lock);
 
+       /*
+        * Move all of this cpu's timers to the master/boot cpu,
+        * and poke it in case there's a sooner deadline for it to schedule.
+        */
        timer_queue_shutdown(&cdp->rtclock_timer.queue);
-       cdp->rtclock_timer.deadline = EndOfAllTime;
        mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);
 
        /*
-        * In case a rendezvous/braodcast/call was initiated to this cpu
-        * before we cleared cpu_running, we must perform any actions due.
+        * Open an interrupt window
+        * and ensure any pending IPI or timer is serviced
         */
-       if (i_bit(MP_RENDEZVOUS, &cdp->cpu_signals))
-               mp_rendezvous_action();
-       if (i_bit(MP_BROADCAST, &cdp->cpu_signals))
-               mp_broadcast_action();
-       if (i_bit(MP_CALL, &cdp->cpu_signals))
-               mp_cpus_call_action();
-       cdp->cpu_signals = 0;                   /* all clear */
+       mp_disable_preemption();
+       ml_set_interrupts_enabled(TRUE);
+
+       while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
+               cpu_pause();
+       /*
+        * Ensure there's no remaining timer deadline set
+        * - AICPM may have left one active.
+        */
+       setPop(0);
+
+       ml_set_interrupts_enabled(FALSE);
+       mp_enable_preemption();
+
+       KERNEL_DEBUG_CONSTANT(
+               TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
+               0, 0, 0, 0, 0);
 }
 
 int    pmsafe_debug    = 1;
@@ -1424,7 +1469,7 @@ mp_kdp_enter(void)
                        cpu_NMI_interrupt(cpu);
                }
 
-       DBG("mp_kdp_enter() %u processors done %s\n",
+       DBG("mp_kdp_enter() %d processors done %s\n",
            (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
        
        postcode(MP_KDP_ENTER);
@@ -1479,7 +1524,7 @@ mp_kdp_wait(boolean_t flush, boolean_t isNMI)
        DBG("mp_kdp_wait()\n");
        /* If an I/O port has been specified as a debugging aid, issue a read */
        panic_io_port_read();
-
+       current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
 #if CONFIG_MCA
        /* If we've trapped due to a machine-check, save MCA registers */
        mca_check_save();
@@ -1581,6 +1626,8 @@ slave_machine_init(void *param)
                clock_init();
                cpu_machine_init();     /* Interrupts enabled hereafter */
                mp_cpus_call_cpu_init();
+       } else {
+               cpu_machine_init();     /* Interrupts enabled hereafter */
        }
 }
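
The TRACE_MP_CPU_* probes added above follow the usual kdebug convention: the same
event code is emitted with DBG_FUNC_START before the operation and DBG_FUNC_END
after it, so a trace consumer can pair the two records and measure the interval.
A kdebug event word packs class, subclass, code, and function-qualifier fields; a
standalone decoder sketch (the sample event word is hypothetical, for illustration):

	#include <stdint.h>
	#include <stdio.h>

	/* field layout of a kdebug event word, per bsd/sys/kdebug.h */
	#define KDBG_CLASS_OF(x)	(((x) >> 24) & 0xff)
	#define KDBG_SUBCLASS_OF(x)	(((x) >> 16) & 0xff)
	#define KDBG_CODE_OF(x)		(((x) >>  2) & 0x3fff)
	#define DBG_FUNC_MASK		0x3	/* 1 = START, 2 = END */

	int main(void)
	{
		uint32_t ev = 0x01900019;	/* hypothetical trace word */
		printf("class %u subclass 0x%x code %u func %u\n",
		       KDBG_CLASS_OF(ev), KDBG_SUBCLASS_OF(ev),
		       KDBG_CODE_OF(ev), ev & DBG_FUNC_MASK);
		return 0;
	}
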
 
index ea379978071846753499f7ffbe1160ea711bca07..c013a149bc1aff3b193d5c49f5832b1397b72236 100644 (file)
@@ -68,12 +68,10 @@ i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler, int ipi_vector, i386
 void
 i386_start_cpu(int lapic_id, __unused int cpu_num )
 {
-       LAPIC_WRITE(ICRD, lapic_id << LAPIC_ICRD_DEST_SHIFT);
-       LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
+       LAPIC_WRITE_ICR(lapic_id, LAPIC_ICR_DM_INIT);
        delay(100);
-
-       LAPIC_WRITE(ICRD, lapic_id << LAPIC_ICRD_DEST_SHIFT);
-       LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));
+       LAPIC_WRITE_ICR(lapic_id,
+                       LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));
 }
 
 void
@@ -81,11 +79,11 @@ i386_send_NMI(int cpu)
 {
        boolean_t state = ml_set_interrupts_enabled(FALSE);
        /* Program the interrupt command register */
-       LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
        /* The vector is ignored in this case--the target CPU will enter on the
         * NMI vector.
         */
-       LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
+       LAPIC_WRITE_ICR(cpu_to_lapic[cpu],
+                       LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
        (void) ml_set_interrupts_enabled(state);
 }
 
index 13cbf69fbc6da5deee63062e1d176d039ae1af35..1979983a8cb7b53216ca11553a83ffbf0f9bb81a 100644 (file)
@@ -72,10 +72,8 @@ struct pal_rtc_nanotime {
        volatile uint64_t       tsc_base;       /* timestamp */
        volatile uint64_t       ns_base;        /* nanoseconds */
        uint32_t                scale;          /* tsc -> nanosec multiplier */
-       uint32_t                shift;          /* tsc -> nanosec shift/div */
-                                               /* shift is overloaded with
-                                                * lower 32bits of tsc_freq
-                                                * on slower machines (SLOW_TSC_THRESHOLD) */
+       uint32_t                shift;          /* shift is nonzero only on "slow" machines, */
+                                               /* ie where tscFreq <= SLOW_TSC_THRESHOLD */
        volatile uint32_t       generation;     /* 0 == being updated */
        uint32_t                spare1;
 };
index 7e0112fbb99f49d82627ff112fe0d313d1e64111..4945a66c4a1b54d51fc9852a494637de3754fe6e 100644 (file)
@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
@@ -155,6 +154,7 @@ void pal_efi_hibernate_prepare(void);
 /* Include a PAL-specific header, too, for xnu-internal overrides */
 #include <i386/pal_native.h>
 
+
 extern boolean_t virtualized;
 #define PAL_VIRTUALIZED_PROPERTY_VALUE 4
 
index a70348b338a455d04a316972325957b119c3ea3f..e5274a5eb4014374b14c9ff3f79dd4d9036ba5cd 100644 (file)
@@ -991,6 +991,31 @@ machine_thread_set_state(
                return fpu_set_fxstate(thr_act, tstate, flavor);
        }
 
+       case x86_AVX_STATE:
+       {   
+               x86_avx_state_t       *state;
+
+               if (count != x86_AVX_STATE_COUNT)
+                       return(KERN_INVALID_ARGUMENT);
+
+               state = (x86_avx_state_t *)tstate;
+               if (state->ash.flavor == x86_AVX_STATE64 &&
+                   state->ash.count  == x86_FLOAT_STATE64_COUNT &&
+                   thread_is_64bit(thr_act)) {
+                       return fpu_set_fxstate(thr_act,
+                                              (thread_state_t)&state->ufs.as64,
+                                              x86_FLOAT_STATE64);
+               }
+               if (state->ash.flavor == x86_FLOAT_STATE32 &&
+                   state->ash.count  == x86_FLOAT_STATE32_COUNT &&
+                   !thread_is_64bit(thr_act)) {
+                       return fpu_set_fxstate(thr_act,
+                                              (thread_state_t)&state->ufs.as32,
+                                              x86_FLOAT_STATE32); 
+               }
+               return(KERN_INVALID_ARGUMENT);
+       }
+
        case x86_THREAD_STATE32: 
        {
                if (count != x86_THREAD_STATE32_COUNT)
@@ -1137,6 +1162,21 @@ machine_thread_get_state(
                break;
            }
 
+           case THREAD_STATE_FLAVOR_LIST_10_9:
+           {
+               if (*count < 5)
+                       return (KERN_INVALID_ARGUMENT);
+
+               tstate[0] = x86_THREAD_STATE;
+               tstate[1] = x86_FLOAT_STATE;
+               tstate[2] = x86_EXCEPTION_STATE;
+               tstate[3] = x86_DEBUG_STATE;
+               tstate[4] = x86_AVX_STATE;
+
+               *count = 5;
+               break;
+           }
+
            case x86_SAVED_STATE32:
            {
                x86_saved_state32_t     *state;
@@ -1245,8 +1285,8 @@ machine_thread_get_state(
                return(kret);
            }
 
-       case x86_AVX_STATE32:
-       {
+           case x86_AVX_STATE32:
+           {
                if (*count != x86_AVX_STATE32_COUNT)
                        return(KERN_INVALID_ARGUMENT);
 
@@ -1256,10 +1296,10 @@ machine_thread_get_state(
                *count = x86_AVX_STATE32_COUNT;
 
                return fpu_get_fxstate(thr_act, tstate, flavor);
-       }
+           }
 
-       case x86_AVX_STATE64:
-       {
+           case x86_AVX_STATE64:
+           {
                if (*count != x86_AVX_STATE64_COUNT)
                        return(KERN_INVALID_ARGUMENT);
 
@@ -1269,7 +1309,36 @@ machine_thread_get_state(
                *count = x86_AVX_STATE64_COUNT;
 
                return fpu_get_fxstate(thr_act, tstate, flavor);
-       }
+           }
+
+           case x86_AVX_STATE:
+           {
+               x86_avx_state_t         *state;
+               kern_return_t           kret;
+
+               if (*count < x86_AVX_STATE_COUNT)
+                       return(KERN_INVALID_ARGUMENT);
+
+               state = (x86_avx_state_t *)tstate;
+
+               bzero((char *)state, sizeof(x86_avx_state_t));
+               if (thread_is_64bit(thr_act)) {
+                       state->ash.flavor = x86_AVX_STATE64;
+                       state->ash.count  = x86_AVX_STATE64_COUNT;
+                       kret = fpu_get_fxstate(thr_act,
+                                              (thread_state_t)&state->ufs.as64,
+                                              x86_AVX_STATE64);
+               } else {
+                       state->ash.flavor = x86_AVX_STATE32;
+                       state->ash.count  = x86_AVX_STATE32_COUNT;
+                       kret = fpu_get_fxstate(thr_act,
+                                              (thread_state_t)&state->ufs.as32,
+                                              x86_AVX_STATE32);
+               }
+               *count = x86_AVX_STATE_COUNT;
+
+               return(kret);
+           }
 
            case x86_THREAD_STATE32: 
            {
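
The new x86_AVX_STATE flavor lets a caller fetch AVX state without knowing the
target thread's bitness up front: the kernel fills in ash.flavor and ash.count,
and the caller dispatches on them.  A user-space sketch (error handling and the
actual register use elided):

	#include <mach/mach.h>
	#include <mach/thread_status.h>

	kern_return_t
	get_avx(thread_act_t thread)
	{
		x86_avx_state_t state;
		mach_msg_type_number_t count = x86_AVX_STATE_COUNT;
		kern_return_t kr = thread_get_state(thread, x86_AVX_STATE,
						    (thread_state_t)&state, &count);
		if (kr != KERN_SUCCESS)
			return kr;
		if (state.ash.flavor == x86_AVX_STATE64) {
			/* 64-bit thread: use state.ufs.as64 */
		} else {
			/* 32-bit thread: use state.ufs.as32 */
		}
		return KERN_SUCCESS;
	}
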
index ec5ae7f784206d05d1ecfce567334d1fe0f1ddfc..d9c7ff9fbdd4ecee02b95f91859e3504dd6ec073 100644 (file)
 #include <kern/sched_prim.h>
 #include <i386/lapic.h>
 #include <i386/pal_routines.h>
-
 #include <sys/kdebug.h>
 
 extern int disableConsoleOutput;
 
 #define DELAY_UNSET            0xFFFFFFFFFFFFFFFFULL
 
+uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16* NSEC_PER_USEC, 32* NSEC_PER_USEC, 64* NSEC_PER_USEC, 128* NSEC_PER_USEC, 256* NSEC_PER_USEC, 512* NSEC_PER_USEC, 1024* NSEC_PER_USEC, 2048* NSEC_PER_USEC, 4096* NSEC_PER_USEC, 8192* NSEC_PER_USEC, 16384* NSEC_PER_USEC, 32768* NSEC_PER_USEC};
+uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];
+
 /*
  * The following is set when the KEXT loads and initializes.
  */
 pmDispatch_t   *pmDispatch     = NULL;
 
-static uint32_t                pmInitDone              = 0;
+uint32_t               pmInitDone              = 0;
 static boolean_t       earlyTopology           = FALSE;
 static uint64_t                earlyMaxBusDelay        = DELAY_UNSET;
 static uint64_t                earlyMaxIntDelay        = DELAY_UNSET;
@@ -76,13 +78,15 @@ power_management_init(void)
        (*pmDispatch->cstateInit)();
 }
 
-#define CPU_ACTIVE_STAT_BIN_1 (500000)
-#define CPU_ACTIVE_STAT_BIN_2 (2000000)
-#define CPU_ACTIVE_STAT_BIN_3 (5000000)
-
-#define CPU_IDLE_STAT_BIN_1 (500000)
-#define CPU_IDLE_STAT_BIN_2 (2000000)
-#define CPU_IDLE_STAT_BIN_3 (5000000)
+static inline void machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins) {
+       uint32_t i;
+       for (i = 0; i < nbins; i++) {
+               if (interval < binvals[i]) {
+                       bins[i]++;
+                       break;
+               }
+       }
+}
 
 /*
  * Called when the CPU is idle.  It calls into the power management kext
@@ -91,92 +95,77 @@ power_management_init(void)
 void
 machine_idle(void)
 {
-    cpu_data_t         *my_cpu         = current_cpu_datap();
-    uint64_t           ctime, rtime, itime;
+       cpu_data_t              *my_cpu         = current_cpu_datap();
+       uint64_t                ctime, rtime, itime;
 
-    if (my_cpu == NULL)
-       goto out;
+       if (my_cpu == NULL)
+               goto out;
 
        ctime = mach_absolute_time();
 
-    my_cpu->lcpu.state = LCPU_IDLE;
-    DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
-    MARK_CPU_IDLE(cpu_number());
+       my_cpu->lcpu.state = LCPU_IDLE;
+       DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
+       MARK_CPU_IDLE(cpu_number());
 
        rtime = ctime - my_cpu->cpu_ixtime;
 
        my_cpu->cpu_rtime_total += rtime;
+       machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
+
+       if (pmInitDone) {
+               /*
+                * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
+                * were called prior to the CPU PM kext being registered.  We do
+                * this here since we know at this point the values will be first
+                * used since idle is where the decisions using these values are made.
+                */
+               if (earlyMaxBusDelay != DELAY_UNSET)
+                       ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
+
+               if (earlyMaxIntDelay != DELAY_UNSET)
+                       ml_set_maxintdelay(earlyMaxIntDelay);
+       }
 
-       if (rtime < CPU_ACTIVE_STAT_BIN_1)
-               my_cpu->cpu_rtimes[0]++;
-       else if (rtime < CPU_ACTIVE_STAT_BIN_2)
-               my_cpu->cpu_rtimes[1]++;
-       else if (rtime < CPU_ACTIVE_STAT_BIN_3)
-               my_cpu->cpu_rtimes[2]++;
-       else
-               my_cpu->cpu_rtimes[3]++;
-
+       if (pmInitDone
+           && pmDispatch != NULL
+           && pmDispatch->MachineIdle != NULL)
+               (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+       else {
+               /*
+                * If no power management, re-enable interrupts and halt.
+                * This will keep the CPU from spinning through the scheduler
+                * and will allow at least some minimal power savings (but it
+                * may cause problems in some MP configurations w.r.t. the APIC
+                * stopping during a GV3 transition).
+                */
+               pal_hlt();
+
+               /* Once woken, re-disable interrupts. */
+               pal_cli();
+       }
 
-    if (pmInitDone) {
        /*
-        * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
-        * were called prior to the CPU PM kext being registered.  We do
-        * this here since we know at this point the values will be first
-        * used since idle is where the decisions using these values is made.
+        * Mark the CPU as running again.
         */
-       if (earlyMaxBusDelay != DELAY_UNSET)
-           ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
-
-       if (earlyMaxIntDelay != DELAY_UNSET)
-           ml_set_maxintdelay(earlyMaxIntDelay);
-    }
-
-    if (pmInitDone
-       && pmDispatch != NULL
-       && pmDispatch->MachineIdle != NULL)
-       (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
-    else {
-       /*
-        * If no power management, re-enable interrupts and halt.
-        * This will keep the CPU from spinning through the scheduler
-        * and will allow at least some minimal power savings (but it
-        * cause problems in some MP configurations w.r.t. the APIC
-        * stopping during a GV3 transition).
-        */
-       pal_hlt();
-
-       /* Once woken, re-disable interrupts. */
-       pal_cli();
-    }
-
-    /*
-     * Mark the CPU as running again.
-     */
-    MARK_CPU_ACTIVE(cpu_number());
-    DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
+       MARK_CPU_ACTIVE(cpu_number());
+       DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
 
        uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
-       itime = ixtime - ctime;
+       my_cpu->cpu_idle_exits++;
 
-    my_cpu->lcpu.state = LCPU_RUN;
+       itime = ixtime - ctime;
 
-       if (itime < CPU_IDLE_STAT_BIN_1)
-               my_cpu->cpu_itimes[0]++;
-       else if (itime < CPU_IDLE_STAT_BIN_2)
-               my_cpu->cpu_itimes[1]++;
-       else if (itime < CPU_IDLE_STAT_BIN_3)
-               my_cpu->cpu_itimes[2]++;
-       else
-               my_cpu->cpu_itimes[3]++;
+       my_cpu->lcpu.state = LCPU_RUN;
 
+       machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
        my_cpu->cpu_itime_total += itime;
 
 
-    /*
-     * Re-enable interrupts.
-     */
-  out:
-    pal_sti();
+       /*
+        * Re-enable interrupts.
+        */
+out:
+       pal_sti();
 }
 
 /*
@@ -200,6 +189,7 @@ pmCPUHalt(uint32_t reason)
        break;
 
     case PM_HALT_NORMAL:
+    case PM_HALT_SLEEP:
     default:
         pal_cli();
 
@@ -212,11 +202,14 @@ pmCPUHalt(uint32_t reason)
            (*pmDispatch->pmCPUHalt)();
 
            /*
-            * We've exited halt, so get the the CPU schedulable again.
+            * We've exited halt, so get the CPU schedulable again.
+            * - by calling the fast init routine for a slave, or
+            * - by returning if we're the master processor.
             */
-           i386_init_slave_fast();
-
-           panic("init_slave_fast returned");
+           if (cpup->cpu_number != master_cpu) {
+               i386_init_slave_fast();
+               panic("init_slave_fast returned");
+           }
        } else
        {
            /*
@@ -257,13 +250,13 @@ pmInitComplete(void)
     pmInitDone = 1;
 }
 
-static x86_lcpu_t *
+x86_lcpu_t *
 pmGetLogicalCPU(int cpu)
 {
     return(cpu_to_lcpu(cpu));
 }
 
-static x86_lcpu_t *
+x86_lcpu_t *
 pmGetMyLogicalCPU(void)
 {
     cpu_data_t *cpup   = current_cpu_datap();
@@ -758,7 +751,7 @@ pmGetSavedRunCount(void)
 /*
  * Returns the root of the package tree.
  */
-static x86_pkg_t *
+x86_pkg_t *
 pmGetPkgRoot(void)
 {
     return(x86_pkgs);
@@ -770,7 +763,7 @@ pmCPUGetHibernate(int cpu)
     return(cpu_datap(cpu)->cpu_hibernate);
 }
 
-static processor_t
+processor_t
 pmLCPUtoProcessor(int lcpu)
 {
     return(cpu_datap(lcpu)->cpu_processor);
@@ -814,7 +807,7 @@ pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
                && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
 }
 
-static uint32_t
+uint32_t
 pmTimerQueueMigrate(int target_cpu)
 {
     /* Call the etimer code to do this. */
@@ -867,6 +860,10 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
     }
 
     if (cpuFuncs != NULL) {
+        if (pmDispatch) {
+            panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
+        }
+
        pmDispatch = cpuFuncs;
 
        if (earlyTopology
@@ -938,7 +935,10 @@ void machine_track_platform_idle(boolean_t entry) {
        if (entry) {
                (void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
        }
-       else {
-               (void)__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
-       }
+       else {
+               uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
+               if (nidle == topoParms.nLThreadsPerPackage) {
+                       my_cpu->lcpu.package->package_idle_exits++;
+               }
+       }
 }
index c443c1efa9d160a7a8d234fea56799495e246a4d..1ed973e4ceb2fae9a33dd1a88e058a6a463ee0cd 100644 (file)
@@ -141,6 +141,7 @@ void pmTimerSave(void);
 void pmTimerRestore(void);
 kern_return_t pmCPUExitHalt(int cpu);
 kern_return_t pmCPUExitHaltToOff(int cpu);
+uint32_t pmTimerQueueMigrate(int);
 
 #define PM_HALT_NORMAL         0               /* normal halt path */
 #define PM_HALT_DEBUG          1               /* debug code wants to halt */
@@ -159,6 +160,16 @@ extern int pmsafe_debug;
 #define                URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
 extern uint64_t        urgency_notification_assert_abstime_threshold;
 
+x86_lcpu_t *
+pmGetLogicalCPU(int cpu);
+x86_lcpu_t *
+pmGetMyLogicalCPU(void);
+processor_t
+pmLCPUtoProcessor(int lcpu);
+x86_pkg_t *
+pmGetPkgRoot(void);
+
+
 /******************************************************************************
  *
  * All of the following are deprecated interfaces and no longer used.
index 6438d9372edc71931b152e829f80467de1b8b79f..019d0aebed79bcf12dda622f6aed565193052815 100644 (file)
@@ -397,6 +397,8 @@ static inline void flush_tlb_raw(void)
        set_cr3_raw(get_cr3_raw());
 }
 #endif
+extern int rdmsr64_carefully(uint32_t msr, uint64_t *val);
+extern int wrmsr64_carefully(uint32_t msr, uint64_t val);
 #endif /* MACH_KERNEL_PRIVATE */
 
 static inline void wbinvd(void)
@@ -501,7 +503,6 @@ static inline uint64_t rdtscp64(uint32_t *aux)
  * The implementation is in locore.s.
  */
 extern int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi);
-
 __END_DECLS
 
 #endif /* ASSEMBLER */
@@ -538,8 +539,6 @@ __END_DECLS
 #define MSR_IA32_MPERF                         0xE7
 #define MSR_IA32_APERF                         0xE8
 
-#define MSR_PMG_CST_CONFIG_CONTROL             0xe2
-
 #define MSR_IA32_BBL_CR_CTL                    0x119
 
 #define MSR_IA32_SYSENTER_CS                   0x174
@@ -560,7 +559,6 @@ __END_DECLS
 
 #define MSR_IA32_MISC_ENABLE                   0x1a0
 
-#define MSR_IA32_ENERGY_PERFORMANCE_BIAS       0x1b0
 #define MSR_IA32_PACKAGE_THERM_STATUS          0x1b1
 #define MSR_IA32_PACKAGE_THERM_INTERRUPT       0x1b2
 
@@ -618,8 +616,20 @@ __END_DECLS
 #define MSR_IA32_PKG_POWER_SKU_UNIT            0x606
 #define MSR_IA32_PKG_C2_RESIDENCY              0x60D
 #define MSR_IA32_PKG_ENERGY_STATUS             0x611
-#define MSR_IA32_PRIMARY_PLANE_ENERY_STATUS    0x639
-#define MSR_IA32_SECONDARY_PLANE_ENERY_STATUS  0x641
+
+#define MSR_IA32_DDR_ENERGY_STATUS             0x619
+#define MSR_IA32_LLC_FLUSHED_RESIDENCY_TIMER   0x61D
+#define MSR_IA32_RING_PERF_STATUS              0x621
+
+#define MSR_IA32_PKG_C8_RESIDENCY              0x630
+#define MSR_IA32_PKG_C9_RESIDENCY              0x631
+#define MSR_IA32_PKG_C10_RESIDENCY             0x632
+
+#define MSR_IA32_PP0_ENERGY_STATUS             0x639
+#define MSR_IA32_PP1_ENERGY_STATUS             0x641
+#define MSR_IA32_IA_PERF_LIMIT_REASONS         0x690
+#define MSR_IA32_GT_PERF_LIMIT_REASONS         0x6B0
+
 #define MSR_IA32_TSC_DEADLINE                  0x6e0
 
 #define        MSR_IA32_EFER                           0xC0000080
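
rdmsr64_carefully()/wrmsr64_carefully() return nonzero instead of taking the fault
when the MSR does not exist, which makes them suitable for probing the model-specific
registers newly defined above.  A kernel-context sketch using the declared interface:

	/* probe an energy MSR that may be absent on this part */
	static uint64_t
	read_pp0_energy_or_zero(void)
	{
		uint64_t energy = 0;
		if (rdmsr64_carefully(MSR_IA32_PP0_ENERGY_STATUS, &energy) != 0)
			return 0;	/* RDMSR faulted: MSR not implemented */
		return energy;
	}
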
index 8a5f8c667b5beec375c59a05e56cae9a347505a3..28354563c21fae8e5ac773a69d1172c09dcde163 100644 (file)
@@ -91,42 +91,6 @@ rtc_timer_start(void)
        etimer_resync_deadlines();
 }
 
-/*
- * tsc_to_nanoseconds:
- *
- * Basic routine to convert a raw 64 bit TSC value to a
- * 64 bit nanosecond value.  The conversion is implemented
- * based on the scale factor and an implicit 32 bit shift.
- */
-static inline uint64_t
-_tsc_to_nanoseconds(uint64_t value)
-{
-#if defined(__i386__)
-    asm volatile("movl %%edx,%%esi     ;"
-                "mull  %%ecx           ;"
-                "movl  %%edx,%%edi     ;"
-                "movl  %%esi,%%eax     ;"
-                "mull  %%ecx           ;"
-                "addl  %%edi,%%eax     ;"      
-                "adcl  $0,%%edx         "
-                : "+A" (value)
-                : "c" (pal_rtc_nanotime_info.scale)
-                : "esi", "edi");
-#elif defined(__x86_64__)
-    asm volatile("mul %%rcx;"
-                "shrq $32, %%rax;"
-                "shlq $32, %%rdx;"
-                "orq %%rdx, %%rax;"
-                : "=a"(value)
-                : "a"(value), "c"(pal_rtc_nanotime_info.scale)
-                : "rdx", "cc" );
-#else
-#error Unsupported architecture
-#endif
-
-    return (value);
-}
-
 static inline uint32_t
 _absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs)
 {
@@ -251,13 +215,7 @@ rtc_nanotime_init_commpage(void)
 static inline uint64_t
 rtc_nanotime_read(void)
 {
-       
-#if CONFIG_EMBEDDED
-       if (gPEClockFrequencyInfo.timebase_frequency_hz > SLOW_TSC_THRESHOLD)
-               return  _rtc_nanotime_read(&rtc_nanotime_info, 1);      /* slow processor */
-       else
-#endif
-       return  _rtc_nanotime_read(&pal_rtc_nanotime_info, 0);  /* assume fast processor */
+       return  _rtc_nanotime_read(&pal_rtc_nanotime_info);
 }
 
 /*
@@ -277,8 +235,8 @@ rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 
        assert(!ml_get_interrupts_enabled());
        tsc = rdtsc64();
-       oldnsecs = rntp->ns_base + _tsc_to_nanoseconds(tsc - rntp->tsc_base);
-       newnsecs = base + _tsc_to_nanoseconds(tsc - tsc_base);
+       oldnsecs = rntp->ns_base + _rtc_tsc_to_nanoseconds(tsc - rntp->tsc_base, rntp);
+       newnsecs = base + _rtc_tsc_to_nanoseconds(tsc - tsc_base, rntp);
        
        /*
         * Only update the base values if time using the new base values
@@ -326,8 +284,8 @@ rtc_clock_stepped(__unused uint32_t new_frequency,
  * rtc_sleep_wakeup:
  *
  * Invoked from power management when we have awoken from a sleep (S3)
- * and the TSC has been reset.  The nanotime data is updated based on
- * the passed in value.
+ * and the TSC has been reset, or from Deep Idle (S0) sleep when the TSC
+ * has progressed.  The nanotime data is updated based on the passed-in value.
  *
  * The caller must guarantee non-reentrancy.
  */
@@ -377,7 +335,7 @@ rtclock_init(void)
                rtc_timer_init();
                clock_timebase_init();
                ml_init_lock_timeout();
-               ml_init_delay_spin_threshold();
+               ml_init_delay_spin_threshold(10);
        }
 
        /* Set fixed configuration for lapic timers */
@@ -394,14 +352,21 @@ static void
 rtc_set_timescale(uint64_t cycles)
 {
        pal_rtc_nanotime_t      *rntp = &pal_rtc_nanotime_info;
+       uint32_t        shift = 0;
+
+       /* the "scale" factor will overflow unless cycles > SLOW_TSC_THRESHOLD */
+
+       while (cycles <= SLOW_TSC_THRESHOLD) {
+               shift++;
+               cycles <<= 1;
+       }
+
+       if (shift != 0)
+               printf("Slow TSC, rtc_nanotime.shift == %d\n", shift);
+
        rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles);
 
-#if CONFIG_EMBEDDED
-       if (cycles <= SLOW_TSC_THRESHOLD)
-               rntp->shift = (uint32_t)cycles;
-       else
-#endif
-               rntp->shift = 32;
+       rntp->shift = shift;
 
        if (tsc_rebase_abs_time == 0)
                tsc_rebase_abs_time = mach_absolute_time();
@@ -602,12 +567,11 @@ nanoseconds_to_absolutetime(
 
 void
 machine_delay_until(
-       uint64_t                deadline)
+       uint64_t                interval,
+       uint64_t                deadline)
 {
-       uint64_t                now;
-
-       do {
-               cpu_pause();
-               now = mach_absolute_time();
-       } while (now < deadline);
+       (void)interval;
+       while (mach_absolute_time() < deadline) {
+               cpu_pause();
+       }
 }
index c17320b7a667c89861396cc947ce5933786e1436..528cbfe75bb0debb409e4857cc4832c323083132 100644 (file)
 
 /*
  * Assembly snippet included in exception handlers and rtc_nanotime_read()
+ *
+ *
+ * Warning!  There are several copies of this code in the trampolines found in
+ * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
+ * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
+ *
+ * The algorithm we use is:
+ *
+ *     ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *
+ * rnt_shift, a constant computed during initialization, is the smallest value for which:
+ *
+ *     (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
+ *
+ * Where SLOW_TSC_THRESHOLD is about 10**9.  Since most processors' tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
+ *
+ *     rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
+ *
  * %rdi points to nanotime info struct.
  * %rax returns nanotime
  */
        rdtsc                                                           ; \
        lfence                                                          ; \
        shlq    $32,%rdx                                                ; \
+       movl    RNT_SHIFT(%rdi),%ecx                                    ; \
        orq     %rdx,%rax                       /* %rax := tsc */       ; \
        subq    RNT_TSC_BASE(%rdi),%rax         /* tsc - tsc_base */    ; \
-       xorq    %rcx,%rcx                                               ; \
+       shlq    %cl,%rax                                                ; \
        movl    RNT_SCALE(%rdi),%ecx                                    ; \
        mulq    %rcx                            /* delta * scale */     ; \
        shrdq   $32,%rdx,%rax                   /* %rdx:%rax >>= 32 */  ; \
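
The macro above is the assembly form of the conversion; in C, with a 128-bit
intermediate, the same computation reads as below.  This is a sketch of the
arithmetic only: the real reader also retries under the generation counter
mentioned above to guard against partially updated data.

	#include <stdint.h>

	/* ns = ((((tsc - tsc_base) << shift) * scale) >> 32) + ns_base */
	static uint64_t
	nanotime_sketch(uint64_t tsc, uint64_t tsc_base, uint32_t shift,
			uint32_t scale, uint64_t ns_base)
	{
		__uint128_t delta = (tsc - tsc_base) << shift;
		return (uint64_t)((delta * scale) >> 32) + ns_base;
	}
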
index 2d944765d47f62142b7f25c44f625402507e65aa..b467df170763be026df54c7e1657c2a1ac879ace 100644 (file)
@@ -48,8 +48,11 @@ extern void  _rtc_nanotime_adjust(
                        pal_rtc_nanotime_t      *dst);
 
 extern uint64_t        _rtc_nanotime_read(
-                       pal_rtc_nanotime_t      *rntp,
-                       int                     slow);
+                       pal_rtc_nanotime_t      *rntp);
+
+extern uint64_t _rtc_tsc_to_nanoseconds(
+                       uint64_t    value,
+                       pal_rtc_nanotime_t      *rntp);
 
 extern void    rtclock_intr(x86_saved_state_t *regs);
 
index d7b4608786c2fd46b5168fe650dad279bc198ae2..c6f921c6f140692c407cdb03512deab581baaaa1 100644 (file)
 #include <mach/i386/syscall_sw.h>
 
 #include <libkern/OSDebug.h>
-
+#include <i386/cpu_threads.h>
 #include <machine/pal_routines.h>
 
 extern void throttle_lowpri_io(int);
@@ -350,7 +350,7 @@ interrupt(x86_saved_state_t *state)
        int             ipl;
        int             cnum = cpu_number();
        int             itype = 0;
-       
+
        if (is_saved_state64(state) == TRUE) {
                x86_saved_state64_t     *state64;
 
@@ -373,6 +373,9 @@ interrupt(x86_saved_state_t *state)
                interrupt_num = state32->trapno;
        }
 
+       if (cpu_data_ptr[cnum]->lcpu.package->num_idle == topoParms.nLThreadsPerPackage)
+               cpu_data_ptr[cnum]->cpu_hwIntpexits[interrupt_num]++;
+
        if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_INTERPROCESSOR_INTERRUPT))
                itype = 1;
        else if (interrupt_num == (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT))
index 02b41779c99eb2596b727701c75aded41eb4791d..f8f4fd665acad034d4e3dadf6e0328fd8df60e20 100644 (file)
@@ -165,6 +165,7 @@ tsc_init(void)
        busFreq = EFI_FSB_frequency();
 
        switch (cpuid_cpufamily()) {
+       case CPUFAMILY_INTEL_HASWELL:
        case CPUFAMILY_INTEL_IVYBRIDGE:
        case CPUFAMILY_INTEL_SANDYBRIDGE:
        case CPUFAMILY_INTEL_WESTMERE:
@@ -211,7 +212,7 @@ tsc_init(void)
        }
 
        kprintf(" BUS: Frequency = %6d.%06dMHz, "
-               "cvtt2n = %08Xx.%08Xx, cvtn2t = %08Xx.%08Xx\n",
+               "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X\n",
                (uint32_t)(busFreq / Mega),
                (uint32_t)(busFreq % Mega), 
                (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
@@ -238,7 +239,7 @@ tsc_init(void)
        tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
 
        kprintf(" TSC: Frequency = %6d.%06dMHz, "
-               "cvtt2n = %08Xx.%08Xx, cvtn2t = %08Xx.%08Xx, gran = %lld%s\n",
+               "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
                (uint32_t)(tscFreq / Mega),
                (uint32_t)(tscFreq % Mega), 
                (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
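
The cvtt2n/cvtn2t values printed above are 32.32 fixed-point conversion factors,
a reciprocal pair per the tscFCvtn2t derivation in this hunk.  Assuming the
ticks-to-ns factor is (Giga << 32) / frequency, a hypothetical 100 MHz bus gives
exactly 10.0 ns per tick:

	#include <stdint.h>
	#include <stdio.h>

	#define Giga 1000000000ULL

	int main(void)
	{
		uint64_t busFreq = 100000000ULL;			/* assumed */
		uint64_t cvtt2n = (Giga << 32) / busFreq;		/* ticks -> ns */
		uint64_t cvtn2t = 0xFFFFFFFFFFFFFFFFULL / cvtt2n;	/* ns -> ticks */
		printf("cvtt2n = %08X.%08X\n",				/* 0000000A.00000000 */
		       (uint32_t)(cvtt2n >> 32), (uint32_t)cvtt2n);
		printf("cvtn2t = %08X.%08X\n",				/* 00000000.19999999 */
		       (uint32_t)(cvtn2t >> 32), (uint32_t)cvtn2t);
		return 0;
	}
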
index 2f6011b931b7c21bd663146384a26680b5a9bec3..42bb9684d1fbc5d023086016baf75c0c9214f00b 100644 (file)
@@ -42,7 +42,7 @@
 
 #define BASE_NHM_CLOCK_SOURCE  133333333ULL
 #define IA32_PERF_STS          0x198
-#define        SLOW_TSC_THRESHOLD      1000067800      /* TSC is too slow for regular nanotime() algorithm */
+#define        SLOW_TSC_THRESHOLD      1000067800      /* if slower, nonzero shift required in nanotime() algorithm */
 
 #ifndef ASSEMBLER
 extern uint64_t        busFCvtt2n;
index c7aaa6faf743290b9a0727ffd120815607a4e5d0..771a0dbef4fc75a1cda1602d89427a61656480f4 100644 (file)
@@ -827,7 +827,7 @@ _clock_delay_until_deadline(
        if (    ml_delay_should_spin(interval)  ||
                        get_preemption_level() != 0                             ||
                        ml_get_interrupts_enabled() == FALSE    ) {
-               machine_delay_until(deadline);
+               machine_delay_until(interval, deadline);
        } else {
                assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline);
 
index ed8218d17d8a2196836ffd4182299aa37dbb0661..fd31a1b9b7bcb061aa3bad8ed8fd21571a321c11 100644 (file)
@@ -117,7 +117,7 @@ extern void                 clock_gettimeofday_set_commpage(
                                                clock_sec_t                             *secs,
                                                clock_usec_t                    *microsecs);
 
-extern void                    machine_delay_until(
+extern void                    machine_delay_until(uint64_t interval,
                                                uint64_t                deadline);
 
 extern uint32_t                hz_tick_interval;
index 9b310f031cb6809ec9fcab9e74d06152bf8b5fc5..c0caa55114d52b7c43896aa23bf61dfbc4d0b3b7 100644 (file)
@@ -237,7 +237,7 @@ processor_shutdown(
 }
 
 /*
- * Called at splsched.
+ * Called with interrupts disabled.
  */
 void
 processor_doshutdown(
@@ -245,6 +245,7 @@ processor_doshutdown(
 {
        thread_t                        old_thread, self = current_thread();
        processor_t                     prev;
+       processor_set_t                 pset;
 
        /*
         *      Get onto the processor to shutdown
@@ -252,18 +253,29 @@ processor_doshutdown(
        prev = thread_bind(processor);
        thread_block(THREAD_CONTINUE_NULL);
 
-#if HIBERNATION
-       if (processor_avail_count < 2)
-               hibernate_vm_lock();
-#endif
-
        assert(processor->state == PROCESSOR_SHUTDOWN);
 
+       ml_cpu_down();
+
 #if HIBERNATION
-       if (processor_avail_count < 2)
+       if (processor_avail_count < 2) {
+               hibernate_vm_lock();
                hibernate_vm_unlock();
+       }
 #endif
 
+       pset = processor->processor_set;
+       pset_lock(pset);
+       processor->state = PROCESSOR_OFF_LINE;
+       if (--pset->online_processor_count == 0) {
+               pset_pri_init_hint(pset, PROCESSOR_NULL);
+               pset_count_init_hint(pset, PROCESSOR_NULL);
+       }
+       (void)hw_atomic_sub(&processor_avail_count, 1);
+       commpage_update_active_cpus();
+       SCHED(processor_queue_shutdown)(processor);
+       /* pset lock dropped */
+
        /*
         *      Continue processor shutdown in shutdown context.
         */
@@ -274,7 +286,7 @@ processor_doshutdown(
 }
 
 /*
- *     Complete the shutdown and place the processor offline.
+ *     Complete the shutdown and place the processor offline.
  *
  *     Called at splsched in the shutdown context.
  */
@@ -283,7 +295,6 @@ processor_offline(
        processor_t                     processor)
 {
        thread_t                        new_thread, old_thread = processor->active_thread;
-       processor_set_t         pset;
 
        new_thread = processor->idle_thread;
        processor->active_thread = new_thread;
@@ -301,20 +312,6 @@ processor_offline(
 
        PMAP_DEACTIVATE_KERNEL(processor->cpu_id);
 
-       pset = processor->processor_set;
-       pset_lock(pset);
-       processor->state = PROCESSOR_OFF_LINE;
-       if (--pset->online_processor_count == 0) {
-               pset_pri_init_hint(pset, PROCESSOR_NULL);
-               pset_count_init_hint(pset, PROCESSOR_NULL);
-       }
-       (void)hw_atomic_sub(&processor_avail_count, 1);
-       commpage_update_active_cpus();
-       SCHED(processor_queue_shutdown)(processor);
-       /* pset lock dropped */
-
-       ml_cpu_down();
-
        cpu_sleep();
        panic("zombie processor");
        /*NOTREACHED*/
index 23a5496119f7ea63cce73147b87f3c0cd5b8bdc6..ead95b882b2fb689b4c5363e881b7ff540d99181 100644 (file)
@@ -143,6 +143,8 @@ processor_init(
        int                                     cpu_id,
        processor_set_t         pset)
 {
+       spl_t           s;
+
        if (processor != master_processor) {
                /* Scheduler state deferred until sched_init() */
                SCHED(processor_init)(processor);
@@ -162,6 +164,7 @@ processor_init(
        processor_data_init(processor);
        processor->processor_list = NULL;
 
+       s = splsched();
        pset_lock(pset);
        if (pset->cpu_set_count++ == 0)
                pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
@@ -170,6 +173,7 @@ processor_init(
                pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
        }
        pset_unlock(pset);
+       splx(s);
 
        simple_lock(&processor_list_lock);
        if (processor_list == NULL)
index b20629ffa82b832546b1bc4ddd130745f49188e6..e1763137612c312c80ee2e2aa609d26328b37929 100644 (file)
 #include <pmc/pmc.h>
 #endif
 
+#include <i386/pmCPU.h>
 static void            kernel_bootstrap_thread(void);
 
 static void            load_context(
@@ -142,6 +143,7 @@ extern int serverperfmode;
 
 /* size of kernel trace buffer, disabled by default */
 unsigned int new_nkdbufs = 0;
+unsigned int wake_nkdbufs = 0;
 
 /* mach leak logging */
 int log_leaks = 0;
@@ -177,6 +179,8 @@ kernel_bootstrap(void)
 
        PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs));
 
+       PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof (wake_nkdbufs));
+
        /* i386_vm_init already checks for this; do it again anyway */
         if (PE_parse_boot_argn("serverperfmode", &serverperfmode, sizeof (serverperfmode))) {
                 serverperfmode = 1;
@@ -341,7 +345,7 @@ kernel_bootstrap_thread(void)
 #if (defined(__i386__) || defined(__x86_64__))
        if (turn_on_log_leaks && !new_nkdbufs)
                new_nkdbufs = 200000;
-       start_kern_tracing(new_nkdbufs);
+       start_kern_tracing(new_nkdbufs, FALSE);
        if (turn_on_log_leaks)
                log_leaks = 1;
 #endif
@@ -366,7 +370,7 @@ kernel_bootstrap_thread(void)
 #if (!defined(__i386__) && !defined(__x86_64__))
        if (turn_on_log_leaks && !new_nkdbufs)
                new_nkdbufs = 200000;
-       start_kern_tracing(new_nkdbufs);
+       start_kern_tracing(new_nkdbufs, FALSE);
        if (turn_on_log_leaks)
                log_leaks = 1;
 #endif
index 8d16db0faf1f9dabef78eb5e492d59a65131cda0..ab32e87a7b4844b700cafd68ff2188eab83df4ca 100644 (file)
@@ -30,6 +30,6 @@
 #ifndef        _MACH_BRANCH_PREDICATES_H
 #define        _MACH_BRANCH_PREDICATES_H
 
-#define        __probable(x)   __builtin_expect((x), 1)
-#define        __improbable(x) __builtin_expect((x), 0)
+#define        __probable(x)   __builtin_expect((long)(x), 1L)
+#define        __improbable(x) __builtin_expect((long)(x), 0L)
 #endif /* _MACH_BRANCH_PREDICATES_H */
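
The added (long) casts let the predicates accept pointer-valued or 64-bit
expressions directly, since __builtin_expect() takes long arguments.  A
hypothetical call site marking the failure path cold:

	static void *
	alloc_or_panic(vm_size_t size)
	{
		void *buf = kalloc(size);
		if (__improbable(buf == NULL))
			panic("kalloc of %lu bytes failed", (unsigned long)size);
		return buf;
	}
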
index 715422ac8f6cee59274e205168d00d947c500c84..36232c736a802077a9c33f766ae7e2e0a9d92cca 100644 (file)
 #define x86_DEBUG_STATE64              11
 #define x86_DEBUG_STATE                        12
 #define THREAD_STATE_NONE              13
-/* 15 and 16 are used for the internal x86_SAVED_STATE flavours */
+/* 14 and 15 are used for the internal x86_SAVED_STATE flavours */
 #define x86_AVX_STATE32                        16
 #define x86_AVX_STATE64                        17
+#define x86_AVX_STATE                  18
 
 
 /*
          (x == x86_DEBUG_STATE)        || \
          (x == x86_AVX_STATE32)        || \
          (x == x86_AVX_STATE64)        || \
+         (x == x86_AVX_STATE)          || \
          (x == THREAD_STATE_NONE))
 
 struct x86_state_hdr {
@@ -263,6 +265,14 @@ struct x86_debug_state {
        } uds;
 };
 
+struct x86_avx_state {
+       x86_state_hdr_t                 ash;
+       union {
+               x86_avx_state32_t       as32;
+               x86_avx_state64_t       as64;
+       } ufs;
+};
+
 typedef struct x86_thread_state x86_thread_state_t;
 #define x86_THREAD_STATE_COUNT ((mach_msg_type_number_t) \
                ( sizeof (x86_thread_state_t) / sizeof (int) ))
@@ -279,6 +289,10 @@ typedef struct x86_debug_state x86_debug_state_t;
 #define x86_DEBUG_STATE_COUNT ((mach_msg_type_number_t) \
                (sizeof(x86_debug_state_t)/sizeof(unsigned int)))
 
+typedef struct x86_avx_state x86_avx_state_t;
+#define x86_AVX_STATE_COUNT ((mach_msg_type_number_t) \
+               (sizeof(x86_avx_state_t)/sizeof(unsigned int)))
+
 /*
  * Machine-independent way for servers and Mach's exception mechanism to
  * choose the most efficient state flavor for exception RPC's:
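
The new THREAD_STATE_FLAVOR_LIST_10_9 selector reports which flavors a 10.9-era
client can expect, ending with x86_AVX_STATE (the machine_thread_get_state() hunk
earlier returns the five flavors).  A user-space query sketch:

	#include <mach/mach.h>
	#include <mach/thread_status.h>

	void
	list_flavors(thread_act_t thread)
	{
		natural_t flavors[8];
		mach_msg_type_number_t count = 8;
		if (thread_get_state(thread, THREAD_STATE_FLAVOR_LIST_10_9,
				     (thread_state_t)flavors, &count) == KERN_SUCCESS) {
			/* flavors[0..count-1]: x86_THREAD_STATE ... x86_AVX_STATE */
		}
	}
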
index a013ff1ae06aa8c740101217a670e39da4d38d15..e890f6652b1b132900de1fe861a5c6a3561922ed 100644 (file)
@@ -217,6 +217,7 @@ type thread_policy_t                = array[*:16] of integer_t;
                 * task_extmod_info_t (8 64-bit ints)
                 * task_basic_info_64_2_t
                 * mach_task_basic_info_t (12 ints)
+                * task_power_info_t (18 ints)
                 * If other task_info flavors are added, this
                 * definition may need to be changed. (See
                 * mach/task_info.h and mach/policy.h) */
@@ -303,12 +304,13 @@ type host_security_t = mach_port_t
                 *      kernel_resource_sizes_t (5 ints)
                 *      host_load_info_t (6 ints)
                 *      vm_statistics32_t (15 ints)
+                *      host_expired_task_info uses a task_power_info (18 ints)
                 * 
                 * If other host_info flavors are added, this definition may
                 * need to be changed. (See mach/{host_info,vm_statistics}.h)
                 */
 type host_flavor_t             = int;
-type host_info_t               = array[*:15] of integer_t;
+type host_info_t               = array[*:18] of integer_t;
 
 
                /* 
index 57a0c28e690ace66b824e4aa4dfc9e897a356e77..1520a6049beb541d4ff8438dd75671de2c0841c2 100644 (file)
@@ -378,6 +378,7 @@ __END_DECLS
 #define CPUFAMILY_INTEL_WESTMERE       0x573b5eec
 #define CPUFAMILY_INTEL_SANDYBRIDGE    0x5490b78c
 #define CPUFAMILY_INTEL_IVYBRIDGE      0x1f65e835
+#define CPUFAMILY_INTEL_HASWELL                0x10b282dc
 #define CPUFAMILY_ARM_9                        0xe73283ae
 #define CPUFAMILY_ARM_11               0x8ff620d8
 #define CPUFAMILY_ARM_XSCALE           0x53b005f5
index aead09bf9733da6af3d9bbc346b2bdbf54f9e1db..cef4380bfdf8e0291c8387b911bd47e59228d43f 100644 (file)
@@ -87,6 +87,7 @@ typedef       natural_t       thread_state_data_t[THREAD_STATE_MAX];
 
 #define        THREAD_STATE_FLAVOR_LIST        0       /* List of valid flavors */
 #define THREAD_STATE_FLAVOR_LIST_NEW   128
+#define THREAD_STATE_FLAVOR_LIST_10_9  129
 
 typedef        int                     thread_state_flavor_t;
 typedef thread_state_flavor_t  *thread_state_flavor_array_t;
index e35f70daf6339938ce091b8857b4a7d12deb6d03..0629a9900775add9449040a93f2626b8ea90feb9 100644 (file)
@@ -233,7 +233,7 @@ kernel_memory_allocate(
        vm_object_t             object;
        vm_object_offset_t      offset;
        vm_object_offset_t      pg_offset;
-       vm_map_entry_t          entry;
+       vm_map_entry_t          entry = NULL;
        vm_map_offset_t         map_addr, fill_start;
        vm_map_offset_t         map_mask;
        vm_map_size_t           map_size, fill_size;
index 354a2f3db1717eeaa90b1c458b41ce6dec110d95..8e7e0cc88bb6a6b30e45b6dd2a465453e20c936e 100644 (file)
@@ -3,21 +3,9 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 
-
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-EXPORT_ONLY_FILES =
-
-INSTALL_MD_DIR = x86_64
-
-INSTALL_MD_LIST = 
-
-INSTALL_MD_LCL_LIST =
-
-EXPORT_MD_LIST = ${EXPORT_ONLY_FILES}
-
-EXPORT_MD_DIR = x86_64
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
index f13db5aabdca101b63729cba05f7106e55266acf..8ca0c92a437f15d9c5f03dad9b97511bce851fb0 100644 (file)
@@ -128,6 +128,40 @@ rdmsr_fail:
        movq    $1, %rax
        ret
 
+/*
+ * int rdmsr64_carefully(uint32_t msr, uint64_t *val);
+ */
+
+ENTRY(rdmsr64_carefully)
+       movl    %edi, %ecx
+       RECOVERY_SECTION
+       RECOVER(rdmsr64_carefully_fail)
+       rdmsr
+       movl    %eax, (%rsi)
+       movl    %edx, 4(%rsi)
+       xorl    %eax, %eax
+       ret
+rdmsr64_carefully_fail:
+       movl    $1, %eax
+       ret
+/*
+ * int wrmsr64_carefully(uint32_t msr, uint64_t val);
+ */
+
+ENTRY(wrmsr64_carefully)
+       movl    %edi, %ecx
+       movl    %esi, %eax
+       shr     $32, %rsi
+       movl    %esi, %edx
+       RECOVERY_SECTION
+       RECOVER(wrmsr64_carefully_fail)
+       wrmsr
+       xorl    %eax, %eax
+       ret
+wrmsr64_carefully_fail:
+       movl    $1, %eax
+       ret
+
 .globl EXT(thread_exception_return)
 .globl EXT(thread_bootstrap_return)
 LEXT(thread_bootstrap_return)
index 3628875868bad43f22a33de71f2e6af3ac828b8f..1d2e3ed3fa18c7391182d987d522263feb385694 100644 (file)
@@ -104,28 +104,36 @@ ENTRY(_rtc_nanotime_adjust)
        ret
 
 /*
- * unint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
+ * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
  *
  * This is the same as the commpage nanotime routine, except that it uses the
  * kernel internal "rtc_nanotime_info" data instead of the commpage data.
  * These two copies of data are kept in sync by rtc_clock_napped().
  *
- * Warning!  There is another copy of this code in osfmk/x86_64/idt64.s.
- * These are kept in sync by both using the RTC_NANOTIME_READ() macro.
+ * Warning!  There are several copies of this code in the trampolines found in
+ * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
+ * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
  *
- * There are two versions of this algorithm, for "slow" and "fast" processors.
- * The more common "fast" algorithm is:
+ * The algorithm we use is:
  *
- *     ns = (((rdtsc - rnt_tsc_base)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *     ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
  *
- * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant
- * computed during initialization:
+ * rnt_shift, a constant computed during initialization, is the smallest value for which:
  *
- *     rnt_tsc_scale = (10e9 * 2**32) / tscFreq;
+ *     (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
  *
- * The "slow" algorithm uses long division:
+ * Where SLOW_TSC_THRESHOLD is about 10**9.  Since most processors' tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
+ *
- *     ns = (((rdtsc - rnt_tsc_base) * 10e9) / tscFreq) + rnt_ns_base;
+ *     rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
+ *
+ * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit
+ * multiply of rdtsc by tscFCvtt2n:
+ *
+ *     ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
+ *
+ * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
+ * When U32 goes away, we should reconsider.
  *
  * Since this routine is not synchronized and can be called in any context, 
  * we use a generation count to guard against seeing partially updated data.
@@ -136,33 +144,36 @@ ENTRY(_rtc_nanotime_adjust)
  * the generation is zero.
  *
 * uint64_t _rtc_nanotime_read(
- *                     rtc_nanotime_t *rntp,           // %rdi
- *                     int            slow);           // %rsi
+ *                     rtc_nanotime_t *rntp);          // %rdi
  *
  */
 ENTRY(_rtc_nanotime_read)
-       test            %rsi,%rsi
-       jnz             Lslow
-               
-       /*
-        * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD
-        */
+
        PAL_RTC_NANOTIME_READ_FAST()
 
        ret
+    
+/*
+ * extern uint64_t _rtc_tsc_to_nanoseconds(
+ *          uint64_t    value,              // %rdi
+ *          pal_rtc_nanotime_t *rntp);     // %rsi
+ *
+ * Converts TSC units to nanoseconds, using an abbreviated form of the above
+ * algorithm.  Note that while we could have simply used tmrCvt(value,tscFCvtt2n),
+ * which would avoid the need for this asm, doing so is a bit more risky since
+ * we'd be using a different algorithm with possibly different rounding etc.
+ */
 
-       /*
-        * Processor whose TSC frequency is not faster than SLOW_TSC_THRESHOLD
-        * But K64 doesn't support this...
-        */
-Lslow:
-       lea     1f(%rip),%rdi
-       xorb    %al,%al
-       call    EXT(panic)
-       hlt
-       .data
-1:     String  "_rtc_nanotime_read() - slow algorithm not supported"
-       .text
+ENTRY(_rtc_tsc_to_nanoseconds)
+       movq    %rdi,%rax                       /* copy value (in TSC units) to convert */
+       movl    RNT_SHIFT(%rsi),%ecx
+       movl    RNT_SCALE(%rsi),%edx
+       shlq    %cl,%rax                        /* tscUnits << shift */
+       mulq    %rdx                            /* (tscUnits << shift) * scale */
+       shrdq   $32,%rdx,%rax                   /* %rdx:%rax >>= 32 */
+       ret
+    
+    
 
 Entry(call_continuation)
        movq    %rdi,%rcx                       /* get continuation */
index 2bc5bfab73952ee99cb883d6afe4c668130f4347..3f05e7f277d878afbda4060e47115e84e8585894 100644 (file)
@@ -295,6 +295,7 @@ boolean_t pmap_smep_enabled = FALSE;
 void
 pmap_cpu_init(void)
 {
+       cpu_data_t      *cdp = current_cpu_datap();
        /*
         * Here early in the life of a processor (from cpu_mode_init()).
         * Ensure global page feature is disabled at this point.
@@ -305,10 +306,10 @@ pmap_cpu_init(void)
        /*
         * Initialize the per-cpu, TLB-related fields.
         */
-       current_cpu_datap()->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
-       current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
-       current_cpu_datap()->cpu_tlb_invalid = FALSE;
-       current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;
+       cdp->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
+       cdp->cpu_active_cr3 = kernel_pmap->pm_cr3;
+       cdp->cpu_tlb_invalid = FALSE;
+       cdp->cpu_task_map = TASK_MAP_64BIT;
        pmap_pcid_configure();
        if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_SMEP) {
                boolean_t nsmep;
@@ -317,6 +318,11 @@ pmap_cpu_init(void)
                        pmap_smep_enabled = TRUE;
                }
        }
+
+       if (cdp->cpu_fixed_pmcs_enabled) {
+               boolean_t enable = TRUE;
+               cpu_pmc_control(&enable);
+       }
 }
 
 
index fcff88b88e0158c972605e9ab1580003d0d7e68f..f056487a044ce0a798318ad729fd744266a7c829 100644 (file)
@@ -201,7 +201,6 @@ int serial_init( void )
 void serial_putc( char c )
 {
     uart_putc(c);
-    if (c == '\n') uart_putc('\r');
 }
 
 int serial_getc( void )