git.saurik.com Git - apple/xnu.git/commitdiff
snapshot: xnu-1504.15.3.tar.gz   tags: mac-os-x-1068, v1504.15.3
author     Apple <opensource@apple.com>   Fri, 24 Jun 2011 14:00:12 +0000 (14:00 +0000)
committer  Apple <opensource@apple.com>   Fri, 24 Jun 2011 14:00:12 +0000 (14:00 +0000)
122 files changed:
bsd/conf/MASTER
bsd/conf/MASTER.i386
bsd/conf/MASTER.ppc
bsd/conf/MASTER.x86_64
bsd/dev/i386/sysctl.c
bsd/dev/i386/unix_signal.c
bsd/hfs/hfs.h
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_xattr.c
bsd/hfs/hfscommon/Misc/VolumeAllocation.c
bsd/i386/_structs.h
bsd/kern/pthread_synch.c
bsd/miscfs/devfs/devfs_tree.c
bsd/net/if_media.h
bsd/netinet6/ip6_fw.h
bsd/netinet6/ip6_output.c
bsd/netinet6/raw_ip6.c
bsd/sys/buf_internal.h
bsd/sys/disk.h
bsd/sys/kdebug.h
bsd/sys/mount.h
bsd/sys/mount_internal.h
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_journal.h
bsd/vfs/vfs_subr.c
config/IOKit.exports
config/MasterVersion
config/Private.i386.exports
config/System6.0.exports
config/Unsupported.exports
config/Unsupported.i386.exports
config/Unsupported.x86_64.exports
iokit/IOKit/IOInterruptEventSource.h
iokit/IOKit/IOKitDebug.h
iokit/IOKit/IOReturn.h
iokit/IOKit/IOTimeStamp.h
iokit/IOKit/pwr_mgt/IOPM.h
iokit/Kernel/IOCommandGate.cpp
iokit/Kernel/IOCommandQueue.cpp
iokit/Kernel/IOFilterInterruptEventSource.cpp
iokit/Kernel/IOHibernateIO.cpp
iokit/Kernel/IOInterruptController.cpp
iokit/Kernel/IOInterruptEventSource.cpp
iokit/Kernel/IOKitDebug.cpp
iokit/Kernel/IOKitKernelInternal.h
iokit/Kernel/IOMemoryDescriptor.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/Kernel/IOService.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/IOServicePMPrivate.h
iokit/Kernel/IOStartIOKit.cpp
iokit/Kernel/IOTimerEventSource.cpp
iokit/Kernel/IOWorkLoop.cpp
kgmacros
osfmk/conf/Makefile.x86_64
osfmk/console/i386/serial_console.c
osfmk/console/video_console.c
osfmk/console/video_console.h
osfmk/i386/AT386/model_dep.c
osfmk/i386/acpi.c
osfmk/i386/cpu_data.h
osfmk/i386/cpu_topology.h
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/etimer.c
osfmk/i386/fpu.c
osfmk/i386/fpu.h
osfmk/i386/genassym.c
osfmk/i386/i386_init.c
osfmk/i386/i386_vm_init.c
osfmk/i386/io_map.c
osfmk/i386/io_map_entries.h
osfmk/i386/lapic.c
osfmk/i386/lapic.h
osfmk/i386/locks_i386.c
osfmk/i386/locore.s
osfmk/i386/machine_routines.c
osfmk/i386/machine_routines.h
osfmk/i386/misc_protos.h
osfmk/i386/mp.c
osfmk/i386/mp.h
osfmk/i386/pcb.c
osfmk/i386/pmCPU.c
osfmk/i386/pmCPU.h
osfmk/i386/pmap.c
osfmk/i386/pmap.h
osfmk/i386/pmap_internal.h
osfmk/i386/pmap_x86_common.c
osfmk/i386/proc_reg.h
osfmk/i386/rtclock.c
osfmk/i386/rtclock.h
osfmk/i386/seg.h
osfmk/i386/start64.s
osfmk/i386/thread.h
osfmk/i386/trap.c
osfmk/i386/tsc.c
osfmk/kdp/kdp.c
osfmk/kdp/kdp_udp.c
osfmk/kern/etimer.h
osfmk/kern/sched_prim.c
osfmk/kern/wait_queue.h
osfmk/mach/i386/_structs.h
osfmk/mach/i386/fp_reg.h
osfmk/mach/i386/thread_status.h
osfmk/mach/mach_vm.defs
osfmk/mach/vm_map.defs
osfmk/mach/vm_statistics.h
osfmk/ppc/io_map.c
osfmk/ppc/io_map_entries.h
osfmk/ppc/machine_routines.c
osfmk/ppc/machine_routines.h
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/vm/vm_resident.c
osfmk/vm/vm_user.c
osfmk/x86_64/idt64.s
osfmk/x86_64/pmap.c
pexpert/i386/pe_init.c
pexpert/pexpert/i386/efi.h
pexpert/pexpert/pexpert.h

index fd9635408152c9715a6d5bf325381aadcfa4347e..93872ad07b75a9244b2065783a7a85b7099b97e1 100644 (file)
@@ -189,6 +189,7 @@ options             FDESC           # fdesc_fs support              # <fdesc>
 options                DEVFS           # devfs support                 # <devfs>
 options                JOURNALING      # journaling support    # <journaling>
 options                HFS_COMPRESSION # hfs compression       # <hfs_compression>
+options                CONFIG_HFS_TRIM # HFS trims unused blocks       # <config_hfs_trim>
 
 #
 #      file system features
index f5e48b2f5e7f56acd0ea05dd475407d90b4bc97c..1e6641911bbd1103bf883ed9d6e083054b67d007 100644 (file)
@@ -45,7 +45,7 @@
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
 #  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression config_imgsrc_access ]
 #  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow pkt_priority if_bridge ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
index bd15e6657f9c28f1bbbd79e782c41d3eb8bb6004..d99b6e4f587c8833c10479fdf369a4e18bbdcc79 100644 (file)
@@ -46,7 +46,7 @@
 #  -------- ----- -------- ---------------
 #
 #  BASE =        [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression ]
 #  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow pkt_priority ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
index b43836a824632b15d743560e683123268487be24..1050897d249ff2a3d049735ec1068b862b350f2a 100644 (file)
@@ -45,7 +45,7 @@
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
 #  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ]
+#  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression config_imgsrc_access ]
 #  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow pkt_priority if_bridge ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
index 597a208c1714bf6643c778fb7b85e6c8438a8d81..c255529a19f02b299649b80d87bd32a7df70f68d 100644 (file)
@@ -32,6 +32,7 @@
 #include <sys/sysctl.h>
 #include <i386/cpuid.h>
 #include <i386/tsc.h>
+#include <i386/machine_routines.h>
 
 static int
 _i386_cpu_info SYSCTL_HANDLER_ARGS
@@ -105,13 +106,23 @@ cpu_arch_perf SYSCTL_HANDLER_ARGS
     return _i386_cpu_info(oidp, ptr, arg2, req);
 }
 
+static int
+cpu_xsave SYSCTL_HANDLER_ARGS
+{
+    i386_cpu_info_t *cpu_info = cpuid_info();
+    void *ptr = (uint8_t *)cpu_info->cpuid_xsave_leafp + (uintptr_t)arg1;
+    if (cpu_info->cpuid_xsave_leafp == NULL)
+        return ENOENT;
+    return _i386_cpu_info(oidp, ptr, arg2, req);
+}
+
 static int
 cpu_features SYSCTL_HANDLER_ARGS
 {
     __unused struct sysctl_oid *unused_oidp = oidp;
     __unused void *unused_arg1 = arg1;
     __unused int unused_arg2 = arg2; 
-    char buf[256];
+    char buf[512];
 
     buf[0] = '\0';
     cpuid_get_feature_names(cpuid_features(), buf, sizeof(buf));
@@ -125,7 +136,7 @@ cpu_extfeatures SYSCTL_HANDLER_ARGS
     __unused struct sysctl_oid *unused_oidp = oidp;
     __unused void *unused_arg1 = arg1;
     __unused int unused_arg2 = arg2; 
-    char buf[256];
+    char buf[512];
 
     buf[0] = '\0';
     cpuid_get_extfeature_names(cpuid_extfeatures(), buf, sizeof(buf));
@@ -190,6 +201,28 @@ cpu_flex_ratio_max SYSCTL_HANDLER_ARGS
        return SYSCTL_OUT(req, &flex_ratio_max, sizeof(flex_ratio_max));
 }
 
+/*
+ * Populates the {CPU, vector, latency} triple for the maximum observed primary
+ * interrupt latency
+ */
+static int
+misc_interrupt_latency_max(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+       int changed = 0, error;
+       char buf[128];
+       buf[0] = '\0';
+
+       interrupt_populate_latency_stats(buf, sizeof(buf));
+
+       error = sysctl_io_string(req, buf, sizeof(buf), 0, &changed);
+
+       if (error == 0 && changed) {
+               interrupt_reset_latency_stats();
+       }
+
+       return error;
+}
+
 SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "CPU info");
 
@@ -332,6 +365,46 @@ SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, ACNT_MCNT,
            sizeof(boolean_t),
            cpu_thermal, "I", "ACNT_MCNT capability");
 
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, core_power_limits,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+           (void *)offsetof(cpuid_thermal_leaf_t, core_power_limits),
+           sizeof(boolean_t),
+           cpu_thermal, "I", "Power Limit Notifications at a Core Level");
+
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, fine_grain_clock_mod,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+           (void *)offsetof(cpuid_thermal_leaf_t, fine_grain_clock_mod),
+           sizeof(boolean_t),
+           cpu_thermal, "I", "Fine Grain Clock Modulation");
+
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, package_thermal_intr,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+           (void *)offsetof(cpuid_thermal_leaf_t, package_thermal_intr),
+           sizeof(boolean_t),
+           cpu_thermal, "I", "Packge Thermal interrupt and Status");
+
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, hardware_feedback,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+           (void *)offsetof(cpuid_thermal_leaf_t, hardware_feedback),
+           sizeof(boolean_t),
+           cpu_thermal, "I", "Hardware Coordination Feedback");
+
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, energy_policy,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+           (void *)offsetof(cpuid_thermal_leaf_t, energy_policy),
+           sizeof(boolean_t),
+           cpu_thermal, "I", "Energy Efficient Policy Support");
+
+
+SYSCTL_NODE(_machdep_cpu, OID_AUTO, xsave, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+       "xsave");
+
+SYSCTL_PROC(_machdep_cpu_xsave, OID_AUTO, extended_state,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+           (void *)offsetof(cpuid_xsave_leaf_t, extended_state),
+           sizeof(cpuid_xsave_leaf_t),
+           cpu_xsave, "IU", "XSAVE Extended State");
+
 
 SYSCTL_NODE(_machdep_cpu, OID_AUTO, arch_perf, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "arch_perf");
@@ -544,3 +617,13 @@ SYSCTL_QUAD(_machdep_memmap, OID_AUTO, PalCode, CTLFLAG_RD|CTLFLAG_LOCKED, &firm
 SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Reserved, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_Reserved_bytes, "");
 SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Unusable, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_Unusable_bytes, "");
 SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Other, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_other_bytes, "");
+
+SYSCTL_NODE(_machdep, OID_AUTO, tsc, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "Timestamp counter parameters");
+
+SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency, CTLFLAG_RD|CTLFLAG_LOCKED, &tscFreq, "");
+SYSCTL_NODE(_machdep, OID_AUTO, misc, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+       "Miscellaneous x86 kernel parameters");
+
+SYSCTL_PROC(_machdep_misc, OID_AUTO, interrupt_latency_max, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 
+           0, 0,
+           misc_interrupt_latency_max, "A", "Maximum Interrupt latency");
index eb96e879edaba0d778bd1f805c533b7a71a52a6c..06ed4172cb8f7d48311855e00a14b02247eb117d 100644 (file)
@@ -137,10 +137,11 @@ siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out)
 void
 sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint32_t code)
 {
-        union {
-           struct mcontext32   mctx32;
-           struct mcontext64   mctx64;
-       } mctx;
+       union {
+               struct mcontext_avx32   mctx_avx32;
+               struct mcontext_avx64   mctx_avx64;
+       } mctx_store, *mctxp = &mctx_store;
+
        user_addr_t     ua_sp;
        user_addr_t     ua_fp;
        user_addr_t     ua_cr2;
@@ -160,7 +161,8 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
        struct uthread * ut;
        int stack_size = 0;
        int infostyle = UC_TRAD;
-    
+       boolean_t       sig_avx;
+
        thread = current_thread();
        ut = get_bsdthread_info(thread);
 
@@ -178,7 +180,9 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
 
        bzero((caddr_t)&sinfo64, sizeof(sinfo64));
        sinfo64.si_signo = sig;
-               
+
+       bzero(mctxp, sizeof(*mctxp));
+       sig_avx = ml_fpu_avx_enabled();
 
        if (proc_is64bit(p)) {
                x86_thread_state64_t    *tstate64;
@@ -186,23 +190,29 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
 
                flavor = x86_THREAD_STATE64;
                state_count = x86_THREAD_STATE64_COUNT;
-               state = (void *)&mctx.mctx64.ss;
+               state = (void *)&mctxp->mctx_avx64.ss;
                if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS)
                        goto bad;
 
-               flavor = x86_FLOAT_STATE64;
-               state_count = x86_FLOAT_STATE64_COUNT;
-               state = (void *)&mctx.mctx64.fs;
+               if (sig_avx) {
+                       flavor = x86_AVX_STATE64;
+                       state_count = x86_AVX_STATE64_COUNT;
+               }
+               else {
+                       flavor = x86_FLOAT_STATE64;
+                       state_count = x86_FLOAT_STATE64_COUNT;
+               }
+               state = (void *)&mctxp->mctx_avx64.fs;
                if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS)
                        goto bad;
 
                flavor = x86_EXCEPTION_STATE64;
                state_count = x86_EXCEPTION_STATE64_COUNT;
-               state = (void *)&mctx.mctx64.es;
+               state = (void *)&mctxp->mctx_avx64.es;
                if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS)
                        goto bad;
 
-               tstate64 = &mctx.mctx64.ss;
+               tstate64 = &mctxp->mctx_avx64.ss;
 
                /* figure out where our new stack lives */
                if ((ut->uu_flag & UT_ALTSTACK) && !oonstack &&
@@ -214,7 +224,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                } else {
                        ua_sp = tstate64->rsp;
                }
-               ua_cr2 = mctx.mctx64.es.faultvaddr;
+               ua_cr2 = mctxp->mctx_avx64.es.faultvaddr;
 
                /* The x86_64 ABI defines a 128-byte red zone. */
                ua_sp -= C_64_REDZONE_LEN;
@@ -225,7 +235,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                ua_sp -= sizeof (user64_siginfo_t);
                ua_sip = ua_sp;
 
-               ua_sp -= sizeof (struct mcontext64);
+               ua_sp -= sizeof (struct mcontext_avx64);
                ua_mctxp = ua_sp;
 
                /*
@@ -254,13 +264,13 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                        uctx64.uc_stack.ss_flags |= SS_ONSTACK; 
                uctx64.uc_link = 0;
 
-               uctx64.uc_mcsize = sizeof(struct mcontext64);
+               uctx64.uc_mcsize = sig_avx ? sizeof(struct mcontext_avx64) : sizeof(struct mcontext64);
                uctx64.uc_mcontext64 = ua_mctxp;
                
                if (copyout((caddr_t)&uctx64, ua_uctxp, sizeof (uctx64))) 
                        goto bad;
 
-               if (copyout((caddr_t)&mctx.mctx64, ua_mctxp, sizeof (struct mcontext64))) 
+               if (copyout((caddr_t)&mctxp->mctx_avx64, ua_mctxp, sizeof (struct mcontext_avx64))) 
                        goto bad;
 
                sinfo64.pad[0]  = tstate64->rsp;
@@ -293,23 +303,30 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
 
                flavor = x86_THREAD_STATE32;
                state_count = x86_THREAD_STATE32_COUNT;
-               state = (void *)&mctx.mctx32.ss;
+               state = (void *)&mctxp->mctx_avx32.ss;
                if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS)
                        goto bad;
 
-               flavor = x86_FLOAT_STATE32;
-               state_count = x86_FLOAT_STATE32_COUNT;
-               state = (void *)&mctx.mctx32.fs;
+               if (sig_avx) {
+                       flavor = x86_AVX_STATE32;
+                       state_count = x86_AVX_STATE32_COUNT;
+               }
+               else {
+                       flavor = x86_FLOAT_STATE32;
+                       state_count = x86_FLOAT_STATE32_COUNT;
+               }
+
+               state = (void *)&mctxp->mctx_avx32.fs;
                if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS)
                        goto bad;
 
                flavor = x86_EXCEPTION_STATE32;
                state_count = x86_EXCEPTION_STATE32_COUNT;
-               state = (void *)&mctx.mctx32.es;
+               state = (void *)&mctxp->mctx_avx32.es;
                if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS)
                        goto bad;
 
-               tstate32 = &mctx.mctx32.ss;
+               tstate32 = &mctxp->mctx_avx32.ss;
 
                /* figure out where our new stack lives */
                if ((ut->uu_flag & UT_ALTSTACK) && !oonstack &&
@@ -321,7 +338,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                } else {
                        ua_sp = tstate32->esp;
                }
-               ua_cr2 = mctx.mctx32.es.faultvaddr;
+               ua_cr2 = mctxp->mctx_avx32.es.faultvaddr;
 
                ua_sp -= sizeof (struct user_ucontext32);
                ua_uctxp = ua_sp;                        // someone tramples the first word!
@@ -329,7 +346,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                ua_sp -= sizeof (user32_siginfo_t);
                ua_sip = ua_sp;
 
-               ua_sp -= sizeof (struct mcontext32);
+               ua_sp -= sizeof (struct mcontext_avx32);
                ua_mctxp = ua_sp;
 
                ua_sp -= sizeof (struct sigframe32);
@@ -375,14 +392,14 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                        uctx32.uc_stack.ss_flags |= SS_ONSTACK; 
                uctx32.uc_link = 0;
 
-               uctx32.uc_mcsize = sizeof(struct mcontext32);
+               uctx32.uc_mcsize = sig_avx ? sizeof(struct mcontext_avx32) : sizeof(struct mcontext32);
 
                uctx32.uc_mcontext = CAST_DOWN_EXPLICIT(user32_addr_t, ua_mctxp);
                
                if (copyout((caddr_t)&uctx32, ua_uctxp, sizeof (uctx32))) 
                        goto bad;
 
-               if (copyout((caddr_t)&mctx.mctx32, ua_mctxp, sizeof (struct mcontext32))) 
+               if (copyout((caddr_t)&mctxp->mctx_avx32, ua_mctxp, sizeof (struct mcontext_avx32))) 
                        goto bad;
 
                sinfo64.pad[0]  = tstate32->esp;
@@ -536,7 +553,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
 
                flavor = x86_THREAD_STATE64;
                state_count = x86_THREAD_STATE64_COUNT;
-               state = (void *)&mctx.mctx64.ss;
+               state = (void *)&mctxp->mctx_avx64.ss;
        } else {
                x86_thread_state32_t    *tstate32;
                user32_siginfo_t sinfo32;
@@ -571,7 +588,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
                if (copyout((caddr_t)&sinfo32, ua_sip, sizeof (sinfo32))) 
                        goto bad;
        
-               tstate32 = &mctx.mctx32.ss;
+               tstate32 = &mctxp->mctx_avx32.ss;
 
                tstate32->eip = CAST_DOWN_EXPLICIT(user32_addr_t, trampact);
                tstate32->esp = CAST_DOWN_EXPLICIT(user32_addr_t, ua_fp);
@@ -599,6 +616,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
        return;
 
 bad:
+
        proc_lock(p);
        SIGACTION(p, SIGILL) = SIG_DFL;
        sig = sigmask(SIGILL);
@@ -626,10 +644,11 @@ bad:
 int
 sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval)
 {
-        union {
-           struct mcontext32   mctx32;
-           struct mcontext64   mctx64;
-       } mctx;
+       union {
+               struct mcontext_avx32   mctx_avx32;
+               struct mcontext_avx64   mctx_avx64;
+       } mctx_store, *mctxp = &mctx_store;
+
        thread_t thread = current_thread();
        struct uthread * ut;
        int     error;
@@ -641,6 +660,8 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval)
        mach_msg_type_number_t fs_count;
        unsigned int           fs_flavor;
        void                *  fs;
+       int     rval = EJUSTRETURN;
+       boolean_t       sig_avx;
 
        ut = (struct uthread *)get_bsdthread_info(thread);
 
@@ -656,25 +677,35 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval)
                return (0);
        }
 
+       bzero(mctxp, sizeof(*mctxp));
+       sig_avx = ml_fpu_avx_enabled();
+
        if (proc_is64bit(p)) {
                struct user_ucontext64  uctx64;
 
                if ((error = copyin(uap->uctx, (void *)&uctx64, sizeof (uctx64))))
                        return(error);
 
-               if ((error = copyin(uctx64.uc_mcontext64, (void *)&mctx.mctx64, sizeof (struct mcontext64))))
+               if ((error = copyin(uctx64.uc_mcontext64, (void *)&mctxp->mctx_avx64, sizeof (struct mcontext_avx64))))
                        return(error);
 
                onstack = uctx64.uc_onstack & 01;
                ut->uu_sigmask = uctx64.uc_sigmask & ~sigcantmask;
 
-               ts_flavor = x86_THREAD_STATE64;
+               ts_flavor = x86_THREAD_STATE64;
                ts_count  = x86_THREAD_STATE64_COUNT;
-               ts = (void *)&mctx.mctx64.ss;
+               ts = (void *)&mctxp->mctx_avx64.ss;
 
-               fs_flavor = x86_FLOAT_STATE64;
-               fs_count  = x86_FLOAT_STATE64_COUNT;
-               fs = (void *)&mctx.mctx64.fs;
+               if (sig_avx) {
+                       fs_flavor = x86_AVX_STATE64;
+                       fs_count = x86_AVX_STATE64_COUNT;
+               }
+               else {
+                       fs_flavor = x86_FLOAT_STATE64;
+                       fs_count = x86_FLOAT_STATE64_COUNT;
+               }
+
+               fs = (void *)&mctxp->mctx_avx64.fs;
 
       } else {
                struct user_ucontext32  uctx32;
@@ -682,7 +713,7 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval)
                if ((error = copyin(uap->uctx, (void *)&uctx32, sizeof (uctx32)))) 
                        return(error);
 
-               if ((error = copyin(CAST_USER_ADDR_T(uctx32.uc_mcontext), (void *)&mctx.mctx32, sizeof (struct mcontext32)))) 
+               if ((error = copyin(CAST_USER_ADDR_T(uctx32.uc_mcontext), (void *)&mctxp->mctx_avx32, sizeof (struct mcontext_avx32)))) 
                        return(error);
 
                onstack = uctx32.uc_onstack & 01;
@@ -690,11 +721,18 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval)
 
                ts_flavor = x86_THREAD_STATE32;
                ts_count  = x86_THREAD_STATE32_COUNT;
-               ts = (void *)&mctx.mctx32.ss;
+               ts = (void *)&mctxp->mctx_avx32.ss;
+
+               if (sig_avx) {
+                       fs_flavor = x86_AVX_STATE32;
+                       fs_count = x86_AVX_STATE32_COUNT;
+               }
+               else {
+                       fs_flavor = x86_FLOAT_STATE32;
+                       fs_count = x86_FLOAT_STATE32_COUNT;
+               }
 
-               fs_flavor = x86_FLOAT_STATE32;
-               fs_count  = x86_FLOAT_STATE32_COUNT;
-               fs = (void *)&mctx.mctx32.fs;
+               fs = (void *)&mctxp->mctx_avx32.fs;
        }
 
        if (onstack)
@@ -704,20 +742,24 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval)
 
        if (ut->uu_siglist & ~ut->uu_sigmask)
                signal_setast(thread);
-
        /*
         * thread_set_state() does all the needed checks for the passed in
         * content
         */
-       if (thread_setstatus(thread, ts_flavor, ts, ts_count) != KERN_SUCCESS)
-               return(EINVAL);
-
+       if (thread_setstatus(thread, ts_flavor, ts, ts_count) != KERN_SUCCESS) {
+               rval = EINVAL;
+               goto error_ret;
+       }
+       
        ml_fp_setvalid(TRUE);
 
-       if (thread_setstatus(thread, fs_flavor, fs, fs_count)  != KERN_SUCCESS)
-               return(EINVAL);
+       if (thread_setstatus(thread, fs_flavor, fs, fs_count)  != KERN_SUCCESS) {
+               rval = EINVAL;
+               goto error_ret;
 
-       return (EJUSTRETURN);
+       }
+error_ret:
+       return rval;
 }
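
With these changes, sendsig() pushes an AVX-sized mcontext onto the signal stack when ml_fpu_avx_enabled() is true, and advertises which layout it used via uc_mcsize. A hedged sketch (not from this commit, assuming a 64-bit process) of how a SA_SIGINFO handler could distinguish the two layouts:

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ucontext.h>

    /* printf in a handler is not async-signal-safe; kept for brevity. */
    static void handler(int sig, siginfo_t *si, void *uap)
    {
        ucontext_t *uc = (ucontext_t *)uap;
        (void)si;

        if (uc->uc_mcsize == sizeof(struct __darwin_mcontext_avx64))
            printf("signal %d: kernel delivered AVX state\n", sig);
        else
            printf("signal %d: kernel delivered legacy x87/SSE state\n", sig);
    }

    int main(void)
    {
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGUSR1, &sa, NULL);
        raise(SIGUSR1);
        return 0;
    }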
 
 
index 89f97ebc19fd1d6cb68a1def29d735a85809f110..24807f7f37c6750581440c4611209b4acfd915d7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -46,6 +46,7 @@
 #include <sys/quota.h>
 #include <sys/dirent.h>
 #include <sys/event.h>
+#include <sys/disk.h>
 #include <kern/thread_call.h>
 
 #include <kern/locks.h>
@@ -312,7 +313,6 @@ typedef struct hfsmount {
        u_int64_t       hfs_max_pending_io;
                                        
        thread_call_t   hfs_syncer;           // removeable devices get sync'ed by this guy
-
 } hfsmount_t;
 
 #define HFS_META_DELAY     (100)
@@ -391,6 +391,7 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
  */
 #define HFS_RDONLY_DOWNGRADE      0x80000
 #define HFS_DID_CONTIG_SCAN      0x100000
+#define HFS_UNMAP                0x200000
 
 
 /* Macro to update next allocation block in the HFS mount structure.  If 
index aaac6d0dff2f0c5e70c73baca054994f692c1572..7a049916f6a27691fc5a8c46cd41a9c868a7041f 100644 (file)
@@ -985,7 +985,8 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        daddr64_t mdb_offset;
        int isvirtual = 0;
        int isroot = 0;
-
+       u_int32_t device_features = 0;
+       
        if (args == NULL) {
                /* only hfs_mountroot passes us NULL as the 'args' argument */
                isroot = 1;
@@ -1121,7 +1122,19 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        bzero(hfsmp, sizeof(struct hfsmount));
        
        hfs_chashinit_finish(hfsmp);
-
+       
+       /*
+        * See if the disk supports unmap (trim).
+        *
+        * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
+        * returned by vfs_ioattr.  We need to call VNOP_IOCTL ourselves.
+        */
+       if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
+               if (device_features & DK_FEATURE_UNMAP) {
+                       hfsmp->hfs_flags |= HFS_UNMAP;
+               }
+       }
+       
        /*
         *  Init the volume information structure
         */
@@ -1615,7 +1628,7 @@ error_exit:
                        vnode_rele(hfsmp->hfs_devvp);
                }
                hfs_delete_chash(hfsmp);
-
+               
                FREE(hfsmp, M_HFSMNT);
                vfs_setfsprivate(mp, NULL);
        }
index 598f1dd7b3d35224f202a504703066d57f515763..6eec7028b24ac658d18b907791728ff13beec7a6 100644 (file)
@@ -1184,7 +1184,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
        bzero(iterator, sizeof(*iterator));
 
        if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
-               return (result);
+               goto exit_nolock;
        }
 
        result = hfs_buildattrkey(cp->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
@@ -1228,6 +1228,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
        hfs_end_transaction(hfsmp);
 exit:
        hfs_unlock(cp);
+exit_nolock:
        FREE(iterator, M_TEMP);
        return MacToVFSError(result);
 }
@@ -1545,7 +1546,10 @@ exit:
        if (user_start) {
                vsunlock(user_start, user_len, TRUE);
        }
-       FREE(iterator, M_TEMP);
+       
+       if (iterator) {
+               FREE(iterator, M_TEMP);
+       }
 
        hfs_unlock(cp);
        
index 3d8255a9e18f95b44fa2f329e75f4a2917388760..bc58bd9477c974a21c31da9433cf1e6987fc089e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -87,7 +87,9 @@ Internal routines:
 #include <sys/types.h>
 #include <sys/buf.h>
 #include <sys/systm.h>
+#include <sys/sysctl.h>
 #include <sys/disk.h>
+#include <kern/kalloc.h>
 
 #include "../../hfs.h"
 #include "../../hfs_dbg.h"
@@ -96,6 +98,10 @@ Internal routines:
 
 #include "../headers/FileMgrInternal.h"
 
+#ifndef CONFIG_HFS_TRIM
+#define CONFIG_HFS_TRIM 0
+#endif
+
 
 enum {
        kBytesPerWord                   =       4,
@@ -158,6 +164,86 @@ static OSErr BlockAllocateKnown(
 static int free_extent_cache_active(
        ExtendedVCB             *vcb);
 
+
+/*
+;________________________________________________________________________________
+;
+; Routine:             hfs_unmap_free_extent
+;
+; Function:            Make note of a range of allocation blocks that should be
+;                              unmapped (trimmed).  That is, the given range of blocks no
+;                              longer have useful content, and the device can unmap the
+;                              previous contents.  For example, a solid state disk may reuse
+;                              the underlying storage for other blocks.
+;
+;                              This routine is only supported for journaled volumes.  The extent
+;                              being freed is passed to the journal code, and the extent will
+;                              be unmapped after the current transaction is written to disk.
+;
+; Input Arguments:
+;      hfsmp                   - The volume containing the allocation blocks.
+;      startingBlock   - The first allocation block of the extent being freed.
+;      numBlocks               - The number of allocation blocks of the extent being freed.
+;________________________________________________________________________________
+*/
+static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+       if (CONFIG_HFS_TRIM) {
+               u_int64_t offset;
+               u_int64_t length;
+               int err;
+               
+               if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
+                       offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+                       length = (u_int64_t) numBlocks * hfsmp->blockSize;
+       
+                       err = journal_trim_add_extent(hfsmp->jnl, offset, length);
+                       if (err) {
+                               printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
+                               hfsmp->hfs_flags &= ~HFS_UNMAP;
+                       }
+               }
+       }
+}
+
+
+/*
+;________________________________________________________________________________
+;
+; Routine:             hfs_unmap_alloc_extent
+;
+; Function:            Make note that a range of allocation blocks, some of
+;                              which may have previously been passed to hfs_unmap_free_extent,
+;                              is now in use on the volume.  The given blocks will be removed
+;                              from any pending DKIOCUNMAP.
+;
+; Input Arguments:
+;      hfsmp                   - The volume containing the allocation blocks.
+;      startingBlock   - The first allocation block of the extent being allocated.
+;      numBlocks               - The number of allocation blocks being allocated.
+;________________________________________________________________________________
+*/
+static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+       if (CONFIG_HFS_TRIM) {
+               u_int64_t offset;
+               u_int64_t length;
+               int err;
+               
+               if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
+                       offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+                       length = (u_int64_t) numBlocks * hfsmp->blockSize;
+                       
+                       err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
+                       if (err) {
+                               printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
+                               hfsmp->hfs_flags &= ~HFS_UNMAP;
+                       }
+               }
+       }
+}
+
+
 /*
 ;________________________________________________________________________________
 ;
@@ -1038,9 +1124,15 @@ Exit:
        if (err == noErr) {
                *actualNumBlocks = block - *actualStartBlock;
 
-       // sanity check
-       if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
-               panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
+               // sanity check
+               if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
+                       panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
+               }
+
+               /* Remove these blocks from the TRIM list if applicable */
+               if (CONFIG_HFS_TRIM) {
+                       hfs_unmap_alloc_extent(vcb, *actualStartBlock, *actualNumBlocks);
+               }       
        }
        else {
                *actualStartBlock = 0;
@@ -1212,7 +1304,10 @@ OSErr BlockMarkAllocated(
        // XXXdbg
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
 
-
+       if (CONFIG_HFS_TRIM) {
+               hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
+       }
+       
        //
        //      Pre-read the bitmap block containing the first word of allocation
        //
@@ -1365,10 +1460,12 @@ _______________________________________________________________________
 __private_extern__
 OSErr BlockMarkFree(
        ExtendedVCB             *vcb,
-       u_int32_t               startingBlock,
-       register u_int32_t      numBlocks)
+       u_int32_t               startingBlock_in,
+       register u_int32_t      numBlocks_in)
 {
        OSErr                   err;
+       u_int32_t       startingBlock = startingBlock_in;
+       u_int32_t       numBlocks = numBlocks_in;
        register u_int32_t      *currentWord;   //      Pointer to current word within bitmap block
        register u_int32_t      wordsLeft;              //      Number of words left in this bitmap block
        register u_int32_t      bitMask;                //      Word with given bits already set (ready to OR in)
@@ -1380,7 +1477,6 @@ OSErr BlockMarkFree(
        u_int32_t  wordsPerBlock;
     // XXXdbg
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
-       dk_discard_t discard;
 
        /*
         * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
@@ -1393,11 +1489,6 @@ OSErr BlockMarkFree(
                goto Exit;
        }
 
-       memset(&discard, 0, sizeof(dk_discard_t));
-       discard.offset = (uint64_t)startingBlock * (uint64_t)vcb->blockSize;
-       discard.length = (uint64_t)numBlocks * (uint64_t)vcb->blockSize;
-
-
        //
        //      Pre-read the bitmap block containing the first word of allocation
        //
@@ -1521,9 +1612,8 @@ Exit:
        if (buffer)
                (void)ReleaseBitmapBlock(vcb, blockRef, true);
 
-       if (err == noErr) {
-               // it doesn't matter if this fails, it's just informational anyway
-               VNOP_IOCTL(vcb->hfs_devvp, DKIOCDISCARD, (caddr_t)&discard, 0, vfs_context_kernel());
+       if (CONFIG_HFS_TRIM && err == noErr) {
+               hfs_unmap_free_extent(vcb, startingBlock_in, numBlocks_in);
        }
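
The two helpers above reduce to simple byte arithmetic: an extent of allocation blocks becomes a device-relative byte range, shifted by the volume's embedding offset. A worked example with illustrative values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t blockSize = 4096;          /* illustrative allocation block size */
        uint64_t hfsPlusIOPosOffset = 1024; /* illustrative embedded-volume offset */
        uint32_t startingBlock = 100, numBlocks = 8;

        uint64_t offset = (uint64_t)startingBlock * blockSize + hfsPlusIOPosOffset;
        uint64_t length = (uint64_t)numBlocks * blockSize;

        /* prints: trim extent offset=410624 length=32768 */
        printf("trim extent offset=%llu length=%llu\n",
               (unsigned long long)offset, (unsigned long long)length);
        return 0;
    }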
 
 
index 9cad355eb412b19ce1d14b483ce6bc73c3467739..3bdac83bc602328df37c708de6e759d2b5ac3da8 100644 (file)
@@ -51,6 +51,15 @@ _STRUCT_MCONTEXT32
        _STRUCT_X86_THREAD_STATE32      __ss;
        _STRUCT_X86_FLOAT_STATE32       __fs;
 };
+
+#define        _STRUCT_MCONTEXT_AVX32  struct __darwin_mcontext_avx32
+_STRUCT_MCONTEXT_AVX32
+{
+       _STRUCT_X86_EXCEPTION_STATE32   __es;
+       _STRUCT_X86_THREAD_STATE32      __ss;
+       _STRUCT_X86_AVX_STATE32         __fs;
+};
+
 #else /* !__DARWIN_UNIX03 */
 #define        _STRUCT_MCONTEXT32      struct mcontext32
 _STRUCT_MCONTEXT32
@@ -59,6 +68,15 @@ _STRUCT_MCONTEXT32
        _STRUCT_X86_THREAD_STATE32      ss;
        _STRUCT_X86_FLOAT_STATE32       fs;
 };
+
+#define        _STRUCT_MCONTEXT_AVX32  struct mcontext_avx32
+_STRUCT_MCONTEXT_AVX32
+{
+       _STRUCT_X86_EXCEPTION_STATE32   es;
+       _STRUCT_X86_THREAD_STATE32      ss;
+       _STRUCT_X86_AVX_STATE32         fs;
+};
+
 #endif /* __DARWIN_UNIX03 */
 #endif /* _STRUCT_MCONTEXT32 */
 
@@ -71,6 +89,15 @@ _STRUCT_MCONTEXT64
        _STRUCT_X86_THREAD_STATE64      __ss;
        _STRUCT_X86_FLOAT_STATE64       __fs;
 };
+
+#define        _STRUCT_MCONTEXT_AVX64  struct __darwin_mcontext_avx64
+_STRUCT_MCONTEXT_AVX64
+{
+       _STRUCT_X86_EXCEPTION_STATE64   __es;
+       _STRUCT_X86_THREAD_STATE64      __ss;
+       _STRUCT_X86_AVX_STATE64         __fs;
+};
+
 #else /* !__DARWIN_UNIX03 */
 #define        _STRUCT_MCONTEXT64      struct mcontext64
 _STRUCT_MCONTEXT64
@@ -79,6 +106,15 @@ _STRUCT_MCONTEXT64
        _STRUCT_X86_THREAD_STATE64      ss;
        _STRUCT_X86_FLOAT_STATE64       fs;
 };
+
+#define        _STRUCT_MCONTEXT_AVX64  struct mcontext_avx64
+_STRUCT_MCONTEXT_AVX64
+{
+       _STRUCT_X86_EXCEPTION_STATE64   es;
+       _STRUCT_X86_THREAD_STATE64      ss;
+       _STRUCT_X86_AVX_STATE64         fs;
+};
+
 #endif /* __DARWIN_UNIX03 */
 #endif /* _STRUCT_MCONTEXT64 */
 #endif /* __need_struct_mcontext */
index a290655846b5237454a6c5e573c39489be8a163f..7a00399cc23cd26e444a9a958bbd04a946858f8f 100644 (file)
@@ -2033,6 +2033,7 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
           int reuse_thread, int wake_thread, int return_directly)
 {
        int ret = 0;
+       boolean_t need_resume = FALSE;
 
        KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th));
 
@@ -2063,11 +2064,19 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
                if (tl->th_flags & TH_LIST_NEED_WAKEUP)
                        wakeup(tl);
                else
-                       thread_resume(th);
+                       need_resume = TRUE;
 
                tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);
                
                workqueue_unlock(p);
+
+               if (need_resume) {
+                       /*
+                        * need to do this outside of the workqueue spin lock
+                        * since thread_resume locks the thread via a full mutex
+                        */
+                       thread_resume(th);
+               }
        }
 }
 
index d8f3ae088c5bfe8206bc68e5df15a9a71f90b605..58aea8eb97614230c48e13f297edcd4db1554dae 100644 (file)
@@ -1050,12 +1050,12 @@ dev_free_name(devdirent_t * dirent_p)
                        if(dnp->dn_linklist == dirent_p) {
                                dnp->dn_linklist = dirent_p->de_nextlink;
                        }
-                       dirent_p->de_nextlink->de_prevlinkp 
-                           = dirent_p->de_prevlinkp;
-                       *dirent_p->de_prevlinkp = dirent_p->de_nextlink;
                }
                devfs_dn_free(dnp);
        }
+       
+       dirent_p->de_nextlink->de_prevlinkp = dirent_p->de_prevlinkp;
+       *(dirent_p->de_prevlinkp) = dirent_p->de_nextlink;
 
        /*
         * unlink ourselves from the directory on this plane
index 51be28e3c28faf4ba1f9d90df2d73c1397dc9e16..12cbc871bee38b850390442d0881983fe4bf51a6 100644 (file)
@@ -221,6 +221,7 @@ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr,
 #define IFM_FDX                0x00100000      /* Force full duplex */
 #define        IFM_HDX         0x00200000      /* Force half duplex */
 #define        IFM_FLOW        0x00400000      /* enable hardware flow control */
+#define IFM_EEE                0x00800000      /* Support energy efficient ethernet */
 #define IFM_FLAG0      0x01000000      /* Driver defined flag */
 #define IFM_FLAG1      0x02000000      /* Driver defined flag */
 #define IFM_FLAG2      0x04000000      /* Driver defined flag */
@@ -408,6 +409,7 @@ struct ifmedia_description {
     { IFM_FDX,      "full-duplex" },                \
     { IFM_HDX,      "half-duplex" },                \
     { IFM_FLOW,     "flow-control" },               \
+    { IFM_EEE,     "energy-efficient-ethernet" },  \
     { IFM_FLAG0,    "flag0" },                      \
     { IFM_FLAG1,    "flag1" },                      \
     { IFM_FLAG2,    "flag2" },                      \
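
IFM_EEE is reported in the media word, so userland can test it with the standard SIOCGIFMEDIA ioctl. A hedged sketch (not part of this commit; the interface name "en0" and driver support for EEE are assumptions):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/sockio.h>
    #include <net/if.h>
    #include <net/if_media.h>

    int main(void)
    {
        struct ifmediareq ifmr;
        int s = socket(AF_INET, SOCK_DGRAM, 0);
        if (s < 0)
            return 1;

        memset(&ifmr, 0, sizeof(ifmr));
        strlcpy(ifmr.ifm_name, "en0", sizeof(ifmr.ifm_name));  /* illustrative interface */

        if (ioctl(s, SIOCGIFMEDIA, &ifmr) == 0)
            printf("EEE is %s on the active media\n",
                   (ifmr.ifm_active & IFM_EEE) ? "set" : "not set");

        close(s);
        return 0;
    }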
index 1cfa5e11644e7deb7327d9d8e60d93115a486f28..32f4f280b39d93f1e11748f067c2dfcab7c0dda8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -329,6 +329,7 @@ struct ip6_fw_chain {
  * Function definitions.
  */
 void ip6_fw_init(void);
+void load_ip6fw(void);
 
 /* Firewall hooks */
 struct ip6_hdr;
index 143625b09da8b89c4a7a50b9f7c5fc1256c1fdb5..309686f7fd4b15a481eae16bbf1f5ce9b828cd8e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1741,10 +1741,12 @@ do { \
                        case IPV6_FW_FLUSH:
                        case IPV6_FW_ZERO:
                                {
-                               if (ip6_fw_ctl_ptr == NULL && load_ipfw() != 0)
-                                       return EINVAL;
-
-                               error = (*ip6_fw_ctl_ptr)(sopt);
+                               if (ip6_fw_ctl_ptr == NULL)
+                                       load_ip6fw();
+                               if (ip6_fw_ctl_ptr != NULL)
+                                       error = (*ip6_fw_ctl_ptr)(sopt);
+                               else
+                                       return ENOPROTOOPT;
                                }
                                break;
 #endif /* IPFIREWALL */
@@ -1909,10 +1911,12 @@ do { \
 #if IPFIREWALL
                        case IPV6_FW_GET:
                                {
-                               if (ip6_fw_ctl_ptr == NULL && load_ipfw() != 0)
-                                       return EINVAL;
-
-                               error = (*ip6_fw_ctl_ptr)(sopt);
+                               if (ip6_fw_ctl_ptr == NULL)
+                                       load_ip6fw();
+                               if (ip6_fw_ctl_ptr != NULL)
+                                       error = (*ip6_fw_ctl_ptr)(sopt);
+                               else
+                                       return ENOPROTOOPT;
                                }
                                break;
 #endif /* IPFIREWALL */
index 3a665a2c4b5fd42e5c9c044f7ed7dfea94b981b8..169b7992dc2cb5d706d54048761461938c43537a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -542,7 +542,7 @@ rip6_output(
 }
 
 #if IPFW2
-static void
+__private_extern__ void
 load_ip6fw(void)
 {
        ip6_fw_init();
index 50aafaa53867be513df208efe1731b32da8f69a9..5718861f67633d5c49eab6d1bfc8fff9a66eb316 100644 (file)
@@ -191,6 +191,7 @@ struct buf {
  */
 #define B_NEED_IODONE   0x20000000     /* need biodone on the real_bp associated with a cluster_io */
 #define B_COMMIT_UPL    0x40000000     /* commit/abort the UPL on I/O success/failure */
+#define B_TDONE                0x80000000      /* buf_t that is part of a cluster level transaction has completed */
 
 
 /* Flags to low-level allocation routines. */
index 6013db9dfb3579c461b270e8423dbe40c110866f..0232617caec8be96ddd47319f68726d68dd386a3 100644 (file)
@@ -51,7 +51,7 @@
  * DKIOCISWRITABLE                       is media writable?
  *
  * DKIOCREQUESTIDLE                      idle media
- * DKIOCDISCARD                          delete unused data
+ * DKIOCUNMAP                            delete unused data
  *
  * DKIOCGETMAXBLOCKCOUNTREAD             get maximum block count for reads
  * DKIOCGETMAXBLOCKCOUNTWRITE            get maximum block count for writes
  * DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT  get minimum segment alignment in bytes
  * DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT get maximum segment width in bits
  *
+ * DKIOCGETFEATURES                      get device's feature set
  * DKIOCGETPHYSICALBLOCKSIZE             get device's block size
  * DKIOCGETCOMMANDPOOLSIZE               get device's queue depth
  */
 
+#define DK_FEATURE_UNMAP                      0x00000010
+
 typedef struct
 {
     uint64_t               offset;
     uint64_t               length;
-
-    uint8_t                reserved0128[16];       /* reserved, clear to zero */
-} dk_discard_t;
+} dk_extent_t;
 
 typedef struct
 {
@@ -103,6 +104,18 @@ typedef struct
 #endif /* !__LP64__ */
 } dk_format_capacities_t;
 
+typedef struct
+{
+    dk_extent_t *          extents;
+    uint32_t               extentsCount;
+
+#ifdef __LP64__
+    uint8_t                reserved0096[4];        /* reserved, clear to zero */
+#else /* !__LP64__ */
+    uint8_t                reserved0064[8];        /* reserved, clear to zero */
+#endif /* !__LP64__ */
+} dk_unmap_t;
+
 #define DKIOCEJECT                            _IO('d', 21)
 #define DKIOCSYNCHRONIZECACHE                 _IO('d', 22)
 
@@ -117,7 +130,7 @@ typedef struct
 #define DKIOCISWRITABLE                       _IOR('d', 29, uint32_t)
 
 #define DKIOCREQUESTIDLE                      _IO('d', 30)
-#define DKIOCDISCARD                          _IOW('d', 31, dk_discard_t)
+#define DKIOCUNMAP                            _IOW('d', 31, dk_unmap_t)
 
 #define DKIOCGETMAXBLOCKCOUNTREAD             _IOR('d', 64, uint64_t)
 #define DKIOCGETMAXBLOCKCOUNTWRITE            _IOR('d', 65, uint64_t)
@@ -132,11 +145,21 @@ typedef struct
 #define DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT  _IOR('d', 74, uint64_t)
 #define DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT _IOR('d', 75, uint64_t)
 
+#define DKIOCGETFEATURES                      _IOR('d', 76, uint32_t)
 #define DKIOCGETPHYSICALBLOCKSIZE             _IOR('d', 77, uint32_t)
 #define DKIOCGETCOMMANDPOOLSIZE               _IOR('d', 78, uint32_t)
 
+typedef struct
+{
+    uint64_t               offset;
+    uint64_t               length;
+
+    uint8_t                reserved0128[16];       /* reserved, clear to zero */
+} dk_discard_t __attribute__ ((deprecated));
+
+#define DKIOCDISCARD                          _IOW('d', 31, dk_discard_t)
+
 #ifdef KERNEL
-#define DK_FEATURE_DISCARD                    0x00000010
 #define DK_FEATURE_FORCE_UNIT_ACCESS          0x00000001
 #define DKIOCGETBLOCKCOUNT32                  _IOR('d', 25, uint32_t)
 #define DKIOCSETBLOCKSIZE                     _IOW('d', 24, uint32_t)
@@ -144,7 +167,6 @@ typedef struct
 #define DKIOCISSOLIDSTATE                    _IOR('d', 79, uint32_t)
 #define DKIOCISVIRTUAL                        _IOR('d', 72, uint32_t)
 #define DKIOCGETBASE                          _IOR('d', 73, uint64_t)
-#define DKIOCGETFEATURES                      _IOR('d', 76, uint32_t)
 #endif /* KERNEL */
 
 #endif /* _SYS_DISK_H_ */
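
With DKIOCGETFEATURES now exported to userland and DKIOCDISCARD replaced by the extent-based DKIOCUNMAP, a privileged process can unmap ranges directly. A hedged sketch (not part of this commit; the device path is illustrative, root is required, and the unmapped bytes are destroyed):

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/disk.h>

    int main(void)
    {
        int fd = open("/dev/rdisk1", O_RDWR);   /* illustrative device */
        if (fd < 0)
            return 1;

        uint32_t features = 0;
        if (ioctl(fd, DKIOCGETFEATURES, &features) == 0 &&
            (features & DK_FEATURE_UNMAP)) {
            dk_extent_t extent = { .offset = 0, .length = 1048576 };
            dk_unmap_t  unmap;

            memset(&unmap, 0, sizeof(unmap));   /* clears the reserved field */
            unmap.extents = &extent;
            unmap.extentsCount = 1;

            if (ioctl(fd, DKIOCUNMAP, &unmap) == 0)
                printf("unmapped 1 MiB at offset 0\n");
        }
        close(fd);
        return 0;
    }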
index 6767bad2dae78df9cf6f7d6dc302d31170ed79f4..9f7b789c9d35af736dd52a090ef05a2f746a16f2 100644 (file)
@@ -168,6 +168,7 @@ __BEGIN_DECLS
 #define        DBG_NETIPSEC    128     /* IPsec Protocol  */
 
 /* **** The Kernel Debug Sub Classes for IOKIT (DBG_IOKIT) **** */
+#define DBG_IOINTC                     0       /* Interrupt controller */
 #define DBG_IOWORKLOOP         1       /* Work from work loop */
 #define DBG_IOINTES                    2       /* Interrupt event source */
 #define DBG_IOCLKES                    3       /* Clock event source */
index 598471f91fc9227415b709d991e83c370dc3149a..8633a14652347f49e06483bb172cbe4e2bf11cef 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -480,7 +480,8 @@ struct vfsioattr {
        void *          io_reserved[2];         /* extended attribute information */
 };
 
-#define VFS_IOATTR_FLAGS_FUA   0x01    /* Write-through cache supported */
+#define VFS_IOATTR_FLAGS_FUA           0x01    /* Write-through cache supported */
+#define VFS_IOATTR_FLAGS_UNMAP         0x02    /* Unmap (trim) supported */
 
 /*
  * Filesystem Registration information
index d36e8ea744593d58b64c7aad69f9b0a56bd61ebf..b069b1a0fa05a5489b43e42f440c8bb4eb0fc7c8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -202,6 +202,7 @@ struct mount {
  * ioflags
  */
 #define MNT_IOFLAGS_FUA_SUPPORTED      0x00000001
+#define MNT_IOFLAGS_UNMAP_SUPPORTED    0x00000002
 
 /*
  * ioqueue depth for devices that don't report one
index f1f9f649be47ba5e7b9d737fb3b4de517a7e08db..499056a3b5eb8605aa25e55289b80fee126a14fa 100644 (file)
@@ -132,6 +132,7 @@ static lck_grp_t    *cl_mtx_grp;
 static lck_attr_t      *cl_mtx_attr;
 static lck_grp_attr_t   *cl_mtx_grp_attr;
 static lck_mtx_t       *cl_mtxp;
+static lck_mtx_t       *cl_transaction_mtxp;
 
 
 #define        IO_UNKNOWN      0
@@ -242,6 +243,11 @@ cluster_init(void) {
 
        if (cl_mtxp == NULL)
                panic("cluster_init: failed to allocate cl_mtxp");
+
+       cl_transaction_mtxp = lck_mtx_alloc_init(cl_mtx_grp, cl_mtx_attr);
+
+       if (cl_transaction_mtxp == NULL)
+               panic("cluster_init: failed to allocate cl_transaction_mtxp");
 }
 
 
@@ -510,26 +516,36 @@ cluster_iodone(buf_t bp, void *callback_arg)
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
                     cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
 
-       for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
-               /*
-                * all I/O requests that are part of this transaction
-                * have to complete before we can process it
-                */
-               if ( !(cbp->b_flags & B_DONE)) {
+       if (cbp_head->b_trans_next || !(cbp_head->b_flags & B_EOT)) {
+
+               lck_mtx_lock_spin(cl_transaction_mtxp);
+
+               bp->b_flags |= B_TDONE;
+               
+               for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
+                       /*
+                        * all I/O requests that are part of this transaction
+                        * have to complete before we can process it
+                        */
+                       if ( !(cbp->b_flags & B_TDONE)) {
 
+                               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
+                                            cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
+
+                               lck_mtx_unlock(cl_transaction_mtxp);
+                               return 0;
+                       }
+                       if (cbp->b_flags & B_EOT)
+                               transaction_complete = TRUE;
+               }
+               lck_mtx_unlock(cl_transaction_mtxp);
+
+               if (transaction_complete == FALSE) {
                        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
-                                    cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
+                                    cbp_head, 0, 0, 0, 0);
 
                        return 0;
                }
-               if (cbp->b_flags & B_EOT)
-                       transaction_complete = TRUE;
-       }
-       if (transaction_complete == FALSE) {
-               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
-                            cbp_head, 0, 0, 0, 0);
-
-               return 0;
        }
        error       = 0;
        total_size  = 0;
@@ -759,6 +775,14 @@ cluster_complete_transaction(buf_t *cbp_head, void *callback_arg, int *retval, i
                for (cbp = *cbp_head; cbp; cbp = cbp->b_trans_next)
                        buf_biowait(cbp);
        }
+       /*
+        * we've already waited on all of the I/Os in this transaction,
+        * so mark all of the buf_t's in this transaction as B_TDONE
+        * so that cluster_iodone sees the transaction as completed
+        */
+       for (cbp = *cbp_head; cbp; cbp = cbp->b_trans_next)
+               cbp->b_flags |= B_TDONE;
+
        error = cluster_iodone(*cbp_head, callback_arg);
 
        if ( !(flags & CL_ASYNC) && error && *retval == 0) {
index 7ff95f4788c8fa6e8cb593338b497a9a9fdb06ea..0a967aba97e5d1e3c9bad9203abd396567520723 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1995-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -52,6 +52,7 @@
 #include <sys/ubc.h>
 #include <sys/malloc.h>
 #include <kern/thread.h>
+#include <kern/kalloc.h>
 #include <sys/disk.h>
 #include <sys/kdebug.h>
 #include <miscfs/specfs/specdev.h>
@@ -79,8 +80,28 @@ extern task_t kernel_task;
 
 #include "vfs_journal.h"
 
+#ifndef CONFIG_HFS_TRIM
+#define CONFIG_HFS_TRIM 0
+#endif
+
 #if JOURNALING
 
+//
+// By default, we grow the list of extents to trim by one page at a time.
+// We'll opt to flush a transaction if it contains at least
+// JOURNAL_FLUSH_TRIM_EXTENTS extents to be trimmed (even if the number
+// of modified blocks is small).
+//
+enum {
+    JOURNAL_DEFAULT_TRIM_BYTES = PAGE_SIZE,
+    JOURNAL_DEFAULT_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_BYTES / sizeof(dk_extent_t),
+    JOURNAL_FLUSH_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_EXTENTS * 15 / 16
+};
+
+unsigned int jnl_trim_flush_limit = JOURNAL_FLUSH_TRIM_EXTENTS;
+SYSCTL_UINT (_kern, OID_AUTO, jnl_trim_flush, CTLFLAG_RW, &jnl_trim_flush_limit, 0, "number of trimmed extents to cause a journal flush");
+
+
 /* XXX next prototytype should be from libsa/stdlib.h> but conflicts libkern */
 __private_extern__ void qsort(
     void * array,
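
With a 4 KiB page and a 16-byte dk_extent_t, the defaults above work out to 256 extents per grow and a flush threshold of 240. A minimal sketch (not part of this commit) of inspecting and tuning the threshold; the new value is illustrative and writing it requires root:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        unsigned int limit = 0;
        size_t len = sizeof(limit);

        if (sysctlbyname("kern.jnl_trim_flush", &limit, &len, NULL, 0) == 0)
            printf("journal flushes once %u trim extents are pending\n", limit);

        unsigned int new_limit = 480;   /* illustrative value */
        (void)sysctlbyname("kern.jnl_trim_flush", NULL, NULL,
                           &new_limit, sizeof(new_limit));
        return 0;
    }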
@@ -1789,24 +1810,20 @@ journal_open(struct vnode *jvp,
        /*
         * The volume has probably been resized (such that we had to adjust the
         * logical sector size), or copied to media with a different logical
-        * sector size.  If the journal is empty, then just switch to the
-        * current logical sector size.  If the journal is not empty, then
-        * fail to open the journal.
+        * sector size.
+        *
+        * Temporarily change the device's logical block size to match the
+        * journal's header size.  This will allow us to replay the journal
+        * safely.  If the replay succeeds, we will update the journal's header
+        * size (later in this function).
         */
-        
-       if (jnl->jhdr->start == jnl->jhdr->end) {
-           printf("jnl: %s: open: changing journal header size from %d to %u\n",
-               jdev_name, jnl->jhdr->jhdr_size, phys_blksz);
-           jnl->jhdr->jhdr_size = phys_blksz;
-           if (write_journal_header(jnl, 1)) {
-               printf("jnl: %s: open: failed to update journal header size\n", jdev_name);
-               goto bad_journal;
-           }
-       } else {
-           printf("jnl: %s: open: phys_blksz %u does not match journal header size %d, and journal is not empty!\n",
-               jdev_name, phys_blksz, jnl->jhdr->jhdr_size);
-           goto bad_journal;
-       }
+
+       orig_blksz = phys_blksz;
+       phys_blksz = jnl->jhdr->jhdr_size;
+       VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, &context);
+
+       printf("jnl: %s: open: temporarily switched block size from %u to %u\n",
+              jdev_name, orig_blksz, phys_blksz);
     }
 
     if (   jnl->jhdr->start <= 0
@@ -1859,14 +1876,32 @@ journal_open(struct vnode *jvp,
        goto bad_journal;
     }
 
+    /*
+     * When we get here, we know that the journal is empty (jnl->jhdr->start ==
+     * jnl->jhdr->end).  If the device's logical block size was different from
+     * the journal's header size, then we can now restore the device's logical
+     * block size and update the journal's header size to match.
+     *
+     * Note that we also adjust the journal's start and end so that they will
+     * be aligned on the new block size.  We pick a new sequence number to
+     * avoid any problems if a replay found previous transactions using the old
+     * journal header size.  (See the comments in journal_create(), above.)
+     */
     if (orig_blksz != 0) {
        VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
        phys_blksz = orig_blksz;
-       if (orig_blksz < (uint32_t)jnl->jhdr->jhdr_size) {
-           printf("jnl: %s: open: jhdr_size is %d but orig phys blk size is %d.  switching.\n",
-               jdev_name, jnl->jhdr->jhdr_size, orig_blksz);
-                                  
-           jnl->jhdr->jhdr_size = orig_blksz;
+       orig_blksz = 0;
+       
+       jnl->jhdr->jhdr_size = phys_blksz;
+       jnl->jhdr->start = phys_blksz;
+       jnl->jhdr->end = phys_blksz;
+       jnl->jhdr->sequence_num = (jnl->jhdr->sequence_num +
+                                  (journal_size / phys_blksz) +
+                                  (random() % 16384)) & 0x00ffffff;
+       
+       if (write_journal_header(jnl, 1)) {
+               printf("jnl: %s: open: failed to update journal header size\n", jdev_name);
+               goto bad_journal;
        }
     }
 
@@ -1876,6 +1911,7 @@ journal_open(struct vnode *jvp,
     // set this now, after we've replayed the journal
     size_up_tbuffer(jnl, tbuffer_size, phys_blksz);
 
+    // TODO: Does this need to change if the device's logical block size changed?
     if ((off_t)(jnl->jhdr->blhdr_size/sizeof(block_info)-1) > (jnl->jhdr->size/jnl->jhdr->jhdr_size)) {
            printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size,
                   jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size);
@@ -1890,6 +1926,7 @@ journal_open(struct vnode *jvp,
     if (orig_blksz != 0) {
        phys_blksz = orig_blksz;
        VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
+       printf("jnl: %s: open: restored block size after error\n", jdev_name);
     }
     kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz);
   bad_kmem_alloc:
@@ -2752,6 +2789,383 @@ journal_kill_block(journal *jnl, struct buf *bp)
 }
 
 
+/*
+;________________________________________________________________________________
+;
+; Routine:             journal_trim_realloc
+;
+; Function:            Increase the amount of memory allocated for the list of extents
+;                              to be unmapped (trimmed).  This routine will be called when
+;                              adding an extent to the list, and the list already occupies
+;                              all of the space allocated to it.  This routine returns ENOMEM
+;                              if unable to allocate more space, or 0 if the extent list was
+;                              grown successfully.
+;
+; Input Arguments:
+;      tr                      - The transaction containing the extent list.
+;
+; Output:
+;      (result)        - ENOMEM or 0.
+;
+; Side effects:
+;       The allocated_count and extents fields of tr->trim are updated
+;       if the function returned 0.
+;________________________________________________________________________________
+*/
+static int
+journal_trim_realloc(transaction *tr)
+{
+       if (CONFIG_HFS_TRIM) {
+               void *new_extents;
+               uint32_t new_allocated_count;
+               
+               new_allocated_count = tr->trim.allocated_count + JOURNAL_DEFAULT_TRIM_EXTENTS;
+               new_extents = kalloc(new_allocated_count * sizeof(dk_extent_t));
+               if (new_extents == NULL) {
+                       printf("journal_trim_realloc: unable to grow extent list!\n");
+                       /*
+                        * Since we could be called when allocating space previously marked
+                        * to be trimmed, we need to empty out the list to be safe.
+                        */
+                       tr->trim.extent_count = 0;
+                       return ENOMEM;
+               }
+               
+               /* Copy the old extent list to the newly allocated list. */
+               if (tr->trim.extents != NULL) {
+                       memmove(new_extents,
+                                       tr->trim.extents,
+                                       tr->trim.allocated_count * sizeof(dk_extent_t));
+                       kfree(tr->trim.extents,
+                                 tr->trim.allocated_count * sizeof(dk_extent_t));
+               }
+               
+               tr->trim.allocated_count = new_allocated_count;
+               tr->trim.extents = new_extents;
+       }
+       return 0;
+}
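
The grow-copy-free dance above is forced by kalloc/kfree taking explicit sizes; there is no kernel realloc. A minimal userspace analogue of the same pattern (toy types; GROW_QUANTUM stands in for JOURNAL_DEFAULT_TRIM_EXTENTS):

    #include <stdlib.h>
    #include <string.h>

    #define GROW_QUANTUM 256        /* analogue of JOURNAL_DEFAULT_TRIM_EXTENTS */

    struct ext { unsigned long long offset, length; };

    /* Grow a fixed-quantum array the way journal_trim_realloc does: allocate
     * a larger buffer, copy the old contents, free the old buffer.  On
     * allocation failure, empty the list (mirroring the kernel's safety
     * behaviour) and return -1. */
    static int
    grow(struct ext **list, unsigned *allocated, unsigned *used)
    {
        unsigned new_allocated = *allocated + GROW_QUANTUM;
        struct ext *new_list = malloc(new_allocated * sizeof(struct ext));

        if (new_list == NULL) {
            *used = 0;
            return -1;
        }
        if (*list != NULL) {
            memcpy(new_list, *list, *allocated * sizeof(struct ext));
            free(*list);
        }
        *list = new_list;
        *allocated = new_allocated;
        return 0;
    }

    int
    main(void)
    {
        struct ext *list = NULL;
        unsigned allocated = 0, used = 0;
        return grow(&list, &allocated, &used);
    }
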
+
+
+/*
+;________________________________________________________________________________
+;
+; Routine:             journal_trim_add_extent
+;
+; Function:            Make note of a range of bytes that should be unmapped
+;                              (trimmed).  That is, the given range of bytes no longer has
+;                              useful content, and the device can unmap the previous
+;                              contents.  For example, a solid state disk may reuse the
+;                              underlying storage for other blocks.
+;
+;                              The extent will be unmapped after the transaction is written
+;                              to the journal.
+;
+; Input Arguments:
+;      jnl                     - The journal for the volume containing the byte range.
+;      offset          - The first byte of the range to be trimmed.
+;      length          - The number of bytes of the extent being trimmed.
+;________________________________________________________________________________
+*/
+__private_extern__ int
+journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length)
+{
+       if (CONFIG_HFS_TRIM) {
+               uint64_t end;
+               transaction *tr;
+               dk_extent_t *extent;
+               uint32_t insert_index;
+               uint32_t replace_count;
+               
+               CHECK_JOURNAL(jnl);
+       
+               if (jnl->flags & JOURNAL_TRIM_ERR) {
+                       /*
+                        * A previous trim failed, so we have disabled trim for this volume
+                        * for as long as it remains mounted.
+                        */
+                       return 0;
+               }
+               
+               if (jnl->flags & JOURNAL_INVALID) {
+                       return EINVAL;
+               }
+       
+               tr = jnl->active_tr;
+               CHECK_TRANSACTION(tr);
+       
+               if (jnl->owner != current_thread()) {
+                       panic("jnl: trim_add_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n",
+                                 jnl, jnl->owner, current_thread());
+               }
+       
+               free_old_stuff(jnl);
+               
+               end = offset + length;
+               
+               /*
+                * Find the range of existing extents that can be combined with the
+                * input extent.  We start by counting the number of extents that end
+                * strictly before the input extent, then count the number of extents
+                * that overlap or are contiguous with the input extent.
+                */
+               extent = tr->trim.extents;
+               insert_index = 0;
+               while (insert_index < tr->trim.extent_count && extent->offset + extent->length < offset) {
+                       ++insert_index;
+                       ++extent;
+               }
+               replace_count = 0;
+               while (insert_index + replace_count < tr->trim.extent_count && extent->offset <= end) {
+                       ++replace_count;
+                       ++extent;
+               }
+               
+               /*
+                * If none of the existing extents can be combined with the input extent,
+                * then just insert it in the list (before item number insert_index).
+                */
+               if (replace_count == 0) {
+                       /* If the list was already full, we need to grow it. */
+                       if (tr->trim.extent_count == tr->trim.allocated_count) {
+                               if (journal_trim_realloc(tr) != 0) {
+                                       printf("jnl: trim_add_extent: out of memory!");
+                                       return ENOMEM;
+                               }
+                       }
+                       
+                       /* Shift any existing extents with larger offsets. */
+                       if (insert_index < tr->trim.extent_count) {
+                               memmove(&tr->trim.extents[insert_index+1],
+                                               &tr->trim.extents[insert_index],
+                                               (tr->trim.extent_count - insert_index) * sizeof(dk_extent_t));
+                       }
+                       tr->trim.extent_count++;
+                       
+                       /* Store the new extent in the list. */
+                       tr->trim.extents[insert_index].offset = offset;
+                       tr->trim.extents[insert_index].length = length;
+                       
+                       /* We're done. */
+                       return 0;
+               }
+               
+               /*
+                * Update extent number insert_index to be the union of the input extent
+                * and all of the replaced extents.
+                */
+               if (tr->trim.extents[insert_index].offset < offset)
+                       offset = tr->trim.extents[insert_index].offset;
+               extent = &tr->trim.extents[insert_index + replace_count - 1];
+               if (extent->offset + extent->length > end)
+                       end = extent->offset + extent->length;
+               tr->trim.extents[insert_index].offset = offset;
+               tr->trim.extents[insert_index].length = end - offset;
+               
+               /*
+                * If we were replacing more than one existing extent, then shift any
+                * extents with larger offsets, and update the count of extents.
+                *
+                * We're going to leave extent #insert_index alone since it was just updated, above.
+                * We need to move extents from index (insert_index + replace_count) through the end of
+                * the list by (replace_count - 1) positions so that they overwrite extent #(insert_index + 1).
+                */
+               if (replace_count > 1 && (insert_index + replace_count) < tr->trim.extent_count) {
+                       memmove(&tr->trim.extents[insert_index + 1],
+                                       &tr->trim.extents[insert_index + replace_count],
+                                       (tr->trim.extent_count - insert_index - replace_count) * sizeof(dk_extent_t));
+               }
+               tr->trim.extent_count -= replace_count - 1;
+    }
+    return 0;
+}
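
The two scan loops plus the union step above keep tr->trim.extents sorted and non-overlapping. A self-contained userspace model of the same merge logic (toy fixed-size array; all names are illustrative):

    #include <assert.h>
    #include <stdio.h>

    /* Toy model of the sorted, non-overlapping extent list in tr->trim. */
    struct ext { unsigned long long off, len; };

    static struct ext list[16];
    static unsigned count;

    static void
    add(unsigned long long off, unsigned long long len)
    {
        unsigned long long end = off + len;
        unsigned i = 0, replace = 0;

        while (i < count && list[i].off + list[i].len < off)
            i++;                                 /* extents ending before us */
        while (i + replace < count && list[i + replace].off <= end)
            replace++;                           /* extents we merge with */

        if (replace == 0) {                      /* plain insertion */
            for (unsigned j = count; j > i; j--)
                list[j] = list[j - 1];
            list[i].off = off;
            list[i].len = len;
            count++;
            return;
        }
        if (list[i].off < off)                   /* union with first overlap */
            off = list[i].off;
        if (list[i + replace - 1].off + list[i + replace - 1].len > end)
            end = list[i + replace - 1].off + list[i + replace - 1].len;
        list[i].off = off;
        list[i].len = end - off;
        for (unsigned j = i + 1; j + replace - 1 < count; j++)
            list[j] = list[j + replace - 1];     /* close the gap */
        count -= replace - 1;
    }

    int
    main(void)
    {
        add(0, 100);       /* [0,100)   */
        add(200, 100);     /* [200,300) */
        add(90, 120);      /* overlaps both: collapses to [0,300) */
        assert(count == 1 && list[0].off == 0 && list[0].len == 300);
        printf("coalesced into [%llu,%llu)\n",
               list[0].off, list[0].off + list[0].len);
        return 0;
    }
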
+
+
+/*
+;________________________________________________________________________________
+;
+; Routine:             journal_trim_remove_extent
+;
+; Function:            Make note of a range of bytes (some of which may previously have
+;                              been passed to journal_trim_add_extent) that is now in use on
+;                              the volume.  The given bytes will not be trimmed as part of
+;                              this transaction.
+;
+; Input Arguments:
+;      jnl                     - The journal for the volume containing the byte range.
+;      offset          - The first byte of the range that should not be trimmed.
+;      length          - The number of bytes in the range that should not be trimmed.
+;________________________________________________________________________________
+*/
+__private_extern__ int
+journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length)
+{
+       if (CONFIG_HFS_TRIM) {
+               u_int64_t end;
+               dk_extent_t *extent;
+               transaction *tr;
+               u_int32_t keep_before;
+               u_int32_t keep_after;
+               
+               CHECK_JOURNAL(jnl);
+       
+               if (jnl->flags & JOURNAL_TRIM_ERR) {
+                       /*
+                        * A previous trim failed, so we have disabled trim for this volume
+                        * for as long as it remains mounted.
+                        */
+                       return 0;
+               }
+               
+               if (jnl->flags & JOURNAL_INVALID) {
+                       return EINVAL;
+               }
+       
+               tr = jnl->active_tr;
+               CHECK_TRANSACTION(tr);
+       
+               if (jnl->owner != current_thread()) {
+                       panic("jnl: trim_remove_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n",
+                                 jnl, jnl->owner, current_thread());
+               }
+       
+               free_old_stuff(jnl);
+       
+               end = offset + length;
+       
+               /*
+                * Find any existing extents that start before or end after the input
+                * extent.  These extents will be modified if they overlap the input
+                * extent.  Other extents between them will be deleted.
+                */
+               extent = tr->trim.extents;
+               keep_before = 0;
+               while (keep_before < tr->trim.extent_count && extent->offset < offset) {
+                       ++keep_before;
+                       ++extent;
+               }
+               keep_after = keep_before;
+               if (keep_after > 0) {
+                       /* See if previous extent extends beyond both ends of input extent. */
+                       --keep_after;
+                       --extent;
+               }
+               while (keep_after < tr->trim.extent_count && (extent->offset + extent->length) <= end) {
+                       ++keep_after;
+                       ++extent;
+               }
+               
+               /*
+                * When we get here, the first keep_before extents (0 .. keep_before-1)
+                * start before the input extent, and extents (keep_after .. extent_count-1)
+                * end after the input extent.  We'll need to keep all of those extents,
+                * but possibly modify #(keep_before-1) and #keep_after to remove the portion
+                * that overlaps with the input extent.
+                */
+               
+               /*
+                * Does the input extent start after and end before the same existing
+                * extent?  If so, we have to "punch a hole" in that extent and convert
+                * it to two separate extents.
+                */
+               if (keep_before >  keep_after) {
+                       /* If the list was already full, we need to grow it. */
+                       if (tr->trim.extent_count == tr->trim.allocated_count) {
+                               if (journal_trim_realloc(tr) != 0) {
+                                       printf("jnl: trim_remove_extent: out of memory!");
+                                       return ENOMEM;
+                               }
+                       }
+                       
+                       /*
+                        * Make room for a new extent by shifting extents #keep_after and later
+                        * down by one extent.  When we're done, extents #keep_before and
+                        * #keep_after will be identical, and we can fall through to removing
+                        * the portion that overlaps the input extent.
+                        */
+                       memmove(&tr->trim.extents[keep_before],
+                                       &tr->trim.extents[keep_after],
+                                       (tr->trim.extent_count - keep_after) * sizeof(dk_extent_t));
+                       ++tr->trim.extent_count;
+                       ++keep_after;
+                       
+                       /*
+                        * Fall through.  We now have the case where the length of extent
+                        * #(keep_before - 1) needs to be updated, and the start of extent
+                        * #(keep_after) needs to be updated.
+                        */
+               }
+               
+               /*
+                * May need to truncate the end of extent #(keep_before - 1) if it overlaps
+                * the input extent.
+                */
+               if (keep_before > 0) {
+                       extent = &tr->trim.extents[keep_before - 1];
+                       if (extent->offset + extent->length > offset) {
+                               extent->length = offset - extent->offset;
+                       }
+               }
+               
+               /*
+                * May need to update the start of extent #(keep_after) if it overlaps the
+                * input extent.
+                */
+               if (keep_after < tr->trim.extent_count) {
+                       extent = &tr->trim.extents[keep_after];
+                       if (extent->offset < end) {
+                               extent->length = extent->offset + extent->length - end;
+                               extent->offset = end;
+                       }
+               }
+               
+               /*
+                * If there were whole extents that overlapped the input extent, get rid
+                * of them by shifting any following extents, and updating the count.
+                */
+               if (keep_after > keep_before && keep_after < tr->trim.extent_count) {
+                       memmove(&tr->trim.extents[keep_before],
+                                       &tr->trim.extents[keep_after],
+                                       (tr->trim.extent_count - keep_after) * sizeof(dk_extent_t));
+               }
+               tr->trim.extent_count -= keep_after - keep_before;
+       }
+       return 0;
+}
+
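
The interesting case above is keep_before > keep_after: the removed range sits strictly inside one pending extent, which must be split in two. A minimal sketch of that hole-punch arithmetic (toy values):

    #include <assert.h>

    /* Removing [off,end) from a single extent that spans it yields two
     * extents; the truncation cases just shorten one end. */
    struct ext { unsigned long long off, len; };

    int
    main(void)
    {
        struct ext e = { 0, 300 };               /* pending trim of [0,300)   */
        unsigned long long off = 100, end = 200; /* blocks reallocated: keep! */

        struct ext left  = { e.off, off - e.off };        /* [0,100)   */
        struct ext right = { end, e.off + e.len - end };  /* [200,300) */

        assert(left.len == 100 && right.off == 200 && right.len == 100);
        return 0;
    }
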
+
+static int
+journal_trim_flush(journal *jnl, transaction *tr)
+{
+       int errno = 0;
+       
+       if (CONFIG_HFS_TRIM) {
+               if ((jnl->flags & JOURNAL_TRIM_ERR) == 0 && tr->trim.extent_count > 0) {
+                       dk_unmap_t unmap;
+                       
+                       bzero(&unmap, sizeof(unmap));
+                       unmap.extents = tr->trim.extents;
+                       unmap.extentsCount = tr->trim.extent_count;
+                       errno = VNOP_IOCTL(jnl->fsdev, DKIOCUNMAP, (caddr_t)&unmap, FWRITE, vfs_context_kernel());
+                       if (errno) {
+                               printf("jnl: error %d from DKIOCUNMAP (extents=%lx, count=%u); disabling trim for %s\n",
+                                               errno, (unsigned long) (tr->trim.extents), tr->trim.extent_count,
+                                               jnl->jdev_name);
+                               jnl->flags |= JOURNAL_TRIM_ERR;
+                       }
+               }
+               if (tr->trim.extents) {
+                       kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t));
+                       tr->trim.allocated_count = 0;
+                       tr->trim.extent_count = 0;
+                       tr->trim.extents = NULL;
+               }
+       }
+       
+       return errno;
+}
+
+
 static int
 journal_binfo_cmp(const void *a, const void *b)
 {
@@ -2834,10 +3248,17 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
     // transaction buffer if it's full or if we have more than
     // one of them so we don't start hogging too much memory.
     //
+    // We also check the number of extents waiting to be trimmed.
+    // If it is small enough, then keep accumulating more (so we
+    // can reduce the overhead of trimming).  If there was a
+    // prior trim error, then we stop issuing trims for this
+    // volume, so we can also coalesce transactions.
+    //
     if (   force_it == 0
                   && (jnl->flags & JOURNAL_NO_GROUP_COMMIT) == 0 
                   && tr->num_blhdrs < 3
-                  && (tr->total_bytes <= ((tr->tbuffer_size*tr->num_blhdrs) - tr->tbuffer_size/8))) {
+                  && (tr->total_bytes <= ((tr->tbuffer_size*tr->num_blhdrs) - tr->tbuffer_size/8))
+                  && ((jnl->flags & JOURNAL_TRIM_ERR) || (tr->trim.extent_count < jnl_trim_flush_limit))) {
 
                jnl->cur_tr = tr;
                return 0;
@@ -3064,6 +3485,12 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
                goto bad_journal;
        }
        
+       //
+       // Send a DKIOCUNMAP for the extents trimmed by this transaction, and
+       // free up the extent list.
+       //
+       errno = journal_trim_flush(jnl, tr);
+       
     //
     // setup for looping through all the blhdr's.  we null out the
     // tbuffer and blhdr fields so that they're not used any more.
@@ -3148,7 +3575,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
   bad_journal:
     jnl->flags |= JOURNAL_INVALID;
     jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL;
-    abort_transaction(jnl, tr);
+    abort_transaction(jnl, tr);                // cleans up list of extents to be trimmed
     return -1;
 }
 
@@ -3212,6 +3639,12 @@ abort_transaction(journal *jnl, transaction *tr)
                kmem_free(kernel_map, (vm_offset_t)blhdr, tr->tbuffer_size);
     }
 
+       if (tr->trim.extents) {
+               kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t));
+       }
+       tr->trim.allocated_count = 0;
+       tr->trim.extent_count = 0;
+       tr->trim.extents = NULL;
     tr->tbuffer     = NULL;
     tr->blhdr       = NULL;
     tr->total_bytes = 0xdbadc0de;
index c3e058b40ee8bf0e3ee1155e47f125cb8f92064d..310445395d2343d02bae2ff067278cfc3e8a8ed9 100644 (file)
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -42,6 +42,7 @@
 
 #include <sys/types.h>
 #include <kern/locks.h>
+#include <sys/disk.h>
 
 typedef struct _blk_info {
     int32_t    bsize;
@@ -74,6 +75,12 @@ typedef struct block_list_header {
 
 struct journal;
 
+struct jnl_trim_list {
+       uint32_t        allocated_count;
+       uint32_t        extent_count;
+       dk_extent_t *extents;
+};
+
 typedef struct transaction {
     int                 tbuffer_size;  // in bytes
     char               *tbuffer;       // memory copy of the transaction
@@ -87,6 +94,7 @@ typedef struct transaction {
     struct journal     *jnl;           // ptr back to the journal structure
     struct transaction *next;          // list of tr's (either completed or to be free'd)
     uint32_t            sequence_num;
+    struct jnl_trim_list       trim;
 } transaction;
 
 
@@ -166,6 +174,7 @@ typedef struct journal {
 #define JOURNAL_FLUSHCACHE_ERR    0x00040000   // means we already printed this err
 #define JOURNAL_NEED_SWAP         0x00080000   // swap any data read from disk
 #define JOURNAL_DO_FUA_WRITES     0x00100000   // do force-unit-access writes
+#define JOURNAL_TRIM_ERR          0x00200000   // a previous trim failed
 
 /* journal_open/create options are always in the low-16 bits */
 #define JOURNAL_OPTION_FLAGS_MASK 0x0000ffff
@@ -283,12 +292,21 @@ void      journal_close(journal *journalp);
  * then call journal_kill_block().  This will mark it so
  * that the journal does not play it back (effectively
  * dropping it).
+ *
+ * journal_trim_add_extent() marks a range of bytes on the device which should
+ * be trimmed (invalidated, unmapped).  journal_trim_remove_extent() marks a
+ * range of bytes which should no longer be trimmed.  Accumulated extents
+ * will be trimmed when the transaction is flushed to the on-disk journal.
  */
 int   journal_start_transaction(journal *jnl);
 int   journal_modify_block_start(journal *jnl, struct buf *bp);
 int   journal_modify_block_abort(journal *jnl, struct buf *bp);
 int   journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *bp, void *arg), void *arg);
 int   journal_kill_block(journal *jnl, struct buf *bp);
+#ifdef BSD_KERNEL_PRIVATE
+int   journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length);
+int   journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length);
+#endif
 int   journal_end_transaction(journal *jnl);
 
 int   journal_active(journal *jnl);
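
A hedged sketch of how a client filesystem might combine the new trim calls with the existing transaction protocol; the helper name, the bitmap buffer bp, and the byte range are illustrative, not from the commit:

    /* Illustrative only: assumes a journal *jnl and a buf *bp for the
     * allocation bitmap block covering the blocks being freed. */
    static int
    free_blocks_example(journal *jnl, struct buf *bp,
                        uint64_t byte_offset, uint64_t byte_len)
    {
        int err = journal_start_transaction(jnl);
        if (err)
            return err;

        /* Journal the bitmap update that marks the blocks free... */
        journal_modify_block_start(jnl, bp);
        /* ...clear the bits in bp's data here... */
        journal_modify_block_end(jnl, bp, NULL, NULL);

        /* ...and remember that the freed bytes can be unmapped once the
         * transaction reaches the on-disk journal. */
        journal_trim_add_extent(jnl, byte_offset, byte_len);

        return journal_end_transaction(jnl);
    }
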
index a8fc4b148074fe984b4f9d3fb835d9b7e9ef0f6e..3b10114cb52f4fc41442e4645fcd3536c5eb63af 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -2994,7 +2994,8 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 
        if (features & DK_FEATURE_FORCE_UNIT_ACCESS)
                mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED;
-       
+       if (features & DK_FEATURE_UNMAP)
+               mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED;
        return (error);
 }
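
With this flag in place, a filesystem can cheaply decide at mount time whether building trim lists is worthwhile. An illustrative helper (flag and field names as defined in mount.h/mount_internal.h):

    /* Illustrative: skip building trim lists on devices that cannot unmap. */
    static int
    wants_trim(mount_t mp)
    {
        return (mp->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED) != 0;
    }
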
 
index deb1b0fbecfc1bb002a7a228484fddfedc20537b..8f1cb8e73a68430deff4af47c196022a70927e6a 100644 (file)
@@ -1008,6 +1008,7 @@ __ZN21IOSubMemoryDescriptorD0Ev
 __ZN21IOSubMemoryDescriptorD2Ev
 __ZN22IOInterruptEventSource10gMetaClassE
 __ZN22IOInterruptEventSource10superClassE
+__ZN22IOInterruptEventSource11setWorkLoopEP10IOWorkLoop
 __ZN22IOInterruptEventSource12checkForWorkEv
 __ZN22IOInterruptEventSource17interruptOccurredEPvP9IOServicei
 __ZN22IOInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_PS_iEP9IOServicei
index 1ad776f4b00949a1f018279f97cee60516973845..237d3331dbdee8501cc9408255de7d3e40c4cb04 100644 (file)
@@ -1,4 +1,4 @@
-10.7.0
+10.8.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 63d85a3b0d0393baebf37bdaa8fd006b381a7e87..5ff0653e9822d2ea8a232e3114efc126c2f2fe62 100644 (file)
@@ -7,8 +7,8 @@ _cpuid_features
 _cpuid_info
 _gOSKextUnresolved
 _lapic_end_of_interrupt
+_lapic_unmask_perfcnt_interrupt
 _mp_broadcast
 _mp_cpus_call
 _need_fsevent
 _smp_initialized
-_lapic_unmask_perfcnt_interrupt
index ab5e6038b0c71119513e8ef9db610283f8234341..75146568ccbd1831d84c3ededadc5654f1980c47 100644 (file)
@@ -1431,6 +1431,7 @@ __ZN21IOSubMemoryDescriptorD0Ev
 __ZN21IOSubMemoryDescriptorD2Ev
 __ZN22IOInterruptEventSource10gMetaClassE
 __ZN22IOInterruptEventSource10superClassE
+__ZN22IOInterruptEventSource11setWorkLoopEP10IOWorkLoop
 __ZN22IOInterruptEventSource12checkForWorkEv
 __ZN22IOInterruptEventSource17interruptOccurredEPvP9IOServicei
 __ZN22IOInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_PS_iEP9IOServicei
index 0944c326c11a59e33e6f41385997c1d5e49f48ed..8886533d8c0c05d5159e1f2e43c5ec79c631d3f4 100644 (file)
@@ -156,6 +156,7 @@ _task_get_special_port
 _task_resume
 _task_suspend
 _thread_notrigger
+_thread_tid
 _tsleep
 _vfs_context_current
 _vfs_setlocklocal
index bf2cedbf77f8bd922dcd65bf2b42dd560ed6592f..66029e241e5050c9ab090d33bba7e24c8dfebc34 100644 (file)
@@ -13,7 +13,6 @@ _in6addr_local
 _in_broadcast
 _inaddr_local
 _inet_domain_mutex
-_io_map_spec
 _ip_mutex
 _ip_output
 _ip_protox
@@ -21,7 +20,8 @@ _kdp_register_callout
 _kdp_set_ip_and_mac_addresses
 _kernel_flock
 _kernel_thread
-_lapic_start
+_lapic_set_perfcnt_interrupt_mask
+_lapic_set_pmi_func
 _lo_ifp
 _m_adj
 _m_cat
@@ -73,6 +73,7 @@ _pru_sense_null
 _pru_shutdown_notsupp
 _pru_sockaddr_notsupp
 _pru_sopoll_notsupp
+_rdmsr_carefully
 _real_ncpus
 _rtc_clock_napped
 _sbappendaddr
index f4dc69724c6bbbe040b53ab9de085ce469393302..79dce8fdc4aa7867039e8f9d36e7d27a4402f804 100644 (file)
@@ -7,10 +7,10 @@ _cpu_number
 _dsmos_page_transform_hook
 _gPEEFIRuntimeServices
 _gPEEFISystemTable
-_io_map_spec
 _kdp_register_callout
 _kdp_set_ip_and_mac_addresses
-_lapic_start
+_lapic_set_perfcnt_interrupt_mask
+_lapic_set_pmi_func
 _ml_get_apicid
 _ml_get_maxbusdelay
 _ml_get_maxsnoop
@@ -20,6 +20,7 @@ _mp_rendezvous_no_intrs
 _pmCPUControl
 _pmKextRegister
 _pm_init_lock
+_rdmsr_carefully
 _real_ncpus
 _rtc_clock_napped
 _serial_getc
index 0be7caf4575ee0a02c011dbc04ec5fb2136a734f..fe5d4ae12854c0406aea9cdcb76cc484b875a47f 100644 (file)
@@ -112,6 +112,10 @@ protected:
     @result Return true if this function needs to be called again before all its outstanding events have been processed. */
     virtual bool checkForWork();
 
+/*! @function setWorkLoop
+    @abstract Sub-class implementation of setWorkLoop method. */
+    virtual void setWorkLoop(IOWorkLoop *inWorkLoop);
+
 public:
 
 /*! @function interruptEventSource
@@ -186,6 +190,9 @@ state when checkForWork is called. */
     @param ind What is this interrupts index within 'nub'. */
     virtual void disableInterruptOccurred(void *, IOService *nub, int ind);
 
+private:
+    IOReturn registerInterruptHandler(IOService *inProvider, int inIntIndex);
+
 private:
     OSMetaClassDeclareReservedUnused(IOInterruptEventSource, 0);
     OSMetaClassDeclareReservedUnused(IOInterruptEventSource, 1);
index 499faa3c85bee3772a63813c597c44d0ea8b5e84..96fb7c5a094d643791ede8c2047ee6948cb30f3c 100644 (file)
@@ -71,7 +71,7 @@ enum {
     kIOLogPower         =         0x00000080ULL,
     kIOLogMapping       =         0x00000100ULL,
     kIOLogCatalogue     =         0x00000200ULL,
-    kIOLogTracePower    =         0x00000400ULL,
+       kIOLogTracePower    =         0x00000400ULL,  // Obsolete: Use iotrace=0x00000400ULL to enable now
     kIOLogDebugPower    =         0x00000800ULL,
     kIOLogServiceTree   =         0x00001000ULL,
     kIOLogDTree         =         0x00002000ULL,
@@ -80,7 +80,7 @@ enum {
     kOSLogRegistryMods  =         0x00010000ULL,  // Log attempts to modify registry collections
     kIOLogPMRootDomain  =         0x00020000ULL,
     kOSRegistryModsMode =         0x00040000ULL,  // Change default registry modification handling - panic vs. log
-    kIOTraceIOService   =         0x00080000ULL,
+//    kIOTraceIOService   =         0x00080000ULL,  // Obsolete: Use iotrace=0x00080000ULL to enable now
     kIOLogHibernate     =         0x00100000ULL,
 
     // debug aids - change behaviour
@@ -91,7 +91,25 @@ enum {
     _kIODebugTopFlag    = 0x8000000000000000ULL   // force enum to be 64 bits
 };
 
+enum {
+       kIOTraceInterrupts              =               0x00000001ULL,  // Trace primary interrupts
+       kIOTraceWorkLoops               =               0x00000002ULL,  // Trace workloop activity
+       kIOTraceEventSources    =               0x00000004ULL,  // Trace non-passive event sources
+       kIOTraceIntEventSource  =               0x00000008ULL,  // Trace IOIES and IOFIES sources
+       kIOTraceCommandGates    =               0x00000010ULL,  // Trace command gate activity
+       kIOTraceTimers                  =               0x00000020ULL,  // Trace timer event source activity
+       
+       kIOTracePowerMgmt               =               0x00000400ULL,  // Trace power management changes
+       
+       kIOTraceIOService       =               0x00080000ULL,  // registerService/termination
+       
+       kIOTraceCompatBootArgs  =               kIOTraceIOService | kIOTracePowerMgmt
+};
+
 extern SInt64    gIOKitDebug;
+extern SInt64    gIOKitTrace;
+extern UInt64   gIOInterruptThresholdNS;
+
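
Since gIOKitTrace is driven by the iotrace boot-arg (per the "Obsolete" notes above), the masks compose by OR. An illustrative composite:

    /* Hypothetical composite mask: trace workloop and timer activity.
     * kIOTraceWorkLoops (0x2) | kIOTraceTimers (0x20) == 0x22, so booting
     * with iotrace=0x22 should enable both sets of probes. */
    static const SInt64 kMyTraceMask = kIOTraceWorkLoops | kIOTraceTimers;
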
 
 #ifdef __cplusplus
 extern "C" {
index 38811b63ae59e0ec9616f220c024da05a8c9a2d8..9f18537855f681191c50de87bb682cd2353e2e4a 100644 (file)
@@ -62,6 +62,7 @@ typedef       kern_return_t           IOReturn;
 //#define sub_iokit_hidsystem             err_sub(14)
 #define sub_iokit_scsi                    err_sub(16)
 //#define sub_iokit_pccard                err_sub(21)
+#define sub_iokit_thunderbolt             err_sub(29)
 
 #define sub_iokit_vendor_specific         err_sub(-2)
 #define sub_iokit_reserved                err_sub(-1)
index b1b09057fefbefac61a83dd7e7f11b6a5b5d4d40..a1d22f4d3a9702f72a68807915f7d151760389ee 100644 (file)
@@ -107,6 +107,7 @@ IOTimeStamp(uintptr_t csc,
 
 
 /* IOKit infrastructure subclasses */
+#define IODBG_INTC(code)                       (KDBG_CODE(DBG_IOKIT, DBG_IOINTC, code))
 #define IODBG_WORKLOOP(code)           (KDBG_CODE(DBG_IOKIT, DBG_IOWORKLOOP, code))
 #define IODBG_INTES(code)                      (KDBG_CODE(DBG_IOKIT, DBG_IOINTES, code))
 #define IODBG_TIMES(code)                      (KDBG_CODE(DBG_IOKIT, DBG_IOCLKES, code))
@@ -132,6 +133,9 @@ IOTimeStamp(uintptr_t csc,
 
 /* DBG_IOKIT/DBG_IOTTY codes */
 
+/* DBG_IOKIT/DBG_IOINTC codes */
+#define IOINTC_HANDLER 1       /* 0x05000004 */
+
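
The /* 0x05000004 */ annotation follows from kdebug's code packing: class in bits 31-24, subclass in bits 23-16, and the code shifted left by two (the low bits carry DBG_FUNC_START/END). Assuming DBG_IOKIT is 5 and DBG_IOINTC is 0:

    /* KDBG_CODE(Class, SubClass, code) packs as:
     *   (Class << 24) | (SubClass << 16) | (code << 2)
     * so IODBG_INTC(IOINTC_HANDLER) = (5 << 24) | (0 << 16) | (1 << 2)
     *                               = 0x05000000 | 0x0 | 0x4 = 0x05000004
     */
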
 /* DBG_IOKIT/DBG_IOWORKLOOP codes */
 #define IOWL_CLIENT            1       /* 0x05010004 */
 #define IOWL_WORK              2       /* 0x05010008 */
index fc8d7ca8f970a63a58a552d3a639a0384a294e0c..804b9bbfdf986946d2c8392224aa8191c1c8cae0 100644 (file)
@@ -230,14 +230,14 @@ enum {
  *  false       == Deep Sleep is disabled
  *  not present == Deep Sleep is not supported on this hardware
  */
-#define kIOPMDeepSleepEnabledKey            "DeepSleep Enabled"
+#define kIOPMDeepSleepEnabledKey            "Standby Enabled"
 
 /* kIOPMDeepSleepDelayKey
  * Key refers to a CFNumberRef that represents the delay in seconds before
  * entering Deep Sleep state. The property is not present if Deep Sleep is
  * unsupported.
  */
-#define kIOPMDeepSleepDelayKey              "DeepSleep Delay"
+#define kIOPMDeepSleepDelayKey              "Standby Delay"
 
 /* kIOPMLowBatteryWakeThresholdKey
  * Key refers to a CFNumberRef that represents the percentage of battery
index 55d6eee7fe4683dc4563ab9b169f9b5c728d01f5..0b823d2b69eaff02a7ecd8363201f29b101ad1c9 100644 (file)
@@ -31,6 +31,7 @@
 #include <IOKit/IOWorkLoop.h>
 #include <IOKit/IOReturn.h>
 #include <IOKit/IOTimeStamp.h>
+#include <IOKit/IOKitDebug.h>
 
 #define super IOEventSource
 
@@ -129,9 +130,6 @@ IOReturn IOCommandGate::runAction(Action inAction,
     if (!inAction)
         return kIOReturnBadArgument;
 
-    IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION),
-                       (uintptr_t) inAction, (uintptr_t) owner);
-
     // closeGate is recursive needn't worry if we already hold the lock.
     closeGate();
 
@@ -158,8 +156,19 @@ IOReturn IOCommandGate::runAction(Action inAction,
        }
     }
 
+    bool trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false;
+       
+       if (trace)
+               IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+                                                                (uintptr_t) inAction, (uintptr_t) owner);
+       
     // Must be gated and on the work loop or enabled
     res = (*inAction)(owner, arg0, arg1, arg2, arg3);
+       
+       if (trace)
+               IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+                                                          (uintptr_t) inAction, (uintptr_t) owner);
+    
     openGate();
 
     return res;
@@ -182,10 +191,18 @@ IOReturn IOCommandGate::attemptAction(Action inAction,
     if (!workLoop->onThread() && !enabled)
         res = kIOReturnNotPermitted;
     else {
-       IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+               
+        bool trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false;
+               
+        if (trace)
+            IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
                            (uintptr_t) inAction, (uintptr_t) owner);
 
        res = (*inAction)(owner, arg0, arg1, arg2, arg3);
+               
+        if (trace)
+            IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+                                                                  (uintptr_t) inAction, (uintptr_t) owner);
     }
 
     openGate();
index e2cd65b4e601725a80a0a2248c820a78d9b1c673..7d7249dee3bb39ca35eeaf34461ffbf3b9725dc5 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
-Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
-
-HISTORY
-    1998-7-13  Godfrey van der Linden(gvdl)
-        Created.
-]*/
 
 #if !defined(__LP64__)
 
 #include <IOKit/IOCommandQueue.h>
 #include <IOKit/IOWorkLoop.h>
 #include <IOKit/IOTimeStamp.h>
+#include <IOKit/IOKitDebug.h>
 
 #include <mach/sync_policy.h>
 
@@ -137,6 +131,7 @@ void IOCommandQueue::free()
 bool IOCommandQueue::checkForWork()
 {
     void *field0, *field1, *field2, *field3;
+       bool    trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false;
 
     if (!enabled || consumerIndex == producerIndex)
         return false;
@@ -153,11 +148,16 @@ bool IOCommandQueue::checkForWork()
     if (++consumerIndex >= size)
         consumerIndex = 0;
 
-    IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+       if (trace)
+               IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
                        (uintptr_t) action, (uintptr_t) owner);
 
     (*(IOCommandQueueAction) action)(owner, field0, field1, field2, field3);
 
+       if (trace)
+               IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
+                                                          (uintptr_t) action, (uintptr_t) owner);
+       
     return (consumerIndex != producerIndex);
 }
 
index 47a3b8a143c03fe531ed5ec2d3136cef798d4767..f4f73e2b4c1b3a371ddf791b5915590af1d8916e 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
-Copyright (c) 1999 Apple Computer, Inc.  All rights reserved.
 
-HISTORY
-    1999-4-15  Godfrey van der Linden(gvdl)
-        Created.
-*/
 #include <IOKit/IOFilterInterruptEventSource.h>
 #include <IOKit/IOService.h>
+#include <IOKit/IOKitDebug.h>
 #include <IOKit/IOTimeStamp.h>
 #include <IOKit/IOWorkLoop.h>
 
-#if KDEBUG
-
-#define IOTimeTypeStampS(t)                                            \
-do {                                                                   \
-    IOTimeStampStart(IODBG_INTES(t),                                   \
-                     (uintptr_t) this, (uintptr_t) owner);     \
-} while(0)
-
-#define IOTimeTypeStampE(t)                                            \
-do {                                                                   \
-    IOTimeStampEnd(IODBG_INTES(t),                                     \
-                   (uintptr_t) this, (uintptr_t) owner);               \
-} while(0)
-
-#define IOTimeStampLatency()                                           \
-do {                                                                   \
-    IOTimeStampEnd(IODBG_INTES(IOINTES_LAT),                           \
-                   (uintptr_t) this, (uintptr_t) owner);               \
-} while(0)
-
-#else /* !KDEBUG */
-#define IOTimeTypeStampS(t)
-#define IOTimeTypeStampE(t)
-#define IOTimeStampLatency()
-#endif /* KDEBUG */
-
 #define super IOInterruptEventSource
 
 OSDefineMetaClassAndStructors
@@ -133,13 +102,18 @@ IOFilterInterruptEventSource *IOFilterInterruptEventSource
 
 void IOFilterInterruptEventSource::signalInterrupt()
 {
-IOTimeStampLatency();
+       bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
 
     producerCount++;
 
-IOTimeTypeStampS(IOINTES_SEMA);
+       if (trace)
+           IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
+    
     signalWorkAvailable();
-IOTimeTypeStampE(IOINTES_SEMA);
+       
+       if (trace)
+           IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
+       
 }
 
 
@@ -156,38 +130,42 @@ void IOFilterInterruptEventSource::normalInterruptOccurred
     (void */*refcon*/, IOService */*prov*/, int /*source*/)
 {
     bool filterRes;
+       bool    trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
 
-IOTimeTypeStampS(IOINTES_INTCTXT);
+       if (trace)
+               IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER),
+                                                                (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
 
-IOTimeTypeStampS(IOINTES_INTFLTR);
-    IOTimeStampConstant(IODBG_INTES(IOINTES_FILTER),
-                        (uintptr_t) filterAction, (uintptr_t) owner);
+    // Call the filter.
     filterRes = (*filterAction)(owner, this);
-IOTimeTypeStampE(IOINTES_INTFLTR);
+       
+       if (trace)
+               IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER),
+                                                          (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
 
     if (filterRes)
         signalInterrupt();
-
-IOTimeTypeStampE(IOINTES_INTCTXT);
 }
 
 void IOFilterInterruptEventSource::disableInterruptOccurred
     (void */*refcon*/, IOService *prov, int source)
 {
     bool filterRes;
+       bool    trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
 
-IOTimeTypeStampS(IOINTES_INTCTXT);
+       if (trace)
+               IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER),
+                                                                (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
 
-IOTimeTypeStampS(IOINTES_INTFLTR);
-    IOTimeStampConstant(IODBG_INTES(IOINTES_FILTER),
-                        (uintptr_t) filterAction, (uintptr_t) owner);
+    // Call the filter.
     filterRes = (*filterAction)(owner, this);
-IOTimeTypeStampE(IOINTES_INTFLTR);
+       
+       if (trace)
+               IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER),
+                                                          (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
 
     if (filterRes) {
         prov->disableInterrupt(source);        /* disable the interrupt */
-
         signalInterrupt();
     }
-IOTimeTypeStampE(IOINTES_INTCTXT);
 }
index 6b906baa92fd3aa7d5b8701f779a6d72d6ce0232..bc180fb5bf2f753383dd1055eaba64bc8986e1b9 100644 (file)
@@ -180,6 +180,12 @@ static IODTNVRAM *         gIOOptionsEntry;
 static IORegistryEntry *       gIOChosenEntry;
 #if defined(__i386__) || defined(__x86_64__)
 static const OSSymbol *         gIOCreateEFIDevicePathSymbol;
+static const OSSymbol *        gIOHibernateRTCVariablesKey;
+static const OSSymbol *         gIOHibernateBoot0082Key;
+static const OSSymbol *         gIOHibernateBootNextKey;
+static OSData *                        gIOHibernateBoot0082Data;
+static OSData *                        gIOHibernateBootNextData;
+static OSObject *              gIOHibernateBootNextSave;
 #endif
 
 static IOPolledFileIOVars                gFileVars;
@@ -1210,29 +1216,73 @@ IOHibernateSystemSleep(void)
            data = OSData::withBytes(&rtcVars, sizeof(rtcVars));
            if (data)
            { 
-                       IOService::getPMRootDomain()->setProperty(kIOHibernateRTCVariablesKey, data);
-               
-                       if( gIOOptionsEntry )
+               if (!gIOHibernateRTCVariablesKey)
+                   gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
+               if (gIOHibernateRTCVariablesKey)
+                   IOService::getPMRootDomain()->setProperty(gIOHibernateRTCVariablesKey, data);
+       
+               if( gIOOptionsEntry )
+               {
+                   if( gIOHibernateMode & kIOHibernateModeSwitch )
+                   {
+                       const OSSymbol *sym;
+                       sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSwitchVarsKey);
+                       if( sym )
                        {
-                               if( gIOHibernateMode & kIOHibernateModeSwitch )
-                               {
-                                       const OSSymbol *sym;
-                                       sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSwitchVarsKey);
-                                       if( sym )
-                                       {
-                                               gIOOptionsEntry->setProperty(sym, data); /* intentional insecure backup of rtc boot vars */
-                                               sym->release();
-                                       }
-                               }       
+                           gIOOptionsEntry->setProperty(sym, data); /* intentional insecure backup of rtc boot vars */
+                           sym->release();
                        }
+                   }   
+               }
 
-                       data->release();
+               data->release();
            }
             if (gIOChosenEntry)
             {
                 data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey));
                 if (data)
                     gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy());
+               {
+                   // set BootNext
+
+                   if (!gIOHibernateBoot0082Data)
+                   {
+                       data = OSDynamicCast(OSData, gIOChosenEntry->getProperty("boot-device-path"));
+                       if (data)
+                       {
+                           // AppleNVRAM_EFI_LOAD_OPTION
+                           struct {
+                               uint32_t Attributes;
+                               uint16_t FilePathLength;
+                               uint16_t Desc;
+                           } loadOptionHeader;
+                           loadOptionHeader.Attributes     = 1;
+                           loadOptionHeader.FilePathLength = data->getLength();
+                           loadOptionHeader.Desc           = 0;
+                           gIOHibernateBoot0082Data = OSData::withCapacity(sizeof(loadOptionHeader) + loadOptionHeader.FilePathLength);
+                           if (gIOHibernateBoot0082Data)
+                           {
+                               gIOHibernateBoot0082Data->appendBytes(&loadOptionHeader, sizeof(loadOptionHeader));
+                               gIOHibernateBoot0082Data->appendBytes(data);
+                           }
+                       }
+                   }
+                   if (!gIOHibernateBoot0082Key)
+                       gIOHibernateBoot0082Key = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:Boot0082");
+                   if (!gIOHibernateBootNextKey)
+                       gIOHibernateBootNextKey = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:BootNext");
+                   if (!gIOHibernateBootNextData)
+                   {
+                       uint16_t bits = 0x0082;
+                       gIOHibernateBootNextData = OSData::withBytes(&bits, sizeof(bits));
+                   }
+                   if (gIOHibernateBoot0082Key && gIOHibernateBoot0082Data && gIOHibernateBootNextKey && gIOHibernateBootNextData)
+                   {
+                       gIOHibernateBootNextSave = gIOOptionsEntry->copyProperty(gIOHibernateBootNextKey);
+                       gIOOptionsEntry->setProperty(gIOHibernateBoot0082Key, gIOHibernateBoot0082Data);
+                       gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextData);
+                   }
+               }
             }
 #else /* !i386 && !x86_64 */
             if (kIOHibernateModeEncrypt & gIOHibernateMode)
@@ -1589,22 +1639,32 @@ IOHibernateSystemWake(void)
 #endif
 
 #if defined(__i386__) || defined(__x86_64__)
-       IOService::getPMRootDomain()->removeProperty(kIOHibernateRTCVariablesKey);
+       IOService::getPMRootDomain()->removeProperty(gIOHibernateRTCVariablesKey);
 
        /*
         * Hibernate variable is written to NVRAM on platforms in which RtcRam
         * is not backed by coin cell.  Remove Hibernate data from NVRAM.
         */
        if (gIOOptionsEntry) {
-               const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey);
 
-               if (sym) {
-                       if (gIOOptionsEntry->getProperty(sym)) {
-                               gIOOptionsEntry->removeProperty(sym);
-                               gIOOptionsEntry->sync();
-                       }
-                       sym->release();
+           if (gIOHibernateRTCVariablesKey) {
+               if (gIOOptionsEntry->getProperty(gIOHibernateRTCVariablesKey)) {
+                   gIOOptionsEntry->removeProperty(gIOHibernateRTCVariablesKey);
+               }
+           }
+
+           if (gIOHibernateBootNextKey)
+           {
+               if (gIOHibernateBootNextSave)
+               {
+                   gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextSave);
+                   gIOHibernateBootNextSave->release();
+                   gIOHibernateBootNextSave = NULL;
                }
+               else
+                   gIOOptionsEntry->removeProperty(gIOHibernateBootNextKey);
+           }
+           gIOOptionsEntry->sync();
        }
 #endif
 
index beedc1a8b6ad8a63fbea67ac7e627206e1123365..a8e04bddd28deb711e851fd8b40c818a20890c24 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 1999 Apple Computer, Inc.  All rights reserved. 
- *
- *  DRI: Josh de Cesare
- *
- */
 
 
 #if __ppc__
@@ -43,6 +37,9 @@
 #include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/IOInterrupts.h>
 #include <IOKit/IOInterruptController.h>
+#include <IOKit/IOKitDebug.h>
+#include <IOKit/IOTimeStamp.h>
+
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
@@ -83,7 +80,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source,
   vector = &vectors[vectorNumber];
   
   // Get the lock for this vector.
-  IOTakeLock(vector->interruptLock);
+  IOLockLock(vector->interruptLock);
   
   // Check if the interrupt source can/should be shared.
   canBeShared = vectorCanBeShared(vectorNumber, vector);
@@ -102,7 +99,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source,
   
   // If the vector is registered and can not be shared return error.
   if (wasAlreadyRegisterd && !canBeShared) {
-    IOUnlock(vector->interruptLock);
+    IOLockUnlock(vector->interruptLock);
     return kIOReturnNoResources;
   }
   
@@ -115,7 +112,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source,
       // Make the IOShareInterruptController instance
       vector->sharedController = new IOSharedInterruptController;
       if (vector->sharedController == 0) {
-        IOUnlock(vector->interruptLock);
+        IOLockUnlock(vector->interruptLock);
         return kIOReturnNoMemory;
       }
       
@@ -139,7 +136,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source,
        if (wasAlreadyRegisterd) enableInterrupt(originalNub, originalSource);
         vector->sharedController->release();
         vector->sharedController = 0;
-        IOUnlock(vector->interruptLock);
+        IOLockUnlock(vector->interruptLock);
         return error;
       }
       
@@ -167,7 +164,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source,
          
          vector->sharedController->release();
          vector->sharedController = 0;
-         IOUnlock(vector->interruptLock);
+         IOLockUnlock(vector->interruptLock);
          return error;
        }
       }
@@ -199,7 +196,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source,
     
     error = vector->sharedController->registerInterrupt(nub, source, target,
                                                         handler, refCon);
-    IOUnlock(vector->interruptLock);
+    IOLockUnlock(vector->interruptLock);
     return error;
   }
   
@@ -218,7 +215,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source,
   vector->interruptDisabledSoft = 1;
   vector->interruptRegistered   = 1;
   
-  IOUnlock(vector->interruptLock);
+  IOLockUnlock(vector->interruptLock);
   return kIOReturnSuccess;
 }
 
@@ -235,11 +232,11 @@ IOReturn IOInterruptController::unregisterInterrupt(IOService *nub, int source)
   vector = &vectors[vectorNumber];
   
   // Get the lock for this vector.
-  IOTakeLock(vector->interruptLock);
+  IOLockLock(vector->interruptLock);
   
   // Return success if it is not already registered
   if (!vector->interruptRegistered) {
-    IOUnlock(vector->interruptLock);
+    IOLockUnlock(vector->interruptLock);
     return kIOReturnSuccess;
   }
   
@@ -260,7 +257,7 @@ IOReturn IOInterruptController::unregisterInterrupt(IOService *nub, int source)
   vector->target = 0;
   vector->refCon = 0;
   
-  IOUnlock(vector->interruptLock);
+  IOLockUnlock(vector->interruptLock);
   return kIOReturnSuccess;
 }
 
@@ -512,13 +509,13 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub,
       vector = &vectors[vectorNumber];
       
       // Get the lock for this vector.
-      IOTakeLock(vector->interruptLock);
+      IOLockLock(vector->interruptLock);
       
       // Is it unregistered?
       if (!vector->interruptRegistered) break;
       
       // Move along to the next one.
-      IOUnlock(vector->interruptLock);
+      IOLockUnlock(vector->interruptLock);
     }
     
     if (vectorNumber != kIOSharedInterruptControllerDefaultVectors) break;
@@ -555,7 +552,7 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub,
   if (++vectorsRegistered > numVectors) numVectors = vectorsRegistered;
   IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState);
   
-  IOUnlock(vector->interruptLock);
+  IOLockUnlock(vector->interruptLock);
   return kIOReturnSuccess;
 }
 
@@ -570,12 +567,12 @@ IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub,
     vector = &vectors[vectorNumber];
 
     // Get the lock for this vector.
-    IOTakeLock(vector->interruptLock);
+    IOLockLock(vector->interruptLock);
 
     // Return success if it is not already registered
     if (!vector->interruptRegistered
      || (vector->nub != nub) || (vector->source != source)) {
-        IOUnlock(vector->interruptLock);
+        IOLockUnlock(vector->interruptLock);
         continue;
     }
 
@@ -598,7 +595,7 @@ IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub,
     IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState);
 
     // Move along to the next one.
-    IOUnlock(vector->interruptLock);
+    IOLockUnlock(vector->interruptLock);
   }
 
   // Re-enable the controller if all vectors are enabled.
@@ -713,9 +710,36 @@ IOReturn IOSharedInterruptController::handleInterrupt(void * /*refCon*/,
       
       // Call the handler if it exists.
       if (vector->interruptRegistered) {
-       vector->handler(vector->target, vector->refCon,
-                       vector->nub, vector->source);
+      
+                 bool          trace           = (gIOKitTrace & kIOTraceInterrupts) ? true : false;
+                 bool          timeHandler     = gIOInterruptThresholdNS ? true : false;
+                 uint64_t      startTime       = 0;
+                 uint64_t      endTime         = 0;
+                 
+                 if (trace)
+                         IOTimeStampStartConstant(IODBG_INTC(IOINTC_HANDLER),
+                                                                          (uintptr_t) vectorNumber, (uintptr_t) vector->handler, (uintptr_t)vector->target);
+                 
+                 if (timeHandler)
+                         startTime = mach_absolute_time();
+                 
+                 // Call handler.
+                 vector->handler(vector->target, vector->refCon, vector->nub, vector->source);
+
+                 if (timeHandler)
+                 {
+                         endTime = mach_absolute_time();
+                         if ((endTime - startTime) > gIOInterruptThresholdNS)
+                                 panic("IOSIC::handleInterrupt: interrupt exceeded threshold, handlerTime = %qd, vectorNumber = %d, handler = %p, target = %p\n",
+                                               endTime - startTime, (int)vectorNumber, vector->handler, vector->target);
+                 }
+                 
+                 if (trace)
+                         IOTimeStampEndConstant(IODBG_INTC(IOINTC_HANDLER),
+                                                                        (uintptr_t) vectorNumber, (uintptr_t) vector->handler, (uintptr_t)vector->target);
+                 
       }
+      
     }
     
     vector->interruptActive = 0;
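
The IOTakeLock/IOUnlock renames through this file are mechanical; the substantive change is that IOSharedInterruptController::handleInterrupt now brackets each shared handler with optional kdebug tracepoints and, when gIOInterruptThresholdNS is nonzero, times the handler and panics on overrun. A minimal user-space sketch of the timing pattern (run_timed and threshold_ns are illustrative names, not kernel API); note the kernel compares the raw mach_absolute_time() delta against the nanosecond threshold, which lines up on Intel Macs of this era where the timebase is 1:1:

    /* Sketch of the threshold check added to handleInterrupt. */
    #include <mach/mach_time.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t threshold_ns = 500000;   /* cf. gIOInterruptThresholdNS */

    static void run_timed(void (*handler)(void))
    {
        uint64_t start = mach_absolute_time();
        handler();
        uint64_t elapsed = mach_absolute_time() - start;

        /* Convert timebase ticks to nanoseconds before comparing. */
        mach_timebase_info_data_t tb;
        mach_timebase_info(&tb);
        elapsed = elapsed * tb.numer / tb.denom;

        if (threshold_ns && elapsed > threshold_ns)
            printf("handler exceeded threshold: %llu ns\n",
                   (unsigned long long)elapsed);
    }

    static void noop(void) { }

    int main(void) { run_timed(noop); return 0; }
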
index 9694b11307fc1201892732585520c4f5c0349310..97d4c595761deb84c70f8a39341cc3b7e76d593b 100644 (file)
@@ -33,38 +33,13 @@ HISTORY
         Created.
 */
 #include <IOKit/IOInterruptEventSource.h>
+#include <IOKit/IOKitDebug.h>
 #include <IOKit/IOLib.h>
 #include <IOKit/IOService.h>
 #include <IOKit/IOInterrupts.h>
 #include <IOKit/IOTimeStamp.h>
 #include <IOKit/IOWorkLoop.h>
 
-#if KDEBUG
-
-#define IOTimeTypeStampS(t)                                            \
-do {                                                                   \
-    IOTimeStampStart(IODBG_INTES(t),                                   \
-                     (uintptr_t) this, (uintptr_t) owner);     \
-} while(0)
-
-#define IOTimeTypeStampE(t)                                            \
-do {                                                                   \
-    IOTimeStampEnd(IODBG_INTES(t),                                     \
-                   (uintptr_t) this, (uintptr_t) owner);               \
-} while(0)
-
-#define IOTimeStampLatency()                                           \
-do {                                                                   \
-    IOTimeStampEnd(IODBG_INTES(IOINTES_LAT),                           \
-                   (uintptr_t) this, (uintptr_t) owner);               \
-} while(0)
-
-#else /* !KDEBUG */
-#define IOTimeTypeStampS(t)
-#define IOTimeTypeStampE(t)
-#define IOTimeStampLatency()
-#endif /* KDEBUG */
-
 #define super IOEventSource
 
 OSDefineMetaClassAndStructors(IOInterruptEventSource, IOEventSource)
@@ -90,36 +65,43 @@ bool IOInterruptEventSource::init(OSObject *inOwner,
     provider = inProvider;
     producerCount = consumerCount = 0;
     autoDisable = explicitDisable = false;
-    intIndex = -1;
+    intIndex = ~inIntIndex;
 
     // Assumes inOwner holds a reference(retain) on the provider
     if (inProvider) {
-        int intType;
-
-        res = (kIOReturnSuccess
-                    == inProvider->getInterruptType(inIntIndex, &intType));
-        if (res) {
-            IOInterruptAction intHandler;
-
-            autoDisable = (intType == kIOInterruptTypeLevel);
-            if (autoDisable) {
-                intHandler = OSMemberFunctionCast(IOInterruptAction,
-                   this, &IOInterruptEventSource::disableInterruptOccurred);
-            }
-            else
-                intHandler = OSMemberFunctionCast(IOInterruptAction,
-                   this, &IOInterruptEventSource::normalInterruptOccurred);
-
-            res = (kIOReturnSuccess == inProvider->registerInterrupt
-                                        (inIntIndex, this, intHandler));
-            if (res)
-                intIndex = inIntIndex;
-        }
+        res = (kIOReturnSuccess == registerInterruptHandler(inProvider, inIntIndex));
+       if (res)
+           intIndex = inIntIndex;
     }
 
     return res;
 }
 
+IOReturn IOInterruptEventSource::registerInterruptHandler(IOService *inProvider,
+                                 int inIntIndex)
+{
+    IOReturn ret;
+    int intType;
+    IOInterruptAction intHandler;
+
+    ret = inProvider->getInterruptType(inIntIndex, &intType);
+    if (kIOReturnSuccess != ret)
+       return (ret);
+
+    autoDisable = (intType == kIOInterruptTypeLevel);
+    if (autoDisable) {
+       intHandler = OSMemberFunctionCast(IOInterruptAction,
+           this, &IOInterruptEventSource::disableInterruptOccurred);
+    }
+    else
+       intHandler = OSMemberFunctionCast(IOInterruptAction,
+           this, &IOInterruptEventSource::normalInterruptOccurred);
+
+    ret = provider->registerInterrupt(inIntIndex, this, intHandler);
+
+    return (ret);
+}
+
 IOInterruptEventSource *
 IOInterruptEventSource::interruptEventSource(OSObject *inOwner,
                                             Action inAction,
@@ -138,7 +120,7 @@ IOInterruptEventSource::interruptEventSource(OSObject *inOwner,
 
 void IOInterruptEventSource::free()
 {
-    if (provider && intIndex != -1)
+    if (provider && intIndex >= 0)
         provider->unregisterInterrupt(intIndex);
 
     super::free();
@@ -146,7 +128,7 @@ void IOInterruptEventSource::free()
 
 void IOInterruptEventSource::enable()
 {
-    if (provider && intIndex != -1) {
+    if (provider && intIndex >= 0) {
         provider->enableInterrupt(intIndex);
         explicitDisable = false;
         enabled = true;
@@ -155,13 +137,30 @@ void IOInterruptEventSource::enable()
 
 void IOInterruptEventSource::disable()
 {
-    if (provider && intIndex != -1) {
+    if (provider && intIndex >= 0) {
         provider->disableInterrupt(intIndex);
         explicitDisable = true;
         enabled = false;
     }
 }
 
+void IOInterruptEventSource::setWorkLoop(IOWorkLoop *inWorkLoop)
+{
+    super::setWorkLoop(inWorkLoop);
+
+    if (!provider)
+       return;
+
+    if ( !inWorkLoop ) {
+       if (intIndex >= 0) {
+           provider->unregisterInterrupt(intIndex);
+           intIndex = ~intIndex;
+       }
+    } else if ((intIndex < 0) && (kIOReturnSuccess == registerInterruptHandler(provider, ~intIndex))) {
+       intIndex = ~intIndex;
+    }
+}
+
 const IOService *IOInterruptEventSource::getProvider() const
 {
     return provider;
@@ -182,27 +181,38 @@ bool IOInterruptEventSource::checkForWork()
     unsigned int cacheProdCount = producerCount;
     int numInts = cacheProdCount - consumerCount;
     IOInterruptEventAction intAction = (IOInterruptEventAction) action;
+       bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
 
-    if (numInts > 0) {
+       if ( numInts > 0 )
+       {
+               if (trace)
+                       IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION),
+                                                                        (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
 
-        IOTimeStampLatency();
-        IOTimeTypeStampS(IOINTES_CLIENT);
-            IOTimeStampConstant(IODBG_INTES(IOINTES_ACTION),
-                                (uintptr_t) intAction, (uintptr_t) owner);
-            (*intAction)(owner, this,  numInts);
-        IOTimeTypeStampE(IOINTES_CLIENT);
+               // Call the handler
+        (*intAction)(owner, this,  numInts);
+               
+               if (trace)
+                       IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION),
+                                                                  (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
 
         consumerCount = cacheProdCount;
         if (autoDisable && !explicitDisable)
             enable();
     }
-    else if (numInts < 0) {
-        IOTimeStampLatency();
-        IOTimeTypeStampS(IOINTES_CLIENT);
-            IOTimeStampConstant(IODBG_INTES(IOINTES_ACTION),
-                                (uintptr_t) intAction, (uintptr_t) owner);
-             (*intAction)(owner, this, -numInts);
-        IOTimeTypeStampE(IOINTES_CLIENT);
+       
+       else if ( numInts < 0 )
+       {
+               if (trace)
+                       IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION),
+                                                                        (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
+               
+               // Call the handler
+       (*intAction)(owner, this, -numInts);
+               
+               if (trace)
+                       IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION),
+                                                                  (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
     
         consumerCount = cacheProdCount;
         if (autoDisable && !explicitDisable)
@@ -215,33 +225,35 @@ bool IOInterruptEventSource::checkForWork()
 void IOInterruptEventSource::normalInterruptOccurred
     (void */*refcon*/, IOService */*prov*/, int /*source*/)
 {
-IOTimeTypeStampS(IOINTES_INTCTXT);
-IOTimeStampLatency();
+       bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
 
     producerCount++;
 
-IOTimeTypeStampS(IOINTES_SEMA);
+       if (trace)
+           IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
+       
     signalWorkAvailable();
-IOTimeTypeStampE(IOINTES_SEMA);
 
-IOTimeTypeStampE(IOINTES_INTCTXT);
+       if (trace)
+           IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
 }
 
 void IOInterruptEventSource::disableInterruptOccurred
     (void */*refcon*/, IOService *prov, int source)
 {
-IOTimeTypeStampS(IOINTES_INTCTXT);
-IOTimeStampLatency();
+       bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
 
     prov->disableInterrupt(source);    /* disable the interrupt */
 
     producerCount++;
 
-IOTimeTypeStampS(IOINTES_SEMA);
+       if (trace)
+           IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
+    
     signalWorkAvailable();
-IOTimeTypeStampE(IOINTES_SEMA);
 
-IOTimeTypeStampE(IOINTES_INTCTXT);
+       if (trace)
+           IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
 }
 
 void IOInterruptEventSource::interruptOccurred
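
The intIndex changes above replace the old -1 sentinel with a one's-complement encoding: a non-negative value means the source is registered at that index, and a negative value preserves the original index as ~intIndex so the new setWorkLoop() override can unregister on a NULL work loop and re-register the same index later. A few asserts make the invariant concrete (plain C, nothing kernel-specific):

    #include <assert.h>

    int main(void)
    {
        int intIndex = ~3;      /* init(): index 3, not yet registered       */
        assert(intIndex < 0);   /* free()/enable()/disable() skip negatives  */

        intIndex = ~intIndex;   /* registerInterruptHandler() succeeded */
        assert(intIndex == 3);

        intIndex = ~intIndex;   /* setWorkLoop(NULL): unregistered again */
        assert(intIndex == ~3);
        return 0;
    }
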
index f061b516c24ad078829bc90db8890301e391b417..31d681664edfc37b5edfd5130ce9746a9d924159 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 1998 Apple Computer, Inc.  All rights reserved. 
- *
- * HISTORY
- *
- */
 
 #include <sys/sysctl.h>
 
 #endif
 
 SInt64         gIOKitDebug = DEBUG_INIT_VALUE;
-SYSCTL_QUAD(_debug, OID_AUTO, iokit, CTLFLAG_RW, &gIOKitDebug, "boot_arg io");
+SInt64         gIOKitTrace = 0x3B;
+UInt64         gIOInterruptThresholdNS = 0;
+
+SYSCTL_QUAD(_debug, OID_AUTO, iokit, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitDebug, "boot_arg io");
+SYSCTL_QUAD(_debug, OID_AUTO, iotrace, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitTrace, "trace io");
+SYSCTL_QUAD(_debug, OID_AUTO, iointthreshold, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOInterruptThresholdNS, "io interrupt threshold");
 
 
 int            debug_malloc_size;
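
Because the SYSCTL_QUAD entries above register under _debug, the new knobs surface as debug.iokit, debug.iotrace and debug.iointthreshold and are tunable at runtime. A small user-space sketch (the write requires root, and 500000 is only an example value):

    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t trace = 0;
        size_t len = sizeof(trace);

        /* Read the current trace mask. */
        if (sysctlbyname("debug.iotrace", &trace, &len, NULL, 0) == 0)
            printf("debug.iotrace = 0x%llx\n", (unsigned long long)trace);

        /* Arm the interrupt-latency panic at 500us (value is in ns). */
        int64_t threshold = 500000;
        sysctlbyname("debug.iointthreshold", NULL, NULL,
                     &threshold, sizeof(threshold));
        return 0;
    }
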
index 2deffc4154c7540401010cd9422a898afb72ff49..804c57f2421baa3ff019c4582a65d904dec2d1f7 100644 (file)
@@ -46,12 +46,10 @@ kern_return_t IOIteratePageableMaps(vm_size_t size,
 vm_map_t IOPageableMapForAddress(uintptr_t address);
 
 kern_return_t 
-IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits options, bool pageable,
+IOMemoryDescriptorMapMemEntry(vm_map_t * map, ipc_port_t entry, IOOptionBits options, bool pageable,
                                mach_vm_size_t offset, mach_vm_address_t * address, mach_vm_size_t length);
 kern_return_t 
-IOMemoryDescriptorMapCopy(vm_map_t map, 
-                               vm_map_t src_map, 
-                               mach_vm_offset_t src_address,
+IOMemoryDescriptorMapCopy(vm_map_t * map, 
                                IOOptionBits options,
                                mach_vm_size_t offset, 
                                mach_vm_address_t * address, mach_vm_size_t length);
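
The interesting part of this header change: IOMemoryDescriptorMapCopy loses its src_map/src_address arguments (the copy source now comes from the descriptor's own ranges, see IOMemoryDescriptor.cpp below), and both functions now take vm_map_t * as an in/out parameter, because a kernel_map request may be satisfied from one of the pageable submaps and the caller needs to know which map was actually chosen before issuing follow-on mach_vm_remap calls into it. A toy illustration of the in/out idiom (map_t, map_copy and the map names are hypothetical stand-ins):

    #include <stdio.h>

    typedef struct { const char *name; } map_t;

    static map_t kernel_map_stub = { "kernel_map" };
    static map_t pageable_submap = { "pageable submap" };

    /* The callee may substitute a different map and writes the
     * choice back, mirroring IOMemoryDescriptorMapCopy. */
    static int map_copy(map_t **map)
    {
        if (*map == &kernel_map_stub)
            *map = &pageable_submap;
        /* ... allocate in *map ... */
        return 0;
    }

    int main(void)
    {
        map_t *map = &kernel_map_stub;
        map_copy(&map);
        printf("allocated in: %s\n", map->name);
        return 0;
    }
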
index bd3c671767f1ded4a14079781f6ad4a5362c8930..a46021ede7021636d71e0883c41519803169ed28 100644 (file)
@@ -2311,6 +2311,9 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
     user_addr_t range0Addr = 0;
     IOByteCount range0Len = 0;
 
+    if ((offset >= _length) || ((offset + length) > _length))
+       return( kIOReturnBadArgument );
+
     if (vec.v)
        getAddrLenForInd(range0Addr, range0Len, type, vec, 0);
 
@@ -2339,38 +2342,110 @@ IOReturn IOGeneralMemoryDescriptor::doMap(
            else if (kIOMapDefaultCache != (options & kIOMapCacheMask))
                prot |= VM_PROT_WRITE;
 
-            kr = mach_make_memory_entry_64(get_task_map(_task),
-                        &actualSize, range0Addr,
-                        prot, &sharedMem,
-                        NULL );
-
-            if( (KERN_SUCCESS == kr) && (actualSize != round_page(size)))
+            if (_rangesCount == 1)
+            {
+                kr = mach_make_memory_entry_64(get_task_map(_task),
+                                                &actualSize, range0Addr,
+                                                prot, &sharedMem,
+                                                NULL);
+            }
+            if( (_rangesCount != 1) 
+                || ((KERN_SUCCESS == kr) && (actualSize != round_page(size))))
+            do
            {
-               // map will cross vm objects
 #if IOASSERT
-                IOLog("mach_make_memory_entry_64 (%08llx) size (%08llx:%08llx)\n",
-                     range0Addr, (UInt64)actualSize, (UInt64)size);
+                IOLog("mach_vm_remap path for ranges %d size (%08llx:%08llx)\n",
+                     _rangesCount, (UInt64)actualSize, (UInt64)size);
 #endif
                 kr = kIOReturnVMError;
-                ipc_port_release_send( sharedMem );
-                sharedMem = MACH_PORT_NULL;
-
-               mach_vm_address_t address;
-               mach_vm_size_t    pageOffset = (range0Addr & PAGE_MASK);
+                if (sharedMem)
+                {
+                    ipc_port_release_send(sharedMem);
+                    sharedMem = MACH_PORT_NULL;
+                }
 
+               mach_vm_address_t address, segDestAddr;
+                mach_vm_size_t    mapLength;
+                unsigned          rangesIndex;
+                IOOptionBits      type = _flags & kIOMemoryTypeMask;
+                user_addr_t       srcAddr;
+                IOPhysicalLength  segLen = 0;
+
+                // Find starting address within the vector of ranges
+                for (rangesIndex = 0; rangesIndex < _rangesCount; rangesIndex++) {
+                    getAddrLenForInd(srcAddr, segLen, type, _ranges, rangesIndex);
+                    if (offset < segLen)
+                        break;
+                    offset -= segLen; // (make offset relative)
+                } 
+
+               mach_vm_size_t    pageOffset = (srcAddr & PAGE_MASK);
                address = trunc_page_64(mapping->fAddress);
+
                if ((options & kIOMapAnywhere) || ((mapping->fAddress - address) == pageOffset))
                {
-                   kr = IOMemoryDescriptorMapCopy(mapping->fAddressMap, 
-                                                   get_task_map(_task), range0Addr,
+                   vm_map_t map = mapping->fAddressMap;
+                   kr = IOMemoryDescriptorMapCopy(&map, 
                                                    options,
                                                    offset, &address, round_page_64(length + pageOffset));
-                   if (kr == KERN_SUCCESS)
+                    if (kr == KERN_SUCCESS)
+                    {
+                        segDestAddr  = address;
+                        segLen      -= offset;
+                        mapLength    = length;
+
+                        while (true)
+                        {
+                            vm_prot_t cur_prot, max_prot;
+                            kr = mach_vm_remap(map, &segDestAddr, round_page_64(segLen), PAGE_MASK, 
+                                                    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
+                                                    get_task_map(_task), trunc_page_64(srcAddr),
+                                                    FALSE /* copy */,
+                                                    &cur_prot,
+                                                    &max_prot,
+                                                    VM_INHERIT_NONE);
+                            if (KERN_SUCCESS == kr)
+                            {
+                                if ((!(VM_PROT_READ & cur_prot))
+                                    || (!(kIOMapReadOnly & options) && !(VM_PROT_WRITE & cur_prot)))
+                                {
+                                    kr = KERN_PROTECTION_FAILURE;
+                                }
+                            }
+                            if (KERN_SUCCESS != kr)
+                                break;
+                            segDestAddr += segLen;
+                            mapLength   -= segLen;
+                            if (!mapLength)
+                                break;
+                            rangesIndex++;
+                            if (rangesIndex >= _rangesCount)
+                            {
+                                kr = kIOReturnBadArgument;
+                                break;
+                            }
+                            getAddrLenForInd(srcAddr, segLen, type, vec, rangesIndex);
+                            if (srcAddr & PAGE_MASK)
+                            {
+                                kr = kIOReturnBadArgument;
+                                break;
+                            }
+                            if (segLen > mapLength)
+                                segLen = mapLength;
+                        } 
+                        if (KERN_SUCCESS != kr)
+                        {
+                            mach_vm_deallocate(mapping->fAddressMap, address, round_page_64(length + pageOffset));
+                        }
+                    }
+
+                   if (KERN_SUCCESS == kr)
                        mapping->fAddress = address + pageOffset;
                    else
                        mapping->fAddress = NULL;
                }
             }
+            while (false);
         } 
        else do
        {       // _task == 0, must be physical
@@ -2553,8 +2628,7 @@ bool IOMemoryMap::setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_size
 struct IOMemoryDescriptorMapAllocRef
 {
     ipc_port_t         sharedMem;
-    vm_map_t            src_map;
-    mach_vm_offset_t    src_address;
+    vm_map_t            map;
     mach_vm_address_t  mapped;
     mach_vm_size_t     size;
     mach_vm_size_t     sourceOffset;
@@ -2624,40 +2698,20 @@ static kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref)
                 ref->mapped = 0;
                 continue;
             }
+            ref->map = map;
         }
-        else if (ref->src_map)
-       {
-           vm_prot_t cur_prot, max_prot;
-           err = mach_vm_remap(map, &ref->mapped, ref->size, PAGE_MASK, 
-                                   (ref->options & kIOMapAnywhere) ? TRUE : FALSE,
-                                   ref->src_map, ref->src_address,
-                                   FALSE /* copy */,
-                                   &cur_prot,
-                                   &max_prot,
-                                   VM_INHERIT_NONE);
-            if (KERN_SUCCESS == err)
-           {
-               if ((!(VM_PROT_READ & cur_prot))
-                   || (!(kIOMapReadOnly & ref->options) && !(VM_PROT_WRITE & cur_prot)))
-               {
-                   mach_vm_deallocate(map, ref->mapped, ref->size);
-                   err = KERN_PROTECTION_FAILURE;
-               }
-           }
-            if (KERN_SUCCESS != err)
-                ref->mapped = 0;
-       }
        else
        {
-            err = mach_vm_allocate( map, &ref->mapped, ref->size,
+            err = mach_vm_allocate(map, &ref->mapped, ref->size,
                             ((ref->options & kIOMapAnywhere) ? VM_FLAGS_ANYWHERE : VM_FLAGS_FIXED)
                             | VM_MAKE_TAG(VM_MEMORY_IOKIT) );
             if( KERN_SUCCESS != err) {
                 ref->mapped = 0;
                 continue;
             }
+            ref->map = map;
             // we have to make sure that these guys don't get copied if we fork.
-            err = vm_inherit( map, ref->mapped, ref->size, VM_INHERIT_NONE);
+            err = vm_inherit(map, ref->mapped, ref->size, VM_INHERIT_NONE);
             assert( KERN_SUCCESS == err );
         }
     }
@@ -2667,15 +2721,14 @@ static kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref)
 }
 
 kern_return_t 
-IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits options, bool pageable,
+IOMemoryDescriptorMapMemEntry(vm_map_t * map, ipc_port_t entry, IOOptionBits options, bool pageable,
                                mach_vm_size_t offset, 
                                mach_vm_address_t * address, mach_vm_size_t length)
 {
     IOReturn err;
     IOMemoryDescriptorMapAllocRef ref;
 
-    ref.sharedMem    = entry;
-    ref.src_map      = NULL;
+    ref.map          = *map;
     ref.sharedMem    = entry;
     ref.sourceOffset = trunc_page_64(offset);
     ref.options             = options;
@@ -2687,19 +2740,19 @@ IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits optio
     else
        ref.mapped = *address;
 
-    if( ref.sharedMem && (map == kernel_map) && pageable)
+    if( ref.sharedMem && (ref.map == kernel_map) && pageable)
        err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref );
     else
-       err = IOMemoryDescriptorMapAlloc( map, &ref );
+       err = IOMemoryDescriptorMapAlloc( ref.map, &ref );
 
     *address = ref.mapped;
+    *map     = ref.map;
+
     return (err);
 }
 
 kern_return_t 
-IOMemoryDescriptorMapCopy(vm_map_t map, 
-                               vm_map_t src_map, 
-                               mach_vm_offset_t src_address,
+IOMemoryDescriptorMapCopy(vm_map_t * map, 
                                IOOptionBits options,
                                mach_vm_size_t offset, 
                                mach_vm_address_t * address, mach_vm_size_t length)
@@ -2707,9 +2760,8 @@ IOMemoryDescriptorMapCopy(vm_map_t map,
     IOReturn err;
     IOMemoryDescriptorMapAllocRef ref;
 
+    ref.map          = *map;
     ref.sharedMem    = NULL;
-    ref.src_map      = src_map;
-    ref.src_address  = src_address;
     ref.sourceOffset = trunc_page_64(offset);
     ref.options             = options;
     ref.size         = length;
@@ -2720,12 +2772,14 @@ IOMemoryDescriptorMapCopy(vm_map_t map,
     else
        ref.mapped = *address;
 
-    if (map == kernel_map)
+    if (ref.map == kernel_map)
        err = IOIteratePageableMaps(ref.size, &IOMemoryDescriptorMapAlloc, &ref);
     else
-       err = IOMemoryDescriptorMapAlloc(map, &ref);
+       err = IOMemoryDescriptorMapAlloc(ref.map, &ref);
 
     *address = ref.mapped;
+    *map     = ref.map;
+
     return (err);
 }
 
@@ -2828,7 +2882,8 @@ IOReturn IOMemoryDescriptor::doMap(
                }
            }
 
-           err = IOMemoryDescriptorMapMemEntry(mapping->fAddressMap, (ipc_port_t) _memEntry,
+            vm_map_t map = mapping->fAddressMap;
+           err = IOMemoryDescriptorMapMemEntry(&map, (ipc_port_t) _memEntry,
                                                    options, (kIOMemoryBufferPageable & _flags),
                                                    offset, &address, round_page_64(length + pageOffset));
            if( err != KERN_SUCCESS)
@@ -3515,7 +3570,7 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping(
 
        if (kIOMapUnique & options)
        {
-           IOPhysicalAddress phys;
+           addr64_t phys;
            IOByteCount       physLen;
 
 //         if (owner != this)          continue;
@@ -3717,7 +3772,7 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const
        user_addr_t addr = vcopy[index].address;
        IOByteCount len = (IOByteCount) vcopy[index].length;
        values[0] =
-           OSNumber::withNumber(addr, (((UInt64) addr) >> 32)? 64 : 32);
+           OSNumber::withNumber(addr, sizeof(addr) * 8);
        if (values[0] == 0) {
          result = false;
          goto bail;
@@ -3791,6 +3846,3 @@ OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 15);
 IOPhysicalAddress 
 IOMemoryDescriptor::getPhysicalAddress()
         { return( getPhysicalSegment( 0, 0 )); }
-
-
-
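
This is the heart of the IOMemoryDescriptor.cpp change: doMap() used to wrap a single range in one memory entry and gave up with kIOReturnVMError when the mapping crossed VM objects; the new path walks the descriptor's range vector, mach_vm_remap()s each segment to consecutive destination addresses, checks the resulting protections, and tears the whole destination window down with mach_vm_deallocate() on any failure. A simplified sketch of the walk (range_t, remap_stub and map_ranges are stand-ins, and the page-offset/alignment handling of the real code is omitted):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t addr, len; } range_t;

    static int remap_stub(uint64_t dst, uint64_t src, uint64_t len)
    {
        printf("remap %#llx bytes %#llx -> %#llx\n",
               (unsigned long long)len, (unsigned long long)src,
               (unsigned long long)dst);
        return 0;
    }

    static int map_ranges(const range_t *r, unsigned count,
                          uint64_t offset, uint64_t length, uint64_t dst)
    {
        unsigned i;

        /* Find the range containing 'offset', making it range-relative. */
        for (i = 0; i < count; i++) {
            if (offset < r[i].len)
                break;
            offset -= r[i].len;
        }
        if (i == count)
            return -1;                     /* offset past the vector */

        /* Remap successive segments to consecutive destinations. */
        uint64_t seg = r[i].len - offset, remaining = length;
        for (;;) {
            if (seg > remaining)
                seg = remaining;
            if (remap_stub(dst, r[i].addr + offset, seg))
                return -1;
            dst += seg;
            remaining -= seg;
            offset = 0;
            if (!remaining)
                break;
            if (++i >= count)
                return -1;                 /* length past the vector */
            seg = r[i].len;
        }
        return 0;
    }

    int main(void)
    {
        range_t r[] = { { 0x1000, 0x2000 }, { 0x8000, 0x3000 } };
        return map_ranges(r, 2, 0x1800, 0x2800, 0x70000000);
    }
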
index 2144447aed054d6cab564a3019dda20450e59fa6..3ccda1a1b5b0fd50031e57d3eb070b3398a9a58c 100644 (file)
@@ -3167,6 +3167,11 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg )
         clamshellIsClosed = false;
         clamshellExists = true;
 
+        if (msg & kIOPMSetValue)
+        {
+            reportUserInput();
+        }
+
         // Tell PMCPU
         informCPUStateChange(kInformLid, 0);
 
@@ -3585,8 +3590,8 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type )
        // Notify legacy clients
        applyToInterested(gIOPriorityPowerStateInterest, platformHaltRestartApplier, &ctx);
 
-    // For UPS shutdown leave File Server Mode intact, otherwise turn it off.
-    if (kPEUPSDelayHaltCPU != pe_type)
+    // For normal shutdown, turn off File Server Mode.
+    if (kPEHaltCPU == pe_type)
     {
         const OSSymbol * setting = OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey);
         OSNumber * num = OSNumber::withNumber((unsigned long long) 0, 32);
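
Two behavioral tweaks in IOPMrootDomain.cpp: a clamshell message carrying kIOPMSetValue now also counts as user activity via reportUserInput(), and File Server Mode is cleared only on a true halt (kPEHaltCPU), where it was previously cleared on every path except a UPS-delayed halt, so restarts now leave the setting untouched.
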
index 8109e190e3472f3d07e43c3434bb096528f31a7e..f00ffd725bc4d9d8718af241f3a54040f2163a76 100644 (file)
@@ -25,9 +25,6 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * HISTORY
- */
  
 #include <IOKit/IOCPU.h>
 #include <IOKit/IODeviceTreeSupport.h>
@@ -373,6 +370,7 @@ PMLog(const char *who, unsigned long event,
       unsigned long param1, unsigned long param2)
 {
     UInt32 debugFlags = gIOKitDebug;
+    UInt32 traceFlags = gIOKitTrace;
 
     if (debugFlags & kIOLogPower) {
 
@@ -385,7 +383,7 @@ PMLog(const char *who, unsigned long event,
                nowus, current_thread(), who,   // Identity
                (int) event, (long) param1, (long) param2);                     // Args
 
-       if (debugFlags & kIOLogTracePower) {
+       if (traceFlags & kIOTracePowerMgmt) {
            static const UInt32 sStartStopBitField[] = 
                { 0x00000000, 0x00000040 }; // Only Program Hardware so far
 
index 62dda56feee9f0cd4798909e052b003c1c8ffe63..1a28626cfe6a492ce6b6343ebf87916511fbf450 100644 (file)
@@ -216,7 +216,7 @@ bool IOService::isInactive( void ) const
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #define IOServiceTrace(csc, a, b, c, d) {                              \
-    if(kIOTraceIOService & gIOKitDebug) {                              \
+    if(kIOTraceIOService & gIOKitTrace) {                              \
        KERNEL_DEBUG_CONSTANT(IODBG_IOSERVICE(csc), a, b, c, d, 0);     \
     }                                                                  \
 }
@@ -2124,8 +2124,8 @@ void IOService::terminateWorker( IOOptionBits options )
                        (uintptr_t) (regID2 >> 32));
 
                 } else {
-                    // not ready for stop if it has clients, skip it
-                    if( (client->__state[1] & kIOServiceTermPhase3State) && client->getClient()) {
+                    // a terminated client is not ready for stop if it has clients, skip it
+                    if( (kIOServiceInactiveState & client->__state[0]) && client->getClient()) {
                         TLOG("%s::defer stop(%s)\n", client->getName(), provider->getName());
 
                        uint64_t regID1 = provider->getRegistryEntryID();
@@ -3065,8 +3065,8 @@ void IOService::doServiceMatch( IOOptionBits options )
             __state[1] |= kIOServiceConfigState;
             __state[0] |= kIOServiceRegisteredState;
 
-            if( reRegistered && (0 == (__state[0] & kIOServiceInactiveState))) {
-
+           keepGuessing &= (0 == (__state[0] & kIOServiceInactiveState));
+            if (reRegistered && keepGuessing) {
                 iter = OSCollectionIterator::withCollection( (OSOrderedSet *)
                         gNotifications->getObject( gIOPublishNotification ) );
                 if( iter) {
@@ -3084,7 +3084,7 @@ void IOService::doServiceMatch( IOOptionBits options )
            UNLOCKNOTIFY();
             unlockForArbitration();
 
-            if( matches->getCount() && (kIOReturnSuccess == getResources()))
+            if (keepGuessing && matches->getCount() && (kIOReturnSuccess == getResources()))
                 probeCandidates( matches );
             else
                 matches->release();
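
Three related IOService.cpp fixes: the IOServiceTrace macro is now gated on gIOKitTrace instead of gIOKitDebug (consistent with the debug/trace split elsewhere in this commit), terminateWorker() defers stopping a client based on the inactive bit in __state[0] rather than the phase-3 terminate bit, and doServiceMatch() folds the inactive check into keepGuessing so a service that went inactive mid-match neither replays publish notifications nor probes the collected candidates.
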
index 57d40396f8f87685d3951d5a6086bf684921d26f..fcecfbf007433fff7b37225be96f694a5938d5ce 100644 (file)
@@ -1954,9 +1954,9 @@ IOReturn IOService::requestPowerDomainState(
     // at its current or impending power state. 
 
     outputPowerFlags = fPowerStates[fCurrentPowerState].outputPowerCharacter;
-       if ((fMachineState != kIOPM_Finished) && (getPMRootDomain() != this))
+       if (fMachineState != kIOPM_Finished)
        {
-               if (IS_POWER_DROP)
+               if (IS_POWER_DROP && (getPMRootDomain() != this))
                {
                        // Use the lower power state when dropping power. 
                        // Must be careful since a power drop can be canceled
@@ -1987,7 +1987,7 @@ IOReturn IOService::requestPowerDomainState(
                                        fPowerStates[fHeadNotePowerState].outputPowerCharacter;
                        }
                }
-               else
+               else if (IS_POWER_RISE)
                {
                        // When raising power, must report the output power flags from
                        // child's perspective. A child power request may arrive while
@@ -5576,6 +5576,26 @@ bool IOService::servicePMFreeQueue(
     if (root && (root != request))
         more = true;
 
+    if (fLockedFlags.PMStop && fPMWorkQueue && fPMWorkQueue->isEmpty())
+    {
+        // Driver PMstop'ed and the work queue is empty.
+        // Detach and destroy the work queue to avoid the similar cleanup by
+        // PMfree(), which is deadlock prone. After PMstop() if driver calls PM,
+        // or a request from power parent or child arrives, it is possible to
+        // create/cleanup work queue more than once. Should be rare.
+
+        gIOPMWorkLoop->removeEventSource(fPMWorkQueue);
+        fPMWorkQueue->release();
+        fPMWorkQueue = 0;
+
+        if ( fIdleTimerEventSource != NULL ) {
+            fIdleTimerEventSource->disable();
+            gIOPMWorkLoop->removeEventSource(fIdleTimerEventSource);
+            fIdleTimerEventSource->release();
+            fIdleTimerEventSource = NULL;
+        }              
+    }
+
        releasePMRequest( request );
        return more;
 }
index 772ac518cc41573c0af9ae70ab9d98eafab24960..818285f8e97acfbb7dc72a7abd679c709cb6300b 100644 (file)
@@ -491,6 +491,11 @@ protected:
 public:
     static  IOPMWorkQueue * create( IOService * inOwner, Action work, Action retire );
     void    queuePMRequest( IOPMRequest * request );
+
+    inline boolean_t isEmpty( void )
+    {
+        return queue_empty(&fWorkQueue);
+    }
 };
 
 class IOPMCompletionQueue : public IOEventSource
index b621622fa938e72cec9b9b9f209fd47b9e1aa391..7b70541d6486089b04e756e859add89749f4cc52 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 1998,1999 Apple Inc.  All rights reserved. 
- *
- * HISTORY
- *
- */
 
 #include <libkern/c++/OSUnserialize.h>
 #include <libkern/c++/OSKext.h>
@@ -128,14 +122,24 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 )
 {
     IOPlatformExpertDevice *   rootNub;
     int                                debugFlags;
+       uint32_t                intThreshold;
 
     if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) ))
        gIOKitDebug = debugFlags;
 
+    if( PE_parse_boot_argn( "iotrace", &debugFlags, sizeof (debugFlags) ))
+               gIOKitTrace = debugFlags;
+       
+       // Compat for boot-args
+       gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs);
+
+    if( PE_parse_boot_argn( "iointthreshold", &intThreshold, sizeof (intThreshold) ))
+               gIOInterruptThresholdNS = intThreshold * 1000;
+       
     // Check for the log synchronous bit set in io
     if (gIOKitDebug & kIOLogSynchronous)
         debug_mode = true;
-
+       
     //
     // Have to start IOKit environment before we attempt to start
     // the C++ runtime environment.  At some stage we have to clean up
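
Worth noting in this hunk: "iotrace" seeds gIOKitTrace directly and any trace bits in the legacy "io" boot-arg are OR-ed in through kIOTraceCompatBootArgs, while "iointthreshold" is multiplied by 1000 before landing in gIOInterruptThresholdNS, so the boot-arg is expressed in microseconds even though the debug.iointthreshold sysctl variable holds nanoseconds.
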
index ed45f6ab94ce550e28c44e473d2897064a77c482..112deeee73e2d73f0a0045f1ce5ec1aa339c1a5a 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 1999 Apple Computer, Inc.  All rights reserved. 
- *
- * IOTimerEventSource.cpp
- *
- * HISTORY
- * 2-Feb-1999          Joe Liu (jliu) created.
- * 1999-10-14          Godfrey van der Linden(gvdl)
- *             Revamped to use thread_call APIs
- *
- */
 
 #include <sys/cdefs.h>
 
@@ -51,6 +40,7 @@ __END_DECLS
 #include <IOKit/IOWorkLoop.h>
 
 #include <IOKit/IOTimeStamp.h>
+#include <IOKit/IOKitDebug.h>
 
 #define super IOEventSource
 OSDefineMetaClassAndStructors(IOTimerEventSource, IOEventSource)
@@ -88,9 +78,17 @@ void IOTimerEventSource::timeout(void *self)
             doit = (Action) me->action;
             if (doit && me->enabled && AbsoluteTime_to_scalar(&me->abstime))
             {
-                IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION),
+               bool    trace = (gIOKitTrace & kIOTraceTimers) ? true : false;
+               
+               if (trace)
+                       IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION),
                                     (uintptr_t) doit, (uintptr_t) me->owner);
+                               
                 (*doit)(me->owner, me);
+                
+                               if (trace)
+                       IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION),
+                                                                                  (uintptr_t) doit, (uintptr_t) me->owner);
             }
             wl->openGate();
         }
@@ -115,9 +113,17 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c)
             doit = (Action) me->action;
             if (doit && (me->reserved->calloutGeneration == count))
             {
-                IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION),
+               bool    trace = (gIOKitTrace & kIOTraceTimers) ? true : false;
+               
+               if (trace)
+                       IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION),
                                     (uintptr_t) doit, (uintptr_t) me->owner);
+                               
                 (*doit)(me->owner, me);
+                
+                               if (trace)
+                       IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION),
+                                                                                  (uintptr_t) doit, (uintptr_t) me->owner);
             }
             wl->openGate();
         }
index 688a7c0135ad11e5c5adef29a0a677c3588ee211..c32a565f6861d4fa71bf875d51c4033870f0bebd 100644 (file)
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
-Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
-
-HISTORY
-    1998-7-13  Godfrey van der Linden(gvdl)
-        Created.
-*/
 
 #include <pexpert/pexpert.h>
 #include <IOKit/IOWorkLoop.h>
@@ -39,6 +32,7 @@ HISTORY
 #include <IOKit/IOInterruptEventSource.h>
 #include <IOKit/IOCommandGate.h>
 #include <IOKit/IOTimeStamp.h>
+#include <IOKit/IOKitDebug.h>
 #include <libkern/OSDebug.h>
 
 #define super OSObject
@@ -300,11 +294,16 @@ do {                                                                      \
 /* virtual */ bool IOWorkLoop::runEventSources()
 {
     bool res = false;
+    bool traceWL = (gIOKitTrace & kIOTraceWorkLoops) ? true : false;
+    bool traceES = (gIOKitTrace & kIOTraceEventSources) ? true : false;
+    
     closeGate();
     if (ISSETP(&fFlags, kLoopTerminate))
        goto abort;
 
-    IOTimeWorkS();
+    if (traceWL)
+       IOTimeStampStartConstant(IODBG_WORKLOOP(IOWL_WORK), (uintptr_t) this);
+       
     bool more;
     do {
        CLRP(&fFlags, kLoopRestart);
@@ -314,9 +313,13 @@ do {                                                                       \
        IOSimpleLockUnlockEnableInterrupt(workToDoLock, is);
        for (IOEventSource *evnt = eventChain; evnt; evnt = evnt->getNext()) {
 
-           IOTimeClientS();
+               if (traceES)
+                       IOTimeStampStartConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt);
+                       
            more |= evnt->checkForWork();
-           IOTimeClientE();
+                       
+               if (traceES)
+                       IOTimeStampEndConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt);
 
            if (ISSETP(&fFlags, kLoopTerminate))
                goto abort;
@@ -328,7 +331,9 @@ do {                                                                        \
     } while (more);
 
     res = true;
-    IOTimeWorkE();
+       
+    if (traceWL)
+       IOTimeStampEndConstant(IODBG_WORKLOOP(IOWL_WORK), (uintptr_t) this);
 
 abort:
     openGate();
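
Same pattern as the other event-source files in this commit: the compile-time KDEBUG macros (IOTimeWorkS/E and IOTimeClientS/E here, IOTimeTypeStampS/E in IOInterruptEventSource.cpp) are replaced by IOTimeStampStartConstant/IOTimeStampEndConstant pairs gated at runtime on individual gIOKitTrace bits (kIOTraceWorkLoops, kIOTraceEventSources, kIOTraceIntEventSource, kIOTraceTimers, kIOTraceInterrupts), so tracing can be switched on a production kernel via debug.iotrace or the iotrace boot-arg instead of requiring a KDEBUG build.
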
index 53e0a769a6da9307b2ada3cb89d1d87d4b4ed44b..5c2205e249e8bfba5cc718d6e91a5c65e7823ebd 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -589,12 +589,15 @@ define showactint
                        set $stkmask = 0x3
                end
                set $kgm_return = 0
+               set $kgm_actint_framecount = 0
                while ($mysp != 0) && (($mysp & $stkmask) == 0) \
                      && ($mysp != $prevsp) \
                      && ((((unsigned long) $mysp ^ (unsigned long) $prevsp) < 0x2000) \
                      || (((unsigned long)$mysp < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \
-                     && ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack))))
+                     && ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack)))) \
+                     && ($kgm_actint_framecount < 128)
                        printf "\n          "
+                       set $kgm_actint_framecount = $kgm_actint_framecount + 1
                        showptrhdrpad
                        printf "      "
                        showptr $mysp
@@ -7129,7 +7132,7 @@ define showbootermemorymap
             set $kgm_mptr = (EfiMemoryRange *)((unsigned long)kernelBootArgs->MemoryMap + $kgm_voffset + $kgm_i * $kgm_msize)
 #           p/x *$kgm_mptr
             if $kgm_mptr->Type == 0
-              printf "reserved  "
+              printf "Reserved  "
             end
             if $kgm_mptr->Type == 1
               printf "LoaderCode"
@@ -7150,7 +7153,7 @@ define showbootermemorymap
               printf "RT_data   "
             end
             if $kgm_mptr->Type == 7
-              printf "available "
+              printf "Convention"
             end
             if $kgm_mptr->Type == 8
               printf "Unusable  "
@@ -9254,6 +9257,8 @@ set $_ioapic_index_ver         = 0x01
 set $_ioapic_index_redir_base  = 0x10
 
 set $_apic_vector_mask         = 0xFF
+set $_apic_timer_tsc_deadline  = 0x40000
+set $_apic_timer_periodic      = 0x20000
 set $_apic_masked              = 0x10000
 set $_apic_trigger_level       = 0x08000
 set $_apic_polarity_high       = 0x02000
@@ -9301,30 +9306,39 @@ end
 define _apic_print
        set $value = $arg0
 
-       printf "[VEC=%3d ", $value & $_apic_vector_mask
+       printf "[VEC=%3d", $value & $_apic_vector_mask
        if $value & $_apic_masked
-         printf "MASK=yes "
+         printf " MASK=yes"
        else
-         printf "MASK=no  "
+         printf " MASK=no "
        end       
 
        if $value & $_apic_trigger_level
-                 printf "TRIG=level "
+                 printf " TRIG=level"
        else
-         printf "TRIG=edge  "
+         printf " TRIG=edge "
        end
 
        if $value & $_apic_polarity_high
-                 printf "POL=high"
+                 printf " POL=high"
        else
-         printf "POL=low "
+         printf " POL=low "
        end
 
        if $value & $_apic_pending
-                 printf " PEND=yes]\n"
+                 printf " PEND=yes"
        else
-         printf " PEND=no ]\n"
+         printf " PEND=no "
        end
+
+       if $value & $_apic_timer_periodic
+         printf " PERIODIC"
+       end
+       if $value & $_apic_timer_tsc_deadline
+         printf " TSC_DEADLINE"
+       end
+
+       printf "]\n"
 end
 
 define ioapic_read32
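
The _apic_print rework above tightens the field spacing and decodes the two newly defined LVT timer mode bits. The same decode in C, using only the bit values defined in this hunk (the pending mask is not shown here, so that field is omitted):

    #include <stdint.h>
    #include <stdio.h>

    #define APIC_VECTOR_MASK        0xFF
    #define APIC_TIMER_TSC_DEADLINE 0x40000
    #define APIC_TIMER_PERIODIC     0x20000
    #define APIC_MASKED             0x10000
    #define APIC_TRIGGER_LEVEL      0x08000
    #define APIC_POLARITY_HIGH      0x02000

    static void apic_print(uint32_t v)
    {
        printf("[VEC=%3u MASK=%s TRIG=%s POL=%s%s%s]\n",
               (unsigned)(v & APIC_VECTOR_MASK),
               (v & APIC_MASKED)        ? "yes"   : "no ",
               (v & APIC_TRIGGER_LEVEL) ? "level" : "edge ",
               (v & APIC_POLARITY_HIGH) ? "high"  : "low ",
               (v & APIC_TIMER_PERIODIC)     ? " PERIODIC"     : "",
               (v & APIC_TIMER_TSC_DEADLINE) ? " TSC_DEADLINE" : "");
    }

    int main(void)
    {
        apic_print(0x400FE);   /* TSC-deadline timer, vector 254 */
        return 0;
    }
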
index 405c2089fc414e77a3169b4b9a10bf714ab4141d..d24ace3bf8da386fb355cd5bce7ad0a9331833b5 100644 (file)
@@ -24,7 +24,6 @@ OBJS_NO_WERROR=                               \
        security_server.o \
        device_server.o \
        gssd_mach.o \
-       mp.o # This is blocked on 6640051
 
 OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
 
index 36cb50feb4cb7eb431b15c4b0d28258d0c37b235..234a022b878560247ce6e8d6d740c3f323f74297 100644 (file)
@@ -307,7 +307,9 @@ cnputc(char c)
                 */
                while (cbp->buf_ptr-cbp->buf_base + 1 > console_ring_space()) {
                        simple_unlock(&console_ring.write_lock);
+                       ml_set_interrupts_enabled(state);
                        console_ring_try_empty();
+                       state = ml_set_interrupts_enabled(FALSE);
                        SIMPLE_LOCK_NO_INTRS(&console_ring.write_lock);
                }
                for (cp = cbp->buf_base; cp < cbp->buf_ptr; cp++)
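
A small but real fix in cnputc(): while a writer spins waiting for ring space it now re-enables interrupts around console_ring_try_empty() and masks them again before retaking the write lock, presumably so a CPU waiting for the ring to drain is not parked with interrupts disabled for the duration.
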
index ebd35c82bfb7130675e268cb772b4b3486787a5c..4b088aa415bf69c3c028cfbd690836d3a6a90e77 100644 (file)
@@ -149,11 +149,12 @@ static struct {
 static unsigned char *gc_buffer_attributes;
 static unsigned char *gc_buffer_characters;
 static unsigned char *gc_buffer_colorcodes;
+static unsigned char *gc_buffer_tab_stops;
 static uint32_t gc_buffer_columns;
 static uint32_t gc_buffer_rows;
 static uint32_t gc_buffer_size;
 
-#ifdef __i386__
+#if defined(__i386__) || defined(__x86_64__)
 decl_simple_lock_data(static, vcputc_lock);
 
 #define VCPUTC_LOCK_INIT()                             \
@@ -225,8 +226,7 @@ static unsigned char gc_color_code;
 static unsigned int gc_x, gc_y, gc_savex, gc_savey;
 static unsigned int gc_par[MAXPARS], gc_numpars, gc_hanging_cursor, gc_attr, gc_saveattr;
 
-/* VT100 tab stops & scroll region */
-static char gc_tab_stops[255];
+/* VT100 scroll region */
 static unsigned int gc_scrreg_top, gc_scrreg_bottom;
 
 #ifdef CONFIG_VC_PROGRESS_WHITE
@@ -261,6 +261,7 @@ static void gc_clear_screen(unsigned int xx, unsigned int yy, int top,
 static void gc_enable(boolean_t enable);
 static void gc_hide_cursor(unsigned int xx, unsigned int yy);
 static void gc_initialize(struct vc_info * info);
+static boolean_t gc_is_tab_stop(unsigned int column);
 static void gc_paint_char(unsigned int xx, unsigned int yy, unsigned char ch,
                int attrs);
 static void gc_putchar(char ch);
@@ -277,6 +278,7 @@ static void gc_reset_tabs(void);
 static void gc_reset_vt100(void);
 static void gc_scroll_down(int num, unsigned int top, unsigned int bottom);
 static void gc_scroll_up(int num, unsigned int top, unsigned int bottom);
+static void gc_set_tab_stop(unsigned int column, boolean_t enabled);
 static void gc_show_cursor(unsigned int xx, unsigned int yy);
 static void gc_update_color(int color, boolean_t fore);
 
@@ -318,7 +320,7 @@ static void
 gc_clear_screen(unsigned int xx, unsigned int yy, int top, unsigned int bottom,
                int which)
 {
-        if (!gc_buffer_size) return;
+       if (!gc_buffer_size) return;
 
        if ( xx < gc_buffer_columns && yy < gc_buffer_rows && bottom <= gc_buffer_rows )
        {
@@ -357,6 +359,7 @@ gc_enable( boolean_t enable )
        unsigned char *buffer_attributes = NULL;
        unsigned char *buffer_characters = NULL;
        unsigned char *buffer_colorcodes = NULL;
+       unsigned char *buffer_tab_stops  = NULL;
        uint32_t buffer_columns = 0;
        uint32_t buffer_rows = 0;
        uint32_t buffer_size = 0;
@@ -379,11 +382,15 @@ gc_enable( boolean_t enable )
                buffer_attributes = gc_buffer_attributes;
                buffer_characters = gc_buffer_characters;
                buffer_colorcodes = gc_buffer_colorcodes;
+               buffer_tab_stops  = gc_buffer_tab_stops;
+               buffer_columns    = gc_buffer_columns;
+               buffer_rows       = gc_buffer_rows;
                buffer_size       = gc_buffer_size;
 
                gc_buffer_attributes = NULL;
                gc_buffer_characters = NULL;
                gc_buffer_colorcodes = NULL;
+               gc_buffer_tab_stops  = NULL;
                gc_buffer_columns    = 0;
                gc_buffer_rows       = 0;
                gc_buffer_size       = 0;
@@ -394,6 +401,7 @@ gc_enable( boolean_t enable )
                kfree( buffer_attributes, buffer_size );
                kfree( buffer_characters, buffer_size );
                kfree( buffer_colorcodes, buffer_size );
+               kfree( buffer_tab_stops,  buffer_columns );
        }
        else
        {
@@ -414,14 +422,17 @@ gc_enable( boolean_t enable )
                                buffer_attributes = (unsigned char *) kalloc( buffer_size );
                                buffer_characters = (unsigned char *) kalloc( buffer_size );
                                buffer_colorcodes = (unsigned char *) kalloc( buffer_size );
+                               buffer_tab_stops  = (unsigned char *) kalloc( buffer_columns );
 
                                if ( buffer_attributes == NULL ||
                                     buffer_characters == NULL ||
-                                    buffer_colorcodes == NULL )
+                                    buffer_colorcodes == NULL ||
+                                    buffer_tab_stops  == NULL )
                                {
                                        if ( buffer_attributes ) kfree( buffer_attributes, buffer_size );
                                        if ( buffer_characters ) kfree( buffer_characters, buffer_size );
                                        if ( buffer_colorcodes ) kfree( buffer_colorcodes, buffer_size );
+                                       if ( buffer_tab_stops  ) kfree( buffer_tab_stops,  buffer_columns );
 
                                        buffer_columns = 0;
                                        buffer_rows    = 0;
@@ -432,6 +443,7 @@ gc_enable( boolean_t enable )
                                        memset( buffer_attributes, ATTR_NONE, buffer_size );
                                        memset( buffer_characters, ' ', buffer_size );
                                        memset( buffer_colorcodes, COLOR_CODE_SET( 0, COLOR_FOREGROUND, TRUE ), buffer_size );
+                                       memset( buffer_tab_stops, 0, buffer_columns );
                                }
                        }
                }
@@ -442,6 +454,7 @@ gc_enable( boolean_t enable )
                gc_buffer_attributes = buffer_attributes;
                gc_buffer_characters = buffer_characters;
                gc_buffer_colorcodes = buffer_colorcodes;
+               gc_buffer_tab_stops  = buffer_tab_stops;
                gc_buffer_columns    = buffer_columns;
                gc_buffer_rows       = buffer_rows;
                gc_buffer_size       = buffer_size;
@@ -657,7 +670,7 @@ gc_putc_esc(unsigned char ch)
                if (ch == 'E') gc_x = 0;
                break;
        case 'H':               /* Set tab stop          */
-               gc_tab_stops[gc_x] = 1;
+               gc_set_tab_stop(gc_x, TRUE);
                break;
        case 'M':               /* Cursor up             */
                if (gc_y <= gc_scrreg_top) {
@@ -797,11 +810,11 @@ gc_putc_gotpars(unsigned char ch)
                        case 3: /* Clear every tabs */
                                {
                                        for (i = 0; i <= vinfo.v_columns; i++)
-                                               gc_tab_stops[i] = 0;
+                                               gc_set_tab_stop(i, FALSE);
                                }
                                break;
                        case 0:
-                               gc_tab_stops[gc_x] = 0;
+                               gc_set_tab_stop(gc_x, FALSE);
                                break;
                }
                break;
@@ -881,7 +894,8 @@ gc_putc_normal(unsigned char ch)
                        }
                break;
        case '\t':              /* Tab                   */
-               while (gc_x < vinfo.v_columns && !gc_tab_stops[++gc_x]);
+               if (gc_buffer_tab_stops) while (gc_x < vinfo.v_columns && !gc_is_tab_stop(++gc_x));
+
                if (gc_x >= vinfo.v_columns)
                        gc_x = vinfo.v_columns-1;
                break;
@@ -965,13 +979,33 @@ static void
 gc_reset_tabs(void)
 {
        unsigned int i;
+       
+       if (!gc_buffer_tab_stops) return;
 
-       for (i = 0; i<= vinfo.v_columns; i++) {
-               gc_tab_stops[i] = ((i % 8) == 0);
+       for (i = 0; i < vinfo.v_columns; i++) {
+               gc_buffer_tab_stops[i] = ((i % 8) == 0);
        }
 
 }
 
+static void
+gc_set_tab_stop(unsigned int column, boolean_t enabled)
+{
+       if (gc_buffer_tab_stops && (column < vinfo.v_columns)) {
+               gc_buffer_tab_stops[column] = enabled;
+       }
+}
+
+static boolean_t gc_is_tab_stop(unsigned int column)
+{
+       if (gc_buffer_tab_stops == NULL)
+               return ((column % 8) == 0);
+       if (column < vinfo.v_columns)
+               return gc_buffer_tab_stops[column];
+       else
+               return FALSE;
+}
+
 static void
 gc_reset_vt100(void)
 {
@@ -990,7 +1024,7 @@ gc_reset_vt100(void)
 static void 
 gc_scroll_down(int num, unsigned int top, unsigned int bottom)
 {
-        if (!gc_buffer_size) return;
+       if (!gc_buffer_size) return;
 
        if ( bottom <= gc_buffer_rows )
        {
@@ -1099,7 +1133,7 @@ gc_scroll_down(int num, unsigned int top, unsigned int bottom)
 static void 
 gc_scroll_up(int num, unsigned int top, unsigned int bottom)
 {
-        if (!gc_buffer_size) return;
+       if (!gc_buffer_size) return;
 
        if ( bottom <= gc_buffer_rows )
        {
@@ -1240,18 +1274,25 @@ gc_update_color(int color, boolean_t fore)
 void
 vcputc(__unused int l, __unused int u, int c)
 {
-       if ( gc_initialized && ( gc_enabled || debug_mode ) )
+       if ( gc_enabled || debug_mode )
        {
                spl_t s;
 
                s = splhigh();
+#if    defined(__i386__) || defined(__x86_64__)
+               x86_filter_TLB_coherency_interrupts(TRUE);
+#endif
                VCPUTC_LOCK_LOCK();
-
-               gc_hide_cursor(gc_x, gc_y);
-               gc_putchar(c);
-               gc_show_cursor(gc_x, gc_y);
-
+               if ( gc_enabled || debug_mode )
+               {
+                       gc_hide_cursor(gc_x, gc_y);
+                       gc_putchar(c);
+                       gc_show_cursor(gc_x, gc_y);
+               }
                VCPUTC_LOCK_UNLOCK();
+#if    defined(__i386__) || defined(__x86_64__)
+               x86_filter_TLB_coherency_interrupts(FALSE);
+#endif
                splx(s);
        }
 }
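
Two things in this vcputc() hunk: the enabled check is repeated under the lock (the gc_pause() path added below can flip gc_enabled between the unlocked test and lock acquisition), and on x86 the output is bracketed with x86_filter_TLB_coherency_interrupts(TRUE/FALSE), presumably so TLB shootdown IPIs get special handling while this CPU may spin at splhigh inside the console lock.
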
@@ -1795,6 +1836,7 @@ static const unsigned char *    vc_clut;
 static const unsigned char *    vc_clut8;
 static unsigned char            vc_revclut8[256];
 static uint32_t                vc_progress_interval;
+static uint32_t                vc_progress_count;
 static uint64_t                        vc_progress_deadline;
 static thread_call_data_t      vc_progress_call;
 static boolean_t               vc_needsave;
@@ -2211,8 +2253,9 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay)
             vc_needsave      = TRUE;
             vc_saveunder     = saveBuf;
             vc_saveunder_len = saveLen;
-            saveBuf         = NULL;
-            saveLen         = 0;
+            saveBuf              = NULL;
+            saveLen          = 0;
+            vc_progress_count = 0;
 
             clock_interval_to_deadline(vc_delay,
                                       1000 * 1000 * 1000 /*second scale*/,
@@ -2240,10 +2283,9 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay)
 
 
 static void
-vc_progress_task(__unused void *arg0, void *arg)
+vc_progress_task(__unused void *arg0, __unused void *arg)
 {
     spl_t              s;
-    int                        count = (int)(uintptr_t) arg;
     int                        x, y, width, height;
     const unsigned char * data;
 
@@ -2252,18 +2294,18 @@ vc_progress_task(__unused void *arg0, void *arg)
 
     if( vc_progress_enable) {
 
-       KERNEL_DEBUG_CONSTANT(0x7020008, count, 0, 0, 0, 0);
+       KERNEL_DEBUG_CONSTANT(0x7020008, vc_progress_count, 0, 0, 0, 0);
 
-        count++;
-        if( count >= vc_progress->count)
-            count = 0;
+        vc_progress_count++;
+        if( vc_progress_count >= vc_progress->count)
+            vc_progress_count = 0;
 
        width = vc_progress->width;
        height = vc_progress->height;
        x = vc_progress->dx;
        y = vc_progress->dy;
        data = vc_progress_data;
-       data += count * width * height;
+       data += vc_progress_count * width * height;
        if( 1 & vc_progress->flags) {
            x += ((vinfo.v_width - width) / 2);
            y += ((vinfo.v_height - height) / 2);
@@ -2275,7 +2317,7 @@ vc_progress_task(__unused void *arg0, void *arg)
         vc_needsave = FALSE;
 
         clock_deadline_for_periodic_event(vc_progress_interval, mach_absolute_time(), &vc_progress_deadline);
-        thread_call_enter1_delayed(&vc_progress_call, (void *)(uintptr_t)count, vc_progress_deadline);
+        thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline);
     }
     simple_unlock(&vc_progress_lock);
     splx(s);
@@ -2294,11 +2336,34 @@ static boolean_t gc_acquired      = FALSE;
 static boolean_t gc_graphics_boot = FALSE;
 static boolean_t gc_desire_text   = FALSE;
 
-static unsigned int lastVideoPhys   = 0;
+static uint64_t lastVideoPhys   = 0;
 static vm_offset_t  lastVideoVirt   = 0;
 static vm_size_t lastVideoSize   = 0;
 static boolean_t    lastVideoMapped = FALSE;
 
+static void
+gc_pause( boolean_t pause, boolean_t graphics_now )
+{
+       spl_t s;
+
+       s = splhigh( );
+       VCPUTC_LOCK_LOCK( );
+
+    disableConsoleOutput = (pause && !console_is_serial());
+    gc_enabled           = (!pause && !graphics_now);
+
+    VCPUTC_LOCK_UNLOCK( );
+
+    simple_lock(&vc_progress_lock);
+
+    vc_progress_enable = gc_graphics_boot && !gc_desire_text && !pause;
+       if (vc_progress_enable)
+               thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline);
+
+    simple_unlock(&vc_progress_lock);
+    splx(s);
+}
+
 void
 initialize_screen(PE_Video * boot_vinfo, unsigned int op)
 {
@@ -2310,26 +2375,29 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
        if ( boot_vinfo )
        {
                struct vc_info new_vinfo = vinfo;
-
-//             bcopy((const void *)boot_vinfo, (void *)&boot_video_info, sizeof(boot_video_info));
-
                /* 
                 *      First, check if we are changing the size and/or location of the framebuffer
                 */
                new_vinfo.v_name[0]  = 0;
-               new_vinfo.v_width    = (unsigned int)boot_vinfo->v_width;
-               new_vinfo.v_height   = (unsigned int)boot_vinfo->v_height;
-               new_vinfo.v_depth    = (unsigned int)boot_vinfo->v_depth;
-               new_vinfo.v_rowbytes = (unsigned int)boot_vinfo->v_rowBytes;
-               new_vinfo.v_physaddr = boot_vinfo->v_baseAddr;          /* Get the physical address */
+               new_vinfo.v_physaddr = boot_vinfo->v_baseAddr & ~3;             /* Get the physical address */
+#ifndef __LP64__
+               new_vinfo.v_physaddr |= (((uint64_t) boot_vinfo->v_baseAddrHigh) << 32);
+#endif
+               if (kPEBaseAddressChange != op)
+               {
+            new_vinfo.v_width    = (unsigned int)boot_vinfo->v_width;
+            new_vinfo.v_height   = (unsigned int)boot_vinfo->v_height;
+            new_vinfo.v_depth    = (unsigned int)boot_vinfo->v_depth;
+            new_vinfo.v_rowbytes = (unsigned int)boot_vinfo->v_rowBytes;
 #if defined(__i386__) || defined(__x86_64__)
-                new_vinfo.v_type     = (unsigned int)boot_vinfo->v_display;
+            new_vinfo.v_type     = (unsigned int)boot_vinfo->v_display;
 #else
-                new_vinfo.v_type = 0;
+            new_vinfo.v_type = 0;
 #endif
+               }
      
                if (!lastVideoMapped)
-                   kprintf("initialize_screen: b=%08lX, w=%08X, h=%08X, r=%08X, d=%08X\n",                  /* (BRINGUP) */
+                   kprintf("initialize_screen: b=%08llX, w=%08X, h=%08X, r=%08X, d=%08X\n",                  /* (BRINGUP) */
                            new_vinfo.v_physaddr, new_vinfo.v_width,  new_vinfo.v_height,  new_vinfo.v_rowbytes, new_vinfo.v_type);     /* (BRINGUP) */
 
                if (!new_vinfo.v_physaddr)                                                      /* Check to see if we have a framebuffer */
@@ -2344,17 +2412,16 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                else
                {
                    /*
-                   *   Note that for the first time only, boot_vinfo->v_baseAddr is physical.
-                   */
-    
-                   if (kernel_map != VM_MAP_NULL)                                      /* If VM is up, we are given a virtual address */
+                    * If VM is up, we are given a virtual address, unless bit 0 of v_baseAddr is set to indicate a physical address.
+                    */
+                       if ((kernel_map != VM_MAP_NULL) && (0 == (1 & boot_vinfo->v_baseAddr)))
                    {
                            fbppage = pmap_find_phys(kernel_pmap, (addr64_t)boot_vinfo->v_baseAddr);    /* Get the physical address of frame buffer */
                            if(!fbppage)                                                /* Did we find it? */
                            {
                                    panic("initialize_screen: Strange framebuffer - addr = %08X\n", (uint32_t)boot_vinfo->v_baseAddr);
                            }
-                           new_vinfo.v_physaddr = (fbppage << 12) | (boot_vinfo->v_baseAddr & PAGE_MASK);                      /* Get the physical address */
+                           new_vinfo.v_physaddr = (((uint64_t)fbppage) << 12) | (boot_vinfo->v_baseAddr & PAGE_MASK);                  /* Get the physical address */
                    }
     
                    if (boot_vinfo->v_length != 0)
@@ -2366,7 +2433,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                    if ((lastVideoPhys != new_vinfo.v_physaddr) || (fbsize > lastVideoSize))            /* Did framebuffer change location or get bigger? */
                    {
                            unsigned int flags = VM_WIMG_IO;
-                           newVideoVirt = io_map_spec((vm_offset_t)new_vinfo.v_physaddr, fbsize, flags);       /* Allocate address space for framebuffer */
+                           newVideoVirt = io_map_spec((vm_map_offset_t)new_vinfo.v_physaddr, fbsize, flags);   /* Allocate address space for framebuffer */
                    }
                }
 
@@ -2409,13 +2476,14 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                                        kmem_free(kernel_map, lastVideoVirt, lastVideoSize);    /* Toss kernel addresses */
                                }
                        }
-                       lastVideoPhys = (unsigned int)new_vinfo.v_physaddr;                                     /* Remember the framebuffer address */
+                       lastVideoPhys = new_vinfo.v_physaddr;                                   /* Remember the framebuffer address */
                        lastVideoSize = fbsize;                                                 /* Remember the size */
                        lastVideoVirt = newVideoVirt;                                           /* Remember the virtual framebuffer address */
                        lastVideoMapped  = (NULL != kernel_map);
                }
 
-               {
+        if (kPEBaseAddressChange != op)
+        {
                        // Graphics mode setup by the booter.
 
                        gc_ops.initialize   = vc_initialize;
@@ -2427,15 +2495,15 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                        gc_ops.hide_cursor  = vc_reverse_cursor;
                        gc_ops.show_cursor  = vc_reverse_cursor;
                        gc_ops.update_color = vc_update_color;
+            gc_initialize(&vinfo);
                }
 
-               gc_initialize(&vinfo);
-
 #ifdef GRATEFULDEBUGGER
                GratefulDebInit((bootBumbleC *)boot_vinfo);     /* Re-initialize GratefulDeb */
 #endif /* GRATEFULDEBUGGER */
        }
 
+    graphics_now = gc_graphics_boot && !gc_desire_text;
        switch ( op )
        {
                case kPEGraphicsMode:
@@ -2452,15 +2520,24 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
 
                case kPEAcquireScreen:
                        if ( gc_acquired ) break;
-                       graphics_now = gc_graphics_boot && !gc_desire_text;
                        vc_progress_set( graphics_now, kProgressAcquireDelay );
                        gc_enable( !graphics_now );
                        gc_acquired = TRUE;
                        gc_desire_text = FALSE;
                        break;
 
+               case kPEDisableScreen:
+            if (gc_acquired) 
+            {
+                gc_pause( TRUE, graphics_now );
+            }
+                       break;
+
                case kPEEnableScreen:
-                       /* deprecated */
+            if (gc_acquired) 
+            {
+                gc_pause( FALSE, graphics_now );
+            }
                        break;
 
                case kPETextScreen:
@@ -2482,10 +2559,6 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                        gc_enable( TRUE );
                        break;
 
-               case kPEDisableScreen:
-                       /* deprecated */
-                       /* skip break */
-
                case kPEReleaseScreen:
                        gc_acquired = FALSE;
                        gc_desire_text = FALSE;
index 39f1a8640158a8fdfe47ed18fa82afffe7ff8e54..f67778b4e55339575c7b77678d7ddb656080fb3b 100644 (file)
@@ -61,7 +61,7 @@ struct vc_info
        unsigned long   v_baseaddr;
        unsigned int    v_type;
        char            v_name[32];
-       unsigned long   v_physaddr;
+       uint64_t        v_physaddr;
        unsigned int    v_rows;         /* characters */
        unsigned int    v_columns;      /* characters */
 	unsigned int    v_rowscanbytes; /* Actual number of bytes used for display per row */
index 7e4196b02bc7e5ee7f35b47e290d8d7ff5408485..6727bae263ac03f66963f676f40fa98b66dffc4e 100644 (file)
@@ -758,7 +758,7 @@ Debugger(
 #endif
 
                /* Print backtrace - callee is internally synchronized */
-               panic_i386_backtrace(stackptr, 32, NULL, FALSE, NULL);
+               panic_i386_backtrace(stackptr, 64, NULL, FALSE, NULL);
 
                /* everything should be printed now so copy to NVRAM
                 */
index f90433cfb69cef41590c021e4332dd7267a60ba6..fb2cbe334839d430e0eb7c349d247dcc24f53e8c 100644 (file)
@@ -199,6 +199,11 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
         */
        cpu_IA32e_disable(current_cpu_datap());
 #endif
+       /*
+        * Enable FPU/SIMD unit for potential hibernate acceleration
+        */
+       clear_ts(); 
+
        KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_START, 0, 0, 0, 0, 0);
 
        save_kdebug_enable = kdebug_enable;
@@ -220,6 +225,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 #else
        acpi_sleep_cpu(func, refcon);
 #endif
+
 #ifdef __x86_64__
        x86_64_post_sleep(old_cr3);
 #endif
@@ -278,6 +284,10 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 
        ml_get_timebase(&now);
 
+       /* re-enable and re-init local apic (prior to starting timers) */
+       if (lapic_probe())
+               lapic_configure();
+
        /* let the realtime clock reset */
        rtc_sleep_wakeup(acpi_sleep_abstime);
 
@@ -299,21 +309,17 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        } else
                KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0);
 
-       /* re-enable and re-init local apic */
-       if (lapic_probe())
-               lapic_configure();
-
        /* Restore power management register state */
        pmCPUMarkRunning(current_cpu_datap());
 
        /* Restore power management timer state */
        pmTimerRestore();
 
-       /* Restart tick interrupts from the LAPIC timer */
-       rtc_lapic_start_ticking();
+       /* Restart timer interrupts */
+       rtc_timer_start();
 
-       fpinit();
-       clear_fpu();
+       /* Reconfigure FP/SIMD unit */
+       init_fpu();
 
 #if HIBERNATION
 #ifdef __i386__
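
Two behavioral changes in this hunk: clear_ts() enables the FP/SIMD unit early for potential hibernate acceleration, and the local APIC is now reconfigured before any timer machinery runs rather than after. The resulting wake ordering, summarized as a sketch (not part of the diff):

    /*
     * Wake-path ordering after this change (sketch):
     *   lapic_probe() / lapic_configure()     interrupt controller first
     *   rtc_sleep_wakeup(acpi_sleep_abstime)  realtime clock resynced
     *   pmCPUMarkRunning(), pmTimerRestore()  PM register/timer state back
     *   rtc_timer_start()                     tick interrupts restart on a
     *                                         freshly configured LAPIC
     *   init_fpu()                            FP/SIMD (incl. XSAVE) re-probed
     */
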
index 9578cc80b1ce75728c9a5b7c418a2302a5e155d5..63eb4446b6ee99443ece518ef700b51b67cb2108 100644 (file)
@@ -128,7 +128,6 @@ typedef struct {
         addr64_t       cu_user_gs_base;
 } cpu_uber_t;
 
-
 /*
  * Per-cpu data.
  *
@@ -157,6 +156,9 @@ typedef struct cpu_data
        int                     cpu_phys_number;        /* Physical CPU */
        cpu_id_t                cpu_id;                 /* Platform Expert */
        int                     cpu_signals;            /* IPI events */
+       int                     cpu_prior_signals;      /* Last set of events,
+                                                        * debugging
+                                                        */
        int                     cpu_mcount_off;         /* mcount recursion */
        ast_t                   cpu_pending_ast;
        int                     cpu_type;
@@ -227,6 +229,17 @@ typedef struct cpu_data
        rtc_nanotime_t          *cpu_nanotime;          /* Nanotime info */
        thread_t                csw_old_thread;
        thread_t                csw_new_thread;
+       uint64_t                cpu_max_observed_int_latency;
+       int                     cpu_max_observed_int_latency_vector;
+       uint64_t                debugger_entry_time;
+       volatile boolean_t      cpu_NMI_acknowledged;
+       /* A separate nested interrupt stack flag, to account
+        * for non-nested interrupts arriving while on the interrupt stack.
+        * Currently this only occurs when AICPM enables interrupts on the
+        * interrupt stack during processor offlining.
+        */
+       uint32_t                cpu_nested_istack;
+       uint32_t                cpu_nested_istack_events;
 } cpu_data_t;
 
 extern cpu_data_t      *cpu_data_ptr[];  
index a68a75bad233356fb1655bd851bace9fa77958fd..77445d9b15905b7aa0f07fb5eda5eb1e3923b420 100644 (file)
@@ -141,8 +141,8 @@ typedef struct x86_lcpu
     boolean_t          primary;        /* logical cpu is primary CPU in package */
     volatile lcpu_state_t      state;  /* state of the logical CPU */
     volatile boolean_t stopped;        /* used to indicate that the CPU has "stopped" */
-    uint64_t           rtcPop;         /* when etimer wants a timer pop */
-    uint64_t           rtcDeadline;
+    uint64_t           rtcPop;         /* next timer pop programmed */
+    uint64_t           rtcDeadline;    /* next etimer-requested deadline */
     x86_cpu_cache_t    *caches[MAX_CACHE_DEPTH];
     void               *pmStats;       /* Power management stats for lcpu */
     void               *pmState;       /* Power management state for lcpu */
index c974a71ede1f17f496ada4110744daaa02162d26..b836ba88a0223922dcd8da6e2c6c09190708392d 100644 (file)
@@ -203,7 +203,8 @@ static cpuid_cache_descriptor_t intel_cpuid_leaf2_descriptor_table[] = {
        { 0xE5, CACHE,  L3,             16,     16*M,   64  },  
        { 0xE6, CACHE,  L3,             16,     24*M,   64  },  
        { 0xF0, PREFETCH, NA,           NA,     64,     NA  },  
-       { 0xF1, PREFETCH, NA,           NA,     128,    NA  }   
+       { 0xF1, PREFETCH, NA,           NA,     128,    NA  },  
+       { 0xFF, CACHE,  NA,             NA,     0,      NA  }   
 };
 #define        INTEL_LEAF2_DESC_NUM (sizeof(intel_cpuid_leaf2_descriptor_table) / \
                                sizeof(cpuid_cache_descriptor_t))
@@ -240,7 +241,10 @@ static void cpuid_fn(uint32_t selector, uint32_t *result)
                          "=b" (result[1]),
                          "=c" (result[2]),
                          "=d" (result[3])
-                       : "a"(selector));
+                       : "a"(selector),
+                         "b" (0),
+                         "c" (0),
+                         "d" (0));
        } else {
                do_cpuid(selector, result);
        }
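
Declaring %ebx/%ecx/%edx as explicit zero inputs matters for CPUID leaves that take a sub-leaf index in %ecx (4, 0xB, 0xD): with no input constraint, the instruction reads whatever the compiler happened to leave in the register. A hypothetical user-space helper showing the same constraint discipline:

    #include <stdint.h>
    #include <stdio.h>

    /* ECX is a named input, so sub-leaf-indexed leaves see a
     * deterministic value rather than leftover register contents. */
    static inline void cpuid_subleaf(uint32_t leaf, uint32_t sub, uint32_t r[4])
    {
            __asm__ volatile("cpuid"
                : "=a" (r[0]), "=b" (r[1]), "=c" (r[2]), "=d" (r[3])
                : "a" (leaf), "c" (sub));
    }

    int main(void)
    {
            uint32_t r[4];
            cpuid_subleaf(4, 0, r);   /* deterministic cache params, sub-leaf 0 */
            printf("CPUID.4.0 EAX: %#x\n", (unsigned)r[0]);
            return 0;
    }
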
@@ -574,8 +578,13 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                ctp->sensor               = bitfield32(reg[eax], 0, 0);
                ctp->dynamic_acceleration = bitfield32(reg[eax], 1, 1);
                ctp->invariant_APIC_timer = bitfield32(reg[eax], 2, 2);
+               ctp->core_power_limits    = bitfield32(reg[eax], 3, 3);
+               ctp->fine_grain_clock_mod = bitfield32(reg[eax], 4, 4);
+               ctp->package_thermal_intr = bitfield32(reg[eax], 5, 5);
                ctp->thresholds           = bitfield32(reg[ebx], 3, 0);
                ctp->ACNT_MCNT            = bitfield32(reg[ecx], 0, 0);
+               ctp->hardware_feedback    = bitfield32(reg[ecx], 1, 1);
+               ctp->energy_policy        = bitfield32(reg[ecx], 2, 2);
                info_p->cpuid_thermal_leafp = ctp;
        }
 
@@ -596,6 +605,15 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                info_p->cpuid_arch_perf_leafp = capp;
        }
 
+       if (info_p->cpuid_max_basic >= 0xd) {
+               cpuid_xsave_leaf_t      *xsp = &info_p->cpuid_xsave_leaf;
+               /*
+                * XSAVE Features:
+                */
+               cpuid_fn(0xd, info_p->cpuid_xsave_leaf.extended_state);
+               info_p->cpuid_xsave_leafp = xsp;
+       }
+
        return;
 }
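
Leaf 0xD sub-leaf 0 enumerates the XSAVE state mask in EAX:EDX and save-area sizes in EBX (currently enabled features) and ECX (all supported features); init_fpu() later checks word 0 for YMM and word 2 against sizeof(struct x86_avx_thread_state). A user-space analogue using the GCC/clang <cpuid.h> helper (an assumption, not part of this commit):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned eax, ebx, ecx, edx;

            /* Leaf 0xD, sub-leaf 0: XSAVE feature enumeration */
            if (!__get_cpuid_count(0xD, 0, &eax, &ebx, &ecx, &edx))
                    return 1;
            printf("XSAVE feature mask (low 32): %#x\n", eax);
            printf("save area, enabled features: %u bytes\n", ebx);
            printf("save area, all features:     %u bytes\n", ecx);
            return 0;
    }
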
 
@@ -630,6 +648,10 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p)
                case CPUID_MODEL_WESTMERE_EX:
                        cpufamily = CPUFAMILY_INTEL_WESTMERE;
                        break;
+               case CPUID_MODEL_SANDYBRIDGE:
+               case CPUID_MODEL_JAKETOWN:
+                       cpufamily = CPUFAMILY_INTEL_SANDYBRIDGE;
+                       break;
                }
                break;
        }
@@ -637,7 +659,10 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p)
        info_p->cpuid_cpufamily = cpufamily;
        return cpufamily;
 }
-
+/*
+ * Must be invoked either when executing single-threaded, or with
+ * independent synchronization.
+ */
 void
 cpuid_set_info(void)
 {
@@ -664,17 +689,13 @@ cpuid_set_info(void)
         * (which determines whether SMT/Hyperthreading is active).
         */
        switch (info_p->cpuid_cpufamily) {
-       /*
-        * This should be the same as Nehalem but an A0 silicon bug returns
-        * invalid data in the top 12 bits. Hence, we use only bits [19..16]
-        * rather than [31..16] for core count - which actually can't exceed 8. 
-        */
        case CPUFAMILY_INTEL_WESTMERE: {
                uint64_t msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                info_p->core_count   = bitfield32((uint32_t)msr, 19, 16);
                info_p->thread_count = bitfield32((uint32_t)msr, 15,  0);
                break;
                }
+       case CPUFAMILY_INTEL_SANDYBRIDGE:
        case CPUFAMILY_INTEL_NEHALEM: {
                uint64_t msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                info_p->core_count   = bitfield32((uint32_t)msr, 31, 16);
@@ -694,62 +715,71 @@ static struct {
        uint64_t        mask;
        const char      *name;
 } feature_map[] = {
-       {CPUID_FEATURE_FPU,   "FPU",},
-       {CPUID_FEATURE_VME,   "VME",},
-       {CPUID_FEATURE_DE,    "DE",},
-       {CPUID_FEATURE_PSE,   "PSE",},
-       {CPUID_FEATURE_TSC,   "TSC",},
-       {CPUID_FEATURE_MSR,   "MSR",},
-       {CPUID_FEATURE_PAE,   "PAE",},
-       {CPUID_FEATURE_MCE,   "MCE",},
-       {CPUID_FEATURE_CX8,   "CX8",},
-       {CPUID_FEATURE_APIC,  "APIC",},
-       {CPUID_FEATURE_SEP,   "SEP",},
-       {CPUID_FEATURE_MTRR,  "MTRR",},
-       {CPUID_FEATURE_PGE,   "PGE",},
-       {CPUID_FEATURE_MCA,   "MCA",},
-       {CPUID_FEATURE_CMOV,  "CMOV",},
-       {CPUID_FEATURE_PAT,   "PAT",},
-       {CPUID_FEATURE_PSE36, "PSE36",},
-       {CPUID_FEATURE_PSN,   "PSN",},
-       {CPUID_FEATURE_CLFSH, "CLFSH",},
-       {CPUID_FEATURE_DS,    "DS",},
-       {CPUID_FEATURE_ACPI,  "ACPI",},
-       {CPUID_FEATURE_MMX,   "MMX",},
-       {CPUID_FEATURE_FXSR,  "FXSR",},
-       {CPUID_FEATURE_SSE,   "SSE",},
-       {CPUID_FEATURE_SSE2,  "SSE2",},
-       {CPUID_FEATURE_SS,    "SS",},
-       {CPUID_FEATURE_HTT,   "HTT",},
-       {CPUID_FEATURE_TM,    "TM",},
-       {CPUID_FEATURE_SSE3,    "SSE3"},
+       {CPUID_FEATURE_FPU,       "FPU"},
+       {CPUID_FEATURE_VME,       "VME"},
+       {CPUID_FEATURE_DE,        "DE"},
+       {CPUID_FEATURE_PSE,       "PSE"},
+       {CPUID_FEATURE_TSC,       "TSC"},
+       {CPUID_FEATURE_MSR,       "MSR"},
+       {CPUID_FEATURE_PAE,       "PAE"},
+       {CPUID_FEATURE_MCE,       "MCE"},
+       {CPUID_FEATURE_CX8,       "CX8"},
+       {CPUID_FEATURE_APIC,      "APIC"},
+       {CPUID_FEATURE_SEP,       "SEP"},
+       {CPUID_FEATURE_MTRR,      "MTRR"},
+       {CPUID_FEATURE_PGE,       "PGE"},
+       {CPUID_FEATURE_MCA,       "MCA"},
+       {CPUID_FEATURE_CMOV,      "CMOV"},
+       {CPUID_FEATURE_PAT,       "PAT"},
+       {CPUID_FEATURE_PSE36,     "PSE36"},
+       {CPUID_FEATURE_PSN,       "PSN"},
+       {CPUID_FEATURE_CLFSH,     "CLFSH"},
+       {CPUID_FEATURE_DS,        "DS"},
+       {CPUID_FEATURE_ACPI,      "ACPI"},
+       {CPUID_FEATURE_MMX,       "MMX"},
+       {CPUID_FEATURE_FXSR,      "FXSR"},
+       {CPUID_FEATURE_SSE,       "SSE"},
+       {CPUID_FEATURE_SSE2,      "SSE2"},
+       {CPUID_FEATURE_SS,        "SS"},
+       {CPUID_FEATURE_HTT,       "HTT"},
+       {CPUID_FEATURE_TM,        "TM"},
+       {CPUID_FEATURE_PBE,       "PBE"},
+       {CPUID_FEATURE_SSE3,      "SSE3"},
        {CPUID_FEATURE_PCLMULQDQ, "PCLMULQDQ"},
-       {CPUID_FEATURE_MONITOR, "MON"},
-       {CPUID_FEATURE_DSCPL,   "DSCPL"},
-       {CPUID_FEATURE_VMX,     "VMX"},
-       {CPUID_FEATURE_SMX,     "SMX"},
-       {CPUID_FEATURE_EST,     "EST"},
-       {CPUID_FEATURE_TM2,     "TM2"},
-       {CPUID_FEATURE_SSSE3,   "SSSE3"},
-       {CPUID_FEATURE_CID,     "CID"},
-       {CPUID_FEATURE_CX16,    "CX16"},
-       {CPUID_FEATURE_xTPR,    "TPR"},
-       {CPUID_FEATURE_PDCM,    "PDCM"},
-       {CPUID_FEATURE_SSE4_1,  "SSE4.1"},
-       {CPUID_FEATURE_SSE4_2,  "SSE4.2"},
-       {CPUID_FEATURE_xAPIC,   "xAPIC"},
-       {CPUID_FEATURE_POPCNT,  "POPCNT"},
-       {CPUID_FEATURE_AES,     "AES"},
-       {CPUID_FEATURE_VMM,     "VMM"},
+       {CPUID_FEATURE_DTES64,    "DTES64"},
+       {CPUID_FEATURE_MONITOR,   "MON"},
+       {CPUID_FEATURE_DSCPL,     "DSCPL"},
+       {CPUID_FEATURE_VMX,       "VMX"},
+       {CPUID_FEATURE_SMX,       "SMX"},
+       {CPUID_FEATURE_EST,       "EST"},
+       {CPUID_FEATURE_TM2,       "TM2"},
+       {CPUID_FEATURE_SSSE3,     "SSSE3"},
+       {CPUID_FEATURE_CID,       "CID"},
+       {CPUID_FEATURE_CX16,      "CX16"},
+       {CPUID_FEATURE_xTPR,      "TPR"},
+       {CPUID_FEATURE_PDCM,      "PDCM"},
+       {CPUID_FEATURE_SSE4_1,    "SSE4.1"},
+       {CPUID_FEATURE_SSE4_2,    "SSE4.2"},
+       {CPUID_FEATURE_xAPIC,     "xAPIC"},
+       {CPUID_FEATURE_MOVBE,     "MOVBE"},
+       {CPUID_FEATURE_POPCNT,    "POPCNT"},
+       {CPUID_FEATURE_AES,       "AES"},
+       {CPUID_FEATURE_XSAVE,     "XSAVE"},
+       {CPUID_FEATURE_OSXSAVE,   "OSXSAVE"},
+       {CPUID_FEATURE_VMM,       "VMM"},
+       {CPUID_FEATURE_SEGLIM64,  "SEGLIM64"},
+       {CPUID_FEATURE_PCID,      "PCID"},
+       {CPUID_FEATURE_TSCTMR,    "TSCTMR"},
+       {CPUID_FEATURE_AVX1_0,    "AVX1.0"},
        {0, 0}
 },
 extfeature_map[] = {
        {CPUID_EXTFEATURE_SYSCALL, "SYSCALL"},
        {CPUID_EXTFEATURE_XD,      "XD"},
        {CPUID_EXTFEATURE_1GBPAGE, "1GBPAGE"},
-       {CPUID_EXTFEATURE_RDTSCP,  "RDTSCP"},
        {CPUID_EXTFEATURE_EM64T,   "EM64T"},
        {CPUID_EXTFEATURE_LAHF,    "LAHF"},
+       {CPUID_EXTFEATURE_RDTSCP,  "RDTSCP"},
        {CPUID_EXTFEATURE_TSCI,    "TSCI"},
        {0, 0}
 };
@@ -768,15 +798,16 @@ cpuid_info(void)
 char *
 cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len)
 {
-       size_t  len = -1;
+       size_t  len = 0;
        char    *p = buf;
        int     i;
 
        for (i = 0; feature_map[i].mask != 0; i++) {
                if ((features & feature_map[i].mask) == 0)
                        continue;
-               if (len > 0)
+               if (len && ((size_t)(p - buf) < (buf_len - 1)))
                        *p++ = ' ';
+
                len = min(strlen(feature_map[i].name), (size_t) ((buf_len-1) - (p-buf)));
                if (len == 0)
                        break;
@@ -790,14 +821,14 @@ cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len)
 char *
 cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len)
 {
-       size_t  len = -1;
+       size_t  len = 0;
        char    *p = buf;
        int     i;
 
        for (i = 0; extfeature_map[i].mask != 0; i++) {
                if ((extfeatures & extfeature_map[i].mask) == 0)
                        continue;
-               if (len > 0)
+               if (len && ((size_t) (p - buf) < (buf_len - 1)))
                        *p++ = ' ';
                len = min(strlen(extfeature_map[i].name), (size_t) ((buf_len-1)-(p-buf)));
                if (len == 0)
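
The len change in these two routines fixes a classic size_t pitfall: initializing an unsigned size_t to -1 wraps to SIZE_MAX, so the old `if (len > 0)` separator test was true even before the first name, and the separator store was never bounds-checked against buf_len. A two-line demonstration of the wrap:

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
            size_t len = -1;   /* wraps to SIZE_MAX; size_t cannot hold -1 */

            if (len > 0)
                    printf("len > 0 is true: len = %zu\n", len);
            return 0;
    }
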
index ce8b2a3785fdc2c0973320a1be222e97199fa675..4c3c329c0c5a6eb03c4fb6cecc7acac8ff416a45 100644 (file)
  * The CPUID_FEATURE_XXX values define 64-bit values
  * returned in %ecx:%edx to a CPUID request with %eax of 1: 
  */
-#define        CPUID_FEATURE_FPU     _Bit(0)   /* Floating point unit on-chip */
-#define        CPUID_FEATURE_VME     _Bit(1)   /* Virtual Mode Extension */
-#define        CPUID_FEATURE_DE      _Bit(2)   /* Debugging Extension */
-#define        CPUID_FEATURE_PSE     _Bit(3)   /* Page Size Extension */
-#define        CPUID_FEATURE_TSC     _Bit(4)   /* Time Stamp Counter */
-#define        CPUID_FEATURE_MSR     _Bit(5)   /* Model Specific Registers */
-#define CPUID_FEATURE_PAE     _Bit(6)  /* Physical Address Extension */
-#define        CPUID_FEATURE_MCE     _Bit(7)   /* Machine Check Exception */
-#define        CPUID_FEATURE_CX8     _Bit(8)   /* CMPXCHG8B */
-#define        CPUID_FEATURE_APIC    _Bit(9)   /* On-chip APIC */
-#define CPUID_FEATURE_SEP     _Bit(11) /* Fast System Call */
-#define        CPUID_FEATURE_MTRR    _Bit(12)  /* Memory Type Range Register */
-#define        CPUID_FEATURE_PGE     _Bit(13)  /* Page Global Enable */
-#define        CPUID_FEATURE_MCA     _Bit(14)  /* Machine Check Architecture */
-#define        CPUID_FEATURE_CMOV    _Bit(15)  /* Conditional Move Instruction */
-#define CPUID_FEATURE_PAT     _Bit(16) /* Page Attribute Table */
-#define CPUID_FEATURE_PSE36   _Bit(17) /* 36-bit Page Size Extension */
-#define CPUID_FEATURE_PSN     _Bit(18) /* Processor Serial Number */
-#define CPUID_FEATURE_CLFSH   _Bit(19) /* CLFLUSH Instruction supported */
-#define CPUID_FEATURE_DS      _Bit(21) /* Debug Store */
-#define CPUID_FEATURE_ACPI    _Bit(22) /* Thermal monitor and Clock Ctrl */
-#define CPUID_FEATURE_MMX     _Bit(23) /* MMX supported */
-#define CPUID_FEATURE_FXSR    _Bit(24) /* Fast floating pt save/restore */
-#define CPUID_FEATURE_SSE     _Bit(25) /* Streaming SIMD extensions */
-#define CPUID_FEATURE_SSE2    _Bit(26) /* Streaming SIMD extensions 2 */
-#define CPUID_FEATURE_SS      _Bit(27) /* Self-Snoop */
-#define CPUID_FEATURE_HTT     _Bit(28) /* Hyper-Threading Technology */
-#define CPUID_FEATURE_TM      _Bit(29) /* Thermal Monitor (TM1) */
-#define CPUID_FEATURE_PBE     _Bit(31) /* Pend Break Enable */
-
-#define CPUID_FEATURE_SSE3    _HBit(0) /* Streaming SIMD extensions 3 */
-#define CPUID_FEATURE_PCLMULQDQ _HBit(1) /* PCLMULQDQ Instruction */
-
-#define CPUID_FEATURE_MONITOR _HBit(3) /* Monitor/mwait */
-#define CPUID_FEATURE_DSCPL   _HBit(4) /* Debug Store CPL */
-#define CPUID_FEATURE_VMX     _HBit(5) /* VMX */
-#define CPUID_FEATURE_SMX     _HBit(6) /* SMX */
-#define CPUID_FEATURE_EST     _HBit(7) /* Enhanced SpeedsTep (GV3) */
-#define CPUID_FEATURE_TM2     _HBit(8) /* Thermal Monitor 2 */
-#define CPUID_FEATURE_SSSE3   _HBit(9) /* Supplemental SSE3 instructions */
-#define CPUID_FEATURE_CID     _HBit(10)        /* L1 Context ID */
-#define CPUID_FEATURE_CX16    _HBit(13)        /* CmpXchg16b instruction */
-#define CPUID_FEATURE_xTPR    _HBit(14)        /* Send Task PRiority msgs */
-#define CPUID_FEATURE_PDCM    _HBit(15)        /* Perf/Debug Capability MSR */
-
-#define CPUID_FEATURE_DCA     _HBit(18)        /* Direct Cache Access */
-#define CPUID_FEATURE_SSE4_1  _HBit(19)        /* Streaming SIMD extensions 4.1 */
-#define CPUID_FEATURE_SSE4_2  _HBit(20)        /* Streaming SIMD extensions 4.2 */
-#define CPUID_FEATURE_xAPIC   _HBit(21)        /* Extended APIC Mode */
-#define CPUID_FEATURE_POPCNT  _HBit(23)        /* POPCNT instruction */
-#define CPUID_FEATURE_AES     _HBit(25)        /* AES instructions */
-#define CPUID_FEATURE_VMM     _HBit(31)        /* VMM (Hypervisor) present */
+#define CPUID_FEATURE_FPU       _Bit(0)   /* Floating point unit on-chip */
+#define CPUID_FEATURE_VME       _Bit(1)   /* Virtual Mode Extension */
+#define CPUID_FEATURE_DE        _Bit(2)   /* Debugging Extension */
+#define CPUID_FEATURE_PSE       _Bit(3)   /* Page Size Extension */
+#define CPUID_FEATURE_TSC       _Bit(4)   /* Time Stamp Counter */
+#define CPUID_FEATURE_MSR       _Bit(5)   /* Model Specific Registers */
+#define CPUID_FEATURE_PAE       _Bit(6)   /* Physical Address Extension */
+#define CPUID_FEATURE_MCE       _Bit(7)   /* Machine Check Exception */
+#define CPUID_FEATURE_CX8       _Bit(8)   /* CMPXCHG8B */
+#define CPUID_FEATURE_APIC      _Bit(9)   /* On-chip APIC */
+#define CPUID_FEATURE_SEP       _Bit(11)  /* Fast System Call */
+#define CPUID_FEATURE_MTRR      _Bit(12)  /* Memory Type Range Register */
+#define CPUID_FEATURE_PGE       _Bit(13)  /* Page Global Enable */
+#define CPUID_FEATURE_MCA       _Bit(14)  /* Machine Check Architecture */
+#define CPUID_FEATURE_CMOV      _Bit(15)  /* Conditional Move Instruction */
+#define CPUID_FEATURE_PAT       _Bit(16)  /* Page Attribute Table */
+#define CPUID_FEATURE_PSE36     _Bit(17)  /* 36-bit Page Size Extension */
+#define CPUID_FEATURE_PSN       _Bit(18)  /* Processor Serial Number */
+#define CPUID_FEATURE_CLFSH     _Bit(19)  /* CLFLUSH Instruction supported */
+#define CPUID_FEATURE_DS        _Bit(21)  /* Debug Store */
+#define CPUID_FEATURE_ACPI      _Bit(22)  /* Thermal monitor and Clock Ctrl */
+#define CPUID_FEATURE_MMX       _Bit(23)  /* MMX supported */
+#define CPUID_FEATURE_FXSR      _Bit(24)  /* Fast floating pt save/restore */
+#define CPUID_FEATURE_SSE       _Bit(25)  /* Streaming SIMD extensions */
+#define CPUID_FEATURE_SSE2      _Bit(26)  /* Streaming SIMD extensions 2 */
+#define CPUID_FEATURE_SS        _Bit(27)  /* Self-Snoop */
+#define CPUID_FEATURE_HTT       _Bit(28)  /* Hyper-Threading Technology */
+#define CPUID_FEATURE_TM        _Bit(29)  /* Thermal Monitor (TM1) */
+#define CPUID_FEATURE_PBE       _Bit(31)  /* Pend Break Enable */
+#define CPUID_FEATURE_SSE3      _HBit(0)  /* Streaming SIMD extensions 3 */
+#define CPUID_FEATURE_PCLMULQDQ _HBit(1)  /* PCLMULQDQ instruction */
+#define CPUID_FEATURE_DTES64    _HBit(2)  /* 64-bit DS layout */
+#define CPUID_FEATURE_MONITOR   _HBit(3)  /* Monitor/mwait */
+#define CPUID_FEATURE_DSCPL     _HBit(4)  /* Debug Store CPL */
+#define CPUID_FEATURE_VMX       _HBit(5)  /* VMX */
+#define CPUID_FEATURE_SMX       _HBit(6)  /* SMX */
+#define CPUID_FEATURE_EST       _HBit(7)  /* Enhanced SpeedStep (GV3) */
+#define CPUID_FEATURE_TM2       _HBit(8)  /* Thermal Monitor 2 */
+#define CPUID_FEATURE_SSSE3     _HBit(9)  /* Supplemental SSE3 instructions */
+#define CPUID_FEATURE_CID       _HBit(10) /* L1 Context ID */
+#define CPUID_FEATURE_CX16      _HBit(13) /* CmpXchg16b instruction */
+#define CPUID_FEATURE_xTPR      _HBit(14) /* Send Task PRiority msgs */
+#define CPUID_FEATURE_PDCM      _HBit(15) /* Perf/Debug Capability MSR */
+
+#define CPUID_FEATURE_DCA       _HBit(18) /* Direct Cache Access */
+#define CPUID_FEATURE_SSE4_1    _HBit(19) /* Streaming SIMD extensions 4.1 */
+#define CPUID_FEATURE_SSE4_2    _HBit(20) /* Streaming SIMD extensions 4.2 */
+#define CPUID_FEATURE_xAPIC     _HBit(21) /* Extended APIC Mode */
+#define CPUID_FEATURE_MOVBE     _HBit(22) /* MOVBE instruction */
+#define CPUID_FEATURE_POPCNT    _HBit(23) /* POPCNT instruction */
+#define CPUID_FEATURE_AES       _HBit(25) /* AES instructions */
+#define CPUID_FEATURE_XSAVE     _HBit(26) /* XSAVE instructions */
+#define CPUID_FEATURE_OSXSAVE   _HBit(27) /* XGETBV/XSETBV instructions */
+#define CPUID_FEATURE_VMM       _HBit(31) /* VMM (Hypervisor) present */
+#define CPUID_FEATURE_SEGLIM64  _HBit(11) /* 64-bit segment limit checking */
+#define CPUID_FEATURE_PCID      _HBit(17) /* ASID-PCID support */
+#define CPUID_FEATURE_TSCTMR    _HBit(24) /* TSC deadline timer */
+#define CPUID_FEATURE_AVX1_0    _HBit(28) /* AVX 1.0 instructions */
 
 /*
  * The CPUID_EXTFEATURE_XXX values define 64-bit values
 #define CPUID_EXTFEATURE_SYSCALL   _Bit(11)    /* SYSCALL/sysret */
 #define CPUID_EXTFEATURE_XD       _Bit(20)     /* eXecute Disable */
 
-#define CPUID_EXTFEATURE_1GBPAGE   _Bit(26)     /* 1G-Byte Page support */
+#define CPUID_EXTFEATURE_1GBPAGE   _Bit(26)    /* 1GB pages */
 #define CPUID_EXTFEATURE_RDTSCP           _Bit(27)     /* RDTSCP */
 #define CPUID_EXTFEATURE_EM64T    _Bit(29)     /* Extended Mem 64 Technology */
 
-#define CPUID_EXTFEATURE_LAHF     _HBit(0)     /* LAHF/SAHF instructions */
+#define CPUID_EXTFEATURE_LAHF     _HBit(0)     /* LAHF/SAHF instructions */
 
 /*
  * The CPUID_EXTFEATURE_XXX values define 64-bit values
 #define CPUID_MWAIT_EXTENSION  _Bit(0) /* enumeration of MWAIT extensions */
 #define CPUID_MWAIT_BREAK      _Bit(1) /* interrupts are break events     */
 
-#define CPUID_MODEL_YONAH      14
-#define CPUID_MODEL_MEROM      15
-#define CPUID_MODEL_PENRYN     23
-#define CPUID_MODEL_NEHALEM    26
-#define CPUID_MODEL_FIELDS     30      /* Lynnfield, Clarksfield, Jasper */
-#define CPUID_MODEL_DALES      31      /* Havendale, Auburndale */
-#define CPUID_MODEL_NEHALEM_EX 46
-#define CPUID_MODEL_DALES_32NM 37      /* Clarkdale, Arrandale */
-#define CPUID_MODEL_WESTMERE   44      /* Gulftown, Westmere-EP, Westmere-WS */
-#define CPUID_MODEL_WESTMERE_EX        47
+#define CPUID_MODEL_YONAH      0x0E
+#define CPUID_MODEL_MEROM      0x0F
+#define CPUID_MODEL_PENRYN     0x17
+#define CPUID_MODEL_NEHALEM    0x1A
+#define CPUID_MODEL_FIELDS     0x1E    /* Lynnfield, Clarksfield, Jasper */
+#define CPUID_MODEL_DALES      0x1F    /* Havendale, Auburndale */
+#define CPUID_MODEL_NEHALEM_EX 0x2E
+#define CPUID_MODEL_DALES_32NM 0x25    /* Clarkdale, Arrandale */
+#define CPUID_MODEL_WESTMERE   0x2C    /* Gulftown, Westmere-EP, Westmere-WS */
+#define CPUID_MODEL_WESTMERE_EX        0x2F
+/* Additional internal models go here */
+#define CPUID_MODEL_SANDYBRIDGE        0x2A
+#define CPUID_MODEL_JAKETOWN   0x2D
 
 #ifndef ASSEMBLER
 #include <stdint.h>
@@ -162,6 +172,7 @@ cpuid(uint32_t *data)
                  "c"  (data[ecx]),
                  "d"  (data[edx]));
 }
+
 static inline void
 do_cpuid(uint32_t selector, uint32_t *data)
 {
@@ -170,7 +181,10 @@ do_cpuid(uint32_t selector, uint32_t *data)
                  "=b" (data[1]),
                  "=c" (data[2]),
                  "=d" (data[3])
-               : "a"(selector));
+               : "a"(selector),
+                 "b" (0),
+                 "c" (0),
+                 "d" (0));
 }
 
 /*
@@ -209,10 +223,22 @@ typedef struct {
        boolean_t       sensor;
        boolean_t       dynamic_acceleration;
        boolean_t       invariant_APIC_timer;
+       boolean_t       core_power_limits;
+       boolean_t       fine_grain_clock_mod;
+       boolean_t       package_thermal_intr;
        uint32_t        thresholds;
        boolean_t       ACNT_MCNT;
+       boolean_t       hardware_feedback;
+       boolean_t       energy_policy;
 } cpuid_thermal_leaf_t;
 
+
+/* XSAVE Feature Leaf: */
+typedef struct {
+       uint32_t        extended_state[4];      /* eax .. edx */
+} cpuid_xsave_leaf_t;
+
+
 /* Architectural Performance Monitoring Leaf: */
 typedef struct {
        uint8_t         version;
@@ -262,6 +288,7 @@ typedef struct {
 #define cpuid_mwait_sub_Cstates                cpuid_mwait_leaf.sub_Cstates
        cpuid_thermal_leaf_t    cpuid_thermal_leaf;
        cpuid_arch_perf_leaf_t  cpuid_arch_perf_leaf;
+       cpuid_xsave_leaf_t      cpuid_xsave_leaf;
 
        /* Cache details: */
        uint32_t        cpuid_cache_linesize;
@@ -294,7 +321,7 @@ typedef struct {
        cpuid_mwait_leaf_t      *cpuid_mwait_leafp;     
        cpuid_thermal_leaf_t    *cpuid_thermal_leafp;
        cpuid_arch_perf_leaf_t  *cpuid_arch_perf_leafp;
-
+       cpuid_xsave_leaf_t      *cpuid_xsave_leafp;
 } i386_cpu_info_t;
 
 #ifdef __cplusplus
index aacc02ebc12baf2502f6975d48267849ec39917f..72d3c94b7510c9c083006b342d964d7d6f95ffef 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * XXX a better implementation would use a set of generic callouts and iterate over them
  */
 void
-etimer_intr(
-__unused int inuser,
-__unused uint64_t iaddr)
+etimer_intr(int                user_mode,
+           uint64_t    rip)
 {
        uint64_t                abstime;
        rtclock_timer_t         *mytimer;
        cpu_data_t              *pp;
-       x86_lcpu_t              *lcpu;
+       int32_t                 latency;
+       uint64_t                pmdeadline;
 
        pp = current_cpu_datap();
-       lcpu = x86_lcpu();
-
-       mytimer = &pp->rtclock_timer;                           /* Point to the event timer */
-       abstime = mach_absolute_time();                         /* Get the time now */
 
-       /* is it time for power management state change? */     
-       if (pmCPUGetDeadline(pp) <= abstime) {
-               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START, 0, 0, 0, 0, 0);
-               pmCPUDeadline(pp);
-               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END, 0, 0, 0, 0, 0);
-
-               abstime = mach_absolute_time();                 /* Get the time again since we ran a bit */
-       }
+       abstime = mach_absolute_time();                 /* Get the time now */
 
        /* has a pending clock timer expired? */
-       if (mytimer->deadline <= abstime) {                     /* Have we expired the deadline? */
-               mytimer->has_expired = TRUE;                    /* Remember that we popped */
+       mytimer = &pp->rtclock_timer;
+       if (mytimer->deadline <= abstime) {
+               /*
+                * Log interrupt service latency (negative value expected by the
+                * tool); a non-PM event is expected next.
+                */
+               latency = (int32_t) (abstime - mytimer->deadline);
+               KERNEL_DEBUG_CONSTANT(
+                   MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE,
+                   -latency,
+                   (uint32_t)rip, user_mode, 0, 0);
+
+               mytimer->has_expired = TRUE;            /* Remember that we popped */
                mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
                mytimer->has_expired = FALSE;
+
+               /* Get the time again since we ran for a bit */
+               abstime = mach_absolute_time();
+       }
+
+       /* is it time for power management state change? */
+       if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
+               KERNEL_DEBUG_CONSTANT(
+                   MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START,
+                   0, 0, 0, 0, 0);
+               pmCPUDeadline(pp);
+               KERNEL_DEBUG_CONSTANT(
+                   MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END,
+                   0, 0, 0, 0, 0);
        }
 
-       /* schedule our next deadline */
-       lcpu->rtcPop = EndOfAllTime;                            /* any real deadline will be earlier */
        etimer_resync_deadlines();
 }
 
@@ -110,11 +122,11 @@ void etimer_set_deadline(uint64_t deadline)
        spl_t                   s;
        cpu_data_t              *pp;
 
-       s = splclock();                                 /* no interruptions */
+       s = splclock();                         /* no interruptions */
        pp = current_cpu_datap();
 
-       mytimer = &pp->rtclock_timer;                   /* Point to the timer itself */
-       mytimer->deadline = deadline;                   /* Set the new expiration time */
+       mytimer = &pp->rtclock_timer;           /* Point to the timer itself */
+       mytimer->deadline = deadline;           /* Set the new expiration time */
 
        etimer_resync_deadlines();
 
@@ -134,44 +146,37 @@ etimer_resync_deadlines(void)
        rtclock_timer_t         *mytimer;
        spl_t                   s = splclock();
        cpu_data_t              *pp;
-       x86_lcpu_t              *lcpu;
+       uint32_t                decr;
 
        pp = current_cpu_datap();
-       lcpu = x86_lcpu();
-       deadline = ~0ULL;
+       deadline = EndOfAllTime;
 
        /*
-        * If we have a clock timer set sooner, pop on that.
+        * If we have a clock timer set, pick that.
         */
        mytimer = &pp->rtclock_timer;
-       if (!mytimer->has_expired && mytimer->deadline > 0)
+       if (!mytimer->has_expired &&
+           0 < mytimer->deadline && mytimer->deadline < EndOfAllTime)
                deadline = mytimer->deadline;
 
        /*
         * If we have a power management deadline, see if that's earlier.
         */
        pmdeadline = pmCPUGetDeadline(pp);
-       if (pmdeadline > 0 && pmdeadline < deadline)
+       if (0 < pmdeadline && pmdeadline < deadline)
            deadline = pmdeadline;
 
        /*
         * Go and set the "pop" event.
         */
-       if (deadline > 0) {
-               int     decr;
-               uint64_t now;
-
-               now = mach_absolute_time();
-               decr = setPop(deadline);
-
-               if (deadline < now)
-                       lcpu->rtcPop = now + decr;
-               else
-                       lcpu->rtcPop = deadline;
-
-               lcpu->rtcDeadline = lcpu->rtcPop;
-
-               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, decr, 2, 0, 0, 0);
+       decr = (uint32_t) setPop(deadline);
+
+       /* Record non-PM deadline for latency tool */
+       if (deadline != pmdeadline) {
+               KERNEL_DEBUG_CONSTANT(
+                   MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE,
+                   decr, 2,
+                   deadline, (uint32_t)(deadline >> 32), 0);
        }
        splx(s);
 }
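
The rewritten resync treats a deadline of 0 as "not set" and EndOfAllTime as the sentinel maximum, then programs the pop with whichever valid deadline is earliest; the per-lcpu rtcPop bookkeeping moves into setPop() itself. The selection, distilled into a standalone helper (sentinel value assumed):

    #include <stdint.h>

    #define END_OF_ALL_TIME UINT64_MAX   /* assumed value of EndOfAllTime */

    /* Earliest pending deadline wins; 0 means "not set" and must not win. */
    static uint64_t earliest_deadline(uint64_t clock_dl, uint64_t pm_dl)
    {
            uint64_t dl = END_OF_ALL_TIME;

            if (0 < clock_dl && clock_dl < dl)
                    dl = clock_dl;
            if (0 < pm_dl && pm_dl < dl)
                    dl = pm_dl;
            return dl;
    }
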
@@ -185,10 +190,8 @@ __unused void                      *arg)
        rtclock_timer_t         *mytimer;
        uint64_t                        abstime;
        cpu_data_t                      *pp;
-       x86_lcpu_t                      *lcpu;
 
        pp = current_cpu_datap();
-       lcpu = x86_lcpu();
 
        mytimer = &pp->rtclock_timer;
        abstime = mach_absolute_time();
@@ -197,7 +200,6 @@ __unused void                       *arg)
        mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
        mytimer->has_expired = FALSE;
 
-       lcpu->rtcPop = EndOfAllTime;
        etimer_resync_deadlines();
 }
 
index 5c458843be1a6fda95cb6691fdc3868199aa6967..7b4be4ebefbe56d4b42530dd655ee9c2823f862e 100644 (file)
@@ -70,6 +70,8 @@
 #include <kern/spl.h>
 #include <kern/assert.h>
 
+#include <libkern/OSAtomic.h>
+
 #include <architecture/i386/pio.h>
 #include <i386/cpuid.h>
 #include <i386/fpu.h>
@@ -91,59 +93,122 @@ extern void                fp_save(
 extern void            fp_load(
                                thread_t        thr_act);
 
-static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps);
+static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps);
 
-struct x86_fpsave_state starting_fp_state;
+struct x86_avx_thread_state initial_fp_state __attribute((aligned(64)));
 
 
 /* Global MXCSR capability bitmask */
 static unsigned int mxcsr_capability_mask;
 
+#define        fninit() \
+       __asm__ volatile("fninit")
+
+#define        fnstcw(control) \
+       __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))
+
+#define        fldcw(control) \
+       __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) )
+
+#define        fnclex() \
+       __asm__ volatile("fnclex")
+
+#define        fnsave(state)  \
+       __asm__ volatile("fnsave %0" : "=m" (*state))
+
+#define        frstor(state) \
+       __asm__ volatile("frstor %0" : : "m" (state))
+
+#define fwait() \
+       __asm__("fwait");
+
+#define fxrstor(addr)           __asm__ __volatile__("fxrstor %0" : : "m" (*(addr)))     
+#define fxsave(addr)            __asm__ __volatile__("fxsave %0" : "=m" (*(addr)))
+
+static uint32_t        fp_register_state_size = 0;
+static uint32_t fpu_YMM_present        = FALSE;
+static uint32_t        cpuid_reevaluated = 0;
+
+static void fpu_store_registers(void *, boolean_t);
+static void fpu_load_registers(void *);
+
+extern void xsave64o(void);
+extern void xrstor64o(void);
+
+#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM))
+
+/* DRK: TODO replace opcodes with mnemonics when assembler support is available */
+
+static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) {
+       __asm__ __volatile__(".short 0x010F\n\t.byte 0xD1" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0));
+}
+
+static inline void xsave(void *a) {
+       /* MOD 0x4, operand ECX 0x1 */
+       __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x21" :: "a"(XMASK), "d"(0), "c" (a));
+}
+
+static inline void xrstor(void *a) {
+       /* MOD 0x5, operand ECX 0x1 */
+       __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x29" :: "a"(XMASK), "d"(0), "c" (a));
+}
+
+static inline void xsave64(void *a) {
+       /* Out of line call that executes in 64-bit mode on K32 */
+       __asm__ __volatile__("call _xsave64o" :: "a"(XMASK), "d"(0), "c" (a));
+}
+
+static inline void xrstor64(void *a) {
+       /* Out of line call that executes in 64-bit mode on K32 */
+       __asm__ __volatile__("call _xrstor64o" :: "a"(XMASK), "d"(0), "c" (a));
+}
+
+static inline unsigned short
+fnstsw(void)
+{
+       unsigned short status;
+       __asm__ volatile("fnstsw %0" : "=ma" (status));
+       return(status);
+}
+
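
The .short/.byte sequences above hand-assemble XSETBV (0F 01 D1) and XSAVE/XRSTOR (0F AE with ModRM /4 and /5, area pointer in %ecx) because the assembler of the day lacked the mnemonics. On a toolchain that knows them, the 32-bit helpers would reduce to roughly the following (a sketch; xsetbv is privileged, kernel context only):

    /* Mnemonic equivalents of the encoded stubs (sketch): */
    static inline void xsetbv_m(uint32_t hi, uint32_t lo)
    {
            __asm__ __volatile__("xsetbv" : : "a" (lo), "d" (hi), "c" (0 /* XCR0 */));
    }

    static inline void xsave_m(void *area, uint32_t mask)
    {
            __asm__ __volatile__("xsave (%0)"
                : : "r" (area), "a" (mask), "d" (0) : "memory");
    }
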
 /*
+ * Configure the initial FPU state presented to new threads.
  * Determine the MXCSR capability mask, which allows us to mask off any
  * potentially unsafe "reserved" bits before restoring the FPU context.
  * *Not* per-cpu, assumes symmetry.
  */
+
 static void
-configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps)
+configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps)
 {
-       /* FXSAVE requires a 16 byte aligned store */
-       assert(ALIGNED(ifps,16));
+       /* XSAVE requires a 64 byte aligned store */
+       assert(ALIGNED(fps, 64));
        /* Clear, to prepare for the diagnostic FXSAVE */
-       bzero(ifps, sizeof(*ifps));
-       /* Disable FPU/SSE Device Not Available exceptions */
-       clear_ts();
-       __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state));
-       mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK;
+       bzero(fps, sizeof(*fps));
+
+       fpinit();
+       fpu_store_registers(fps, FALSE);
+
+       mxcsr_capability_mask = fps->fx_MXCSR_MASK;
 
        /* Set default mask value if necessary */
        if (mxcsr_capability_mask == 0)
                mxcsr_capability_mask = 0xffbf;
        
-       /* Re-enable FPU/SSE DNA exceptions */
-       set_ts();
-}
-
-/*
- * Allocate and initialize FP state for current thread.
- * Don't load state.
- */
-static struct x86_fpsave_state *
-fp_state_alloc(void)
-{
-       struct x86_fpsave_state *ifps;
+       /* Clear vector register store */
+       bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg));
+       bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg));
 
-       ifps = (struct x86_fpsave_state *)zalloc(ifps_zone);
-       assert(ALIGNED(ifps,16));
-       bzero((char *)ifps, sizeof *ifps);
+       fps->fp_valid = TRUE;
+       fps->fp_save_layout = fpu_YMM_present ? XSAVE32: FXSAVE32;
+       fpu_load_registers(fps);
 
-       return ifps;
-}
+       /* Poison values to trap unsafe usage */
+       fps->fp_valid = 0xFFFFFFFF;
+       fps->fp_save_layout = FP_UNUSED;
 
-static inline void
-fp_state_free(struct x86_fpsave_state *ifps)
-{
-       zfree(ifps_zone, ifps);
+       /* Re-enable FPU/SSE DNA exceptions */
+       set_ts();
 }
 
 
@@ -154,81 +219,248 @@ fp_state_free(struct x86_fpsave_state *ifps)
 void
 init_fpu(void)
 {
-       unsigned short  status, control;
-
+#if    DEBUG   
+       unsigned short  status;
+       unsigned short  control;
+#endif
        /*
         * Check for FPU by initializing it,
         * then trying to read the correct bit patterns from
         * the control and status registers.
         */
        set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE);       /* allow use of FPU */
-
        fninit();
+#if    DEBUG   
        status = fnstsw();
        fnstcw(&control);
+       
+       assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f));
+#endif
+       /* Advertise SSE support */
+       if (cpuid_features() & CPUID_FEATURE_FXSR) {
+               fp_kind = FP_FXSR;
+               set_cr4(get_cr4() | CR4_OSFXS);
+               /* And allow SIMD exceptions if present */
+               if (cpuid_features() & CPUID_FEATURE_SSE) {
+                       set_cr4(get_cr4() | CR4_OSXMM);
+               }
+               fp_register_state_size = sizeof(struct x86_fx_thread_state);
 
-       if ((status & 0xff) == 0 &&
-           (control & 0x103f) == 0x3f) 
-        {
-           /* Use FPU save/restore instructions if available */
-               if (cpuid_features() & CPUID_FEATURE_FXSR) {
-               fp_kind = FP_FXSR;
-                       set_cr4(get_cr4() | CR4_FXS);
-                       /* And allow SIMD instructions if present */
-                       if (cpuid_features() & CPUID_FEATURE_SSE) {
-                       set_cr4(get_cr4() | CR4_XMM);
-                       }
-           } else
-                       panic("fpu is not FP_FXSR");
+       } else
+               panic("fpu is not FP_FXSR");
 
-           /*
-            * initialze FPU to normal starting 
-            * position so that we can take a snapshot
-            * of that state and store it for future use
-            * when we're asked for the FPU state of a 
-            * thread, and it hasn't initiated any yet
-            */
-            fpinit();
-            fxsave(&starting_fp_state.fx_save_state);
+       /* Configure the XSAVE context mechanism if the processor supports
+        * AVX/YMM registers
+        */
+       if (cpuid_features() & CPUID_FEATURE_XSAVE) {
+               cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
+               if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
+                       assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE);
+                       /* XSAVE container size for all features */
+                       assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state));
+                       fp_register_state_size = sizeof(struct x86_avx_thread_state);
+                       fpu_YMM_present = TRUE;
+                       set_cr4(get_cr4() | CR4_OSXSAVE);
+                       xsetbv(0, XMASK);
+                       /* Re-evaluate CPUID, once, to reflect OSXSAVE */
+                       if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
+                               cpuid_set_info();
+                       /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
+               }
+       }
+       else
+               fpu_YMM_present = FALSE;
+
+       fpinit();
+
+       /*
+        * Trap wait instructions.  Turn off FPU for now.
+        */
+       set_cr0(get_cr0() | CR0_TS | CR0_MP);
+}
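
init_fpu() now performs the canonical XSAVE/AVX bring-up; the steps are order-sensitive, since XSETBV faults unless CR4.OSXSAVE is set first and user code cannot see OSXSAVE until CPUID is re-read. Summarized as a sketch:

    /*
     * XSAVE/AVX enable sequence implemented above (sketch):
     *  1. CPUID.1:ECX.XSAVE set          processor supports XSAVE/XRSTOR
     *  2. CPUID.0xD, sub-leaf 0          YMM supported; save-area size checked
     *  3. CR4.OSXSAVE = 1                unlocks XSETBV/XGETBV
     *  4. XSETBV(XCR0, X87|SSE|YMM)      OS commits to managing those states
     *  5. cpuid_set_info() re-run once   cached feature bits now show OSXSAVE
     */
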
+
+/*
+ * Allocate and initialize FP state for current thread.
+ * Don't load state.
+ */
+static void *
+fp_state_alloc(void)
+{
+       void *ifps = zalloc(ifps_zone);
 
-            /*
-             * Trap wait instructions.  Turn off FPU for now.
-             */
-            set_cr0(get_cr0() | CR0_TS | CR0_MP);
+#if    DEBUG   
+       if (!(ALIGNED(ifps,64))) {
+               panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size);
        }
+#endif
+       return ifps;
+}
+
+static inline void
+fp_state_free(void *ifps)
+{
+       zfree(ifps_zone, ifps);
+}
+
+void clear_fpu(void)
+{
+       set_ts();
+}
+
+
+static void fpu_load_registers(void *fstate) {
+       struct x86_fx_thread_state *ifps = fstate;
+       fp_save_layout_t layout = ifps->fp_save_layout;
+
+       assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64);
+       assert(ALIGNED(ifps, 64));
+       assert(ml_get_interrupts_enabled() == FALSE);
+
+#if    DEBUG   
+       if (layout == XSAVE32 || layout == XSAVE64) {
+               struct x86_avx_thread_state *iavx = fstate;
+               unsigned i;
+               /* Verify reserved bits in the XSAVE header */
+               if (iavx->_xh.xsbv & ~7)
+                       panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv);
+               for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++)
+                       if (iavx->_xh.xhrsvd[i])
+                               panic("Reserved bit set");
+       }
+       if (fpu_YMM_present) {
+               if (layout != XSAVE32 && layout != XSAVE64)
+                       panic("Inappropriate layout: %u\n", layout);
+       }
+#endif /* DEBUG */
+
+#if defined(__i386__)
+       if (layout == FXSAVE32) {
+               /* Restore the compatibility/legacy mode XMM+x87 state */
+               fxrstor(ifps);
+       }
+       else if (layout == FXSAVE64) {
+               fxrstor64(ifps);
+       }
+       else if (layout == XSAVE32) {
+               xrstor(ifps);
+       }
+       else if (layout == XSAVE64) {
+               xrstor64(ifps);
+       }
+#elif defined(__x86_64__)
+       if ((layout == XSAVE64) || (layout == XSAVE32))
+               xrstor(ifps);
        else
-       {
-           /*
-            * NO FPU.
-            */
-               panic("fpu is not FP_FXSR");
+               fxrstor(ifps);
+#endif
+}
+
+static void fpu_store_registers(void *fstate, boolean_t is64) {
+       struct x86_fx_thread_state *ifps = fstate;
+       assert(ALIGNED(ifps, 64));
+#if defined(__i386__)
+       if (!is64) {
+               if (fpu_YMM_present) {
+                       xsave(ifps);
+                       ifps->fp_save_layout = XSAVE32;
+               }
+               else {
+                       /* save the compatibility/legacy mode XMM+x87 state */
+                       fxsave(ifps);
+                       ifps->fp_save_layout = FXSAVE32;
+               }
+       }
+       else {
+               if (fpu_YMM_present) {
+                       xsave64(ifps);
+                       ifps->fp_save_layout = XSAVE64;
+               }
+               else {
+                       fxsave64(ifps);
+                       ifps->fp_save_layout = FXSAVE64;
+               }
+       }
+#elif defined(__x86_64__)
+       if (fpu_YMM_present) {
+               xsave(ifps);
+               ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32;
        }
+       else {
+               fxsave(ifps);
+               ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32;
+       }
+#endif
 }
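
fpu_store_registers() picks a save layout per kernel and thread bitness. On the 32-bit kernel, 64-bit threads need the 64-bit instruction forms, reached through out-of-line stubs that execute in long mode, because the legacy forms truncate the saved FP instruction and operand pointers. The decision table, as a sketch:

    /*
     * Save-layout selection above (sketch):
     *   i386 kernel,  32-bit thread:  xsave/fxsave            XSAVE32/FXSAVE32
     *   i386 kernel,  64-bit thread:  xsave64/fxsave64 stubs  XSAVE64/FXSAVE64
     *                                 (out of line, run in long mode)
     *   x86_64 kernel, either:        xsave/fxsave, tagged 64 or 32 by
     *                                 thread bitness for the later restore
     */
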
 
 /*
  * Initialize FP handling.
  */
+
 void
 fpu_module_init(void)
 {
-       struct x86_fpsave_state *new_ifps;
-       
-       ifps_zone = zinit(sizeof(struct x86_fpsave_state),
-                         thread_max * sizeof(struct x86_fpsave_state),
-                         THREAD_CHUNK * sizeof(struct x86_fpsave_state),
+       if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) &&
+           (fp_register_state_size != sizeof(struct x86_avx_thread_state)))
+               panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size);
+
+       assert(fpu_YMM_present != 0xFFFFFFFF);
+
+       /* We explicitly choose an allocation chunk of 64 elements
+        * to eliminate waste for the 832-byte AVX XSAVE
+        * register save area.
+        */
+       ifps_zone = zinit(fp_register_state_size,
+                         thread_max * fp_register_state_size,
+                         64 * fp_register_state_size,
                          "x86 fpsave state");
-       new_ifps = fp_state_alloc();
-       /* Determine MXCSR reserved bits */
-       configure_mxcsr_capability_mask(new_ifps);
-       fp_state_free(new_ifps);
+
+#if    ZONE_DEBUG
+       /* To maintain the required alignment, disable
+        * zone debugging for this zone as that appends
+        * 16 bytes to each element.
+        */
+       zone_debug_disable(ifps_zone);
+#endif 
+       /* Determine MXCSR reserved bits and configure initial FPU state */
+       configure_mxcsr_capability_mask(&initial_fp_state);
+}
+
+/*
+ * Save thread`s FPU context.
+ */
+void
+fpu_save_context(thread_t thread)
+{
+       struct x86_fx_thread_state *ifps;
+
+       assert(ml_get_interrupts_enabled() == FALSE);
+       ifps = (thread)->machine.pcb->ifps;
+#if    DEBUG
+       if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
+               panic("ifps->fp_valid: %u\n", ifps->fp_valid);
+       }
+#endif
+       if (ifps != 0 && (ifps->fp_valid == FALSE)) {
+               /* Clear CR0.TS in preparation for the FP context save. In
+                * theory, this shouldn't be necessary since a live FPU should
+                * indicate that TS is clear. However, various routines
+                * (such as sendsig & sigreturn) manipulate TS directly.
+                */
+               clear_ts();
+               /* registers are in FPU - save to memory */
+               fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.pcb->iss)));
+               ifps->fp_valid = TRUE;
+       }
+       set_ts();
 }
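
fpu_save_context() is the switch-out half of the lazy-FPU protocol: registers are written back only when live (fp_valid == FALSE), and CR0.TS is set afterwards so the next FP/SSE instruction traps. The full round trip, sketched (the trap-side handler, fpnoextflt, lives elsewhere in xnu and is assumed here):

    /*
     * Lazy FPU switch protocol (sketch):
     *   switch-out:  if fp_valid == FALSE (registers live):
     *                    clear_ts(); fpu_store_registers(); fp_valid = TRUE;
     *                set_ts();                      arm the DNA (#NM) trap
     *   first FP use after switch-in:
     *                #NM fault -> fpnoextflt(): clear_ts();
     *                    fpu_load_registers(); fp_valid = FALSE;
     */
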
 
+
 /*
  * Free a FPU save area.
  * Called only when thread terminating - no locking necessary.
  */
 void
-fpu_free(struct x86_fpsave_state *fps)
+fpu_free(void *fps)
 {
        fp_state_free(fps);
 }
@@ -244,14 +476,16 @@ fpu_free(struct x86_fpsave_state *fps)
  */
 kern_return_t
 fpu_set_fxstate(
-       thread_t                thr_act,
-       thread_state_t  tstate)
+       thread_t        thr_act,
+       thread_state_t  tstate,
+       thread_flavor_t f)
 {
-       struct x86_fpsave_state *ifps;
-       struct x86_fpsave_state *new_ifps;
+       struct x86_fx_thread_state *ifps;
+       struct x86_fx_thread_state *new_ifps;
        x86_float_state64_t     *state;
        pcb_t   pcb;
-
+       size_t  state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state);
+       boolean_t       old_valid;
        if (fp_kind == FP_NO)
            return KERN_FAILURE;
 
@@ -291,28 +525,46 @@ fpu_set_fxstate(
                }
                ifps = new_ifps;
                new_ifps = 0;
-                       pcb->ifps = ifps;
+               pcb->ifps = ifps;
            }
            /*
             * now copy over the new data.
             */
-            bcopy((char *)&state->fpu_fcw,
-                     (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save));
+           old_valid = ifps->fp_valid;
 
-               /* XXX The layout of the state set from user-space may need to be
-                * validated for consistency.
-                */
+#if    DEBUG       
+           if ((old_valid == FALSE) && (thr_act != current_thread())) {
+                   panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
+           }
+#endif
+
+           bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);
+
+           if (fpu_YMM_present) {
+               struct x86_avx_thread_state *iavx = (void *) ifps;
+               iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
+               /* Sanitize XSAVE header */
+               bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
+               if (state_size == sizeof(struct x86_avx_thread_state))
+                       iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87);
+               else
+                       iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87);
+           }
+           else
                ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
-               /* Mark the thread's floating point status as non-live. */
-               /* Temporarily disabled: radar 4647827
-                * ifps->fp_valid = TRUE;
-                */
+           ifps->fp_valid = old_valid;
 
+           if (old_valid == FALSE) {
+                   boolean_t istate = ml_set_interrupts_enabled(FALSE);
+                   ifps->fp_valid = TRUE;
+                   set_ts();
+                   ml_set_interrupts_enabled(istate);
+           }
                /*
                 * Clear any reserved bits in the MXCSR to prevent a GPF
                 * when issuing an FXRSTOR.
                 */
-           ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
+           ifps->fx_MXCSR &= mxcsr_capability_mask;
 
            simple_unlock(&pcb->lock);
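
The XSAVE header sanitized above has the architectural 64-byte layout; a hedged sketch of the shape the code assumes (the real definition lives in the FPU headers):

struct xsave_header_sketch {
	uint64_t	xsbv;		/* XSTATE_BV: component bitmap, e.g.
					 * XFEM_X87 | XFEM_SSE | XFEM_YMM */
	uint8_t		xhrsvd[56];	/* reserved; must be zeroed, or a
					 * later XRSTOR raises #GP */
};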
 
@@ -330,13 +582,15 @@ fpu_set_fxstate(
  */
 kern_return_t
 fpu_get_fxstate(
-       thread_t                                thr_act,
-       thread_state_t  tstate)
+       thread_t        thr_act,
+       thread_state_t  tstate,
+       thread_flavor_t f)
 {
-       struct x86_fpsave_state *ifps;
+       struct x86_fx_thread_state      *ifps;
        x86_float_state64_t     *state;
        kern_return_t   ret = KERN_FAILURE;
        pcb_t   pcb;
+       size_t  state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state);
 
        if (fp_kind == FP_NO)
                return KERN_FAILURE;
@@ -353,8 +607,9 @@ fpu_get_fxstate(
                /*
                 * No valid floating-point state.
                 */
-               bcopy((char *)&starting_fp_state.fx_save_state,
-                     (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
+
+               bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw,
+                   state_size);
 
                simple_unlock(&pcb->lock);
 
@@ -376,8 +631,7 @@ fpu_get_fxstate(
                (void)ml_set_interrupts_enabled(intr);
        }
        if (ifps->fp_valid) {
-               bcopy((char *)&ifps->fx_save_state,
-                     (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
+               bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
                ret = KERN_SUCCESS;
        }
        simple_unlock(&pcb->lock);
@@ -399,8 +653,8 @@ fpu_dup_fxstate(
        thread_t        parent,
        thread_t        child)
 {
-       struct x86_fpsave_state *new_ifps = NULL;
-        boolean_t      intr;
+       struct x86_fx_thread_state *new_ifps = NULL;
+       boolean_t       intr;
        pcb_t           ppcb;
 
        ppcb = parent->machine.pcb;
@@ -416,33 +670,35 @@ fpu_dup_fxstate(
        simple_lock(&ppcb->lock);
 
        if (ppcb->ifps != NULL) {
+               struct x86_fx_thread_state *ifps = ppcb->ifps;
                /*
                 * Make sure we`ve got the latest fp state info
                 */
                intr = ml_set_interrupts_enabled(FALSE);
-
+               assert(current_thread() == parent);
                clear_ts();
                fp_save(parent);
                clear_fpu();
 
                (void)ml_set_interrupts_enabled(intr);
 
-               if (ppcb->ifps->fp_valid) {
-                       child->machine.pcb->ifps = new_ifps;
+               if (ifps->fp_valid) {
+                       child->machine.pcb->ifps = new_ifps;
+                       assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
+                           (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
+                       bcopy((char *)(ppcb->ifps),
+                           (char *)(child->machine.pcb->ifps), fp_register_state_size);
 
-                       bcopy((char *)&(ppcb->ifps->fx_save_state),
-                             (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save));
-
-                       new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout;
                        /* Mark the new fp saved state as non-live. */
                        /* Temporarily disabled: radar 4647827
                         * new_ifps->fp_valid = TRUE;
                         */
+
                        /*
                         * Clear any reserved bits in the MXCSR to prevent a GPF
                         * when issuing an FXRSTOR.
                         */
-                       new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
+                       new_ifps->fx_MXCSR &= mxcsr_capability_mask;
                        new_ifps = NULL;
                }
        }
@@ -457,6 +713,7 @@ fpu_dup_fxstate(
  * Initialize FPU.
  *
  */
+
 void
 fpinit(void)
 {
@@ -477,7 +734,7 @@ fpinit(void)
        fldcw(control);
 
        /* Initialize SSE/SSE2 */
-               __builtin_ia32_ldmxcsr(0x1f80);
+       __builtin_ia32_ldmxcsr(0x1f80);
 }
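
An aside on the magic number above: 0x1f80 is the architectural MXCSR default, i.e. all six SSE exception classes masked (bits 7 through 12 set), round-to-nearest, with FTZ and DAZ clear.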
 
 /*
@@ -490,14 +747,24 @@ fpnoextflt(void)
        boolean_t       intr;
        thread_t        thr_act;
        pcb_t           pcb;
-       struct x86_fpsave_state *ifps = 0;
+       struct x86_fx_thread_state *ifps = 0;
 
        thr_act = current_thread();
        pcb = thr_act->machine.pcb;
 
-       if (pcb->ifps == 0 && !get_interrupt_level())
-               ifps = fp_state_alloc();
+       assert(fp_register_state_size != 0);
 
+       if (pcb->ifps == 0 && !get_interrupt_level()) {
+               ifps = fp_state_alloc();
+               bcopy((char *)&initial_fp_state, (char *)ifps,
+                   fp_register_state_size);
+               if (!thread_is_64bit(thr_act)) {
+                       ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
+               }
+               else
+                       ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64;
+               ifps->fp_valid = TRUE;
+       }
        intr = ml_set_interrupts_enabled(FALSE);
 
        clear_ts();                     /*  Enable FPU use */
@@ -535,7 +802,7 @@ fpextovrflt(void)
 {
        thread_t        thr_act = current_thread();
        pcb_t           pcb;
-       struct x86_fpsave_state *ifps;
+       struct x86_fx_thread_state *ifps;
        boolean_t       intr;
 
        intr = ml_set_interrupts_enabled(FALSE);
@@ -586,7 +853,7 @@ void
 fpexterrflt(void)
 {
        thread_t        thr_act = current_thread();
-       struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
+       struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps;
        boolean_t       intr;
 
        intr = ml_set_interrupts_enabled(FALSE);
@@ -610,7 +877,7 @@ fpexterrflt(void)
         */
        i386_exception(EXC_ARITHMETIC,
                       EXC_I386_EXTERR,
-                      ifps->fx_save_state.fx_status);
+                      ifps->fx_status);
 
        /*NOTREACHED*/
 }
@@ -630,27 +897,14 @@ fp_save(
        thread_t        thr_act)
 {
        pcb_t pcb = thr_act->machine.pcb;
-       struct x86_fpsave_state *ifps = pcb->ifps;
+       struct x86_fx_thread_state *ifps = pcb->ifps;
 
+       assert(ifps != 0);
        if (ifps != 0 && !ifps->fp_valid) {
                assert((get_cr0() & CR0_TS) == 0);
                /* registers are in FPU */
                ifps->fp_valid = TRUE;
-
-#if defined(__i386__)
-               if (!thread_is_64bit(thr_act)) {
-                       /* save the compatibility/legacy mode XMM+x87 state */
-                       fxsave(&ifps->fx_save_state);
-                       ifps->fp_save_layout = FXSAVE32;
-               }
-               else {
-                       fxsave64(&ifps->fx_save_state);
-                       ifps->fp_save_layout = FXSAVE64;
-               }
-#elif defined(__x86_64__)
-               fxsave(&ifps->fx_save_state);
-               ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
-#endif
+               fpu_store_registers(ifps, thread_is_64bit(thr_act));
        }
 }
 
@@ -665,50 +919,19 @@ fp_load(
        thread_t        thr_act)
 {
        pcb_t pcb = thr_act->machine.pcb;
-       struct x86_fpsave_state *ifps;
+       struct x86_fx_thread_state *ifps = pcb->ifps;
 
-       ifps = pcb->ifps;
-       if (ifps == 0 || ifps->fp_valid == FALSE) {
-               if (ifps == 0) {
-                       /* FIXME: This allocation mechanism should be revised
-                        * for scenarios where interrupts are disabled.
-                        */
-                       ifps = fp_state_alloc();
-                       pcb->ifps = ifps;
-               }
+       assert(ifps);
+       assert(ifps->fp_valid == FALSE || ifps->fp_valid == TRUE);
+
+       if (ifps->fp_valid == FALSE) {
                fpinit();
        } else {
-               assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
-#if defined(__i386__)
-               if (ifps->fp_save_layout == FXSAVE32) {
-                       /* Restore the compatibility/legacy mode XMM+x87 state */
-                       fxrstor(&ifps->fx_save_state);
-               }
-               else if (ifps->fp_save_layout == FXSAVE64) {
-                       fxrstor64(&ifps->fx_save_state);
-               }
-#elif defined(__x86_64__)
-               fxrstor(&ifps->fx_save_state);
-#endif
+               fpu_load_registers(ifps);
        }
        ifps->fp_valid = FALSE;         /* in FPU */
 }
 
-
-
-/*
- * fpflush(thread_t)
- *     Flush the current act's state, if needed
- *     (used by thread_terminate_self to ensure fp faults
- *     aren't satisfied by overly general trap code in the
- *     context of the reaper thread)
- */
-void
-fpflush(__unused thread_t thr_act)
-{
-       /* not needed on MP x86s; fp not lazily evaluated */
-}
-
 /*
  * SSE arithmetic exception handling code.
  * Basically the same as the x87 exception handler with a different subtype
@@ -718,7 +941,7 @@ void
 fpSSEexterrflt(void)
 {
        thread_t        thr_act = current_thread();
-       struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
+       struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps;
        boolean_t       intr;
 
        intr = ml_set_interrupts_enabled(FALSE);
@@ -742,20 +965,27 @@ fpSSEexterrflt(void)
        assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
        i386_exception(EXC_ARITHMETIC,
                       EXC_I386_SSEEXTERR,
-                      ifps->fx_save_state.fx_MXCSR);
+                      ifps->fx_MXCSR);
        /*NOTREACHED*/
 }
 
-
 void
 fp_setvalid(boolean_t value) {
         thread_t       thr_act = current_thread();
-       struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
+       struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps;
 
        if (ifps) {
                ifps->fp_valid = value;
 
-               if (value == TRUE)
+               if (value == TRUE) {
+                       boolean_t istate = ml_set_interrupts_enabled(FALSE);
                        clear_fpu();
+                       ml_set_interrupts_enabled(istate);
+               }
        }
 }
+
+boolean_t
+ml_fpu_avx_enabled(void) {
+       return (fpu_YMM_present == TRUE);
+}
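
How fpu_YMM_present itself gets set is outside these hunks; a hedged, illustrative probe of the usual sequence (xgetbv() here is a hypothetical accessor, not an API this patch defines):

boolean_t
probe_avx_sketch(void)
{
	/* AVX is usable only once CR4.OSXSAVE is set and XCR0 enables
	 * x87 + SSE + YMM state; the XFEM_* masks are from this patch.
	 */
	uint64_t xcr0 = xgetbv(0);
	return ((xcr0 & (XFEM_X87 | XFEM_SSE | XFEM_YMM)) ==
	    (XFEM_X87 | XFEM_SSE | XFEM_YMM));
}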
index 7b6f86a826fed74c55d39010f9671c42067a420d..a606aab41376ecf269151ed954be16e8fe5c77ec 100644 (file)
@@ -73,13 +73,15 @@ extern int          fp_kind;
 extern void            init_fpu(void);
 extern void            fpu_module_init(void);
 extern void            fpu_free(
-                               struct x86_fpsave_state * fps);
+                               void    * fps);
 extern kern_return_t   fpu_set_fxstate(
                                thread_t        thr_act,
-                               thread_state_t  state);
+                               thread_state_t  state,
+                               thread_flavor_t f);
 extern kern_return_t   fpu_get_fxstate(
                                thread_t        thr_act,
-                               thread_state_t  state);
+                               thread_state_t  state,
+                               thread_flavor_t f);
 extern void            fpu_dup_fxstate(
                                thread_t        parent,
                                thread_t        child);
@@ -90,96 +92,11 @@ extern void         fpSSEexterrflt(void);
 extern void            fpflush(thread_t);
 extern void            fp_setvalid(boolean_t);
 #ifdef __i386__
-extern void            fxsave64(struct x86_fx_save *);
-extern void            fxrstor64(struct x86_fx_save *);
+extern void            fxsave64(struct x86_fx_thread_state *);
+extern void            fxrstor64(struct x86_fx_thread_state *);
 #endif
 
-/*
- * FPU instructions.
- */
-#define        fninit() \
-       __asm__ volatile("fninit")
-
-#define        fnstcw(control) \
-       __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))
-
-#define        fldcw(control) \
-       __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) )
-
-static inline unsigned short
-fnstsw(void)
-{
-       unsigned short status;
-       __asm__ volatile("fnstsw %0" : "=ma" (status));
-       return(status);
-}
-
-#define        fnclex() \
-       __asm__ volatile("fnclex")
-
-#define        fnsave(state)  \
-       __asm__ volatile("fnsave %0" : "=m" (*state))
-
-#define        frstor(state) \
-       __asm__ volatile("frstor %0" : : "m" (state))
-
-#define fwait() \
-       __asm__("fwait");
-
-#define fxrstor(addr)           __asm("fxrstor %0" : : "m" (*(addr)))     
-#define fxsave(addr)            __asm __volatile("fxsave %0" : "=m" (*(addr)))
-
-#define FXSAFE() (fp_kind == FP_FXSR)
-
-
-static inline void clear_fpu(void)
-{
-       set_ts();
-}
-
-
-/*
- * Save thread`s FPU context.
- */
-
-static inline void fpu_save_context(thread_t thread)
-{
-       struct x86_fpsave_state *ifps;
-
-       assert(ml_get_interrupts_enabled() == FALSE);
-       ifps = (thread)->machine.pcb->ifps;
-       if (ifps != 0 && !ifps->fp_valid) {
-               /* Clear CR0.TS in preparation for the FP context save. In
-                * theory, this shouldn't be necessary since a live FPU should
-                * indicate that TS is clear. However, various routines
-                * (such as sendsig & sigreturn) manipulate TS directly.
-                */
-               clear_ts();
-               /* registers are in FPU - save to memory */
-               ifps->fp_valid = TRUE;
-
-#if defined(__i386__)
-               if (!thread_is_64bit(thread) || is_saved_state32(thread->machine.pcb->iss)) {
-                       /* save the compatibility/legacy mode XMM+x87 state */
-                       fxsave(&ifps->fx_save_state);
-                       ifps->fp_save_layout = FXSAVE32;
-               }
-               else {
-                       /* Execute a brief jump to 64-bit mode to save the 64
-                        * bit state
-                        */
-                       fxsave64(&ifps->fx_save_state);
-                       ifps->fp_save_layout = FXSAVE64;
-               }
-#elif defined(__x86_64__)
-               /* for a 64-bit long mode kernel, we can always use plain fxsave */
-               fxsave(&ifps->fx_save_state);
-               ifps->fp_save_layout = thread_is_64bit(thread) ? FXSAVE64
-                       : FXSAVE32;
-
-#endif
-       }
-       set_ts();
-}
+extern void clear_fpu(void);
+extern void fpu_save_context(thread_t thread);
 
 #endif /* _I386_FPU_H_ */
index a254013ddc597bd6c2e5cf75bef0e8c5ffae3853..0f5edf0e581d8cafb53a6eb94da26153df52c9da 100644 (file)
@@ -263,7 +263,7 @@ main(
        DECLARE("DS64_DR6",     offsetof(struct x86_debug_state64 *, dr6));
        DECLARE("DS64_DR7",     offsetof(struct x86_debug_state64 *, dr7));
 
-       DECLARE("FP_VALID",     offsetof(struct x86_fpsave_state *,fp_valid));
+       DECLARE("FP_VALID",     offsetof(struct x86_fx_thread_state *,fp_valid));
 
        DECLARE("SS_FLAVOR",    offsetof(x86_saved_state_t *, flavor));
        DECLARE("SS_32",        x86_SAVED_STATE32);
@@ -431,6 +431,8 @@ main(
                offsetof(cpu_data_t *, cpu_hibernate));
         DECLARE("CPU_INTERRUPT_LEVEL",
                offsetof(cpu_data_t *, cpu_interrupt_level));
+       DECLARE("CPU_NESTED_ISTACK",
+           offsetof(cpu_data_t *, cpu_nested_istack));
         DECLARE("CPU_SIMPLE_LOCK_COUNT",
                offsetof(cpu_data_t *,cpu_simple_lock_count));
         DECLARE("CPU_NUMBER_GS",
index 135f7a94272230b12b9ca3b4138cac970593d448..445c6afedf4dda5d4829a1ebe2e0a9fcc4f92375 100644 (file)
@@ -504,14 +504,14 @@ i386_init(vm_offset_t boot_args_start)
        if ( ! PE_parse_boot_argn("novmx", &noVMX, sizeof (noVMX)))
                noVMX = 0;      /* OK to support Altivec in rosetta? */
 
-       /* create the console for verbose or pretty mode */
-       /* Note: doing this prior to tsc_init() allows for graceful panic! */
-       PE_init_platform(TRUE, kernelBootArgs);
-       PE_create_console();
-
        tsc_init();
        power_management_init();
 
+       PE_init_platform(TRUE, kernelBootArgs);
+
+       /* create the console for verbose or pretty mode */
+       PE_create_console();
+
        processor_bootstrap();
        thread_bootstrap();
 
index bddcb54c7030fe0d3d7d14438b5e4145a7ebedf4..301d02274f77f0851b0353bd7fde9af961f245a1 100644 (file)
@@ -106,6 +106,10 @@ ppnum_t            lowest_lo = 0;
 ppnum_t                lowest_hi = 0;
 ppnum_t                highest_hi = 0;
 
+uint32_t pmap_reserved_pages_allocated = 0;
+uint32_t pmap_last_reserved_range = 0xFFFFFFFF;
+uint32_t pmap_reserved_ranges = 0;
+
 extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
 
 pmap_paddr_t     avail_start, avail_end;
@@ -288,7 +292,6 @@ i386_vm_init(uint64_t       maxmem,
                        sane_size += region_bytes;
                        break;
 
-
                case kEfiReservedMemoryType:
                        firmware_Reserved_bytes += region_bytes;
                        break;
@@ -339,10 +342,31 @@ i386_vm_init(uint64_t     maxmem,
                                        pmptr->base = base;
                                else
                                        pmptr->base = I386_LOWMEM_RESERVED;
+
+                               pmptr->end = top;
+
                                /*
-                                * mark as already mapped
+                                * A range may be marked with the
+                                * EFI_MEMORY_KERN_RESERVED attribute
+                                * on some systems, to indicate that the range
+                                * must not be made available to devices.
+                                * Simplifying assumptions are made regarding
+                                * the placement of the range.
                                 */
-                               pmptr->alloc = pmptr->end = top;
+                               if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED)
+                                       pmap_reserved_ranges++;
+
+                               if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) &&
+                                   (top < I386_KERNEL_IMAGE_BASE_PAGE)) {
+                                       pmptr->alloc = pmptr->base;
+                                       pmap_last_reserved_range = pmap_memory_region_count;
+                               }
+                               else {
+                                       /*
+                                        * mark as already mapped
+                                        */
+                                       pmptr->alloc = top;
+                               }
                                pmptr->type = pmap_type;
                        }
                        else if ( (base < fap) && (top > fap) ) {
@@ -552,6 +576,38 @@ pmap_free_pages(void)
        return (unsigned int)avail_remaining;
 }
 
+boolean_t pmap_next_page_reserved(ppnum_t *);
+
+/*
+ * Pick a page from a "kernel private" reserved range; works around
+ * errata on some hardware.
+ */
+boolean_t
+pmap_next_page_reserved(ppnum_t *pn) {
+       if (pmap_reserved_ranges && pmap_last_reserved_range != 0xFFFFFFFF) {
+               uint32_t n;
+               pmap_memory_region_t *region;
+               for (n = 0; n <= pmap_last_reserved_range; n++) {
+                       region = &pmap_memory_regions[n];
+                       if (region->alloc < region->end) {
+                               *pn = region->alloc++;
+                               avail_remaining--;
+
+                               if (*pn > max_ppnum)
+                                       max_ppnum = *pn;
+
+                               if (lowest_lo == 0 || *pn < lowest_lo)
+                                       lowest_lo = *pn;
+
+                               pmap_reserved_pages_allocated++;
+                               return TRUE;
+                       }
+               }
+       }
+       return FALSE;
+}
+
+
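A short walk-through of the cursor semantics above, with illustrative numbers:

/* Suppose one reserved region spans pages [0x10, 0x20) below the kernel
 * image, so map processing left alloc == base == 0x10. The first sixteen
 * calls return pages 0x10..0x1f, advancing alloc, decrementing
 * avail_remaining and counting pmap_reserved_pages_allocated; once
 * alloc == end in every such region, the function returns FALSE and
 * pmap_next_page_hi() falls back to ordinary high memory.
 */
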
 boolean_t
 pmap_next_page_hi(
                  ppnum_t *pn)
@@ -559,6 +615,9 @@ pmap_next_page_hi(
        pmap_memory_region_t *region;
        int     n;
 
+       if (pmap_next_page_reserved(pn))
+               return TRUE;
+
        if (avail_remaining) {
                for (n = pmap_memory_region_count - 1; n >= 0; n--) {
                        region = &pmap_memory_regions[n];
@@ -694,10 +753,15 @@ pmap_lowmem_finalize(void)
         *      entry in the memory region table. However, the loop is retained
         *      (with the intended termination criteria commented out) in the
         *      hope that some day we can free all low-memory ranges.
+        *      This loop assumes the first range does not span the kernel
+        *      image base & avail_start. We skip this process on systems
+        *      with "kernel reserved" ranges, as the low memory reclamation
+        *      is handled in the initial memory map processing loop on
+        *      such systems.
         */
        for (i = 0;
 //          pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
-            i < 1;
+            i < 1 && (pmap_reserved_ranges == 0);
             i++) {
                vm_offset_t     pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
                vm_offset_t     pend  = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
index 40892578bf4b5917bb5999ddbcb408e086820b95..1c141785addd65e47dcc0dfd7a2b57f2ebf9ab88 100644 (file)
@@ -69,7 +69,7 @@ extern vm_offset_t    virtual_avail;
  * Mach VM is running.
  */
 vm_offset_t
-io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
+io_map(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
 {
        vm_offset_t     start;
 
@@ -95,7 +95,7 @@ io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
 
 /* just wrap this since io_map handles it */
 
-vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
+vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
 {
   return (io_map(phys_addr, size, flags));
 }
index 0913ba060fff6eb9f21c7eb89009a91c13714aef..3dc373b4745d002c9144687630e37c04660f50c5 100644 (file)
 #ifdef __APPLE_API_PRIVATE
 __BEGIN_DECLS
 extern vm_offset_t     io_map(
-                               vm_offset_t             phys_addr,
+                               vm_map_offset_t         phys_addr,
                                vm_size_t               size,
                                unsigned int            flags);
-extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags);
+extern vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags);
 __END_DECLS
 #endif /* __APPLE_API_PRIVATE */
 
index 21e974bff2763ca898cbcd4ac78d4bb97bb804b5..e98665f044f76dd56a2ac2da534ed2343383ddc7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <i386/machine_cpu.h>
 #include <i386/misc_protos.h>
 #include <i386/mp.h>
-#include <i386/mtrr.h>
 #include <i386/postcode.h>
 #include <i386/cpu_threads.h>
 #include <i386/machine_routines.h>
+#include <i386/tsc.h>
 #if CONFIG_MCA
 #include <i386/machine_check.h>
 #endif
 #define PAUSE
 #endif /* MP_DEBUG */
 
-/* Initialize lapic_id so cpu_number() works on non SMP systems */
-unsigned long  lapic_id_initdata = 0;
-unsigned long  lapic_id = (unsigned long)&lapic_id_initdata;
-vm_offset_t    lapic_start;
+/* Base vector for local APIC interrupt sources */
+int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
+
+lapic_ops_table_t      *lapic_ops;     /* Lapic operations switch */
+
+#define                MAX_LAPICIDS    (LAPIC_ID_MAX+1)
+int            lapic_to_cpu[MAX_LAPICIDS];
+int            cpu_to_lapic[MAX_CPUS];
+
+static vm_offset_t     lapic_pbase;    /* Physical base memory-mapped regs */
+static vm_offset_t     lapic_vbase;    /* Virtual base memory-mapped regs */
 
 static i386_intr_func_t        lapic_intr_func[LAPIC_FUNC_TABLE_SIZE];
 
@@ -91,13 +98,6 @@ static unsigned lapic_master_error_count = 0;
 static unsigned lapic_error_count_threshold = 5;
 static boolean_t lapic_dont_panic = FALSE;
 
-/* Base vector for local APIC interrupt sources */
-int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
-
-#define                MAX_LAPICIDS    (LAPIC_ID_MAX+1)
-int            lapic_to_cpu[MAX_LAPICIDS];
-int            cpu_to_lapic[MAX_CPUS];
-
 static void
 lapic_cpu_map_init(void)
 {
@@ -147,9 +147,8 @@ ml_get_cpuid(uint32_t lapic_index)
 
 }
 
-
 #ifdef MP_DEBUG
-static void
+void
 lapic_cpu_map_dump(void)
 {
        int     i;
@@ -169,48 +168,105 @@ lapic_cpu_map_dump(void)
 }
 #endif /* MP_DEBUG */
 
-void
-lapic_init(void)
+static void
+legacy_init(void)
 {
        int             result;
        vm_map_entry_t  entry;
-       uint32_t        lo;
-       uint32_t        hi;
-       boolean_t       is_boot_processor;
-       boolean_t       is_lapic_enabled;
-       vm_offset_t     lapic_base;
-
-       /* Examine the local APIC state */
-       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
-       is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
-       is_lapic_enabled  = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
-       lapic_base = (lo &  MSR_IA32_APIC_BASE_BASE);
-       kprintf("MSR_IA32_APIC_BASE %p %s %s\n", (void *) lapic_base,
-               is_lapic_enabled ? "enabled" : "disabled",
-               is_boot_processor ? "BSP" : "AP");
-       if (!is_boot_processor || !is_lapic_enabled)
-               panic("Unexpected local APIC state\n");
 
        /* Establish a map to the local apic */
-       lapic_start = (vm_offset_t)vm_map_min(kernel_map);
+       lapic_vbase = (vm_offset_t)vm_map_min(kernel_map);
        result = vm_map_find_space(kernel_map,
-                                  (vm_map_address_t *) &lapic_start,
+                                  (vm_map_address_t *) &lapic_vbase,
                                   round_page(LAPIC_SIZE), 0,
                                   VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
        if (result != KERN_SUCCESS) {
-               panic("smp_init: vm_map_find_entry FAILED (err=%d)", result);
+               panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
        }
        vm_map_unlock(kernel_map);
 /* Map in the local APIC non-cacheable, as recommended by Intel
  * in section 8.4.1 of the "System Programming Guide".
  */
        pmap_enter(pmap_kernel(),
-                       lapic_start,
-                       (ppnum_t) i386_btop(lapic_base),
+                       lapic_vbase,
+                       (ppnum_t) i386_btop(lapic_pbase),
                        VM_PROT_READ|VM_PROT_WRITE,
                        VM_WIMG_IO,
                        TRUE);
-       lapic_id = (unsigned long)(lapic_start + LAPIC_ID);
+}
+
+
+static uint32_t
+legacy_read(lapic_register_t reg)
+{
+       return  *LAPIC_MMIO(reg);
+}
+
+static void
+legacy_write(lapic_register_t reg, uint32_t value)
+{
+       *LAPIC_MMIO(reg) = value;
+}
+
+static lapic_ops_table_t legacy_ops = {
+       legacy_init,
+       legacy_read,
+       legacy_write
+};
+
+static void
+x2apic_init(void)
+{
+}
+
+static uint32_t
+x2apic_read(lapic_register_t reg)
+{
+       uint32_t        lo;
+       uint32_t        hi;
+
+       rdmsr(LAPIC_MSR(reg), lo, hi);
+       return lo;
+}
+
+static void
+x2apic_write(lapic_register_t reg, uint32_t value)
+{
+       wrmsr(LAPIC_MSR(reg), value, 0);
+}
+
+static lapic_ops_table_t x2apic_ops = {
+       x2apic_init,
+       x2apic_read,
+       x2apic_write
+};
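
With the ops switch installed, callers stay mode-agnostic; a small editorial sketch mirroring the ID read in lapic_dump() below (note that x2APIC reports a full 32-bit ID, so the shift/mask matches the legacy format only):

static uint32_t
lapic_id_sketch(void)
{
	/* Dispatches to an MMIO load under legacy_ops, RDMSR under x2apic_ops */
	return (lapic_ops->read(ID) >> LAPIC_ID_SHIFT) & LAPIC_ID_MASK;
}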
+
+
+void
+lapic_init(void)
+{
+       uint32_t        lo;
+       uint32_t        hi;
+       boolean_t       is_boot_processor;
+       boolean_t       is_lapic_enabled;
+       boolean_t       is_x2apic;
+
+       /* Examine the local APIC state */
+       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+       is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
+       is_lapic_enabled  = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
+       is_x2apic         = (lo & MSR_IA32_APIC_BASE_EXTENDED) != 0;
+       lapic_pbase = (lo &  MSR_IA32_APIC_BASE_BASE);
+       kprintf("MSR_IA32_APIC_BASE %p %s %s mode %s\n", (void *) lapic_pbase,
+               is_lapic_enabled ? "enabled" : "disabled",
+               is_x2apic ? "extended" : "legacy",
+               is_boot_processor ? "BSP" : "AP");
+       if (!is_boot_processor || !is_lapic_enabled)
+               panic("Unexpected local APIC state\n");
+
+       lapic_ops = is_x2apic ? &x2apic_ops : &legacy_ops;
+
+       lapic_ops->init();
 
        if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
                panic("Local APIC version 0x%x, 0x14 or more expected\n",
@@ -249,6 +305,13 @@ static const char *DM_str[8] = {
        "Invalid",
        "ExtINT"};
 
+static const char *TMR_str[] = {
+       "OneShot",
+       "Periodic",
+       "TSC-Deadline",
+       "Illegal"
+};
+
 void
 lapic_dump(void)
 {
@@ -270,7 +333,7 @@ lapic_dump(void)
 
        kprintf("LAPIC %d at %p version 0x%x\n", 
                (LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
-               (void *) lapic_start,
+               (void *) lapic_vbase,
                LAPIC_READ(VERSION)&LAPIC_VERSION_MASK);
        kprintf("Priorities: Task 0x%x  Arbitration 0x%x  Processor 0x%x\n",
                LAPIC_READ(TPR)&LAPIC_TPR_MASK,
@@ -295,7 +358,8 @@ lapic_dump(void)
                VEC(LVT_TIMER),
                DS(LVT_TIMER),
                MASK(LVT_TIMER),
-               (LAPIC_READ(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot");
+               TMR_str[(LAPIC_READ(LVT_TIMER) >> LAPIC_LVT_TMR_SHIFT)
+                                               &  LAPIC_LVT_TMR_MASK]);
        kprintf("  Initial Count: 0x%08x \n", LAPIC_READ(TIMER_INITIAL_COUNT));
        kprintf("  Current Count: 0x%08x \n", LAPIC_READ(TIMER_CURRENT_COUNT));
        kprintf("  Divide Config: 0x%08x \n", LAPIC_READ(TIMER_DIVIDE_CONFIG));
@@ -334,15 +398,15 @@ lapic_dump(void)
        kprintf("\n");
        kprintf("TMR: 0x");
        for(i=7; i>=0; i--)
-               kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i*0x10));
+               kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i));
        kprintf("\n");
        kprintf("IRR: 0x");
        for(i=7; i>=0; i--)
-               kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i*0x10));
+               kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i));
        kprintf("\n");
        kprintf("ISR: 0x");
        for(i=7; i >= 0; i--)
-               kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i*0x10));
+               kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i));
        kprintf("\n");
 }
 
@@ -501,10 +565,9 @@ lapic_set_timer(
        lapic_timer_divide_t    divisor,
        lapic_timer_count_t     initial_count)
 {
-       boolean_t       state;
        uint32_t        timer_vector;
 
-       state = ml_set_interrupts_enabled(FALSE);
+       mp_disable_preemption();
        timer_vector = LAPIC_READ(LVT_TIMER);
        timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);
        timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED;
@@ -512,7 +575,73 @@ lapic_set_timer(
        LAPIC_WRITE(LVT_TIMER, timer_vector);
        LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
        LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
-       ml_set_interrupts_enabled(state);
+       mp_enable_preemption();
+}
+
+void
+lapic_config_timer(
+       boolean_t               interrupt_unmasked,
+       lapic_timer_mode_t      mode,
+       lapic_timer_divide_t    divisor)
+{
+       uint32_t        timer_vector;
+
+       mp_disable_preemption();
+       timer_vector = LAPIC_READ(LVT_TIMER);
+       timer_vector &= ~(LAPIC_LVT_MASKED |
+                         LAPIC_LVT_PERIODIC |
+                         LAPIC_LVT_TSC_DEADLINE);
+       timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED;
+       timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
+       LAPIC_WRITE(LVT_TIMER, timer_vector);
+       LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
+       mp_enable_preemption();
+}
+
+/*
+ * Configure TSC-deadline timer mode. The lapic interrupt is always unmasked.
+ */
+void
+lapic_config_tsc_deadline_timer(void)
+{
+       uint32_t        timer_vector;
+
+       DBG("lapic_config_tsc_deadline_timer()\n");
+       mp_disable_preemption();
+       timer_vector = LAPIC_READ(LVT_TIMER);
+       timer_vector &= ~(LAPIC_LVT_MASKED |
+                         LAPIC_LVT_PERIODIC);
+       timer_vector |= LAPIC_LVT_TSC_DEADLINE;
+       LAPIC_WRITE(LVT_TIMER, timer_vector);
+
+       /* Serialize writes per Intel OSWG */
+       do {
+               lapic_set_tsc_deadline_timer(rdtsc64() + (1ULL<<32));
+       } while (lapic_get_tsc_deadline_timer() == 0);
+       lapic_set_tsc_deadline_timer(0);
+
+       mp_enable_preemption();
+       DBG("lapic_config_tsc_deadline_timer() done\n");
+}
+
+void
+lapic_set_timer_fast(
+       lapic_timer_count_t     initial_count)
+{
+       LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) & ~LAPIC_LVT_MASKED);
+       LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
+}
+
+void
+lapic_set_tsc_deadline_timer(uint64_t deadline)
+{
+       wrmsr64(MSR_IA32_TSC_DEADLINE, deadline);
+}
+
+uint64_t
+lapic_get_tsc_deadline_timer(void)
+{
+       return rdmsr64(MSR_IA32_TSC_DEADLINE);
 }
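
Putting the pieces together, a hedged usage sketch of the deadline-mode API added above (tsc_per_ms is an assumed value, not something this patch defines):

lapic_config_tsc_deadline_timer();	/* program the LVT once at init */
/* arm a one-shot interrupt roughly 1 ms out */
lapic_set_tsc_deadline_timer(rdtsc64() + tsc_per_ms);
/* ... */
lapic_set_tsc_deadline_timer(0);	/* writing 0 disarms the timer */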
 
 void
@@ -522,9 +651,7 @@ lapic_get_timer(
        lapic_timer_count_t     *initial_count,
        lapic_timer_count_t     *current_count)
 {
-       boolean_t       state;
-
-       state = ml_set_interrupts_enabled(FALSE);
+       mp_disable_preemption();
        if (mode)
                *mode = (LAPIC_READ(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
                                periodic : one_shot;
@@ -534,7 +661,7 @@ lapic_get_timer(
                *initial_count = LAPIC_READ(TIMER_INITIAL_COUNT);
        if (current_count)
                *current_count = LAPIC_READ(TIMER_CURRENT_COUNT);
-       ml_set_interrupts_enabled(state);
+       mp_enable_preemption();
 } 
 
 static inline void
@@ -553,6 +680,11 @@ void lapic_unmask_perfcnt_interrupt(void) {
        LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
 }
 
+void lapic_set_perfcnt_interrupt_mask(boolean_t mask) {
+       uint32_t m = (mask ? LAPIC_LVT_MASKED : 0);
+       LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT) | m);
+}
+
 void
 lapic_set_intr_func(int vector, i386_intr_func_t func)
 {
@@ -575,6 +707,10 @@ lapic_set_intr_func(int vector, i386_intr_func_t func)
        }
 }
 
+void   lapic_set_pmi_func(i386_intr_func_t func) {
+       lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func);
+}
+
 int
 lapic_interrupt(int interrupt_num, x86_saved_state_t *state)
 {
@@ -586,7 +722,6 @@ lapic_interrupt(int interrupt_num, x86_saved_state_t *state)
                if (interrupt_num == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) &&
                    lapic_intr_func[LAPIC_NMI_INTERRUPT] != NULL) {
                        retval = (*lapic_intr_func[LAPIC_NMI_INTERRUPT])(state);
-                       _lapic_end_of_interrupt();
                        return retval;
                }
                else
@@ -744,3 +879,88 @@ lapic_send_ipi(int cpu, int vector)
 
        (void) ml_set_interrupts_enabled(state);
 }
+
+/*
+ * The following interfaces are privately exported to AICPM.
+ */
+
+boolean_t
+lapic_is_interrupt_pending(void)
+{
+       int             i;
+
+       for (i = 0; i < 8; i += 1) {
+               if ((LAPIC_READ_OFFSET(IRR_BASE, i) != 0) ||
+                   (LAPIC_READ_OFFSET(ISR_BASE, i) != 0))
+                       return (TRUE);
+       }
+
+       return (FALSE);
+}
+
+boolean_t
+lapic_is_interrupting(uint8_t vector)
+{
+       int             i;
+       int             bit;
+       uint32_t        irr;
+       uint32_t        isr;
+
+       i = vector / 32;
+       bit = 1 << (vector % 32);
+
+       irr = LAPIC_READ_OFFSET(IRR_BASE, i);
+       isr = LAPIC_READ_OFFSET(ISR_BASE, i);
+
+       if ((irr | isr) & bit)
+               return (TRUE);
+
+       return (FALSE);
+}
+
+void
+lapic_interrupt_counts(uint64_t intrs[256])
+{
+       int             i;
+       int             j;
+       int             bit;
+       uint32_t        irr;
+       uint32_t        isr;
+
+       if (intrs == NULL)
+               return;
+
+       for (i = 0; i < 8; i += 1) {
+               irr = LAPIC_READ_OFFSET(IRR_BASE, i);
+               isr = LAPIC_READ_OFFSET(ISR_BASE, i);
+
+               if ((isr | irr) == 0)
+                       continue;
+
+               for (j = (i == 0) ? 16 : 0; j < 32; j += 1) {
+                       bit = (32 * i) + j;
+                       if ((isr | irr) & (1 << j))
+                               intrs[bit] += 1;
+               }
+       }
+}
+
+void
+lapic_disable_timer(void)
+{
+       uint32_t        lvt_timer;
+
+       /*
+        * If we're in deadline timer mode,
+        * simply clear the deadline timer; otherwise
+        * mask the timer interrupt and clear the countdown.
+        */
+       lvt_timer = LAPIC_READ(LVT_TIMER);
+       if (lvt_timer & LAPIC_LVT_TSC_DEADLINE) {
+               wrmsr64(MSR_IA32_TSC_DEADLINE, 0);
+       } else {
+               LAPIC_WRITE(LVT_TIMER, lvt_timer | LAPIC_LVT_MASKED);
+               LAPIC_WRITE(TIMER_INITIAL_COUNT, 0);
+               lvt_timer = LAPIC_READ(LVT_TIMER);
+       }
+}
index e8387bb211c87600717f60c2910b52396f18206b..655864230384b4eb609158226009919edbf86840 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #ifndef _I386_LAPIC_H_
 #define _I386_LAPIC_H_
 
+/*
+ * Legacy mode definitions.
+ * The register offsets are no longer used by XNU - see LAPIC_MMIO_OFFSET().
+ */
 #define LAPIC_START                    0xFEE00000
 #define LAPIC_SIZE                     0x00000400
 
 #define                LAPIC_LVT_TM_LEVEL      0x08000
 #define                LAPIC_LVT_MASKED        0x10000
 #define                LAPIC_LVT_PERIODIC      0x20000
+#define                LAPIC_LVT_TSC_DEADLINE  0x40000
+#define                LAPIC_LVT_TMR_SHIFT     17
+#define                LAPIC_LVT_TMR_MASK      3
 #define LAPIC_TIMER_INITIAL_COUNT      0x00000380
 #define LAPIC_TIMER_CURRENT_COUNT      0x00000390
 #define LAPIC_TIMER_DIVIDE_CONFIG      0x000003E0
 #define CPU_NUMBER(r)                          \
        movl    %gs:CPU_NUMBER_GS,r
 
-#define CPU_NUMBER_FROM_LAPIC(r)               \
-       movl    EXT(lapic_id),r;                \
-       movl    0(r),r;                         \
-       shrl    $(LAPIC_ID_SHIFT),r;            \
-       andl    $(LAPIC_ID_MASK),r;             \
-       movl    EXT(lapic_to_cpu)(,r,4),r
-
 #ifndef        ASSEMBLER
-#include <stdint.h>
-#include <sys/cdefs.h>
-#include <mach/boolean.h>
-#include <mach/kern_return.h>
+typedef enum {
+       ID                      = 0x02,
+       VERSION                 = 0x03,
+       TPR                     = 0x08,
+       APR                     = 0x09,
+       PPR                     = 0x0A,
+       EOI                     = 0x0B,
+       REMOTE_READ             = 0x0C,
+       LDR                     = 0x0D,
+       DFR                     = 0x0E,
+       SVR                     = 0x0F,
+       ISR_BASE                = 0x10,
+       TMR_BASE                = 0x18,
+       IRR_BASE                = 0x20,
+       ERROR_STATUS            = 0x28,
+       LVT_CMCI                = 0x2F,
+       ICR                     = 0x30,
+       ICRD                    = 0x31,
+       LVT_TIMER               = 0x32,
+       LVT_THERMAL             = 0x33,
+       LVT_PERFCNT             = 0x34,
+       LVT_LINT0               = 0x35,
+       LVT_LINT1               = 0x36,
+       LVT_ERROR               = 0x37,
+       TIMER_INITIAL_COUNT     = 0x38,
+       TIMER_CURRENT_COUNT     = 0x39,
+       TIMER_DIVIDE_CONFIG     = 0x3E,
+} lapic_register_t;
+
+#define LAPIC_MMIO_PBASE       0xFEE00000      /* Default physical MMIO addr */
+#define LAPIC_MMIO_VBASE       lapic_vbase     /* Actual virtual mapped addr */
+#define LAPIC_MSR_BASE         0x800
+
+#define        LAPIC_MMIO_OFFSET(reg)  (reg << 4)
+#define        LAPIC_MSR_OFFSET(reg)   (reg)
+
+#define        LAPIC_MMIO(reg)         ((volatile uint32_t *) \
+                                       (LAPIC_MMIO_VBASE + LAPIC_MMIO_OFFSET(reg)))
+#define        LAPIC_MSR(reg)          (LAPIC_MSR_BASE + LAPIC_MSR_OFFSET(reg))
+
+typedef struct {
+       void            (*init) (void);
+       uint32_t        (*read) (lapic_register_t);
+       void            (*write)(lapic_register_t, uint32_t);
+} lapic_ops_table_t;
+extern  lapic_ops_table_t *lapic_ops;
+
+#define LAPIC_WRITE(reg,val)           lapic_ops->write(reg, val)
+#define LAPIC_READ(reg)                        lapic_ops->read(reg)
+#define LAPIC_READ_OFFSET(reg,off)     LAPIC_READ((reg)+(off))
+
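A worked example of the two addressing schemes, following directly from the macros above: the ID register (enum value 0x02) resolves to MMIO offset 0x02 << 4 = 0x20, i.e. physical 0xFEE00020 at the default base, in legacy mode, and to MSR 0x800 + 0x02 = 0x802 in x2APIC mode; one enum thus drives both backends.
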
 typedef enum {
        periodic,
        one_shot
@@ -186,22 +233,13 @@ typedef uint32_t lapic_timer_count_t;
 #define LAPIC_NMI_INTERRUPT            0x2
 #define LAPIC_FUNC_TABLE_SIZE          (LAPIC_PERFCNT_INTERRUPT + 1)
 
-#define LAPIC_WRITE(reg,val) \
-       *((volatile uint32_t *)(lapic_start + LAPIC_##reg)) = (val)
-#define LAPIC_READ(reg) \
-       (*((volatile uint32_t *)(lapic_start + LAPIC_##reg)))
-#define LAPIC_READ_OFFSET(reg,off) \
-       (*((volatile uint32_t *)(lapic_start + LAPIC_##reg + (off))))
-
 #define LAPIC_VECTOR(src) \
        (lapic_interrupt_base + LAPIC_##src##_INTERRUPT)
 
 #define LAPIC_ISR_IS_SET(base,src) \
-       (LAPIC_READ_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) \
+       (LAPIC_READ_OFFSET(ISR_BASE,(base+LAPIC_##src##_INTERRUPT)/32) \
                & (1 <<((base + LAPIC_##src##_INTERRUPT)%32)))
 
-extern vm_offset_t     lapic_start;
-
 extern void            lapic_init(void);
 extern void            lapic_configure(void);
 extern void            lapic_shutdown(void);
@@ -212,6 +250,7 @@ extern int          lapic_interrupt(
                                int interrupt, x86_saved_state_t *state);
 extern void            lapic_end_of_interrupt(void);
 extern void            lapic_unmask_perfcnt_interrupt(void);
+extern void            lapic_set_perfcnt_interrupt_mask(boolean_t);
 extern void            lapic_send_ipi(int cpu, int interrupt);
 
 extern int             lapic_to_cpu[];
@@ -221,6 +260,14 @@ extern void                lapic_cpu_map(int lapic, int cpu_num);
 extern uint32_t                ml_get_apicid(uint32_t cpu);
 extern uint32_t                ml_get_cpuid(uint32_t lapic_index);
 
+extern void            lapic_config_timer(
+                               boolean_t               interrupt,
+                               lapic_timer_mode_t      mode,
+                               lapic_timer_divide_t    divisor);
+
+extern void            lapic_set_timer_fast(
+                               lapic_timer_count_t     initial_count);
+
 extern void            lapic_set_timer(
                                boolean_t               interrupt,
                                lapic_timer_mode_t      mode,
@@ -233,17 +280,20 @@ extern void               lapic_get_timer(
                                lapic_timer_count_t     *initial_count,
                                lapic_timer_count_t     *current_count);
 
+extern void            lapic_config_tsc_deadline_timer(void);
+extern void            lapic_set_tsc_deadline_timer(uint64_t deadline);
+extern uint64_t                lapic_get_tsc_deadline_timer(void);
+
 typedef        int (*i386_intr_func_t)(x86_saved_state_t *state);
 extern void            lapic_set_intr_func(int intr, i386_intr_func_t func);
 
+extern void            lapic_set_pmi_func(i386_intr_func_t);
+
 static inline void     lapic_set_timer_func(i386_intr_func_t func)
 {
        lapic_set_intr_func(LAPIC_VECTOR(TIMER), func);
 }
-static inline void     lapic_set_pmi_func(i386_intr_func_t func)
-{
-       lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func);
-}
+
 static inline void     lapic_set_thermal_func(i386_intr_func_t func)
 {
        lapic_set_intr_func(LAPIC_VECTOR(THERMAL), func);
@@ -257,7 +307,13 @@ static inline void lapic_set_pm_func(i386_intr_func_t func)
        lapic_set_intr_func(LAPIC_VECTOR(PM), func);
 }
 
+extern boolean_t       lapic_is_interrupt_pending(void);
+extern boolean_t       lapic_is_interrupting(uint8_t vector);
+extern void            lapic_interrupt_counts(uint64_t intrs[256]);
+extern void            lapic_disable_timer(void);
+
 #ifdef MP_DEBUG
+extern void            lapic_cpu_map_dump(void);
 #define LAPIC_CPU_MAP_DUMP()   lapic_cpu_map_dump()
 #define LAPIC_DUMP()           lapic_dump()
 #else
index 3d3e5a09f3505070fe265d5ae0be1737c9b56d3e..8c715d086afaaa46d6e520c20f724f0893e627d2 100644 (file)
 #include <kern/debug.h>
 #include <string.h>
 
-#include <i386/mp.h> /* mp_recent_debugger_activity() */
 #if    MACH_KDB
 #include <ddb/db_command.h>
 #include <ddb/db_output.h>
 #include <ddb/db_sym.h>
 #include <ddb/db_print.h>
 #endif /* MACH_KDB */
-
+#include <i386/machine_routines.h> /* machine_timeout_suspended() */
 #include <machine/machine_cpu.h>
+#include <i386/mp.h>
 
 #include <sys/kdebug.h>
 
@@ -319,6 +319,29 @@ usimple_lock_init(
 #endif
 }
 
+volatile uint32_t spinlock_owner_cpu = ~0;
+volatile usimple_lock_t spinlock_timed_out;
+
+static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
+       uint64_t deadline;
+       uint32_t i;
+
+       for (i = 0; i < real_ncpus; i++) {
+               if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
+                       spinlock_owner_cpu = i;
+                       if ((uint32_t)cpu_number() == i)
+                               break;
+                       cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
+                       cpu_NMI_interrupt(i);
+                       deadline = mach_absolute_time() + (LockTimeOut * 2);
+                       while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE)
+                               cpu_pause();
+                       break;
+               }
+       }
+
+       return spinlock_owner_cpu;
+}
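
The acknowledgement half of this handshake lives in the mp.c hunk further down; in brief:

/* cpu_NMI_interrupt(i) raises an NMI on the suspected owner;
 * NMIInterruptHandler() sets cpu_NMI_acknowledged = TRUE and, seeing
 * spinlock_timed_out, panics with a backtrace on that CPU, while this
 * side spins for at most 2 * LockTimeOut before its caller panics
 * naming the owner CPU returned here.
 */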
 
 /*
  *     Acquire a usimple_lock.
@@ -336,14 +359,20 @@ usimple_lock(
 
        OBTAIN_PC(pc);
        USLDBG(usld_lock_pre(l, pc));
-
-       if(!hw_lock_to(&l->interlock, LockTimeOutTSC))  {/* Try to get the lock
-                                                         * with a timeout */
+       /* Try to get the lock with a timeout */
+       if (!hw_lock_to(&l->interlock, LockTimeOutTSC)) {
                boolean_t uslock_acquired = FALSE;
-               while (mp_recent_debugger_activity() &&
-                   !(uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)));
-               if (uslock_acquired == FALSE)
-                       panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p", l, (uintptr_t)l->interlock.lock_data, current_thread());
+               while (machine_timeout_suspended()) {
+                       enable_preemption();
+                       if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
+                               break;
+               }
+               if (uslock_acquired == FALSE) {
+                       uint32_t lock_cpu;
+                       spinlock_timed_out = l;
+                       lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data);
+                       panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu);
+               }
        }
        USLDBG(usld_lock_post(l, pc));
 #else
index b58b7ece7cc834dc7bdbd42fc6060a01e829b735..65f7006c6df3c4651bcd6cc2389a7715474a0a97 100644 (file)
@@ -800,13 +800,14 @@ Entry(lo_allintrs)
 int_from_intstack:
        incl    %gs:CPU_PREEMPTION_LEVEL
        incl    %gs:CPU_INTERRUPT_LEVEL
+       incl    %gs:CPU_NESTED_ISTACK
 
        movl    %esp, %edx              /* x86_saved_state */
        CCALL1(interrupt, %edx)
 
        decl    %gs:CPU_INTERRUPT_LEVEL
        decl    %gs:CPU_PREEMPTION_LEVEL
-
+       decl    %gs:CPU_NESTED_ISTACK
        jmp     ret_to_kernel
 
 /*
index dc50a1ed158848ea3937272371f213a2887e2270..4525c8a311ff54116fc94a1ee5c73ce7896c6058 100644 (file)
@@ -46,6 +46,8 @@
 #include <mach/vm_param.h>
 #include <i386/pmap.h>
 #include <i386/misc_protos.h>
+#include <i386/mp.h>
+
 #if MACH_KDB
 #include <machine/db_machdep.h>
 #include <ddb/db_aout.h>
@@ -432,7 +434,7 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
         * Are we supporting MMX/SSE/SSE2/SSE3?
         * As distinct from whether the cpu has these capabilities.
         */
-       os_supports_sse = !!(get_cr4() & CR4_XMM);
+       os_supports_sse = !!(get_cr4() & CR4_OSXMM);
        if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
                cpu_infop->vector_unit = 8;
        else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse)
@@ -541,7 +543,8 @@ ml_init_lock_timeout(void)
        }
        MutexSpin = (unsigned int)abstime;
 
-       nanoseconds_to_absolutetime(2 * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+       nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+       interrupt_latency_tracker_setup();
 }
 
 /*
@@ -646,6 +649,10 @@ vm_offset_t ml_stack_remaining(void)
        }
 }
 
+boolean_t machine_timeout_suspended(void) {
+       return (mp_recent_debugger_activity() || panic_active() || pmap_tlb_flush_timeout || spinlock_timed_out);
+}
+
 #if MACH_KDB
 
 /*
index 24c9aeca30572190f9643374b469daa3156ecaed..e222fb18d992a795f8045a3900fb90de75f2fbc0 100644 (file)
@@ -128,10 +128,15 @@ vm_offset_t ml_io_map(
 void   ml_get_bouncepool_info(
                               vm_offset_t *phys_addr,
                               vm_size_t   *size);
-
-
+/* Indicates if spinlock, IPI and other timeouts should be suspended */
+boolean_t machine_timeout_suspended(void);
 #endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE  */
 
+
+void interrupt_latency_tracker_setup(void);
+void interrupt_reset_latency_stats(void);
+void interrupt_populate_latency_stats(char *, unsigned);
+boolean_t ml_fpu_avx_enabled(void);
 #endif /* XNU_KERNEL_PRIVATE */
 
 #ifdef KERNEL_PRIVATE
@@ -299,4 +304,6 @@ void ml_get_csw_threads(thread_t * /*old*/, thread_t * /*new*/);
 
 __END_DECLS
 
+
+
 #endif /* _I386_MACHINE_ROUTINES_H_ */
index 3e54df7b5c485e870864e73691bafb00c884c65e..71e707c0740a60b575fde2c592f320f60cdf15e2 100644 (file)
@@ -128,7 +128,7 @@ extern void rtc_nanotime_init_commpage(void);
 
 extern void    rtc_sleep_wakeup(uint64_t base);
 
-extern void rtc_lapic_start_ticking(void);
+extern void    rtc_timer_start(void);
 
 extern void    rtc_clock_stepping(
                        uint32_t new_frequency,
index 4dd1e625dcc199bd9d0a90dac7d139592903ea49..021f0638f455ae17226046f3706077bc96211c04 100644 (file)
@@ -163,6 +163,7 @@ static volatile long   mp_bc_count;
 decl_lck_mtx_data(static, mp_bc_lock);
 lck_mtx_ext_t  mp_bc_lock_ext;
 static volatile int    debugger_cpu = -1;
+volatile long NMIPI_acks = 0;
 
 static void    mp_cpus_call_action(void); 
 static void    mp_call_PM(void);
@@ -461,7 +462,12 @@ cpu_signal_handler(x86_saved_state_t *regs)
        mp_disable_preemption();
 
        my_cpu = cpu_number();
-       my_word = &current_cpu_datap()->cpu_signals;
+       my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
+       /* Store the initial set of signals for diagnostics. New
+        * signals could arrive while these are being processed
+        * so it's no more than a hint.
+        */
+       cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
 
        do {
 #if    MACH_KDB && MACH_ASSERT
@@ -533,7 +539,8 @@ static int
 NMIInterruptHandler(x86_saved_state_t *regs)
 {
        void    *stackptr;
-       
+
+       atomic_incl(&NMIPI_acks, 1);
        sync_iss_to_iks_unconditionally(regs);
 #if defined (__i386__)
        __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
@@ -544,16 +551,22 @@ NMIInterruptHandler(x86_saved_state_t *regs)
        if (cpu_number() == debugger_cpu)
                        goto NMExit;
 
-       if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) {
+       if (spinlock_timed_out) {
+               char pstr[160];
+               snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
+               panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
+               
+       } else if (pmap_tlb_flush_timeout == TRUE) {
                char pstr[128];
-               snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number());
-               panic_i386_backtrace(stackptr, 16, &pstr[0], TRUE, regs);
+               snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, TLB state:%d\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
+               panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
        }
 
 #if MACH_KDP
        if (pmsafe_debug && !kdp_snapshot)
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
-       mp_kdp_wait(FALSE, pmap_tlb_flush_timeout);
+       current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
+       mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
        if (pmsafe_debug && !kdp_snapshot)
                pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
 #endif
@@ -899,7 +912,7 @@ handle_pending_TLB_flushes(void)
 {
        volatile int    *my_word = &current_cpu_datap()->cpu_signals;
 
-       if (i_bit(MP_TLB_FLUSH, my_word)) {
+       if (i_bit(MP_TLB_FLUSH, my_word) && (pmap_tlb_flush_timeout == FALSE)) {
                DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH);
                i_bit_clear(MP_TLB_FLUSH, my_word);
                pmap_update_interrupt();
@@ -1155,8 +1168,11 @@ mp_kdp_enter(void)
         * stopping others.
         */
        mp_kdp_state = ml_set_interrupts_enabled(FALSE);
+       my_cpu = cpu_number();
+       cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
+
        simple_lock(&mp_kdp_lock);
-       debugger_entry_time = mach_absolute_time();
+
        if (pmsafe_debug && !kdp_snapshot)
            pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
 
@@ -1170,8 +1186,10 @@ mp_kdp_enter(void)
        }
        my_cpu = cpu_number();
        debugger_cpu = my_cpu;
+       ncpus = 1;
        mp_kdp_ncpus = 1;       /* self */
        mp_kdp_trap = TRUE;
+       debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
        simple_unlock(&mp_kdp_lock);
 
        /*
@@ -1179,7 +1197,7 @@ mp_kdp_enter(void)
         */
        DBG("mp_kdp_enter() signaling other processors\n");
        if (force_immediate_debugger_NMI == FALSE) {
-               for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) {
+               for (cpu = 0; cpu < real_ncpus; cpu++) {
                        if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
                                continue;
                        ncpus++;
@@ -1227,7 +1245,7 @@ mp_kdp_enter(void)
                        cpu_NMI_interrupt(cpu);
                }
 
-       DBG("mp_kdp_enter() %u processors done %s\n",
+       DBG("mp_kdp_enter() %lu processors done %s\n",
            mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
        
        postcode(MP_KDP_ENTER);
@@ -1343,8 +1361,9 @@ mp_kdp_exit(void)
 
 boolean_t
 mp_recent_debugger_activity() {
-       return (((mach_absolute_time() - debugger_entry_time) < LastDebuggerEntryAllowance) ||
-           ((mach_absolute_time() - debugger_exit_time) < LastDebuggerEntryAllowance));
+       uint64_t abstime = mach_absolute_time();
+       return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
+           ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
 }
 
 /*ARGSUSED*/
index 694f7c1795991cd35964fa0c3aa8ee275a279e7b..8a2abbd0a232147a2335c834f9687db8d6f3d75e 100644 (file)
@@ -105,6 +105,9 @@ extern      int     kdb_active[];
 extern volatile boolean_t mp_kdp_trap;
 extern         volatile boolean_t force_immediate_debugger_NMI;
 extern  volatile boolean_t pmap_tlb_flush_timeout;
+extern  volatile usimple_lock_t spinlock_timed_out;
+extern volatile uint32_t spinlock_owner_cpu;
+
 extern uint64_t        LastDebuggerEntryAllowance;
 
 extern void    mp_kdp_enter(void);
index fcf202e6bc21467178f1f410f3658215d5e78223..421cc3f53c1d90212cba2e433f29d6cc79d5a759 100644 (file)
@@ -886,7 +886,7 @@ machine_thread_state_initialize(
      * And if we're target, re-arm the no-fpu trap.
      */
        if (thread->machine.pcb->ifps) {
-               (void) fpu_set_fxstate(thread, NULL);
+               (void) fpu_set_fxstate(thread, NULL, x86_FLOAT_STATE64);
 
                if (thread == current_thread())
                        clear_fpu();
@@ -1355,7 +1355,7 @@ machine_thread_set_state(
                if (thread_is_64bit(thr_act))
                        return(KERN_INVALID_ARGUMENT);
 
-               return fpu_set_fxstate(thr_act, tstate);
+               return fpu_set_fxstate(thr_act, tstate, flavor);
        }
 
        case x86_FLOAT_STATE64:
@@ -1366,7 +1366,7 @@ machine_thread_set_state(
                if ( !thread_is_64bit(thr_act))
                        return(KERN_INVALID_ARGUMENT);
 
-               return fpu_set_fxstate(thr_act, tstate);
+               return fpu_set_fxstate(thr_act, tstate, flavor);
        }
 
        case x86_FLOAT_STATE:
@@ -1379,15 +1379,37 @@ machine_thread_set_state(
                state = (x86_float_state_t *)tstate;
                if (state->fsh.flavor == x86_FLOAT_STATE64 && state->fsh.count == x86_FLOAT_STATE64_COUNT &&
                    thread_is_64bit(thr_act)) {
-                       return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs64);
+                       return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs64, x86_FLOAT_STATE64);
                }
                if (state->fsh.flavor == x86_FLOAT_STATE32 && state->fsh.count == x86_FLOAT_STATE32_COUNT &&
                    !thread_is_64bit(thr_act)) {
-                       return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs32); 
+                       return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs32, x86_FLOAT_STATE32); 
                }
                return(KERN_INVALID_ARGUMENT);
        }
 
+       case x86_AVX_STATE32:
+       {
+               if (count != x86_AVX_STATE32_COUNT)
+                       return(KERN_INVALID_ARGUMENT);
+
+               if (thread_is_64bit(thr_act))
+                       return(KERN_INVALID_ARGUMENT);
+
+               return fpu_set_fxstate(thr_act, tstate, flavor);
+       }
+
+       case x86_AVX_STATE64:
+       {
+               if (count != x86_AVX_STATE64_COUNT)
+                       return(KERN_INVALID_ARGUMENT);
+
+               if (!thread_is_64bit(thr_act))
+                       return(KERN_INVALID_ARGUMENT);
+
+               return fpu_set_fxstate(thr_act, tstate, flavor);
+       }
+
        case x86_THREAD_STATE32: 
        {
                if (count != x86_THREAD_STATE32_COUNT)
@@ -1596,7 +1618,7 @@ machine_thread_get_state(
 
                *count = x86_FLOAT_STATE32_COUNT;
 
-               return fpu_get_fxstate(thr_act, tstate);
+               return fpu_get_fxstate(thr_act, tstate, flavor);
            }
 
            case x86_FLOAT_STATE64:
@@ -1609,7 +1631,7 @@ machine_thread_get_state(
 
                *count = x86_FLOAT_STATE64_COUNT;
 
-               return fpu_get_fxstate(thr_act, tstate);
+               return fpu_get_fxstate(thr_act, tstate, flavor);
            }
 
            case x86_FLOAT_STATE:
@@ -1630,18 +1652,44 @@ machine_thread_get_state(
                        state->fsh.flavor = x86_FLOAT_STATE64;
                        state->fsh.count  = x86_FLOAT_STATE64_COUNT;
 
-                       kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs64);
+                       kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs64, x86_FLOAT_STATE64);
                } else {
                        state->fsh.flavor = x86_FLOAT_STATE32;
                        state->fsh.count  = x86_FLOAT_STATE32_COUNT;
 
-                       kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs32);
+                       kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs32, x86_FLOAT_STATE32);
                }
                *count = x86_FLOAT_STATE_COUNT;
 
                return(kret);
            }
 
+       case x86_AVX_STATE32:
+       {
+               if (*count != x86_AVX_STATE32_COUNT)
+                       return(KERN_INVALID_ARGUMENT);
+
+               if (thread_is_64bit(thr_act))
+                       return(KERN_INVALID_ARGUMENT);
+
+               *count = x86_AVX_STATE32_COUNT;
+
+               return fpu_get_fxstate(thr_act, tstate, flavor);
+       }
+
+       case x86_AVX_STATE64:
+       {
+               if (*count != x86_AVX_STATE64_COUNT)
+                       return(KERN_INVALID_ARGUMENT);
+
+               if ( !thread_is_64bit(thr_act))
+                       return(KERN_INVALID_ARGUMENT);
+
+               *count = x86_AVX_STATE64_COUNT;
+
+               return fpu_get_fxstate(thr_act, tstate, flavor);
+       }
+
            case x86_THREAD_STATE32: 
            {
                if (*count < x86_THREAD_STATE32_COUNT)
@@ -2500,7 +2548,6 @@ act_thread_csave(void)
                val = x86_FLOAT_STATE64_COUNT; 
                kret = machine_thread_get_state(thr_act, x86_FLOAT_STATE64,
                                (thread_state_t) &ic64->fs, &val);
-
                if (kret != KERN_SUCCESS) {
                        kfree(ic64, sizeof(struct x86_act_context64));
                        return((void *)0);
@@ -2583,13 +2630,8 @@ act_thread_catt(void *ctx)
                kret = machine_thread_set_state(thr_act, x86_SAVED_STATE32,
                                                (thread_state_t) &ic32->ss, x86_SAVED_STATE32_COUNT);
                if (kret == KERN_SUCCESS) {
-                       kret = machine_thread_set_state(thr_act, x86_FLOAT_STATE32,
+                       (void) machine_thread_set_state(thr_act, x86_FLOAT_STATE32,
                                                 (thread_state_t) &ic32->fs, x86_FLOAT_STATE32_COUNT);
-                       if (kret == KERN_SUCCESS && thr_act->machine.pcb->ids)
-                               machine_thread_set_state(thr_act,
-                                                        x86_DEBUG_STATE32,
-                                                        (thread_state_t)&ic32->ds,
-                                                        x86_DEBUG_STATE32_COUNT);
                }
                kfree(ic32, sizeof(struct x86_act_context32));
        }
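
Note: threading the flavor through fpu_set_fxstate()/fpu_get_fxstate() lets the FPU layer size and interpret the save area per request now that AVX flavors exist. A hedged caller-side sketch using the new flavor (type and count names assumed from the Mach thread-state definitions updated alongside; error handling elided):

	x86_avx_state64_t	avx;
	unsigned int		count = x86_AVX_STATE64_COUNT;
	kern_return_t		kr;

	kr = machine_thread_get_state(thread, x86_AVX_STATE64,
				      (thread_state_t)&avx, &count);
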
index 91e3799bb0e70032a5efaf61944acc8c079aef60..c469d7a1c3d6106417a2326dccdc30de1dc6944d 100644 (file)
@@ -42,6 +42,7 @@
 #include <kern/pms.h>
 #include <kern/processor.h>
 #include <kern/etimer.h>
+#include <sys/kdebug.h>
 #include <i386/cpu_threads.h>
 #include <i386/pmCPU.h>
 #include <i386/cpuid.h>
@@ -56,9 +57,10 @@ decl_simple_lock_data(,pm_init_lock);
 /*
  * The following is set when the KEXT loads and initializes.
  */
-pmDispatch_t   *pmDispatch     = NULL;
+pmDispatch_t           *pmDispatch     = NULL;
 
 static uint32_t                pmInitDone      = 0;
+static boolean_t       earlyTopology   = FALSE;
 
 
 /*
@@ -192,6 +194,9 @@ pmMarkAllCPUsOff(void)
 static void
 pmInitComplete(void)
 {
+    if (earlyTopology && pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
+       (*pmDispatch->pmCPUStateInit)();
+
     pmInitDone = 1;
 }
 
@@ -268,9 +273,9 @@ pmLockCPUTopology(int lock)
 uint64_t
 pmCPUGetDeadline(cpu_data_t *cpu)
 {
-    uint64_t   deadline        = EndOfAllTime;
+    uint64_t   deadline        = 0;
 
-       if (pmInitDone
+    if (pmInitDone
        && pmDispatch != NULL
        && pmDispatch->GetDeadline != NULL)
        deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
@@ -357,6 +362,8 @@ pmCPUStateInit(void)
 {
     if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
        (*pmDispatch->pmCPUStateInit)();
+    else
+       earlyTopology = TRUE;
 }
 
 /*
@@ -596,6 +603,40 @@ machine_choose_processor(processor_set_t pset,
     return(preferred);
 }
 
+static int
+pmThreadGetUrgency(__unused uint64_t *rt_period, __unused uint64_t *rt_deadline)
+{
+
+    return(0);
+}
+
+void
+thread_tell_urgency(int urgency,
+                   uint64_t rt_period,
+                   uint64_t rt_deadline)
+{
+    KERNEL_DEBUG_CONSTANT(0x1400054,
+                         urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
+
+    if (!pmInitDone
+       || pmDispatch == NULL
+       || pmDispatch->pmThreadTellUrgency == NULL)
+       return;
+
+    pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
+}
+
+void
+active_rt_threads(boolean_t active)
+{
+    if (!pmInitDone
+       || pmDispatch == NULL
+       || pmDispatch->pmActiveRTThreads == NULL)
+       return;
+
+    pmDispatch->pmActiveRTThreads(active);
+}
+
 static uint32_t
 pmGetSavedRunCount(void)
 {
@@ -645,10 +686,26 @@ pmSendIPI(int cpu)
     lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
 }
 
-static rtc_nanotime_t *
-pmGetNanotimeInfo(void)
+static void
+pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
+{
+       /*
+        * Make sure that nanotime didn't change while we were reading it.
+        */
+       do {
+               rtc_nanotime->generation = rtc_nanotime_info.generation; /* must be first */
+               rtc_nanotime->tsc_base = rtc_nanotime_info.tsc_base;
+               rtc_nanotime->ns_base = rtc_nanotime_info.ns_base;
+               rtc_nanotime->scale = rtc_nanotime_info.scale;
+               rtc_nanotime->shift = rtc_nanotime_info.shift;
+       } while(rtc_nanotime_info.generation != 0
+               && rtc_nanotime->generation != rtc_nanotime_info.generation);
+}
+
+static uint32_t
+pmTimerQueueMigrate(__unused int target_cpu)
 {
-    return(&rtc_nanotime_info);
+    return (0);
 }
 
 /*
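
Note: pmGetNanotimeInfo() above is a lock-free generation ("seqcount"-style) read: capture the generation first, copy the fields, and retry if the writer was mid-update (generation 0) or has moved on. The canonical reader shape, also used by _absolutetime_to_tsc() in the rtclock.c hunk below:

	uint32_t gen;
	do {
		gen  = shared->generation;	/* 0 == being updated */
		copy = *shared;			/* read the fields */
	} while (gen == 0 || gen != shared->generation);
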
@@ -681,10 +738,17 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
        callbacks->GetSavedRunCount     = pmGetSavedRunCount;
        callbacks->pmSendIPI            = pmSendIPI;
        callbacks->GetNanotimeInfo      = pmGetNanotimeInfo;
-       callbacks->RTCClockAdjust       = rtc_clock_adjust;
+       callbacks->ThreadGetUrgency     = pmThreadGetUrgency;
+       callbacks->RTCClockAdjust       = rtc_clock_adjust;
+       callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
        callbacks->topoParms            = &topoParms;
+       callbacks->InterruptPending     = lapic_is_interrupt_pending;
+       callbacks->IsInterrupting       = lapic_is_interrupting;
+       callbacks->InterruptStats       = lapic_interrupt_counts;
+       callbacks->DisableApicTimer     = lapic_disable_timer;
     } else {
-       panic("Version mis-match between Kernel and CPU PM");
+       panic("Version mis-match between Kernel (%d) and CPU PM (%d)",
+             PM_DISPATCH_VERSION, version);
     }
 
     if (cpuFuncs != NULL) {
index c6e36a616982b2b30e004324feaf67e64df20fdb..55041fc1014f08c7bf7932f26ab2ddfcfed5b8f7 100644 (file)
@@ -38,7 +38,7 @@
  * This value should be changed each time that pmDispatch_t or pmCallBacks_t
  * changes.
  */
-#define PM_DISPATCH_VERSION    21
+#define PM_DISPATCH_VERSION    23
 
 /*
  * Dispatch table for functions that get installed when the power
@@ -77,11 +77,25 @@ typedef struct
     boolean_t          (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu);
     int                        (*pmChooseCPU)(int startCPU, int endCPU, int preferredCPU);
     int                        (*pmIPIHandler)(void *state);
+    void               (*pmThreadTellUrgency)(int urgency, uint64_t rt_period, uint64_t rt_deadline);
+    void               (*pmActiveRTThreads)(boolean_t active);
 } pmDispatch_t;
 
 
+/*
+ * common time fields exported to PM code. This structure may be
+ * allocated on the stack, so avoid making it unnecessarily large.
+ */
+typedef struct pm_rtc_nanotime {
+       uint64_t        tsc_base;               /* timestamp */
+       uint64_t        ns_base;                /* nanoseconds */
+       uint32_t        scale;                  /* tsc -> nanosec multiplier */
+       uint32_t        shift;                  /* tsc -> nanosec shift/div */
+       uint32_t        generation;             /* 0 == being updated */
+} pm_rtc_nanotime_t;
+
 typedef struct {
-    int                        (*setRTCPop)(uint64_t time);
+    uint64_t           (*setRTCPop)(uint64_t time);
     void               (*resyncDeadlines)(int cpu);
     void               (*initComplete)(void);
     x86_lcpu_t         *(*GetLCPU)(int cpu);
@@ -99,9 +113,16 @@ typedef struct {
     processor_t                (*ThreadBind)(processor_t proc);
     uint32_t           (*GetSavedRunCount)(void);
     void               (*pmSendIPI)(int cpu);
-    rtc_nanotime_t     *(*GetNanotimeInfo)(void);
+    void               (*GetNanotimeInfo)(pm_rtc_nanotime_t *);
+    int                        (*ThreadGetUrgency)(uint64_t *rt_period, uint64_t *rt_deadline);
+    uint32_t           (*timeQueueMigrate)(int cpu);
     void               (*RTCClockAdjust)(uint64_t adjustment);
+    uint32_t           (*timerQueueMigrate)(int cpu);
     x86_topology_parameters_t  *topoParms;
+    boolean_t          (*InterruptPending)(void);
+    boolean_t          (*IsInterrupting)(uint8_t vector);
+    void               (*InterruptStats)(uint64_t intrs[256]);
+    void               (*DisableApicTimer)(void);
 } pmCallBacks_t;
 
 extern pmDispatch_t    *pmDispatch;
@@ -123,6 +144,8 @@ void pmTimerSave(void);
 void pmTimerRestore(void);
 kern_return_t pmCPUExitHalt(int cpu);
 kern_return_t pmCPUExitHaltToOff(int cpu);
+void thread_tell_urgency(int urgency, uint64_t rt_period, uint64_t rt_deadline);
+void active_rt_threads(boolean_t active);
 
 #define PM_HALT_NORMAL         0               /* normal halt path */
 #define PM_HALT_DEBUG          1               /* debug code wants to halt */
index fc2147d4b01217ffe0d8bc3ec0f9ca2b7ce2ab96..3d12ba9f23e97743307b18934caaa5320e8cb72a 100644 (file)
@@ -273,10 +273,6 @@ static vm_object_t kptobj;
 char   *pmap_phys_attributes;
 unsigned int   last_managed_page = 0;
 
-extern ppnum_t lowest_lo;
-extern ppnum_t lowest_hi;
-extern ppnum_t highest_hi;
-
 /*
  *     Amount of virtual memory mapped by one
  *     page-directory entry.
@@ -392,6 +388,8 @@ extern char end;
 
 static int nkpt;
 
+extern         long    NMIPI_acks;
+
 pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
@@ -2988,7 +2986,7 @@ pmap_cpuset_NMIPI(cpu_set cpu_mask) {
                if (cpu_mask & cpu_bit)
                        cpu_NMI_interrupt(cpu);
        }
-       deadline = mach_absolute_time() + (LockTimeOut);
+       deadline = mach_absolute_time() + (LockTimeOut * 2);
        while (mach_absolute_time() < deadline)
                cpu_pause();
 }
@@ -3057,18 +3055,7 @@ pmap_flush_tlbs(pmap_t   pmap)
                 * Wait for those other cpus to acknowledge
                 */
                while (cpus_to_respond != 0) {
-                       if (mach_absolute_time() > deadline) {
-                               if (mp_recent_debugger_activity())
-                                       continue;
-                               if (!panic_active()) {
-                                       pmap_tlb_flush_timeout = TRUE;
-                                       pmap_cpuset_NMIPI(cpus_to_respond);
-                               }
-                               panic("pmap_flush_tlbs() timeout: "
-                                   "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
-                                   pmap, cpus_to_respond);
-                       }
-
+                       long orig_acks = 0;
                        for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
                                if ((cpus_to_respond & cpu_bit) != 0) {
                                        if (!cpu_datap(cpu)->cpu_running ||
@@ -3081,6 +3068,17 @@ pmap_flush_tlbs(pmap_t   pmap)
                                if (cpus_to_respond == 0)
                                        break;
                        }
+                       if (mach_absolute_time() > deadline) {
+                               if (machine_timeout_suspended())
+                                       continue;
+                               pmap_tlb_flush_timeout = TRUE;
+                               orig_acks = NMIPI_acks;
+                               pmap_cpuset_NMIPI(cpus_to_respond);
+
+                               panic("TLB invalidation IPI timeout: "
+                                   "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
+                                   cpus_to_respond, orig_acks, NMIPI_acks);
+                       }
                }
        }
        /*
index 0acf265d2ab0a2b24893f64092c9ff073dd744d8..5d3ac764ee06ee3be7e88688dd124c9cecc71c23 100644 (file)
@@ -658,7 +658,7 @@ extern int          pmap_list_resident_pages(
                                struct pmap     *pmap,
                                vm_offset_t     *listp,
                                int             space);
-
+extern void            x86_filter_TLB_coherency_interrupts(boolean_t);
 #ifdef __i386__
 extern void             pmap_commpage32_init(
                                           vm_offset_t kernel,
index eef4f7c4db36f1b79526a89ddb473ecb4337864f..63bebc3abf0c7f5a37a020f9aee8539fc15255f8 100644 (file)
@@ -355,6 +355,10 @@ extern uint64_t pde_mapped_size;
 extern char            *pmap_phys_attributes;
 extern unsigned int    last_managed_page;
 
+extern ppnum_t lowest_lo;
+extern ppnum_t lowest_hi;
+extern ppnum_t highest_hi;
+
 /*
  * when spinning through pmap_remove
  * ensure that we don't spend too much
index 63ec071f42b1523728da73e6adb4afba82c0d652..a8c3423b418f5d77e43b8caf9fa79d9f9b618317 100644 (file)
@@ -668,6 +668,11 @@ Retry:
                if (pmap->stats.resident_count > pmap->stats.resident_max) {
                        pmap->stats.resident_max = pmap->stats.resident_count;
                }
+       } else if (last_managed_page == 0) {
+               /* Account for early mappings created before "managed pages"
+                * are determined. Consider consulting the available DRAM map.
+                */
+               OSAddAtomic(+1,  &pmap->stats.resident_count);
        }
        /*
         * Step 3) Enter the mapping.
@@ -1329,3 +1334,16 @@ pmap_clear_noencrypt(ppnum_t pn)
        }
 }
 
+void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
+       assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+
+       if (dofilter) {
+               CPU_CR3_MARK_INACTIVE();
+       } else {
+               CPU_CR3_MARK_ACTIVE();
+               __asm__ volatile("mfence");
+               if (current_cpu_datap()->cpu_tlb_invalid)
+                       process_pmap_updates();
+       }
+}
+
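
Note: a caller brackets a section with this filter to keep TLB shootdown IPIs from being serviced inside it, then catches up on exit. Hypothetical usage (per the assert above, interrupts must be off or preemption disabled):

	x86_filter_TLB_coherency_interrupts(TRUE);	/* CR3 marked inactive */
	/* ... short section that must not take TLB coherency IPIs ... */
	x86_filter_TLB_coherency_interrupts(FALSE);	/* mfence, then process
							   pending pmap updates */
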
index a22ccd03b419e5ff51396e4a4b517f6a2a5cc371..54fca68e3e31eccd19200538e85b63e44a188258 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 /*
  * CR4
  */
-#define CR4_VMXE 0x00002000    /* Enable VMX operation */
-#define CR4_FXS 0x00000200     /* SSE/SSE2 OS supports FXSave */
-#define CR4_XMM 0x00000400     /* SSE/SSE2 instructions supported in OS */
-#define CR4_PGE 0x00000080     /* p6:   Page Global Enable */
-#define        CR4_MCE 0x00000040      /* p5:   Machine Check Exceptions */
-#define CR4_PAE 0x00000020      /* p5:   Physical Address Extensions */
-#define        CR4_PSE 0x00000010      /* p5:   Page Size Extensions */
-#define        CR4_DE  0x00000008      /* p5:   Debugging Extensions */
-#define        CR4_TSD 0x00000004      /* p5:   Time Stamp Disable */
-#define        CR4_PVI 0x00000002      /* p5:   Protected-mode Virtual Interrupts */
-#define        CR4_VME 0x00000001      /* p5:   Virtual-8086 Mode Extensions */
+#define CR4_OSXSAVE 0x00040000 /* OS supports XSAVE */
+#define CR4_SMXE    0x00004000 /* Enable SMX operation */
+#define CR4_VMXE    0x00002000 /* Enable VMX operation */
+#define CR4_OSXMM   0x00000400  /* SSE/SSE2 exceptions supported in OS */
+#define CR4_OSFXS   0x00000200  /* SSE/SSE2 OS supports FXSave */
+#define CR4_PCE     0x00000100  /* Performance-Monitor Count Enable */
+#define CR4_PGE     0x00000080  /* Page Global Enable */
+#define        CR4_MCE     0x00000040  /* Machine Check Exceptions */
+#define CR4_PAE     0x00000020  /* Physical Address Extensions */
+#define        CR4_PSE     0x00000010  /* Page Size Extensions */
+#define        CR4_DE      0x00000008  /* Debugging Extensions */
+#define        CR4_TSD     0x00000004  /* Time Stamp Disable */
+#define        CR4_PVI     0x00000002  /* Protected-mode Virtual Interrupts */
+#define        CR4_VME     0x00000001  /* Virtual-8086 Mode Extensions */
 
+/*
+ * XCR0 - XFEATURE_ENABLED_MASK (a.k.a. XFEM) register
+ */
+#define        XCR0_YMM 0x0000000000000004ULL /* YMM state available */
+#define        XFEM_YMM XCR0_YMM
+#define XCR0_SSE 0x0000000000000002ULL /* SSE supported by XSAVE/XRESTORE */
+#define XCR0_X87 0x0000000000000001ULL /* x87, FPU/MMX (always set) */
+#define XFEM_SSE XCR0_SSE
+#define XFEM_X87 XCR0_X87
+#define XCR0 (0)
 #ifndef        ASSEMBLER
 
 #include <sys/cdefs.h>
@@ -392,108 +405,124 @@ __END_DECLS
 
 #endif /* ASSEMBLER */
 
-#define MSR_IA32_P5_MC_ADDR            0
-#define MSR_IA32_P5_MC_TYPE            1
-#define MSR_IA32_PLATFORM_ID           0x17
-#define MSR_IA32_EBL_CR_POWERON                0x2a
-
-#define MSR_IA32_APIC_BASE             0x1b
-#define MSR_IA32_APIC_BASE_BSP         (1<<8)
-#define MSR_IA32_APIC_BASE_ENABLE      (1<<11)
-#define MSR_IA32_APIC_BASE_BASE                (0xfffff<<12)
-
-#define MSR_IA32_FEATURE_CONTROL       0x3a
-#define MSR_IA32_FEATCTL_LOCK          (1<<0)
-#define MSR_IA32_FEATCTL_VMXON_SMX     (1<<1)
-#define MSR_IA32_FEATCTL_VMXON         (1<<2)
-#define MSR_IA32_FEATCTL_CSTATE_SMI    (1<<16)
-
-#define MSR_IA32_UCODE_WRITE           0x79
-#define MSR_IA32_UCODE_REV             0x8b
-
-#define MSR_IA32_PERFCTR0              0xc1
-#define MSR_IA32_PERFCTR1              0xc2
-
-#define MSR_PMG_CST_CONFIG_CONTROL     0xe2
-
-#define MSR_IA32_BBL_CR_CTL            0x119
-
-#define MSR_IA32_SYSENTER_CS           0x174
-#define MSR_IA32_SYSENTER_ESP          0x175
-#define MSR_IA32_SYSENTER_EIP          0x176
-
-#define MSR_IA32_MCG_CAP               0x179
-#define MSR_IA32_MCG_STATUS            0x17a
-#define MSR_IA32_MCG_CTL               0x17b
-
-#define MSR_IA32_EVNTSEL0              0x186
-#define MSR_IA32_EVNTSEL1              0x187
-
-#define MSR_IA32_PERF_STS              0x198
-#define MSR_IA32_PERF_CTL              0x199
-
-#define MSR_IA32_MISC_ENABLE           0x1a0
-
-#define MSR_IA32_DEBUGCTLMSR           0x1d9
-#define MSR_IA32_LASTBRANCHFROMIP      0x1db
-#define MSR_IA32_LASTBRANCHTOIP                0x1dc
-#define MSR_IA32_LASTINTFROMIP         0x1dd
-#define MSR_IA32_LASTINTTOIP           0x1de
-
-#define MSR_IA32_CR_PAT                0x277   
-
-#define MSR_IA32_MC0_CTL               0x400
-#define MSR_IA32_MC0_STATUS            0x401
-#define MSR_IA32_MC0_ADDR              0x402
-#define MSR_IA32_MC0_MISC              0x403
-
-#define MSR_IA32_MTRRCAP               0xfe
-#define MSR_IA32_MTRR_DEF_TYPE         0x2ff
-#define MSR_IA32_MTRR_PHYSBASE(n)      (0x200 + 2*(n))
-#define MSR_IA32_MTRR_PHYSMASK(n)      (0x200 + 2*(n) + 1)
-#define MSR_IA32_MTRR_FIX64K_00000     0x250
-#define MSR_IA32_MTRR_FIX16K_80000     0x258
-#define MSR_IA32_MTRR_FIX16K_A0000     0x259
-#define MSR_IA32_MTRR_FIX4K_C0000      0x268
-#define MSR_IA32_MTRR_FIX4K_C8000      0x269
-#define MSR_IA32_MTRR_FIX4K_D0000      0x26a
-#define MSR_IA32_MTRR_FIX4K_D8000      0x26b
-#define MSR_IA32_MTRR_FIX4K_E0000      0x26c
-#define MSR_IA32_MTRR_FIX4K_E8000      0x26d
-#define MSR_IA32_MTRR_FIX4K_F0000      0x26e
-#define MSR_IA32_MTRR_FIX4K_F8000      0x26f
-
-#define MSR_IA32_VMX_BASE              0x480
-#define MSR_IA32_VMX_BASIC             MSR_IA32_VMX_BASE
+#define MSR_IA32_P5_MC_ADDR                    0
+#define MSR_IA32_P5_MC_TYPE                    1
+#define MSR_IA32_PLATFORM_ID                   0x17
+#define MSR_IA32_EBL_CR_POWERON                        0x2a
+
+#define MSR_IA32_APIC_BASE                     0x1b
+#define     MSR_IA32_APIC_BASE_BSP                 (1<<8)
+#define     MSR_IA32_APIC_BASE_EXTENDED                    (1<<10)
+#define     MSR_IA32_APIC_BASE_ENABLE              (1<<11)
+#define     MSR_IA32_APIC_BASE_BASE                (0xfffff<<12)
+
+#define MSR_CORE_THREAD_COUNT                  0x35
+
+#define MSR_IA32_FEATURE_CONTROL               0x3a
+#define     MSR_IA32_FEATCTL_LOCK                  (1<<0)
+#define     MSR_IA32_FEATCTL_VMXON_SMX             (1<<1)
+#define     MSR_IA32_FEATCTL_VMXON                 (1<<2)
+#define     MSR_IA32_FEATCTL_CSTATE_SMI                    (1<<16)
+
+#define MSR_IA32_UPDT_TRIG                     0x79
+#define MSR_IA32_BIOS_SIGN_ID                  0x8b
+#define MSR_IA32_UCODE_WRITE                   MSR_IA32_UPDT_TRIG
+#define MSR_IA32_UCODE_REV                     MSR_IA32_BIOS_SIGN_ID
+
+#define MSR_IA32_PERFCTR0                      0xc1
+#define MSR_IA32_PERFCTR1                      0xc2
+
+#define MSR_PLATFORM_INFO                      0xce
+
+#define MSR_PMG_CST_CONFIG_CONTROL             0xe2
+
+#define MSR_IA32_BBL_CR_CTL                    0x119
+
+#define MSR_IA32_SYSENTER_CS                   0x174
+#define MSR_IA32_SYSENTER_ESP                  0x175
+#define MSR_IA32_SYSENTER_EIP                  0x176
+
+#define MSR_IA32_MCG_CAP                       0x179
+#define MSR_IA32_MCG_STATUS                    0x17a
+#define MSR_IA32_MCG_CTL                       0x17b
+
+#define MSR_IA32_EVNTSEL0                      0x186
+#define MSR_IA32_EVNTSEL1                      0x187
+
+#define MSR_FLEX_RATIO                         0x194
+#define MSR_IA32_PERF_STS                      0x198
+#define MSR_IA32_PERF_CTL                      0x199
+#define MSR_IA32_CLOCK_MODULATION              0x19a
+
+#define MSR_IA32_MISC_ENABLE                   0x1a0
+
+#define MSR_IA32_ENERGY_PERFORMANCE_BIAS       0x1b0
+#define MSR_IA32_PACKAGE_THERM_STATUS          0x1b1
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT       0x1b2
+
+#define MSR_IA32_DEBUGCTLMSR                   0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP              0x1db
+#define MSR_IA32_LASTBRANCHTOIP                        0x1dc
+#define MSR_IA32_LASTINTFROMIP                 0x1dd
+#define MSR_IA32_LASTINTTOIP                   0x1de
+
+#define MSR_IA32_CR_PAT                        0x277   
+
+#define MSR_IA32_MTRRCAP                       0xfe
+#define MSR_IA32_MTRR_DEF_TYPE                 0x2ff
+#define MSR_IA32_MTRR_PHYSBASE(n)              (0x200 + 2*(n))
+#define MSR_IA32_MTRR_PHYSMASK(n)              (0x200 + 2*(n) + 1)
+#define MSR_IA32_MTRR_FIX64K_00000             0x250
+#define MSR_IA32_MTRR_FIX16K_80000             0x258
+#define MSR_IA32_MTRR_FIX16K_A0000             0x259
+#define MSR_IA32_MTRR_FIX4K_C0000              0x268
+#define MSR_IA32_MTRR_FIX4K_C8000              0x269
+#define MSR_IA32_MTRR_FIX4K_D0000              0x26a
+#define MSR_IA32_MTRR_FIX4K_D8000              0x26b
+#define MSR_IA32_MTRR_FIX4K_E0000              0x26c
+#define MSR_IA32_MTRR_FIX4K_E8000              0x26d
+#define MSR_IA32_MTRR_FIX4K_F0000              0x26e
+#define MSR_IA32_MTRR_FIX4K_F8000              0x26f
+
+#define MSR_IA32_MC0_CTL                       0x400
+#define MSR_IA32_MC0_STATUS                    0x401
+#define MSR_IA32_MC0_ADDR                      0x402
+#define MSR_IA32_MC0_MISC                      0x403
+
+#define MSR_IA32_VMX_BASE                      0x480
+#define MSR_IA32_VMX_BASIC                     MSR_IA32_VMX_BASE
 #define MSR_IA32_VMXPINBASED_CTLS              MSR_IA32_VMX_BASE+1
-#define MSR_IA32_PROCBASED_CTLS                MSR_IA32_VMX_BASE+2
-#define MSR_IA32_VMX_EXIT_CTLS         MSR_IA32_VMX_BASE+3
-#define MSR_IA32_VMX_ENTRY_CTLS                MSR_IA32_VMX_BASE+4
-#define MSR_IA32_VMX_MISC              MSR_IA32_VMX_BASE+5
-#define MSR_IA32_VMX_CR0_FIXED0                MSR_IA32_VMX_BASE+6
-#define MSR_IA32_VMX_CR0_FIXED1                MSR_IA32_VMX_BASE+7
-#define MSR_IA32_VMX_CR4_FIXED0                MSR_IA32_VMX_BASE+8
-#define MSR_IA32_VMX_CR4_FIXED1                MSR_IA32_VMX_BASE+9
-
-#define        MSR_IA32_EFER           0xC0000080
-#define        MSR_IA32_EFER_SCE       0x00000001
-#define        MSR_IA32_EFER_LME       0x00000100
-#define        MSR_IA32_EFER_LMA       0x00000400
-#define        MSR_IA32_EFER_NXE       0x00000800
-
-#define        MSR_IA32_STAR           0xC0000081
-#define        MSR_IA32_LSTAR          0xC0000082
-#define        MSR_IA32_CSTAR          0xC0000083
-#define        MSR_IA32_FMASK          0xC0000084
-
-#define MSR_IA32_FS_BASE       0xC0000100
-#define MSR_IA32_GS_BASE       0xC0000101
-#define MSR_IA32_KERNEL_GS_BASE        0xC0000102
-
-#define MSR_IA32_BIOS_SIGN_ID  0x08B
-
-#define MSR_FLEX_RATIO         0x194
-#define MSR_PLATFORM_INFO      0x0ce
-#define MSR_CORE_THREAD_COUNT  0x035
+#define MSR_IA32_PROCBASED_CTLS                        MSR_IA32_VMX_BASE+2
+#define MSR_IA32_VMX_EXIT_CTLS                 MSR_IA32_VMX_BASE+3
+#define MSR_IA32_VMX_ENTRY_CTLS                        MSR_IA32_VMX_BASE+4
+#define MSR_IA32_VMX_MISC                      MSR_IA32_VMX_BASE+5
+#define MSR_IA32_VMX_CR0_FIXED0                        MSR_IA32_VMX_BASE+6
+#define MSR_IA32_VMX_CR0_FIXED1                        MSR_IA32_VMX_BASE+7
+#define MSR_IA32_VMX_CR4_FIXED0                        MSR_IA32_VMX_BASE+8
+#define MSR_IA32_VMX_CR4_FIXED1                        MSR_IA32_VMX_BASE+9
+
+#define MSR_IA32_DS_AREA                       0x600
+
+#define MSR_IA32_PACKAGE_POWER_SKU_UNIT                0x606
+#define MSR_IA32_PACKAGE_ENERY_STATUS          0x611
+#define MSR_IA32_PRIMARY_PLANE_ENERY_STATUS    0x639
+#define MSR_IA32_SECONDARY_PLANE_ENERY_STATUS  0x641
+#define MSR_IA32_TSC_DEADLINE                  0x6e0
+
+#define        MSR_IA32_EFER                           0xC0000080
+#define            MSR_IA32_EFER_SCE                       0x00000001
+#define            MSR_IA32_EFER_LME                       0x00000100
+#define            MSR_IA32_EFER_LMA                       0x00000400
+#define     MSR_IA32_EFER_NXE                      0x00000800
+
+#define        MSR_IA32_STAR                           0xC0000081
+#define        MSR_IA32_LSTAR                          0xC0000082
+#define        MSR_IA32_CSTAR                          0xC0000083
+#define        MSR_IA32_FMASK                          0xC0000084
+
+#define MSR_IA32_FS_BASE                       0xC0000100
+#define MSR_IA32_GS_BASE                       0xC0000101
+#define MSR_IA32_KERNEL_GS_BASE                        0xC0000102
+#define MSR_IA32_TSC_AUX                       0xC0000103
 
 #endif /* _I386_PROC_REG_H_ */
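
Note: the new CR4_OSXSAVE and XCR0/XFEM definitions support the XSAVE enablement sequence behind the AVX work elsewhere in this commit. A hedged sketch, using get_cr4()/set_cr4() from this header; xsetbv() and its signature are assumptions here, the kernel's actual helper lives in the FPU code:

	set_cr4(get_cr4() | CR4_OSXSAVE);		/* expose XSAVE/XGETBV */
	xsetbv(XCR0, XFEM_X87 | XFEM_SSE | XFEM_YMM);	/* enable x87+SSE+YMM state */
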
index 244e787e056c12b6749a0c9f61255cd53dfc57ae..72b1f556f70a4d3d2a2440f2001e3241436eca0b 100644 (file)
 #include <i386/tsc.h>
 #include <i386/rtclock.h>
 
-#define NSEC_PER_HZ                    (NSEC_PER_SEC / 100) /* nsec per tick */
-
 #define UI_CPUFREQ_ROUNDING_FACTOR     10000000
 
 int            rtclock_config(void);
 
 int            rtclock_init(void);
 
-uint64_t       rtc_decrementer_min;
-
 uint64_t       tsc_rebase_abs_time = 0;
 
-void                   rtclock_intr(x86_saved_state_t *regs);
-static uint64_t                maxDec;                 /* longest interval our hardware timer can handle (nsec) */
+void           rtclock_intr(x86_saved_state_t *regs);
 
 static void    rtc_set_timescale(uint64_t cycles);
 static uint64_t        rtc_export_speed(uint64_t cycles);
 
 rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0};
 
+static uint64_t        rtc_decrementer_min;
+static uint64_t        rtc_decrementer_max;
+
+static uint64_t
+deadline_to_decrementer(
+       uint64_t        deadline,
+       uint64_t        now)
+{
+       uint64_t        delta;
+
+       if (deadline <= now)
+               return rtc_decrementer_min;
+       else {
+               delta = deadline - now;
+               return MIN(MAX(rtc_decrementer_min,delta),rtc_decrementer_max); 
+       }
+}
+
+static inline uint64_t
+_absolutetime_to_tsc(uint64_t ns)
+{
+       uint32_t        generation;
+       uint64_t        tsc;
+
+       do {
+               generation =  rtc_nanotime_info.generation;
+               tsc = tmrCvt(ns - rtc_nanotime_info.ns_base, tscFCvtn2t)
+                       + rtc_nanotime_info.tsc_base;
+       } while (generation == 0 ||
+                generation != rtc_nanotime_info.generation);
+
+       return tsc;
+}
+
+/*
+ * Regular local APIC timer case:
+ */
+static void
+rtc_lapic_config_timer(void)
+{
+       lapic_config_timer(TRUE, one_shot, divide_by_1);
+}
+static uint64_t
+rtc_lapic_set_timer(uint64_t deadline, uint64_t now)
+{
+       uint64_t count;
+       uint64_t set = 0;
+
+       if (deadline > 0) {
+               /*
+                * Convert delta to bus ticks
+                * - time now is not relevant
+                */
+               count = deadline_to_decrementer(deadline, now);
+               set = now + count;
+               lapic_set_timer_fast((uint32_t) tmrCvt(count, busFCvtn2t));
+       } else {
+               lapic_set_timer(FALSE, one_shot, divide_by_1, 0);
+       }
+       return set;
+}
+
+/*
+ * TSC-deadline timer case:
+ */
+static void
+rtc_lapic_config_tsc_deadline_timer(void)
+{
+       lapic_config_tsc_deadline_timer();
+}
+static uint64_t
+rtc_lapic_set_tsc_deadline_timer(uint64_t deadline, uint64_t now)
+{
+       uint64_t        set = 0;
+
+       if (deadline > 0) {
+               /*
+                * Convert to TSC
+                */
+               set = now + deadline_to_decrementer(deadline, now);
+               lapic_set_tsc_deadline_timer(_absolutetime_to_tsc(set));
+       } else {
+               lapic_set_tsc_deadline_timer(0);
+       }
+       return set;
+} 
+
+/*
+ * Definitions for timer operations table
+ */
+typedef struct {
+       void     (*config)(void);
+       uint64_t (*set)   (uint64_t, uint64_t);
+} rtc_timer_t;
+
+rtc_timer_t    rtc_timer_lapic  = {
+       rtc_lapic_config_timer,
+       rtc_lapic_set_timer
+};
+
+rtc_timer_t    rtc_timer_tsc_deadline  = {
+       rtc_lapic_config_tsc_deadline_timer,
+       rtc_lapic_set_tsc_deadline_timer
+};
+
+rtc_timer_t    *rtc_timer = &rtc_timer_lapic;  /* defaults to LAPIC timer */
+
+/*
+ * rtc_timer_init() is called at startup on the boot processor only.
+ */
+static void
+rtc_timer_init(void)
+{
+       int     TSC_deadline_timer = 0;
+       
+       /* See whether we can use the local apic in TSC-deadline mode */
+       if ((cpuid_features() & CPUID_FEATURE_TSCTMR)) {
+               TSC_deadline_timer = 1;
+               PE_parse_boot_argn("TSC_deadline_timer", &TSC_deadline_timer,
+                                  sizeof(TSC_deadline_timer));
+               printf("TSC Deadline Timer supported %s enabled\n",
+                       TSC_deadline_timer ? "and" : "but not");
+       }
+
+       if (TSC_deadline_timer) {
+               rtc_timer = &rtc_timer_tsc_deadline;
+               rtc_decrementer_max = UINT64_MAX;       /* effectively none */
+               /*
+                * The min could be as low as 1nsec,
+                * The min could be as low as 1 nsec,
+                * as for the local apic timer.
+                */
+               rtc_decrementer_min = 1*NSEC_PER_USEC;  /* 1 usec */
+       } else {
+               /*
+                * Compute the longest interval using LAPIC timer.
+                */
+               rtc_decrementer_max = tmrCvt(0x7fffffffULL, busFCvtt2n);
+               kprintf("maxDec: %lld\n", rtc_decrementer_max);
+               rtc_decrementer_min = 1*NSEC_PER_USEC;  /* 1 usec */
+       }
+
+       /* Point LAPIC interrupts to hardclock() */
+       lapic_set_timer_func((i386_intr_func_t) rtclock_intr);
+}
+
+static inline uint64_t
+rtc_timer_set(uint64_t deadline, uint64_t now)
+{
+       return rtc_timer->set(deadline, now);
+}
+
+void
+rtc_timer_start(void)
+{
+       /*
+        * Force a complete re-evaluation of timer deadlines.
+        */
+       etimer_resync_deadlines();
+}
+
 /*
  * tsc_to_nanoseconds:
  *
@@ -112,7 +268,7 @@ _tsc_to_nanoseconds(uint64_t value)
                 "addl  %%edi,%%eax     ;"      
                 "adcl  $0,%%edx         "
                 : "+A" (value)
-                : "c" (current_cpu_datap()->cpu_nanotime->scale)
+                : "c" (rtc_nanotime_info.scale)
                 : "esi", "edi");
 #elif defined(__x86_64__)
     asm volatile("mul %%rcx;"
@@ -168,33 +324,6 @@ _absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nan
 #endif
 }
 
-static uint32_t
-deadline_to_decrementer(
-       uint64_t        deadline,
-       uint64_t        now)
-{
-       uint64_t        delta;
-
-       if (deadline <= now)
-               return (uint32_t)rtc_decrementer_min;
-       else {
-               delta = deadline - now;
-               return (uint32_t)MIN(MAX(rtc_decrementer_min,delta),maxDec); 
-       }
-}
-
-void
-rtc_lapic_start_ticking(void)
-{
-       x86_lcpu_t      *lcpu = x86_lcpu();
-
-       /*
-        * Force a complete re-evaluation of timer deadlines.
-        */
-       lcpu->rtcPop = EndOfAllTime;
-       etimer_resync_deadlines();
-}
-
 /*
  * Configure the real-time clock device. Return success (1)
  * or failure (0).
@@ -251,10 +380,8 @@ _rtc_nanotime_init(rtc_nanotime_t *rntp, uint64_t base)
 static void
 rtc_nanotime_init(uint64_t base)
 {
-       rtc_nanotime_t  *rntp = current_cpu_datap()->cpu_nanotime;
-
-       _rtc_nanotime_init(rntp, base);
-       rtc_nanotime_set_commpage(rntp);
+       _rtc_nanotime_init(&rtc_nanotime_info, base);
+       rtc_nanotime_set_commpage(&rtc_nanotime_info);
 }
 
 /*
@@ -269,7 +396,7 @@ rtc_nanotime_init_commpage(void)
 {
        spl_t                   s = splclock();
 
-       rtc_nanotime_set_commpage(current_cpu_datap()->cpu_nanotime);
+       rtc_nanotime_set_commpage(&rtc_nanotime_info);
 
        splx(s);
 }
@@ -286,10 +413,10 @@ rtc_nanotime_read(void)
        
 #if CONFIG_EMBEDDED
        if (gPEClockFrequencyInfo.timebase_frequency_hz > SLOW_TSC_THRESHOLD)
-               return  _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 1);       /* slow processor */
+               return  _rtc_nanotime_read(&rtc_nanotime_info, 1);      /* slow processor */
        else
 #endif
-       return  _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 0);       /* assume fast processor */
+       return  _rtc_nanotime_read(&rtc_nanotime_info, 0);      /* assume fast processor */
 }
 
 /*
@@ -302,7 +429,7 @@ rtc_nanotime_read(void)
 void
 rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 {
-       rtc_nanotime_t  *rntp = current_cpu_datap()->cpu_nanotime;
+       rtc_nanotime_t  *rntp = &rtc_nanotime_info;
        uint64_t        oldnsecs;
        uint64_t        newnsecs;
        uint64_t        tsc;
@@ -332,7 +459,7 @@ rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 void
 rtc_clock_adjust(uint64_t tsc_base_delta)
 {
-       rtc_nanotime_t  *rntp = current_cpu_datap()->cpu_nanotime;
+       rtc_nanotime_t  *rntp = &rtc_nanotime_info;
 
        assert(!ml_get_interrupts_enabled());
        assert(tsc_base_delta < 100ULL);        /* i.e. it's small */
@@ -368,6 +495,9 @@ void
 rtc_sleep_wakeup(
        uint64_t                base)
 {
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->config();
+
        /*
         * Reset nanotime.
         * The timestamp counter will have been reset
@@ -404,22 +534,15 @@ rtclock_init(void)
                gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
                gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;
 
-               /*
-                * Compute the longest interval we can represent.
-                */
-               maxDec = tmrCvt(0x7fffffffULL, busFCvtt2n);
-               kprintf("maxDec: %lld\n", maxDec);
-
-               /* Minimum interval is 1usec */
-               rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC, 0ULL);
-               /* Point LAPIC interrupts to hardclock() */
-               lapic_set_timer_func((i386_intr_func_t) rtclock_intr);
-
+               rtc_timer_init();
                clock_timebase_init();
                ml_init_lock_timeout();
        }
 
-       rtc_lapic_start_ticking();
+       /* Set fixed configuration for lapic timers */
+       rtc_timer->config();
+
+       rtc_timer_start();
 
        return (1);
 }
@@ -430,7 +553,7 @@ rtclock_init(void)
 static void
 rtc_set_timescale(uint64_t cycles)
 {
-       rtc_nanotime_t  *rntp = current_cpu_datap()->cpu_nanotime;
+       rtc_nanotime_t  *rntp = &rtc_nanotime_info;
        rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles);
 
        if (cycles <= SLOW_TSC_THRESHOLD)
@@ -522,18 +645,10 @@ rtclock_intr(
 {
         uint64_t       rip;
        boolean_t       user_mode = FALSE;
-       uint64_t        abstime;
-       uint32_t        latency;
-       x86_lcpu_t      *lcpu = x86_lcpu();
 
        assert(get_preemption_level() > 0);
        assert(!ml_get_interrupts_enabled());
 
-       abstime = rtc_nanotime_read();
-       latency = (uint32_t)(abstime - lcpu->rtcDeadline);
-       if (abstime < lcpu->rtcDeadline)
-               latency = 1;
-
        if (is_saved_state64(tregs) == TRUE) {
                x86_saved_state64_t     *regs;
                  
@@ -552,38 +667,42 @@ rtclock_intr(
                rip = regs->eip;
        }
 
-       /* Log the interrupt service latency (-ve value expected by tool) */
-       KERNEL_DEBUG_CONSTANT(
-               MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE,
-               -(int32_t)latency, (uint32_t)rip, user_mode, 0, 0);
-
        /* call the generic etimer */
        etimer_intr(user_mode, rip);
 }
 
+
 /*
  *     Request timer pop from the hardware 
  */
 
-
-int
+uint64_t
 setPop(
        uint64_t time)
 {
        uint64_t now;
-       uint32_t decr;
-       uint64_t count;
-       
-       now = rtc_nanotime_read();              /* The time in nanoseconds */
-       decr = deadline_to_decrementer(time, now);
+       uint64_t pop;
+
+       /* 0 and EndOfAllTime are special-cases for "clear the timer" */
+       if (time == 0 || time == EndOfAllTime) {
+               time = EndOfAllTime;
+               now = 0;
+               pop = rtc_timer_set(0, 0);
+       } else {
+               now = rtc_nanotime_read();
+               pop = rtc_timer_set(time, now);
+       }
 
-       count = tmrCvt(decr, busFCvtn2t);
-       lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count);
+       /* Record actual deadline set */
+       x86_lcpu()->rtcDeadline = time;
+       x86_lcpu()->rtcPop = pop;
 
-       return decr;                            /* Pass back what we set */
+       /*
+        * Pass back the delta we set
+        */
+       return pop - now;
 }
 
-
 uint64_t
 mach_absolute_time(void)
 {
index 82441c2095cd90707ce76aa77cf5fc8b5b36b6f3..d98b8808f1e1d6df2a5c769fbb3709e6692bbf84 100644 (file)
 
 #ifndef ASSEMBLER
 typedef struct rtc_nanotime {
-       uint64_t        tsc_base;               /* timestamp */
-       uint64_t        ns_base;                /* nanoseconds */
-       uint32_t        scale;                  /* tsc -> nanosec multiplier */
-       uint32_t        shift;                  /* tsc -> nanosec shift/div */
+       volatile uint64_t       tsc_base;       /* timestamp */
+       volatile uint64_t       ns_base;        /* nanoseconds */
+       uint32_t                scale;          /* tsc -> nanosec multiplier */
+       uint32_t                shift;          /* tsc -> nanosec shift/div */
                                                /* shift is overloaded with
                                                 * lower 32bits of tsc_freq
                                                 * on slower machines (SLOW_TSC_THRESHOLD) */
-       uint32_t        generation;             /* 0 == being updated */
-       uint32_t        spare1;
+       volatile uint32_t       generation;     /* 0 == being updated */
+       uint32_t                spare1;
 } rtc_nanotime_t;
 
 #if 0
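
Note: marking tsc_base, ns_base and generation volatile matches the update protocol the readers in this commit assume: the writer parks generation at 0, rewrites the bases, then publishes a new nonzero generation. A hedged writer-side sketch (the kernel's actual updater lives elsewhere; next_gen is illustrative):

	rntp->generation = 0;			/* readers retry from here */
	rntp->tsc_base   = new_tsc_base;
	rntp->ns_base    = new_ns_base;
	if (++next_gen == 0)			/* 0 is reserved for "updating" */
		next_gen = 1;
	rntp->generation = next_gen;		/* publish */
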
index 37d2b48ca67037ee5bbad3a0d5d715dc3c74ff04..89643edf2a7dbfc84162645d584ecdf3285f2602 100644 (file)
@@ -300,7 +300,6 @@ __END_DECLS
 #define        SYSCALL_CS      0x2f            /* 64-bit syscall pseudo-segment */
 #define        USER_CTHREAD    0x37            /* user cthread area */
 #define        USER_SETTABLE   0x3f            /* start of user settable ldt entries */
-#define        USLDTSZ         10              /* number of user settable entries */
 
 /*
  * Kernel descriptors for MACH - 32-bit flat address space.
@@ -327,13 +326,13 @@ __END_DECLS
 /*
  * Kernel descriptors for MACH - 64-bit flat address space.
  */
-#define KERNEL64_CS    0x08            /* 1:  First entry */
-#define SYSENTER_CS    0x0b            /*     alias to KERNEL64_CS */
-#define        KERNEL64_SS     0x10            /* 2:  must be SYSENTER_CS + 8  */
-#define USER_CS                0x1b            /* 3:  must be SYSENTER_CS + 16 */
-#define USER_DS                0x23            /* 4:  must be SYSENTER_CS + 24 */
-#define USER64_CS      0x2b            /* 5:  must be SYSENTER_CS + 32 */
-#define USER64_DS      USER_DS         /*     nothing special about 64bit DS */
+#define KERNEL64_CS    0x08            /* 1:  K64 code */
+#define SYSENTER_CS    0x0b            /*     U32 sysenter pseudo-segment */
+#define        KERNEL64_SS     0x10            /* 2:  KERNEL64_CS+8 for syscall */
+#define USER_CS                0x1b            /* 3:  U32 code */
+#define USER_DS                0x23            /* 4:  USER_CS+8 for sysret */
+#define USER64_CS      0x2b            /* 5:  USER_CS+16 for sysret */
+#define USER64_DS      USER_DS         /*     U64 data pseudo-segment */
 #define KERNEL_LDT     0x30            /* 6:  */
                                        /* 7:  other 8 bytes of KERNEL_LDT */
 #define KERNEL_TSS     0x40            /* 8:  */
@@ -341,8 +340,7 @@ __END_DECLS
 #define KERNEL32_CS    0x50            /* 10: */
 #define USER_LDT       0x58            /* 11: */
                                        /* 12: other 8 bytes of USER_LDT */
-#define KERNEL_DS      0x80            /* 16: */
-#define        SYSCALL_CS      0x8f            /* 17: 64-bit syscall pseudo-segment */
+#define KERNEL_DS      0x68            /* 13: 32-bit kernel data */
 
 #endif
 
@@ -365,9 +363,9 @@ __END_DECLS
 /*
  * 64-bit kernel LDT descriptors
  */
+#define        SYSCALL_CS      0x07            /* syscall pseudo-segment */
 #define        USER_CTHREAD    0x0f            /* user cthread area */
 #define        USER_SETTABLE   0x1f            /* start of user settable ldt entries */
-#define        USLDTSZ         10              /* number of user settable entries */
 #endif
 
 #endif /* _I386_SEG_H_ */
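
Note: the "+8 for syscall/sysret" comments above encode hard SYSCALL/SYSRET constraints: SYSCALL loads CS from STAR[47:32] and SS from STAR[47:32]+8, while SYSRET loads CS from STAR[63:48] (+16 when returning to 64-bit) and SS from STAR[63:48]+8. A hedged sketch of the MSR setup these selector values imply:

	wrmsr64(MSR_IA32_STAR,
	        ((uint64_t)USER_CS << 48) | ((uint64_t)KERNEL64_CS << 32));
	/* SYSCALL  -> CS = 0x08 (KERNEL64_CS), SS = 0x10 (KERNEL64_SS)
	 * SYSRET64 -> CS = 0x2b (USER64_CS),   SS = 0x23 (USER_DS)
	 * SYSRET32 -> CS = 0x1b (USER_CS),     SS = 0x23 (USER_DS)  */
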
index 9c7188711af5630465f8005e70c7e110d82c4682..bcabe2829f10f14ab82f3e0007741bc7188b0703 100644 (file)
@@ -173,6 +173,13 @@ Entry(get64_cr3)
        EMARF
        ret
 
+Entry(cpuid64)
+       ENTER_64BIT_MODE()
+       cpuid
+       ENTER_COMPAT_MODE()
+       ret
+
+
 /* FXSAVE and FXRSTOR operate in a mode dependent fashion, hence these variants.
  * Must be called with interrupts disabled.
  */
@@ -180,20 +187,29 @@ Entry(get64_cr3)
 Entry(fxsave64)
        movl            S_ARG0,%eax
        ENTER_64BIT_MODE()
-       fxsave          0(%eax)
+       fxsave          (%eax)
        ENTER_COMPAT_MODE()
        ret
 
 Entry(fxrstor64)
        movl            S_ARG0,%eax
        ENTER_64BIT_MODE()
-       fxrstor         0(%rax)
+       fxrstor         (%rax)
        ENTER_COMPAT_MODE()
        ret
 
-Entry(cpuid64)
+Entry(xsave64o)
        ENTER_64BIT_MODE()
-       cpuid
+       .short  0xAE0F  /* 0F AE: XSAVE opcode, hand-encoded (no mnemonic yet) */
+       /* ModRM 0x21 = mod 00, reg /4 (XSAVE), base %rcx */
+       .byte   0x21
        ENTER_COMPAT_MODE()
        ret
 
+Entry(xrstor64o)
+       ENTER_64BIT_MODE()
+       .short  0xAE0F  /* 0F AE: XRSTOR shares the XSAVE opcode */
+       /* ModRM 0x29 = mod 00, reg /5 (XRSTOR), base %rcx */
+       .byte   0x29
+       ENTER_COMPAT_MODE()
+       ret
index 0ac0ee06f65a555bd619b00fb0d4f6682ffd6124..faab785af5fdfa3a91aedfbb6c2d059ce6f4e168 100644 (file)
  *     Allocated only when necessary.
  */
 
-struct x86_fpsave_state {
-       boolean_t               fp_valid;
-       enum {
+typedef        enum {
                FXSAVE32 = 1,
-               FXSAVE64 = 2
-       } fp_save_layout;
-        struct x86_fx_save     fx_save_state __attribute__ ((aligned (16)));
-};
+               FXSAVE64 = 2,
+               XSAVE32  = 3,
+               XSAVE64  = 4,
+               FP_UNUSED = 5
+       } fp_save_layout_t;
+
 
 
 /*
@@ -148,7 +148,7 @@ struct x86_kernel_state {
 typedef struct pcb {
        void                    *sf;
        x86_saved_state_t       *iss;
-       struct x86_fpsave_state *ifps;
+       void                    *ifps;
 #ifdef MACH_BSD
        uint64_t        cthread_self;           /* for use of cthread package */
         struct real_descriptor cthread_desc;
index 5cd9b390d8dd8e7dfe3644a7571e8f1ff9ec9397..07b3cf479063402e0401382720342bd658ac9140 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -128,6 +128,7 @@ static void user_page_fault_continue(kern_return_t kret);
 #ifdef __i386__
 static void panic_trap(x86_saved_state32_t *saved_state);
 static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip);
+static void panic_64(x86_saved_state_t *, int, const char *, boolean_t);
 #else
 static void panic_trap(x86_saved_state64_t *saved_state);
 static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip);
@@ -397,6 +398,52 @@ panic_idt64(x86_saved_state_t *rsp)
 }
 #endif
 
+
+
+/*
+ * Non-zero indicates the latency assert is enabled, capped at this
+ * value in absolute time units.
+ */
+   
+uint64_t interrupt_latency_cap = 0;
+boolean_t ilat_assert = FALSE;
+
+void
+interrupt_latency_tracker_setup(void) {
+       uint32_t ilat_cap_us;
+       if (PE_parse_boot_argn("interrupt_latency_cap_us", &ilat_cap_us, sizeof(ilat_cap_us))) {
+               interrupt_latency_cap = ilat_cap_us * NSEC_PER_USEC;
+               nanoseconds_to_absolutetime(interrupt_latency_cap, &interrupt_latency_cap);
+       } else {
+               interrupt_latency_cap = LockTimeOut;
+       }
+       PE_parse_boot_argn("-interrupt_latency_assert_enable", &ilat_assert, sizeof(ilat_assert));
+}
+
+void interrupt_reset_latency_stats(void) {
+       uint32_t i;
+       for (i = 0; i < real_ncpus; i++) {
+               cpu_data_ptr[i]->cpu_max_observed_int_latency =
+                   cpu_data_ptr[i]->cpu_max_observed_int_latency_vector = 0;
+       }
+}
+
+void interrupt_populate_latency_stats(char *buf, unsigned bufsize) {
+       uint32_t i, tcpu = ~0;
+       uint64_t cur_max = 0;
+
+       for (i = 0; i < real_ncpus; i++) {
+               if (cur_max < cpu_data_ptr[i]->cpu_max_observed_int_latency) {
+                       cur_max = cpu_data_ptr[i]->cpu_max_observed_int_latency;
+                       tcpu = i;
+               }
+       }
+
+       if (tcpu < real_ncpus)
+               snprintf(buf, bufsize, "0x%x 0x%x 0x%llx", tcpu, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency_vector, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency);
+}
+   
+
 extern void    PE_incoming_interrupt(int interrupt);
 
 /*
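
Note: the tracker above is driven by two boot-args: interrupt_latency_cap_us sets the cap and -interrupt_latency_assert_enable turns the panic on; otherwise the cap falls back to LockTimeOut. A sketch of a consumer of the stats interface (the sysctl plumbing that actually exposes this is assumed to live in bsd/dev/i386/sysctl.c, also touched by this commit):

	char latbuf[128];

	interrupt_reset_latency_stats();
	/* ... run workload ... */
	interrupt_populate_latency_stats(latbuf, sizeof(latbuf));
	/* latbuf now holds "0x<cpu> 0x<vector> 0x<max latency>" */
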
@@ -411,9 +458,9 @@ interrupt(x86_saved_state_t *state)
        uint64_t        rsp;
        int             interrupt_num;
        boolean_t       user_mode = FALSE;
+       int             cnum = cpu_number();
 
-
-        if (is_saved_state64(state) == TRUE) {
+       if (is_saved_state64(state) == TRUE) {
                x86_saved_state64_t     *state64;
 
                state64 = saved_state64(state);
@@ -443,18 +490,34 @@ interrupt(x86_saved_state_t *state)
         * Handle local APIC interrupts
         * else call platform expert for devices.
         */ 
-       if (!lapic_interrupt(interrupt_num, state))
+       if (!lapic_interrupt(interrupt_num, state)) {
                PE_incoming_interrupt(interrupt_num);
+       }
 
        KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END,
                0, 0, 0, 0, 0);
 
+       if (cpu_data_ptr[cnum]->cpu_nested_istack) {
+               cpu_data_ptr[cnum]->cpu_nested_istack_events++;
+       }
+       else {
+               uint64_t int_latency = mach_absolute_time() - cpu_data_ptr[cnum]->cpu_int_event_time;
+               if (ilat_assert && (int_latency > interrupt_latency_cap) && !machine_timeout_suspended()) {
+                       panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x", interrupt_num, int_latency, cpu_data_ptr[cnum]->cpu_prior_signals);
+               }
+               if (int_latency > cpu_data_ptr[cnum]->cpu_max_observed_int_latency) {
+                       cpu_data_ptr[cnum]->cpu_max_observed_int_latency = int_latency;
+                       cpu_data_ptr[cnum]->cpu_max_observed_int_latency_vector = interrupt_num;
+               }
+       }
+
+
        /*
         * Having serviced the interrupt first, look at the interrupted stack depth.
         */
        if (!user_mode) {
-               uint64_t depth = current_cpu_datap()->cpu_kernel_stack
+               uint64_t depth = cpu_data_ptr[cnum]->cpu_kernel_stack
                                 + sizeof(struct x86_kernel_state)
                                 + sizeof(struct i386_exception_link *)
                                 - rsp;
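
The latency computed in the hunk above is a raw mach_absolute_time() delta, in the
same units as interrupt_latency_cap. A short sketch of converting such a delta to
microseconds for reporting, using the kernel clock interface:

    uint64_t latency_ns, latency_us;

    /* absolute time units are machine-specific; convert before printing */
    absolutetime_to_nanoseconds(int_latency, &latency_ns);
    latency_us = latency_ns / NSEC_PER_USEC;
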
@@ -516,8 +579,9 @@ kernel_trap(
        thread = current_thread();
 
 #ifdef __i386__
-       if (is_saved_state64(state))
-               panic("kernel_trap(%p) with 64-bit state", state);
+       if (is_saved_state64(state)) {
+               panic_64(state, 0, "Kernel trap with 64-bit state", FALSE);
+       }
        saved_state = saved_state32(state);
        vaddr = (user_addr_t)saved_state->cr2;
        type  = saved_state->trapno;
index a5488bbc55e0360629b768c0f258ef71294437c8..9e794797b933b265125505e3b14bfa35e37db6db 100644 (file)
@@ -143,6 +143,7 @@ tsc_init(void)
        busFreq = EFI_FSB_frequency();
 
        switch (cpuid_cpufamily()) {
+       case CPUFAMILY_INTEL_SANDYBRIDGE:
        case CPUFAMILY_INTEL_WESTMERE:
        case CPUFAMILY_INTEL_NEHALEM: {
                uint64_t cpu_mhz;
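
Adding CPUFAMILY_INTEL_SANDYBRIDGE routes those parts through the existing
Nehalem/Westmere path, where the TSC frequency is derived from the bus clock
multiplied by a ratio read from MSR_PLATFORM_INFO rather than measured. A
simplified sketch of that derivation; the MSR address and bit positions are
assumptions from the Intel SDM, not taken from this diff:

    #define MSR_PLATFORM_INFO 0xCE              /* assumed MSR address */

    uint64_t info  = rdmsr64(MSR_PLATFORM_INFO);
    uint64_t ratio = (info >> 8) & 0xFF;        /* max non-turbo bus ratio */
    tscFreq = busFreq * ratio;                  /* busFreq from EFI_FSB_frequency() */
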
index 9f72fb57e6f5268daff6503e46f9a10289acb180..5536038e71d66d89ca2e8992ea33353eac1cffa2 100644 (file)
@@ -1047,7 +1047,7 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) {
 
        while (rem) {
                ppnum_t upn = pmap_find_phys(p, uaddr);
-               uint64_t phys_src = (upn << PAGE_SHIFT) | (uaddr & PAGE_MASK);
+               uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK);
                uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr);
                uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK);
                uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
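
The ptoa_64() change is an overflow fix: ppnum_t is 32 bits wide, so the old
expression shifted in 32-bit arithmetic and truncated page numbers whose physical
address lies at or above 4 GB. A sketch of the difference, assuming PAGE_SHIFT is 12:

    ppnum_t  upn  = 0x100000;               /* first page at the 4 GB boundary */
    uint64_t bad  = (upn << PAGE_SHIFT);    /* 32-bit shift wraps to 0 */
    uint64_t good = ptoa_64(upn);           /* widens first: 0x100000000 */
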
@@ -1085,12 +1085,12 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
        boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
 
        queue_iterate(&tasks, task, task_t, tasks) {
-               int task_pid = pid_from_task(task);
-               boolean_t task64 = task_has_64BitAddr(task);
-
                if ((task == NULL) || (ml_nofault_copy((vm_offset_t) task, (vm_offset_t) &ctask, sizeof(struct task)) != sizeof(struct task)))
                        goto error_exit;
 
+               int task_pid = pid_from_task(task);
+               boolean_t task64 = task_has_64BitAddr(task);
+
                /* Trace everything, unless a process was specified */
                if ((pid == -1) || (pid == task_pid)) {
                        task_snapshot_t task_snap;
index 0a54c5f2e22ee8f068eae6bea32a685ff6391093..3b298fe6e18392e212796681e1fed945eaf1bd9e 100644 (file)
@@ -1541,8 +1541,10 @@ kdp_get_xnu_version(char *versionbuf)
        char *vptr;
 
        strlcpy(vstr, "custom", 10);
-       if (strlcpy(versionbuf, version, 95) < 95) {
-               versionpos = strnstr(versionbuf, "xnu-", 90);
+
+       if (kdp_machine_vm_read((mach_vm_address_t)(uintptr_t)version, versionbuf, 128)) {
+               versionbuf[127] = '\0';
+               versionpos = strnstr(versionbuf, "xnu-", 115);
                if (versionpos) {
                        strncpy(vstr, versionpos, sizeof(vstr));
                        vstr[sizeof(vstr)-1] = '\0';
@@ -1692,7 +1694,12 @@ kdp_panic_dump(void)
        }
                
        printf("Entering system dump routine\n");
-  
+
+       if (!kdp_en_recv_pkt || !kdp_en_send_pkt) {
+               printf("Error: No transport device registered for kernel crashdump\n");
+               return;
+       }
+
        if (!panicd_specified) {
                printf("A dump server was not specified in the boot-args, terminating kernel core dump.\n");
                goto panic_dump_exit;
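
kdp_en_send_pkt and kdp_en_recv_pkt are the polled-mode hooks a network driver
installs for debugger and crashdump traffic; the new guard makes the dump path fail
fast instead of faulting when no driver registered them. The registration side,
sketched with hypothetical driver handlers:

    extern void kdp_register_send_receive(kdp_send_t send, kdp_receive_t receive);

    /* from a driver able to poll its hardware with interrupts disabled */
    kdp_register_send_receive(mydrv_polled_send, mydrv_polled_receive);
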
index 48ec75e5246712983223b079a9c4d8b2462ec8a1..de66f9749753282e642a513b7fd9ee3c6eb83247 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -50,7 +50,11 @@ extern int setTimerReq(void);
 extern void etimer_intr(int inuser, uint64_t iaddr);
 
 extern void etimer_set_deadline(uint64_t deadline);
+#if defined(i386) || defined(x86_64)
+extern uint64_t setPop(uint64_t time);
+#else
 extern int setPop(uint64_t time);
+#endif
 
 extern void etimer_resync_deadlines(void);
 
index 2ed0f12e7371067fe5a52ae2f96ca82867346afb..dddaf47d77abf85a62f357695bc95ce278e75f35 100644 (file)
@@ -962,7 +962,7 @@ clear_wait_internal(
        wait_result_t   wresult)
 {
        wait_queue_t    wq = thread->wait_queue;
-       int                             i = LockTimeOut;
+       uint32_t        i = LockTimeOut;
 
        do {
                if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
@@ -986,7 +986,7 @@ clear_wait_internal(
                }
 
                return (thread_go(thread, wresult));
-       } while (--i > 0);
+       } while ((--i > 0) || machine_timeout_suspended());
 
        panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
                  thread, wq, cpu_number());
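
machine_timeout_suspended() lets the bounded spin continue while timeouts are
administratively suspended (for instance while a debugger holds the other CPUs),
so only a genuine deadlock reaches the panic. The shape of the pattern, with
try_grab() as a hypothetical stand-in for the acquisition attempt:

    uint32_t spins = LockTimeOut;
    do {
            if (try_grab(target))           /* hypothetical lock attempt */
                    return (KERN_SUCCESS);
            cpu_pause();
    } while ((--spins > 0) || machine_timeout_suspended());
    panic("deadlock");
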
index 030e82d282bc7867f9d0ddd7650f1ba784667b70..386bd093c2f9cbbc180e38088ead087e3a51ccd6 100644 (file)
@@ -43,8 +43,9 @@
 
 #include <kern/lock.h>
 #include <kern/queue.h>
-#include <machine/cpu_number.h>
 
+#include <machine/cpu_number.h>
+#include <machine/machine_routines.h> /* machine_timeout_suspended() */
 /*
  *     wait_queue_t
  *     This is the definition of the common event wait queue
@@ -165,11 +166,25 @@ typedef struct _wait_queue_link {
  */
 
 static inline void wait_queue_lock(wait_queue_t wq) {
-       if (!hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2))
-               panic("wait queue deadlock - wq=%p, cpu=%d\n", wq, cpu_number(
-));
+       if (hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2) == 0) {
+               boolean_t wql_acquired = FALSE;
+               while (machine_timeout_suspended()) {
+#if    defined(__i386__) || defined(__x86_64__)
+/*
+ * i386/x86_64 return with preemption disabled on a timeout for
+ * diagnostic purposes.
+ */
+                       mp_enable_preemption();
+#endif
+                       if ((wql_acquired = hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2)))
+                               break;
+               }
+
+               if (wql_acquired == FALSE)
+                       panic("wait queue deadlock - wq=%p, cpu=%d\n", wq, cpu_number());
+       }
 }
+
 static inline void wait_queue_unlock(wait_queue_t wq) {
        assert(wait_queue_held(wq));
        hw_lock_unlock(&(wq)->wq_interlock);
index 927d12f635943283681e6bac456ead15d8d0aa10..bcac16be31ee3bacd10a19fb1080f54d48bcd768 100644 (file)
@@ -264,6 +264,53 @@ _STRUCT_X86_FLOAT_STATE32
        char                    __fpu_rsrv4[14*16];     /* reserved */
        int                     __fpu_reserved1;
 };
+
+#define        _STRUCT_X86_AVX_STATE32 struct __darwin_i386_avx_state
+_STRUCT_X86_AVX_STATE32
+{
+       int                     __fpu_reserved[2];
+       _STRUCT_FP_CONTROL      __fpu_fcw;              /* x87 FPU control word */
+       _STRUCT_FP_STATUS       __fpu_fsw;              /* x87 FPU status word */
+       __uint8_t               __fpu_ftw;              /* x87 FPU tag word */
+       __uint8_t               __fpu_rsrv1;            /* reserved */ 
+       __uint16_t              __fpu_fop;              /* x87 FPU Opcode */
+       __uint32_t              __fpu_ip;               /* x87 FPU Instruction Pointer offset */
+       __uint16_t              __fpu_cs;               /* x87 FPU Instruction Pointer Selector */
+       __uint16_t              __fpu_rsrv2;            /* reserved */
+       __uint32_t              __fpu_dp;               /* x87 FPU Instruction Operand(Data) Pointer offset */
+       __uint16_t              __fpu_ds;               /* x87 FPU Instruction Operand(Data) Pointer Selector */
+       __uint16_t              __fpu_rsrv3;            /* reserved */
+       __uint32_t              __fpu_mxcsr;            /* MXCSR Register state */
+       __uint32_t              __fpu_mxcsrmask;        /* MXCSR mask */
+       _STRUCT_MMST_REG        __fpu_stmm0;            /* ST0/MM0   */
+       _STRUCT_MMST_REG        __fpu_stmm1;            /* ST1/MM1  */
+       _STRUCT_MMST_REG        __fpu_stmm2;            /* ST2/MM2  */
+       _STRUCT_MMST_REG        __fpu_stmm3;            /* ST3/MM3  */
+       _STRUCT_MMST_REG        __fpu_stmm4;            /* ST4/MM4  */
+       _STRUCT_MMST_REG        __fpu_stmm5;            /* ST5/MM5  */
+       _STRUCT_MMST_REG        __fpu_stmm6;            /* ST6/MM6  */
+       _STRUCT_MMST_REG        __fpu_stmm7;            /* ST7/MM7  */
+       _STRUCT_XMM_REG         __fpu_xmm0;             /* XMM 0  */
+       _STRUCT_XMM_REG         __fpu_xmm1;             /* XMM 1  */
+       _STRUCT_XMM_REG         __fpu_xmm2;             /* XMM 2  */
+       _STRUCT_XMM_REG         __fpu_xmm3;             /* XMM 3  */
+       _STRUCT_XMM_REG         __fpu_xmm4;             /* XMM 4  */
+       _STRUCT_XMM_REG         __fpu_xmm5;             /* XMM 5  */
+       _STRUCT_XMM_REG         __fpu_xmm6;             /* XMM 6  */
+       _STRUCT_XMM_REG         __fpu_xmm7;             /* XMM 7  */
+       char                    __fpu_rsrv4[14*16];     /* reserved */
+       int                     __fpu_reserved1;
+       char                    __avx_reserved1[64];
+       _STRUCT_XMM_REG         __fpu_ymmh0;            /* YMMH 0  */
+       _STRUCT_XMM_REG         __fpu_ymmh1;            /* YMMH 1  */
+       _STRUCT_XMM_REG         __fpu_ymmh2;            /* YMMH 2  */
+       _STRUCT_XMM_REG         __fpu_ymmh3;            /* YMMH 3  */
+       _STRUCT_XMM_REG         __fpu_ymmh4;            /* YMMH 4  */
+       _STRUCT_XMM_REG         __fpu_ymmh5;            /* YMMH 5  */
+       _STRUCT_XMM_REG         __fpu_ymmh6;            /* YMMH 6  */
+       _STRUCT_XMM_REG         __fpu_ymmh7;            /* YMMH 7  */
+};
+
 #else /* !__DARWIN_UNIX03 */
 #define        _STRUCT_X86_FLOAT_STATE32       struct i386_float_state
 _STRUCT_X86_FLOAT_STATE32
@@ -301,6 +348,53 @@ _STRUCT_X86_FLOAT_STATE32
        char                    fpu_rsrv4[14*16];       /* reserved */
        int                     fpu_reserved1;
 };
+
+#define        _STRUCT_X86_AVX_STATE32 struct i386_avx_state
+_STRUCT_X86_AVX_STATE32
+{
+       int                     fpu_reserved[2];
+       _STRUCT_FP_CONTROL      fpu_fcw;                /* x87 FPU control word */
+       _STRUCT_FP_STATUS       fpu_fsw;                /* x87 FPU status word */
+       __uint8_t               fpu_ftw;                /* x87 FPU tag word */
+       __uint8_t               fpu_rsrv1;              /* reserved */ 
+       __uint16_t              fpu_fop;                /* x87 FPU Opcode */
+       __uint32_t              fpu_ip;                 /* x87 FPU Instruction Pointer offset */
+       __uint16_t              fpu_cs;                 /* x87 FPU Instruction Pointer Selector */
+       __uint16_t              fpu_rsrv2;              /* reserved */
+       __uint32_t              fpu_dp;                 /* x87 FPU Instruction Operand(Data) Pointer offset */
+       __uint16_t              fpu_ds;                 /* x87 FPU Instruction Operand(Data) Pointer Selector */
+       __uint16_t              fpu_rsrv3;              /* reserved */
+       __uint32_t              fpu_mxcsr;              /* MXCSR Register state */
+       __uint32_t              fpu_mxcsrmask;          /* MXCSR mask */
+       _STRUCT_MMST_REG        fpu_stmm0;              /* ST0/MM0   */
+       _STRUCT_MMST_REG        fpu_stmm1;              /* ST1/MM1  */
+       _STRUCT_MMST_REG        fpu_stmm2;              /* ST2/MM2  */
+       _STRUCT_MMST_REG        fpu_stmm3;              /* ST3/MM3  */
+       _STRUCT_MMST_REG        fpu_stmm4;              /* ST4/MM4  */
+       _STRUCT_MMST_REG        fpu_stmm5;              /* ST5/MM5  */
+       _STRUCT_MMST_REG        fpu_stmm6;              /* ST6/MM6  */
+       _STRUCT_MMST_REG        fpu_stmm7;              /* ST7/MM7  */
+       _STRUCT_XMM_REG         fpu_xmm0;               /* XMM 0  */
+       _STRUCT_XMM_REG         fpu_xmm1;               /* XMM 1  */
+       _STRUCT_XMM_REG         fpu_xmm2;               /* XMM 2  */
+       _STRUCT_XMM_REG         fpu_xmm3;               /* XMM 3  */
+       _STRUCT_XMM_REG         fpu_xmm4;               /* XMM 4  */
+       _STRUCT_XMM_REG         fpu_xmm5;               /* XMM 5  */
+       _STRUCT_XMM_REG         fpu_xmm6;               /* XMM 6  */
+       _STRUCT_XMM_REG         fpu_xmm7;               /* XMM 7  */
+       char                    fpu_rsrv4[14*16];       /* reserved */
+       int                     fpu_reserved1;
+       char                    __avx_reserved1[64];
+       _STRUCT_XMM_REG         __fpu_ymmh0;            /* YMMH 0  */
+       _STRUCT_XMM_REG         __fpu_ymmh1;            /* YMMH 1  */
+       _STRUCT_XMM_REG         __fpu_ymmh2;            /* YMMH 2  */
+       _STRUCT_XMM_REG         __fpu_ymmh3;            /* YMMH 3  */
+       _STRUCT_XMM_REG         __fpu_ymmh4;            /* YMMH 4  */
+       _STRUCT_XMM_REG         __fpu_ymmh5;            /* YMMH 5  */
+       _STRUCT_XMM_REG         __fpu_ymmh6;            /* YMMH 6  */
+       _STRUCT_XMM_REG         __fpu_ymmh7;            /* YMMH 7  */
+};
+
 #endif /* !__DARWIN_UNIX03 */
 
 #if __DARWIN_UNIX03
@@ -459,6 +553,75 @@ _STRUCT_X86_FLOAT_STATE64
        char                    __fpu_rsrv4[6*16];      /* reserved */
        int                     __fpu_reserved1;
 };
+
+#define        _STRUCT_X86_AVX_STATE64 struct __darwin_x86_avx_state64
+_STRUCT_X86_AVX_STATE64
+{
+       int                     __fpu_reserved[2];
+       _STRUCT_FP_CONTROL      __fpu_fcw;              /* x87 FPU control word */
+       _STRUCT_FP_STATUS       __fpu_fsw;              /* x87 FPU status word */
+       __uint8_t               __fpu_ftw;              /* x87 FPU tag word */
+       __uint8_t               __fpu_rsrv1;            /* reserved */ 
+       __uint16_t              __fpu_fop;              /* x87 FPU Opcode */
+
+       /* x87 FPU Instruction Pointer */
+       __uint32_t              __fpu_ip;               /* offset */
+       __uint16_t              __fpu_cs;               /* Selector */
+
+       __uint16_t              __fpu_rsrv2;            /* reserved */
+
+       /* x87 FPU Instruction Operand(Data) Pointer */
+       __uint32_t              __fpu_dp;               /* offset */
+       __uint16_t              __fpu_ds;               /* Selector */
+
+       __uint16_t              __fpu_rsrv3;            /* reserved */
+       __uint32_t              __fpu_mxcsr;            /* MXCSR Register state */
+       __uint32_t              __fpu_mxcsrmask;        /* MXCSR mask */
+       _STRUCT_MMST_REG        __fpu_stmm0;            /* ST0/MM0   */
+       _STRUCT_MMST_REG        __fpu_stmm1;            /* ST1/MM1  */
+       _STRUCT_MMST_REG        __fpu_stmm2;            /* ST2/MM2  */
+       _STRUCT_MMST_REG        __fpu_stmm3;            /* ST3/MM3  */
+       _STRUCT_MMST_REG        __fpu_stmm4;            /* ST4/MM4  */
+       _STRUCT_MMST_REG        __fpu_stmm5;            /* ST5/MM5  */
+       _STRUCT_MMST_REG        __fpu_stmm6;            /* ST6/MM6  */
+       _STRUCT_MMST_REG        __fpu_stmm7;            /* ST7/MM7  */
+       _STRUCT_XMM_REG         __fpu_xmm0;             /* XMM 0  */
+       _STRUCT_XMM_REG         __fpu_xmm1;             /* XMM 1  */
+       _STRUCT_XMM_REG         __fpu_xmm2;             /* XMM 2  */
+       _STRUCT_XMM_REG         __fpu_xmm3;             /* XMM 3  */
+       _STRUCT_XMM_REG         __fpu_xmm4;             /* XMM 4  */
+       _STRUCT_XMM_REG         __fpu_xmm5;             /* XMM 5  */
+       _STRUCT_XMM_REG         __fpu_xmm6;             /* XMM 6  */
+       _STRUCT_XMM_REG         __fpu_xmm7;             /* XMM 7  */
+       _STRUCT_XMM_REG         __fpu_xmm8;             /* XMM 8  */
+       _STRUCT_XMM_REG         __fpu_xmm9;             /* XMM 9  */
+       _STRUCT_XMM_REG         __fpu_xmm10;            /* XMM 10  */
+       _STRUCT_XMM_REG         __fpu_xmm11;            /* XMM 11 */
+       _STRUCT_XMM_REG         __fpu_xmm12;            /* XMM 12  */
+       _STRUCT_XMM_REG         __fpu_xmm13;            /* XMM 13  */
+       _STRUCT_XMM_REG         __fpu_xmm14;            /* XMM 14  */
+       _STRUCT_XMM_REG         __fpu_xmm15;            /* XMM 15  */
+       char                    __fpu_rsrv4[6*16];      /* reserved */
+       int                     __fpu_reserved1;
+       char                    __avx_reserved1[64];
+       _STRUCT_XMM_REG         __fpu_ymmh0;            /* YMMH 0  */
+       _STRUCT_XMM_REG         __fpu_ymmh1;            /* YMMH 1  */
+       _STRUCT_XMM_REG         __fpu_ymmh2;            /* YMMH 2  */
+       _STRUCT_XMM_REG         __fpu_ymmh3;            /* YMMH 3  */
+       _STRUCT_XMM_REG         __fpu_ymmh4;            /* YMMH 4  */
+       _STRUCT_XMM_REG         __fpu_ymmh5;            /* YMMH 5  */
+       _STRUCT_XMM_REG         __fpu_ymmh6;            /* YMMH 6  */
+       _STRUCT_XMM_REG         __fpu_ymmh7;            /* YMMH 7  */
+       _STRUCT_XMM_REG         __fpu_ymmh8;            /* YMMH 8  */
+       _STRUCT_XMM_REG         __fpu_ymmh9;            /* YMMH 9  */
+       _STRUCT_XMM_REG         __fpu_ymmh10;           /* YMMH 10  */
+       _STRUCT_XMM_REG         __fpu_ymmh11;           /* YMMH 11  */
+       _STRUCT_XMM_REG         __fpu_ymmh12;           /* YMMH 12  */
+       _STRUCT_XMM_REG         __fpu_ymmh13;           /* YMMH 13  */
+       _STRUCT_XMM_REG         __fpu_ymmh14;           /* YMMH 14  */
+       _STRUCT_XMM_REG         __fpu_ymmh15;           /* YMMH 15  */
+};
+
 #else /* !__DARWIN_UNIX03 */
 #define        _STRUCT_X86_FLOAT_STATE64       struct x86_float_state64
 _STRUCT_X86_FLOAT_STATE64
@@ -510,6 +673,75 @@ _STRUCT_X86_FLOAT_STATE64
        char                    fpu_rsrv4[6*16];        /* reserved */
        int                     fpu_reserved1;
 };
+
+#define        _STRUCT_X86_AVX_STATE64 struct x86_avx_state64
+_STRUCT_X86_AVX_STATE64
+{
+       int                     fpu_reserved[2];
+       _STRUCT_FP_CONTROL      fpu_fcw;                /* x87 FPU control word */
+       _STRUCT_FP_STATUS       fpu_fsw;                /* x87 FPU status word */
+       __uint8_t               fpu_ftw;                /* x87 FPU tag word */
+       __uint8_t               fpu_rsrv1;              /* reserved */ 
+       __uint16_t              fpu_fop;                /* x87 FPU Opcode */
+
+       /* x87 FPU Instruction Pointer */
+       __uint32_t              fpu_ip;                 /* offset */
+       __uint16_t              fpu_cs;                 /* Selector */
+
+       __uint16_t              fpu_rsrv2;              /* reserved */
+
+       /* x87 FPU Instruction Operand(Data) Pointer */
+       __uint32_t              fpu_dp;                 /* offset */
+       __uint16_t              fpu_ds;                 /* Selector */
+
+       __uint16_t              fpu_rsrv3;              /* reserved */
+       __uint32_t              fpu_mxcsr;              /* MXCSR Register state */
+       __uint32_t              fpu_mxcsrmask;          /* MXCSR mask */
+       _STRUCT_MMST_REG        fpu_stmm0;              /* ST0/MM0   */
+       _STRUCT_MMST_REG        fpu_stmm1;              /* ST1/MM1  */
+       _STRUCT_MMST_REG        fpu_stmm2;              /* ST2/MM2  */
+       _STRUCT_MMST_REG        fpu_stmm3;              /* ST3/MM3  */
+       _STRUCT_MMST_REG        fpu_stmm4;              /* ST4/MM4  */
+       _STRUCT_MMST_REG        fpu_stmm5;              /* ST5/MM5  */
+       _STRUCT_MMST_REG        fpu_stmm6;              /* ST6/MM6  */
+       _STRUCT_MMST_REG        fpu_stmm7;              /* ST7/MM7  */
+       _STRUCT_XMM_REG         fpu_xmm0;               /* XMM 0  */
+       _STRUCT_XMM_REG         fpu_xmm1;               /* XMM 1  */
+       _STRUCT_XMM_REG         fpu_xmm2;               /* XMM 2  */
+       _STRUCT_XMM_REG         fpu_xmm3;               /* XMM 3  */
+       _STRUCT_XMM_REG         fpu_xmm4;               /* XMM 4  */
+       _STRUCT_XMM_REG         fpu_xmm5;               /* XMM 5  */
+       _STRUCT_XMM_REG         fpu_xmm6;               /* XMM 6  */
+       _STRUCT_XMM_REG         fpu_xmm7;               /* XMM 7  */
+       _STRUCT_XMM_REG         fpu_xmm8;               /* XMM 8  */
+       _STRUCT_XMM_REG         fpu_xmm9;               /* XMM 9  */
+       _STRUCT_XMM_REG         fpu_xmm10;              /* XMM 10  */
+       _STRUCT_XMM_REG         fpu_xmm11;              /* XMM 11 */
+       _STRUCT_XMM_REG         fpu_xmm12;              /* XMM 12  */
+       _STRUCT_XMM_REG         fpu_xmm13;              /* XMM 13  */
+       _STRUCT_XMM_REG         fpu_xmm14;              /* XMM 14  */
+       _STRUCT_XMM_REG         fpu_xmm15;              /* XMM 15  */
+       char                    fpu_rsrv4[6*16];        /* reserved */
+       int                     fpu_reserved1;
+       char                    __avx_reserved1[64];
+       _STRUCT_XMM_REG         __fpu_ymmh0;            /* YMMH 0  */
+       _STRUCT_XMM_REG         __fpu_ymmh1;            /* YMMH 1  */
+       _STRUCT_XMM_REG         __fpu_ymmh2;            /* YMMH 2  */
+       _STRUCT_XMM_REG         __fpu_ymmh3;            /* YMMH 3  */
+       _STRUCT_XMM_REG         __fpu_ymmh4;            /* YMMH 4  */
+       _STRUCT_XMM_REG         __fpu_ymmh5;            /* YMMH 5  */
+       _STRUCT_XMM_REG         __fpu_ymmh6;            /* YMMH 6  */
+       _STRUCT_XMM_REG         __fpu_ymmh7;            /* YMMH 7  */
+       _STRUCT_XMM_REG         __fpu_ymmh8;            /* YMMH 8  */
+       _STRUCT_XMM_REG         __fpu_ymmh9;            /* YMMH 9  */
+       _STRUCT_XMM_REG         __fpu_ymmh10;           /* YMMH 10  */
+       _STRUCT_XMM_REG         __fpu_ymmh11;           /* YMMH 11  */
+       _STRUCT_XMM_REG         __fpu_ymmh12;           /* YMMH 12  */
+       _STRUCT_XMM_REG         __fpu_ymmh13;           /* YMMH 13  */
+       _STRUCT_XMM_REG         __fpu_ymmh14;           /* YMMH 14  */
+       _STRUCT_XMM_REG         __fpu_ymmh15;           /* YMMH 15  */
+};
+
 #endif /* !__DARWIN_UNIX03 */
 
 #if __DARWIN_UNIX03
index 47a26a99d2190a36e43a980cc759f476c9067974..92d504f3f94aa7267326e6241edb4c4ade4274ed 100644 (file)
 #ifndef        _I386_FP_SAVE_H_
 #define        _I386_FP_SAVE_H_
 
+#ifdef  MACH_KERNEL_PRIVATE
 
-/* note when allocating this data structure, it must be 16 byte aligned. */
-struct x86_fx_save {
-        unsigned short  fx_control;     /* control */
-        unsigned short  fx_status;      /* status */
-        unsigned char          fx_tag;         /* register tags */
-        unsigned char  fx_bbz1;        /* better be zero when calling fxrtstor */
-        unsigned short  fx_opcode;
-        unsigned int    fx_eip;         /* eip  instruction */
-        unsigned short  fx_cs;          /* cs instruction */
-        unsigned short  fx_bbz2;       /* better be zero when calling fxrtstor */ 
-        unsigned int    fx_dp;          /* data address */
-        unsigned short  fx_ds;          /* data segment */
-        unsigned short  fx_bbz3;       /* better be zero when calling fxrtstor */
-        unsigned int   fx_MXCSR;
-        unsigned int   fx_MXCSR_MASK;
-        unsigned short  fx_reg_word[8][8];      /* STx/MMx registers */
-        unsigned short  fx_XMM_reg[8][16];     /* XMM0-XMM15 on 64 bit processors */
+
+struct         x86_fx_thread_state {
+       unsigned short  fx_control;     /* control */
+       unsigned short  fx_status;      /* status */
+       unsigned char   fx_tag;         /* register tags */
+       unsigned char   fx_bbz1;        /* better be zero when calling fxrstor */
+       unsigned short  fx_opcode;
+       unsigned int    fx_eip;         /* eip  instruction */
+       unsigned short  fx_cs;          /* cs instruction */
+       unsigned short  fx_bbz2;        /* better be zero when calling fxrstor */
+       unsigned int    fx_dp;          /* data address */
+       unsigned short  fx_ds;          /* data segment */
+       unsigned short  fx_bbz3;        /* better be zero when calling fxrstor */
+       unsigned int    fx_MXCSR;
+       unsigned int    fx_MXCSR_MASK;
+       unsigned short  fx_reg_word[8][8];      /* STx/MMx registers */
+       unsigned short  fx_XMM_reg[8][16];      /* XMM0-XMM15 on 64 bit processors */
                                                 /* XMM0-XMM7  on 32 bit processors... unused storage reserved */
-        unsigned char  fx_reserved[16*6];      /* reserved by intel for future expansion */
-};
 
+       unsigned char   fx_reserved[16*5];      /* reserved by intel for future
+                                                * expansion */
+       unsigned int    fp_valid;
+       unsigned int    fp_save_layout;
+       unsigned char   fx_pad[8];
+}__attribute__ ((packed));
+
+struct x86_avx_thread_state {
+       unsigned short  fx_control;     /* control */
+       unsigned short  fx_status;      /* status */
+       unsigned char   fx_tag;         /* register tags */
+       unsigned char   fx_bbz1;        /* reserved zero */
+       unsigned short  fx_opcode;
+       unsigned int    fx_eip;         /* eip  instruction */
+       unsigned short  fx_cs;          /* cs instruction */
+       unsigned short  fx_bbz2;        /* reserved zero */
+       unsigned int    fx_dp;          /* data address */
+       unsigned short  fx_ds;          /* data segment */
+       unsigned short  fx_bbz3;        /* reserved zero */
+       unsigned int    fx_MXCSR;
+       unsigned int    fx_MXCSR_MASK;
+       unsigned short  fx_reg_word[8][8];      /* STx/MMx registers */
+       unsigned short  fx_XMM_reg[8][16];      /* XMM0-XMM15 on 64 bit processors */
+                                                /* XMM0-XMM7  on 32 bit processors... unused storage reserved */
+       unsigned char   fx_reserved[16*5];      /* reserved */
+       unsigned int    fp_valid;
+       unsigned int    fp_save_layout;
+       unsigned char   fx_pad[8];
+
+       struct  xsave_header {                  /* Offset 512, xsave header */
+               uint64_t xsbv;
+               char    xhrsvd[56];
+       }_xh;
+
+       unsigned int    x_YMMH_reg[4][16];      /* Offset 576, high YMMs*/
+}__attribute__ ((packed));
 
+#endif /* MACH_KERNEL_PRIVATE */
 /*
  * Control register
  */
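
The embedded _xh header mirrors the hardware XSAVE area: xsbv is the XSTATE_BV bit
vector at offset 512, and x_YMMH_reg at offset 576 is meaningful only when the AVX
bit of xsbv is set. A kernel-side sketch of that check; the XFEM_YMM mask value is
an assumption, not defined in this hunk:

    #define XFEM_YMM 0x4    /* assumed: XSTATE_BV bit 2 = YMM-high state saved */

    static boolean_t
    avx_state_valid(struct x86_avx_thread_state *ifps)
    {
            /* XSAVE sets the bit only if the high YMM halves were written */
            return ((ifps->_xh.xsbv & XFEM_YMM) != 0);
    }
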
index 558d1c071303673b03a5b9b73e6b49fd980699e7..501fc8df057936f11cfc694440dcbca80be85ebd 100644 (file)
 #define x86_DEBUG_STATE64              11
 #define x86_DEBUG_STATE                        12
 #define THREAD_STATE_NONE              13
+/* 14 and 15 are used for the internal x86_SAVED_STATE flavours */
+#define x86_AVX_STATE32                        16
+#define x86_AVX_STATE64                        17
+
 
 /*
  * Largest state on this machine:
          (x == x86_FLOAT_STATE)        || \
          (x == x86_EXCEPTION_STATE)    || \
          (x == x86_DEBUG_STATE)        || \
+         (x == x86_AVX_STATE32)        || \
+         (x == x86_AVX_STATE64)        || \
          (x == THREAD_STATE_NONE))
 
 struct x86_state_hdr {
@@ -175,6 +181,10 @@ typedef _STRUCT_X86_FLOAT_STATE32 x86_float_state32_t;
 #define x86_FLOAT_STATE32_COUNT ((mach_msg_type_number_t) \
                (sizeof(x86_float_state32_t)/sizeof(unsigned int)))
 
+typedef _STRUCT_X86_AVX_STATE32 x86_avx_state32_t;
+#define x86_AVX_STATE32_COUNT ((mach_msg_type_number_t) \
+               (sizeof(x86_avx_state32_t)/sizeof(unsigned int)))
+
 /*
  * to be deprecated in the future
  */
@@ -201,7 +211,11 @@ typedef _STRUCT_X86_THREAD_STATE64 x86_thread_state64_t;
 typedef _STRUCT_X86_FLOAT_STATE64 x86_float_state64_t;
 #define x86_FLOAT_STATE64_COUNT ((mach_msg_type_number_t) \
                (sizeof(x86_float_state64_t)/sizeof(unsigned int)))
-               
+
+typedef _STRUCT_X86_AVX_STATE64 x86_avx_state64_t;
+#define x86_AVX_STATE64_COUNT ((mach_msg_type_number_t) \
+               (sizeof(x86_avx_state64_t)/sizeof(unsigned int)))
+
 typedef _STRUCT_X86_EXCEPTION_STATE64 x86_exception_state64_t;
 #define x86_EXCEPTION_STATE64_COUNT    ((mach_msg_type_number_t) \
     ( sizeof (x86_exception_state64_t) / sizeof (int) ))
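
A user-space sketch of retrieving the new 64-bit AVX flavor with thread_get_state();
it assumes the caller holds a send right to the thread, and kernels without AVX
support would reject the flavor:

    #include <mach/mach.h>

    /* fetch the AVX state of 'thread' into *avx */
    static kern_return_t
    get_avx64(thread_act_t thread, x86_avx_state64_t *avx)
    {
            mach_msg_type_number_t count = x86_AVX_STATE64_COUNT;

            /* on success, __fpu_ymmh0..15 hold the high 128 bits of YMM0-YMM15;
               the low halves are the __fpu_xmm registers */
            return thread_get_state(thread, x86_AVX_STATE64,
                (thread_state_t)avx, &count);
    }
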
index 8dbb71f66ee0509da3b4f77db15b1596a03ec873..0cd136c69f63c276601b5d7ae8c9ca1b4b094c21 100644 (file)
@@ -333,7 +333,7 @@ routine vm_remap(
        inout   target_address  : mach_vm_address_t;
                size            : mach_vm_size_t;
                mask            : mach_vm_offset_t;
-               anywhere        : boolean_t;
+               flags           : int;
                src_task        : vm_map_t;
                src_address     : mach_vm_address_t;
                copy            : boolean_t;
index 439c0145abc2c1b1dcc18c927239121d4ed0af86..b59e795ef6b1e1719f6c35dd76e07c22fdb260c8 100644 (file)
@@ -293,7 +293,7 @@ routine vm_remap(
        inout   target_address  : vm_address_t;
                size            : vm_size_t;
                mask            : vm_address_t;
-               anywhere        : boolean_t;
+               flags           : int;
                src_task        : vm_map_t;
                src_address     : vm_address_t;
                copy            : boolean_t;
index 36a79fdd16aa7b314715ea1da31d37e64fe62446..89ca4351e2f43ce5e515915880a032deec1063a3 100644 (file)
@@ -252,7 +252,9 @@ typedef struct pmap_statistics      *pmap_statistics_t;
 #define VM_FLAGS_SUBMAP                0x0800  /* mapping a VM submap */
 #define VM_FLAGS_ALREADY       0x1000  /* OK if same mapping already exists */
 #define VM_FLAGS_BEYOND_MAX    0x2000  /* map beyond the map's max offset */
+#endif /* KERNEL_PRIVATE */
 #define VM_FLAGS_OVERWRITE     0x4000  /* delete any existing mappings first */
+#ifdef KERNEL_PRIVATE
 #define VM_FLAGS_NO_PMAP_CHECK 0x8000  /* do not check that pmap is empty */
 #endif /* KERNEL_PRIVATE */
 
@@ -283,9 +285,13 @@ typedef struct pmap_statistics     *pmap_statistics_t;
                                 VM_FLAGS_ANYWHERE |            \
                                 VM_FLAGS_PURGABLE |            \
                                 VM_FLAGS_NO_CACHE |            \
+                                VM_FLAGS_OVERWRITE |           \
                                 VM_FLAGS_SUPERPAGE_MASK |      \
                                 VM_FLAGS_ALIAS_MASK)
 #define VM_FLAGS_USER_MAP      VM_FLAGS_USER_ALLOCATE
+#define VM_FLAGS_USER_REMAP    (VM_FLAGS_FIXED |    \
+                                VM_FLAGS_ANYWHERE | \
+                                VM_FLAGS_OVERWRITE)
 
 #define VM_MEMORY_MALLOC 1
 #define VM_MEMORY_MALLOC_SMALL 2
index 25607c1ccd4777719e8ca2eb0fa00d0dcc26de5d..e30d357ce5dc5e1f71dc1c41054e5c9fc1c632e8 100644 (file)
@@ -52,7 +52,7 @@ extern vm_offset_t    virtual_avail;
  * Note, this will onl
  */
 vm_offset_t
-io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
+io_map(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
 {
        vm_offset_t     start;
        vm_size_t       i;
@@ -75,7 +75,7 @@ io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
                        mflags,                                 /* Map with requested cache mode */
                        (size >> 12), VM_PROT_READ|VM_PROT_WRITE);
 
-               return (start + (phys_addr & PAGE_MASK));       /* Pass back the physical address */
+               return (start + (phys_addr & PAGE_MASK));       /* Pass back the virtual address */
        
        } else {
        
@@ -107,7 +107,7 @@ io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
  * Allocate and map memory for devices before the VM system comes alive.
  */
 
-vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
+vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
 {
        vm_offset_t     start;
        unsigned int    mflags;
index eb0adf48a9995463ffcfd2f2cc3605dfde530828..8fceaaf86fb622ba6f6df021614b7d793c45747d 100644 (file)
 #define        _PPC_IO_MAP_ENTRIES_H_
 
 extern vm_offset_t     io_map(
-                               vm_offset_t             phys_addr,
+                               vm_map_offset_t         phys_addr,
                                vm_size_t               size,
                                unsigned int            flags);
-extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags);
+extern vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags);
 
 #endif /* _PPC_IO_MAP_ENTRIES_H_ */
 
index bc79f0c7c70fe66ec96180e9ee366a5a23b23456..d4fb8e1ca5493cdcfd4479693c62a955808273b0 100644 (file)
@@ -841,3 +841,7 @@ vm_offset_t ml_stack_remaining(void)
            return (local - current_thread()->kernel_stack);
        }
 }
+
+boolean_t machine_timeout_suspended(void) {
+       return FALSE;
+}
index 3fcaf77ef0e7c0e7791dc1c5d229ea3eb79432e1..47b12432d494c44c1cf3886897b6488b7544da00 100644 (file)
@@ -261,6 +261,7 @@ extern void                         ml_init_lock_timeout(
 
 void ml_ppc_do_sleep(void);
 
+boolean_t machine_timeout_suspended(void);
 #endif /* MACH_KERNEL_PRIVATE */
 #endif /* XNU_KERNEL_PRIVATE */
 
index 64ef466b68747371bd67f13544967268baf0bd6d..68fece885676e851cab79a8b2f24e4549ec76f6e 100644 (file)
@@ -253,7 +253,7 @@ static kern_return_t        vm_map_remap_range_allocate(
        vm_map_address_t        *address,
        vm_map_size_t           size,
        vm_map_offset_t         mask,
-       boolean_t               anywhere,
+       int                     flags,
        vm_map_entry_t          *map_entry);
 
 static void            vm_map_region_look_for_page(
@@ -11155,7 +11155,7 @@ vm_map_remap(
        vm_map_address_t        *address,
        vm_map_size_t           size,
        vm_map_offset_t         mask,
-       boolean_t               anywhere,
+       int                     flags,
        vm_map_t                src_map,
        vm_map_offset_t         memory_address,
        boolean_t               copy,
@@ -11204,7 +11204,7 @@ vm_map_remap(
        *address = vm_map_trunc_page(*address);
        vm_map_lock(target_map);
        result = vm_map_remap_range_allocate(target_map, address, size,
-                                            mask, anywhere, &insp_entry);
+                                            mask, flags, &insp_entry);
 
        for (entry = map_header.links.next;
             entry != (struct vm_map_entry *)&map_header.links;
@@ -11255,18 +11255,19 @@ vm_map_remap_range_allocate(
        vm_map_address_t        *address,       /* IN/OUT */
        vm_map_size_t           size,
        vm_map_offset_t         mask,
-       boolean_t               anywhere,
+       int                     flags,
        vm_map_entry_t          *map_entry)     /* OUT */
 {
-       register vm_map_entry_t entry;
-       register vm_map_offset_t        start;
-       register vm_map_offset_t        end;
+       vm_map_entry_t  entry;
+       vm_map_offset_t start;
+       vm_map_offset_t end;
+       kern_return_t   kr;
 
 StartAgain: ;
 
        start = *address;
 
-       if (anywhere)
+       if (flags & VM_FLAGS_ANYWHERE)
        {
                /*
                 *      Calculate the first possible address.
@@ -11379,6 +11380,37 @@ StartAgain: ;
                        return(KERN_INVALID_ADDRESS);
                }
 
+               /*
+                * If we're asked to overwrite whatever was mapped in that
+                * range, first deallocate that range.
+                */
+               if (flags & VM_FLAGS_OVERWRITE) {
+                       vm_map_t zap_map;
+
+                       /*
+                        * We use a "zap_map" to avoid having to unlock
+                        * the "map" in vm_map_delete(), which would compromise
+                        * the atomicity of the "deallocate" and then "remap"
+                        * combination.
+                        */
+                       zap_map = vm_map_create(PMAP_NULL,
+                                               start,
+                                               end - start,
+                                               map->hdr.entries_pageable);
+                       if (zap_map == VM_MAP_NULL) {
+                               return KERN_RESOURCE_SHORTAGE;
+                       }
+
+                       kr = vm_map_delete(map, start, end,
+                                          VM_MAP_REMOVE_SAVE_ENTRIES,
+                                          zap_map);
+                       if (kr == KERN_SUCCESS) {
+                               vm_map_destroy(zap_map,
+                                              VM_MAP_REMOVE_NO_PMAP_CLEANUP);
+                               zap_map = VM_MAP_NULL;
+                       }
+               }
+
                /*
                 *      ...     the starting address isn't allocated
                 */
index 09eaa747306159c2f0da4f21677cdbc947aa6d53..dd39abb5c47b070490dcc3c616a852e45f8865c4 100644 (file)
@@ -690,7 +690,7 @@ extern kern_return_t vm_map_remap(
                                vm_map_offset_t         *address,
                                vm_map_size_t           size,
                                vm_map_offset_t         mask,
-                               boolean_t               anywhere,
+                               int                     flags,
                                vm_map_t                src_map,
                                vm_map_offset_t         memory_address,
                                boolean_t               copy,
index dae49ac1a5f2027a5fd0943eed8150a853d6f443..979c816246cc3fd1306b1b5775bca773137de74c 100644 (file)
@@ -1708,7 +1708,6 @@ vm_page_grablo(void)
                vm_lopages_allocated_cpm_success++;
                vm_page_unlock_queues();
        }
-       assert(mem->gobbled);
        assert(mem->busy);
        assert(!mem->free);
        assert(!mem->pmapped);
index 59c26ff7069c1eac5329d456aad46cd87894ba05..582e51fc0bfc79a71688683720f6ed23f64fb73f 100644 (file)
@@ -977,7 +977,7 @@ mach_vm_remap(
        mach_vm_offset_t        *address,
        mach_vm_size_t  size,
        mach_vm_offset_t        mask,
-       boolean_t               anywhere,
+       int                     flags,
        vm_map_t                src_map,
        mach_vm_offset_t        memory_address,
        boolean_t               copy,
@@ -991,13 +991,17 @@ mach_vm_remap(
        if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
                return KERN_INVALID_ARGUMENT;
 
+       /* filter out any kernel-only flags */
+       if (flags & ~VM_FLAGS_USER_REMAP)
+               return KERN_INVALID_ARGUMENT;
+
        map_addr = (vm_map_offset_t)*address;
 
        kr = vm_map_remap(target_map,
                          &map_addr,
                          size,
                          mask,
-                         anywhere,
+                         flags,
                          src_map,
                          memory_address,
                          copy,
@@ -1025,7 +1029,7 @@ vm_remap(
        vm_offset_t             *address,
        vm_size_t               size,
        vm_offset_t             mask,
-       boolean_t               anywhere,
+       int                     flags,
        vm_map_t                src_map,
        vm_offset_t             memory_address,
        boolean_t               copy,
@@ -1039,13 +1043,17 @@ vm_remap(
        if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
                return KERN_INVALID_ARGUMENT;
 
+       /* filter out any kernel-only flags */
+       if (flags & ~VM_FLAGS_USER_REMAP)
+               return KERN_INVALID_ARGUMENT;
+
        map_addr = (vm_map_offset_t)*address;
 
        kr = vm_map_remap(target_map,
                          &map_addr,
                          size,
                          mask,
-                         anywhere,
+                         flags,
                          src_map,
                          memory_address,
                          copy,
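
With a flags word in place of the old anywhere boolean, user code can pass
VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE to replace an existing mapping with an alias of
another range in one atomic step; anything outside VM_FLAGS_USER_REMAP is rejected
as shown above. A sketch, assuming page-aligned addresses and size:

    #include <mach/mach.h>
    #include <mach/mach_vm.h>

    /* alias 'size' bytes at 'src' over the existing mapping at *dst */
    static kern_return_t
    remap_over(mach_vm_address_t *dst, mach_vm_address_t src, mach_vm_size_t size)
    {
            vm_prot_t cur_prot, max_prot;

            return mach_vm_remap(mach_task_self(), dst, size, 0,
                VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
                mach_task_self(), src, FALSE,
                &cur_prot, &max_prot, VM_INHERIT_NONE);
    }
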
index 0f96dcd722ccd7b2d55254cd4372fc4d101d3710..a4ca1cecd6b930c544cfe9a9865f4305c7f2c126 100644 (file)
@@ -292,8 +292,8 @@ L_dispatch:
 
        swapgs
 
-       cmpl    $(USER_CS), ISF64_CS(%rsp)
-       je      L_32bit_dispatch /* 32-bit user task */
+       cmpl    $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
+       je      L_32bit_dispatch        /* 32-bit user task */
        /* fall through to 64bit user dispatch */
 
 /*
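
The dispatch test now keys off the per-CPU task map rather than the %cs selector,
so a 32-bit task momentarily executing on a 64-bit code segment is still routed to
the 32-bit state-save path. The equivalent logic in C, with the dispatch helpers as
hypothetical stand-ins:

    if (current_cpu_datap()->cpu_task_map == TASK_MAP_32BIT)
            dispatch_32bit_state();     /* hypothetical: compact 32-bit save area */
    else
            dispatch_64bit_state();     /* hypothetical */
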
@@ -1104,7 +1104,7 @@ Entry(hndl_allintrs)
        leaq    -INTSTACK_SIZE(%rcx),%rdx
        cmpq    %rsp,%rdx
        jb      int_from_intstack
-1:     
+1:
        xchgq   %rcx,%rsp               /* switch to interrupt stack */
 
        mov     %cr0,%rax               /* get cr0 */
@@ -1208,13 +1208,13 @@ LEXT(return_to_iret)                    /* (label for kdb_kintr and hardclock) */
 int_from_intstack:
        incl    %gs:CPU_PREEMPTION_LEVEL
        incl    %gs:CPU_INTERRUPT_LEVEL
-
+       incl    %gs:CPU_NESTED_ISTACK
        mov     %rsp, %rdi              /* x86_saved_state */
        CCALL(interrupt)
 
        decl    %gs:CPU_INTERRUPT_LEVEL
        decl    %gs:CPU_PREEMPTION_LEVEL
-
+       decl    %gs:CPU_NESTED_ISTACK
 #if DEBUG_IDT64
        CCALL1(panic_idt64, %rsp)
        POSTCODE2(0x6411)
@@ -1352,15 +1352,16 @@ Entry(hndl_diag_scall)
        movq    ACT_TASK(%rcx),%rbx             /* point to current task  */
        TASK_VTIMER_CHECK(%rbx,%rcx)
 
-       pushq   %rdi                    /* push pcb stack so we can pop it later */
+       pushq   %rdi                    /* push pcb stack */
 
-       CCALL(diagCall)         // Call diagnostics
-       cli                             // Disable interruptions just in case they were enabled
-       popq    %rsp                    // Get back the original stack
-       
+       CCALL(diagCall)                 // Call diagnostics
+
+       cli                             // Disable interruptions just in case
        cmpl    $0,%eax                 // What kind of return is this?
-       jne     EXT(return_to_user)     // Normal return, do not check asts...
-                               
+       je      1f                      // - branch if bad (zero)
+       popq    %rsp                    // Get back the original stack
+       jmp     EXT(return_to_user)     // Normal return, do not check asts...
+1:
        CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
                // pass what would be the diag syscall
                // error return - cause an exception
@@ -1441,14 +1442,16 @@ Entry(hndl_mdep_scall64)
 
 
 Entry(hndl_diag_scall64)
-       pushq   %rdi                            // Push the previous stack
-       CCALL(diagCall64)                       // Call diagnostics
-       cli                                     // Disable interruptions just in case
-       popq    %rsp                            // Get back the original stack
-
-       cmpl    $0,%eax                         // What kind of return is this?
-       jne     EXT(return_to_user)             // Normal return, do not check asts...
-                               
+       pushq   %rdi                    // Push the previous stack
+
+       CCALL(diagCall64)               // Call diagnostics
+
+       cli                             // Disable interruptions just in case
+       cmpl    $0,%eax                 // What kind of return is this?
+       je      1f                      // - branch if bad (zero)
+       popq    %rsp                    // Get back the original stack
+       jmp     EXT(return_to_user)     // Normal return, do not check asts...
+1:
        CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
        /* no return */
 
index a7ad2bb9df340223094f3cc9241ef6c2842499a9..a8c8cbde4eef89a487b85d2597519cb9e79dac06 100644 (file)
@@ -212,8 +212,6 @@ int                 pv_hashed_kern_free_count = 0;
 
 zone_t         pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
 
-static zone_t pdpt_zone;
-
 /*
  *     Each entry in the pv_head_table is locked by a bit in the
  *     pv_lock_table.  The lock bits are accessed by the physical
@@ -644,7 +642,7 @@ pmap_init(void)
 {
        long                    npages;
        vm_offset_t             addr;
-       vm_size_t               s;
+       vm_size_t               s, vsize;
        vm_map_offset_t         vaddr;
        ppnum_t ppn;
 
@@ -683,6 +681,9 @@ pmap_init(void)
 
        memset((char *)addr, 0, s);
 
+       vaddr = addr;
+       vsize = s;
+
 #if PV_DEBUG
        if (0 == npvhash) panic("npvhash not initialized");
 #endif
@@ -714,23 +715,37 @@ pmap_init(void)
                for (pn = pmptr->base; pn <= pmptr->end; pn++) {
                        if (pn < last_pn) {
                                pmap_phys_attributes[pn] |= PHYS_MANAGED;
+
                                if (pn > last_managed_page)
                                        last_managed_page = pn;
+
+                               if (pn < lowest_lo)
+                                       pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
+                               else if (pn >= lowest_hi && pn <= highest_hi)
+                                       pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
+
                        }
                }
        }
+       while (vsize) {
+               ppn = pmap_find_phys(kernel_pmap, vaddr);
 
+               pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT;
+
+               vaddr += PAGE_SIZE;
+               vsize -= PAGE_SIZE;
+       }
        /*
         *      Create the zone of physical maps,
         *      and of the physical-to-virtual entries.
         */
        s = (vm_size_t) sizeof(struct pmap);
        pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
+        zone_change(pmap_zone, Z_NOENCRYPT, TRUE);
+
        s = (vm_size_t) sizeof(struct pv_hashed_entry);
        pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
-       s = 63;
-       pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
-
+       zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);
 
        /* create pv entries for kernel pages mapped by low level
           startup code.  these have to exist so we can pmap_remove()
@@ -2119,6 +2134,8 @@ pt_fake_zone_info(
        *exhaustable = 0;
 }
 
+extern         long    NMIPI_acks;
+
 static inline void
 pmap_cpuset_NMIPI(cpu_set cpu_mask) {
        unsigned int cpu, cpu_bit;
@@ -2221,17 +2238,7 @@ pmap_flush_tlbs(pmap_t   pmap)
                 * Wait for those other cpus to acknowledge
                 */
                while (cpus_to_respond != 0) {
-                       if (mach_absolute_time() > deadline) {
-                               if (mp_recent_debugger_activity())
-                                       continue;
-                               if (!panic_active()) {
-                                       pmap_tlb_flush_timeout = TRUE;
-                                       pmap_cpuset_NMIPI(cpus_to_respond);
-                               }
-                               panic("pmap_flush_tlbs() timeout: "
-                                   "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
-                                   pmap, cpus_to_respond);
-                       }
+                       long orig_acks = 0;
 
                        for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
                                if ((cpus_to_respond & cpu_bit) != 0) {
@@ -2245,6 +2252,17 @@ pmap_flush_tlbs(pmap_t   pmap)
                                if (cpus_to_respond == 0)
                                        break;
                        }
+                       if (mach_absolute_time() > deadline) {
+                               if (machine_timeout_suspended())
+                                       continue;
+                               pmap_tlb_flush_timeout = TRUE;
+                               orig_acks = NMIPI_acks;
+                               pmap_cpuset_NMIPI(cpus_to_respond);
+
+                               panic("TLB invalidation IPI timeout: "
+                                   "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
+                                   cpus_to_respond, orig_acks, NMIPI_acks);
+                       }
                }
        }
 
index 5a8fa5eff404a38874ddba79d85b82fcc97574ba..7b37f5eb20531fde793d6b83de8b6f874fda72f7 100644 (file)
@@ -75,12 +75,16 @@ int PE_initialize_console( PE_Video * info, int op )
 
         case kPEEnableScreen:
             initialize_screen(info, op);
-           if (info) PE_state.video = *info;
+            if (info) PE_state.video = *info;
             kprintf("kPEEnableScreen %d\n", last_console);
             if( last_console != -1)
                 switch_to_old_console( last_console);
             break;
        
+        case kPEBaseAddressChange:
+            if (info) PE_state.video = *info;
+            /* fall thru */
+
         default:
             initialize_screen(info, op);
             break;
index 5ef7a5bf665f42897020bee74cf286d27b401c0c..08ff10f84947dbf7a6d848db38533cc71422a9c9 100644 (file)
@@ -230,7 +230,7 @@ typedef struct {
 // range requires a runtime mapping
 //
 #define EFI_MEMORY_RUNTIME  0x8000000000000000ULL
-
+#define EFI_MEMORY_KERN_RESERVED (1ULL << 59)
 typedef EFI_UINT64  EFI_PHYSICAL_ADDRESS;
 typedef EFI_UINT64  EFI_VIRTUAL_ADDRESS;
 
index c9adfe5eb6c96c5dd716b765df78ec9f82bb396e..f3a539af826d815025f9999fc4e9939d99fa4ee1 100644 (file)
@@ -185,7 +185,11 @@ struct PE_Video {
        unsigned char   v_rotate;       /* Rotation: 0:normal, 1:right 90, 2:left 180, 3:left 90 */
        unsigned char   v_scale;        /* Scale Factor for both X & Y */
        char            reserved1[2];
+#ifdef __LP64__
        long            reserved2;
+#else
+       long            v_baseAddrHigh;
+#endif
 };
 
 typedef struct PE_Video       PE_Video;
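
On 32-bit (!__LP64__) kernels the new v_baseAddrHigh field carries the upper 32 bits
of a frame buffer placed above 4 GB, overlaying what was previously reserved padding.
A sketch of reassembling the full physical base under that assumption:

    uint64_t fb_base = ((uint64_t)PE_state.video.v_baseAddrHigh << 32)
        | (uint32_t)PE_state.video.v_baseAddr;
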
@@ -211,6 +215,7 @@ extern int PE_initialize_console(
 #define kPEReleaseScreen       5
 #define kPEEnableScreen                6
 #define kPEDisableScreen       7
+#define kPEBaseAddressChange   8
 
 extern void PE_display_icon( unsigned int flags,
                             const char * name );