From 060df5ea7c632b1ac8cc8aac1fb59758165c2084 Mon Sep 17 00:00:00 2001 From: Apple Date: Fri, 24 Jun 2011 14:00:12 +0000 Subject: [PATCH] xnu-1504.15.3.tar.gz --- bsd/conf/MASTER | 1 + bsd/conf/MASTER.i386 | 2 +- bsd/conf/MASTER.ppc | 2 +- bsd/conf/MASTER.x86_64 | 2 +- bsd/dev/i386/sysctl.c | 87 ++- bsd/dev/i386/unix_signal.c | 142 +++-- bsd/hfs/hfs.h | 5 +- bsd/hfs/hfs_vfsops.c | 19 +- bsd/hfs/hfs_xattr.c | 8 +- bsd/hfs/hfscommon/Misc/VolumeAllocation.c | 122 +++- bsd/i386/_structs.h | 36 ++ bsd/kern/pthread_synch.c | 11 +- bsd/miscfs/devfs/devfs_tree.c | 6 +- bsd/net/if_media.h | 2 + bsd/netinet6/ip6_fw.h | 3 +- bsd/netinet6/ip6_output.c | 22 +- bsd/netinet6/raw_ip6.c | 4 +- bsd/sys/buf_internal.h | 1 + bsd/sys/disk.h | 36 +- bsd/sys/kdebug.h | 1 + bsd/sys/mount.h | 5 +- bsd/sys/mount_internal.h | 3 +- bsd/vfs/vfs_cluster.c | 54 +- bsd/vfs/vfs_journal.c | 483 ++++++++++++++- bsd/vfs/vfs_journal.h | 20 +- bsd/vfs/vfs_subr.c | 5 +- config/IOKit.exports | 1 + config/MasterVersion | 2 +- config/Private.i386.exports | 2 +- config/System6.0.exports | 1 + config/Unsupported.exports | 1 + config/Unsupported.i386.exports | 5 +- config/Unsupported.x86_64.exports | 5 +- iokit/IOKit/IOInterruptEventSource.h | 7 + iokit/IOKit/IOKitDebug.h | 22 +- iokit/IOKit/IOReturn.h | 1 + iokit/IOKit/IOTimeStamp.h | 4 + iokit/IOKit/pwr_mgt/IOPM.h | 4 +- iokit/Kernel/IOCommandGate.cpp | 25 +- iokit/Kernel/IOCommandQueue.cpp | 16 +- iokit/Kernel/IOFilterInterruptEventSource.cpp | 76 +-- iokit/Kernel/IOHibernateIO.cpp | 104 +++- iokit/Kernel/IOInterruptController.cpp | 74 ++- iokit/Kernel/IOInterruptEventSource.cpp | 162 ++--- iokit/Kernel/IOKitDebug.cpp | 15 +- iokit/Kernel/IOKitKernelInternal.h | 6 +- iokit/Kernel/IOMemoryDescriptor.cpp | 174 ++++-- iokit/Kernel/IOPMrootDomain.cpp | 9 +- iokit/Kernel/IOPlatformExpert.cpp | 6 +- iokit/Kernel/IOService.cpp | 12 +- iokit/Kernel/IOServicePM.cpp | 26 +- iokit/Kernel/IOServicePMPrivate.h | 5 + iokit/Kernel/IOStartIOKit.cpp | 20 +- iokit/Kernel/IOTimerEventSource.cpp | 32 +- iokit/Kernel/IOWorkLoop.cpp | 27 +- kgmacros | 38 +- osfmk/conf/Makefile.x86_64 | 1 - osfmk/console/i386/serial_console.c | 2 + osfmk/console/video_console.c | 187 ++++-- osfmk/console/video_console.h | 2 +- osfmk/i386/AT386/model_dep.c | 2 +- osfmk/i386/acpi.c | 22 +- osfmk/i386/cpu_data.h | 15 +- osfmk/i386/cpu_topology.h | 4 +- osfmk/i386/cpuid.c | 149 +++-- osfmk/i386/cpuid.h | 159 +++-- osfmk/i386/etimer.c | 98 +-- osfmk/i386/fpu.c | 576 ++++++++++++------ osfmk/i386/fpu.h | 101 +-- osfmk/i386/genassym.c | 4 +- osfmk/i386/i386_init.c | 10 +- osfmk/i386/i386_vm_init.c | 72 ++- osfmk/i386/io_map.c | 4 +- osfmk/i386/io_map_entries.h | 4 +- osfmk/i386/lapic.c | 324 ++++++++-- osfmk/i386/lapic.h | 108 +++- osfmk/i386/locks_i386.c | 47 +- osfmk/i386/locore.s | 3 +- osfmk/i386/machine_routines.c | 11 +- osfmk/i386/machine_routines.h | 11 +- osfmk/i386/misc_protos.h | 2 +- osfmk/i386/mp.c | 43 +- osfmk/i386/mp.h | 3 + osfmk/i386/pcb.c | 74 ++- osfmk/i386/pmCPU.c | 80 ++- osfmk/i386/pmCPU.h | 29 +- osfmk/i386/pmap.c | 32 +- osfmk/i386/pmap.h | 2 +- osfmk/i386/pmap_internal.h | 4 + osfmk/i386/pmap_x86_common.c | 18 + osfmk/i386/proc_reg.h | 257 ++++---- osfmk/i386/rtclock.c | 279 ++++++--- osfmk/i386/rtclock.h | 12 +- osfmk/i386/seg.h | 20 +- osfmk/i386/start64.s | 24 +- osfmk/i386/thread.h | 16 +- osfmk/i386/trap.c | 78 ++- osfmk/i386/tsc.c | 1 + osfmk/kdp/kdp.c | 8 +- osfmk/kdp/kdp_udp.c | 13 +- osfmk/kern/etimer.h | 6 +- osfmk/kern/sched_prim.c | 4 +- osfmk/kern/wait_queue.h | 25 +- 
osfmk/mach/i386/_structs.h | 232 +++++++ osfmk/mach/i386/fp_reg.h | 74 ++- osfmk/mach/i386/thread_status.h | 16 +- osfmk/mach/mach_vm.defs | 2 +- osfmk/mach/vm_map.defs | 2 +- osfmk/mach/vm_statistics.h | 6 + osfmk/ppc/io_map.c | 6 +- osfmk/ppc/io_map_entries.h | 4 +- osfmk/ppc/machine_routines.c | 4 + osfmk/ppc/machine_routines.h | 1 + osfmk/vm/vm_map.c | 48 +- osfmk/vm/vm_map.h | 2 +- osfmk/vm/vm_resident.c | 1 - osfmk/vm/vm_user.c | 16 +- osfmk/x86_64/idt64.s | 43 +- osfmk/x86_64/pmap.c | 52 +- pexpert/i386/pe_init.c | 6 +- pexpert/pexpert/i386/efi.h | 2 +- pexpert/pexpert/pexpert.h | 5 + 122 files changed, 3962 insertions(+), 1470 deletions(-) diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER index fd9635408..93872ad07 100644 --- a/bsd/conf/MASTER +++ b/bsd/conf/MASTER @@ -189,6 +189,7 @@ options FDESC # fdesc_fs support # options DEVFS # devfs support # options JOURNALING # journaling support # options HFS_COMPRESSION # hfs compression # +options CONFIG_HFS_TRIM # HFS trims unused blocks # # # file system features diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386 index f5e48b2f5..1e6641911 100644 --- a/bsd/conf/MASTER.i386 +++ b/bsd/conf/MASTER.i386 @@ -45,7 +45,7 @@ # Standard Apple Research Configurations: # -------- ----- -------- --------------- # BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ] +# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression config_imgsrc_access ] # NETWORKING = [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow pkt_priority if_bridge ] # NFS = [ nfsclient nfsserver ] # VPN = [ ipsec ] diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc index bd15e6657..d99b6e4f5 100644 --- a/bsd/conf/MASTER.ppc +++ b/bsd/conf/MASTER.ppc @@ -46,7 +46,7 @@ # -------- ----- -------- --------------- # # BASE = [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ] +# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression ] # NETWORKING = [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow pkt_priority ] # NFS = [ nfsclient nfsserver ] # VPN = [ ipsec ] diff --git a/bsd/conf/MASTER.x86_64 b/bsd/conf/MASTER.x86_64 index b43836a82..1050897d2 100644 --- a/bsd/conf/MASTER.x86_64 +++ b/bsd/conf/MASTER.x86_64 @@ -45,7 +45,7 @@ # Standard Apple Research Configurations: # -------- ----- -------- --------------- # BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota 
namedstreams fifo union config_volfs hfs_compression config_imgsrc_access ]
+# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression config_imgsrc_access ]
 # NETWORKING = [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow pkt_priority if_bridge ]
 # NFS = [ nfsclient nfsserver ]
 # VPN = [ ipsec ]
diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c
index 597a208c1..c255529a1 100644
--- a/bsd/dev/i386/sysctl.c
+++ b/bsd/dev/i386/sysctl.c
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include
 static int
 _i386_cpu_info SYSCTL_HANDLER_ARGS
@@ -105,13 +106,23 @@ cpu_arch_perf SYSCTL_HANDLER_ARGS
     return _i386_cpu_info(oidp, ptr, arg2, req);
 }
+static int
+cpu_xsave SYSCTL_HANDLER_ARGS
+{
+    i386_cpu_info_t *cpu_info = cpuid_info();
+    void *ptr = (uint8_t *)cpu_info->cpuid_xsave_leafp + (uintptr_t)arg1;
+    if (cpu_info->cpuid_xsave_leafp == NULL)
+        return ENOENT;
+    return _i386_cpu_info(oidp, ptr, arg2, req);
+}
+
 static int
 cpu_features SYSCTL_HANDLER_ARGS
 {
     __unused struct sysctl_oid *unused_oidp = oidp;
     __unused void *unused_arg1 = arg1;
     __unused int unused_arg2 = arg2;
-    char buf[256];
+    char buf[512];
     buf[0] = '\0';
     cpuid_get_feature_names(cpuid_features(), buf, sizeof(buf));
@@ -125,7 +136,7 @@ cpu_extfeatures SYSCTL_HANDLER_ARGS
     __unused struct sysctl_oid *unused_oidp = oidp;
     __unused void *unused_arg1 = arg1;
     __unused int unused_arg2 = arg2;
-    char buf[256];
+    char buf[512];
     buf[0] = '\0';
     cpuid_get_extfeature_names(cpuid_extfeatures(), buf, sizeof(buf));
@@ -190,6 +201,28 @@ cpu_flex_ratio_max SYSCTL_HANDLER_ARGS
     return SYSCTL_OUT(req, &flex_ratio_max, sizeof(flex_ratio_max));
 }
+/*
+ * Populates the {CPU, vector, latency} triple for the maximum observed primary
+ * interrupt latency
+ */
+static int
+misc_interrupt_latency_max(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+    int changed = 0, error;
+    char buf[128];
+    buf[0] = '\0';
+
+    interrupt_populate_latency_stats(buf, sizeof(buf));
+
+    error = sysctl_io_string(req, buf, sizeof(buf), 0, &changed);
+
+    if (error == 0 && changed) {
+        interrupt_reset_latency_stats();
+    }
+
+    return error;
+}
+
 SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
     "CPU info");
@@ -332,6 +365,46 @@ SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, ACNT_MCNT,
     sizeof(boolean_t),
     cpu_thermal, "I", "ACNT_MCNT capability");
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, core_power_limits,
+    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+    (void *)offsetof(cpuid_thermal_leaf_t, core_power_limits),
+    sizeof(boolean_t),
+    cpu_thermal, "I", "Power Limit Notifications at a Core Level");
+
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, fine_grain_clock_mod,
+    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+    (void *)offsetof(cpuid_thermal_leaf_t, fine_grain_clock_mod),
+    sizeof(boolean_t),
+    cpu_thermal, "I", "Fine Grain Clock Modulation");
+
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, package_thermal_intr,
+    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+    (void *)offsetof(cpuid_thermal_leaf_t, package_thermal_intr),
+    sizeof(boolean_t),
+    cpu_thermal, "I", "Package Thermal interrupt and Status");
+
+SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, hardware_feedback,
+    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+    (void *)offsetof(cpuid_thermal_leaf_t, hardware_feedback),
+    sizeof(boolean_t),
+
cpu_thermal, "I", "Hardware Coordination Feedback"); + +SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, energy_policy, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)offsetof(cpuid_thermal_leaf_t, energy_policy), + sizeof(boolean_t), + cpu_thermal, "I", "Energy Efficient Policy Support"); + + +SYSCTL_NODE(_machdep_cpu, OID_AUTO, xsave, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "xsave"); + +SYSCTL_PROC(_machdep_cpu_xsave, OID_AUTO, extended_state, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *)offsetof(cpuid_xsave_leaf_t, extended_state), + sizeof(cpuid_xsave_leaf_t), + cpu_xsave, "IU", "XSAVE Extended State"); + SYSCTL_NODE(_machdep_cpu, OID_AUTO, arch_perf, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "arch_perf"); @@ -544,3 +617,13 @@ SYSCTL_QUAD(_machdep_memmap, OID_AUTO, PalCode, CTLFLAG_RD|CTLFLAG_LOCKED, &firm SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Reserved, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_Reserved_bytes, ""); SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Unusable, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_Unusable_bytes, ""); SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Other, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_other_bytes, ""); + +SYSCTL_NODE(_machdep, OID_AUTO, tsc, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "Timestamp counter parameters"); + +SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency, CTLFLAG_RD|CTLFLAG_LOCKED, &tscFreq, ""); +SYSCTL_NODE(_machdep, OID_AUTO, misc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "Miscellaneous x86 kernel parameters"); + +SYSCTL_PROC(_machdep_misc, OID_AUTO, interrupt_latency_max, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, + misc_interrupt_latency_max, "A", "Maximum Interrupt latency"); diff --git a/bsd/dev/i386/unix_signal.c b/bsd/dev/i386/unix_signal.c index eb96e879e..06ed4172c 100644 --- a/bsd/dev/i386/unix_signal.c +++ b/bsd/dev/i386/unix_signal.c @@ -137,10 +137,11 @@ siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out) void sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint32_t code) { - union { - struct mcontext32 mctx32; - struct mcontext64 mctx64; - } mctx; + union { + struct mcontext_avx32 mctx_avx32; + struct mcontext_avx64 mctx_avx64; + } mctx_store, *mctxp = &mctx_store; + user_addr_t ua_sp; user_addr_t ua_fp; user_addr_t ua_cr2; @@ -160,7 +161,8 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint struct uthread * ut; int stack_size = 0; int infostyle = UC_TRAD; - + boolean_t sig_avx; + thread = current_thread(); ut = get_bsdthread_info(thread); @@ -178,7 +180,9 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint bzero((caddr_t)&sinfo64, sizeof(sinfo64)); sinfo64.si_signo = sig; - + + bzero(mctxp, sizeof(*mctxp)); + sig_avx = ml_fpu_avx_enabled(); if (proc_is64bit(p)) { x86_thread_state64_t *tstate64; @@ -186,23 +190,29 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint flavor = x86_THREAD_STATE64; state_count = x86_THREAD_STATE64_COUNT; - state = (void *)&mctx.mctx64.ss; + state = (void *)&mctxp->mctx_avx64.ss; if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) goto bad; - flavor = x86_FLOAT_STATE64; - state_count = x86_FLOAT_STATE64_COUNT; - state = (void *)&mctx.mctx64.fs; + if (sig_avx) { + flavor = x86_AVX_STATE64; + state_count = x86_AVX_STATE64_COUNT; + } + else { + flavor = x86_FLOAT_STATE64; + state_count = x86_FLOAT_STATE64_COUNT; + } + state = (void *)&mctxp->mctx_avx64.fs; if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) goto bad; flavor = 
x86_EXCEPTION_STATE64; state_count = x86_EXCEPTION_STATE64_COUNT; - state = (void *)&mctx.mctx64.es; + state = (void *)&mctxp->mctx_avx64.es; if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) goto bad; - tstate64 = &mctx.mctx64.ss; + tstate64 = &mctxp->mctx_avx64.ss; /* figure out where our new stack lives */ if ((ut->uu_flag & UT_ALTSTACK) && !oonstack && @@ -214,7 +224,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint } else { ua_sp = tstate64->rsp; } - ua_cr2 = mctx.mctx64.es.faultvaddr; + ua_cr2 = mctxp->mctx_avx64.es.faultvaddr; /* The x86_64 ABI defines a 128-byte red zone. */ ua_sp -= C_64_REDZONE_LEN; @@ -225,7 +235,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint ua_sp -= sizeof (user64_siginfo_t); ua_sip = ua_sp; - ua_sp -= sizeof (struct mcontext64); + ua_sp -= sizeof (struct mcontext_avx64); ua_mctxp = ua_sp; /* @@ -254,13 +264,13 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint uctx64.uc_stack.ss_flags |= SS_ONSTACK; uctx64.uc_link = 0; - uctx64.uc_mcsize = sizeof(struct mcontext64); + uctx64.uc_mcsize = sig_avx ? sizeof(struct mcontext_avx64) : sizeof(struct mcontext64); uctx64.uc_mcontext64 = ua_mctxp; if (copyout((caddr_t)&uctx64, ua_uctxp, sizeof (uctx64))) goto bad; - if (copyout((caddr_t)&mctx.mctx64, ua_mctxp, sizeof (struct mcontext64))) + if (copyout((caddr_t)&mctxp->mctx_avx64, ua_mctxp, sizeof (struct mcontext_avx64))) goto bad; sinfo64.pad[0] = tstate64->rsp; @@ -293,23 +303,30 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint flavor = x86_THREAD_STATE32; state_count = x86_THREAD_STATE32_COUNT; - state = (void *)&mctx.mctx32.ss; + state = (void *)&mctxp->mctx_avx32.ss; if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) goto bad; - flavor = x86_FLOAT_STATE32; - state_count = x86_FLOAT_STATE32_COUNT; - state = (void *)&mctx.mctx32.fs; + if (sig_avx) { + flavor = x86_AVX_STATE32; + state_count = x86_AVX_STATE32_COUNT; + } + else { + flavor = x86_FLOAT_STATE32; + state_count = x86_FLOAT_STATE32_COUNT; + } + + state = (void *)&mctxp->mctx_avx32.fs; if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) goto bad; flavor = x86_EXCEPTION_STATE32; state_count = x86_EXCEPTION_STATE32_COUNT; - state = (void *)&mctx.mctx32.es; + state = (void *)&mctxp->mctx_avx32.es; if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) goto bad; - tstate32 = &mctx.mctx32.ss; + tstate32 = &mctxp->mctx_avx32.ss; /* figure out where our new stack lives */ if ((ut->uu_flag & UT_ALTSTACK) && !oonstack && @@ -321,7 +338,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint } else { ua_sp = tstate32->esp; } - ua_cr2 = mctx.mctx32.es.faultvaddr; + ua_cr2 = mctxp->mctx_avx32.es.faultvaddr; ua_sp -= sizeof (struct user_ucontext32); ua_uctxp = ua_sp; // someone tramples the first word! 
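The uc_mcsize assignment above is the only indication a user process gets of which machine-context flavor sendsig() delivered: sizeof(struct mcontext_avx64) (or the avx32 variant) when ml_fpu_avx_enabled() returns true, and the legacy mcontext sizes otherwise. Below is a minimal user-space sketch of a handler that branches on that size; the handler and flag names are illustrative, and the struct name assumes the __DARWIN_UNIX03 definitions this patch adds to bsd/i386/_structs.h.

#include <signal.h>
#include <string.h>
#include <sys/ucontext.h>

static volatile sig_atomic_t got_avx_state;

/* Illustrative handler: sendsig() sets uc_mcsize to the AVX context size
 * only when the kernel actually saved AVX (x86_AVX_STATE64) state. */
static void
avx_aware_handler(int sig, siginfo_t *info, void *uap)
{
    ucontext_t *uc = (ucontext_t *)uap;

    got_avx_state = (uc->uc_mcsize == sizeof(struct __darwin_mcontext_avx64));
    (void)sig;
    (void)info;
}

int
install_handler(void)
{
    struct sigaction sa;

    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = avx_aware_handler;
    sa.sa_flags = SA_SIGINFO;   /* ask for the siginfo/ucontext delivery style */
    return sigaction(SIGFPE, &sa, NULL);
}

A 32-bit process would compare against struct __darwin_mcontext_avx32 instead. Note that sigreturn() in this patch picks its floating-point flavor from ml_fpu_avx_enabled() rather than from the size the caller passes back.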
@@ -329,7 +346,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint ua_sp -= sizeof (user32_siginfo_t); ua_sip = ua_sp; - ua_sp -= sizeof (struct mcontext32); + ua_sp -= sizeof (struct mcontext_avx32); ua_mctxp = ua_sp; ua_sp -= sizeof (struct sigframe32); @@ -375,14 +392,14 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint uctx32.uc_stack.ss_flags |= SS_ONSTACK; uctx32.uc_link = 0; - uctx32.uc_mcsize = sizeof(struct mcontext32); + uctx32.uc_mcsize = sig_avx ? sizeof(struct mcontext_avx32) : sizeof(struct mcontext32); uctx32.uc_mcontext = CAST_DOWN_EXPLICIT(user32_addr_t, ua_mctxp); if (copyout((caddr_t)&uctx32, ua_uctxp, sizeof (uctx32))) goto bad; - if (copyout((caddr_t)&mctx.mctx32, ua_mctxp, sizeof (struct mcontext32))) + if (copyout((caddr_t)&mctxp->mctx_avx32, ua_mctxp, sizeof (struct mcontext_avx32))) goto bad; sinfo64.pad[0] = tstate32->esp; @@ -536,7 +553,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint flavor = x86_THREAD_STATE64; state_count = x86_THREAD_STATE64_COUNT; - state = (void *)&mctx.mctx64.ss; + state = (void *)&mctxp->mctx_avx64.ss; } else { x86_thread_state32_t *tstate32; user32_siginfo_t sinfo32; @@ -571,7 +588,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint if (copyout((caddr_t)&sinfo32, ua_sip, sizeof (sinfo32))) goto bad; - tstate32 = &mctx.mctx32.ss; + tstate32 = &mctxp->mctx_avx32.ss; tstate32->eip = CAST_DOWN_EXPLICIT(user32_addr_t, trampact); tstate32->esp = CAST_DOWN_EXPLICIT(user32_addr_t, ua_fp); @@ -599,6 +616,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint return; bad: + proc_lock(p); SIGACTION(p, SIGILL) = SIG_DFL; sig = sigmask(SIGILL); @@ -626,10 +644,11 @@ bad: int sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) { - union { - struct mcontext32 mctx32; - struct mcontext64 mctx64; - } mctx; + union { + struct mcontext_avx32 mctx_avx32; + struct mcontext_avx64 mctx_avx64; + } mctx_store, *mctxp = &mctx_store; + thread_t thread = current_thread(); struct uthread * ut; int error; @@ -641,6 +660,8 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) mach_msg_type_number_t fs_count; unsigned int fs_flavor; void * fs; + int rval = EJUSTRETURN; + boolean_t sig_avx; ut = (struct uthread *)get_bsdthread_info(thread); @@ -656,25 +677,35 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) return (0); } + bzero(mctxp, sizeof(*mctxp)); + sig_avx = ml_fpu_avx_enabled(); + if (proc_is64bit(p)) { struct user_ucontext64 uctx64; if ((error = copyin(uap->uctx, (void *)&uctx64, sizeof (uctx64)))) return(error); - if ((error = copyin(uctx64.uc_mcontext64, (void *)&mctx.mctx64, sizeof (struct mcontext64)))) + if ((error = copyin(uctx64.uc_mcontext64, (void *)&mctxp->mctx_avx64, sizeof (struct mcontext_avx64)))) return(error); onstack = uctx64.uc_onstack & 01; ut->uu_sigmask = uctx64.uc_sigmask & ~sigcantmask; - ts_flavor = x86_THREAD_STATE64; + ts_flavor = x86_THREAD_STATE64; ts_count = x86_THREAD_STATE64_COUNT; - ts = (void *)&mctx.mctx64.ss; + ts = (void *)&mctxp->mctx_avx64.ss; - fs_flavor = x86_FLOAT_STATE64; - fs_count = x86_FLOAT_STATE64_COUNT; - fs = (void *)&mctx.mctx64.fs; + if (sig_avx) { + fs_flavor = x86_AVX_STATE64; + fs_count = x86_AVX_STATE64_COUNT; + } + else { + fs_flavor = x86_FLOAT_STATE64; + fs_count = x86_FLOAT_STATE64_COUNT; + } + + fs = (void *)&mctxp->mctx_avx64.fs; } else { struct 
user_ucontext32 uctx32; @@ -682,7 +713,7 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) if ((error = copyin(uap->uctx, (void *)&uctx32, sizeof (uctx32)))) return(error); - if ((error = copyin(CAST_USER_ADDR_T(uctx32.uc_mcontext), (void *)&mctx.mctx32, sizeof (struct mcontext32)))) + if ((error = copyin(CAST_USER_ADDR_T(uctx32.uc_mcontext), (void *)&mctxp->mctx_avx32, sizeof (struct mcontext_avx32)))) return(error); onstack = uctx32.uc_onstack & 01; @@ -690,11 +721,18 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) ts_flavor = x86_THREAD_STATE32; ts_count = x86_THREAD_STATE32_COUNT; - ts = (void *)&mctx.mctx32.ss; + ts = (void *)&mctxp->mctx_avx32.ss; + + if (sig_avx) { + fs_flavor = x86_AVX_STATE32; + fs_count = x86_AVX_STATE32_COUNT; + } + else { + fs_flavor = x86_FLOAT_STATE32; + fs_count = x86_FLOAT_STATE32_COUNT; + } - fs_flavor = x86_FLOAT_STATE32; - fs_count = x86_FLOAT_STATE32_COUNT; - fs = (void *)&mctx.mctx32.fs; + fs = (void *)&mctxp->mctx_avx32.fs; } if (onstack) @@ -704,20 +742,24 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) if (ut->uu_siglist & ~ut->uu_sigmask) signal_setast(thread); - /* * thread_set_state() does all the needed checks for the passed in * content */ - if (thread_setstatus(thread, ts_flavor, ts, ts_count) != KERN_SUCCESS) - return(EINVAL); - + if (thread_setstatus(thread, ts_flavor, ts, ts_count) != KERN_SUCCESS) { + rval = EINVAL; + goto error_ret; + } + ml_fp_setvalid(TRUE); - if (thread_setstatus(thread, fs_flavor, fs, fs_count) != KERN_SUCCESS) - return(EINVAL); + if (thread_setstatus(thread, fs_flavor, fs, fs_count) != KERN_SUCCESS) { + rval = EINVAL; + goto error_ret; - return (EJUSTRETURN); + } +error_ret: + return rval; } diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index 89f97ebc1..24807f7f3 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -312,7 +313,6 @@ typedef struct hfsmount { u_int64_t hfs_max_pending_io; thread_call_t hfs_syncer; // removeable devices get sync'ed by this guy - } hfsmount_t; #define HFS_META_DELAY (100) @@ -391,6 +391,7 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; */ #define HFS_RDONLY_DOWNGRADE 0x80000 #define HFS_DID_CONTIG_SCAN 0x100000 +#define HFS_UNMAP 0x200000 /* Macro to update next allocation block in the HFS mount structure. If diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index aaac6d0df..7a049916f 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -985,7 +985,8 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, daddr64_t mdb_offset; int isvirtual = 0; int isroot = 0; - + u_int32_t device_features = 0; + if (args == NULL) { /* only hfs_mountroot passes us NULL as the 'args' argument */ isroot = 1; @@ -1121,7 +1122,19 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, bzero(hfsmp, sizeof(struct hfsmount)); hfs_chashinit_finish(hfsmp); - + + /* + * See if the disk supports unmap (trim). + * + * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field + * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves. 
+ */ + if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) { + if (device_features & DK_FEATURE_UNMAP) { + hfsmp->hfs_flags |= HFS_UNMAP; + } + } + /* * Init the volume information structure */ @@ -1615,7 +1628,7 @@ error_exit: vnode_rele(hfsmp->hfs_devvp); } hfs_delete_chash(hfsmp); - + FREE(hfsmp, M_HFSMNT); vfs_setfsprivate(mp, NULL); } diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c index 598f1dd7b..6eec7028b 100644 --- a/bsd/hfs/hfs_xattr.c +++ b/bsd/hfs/hfs_xattr.c @@ -1184,7 +1184,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) bzero(iterator, sizeof(*iterator)); if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { - return (result); + goto exit_nolock; } result = hfs_buildattrkey(cp->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key); @@ -1228,6 +1228,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) hfs_end_transaction(hfsmp); exit: hfs_unlock(cp); +exit_nolock: FREE(iterator, M_TEMP); return MacToVFSError(result); } @@ -1545,7 +1546,10 @@ exit: if (user_start) { vsunlock(user_start, user_len, TRUE); } - FREE(iterator, M_TEMP); + + if (iterator) { + FREE(iterator, M_TEMP); + } hfs_unlock(cp); diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c index 3d8255a9e..bc58bd947 100644 --- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c +++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -87,7 +87,9 @@ Internal routines: #include #include #include +#include #include +#include #include "../../hfs.h" #include "../../hfs_dbg.h" @@ -96,6 +98,10 @@ Internal routines: #include "../headers/FileMgrInternal.h" +#ifndef CONFIG_HFS_TRIM +#define CONFIG_HFS_TRIM 0 +#endif + enum { kBytesPerWord = 4, @@ -158,6 +164,86 @@ static OSErr BlockAllocateKnown( static int free_extent_cache_active( ExtendedVCB *vcb); + +/* +;________________________________________________________________________________ +; +; Routine: hfs_unmap_free_extent +; +; Function: Make note of a range of allocation blocks that should be +; unmapped (trimmed). That is, the given range of blocks no +; longer have useful content, and the device can unmap the +; previous contents. For example, a solid state disk may reuse +; the underlying storage for other blocks. +; +; This routine is only supported for journaled volumes. The extent +; being freed is passed to the journal code, and the extent will +; be unmapped after the current transaction is written to disk. +; +; Input Arguments: +; hfsmp - The volume containing the allocation blocks. +; startingBlock - The first allocation block of the extent being freed. +; numBlocks - The number of allocation blocks of the extent being freed. 
;________________________________________________________________________________
+*/
+static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+    if (CONFIG_HFS_TRIM) {
+        u_int64_t offset;
+        u_int64_t length;
+        int err;
+
+        if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
+            offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+            length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+            err = journal_trim_add_extent(hfsmp->jnl, offset, length);
+            if (err) {
+                printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
+                hfsmp->hfs_flags &= ~HFS_UNMAP;
+            }
+        }
+    }
+}
+
+
+/*
+;________________________________________________________________________________
+;
+; Routine:     hfs_unmap_alloc_extent
+;
+; Function:    Make note that a range of allocation blocks, some of
+;              which may have previously been passed to hfs_unmap_free_extent,
+;              is now in use on the volume.  The given blocks will be removed
+;              from any pending DKIOCUNMAP.
+;
+; Input Arguments:
+;    hfsmp          - The volume containing the allocation blocks.
+;    startingBlock  - The first allocation block of the extent being allocated.
+;    numBlocks      - The number of allocation blocks being allocated.
+;________________________________________________________________________________
+*/
+static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+    if (CONFIG_HFS_TRIM) {
+        u_int64_t offset;
+        u_int64_t length;
+        int err;
+
+        if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
+            offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+            length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+            err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
+            if (err) {
+                printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
+                hfsmp->hfs_flags &= ~HFS_UNMAP;
+            }
+        }
+    }
+}
+
+
 /*
 ;________________________________________________________________________________
 ;
@@ -1038,9 +1124,15 @@ Exit:
     if (err == noErr) {
         *actualNumBlocks = block - *actualStartBlock;
-        // sanity check
-        if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
-            panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
+        // sanity check
+        if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
+            panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
+        }
+
+        /* Remove these blocks from the TRIM list if applicable */
+        if (CONFIG_HFS_TRIM) {
+            hfs_unmap_alloc_extent(vcb, *actualStartBlock, *actualNumBlocks);
+        }
     }
     else {
         *actualStartBlock = 0;
@@ -1212,7 +1304,10 @@ OSErr BlockMarkAllocated(
     // XXXdbg
     struct hfsmount *hfsmp = VCBTOHFS(vcb);
-
+    if (CONFIG_HFS_TRIM) {
+        hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
+    }
+
     //
     // Pre-read the bitmap block containing the first word of allocation
     //
@@ -1365,10 +1460,12 @@ _______________________________________________________________________
 __private_extern__
 OSErr BlockMarkFree(
     ExtendedVCB *vcb,
-    u_int32_t startingBlock,
-    register u_int32_t numBlocks)
+    u_int32_t startingBlock_in,
+    register u_int32_t numBlocks_in)
 {
     OSErr err;
+    u_int32_t startingBlock = startingBlock_in;
+    u_int32_t numBlocks = numBlocks_in;
     register u_int32_t *currentWord;  // Pointer to current word within bitmap block
     register u_int32_t wordsLeft;     // Number of words left in this bitmap block
     register u_int32_t bitMask;       // Word with given bits already set (ready to OR in)
@@
-1380,7 +1477,6 @@ OSErr BlockMarkFree( u_int32_t wordsPerBlock; // XXXdbg struct hfsmount *hfsmp = VCBTOHFS(vcb); - dk_discard_t discard; /* * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we @@ -1393,11 +1489,6 @@ OSErr BlockMarkFree( goto Exit; } - memset(&discard, 0, sizeof(dk_discard_t)); - discard.offset = (uint64_t)startingBlock * (uint64_t)vcb->blockSize; - discard.length = (uint64_t)numBlocks * (uint64_t)vcb->blockSize; - - // // Pre-read the bitmap block containing the first word of allocation // @@ -1521,9 +1612,8 @@ Exit: if (buffer) (void)ReleaseBitmapBlock(vcb, blockRef, true); - if (err == noErr) { - // it doesn't matter if this fails, it's just informational anyway - VNOP_IOCTL(vcb->hfs_devvp, DKIOCDISCARD, (caddr_t)&discard, 0, vfs_context_kernel()); + if (CONFIG_HFS_TRIM && err == noErr) { + hfs_unmap_free_extent(vcb, startingBlock_in, numBlocks_in); } diff --git a/bsd/i386/_structs.h b/bsd/i386/_structs.h index 9cad355eb..3bdac83bc 100644 --- a/bsd/i386/_structs.h +++ b/bsd/i386/_structs.h @@ -51,6 +51,15 @@ _STRUCT_MCONTEXT32 _STRUCT_X86_THREAD_STATE32 __ss; _STRUCT_X86_FLOAT_STATE32 __fs; }; + +#define _STRUCT_MCONTEXT_AVX32 struct __darwin_mcontext_avx32 +_STRUCT_MCONTEXT_AVX32 +{ + _STRUCT_X86_EXCEPTION_STATE32 __es; + _STRUCT_X86_THREAD_STATE32 __ss; + _STRUCT_X86_AVX_STATE32 __fs; +}; + #else /* !__DARWIN_UNIX03 */ #define _STRUCT_MCONTEXT32 struct mcontext32 _STRUCT_MCONTEXT32 @@ -59,6 +68,15 @@ _STRUCT_MCONTEXT32 _STRUCT_X86_THREAD_STATE32 ss; _STRUCT_X86_FLOAT_STATE32 fs; }; + +#define _STRUCT_MCONTEXT_AVX32 struct mcontext_avx32 +_STRUCT_MCONTEXT_AVX32 +{ + _STRUCT_X86_EXCEPTION_STATE32 es; + _STRUCT_X86_THREAD_STATE32 ss; + _STRUCT_X86_AVX_STATE32 fs; +}; + #endif /* __DARWIN_UNIX03 */ #endif /* _STRUCT_MCONTEXT32 */ @@ -71,6 +89,15 @@ _STRUCT_MCONTEXT64 _STRUCT_X86_THREAD_STATE64 __ss; _STRUCT_X86_FLOAT_STATE64 __fs; }; + +#define _STRUCT_MCONTEXT_AVX64 struct __darwin_mcontext_avx64 +_STRUCT_MCONTEXT_AVX64 +{ + _STRUCT_X86_EXCEPTION_STATE64 __es; + _STRUCT_X86_THREAD_STATE64 __ss; + _STRUCT_X86_AVX_STATE64 __fs; +}; + #else /* !__DARWIN_UNIX03 */ #define _STRUCT_MCONTEXT64 struct mcontext64 _STRUCT_MCONTEXT64 @@ -79,6 +106,15 @@ _STRUCT_MCONTEXT64 _STRUCT_X86_THREAD_STATE64 ss; _STRUCT_X86_FLOAT_STATE64 fs; }; + +#define _STRUCT_MCONTEXT_AVX64 struct mcontext_avx64 +_STRUCT_MCONTEXT_AVX64 +{ + _STRUCT_X86_EXCEPTION_STATE64 es; + _STRUCT_X86_THREAD_STATE64 ss; + _STRUCT_X86_AVX_STATE64 fs; +}; + #endif /* __DARWIN_UNIX03 */ #endif /* _STRUCT_MCONTEXT64 */ #endif /* __need_struct_mcontext */ diff --git a/bsd/kern/pthread_synch.c b/bsd/kern/pthread_synch.c index a29065584..7a00399cc 100644 --- a/bsd/kern/pthread_synch.c +++ b/bsd/kern/pthread_synch.c @@ -2033,6 +2033,7 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl, int reuse_thread, int wake_thread, int return_directly) { int ret = 0; + boolean_t need_resume = FALSE; KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th)); @@ -2063,11 +2064,19 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl, if (tl->th_flags & TH_LIST_NEED_WAKEUP) wakeup(tl); else - thread_resume(th); + need_resume = TRUE; tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP); workqueue_unlock(p); + + if (need_resume) { + /* + * need to do this outside of the workqueue spin lock + * since thread_resume locks the thread via a full mutex + */ + thread_resume(th); + } } } diff --git 
a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c index d8f3ae088..58aea8eb9 100644 --- a/bsd/miscfs/devfs/devfs_tree.c +++ b/bsd/miscfs/devfs/devfs_tree.c @@ -1050,12 +1050,12 @@ dev_free_name(devdirent_t * dirent_p) if(dnp->dn_linklist == dirent_p) { dnp->dn_linklist = dirent_p->de_nextlink; } - dirent_p->de_nextlink->de_prevlinkp - = dirent_p->de_prevlinkp; - *dirent_p->de_prevlinkp = dirent_p->de_nextlink; } devfs_dn_free(dnp); } + + dirent_p->de_nextlink->de_prevlinkp = dirent_p->de_prevlinkp; + *(dirent_p->de_prevlinkp) = dirent_p->de_nextlink; /* * unlink ourselves from the directory on this plane diff --git a/bsd/net/if_media.h b/bsd/net/if_media.h index 51be28e3c..12cbc871b 100644 --- a/bsd/net/if_media.h +++ b/bsd/net/if_media.h @@ -221,6 +221,7 @@ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, #define IFM_FDX 0x00100000 /* Force full duplex */ #define IFM_HDX 0x00200000 /* Force half duplex */ #define IFM_FLOW 0x00400000 /* enable hardware flow control */ +#define IFM_EEE 0x00800000 /* Support energy efficient ethernet */ #define IFM_FLAG0 0x01000000 /* Driver defined flag */ #define IFM_FLAG1 0x02000000 /* Driver defined flag */ #define IFM_FLAG2 0x04000000 /* Driver defined flag */ @@ -408,6 +409,7 @@ struct ifmedia_description { { IFM_FDX, "full-duplex" }, \ { IFM_HDX, "half-duplex" }, \ { IFM_FLOW, "flow-control" }, \ + { IFM_EEE, "energy-efficient-ethernet" }, \ { IFM_FLAG0, "flag0" }, \ { IFM_FLAG1, "flag1" }, \ { IFM_FLAG2, "flag2" }, \ diff --git a/bsd/netinet6/ip6_fw.h b/bsd/netinet6/ip6_fw.h index 1cfa5e116..32f4f280b 100644 --- a/bsd/netinet6/ip6_fw.h +++ b/bsd/netinet6/ip6_fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008-2011 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -329,6 +329,7 @@ struct ip6_fw_chain { * Function definitions. */ void ip6_fw_init(void); +void load_ip6fw(void); /* Firewall hooks */ struct ip6_hdr; diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c index 143625b09..309686f7f 100644 --- a/bsd/netinet6/ip6_output.c +++ b/bsd/netinet6/ip6_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -1741,10 +1741,12 @@ do { \ case IPV6_FW_FLUSH: case IPV6_FW_ZERO: { - if (ip6_fw_ctl_ptr == NULL && load_ipfw() != 0) - return EINVAL; - - error = (*ip6_fw_ctl_ptr)(sopt); + if (ip6_fw_ctl_ptr == NULL) + load_ip6fw(); + if (ip6_fw_ctl_ptr != NULL) + error = (*ip6_fw_ctl_ptr)(sopt); + else + return ENOPROTOOPT; } break; #endif /* IPFIREWALL */ @@ -1909,10 +1911,12 @@ do { \ #if IPFIREWALL case IPV6_FW_GET: { - if (ip6_fw_ctl_ptr == NULL && load_ipfw() != 0) - return EINVAL; - - error = (*ip6_fw_ctl_ptr)(sopt); + if (ip6_fw_ctl_ptr == NULL) + load_ip6fw(); + if (ip6_fw_ctl_ptr != NULL) + error = (*ip6_fw_ctl_ptr)(sopt); + else + return ENOPROTOOPT; } break; #endif /* IPFIREWALL */ diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c index 3a665a2c4..169b7992d 100644 --- a/bsd/netinet6/raw_ip6.c +++ b/bsd/netinet6/raw_ip6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -542,7 +542,7 @@ rip6_output( } #if IPFW2 -static void +__private_extern__ void load_ip6fw(void) { ip6_fw_init(); diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h index 50aafaa53..5718861f6 100644 --- a/bsd/sys/buf_internal.h +++ b/bsd/sys/buf_internal.h @@ -191,6 +191,7 @@ struct buf { */ #define B_NEED_IODONE 0x20000000 /* need biodone on the real_bp associated with a cluster_io */ #define B_COMMIT_UPL 0x40000000 /* commit/abort the UPL on I/O success/failure */ +#define B_TDONE 0x80000000 /* buf_t that is part of a cluster level transaction has completed */ /* Flags to low-level allocation routines. */ diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 6013db9df..0232617ca 100644 --- a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -51,7 +51,7 @@ * DKIOCISWRITABLE is media writable? * * DKIOCREQUESTIDLE idle media - * DKIOCDISCARD delete unused data + * DKIOCUNMAP delete unused data * * DKIOCGETMAXBLOCKCOUNTREAD get maximum block count for reads * DKIOCGETMAXBLOCKCOUNTWRITE get maximum block count for writes @@ -66,17 +66,18 @@ * DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT get minimum segment alignment in bytes * DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT get maximum segment width in bits * + * DKIOCGETFEATURES get device's feature set * DKIOCGETPHYSICALBLOCKSIZE get device's block size * DKIOCGETCOMMANDPOOLSIZE get device's queue depth */ +#define DK_FEATURE_UNMAP 0x00000010 + typedef struct { uint64_t offset; uint64_t length; - - uint8_t reserved0128[16]; /* reserved, clear to zero */ -} dk_discard_t; +} dk_extent_t; typedef struct { @@ -103,6 +104,18 @@ typedef struct #endif /* !__LP64__ */ } dk_format_capacities_t; +typedef struct +{ + dk_extent_t * extents; + uint32_t extentsCount; + +#ifdef __LP64__ + uint8_t reserved0096[4]; /* reserved, clear to zero */ +#else /* !__LP64__ */ + uint8_t reserved0064[8]; /* reserved, clear to zero */ +#endif /* !__LP64__ */ +} dk_unmap_t; + #define DKIOCEJECT _IO('d', 21) #define DKIOCSYNCHRONIZECACHE _IO('d', 22) @@ -117,7 +130,7 @@ typedef struct #define DKIOCISWRITABLE _IOR('d', 29, uint32_t) #define DKIOCREQUESTIDLE _IO('d', 30) -#define DKIOCDISCARD _IOW('d', 31, dk_discard_t) +#define DKIOCUNMAP _IOW('d', 31, dk_unmap_t) #define DKIOCGETMAXBLOCKCOUNTREAD _IOR('d', 64, uint64_t) #define DKIOCGETMAXBLOCKCOUNTWRITE _IOR('d', 65, uint64_t) @@ -132,11 +145,21 @@ typedef struct #define DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT _IOR('d', 74, uint64_t) #define DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT _IOR('d', 75, uint64_t) +#define DKIOCGETFEATURES _IOR('d', 76, uint32_t) #define DKIOCGETPHYSICALBLOCKSIZE _IOR('d', 77, uint32_t) #define DKIOCGETCOMMANDPOOLSIZE _IOR('d', 78, uint32_t) +typedef struct +{ + uint64_t offset; + uint64_t length; + + uint8_t reserved0128[16]; /* reserved, clear to zero */ +} dk_discard_t __attribute__ ((deprecated)); + +#define DKIOCDISCARD _IOW('d', 31, dk_discard_t) + #ifdef KERNEL -#define DK_FEATURE_DISCARD 0x00000010 #define DK_FEATURE_FORCE_UNIT_ACCESS 0x00000001 #define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, uint32_t) #define DKIOCSETBLOCKSIZE _IOW('d', 24, uint32_t) @@ -144,7 +167,6 @@ typedef struct #define DKIOCISSOLIDSTATE _IOR('d', 79, uint32_t) #define DKIOCISVIRTUAL _IOR('d', 72, uint32_t) #define DKIOCGETBASE _IOR('d', 73, uint64_t) -#define DKIOCGETFEATURES _IOR('d', 76, uint32_t) #endif /* KERNEL */ #endif /* _SYS_DISK_H_ */ diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 6767bad2d..9f7b789c9 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -168,6 +168,7 @@ 
__BEGIN_DECLS #define DBG_NETIPSEC 128 /* IPsec Protocol */ /* **** The Kernel Debug Sub Classes for IOKIT (DBG_IOKIT) **** */ +#define DBG_IOINTC 0 /* Interrupt controller */ #define DBG_IOWORKLOOP 1 /* Work from work loop */ #define DBG_IOINTES 2 /* Interrupt event source */ #define DBG_IOCLKES 3 /* Clock event source */ diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h index 598471f91..8633a1465 100644 --- a/bsd/sys/mount.h +++ b/bsd/sys/mount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -480,7 +480,8 @@ struct vfsioattr { void * io_reserved[2]; /* extended attribute information */ }; -#define VFS_IOATTR_FLAGS_FUA 0x01 /* Write-through cache supported */ +#define VFS_IOATTR_FLAGS_FUA 0x01 /* Write-through cache supported */ +#define VFS_IOATTR_FLAGS_UNMAP 0x02 /* Unmap (trim) supported */ /* * Filesystem Registration information diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h index d36e8ea74..b069b1a0f 100644 --- a/bsd/sys/mount_internal.h +++ b/bsd/sys/mount_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -202,6 +202,7 @@ struct mount { * ioflags */ #define MNT_IOFLAGS_FUA_SUPPORTED 0x00000001 +#define MNT_IOFLAGS_UNMAP_SUPPORTED 0x00000002 /* * ioqueue depth for devices that don't report one diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index f1f9f649b..499056a3b 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -132,6 +132,7 @@ static lck_grp_t *cl_mtx_grp; static lck_attr_t *cl_mtx_attr; static lck_grp_attr_t *cl_mtx_grp_attr; static lck_mtx_t *cl_mtxp; +static lck_mtx_t *cl_transaction_mtxp; #define IO_UNKNOWN 0 @@ -242,6 +243,11 @@ cluster_init(void) { if (cl_mtxp == NULL) panic("cluster_init: failed to allocate cl_mtxp"); + + cl_transaction_mtxp = lck_mtx_alloc_init(cl_mtx_grp, cl_mtx_attr); + + if (cl_transaction_mtxp == NULL) + panic("cluster_init: failed to allocate cl_transaction_mtxp"); } @@ -510,26 +516,36 @@ cluster_iodone(buf_t bp, void *callback_arg) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START, cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); - for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) { - /* - * all I/O requests that are part of this transaction - * have to complete before we can process it - */ - if ( !(cbp->b_flags & B_DONE)) { + if (cbp_head->b_trans_next || !(cbp_head->b_flags & B_EOT)) { + + lck_mtx_lock_spin(cl_transaction_mtxp); + + bp->b_flags |= B_TDONE; + + for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) { + /* + * all I/O requests that are part of this transaction + * have to complete before we can process it + */ + if ( !(cbp->b_flags & B_TDONE)) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, + cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0); + + lck_mtx_unlock(cl_transaction_mtxp); + return 0; + } + if (cbp->b_flags & B_EOT) + transaction_complete = TRUE; + } + lck_mtx_unlock(cl_transaction_mtxp); + + if (transaction_complete == FALSE) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, - cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0); + cbp_head, 0, 0, 0, 0); return 0; } - if (cbp->b_flags & B_EOT) - transaction_complete = TRUE; - } - if (transaction_complete == FALSE) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, - cbp_head, 0, 0, 0, 0); - - return 0; } error 
= 0; total_size = 0; @@ -759,6 +775,14 @@ cluster_complete_transaction(buf_t *cbp_head, void *callback_arg, int *retval, i for (cbp = *cbp_head; cbp; cbp = cbp->b_trans_next) buf_biowait(cbp); } + /* + * we've already waited on all of the I/Os in this transaction, + * so mark all of the buf_t's in this transaction as B_TDONE + * so that cluster_iodone sees the transaction as completed + */ + for (cbp = *cbp_head; cbp; cbp = cbp->b_trans_next) + cbp->b_flags |= B_TDONE; + error = cluster_iodone(*cbp_head, callback_arg); if ( !(flags & CL_ASYNC) && error && *retval == 0) { diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index 7ff95f478..0a967aba9 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2008 Apple Inc. All rights reserved. + * Copyright (c) 1995-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -79,8 +80,28 @@ extern task_t kernel_task; #include "vfs_journal.h" +#ifndef CONFIG_HFS_TRIM +#define CONFIG_HFS_TRIM 0 +#endif + #if JOURNALING +// +// By default, we grow the list of extents to trim by one page at a time. +// We'll opt to flush a transaction if it contains at least +// JOURNAL_FLUSH_TRIM_EXTENTS extents to be trimmed (even if the number +// of modified blocks is small). +// +enum { + JOURNAL_DEFAULT_TRIM_BYTES = PAGE_SIZE, + JOURNAL_DEFAULT_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_BYTES / sizeof(dk_extent_t), + JOURNAL_FLUSH_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_EXTENTS * 15 / 16 +}; + +unsigned int jnl_trim_flush_limit = JOURNAL_FLUSH_TRIM_EXTENTS; +SYSCTL_UINT (_kern, OID_AUTO, jnl_trim_flush, CTLFLAG_RW, &jnl_trim_flush_limit, 0, "number of trimmed extents to cause a journal flush"); + + /* XXX next prototytype should be from libsa/stdlib.h> but conflicts libkern */ __private_extern__ void qsort( void * array, @@ -1789,24 +1810,20 @@ journal_open(struct vnode *jvp, /* * The volume has probably been resized (such that we had to adjust the * logical sector size), or copied to media with a different logical - * sector size. If the journal is empty, then just switch to the - * current logical sector size. If the journal is not empty, then - * fail to open the journal. + * sector size. + * + * Temporarily change the device's logical block size to match the + * journal's header size. This will allow us to replay the journal + * safely. If the replay succeeds, we will update the journal's header + * size (later in this function). 
*/ - - if (jnl->jhdr->start == jnl->jhdr->end) { - printf("jnl: %s: open: changing journal header size from %d to %u\n", - jdev_name, jnl->jhdr->jhdr_size, phys_blksz); - jnl->jhdr->jhdr_size = phys_blksz; - if (write_journal_header(jnl, 1)) { - printf("jnl: %s: open: failed to update journal header size\n", jdev_name); - goto bad_journal; - } - } else { - printf("jnl: %s: open: phys_blksz %u does not match journal header size %d, and journal is not empty!\n", - jdev_name, phys_blksz, jnl->jhdr->jhdr_size); - goto bad_journal; - } + + orig_blksz = phys_blksz; + phys_blksz = jnl->jhdr->jhdr_size; + VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, &context); + + printf("jnl: %s: open: temporarily switched block size from %u to %u\n", + jdev_name, orig_blksz, phys_blksz); } if ( jnl->jhdr->start <= 0 @@ -1859,14 +1876,32 @@ journal_open(struct vnode *jvp, goto bad_journal; } + /* + * When we get here, we know that the journal is empty (jnl->jhdr->start == + * jnl->jhdr->end). If the device's logical block size was different from + * the journal's header size, then we can now restore the device's logical + * block size and update the journal's header size to match. + * + * Note that we also adjust the journal's start and end so that they will + * be aligned on the new block size. We pick a new sequence number to + * avoid any problems if a replay found previous transactions using the old + * journal header size. (See the comments in journal_create(), above.) + */ if (orig_blksz != 0) { VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context); phys_blksz = orig_blksz; - if (orig_blksz < (uint32_t)jnl->jhdr->jhdr_size) { - printf("jnl: %s: open: jhdr_size is %d but orig phys blk size is %d. switching.\n", - jdev_name, jnl->jhdr->jhdr_size, orig_blksz); - - jnl->jhdr->jhdr_size = orig_blksz; + orig_blksz = 0; + + jnl->jhdr->jhdr_size = phys_blksz; + jnl->jhdr->start = phys_blksz; + jnl->jhdr->end = phys_blksz; + jnl->jhdr->sequence_num = (jnl->jhdr->sequence_num + + (journal_size / phys_blksz) + + (random() % 16384)) & 0x00ffffff; + + if (write_journal_header(jnl, 1)) { + printf("jnl: %s: open: failed to update journal header size\n", jdev_name); + goto bad_journal; } } @@ -1876,6 +1911,7 @@ journal_open(struct vnode *jvp, // set this now, after we've replayed the journal size_up_tbuffer(jnl, tbuffer_size, phys_blksz); + // TODO: Does this need to change if the device's logical block size changed? if ((off_t)(jnl->jhdr->blhdr_size/sizeof(block_info)-1) > (jnl->jhdr->size/jnl->jhdr->jhdr_size)) { printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size, jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size); @@ -1890,6 +1926,7 @@ journal_open(struct vnode *jvp, if (orig_blksz != 0) { phys_blksz = orig_blksz; VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context); + printf("jnl: %s: open: restored block size after error\n", jdev_name); } kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz); bad_kmem_alloc: @@ -2752,6 +2789,383 @@ journal_kill_block(journal *jnl, struct buf *bp) } +/* +;________________________________________________________________________________ +; +; Routine: journal_trim_realloc +; +; Function: Increase the amount of memory allocated for the list of extents +; to be unmapped (trimmed). This routine will be called when +; adding an extent to the list, and the list already occupies +; all of the space allocated to it. 
This routine returns ENOMEM +; if unable to allocate more space, or 0 if the extent list was +; grown successfully. +; +; Input Arguments: +; tr - The transaction containing the extent list. +; +; Output: +; (result) - ENOMEM or 0. +; +; Side effects: +; The allocated_count and extents fields of tr->trim are updated +; if the function returned 0. +;________________________________________________________________________________ +*/ +static int +journal_trim_realloc(transaction *tr) +{ + if (CONFIG_HFS_TRIM) { + void *new_extents; + uint32_t new_allocated_count; + + new_allocated_count = tr->trim.allocated_count + JOURNAL_DEFAULT_TRIM_EXTENTS; + new_extents = kalloc(new_allocated_count * sizeof(dk_extent_t)); + if (new_extents == NULL) { + printf("journal_trim_realloc: unable to grow extent list!\n"); + /* + * Since we could be called when allocating space previously marked + * to be trimmed, we need to empty out the list to be safe. + */ + tr->trim.extent_count = 0; + return ENOMEM; + } + + /* Copy the old extent list to the newly allocated list. */ + if (tr->trim.extents != NULL) { + memmove(new_extents, + tr->trim.extents, + tr->trim.allocated_count * sizeof(dk_extent_t)); + kfree(tr->trim.extents, + tr->trim.allocated_count * sizeof(dk_extent_t)); + } + + tr->trim.allocated_count = new_allocated_count; + tr->trim.extents = new_extents; + } + return 0; +} + + +/* +;________________________________________________________________________________ +; +; Routine: journal_trim_add_extent +; +; Function: Make note of a range of bytes that should be unmapped +; (trimmed). That is, the given range of bytes no longer have +; useful content, and the device can unmap the previous +; contents. For example, a solid state disk may reuse the +; underlying storage for other blocks. +; +; The extent will be unmapped after the transaction is written +; to the journal. +; +; Input Arguments: +; jnl - The journal for the volume containing the byte range. +; offset - The first byte of the range to be trimmed. +; length - The number of bytes of the extent being trimmed. +;________________________________________________________________________________ +*/ +__private_extern__ int +journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length) +{ + if (CONFIG_HFS_TRIM) { + uint64_t end; + transaction *tr; + dk_extent_t *extent; + uint32_t insert_index; + uint32_t replace_count; + + CHECK_JOURNAL(jnl); + + if (jnl->flags & JOURNAL_TRIM_ERR) { + /* + * A previous trim failed, so we have disabled trim for this volume + * for as long as it remains mounted. + */ + return 0; + } + + if (jnl->flags & JOURNAL_INVALID) { + return EINVAL; + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + if (jnl->owner != current_thread()) { + panic("jnl: trim_add_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + free_old_stuff(jnl); + + end = offset + length; + + /* + * Find the range of existing extents that can be combined with the + * input extent. We start by counting the number of extents that end + * strictly before the input extent, then count the number of extents + * that overlap or are contiguous with the input extent. 
+ */ + extent = tr->trim.extents; + insert_index = 0; + while (insert_index < tr->trim.extent_count && extent->offset + extent->length < offset) { + ++insert_index; + ++extent; + } + replace_count = 0; + while (insert_index + replace_count < tr->trim.extent_count && extent->offset <= end) { + ++replace_count; + ++extent; + } + + /* + * If none of the existing extents can be combined with the input extent, + * then just insert it in the list (before item number insert_index). + */ + if (replace_count == 0) { + /* If the list was already full, we need to grow it. */ + if (tr->trim.extent_count == tr->trim.allocated_count) { + if (journal_trim_realloc(tr) != 0) { + printf("jnl: trim_add_extent: out of memory!"); + return ENOMEM; + } + } + + /* Shift any existing extents with larger offsets. */ + if (insert_index < tr->trim.extent_count) { + memmove(&tr->trim.extents[insert_index+1], + &tr->trim.extents[insert_index], + (tr->trim.extent_count - insert_index) * sizeof(dk_extent_t)); + } + tr->trim.extent_count++; + + /* Store the new extent in the list. */ + tr->trim.extents[insert_index].offset = offset; + tr->trim.extents[insert_index].length = length; + + /* We're done. */ + return 0; + } + + /* + * Update extent number insert_index to be the union of the input extent + * and all of the replaced extents. + */ + if (tr->trim.extents[insert_index].offset < offset) + offset = tr->trim.extents[insert_index].offset; + extent = &tr->trim.extents[insert_index + replace_count - 1]; + if (extent->offset + extent->length > end) + end = extent->offset + extent->length; + tr->trim.extents[insert_index].offset = offset; + tr->trim.extents[insert_index].length = end - offset; + + /* + * If we were replacing more than one existing extent, then shift any + * extents with larger offsets, and update the count of extents. + * + * We're going to leave extent #insert_index alone since it was just updated, above. + * We need to move extents from index (insert_index + replace_count) through the end of + * the list by (replace_count - 1) positions so that they overwrite extent #(insert_index + 1). + */ + if (replace_count > 1 && (insert_index + replace_count) < tr->trim.extent_count) { + memmove(&tr->trim.extents[insert_index + 1], + &tr->trim.extents[insert_index + replace_count], + (tr->trim.extent_count - insert_index - replace_count) * sizeof(dk_extent_t)); + } + tr->trim.extent_count -= replace_count - 1; + } + return 0; +} + + +/* +;________________________________________________________________________________ +; +; Routine: journal_trim_remove_extent +; +; Function: Make note of a range of bytes, some of which may have previously +; been passed to journal_trim_add_extent, is now in use on the +; volume. The given bytes will be not be trimmed as part of +; this transaction. +; +; Input Arguments: +; jnl - The journal for the volume containing the byte range. +; offset - The first byte of the range to be trimmed. +; length - The number of bytes of the extent being trimmed. +;________________________________________________________________________________ +*/ +__private_extern__ int +journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) +{ + if (CONFIG_HFS_TRIM) { + u_int64_t end; + dk_extent_t *extent; + transaction *tr; + u_int32_t keep_before; + u_int32_t keep_after; + + CHECK_JOURNAL(jnl); + + if (jnl->flags & JOURNAL_TRIM_ERR) { + /* + * A previous trim failed, so we have disabled trim for this volume + * for as long as it remains mounted. 
+/* +;________________________________________________________________________________ +; +; Routine: journal_trim_remove_extent +; +; Function: Make note that a range of bytes, some of which may have previously +; been passed to journal_trim_add_extent, is now in use on the +; volume. The given bytes will not be trimmed as part of +; this transaction. +; +; Input Arguments: +; jnl - The journal for the volume containing the byte range. +; offset - The first byte of the range that should not be trimmed. +; length - The number of bytes of the extent that should not be trimmed. +;________________________________________________________________________________ +*/ +__private_extern__ int +journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) +{ + if (CONFIG_HFS_TRIM) { + u_int64_t end; + dk_extent_t *extent; + transaction *tr; + u_int32_t keep_before; + u_int32_t keep_after; + + CHECK_JOURNAL(jnl); + + if (jnl->flags & JOURNAL_TRIM_ERR) { + /* + * A previous trim failed, so we have disabled trim for this volume + * for as long as it remains mounted. + */ + return 0; + } + + if (jnl->flags & JOURNAL_INVALID) { + return EINVAL; + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + if (jnl->owner != current_thread()) { + panic("jnl: trim_remove_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + free_old_stuff(jnl); + + end = offset + length; + + /* + * Find any existing extents that start before or end after the input + * extent. These extents will be modified if they overlap the input + * extent. Other extents between them will be deleted. + */ + extent = tr->trim.extents; + keep_before = 0; + while (keep_before < tr->trim.extent_count && extent->offset < offset) { + ++keep_before; + ++extent; + } + keep_after = keep_before; + if (keep_after > 0) { + /* See if previous extent extends beyond both ends of input extent. */ + --keep_after; + --extent; + } + while (keep_after < tr->trim.extent_count && (extent->offset + extent->length) <= end) { + ++keep_after; + ++extent; + } + + /* + * When we get here, the first keep_before extents (0 .. keep_before-1) + * start before the input extent, and extents (keep_after .. extent_count-1) + * end after the input extent. We'll need to keep all of those extents, + * but possibly modify #(keep_before-1) and #keep_after to remove the portion + * that overlaps with the input extent. + */ + + /* + * Does the input extent start after and end before the same existing + * extent? If so, we have to "punch a hole" in that extent and convert + * it to two separate extents. + */ + if (keep_before > keep_after) { + /* If the list was already full, we need to grow it. */ + if (tr->trim.extent_count == tr->trim.allocated_count) { + if (journal_trim_realloc(tr) != 0) { + printf("jnl: trim_remove_extent: out of memory!"); + return ENOMEM; + } + } + + /* + * Make room for a new extent by shifting extents #keep_after and later + * down by one extent. When we're done, extents #keep_before and + * #keep_after will be identical, and we can fall through to removing + * the portion that overlaps the input extent. + */ + memmove(&tr->trim.extents[keep_before], + &tr->trim.extents[keep_after], + (tr->trim.extent_count - keep_after) * sizeof(dk_extent_t)); + ++tr->trim.extent_count; + ++keep_after; + + /* + * Fall through. We now have the case where the length of extent + * #(keep_before - 1) needs to be updated, and the start of extent + * #(keep_after) needs to be updated. + */ + } + + /* + * May need to truncate the end of extent #(keep_before - 1) if it overlaps + * the input extent. + */ + if (keep_before > 0) { + extent = &tr->trim.extents[keep_before - 1]; + if (extent->offset + extent->length > offset) { + extent->length = offset - extent->offset; + } + } + + /* + * May need to update the start of extent #(keep_after) if it overlaps the + * input extent. + */ + if (keep_after < tr->trim.extent_count) { + extent = &tr->trim.extents[keep_after]; + if (extent->offset < end) { + extent->length = extent->offset + extent->length - end; + extent->offset = end; + } + } + + /* + * If there were whole extents that overlapped the input extent, get rid + * of them by shifting any following extents, and updating the count.
+ */ + if (keep_after > keep_before && keep_after < tr->trim.extent_count) { + memmove(&tr->trim.extents[keep_before], + &tr->trim.extents[keep_after], + (tr->trim.extent_count - keep_after) * sizeof(dk_extent_t)); + } + tr->trim.extent_count -= keep_after - keep_before; + } + return 0; +} + + +static int +journal_trim_flush(journal *jnl, transaction *tr) +{ + int errno = 0; + + if (CONFIG_HFS_TRIM) { + if ((jnl->flags & JOURNAL_TRIM_ERR) == 0 && tr->trim.extent_count > 0) { + dk_unmap_t unmap; + + bzero(&unmap, sizeof(unmap)); + unmap.extents = tr->trim.extents; + unmap.extentsCount = tr->trim.extent_count; + errno = VNOP_IOCTL(jnl->fsdev, DKIOCUNMAP, (caddr_t)&unmap, FWRITE, vfs_context_kernel()); + if (errno) { + printf("jnl: error %d from DKIOCUNMAP (extents=%lx, count=%u); disabling trim for %s\n", + errno, (unsigned long) (tr->trim.extents), tr->trim.extent_count, + jnl->jdev_name); + jnl->flags |= JOURNAL_TRIM_ERR; + } + } + if (tr->trim.extents) { + kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t)); + tr->trim.allocated_count = 0; + tr->trim.extent_count = 0; + tr->trim.extents = NULL; + } + } + + return errno; +} + + static int journal_binfo_cmp(const void *a, const void *b) { @@ -2834,10 +3248,17 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void // transaction buffer if it's full or if we have more than // one of them so we don't start hogging too much memory. // + // We also check the number of extents waiting to be trimmed. + // If it is small enough, then keep accumulating more (so we + // can reduce the overhead of trimming). If there was a + // prior trim error, then we stop issuing trims for this + // volume, so we can also coalesce transactions. + // if ( force_it == 0 && (jnl->flags & JOURNAL_NO_GROUP_COMMIT) == 0 && tr->num_blhdrs < 3 - && (tr->total_bytes <= ((tr->tbuffer_size*tr->num_blhdrs) - tr->tbuffer_size/8))) { + && (tr->total_bytes <= ((tr->tbuffer_size*tr->num_blhdrs) - tr->tbuffer_size/8)) + && ((jnl->flags & JOURNAL_TRIM_ERR) || (tr->trim.extent_count < jnl_trim_flush_limit))) { jnl->cur_tr = tr; return 0; @@ -3064,6 +3485,12 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void goto bad_journal; } + // + // Send a DKIOCUNMAP for the extents trimmed by this transaction, and + // free up the extent list. + // + errno = journal_trim_flush(jnl, tr); + // // setup for looping through all the blhdr's. we null out the // tbuffer and blhdr fields so that they're not used any more. @@ -3148,7 +3575,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void bad_journal: jnl->flags |= JOURNAL_INVALID; jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL; - abort_transaction(jnl, tr); + abort_transaction(jnl, tr); // cleans up list of extents to be trimmed return -1; } @@ -3212,6 +3639,12 @@ abort_transaction(journal *jnl, transaction *tr) kmem_free(kernel_map, (vm_offset_t)blhdr, tr->tbuffer_size); } + if (tr->trim.extents) { + kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t)); + } + tr->trim.allocated_count = 0; + tr->trim.extent_count = 0; + tr->trim.extents = NULL; tr->tbuffer = NULL; tr->blhdr = NULL; tr->total_bytes = 0xdbadc0de; diff --git a/bsd/vfs/vfs_journal.h b/bsd/vfs/vfs_journal.h index c3e058b40..310445395 100644 --- a/bsd/vfs/vfs_journal.h +++ b/bsd/vfs/vfs_journal.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,6 +42,7 @@ #include #include +#include typedef struct _blk_info { int32_t bsize; @@ -74,6 +75,12 @@ typedef struct block_list_header { struct journal; +struct jnl_trim_list { + uint32_t allocated_count; + uint32_t extent_count; + dk_extent_t *extents; +}; + typedef struct transaction { int tbuffer_size; // in bytes char *tbuffer; // memory copy of the transaction @@ -87,6 +94,7 @@ typedef struct transaction { struct journal *jnl; // ptr back to the journal structure struct transaction *next; // list of tr's (either completed or to be free'd) uint32_t sequence_num; + struct jnl_trim_list trim; } transaction; @@ -166,6 +174,7 @@ typedef struct journal { #define JOURNAL_FLUSHCACHE_ERR 0x00040000 // means we already printed this err #define JOURNAL_NEED_SWAP 0x00080000 // swap any data read from disk #define JOURNAL_DO_FUA_WRITES 0x00100000 // do force-unit-access writes +#define JOURNAL_TRIM_ERR 0x00200000 // a previous trim failed /* journal_open/create options are always in the low-16 bits */ #define JOURNAL_OPTION_FLAGS_MASK 0x0000ffff @@ -283,12 +292,21 @@ void journal_close(journal *journalp); * then call journal_kill_block(). This will mark it so * that the journal does not play it back (effectively * dropping it). + * + * journal_trim_add_extent() marks a range of bytes on the device which should + * be trimmed (invalidated, unmapped). journal_trim_remove_extent() marks a + * range of bytes which should no longer be trimmed. Accumulated extents + * will be trimmed when the transaction is flushed to the on-disk journal. */ int journal_start_transaction(journal *jnl); int journal_modify_block_start(journal *jnl, struct buf *bp); int journal_modify_block_abort(journal *jnl, struct buf *bp); int journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *bp, void *arg), void *arg); int journal_kill_block(journal *jnl, struct buf *bp); +#ifdef BSD_KERNEL_PRIVATE +int journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length); +int journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length); +#endif int journal_end_transaction(journal *jnl); int journal_active(journal *jnl); diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index a8fc4b148..3b10114cb 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 
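The header comment above also pins down when accumulated extents actually reach the device: journal_trim_flush packs them into a dk_unmap_t and issues DKIOCUNMAP, and on failure sets JOURNAL_TRIM_ERR so trims are never retried for that mount. Here is a small user-space sketch of how such a request is assembled; it deliberately stops short of issuing the ioctl (unmapping a live device destroys data) and assumes the dk_extent_t/dk_unmap_t definitions added to sys/disk.h by this same patch.

    /* Sketch: how journal_trim_flush packages extents for DKIOCUNMAP.
     * Builds (but intentionally does not issue) the request. Compile on
     * Mac OS X, where <sys/disk.h> provides dk_unmap_t. */
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/disk.h>

    int main(void)
    {
        dk_extent_t extents[2] = {
            { .offset = 4096,  .length = 8192 },
            { .offset = 65536, .length = 4096 },
        };
        dk_unmap_t unmap;

        memset(&unmap, 0, sizeof(unmap));   /* the kernel uses bzero() */
        unmap.extents      = extents;
        unmap.extentsCount = 2;

        /* The kernel sends this with:
         *   VNOP_IOCTL(jnl->fsdev, DKIOCUNMAP, (caddr_t)&unmap, FWRITE, ...);
         * and disables trim for the mount if the ioctl fails. */
        for (uint32_t i = 0; i < unmap.extentsCount; i++)
            printf("unmap [%llu, %llu)\n",
                   (unsigned long long)unmap.extents[i].offset,
                   (unsigned long long)(unmap.extents[i].offset +
                                        unmap.extents[i].length));
        return 0;
    }
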
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -2994,7 +2994,8 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) if (features & DK_FEATURE_FORCE_UNIT_ACCESS) mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED; - + if (features & DK_FEATURE_UNMAP) + mp->mnt_ioflags |= MNT_IOFLAGS_UNMAP_SUPPORTED; return (error); } diff --git a/config/IOKit.exports b/config/IOKit.exports index deb1b0fbe..8f1cb8e73 100644 --- a/config/IOKit.exports +++ b/config/IOKit.exports @@ -1008,6 +1008,7 @@ __ZN21IOSubMemoryDescriptorD0Ev __ZN21IOSubMemoryDescriptorD2Ev __ZN22IOInterruptEventSource10gMetaClassE __ZN22IOInterruptEventSource10superClassE +__ZN22IOInterruptEventSource11setWorkLoopEP10IOWorkLoop __ZN22IOInterruptEventSource12checkForWorkEv __ZN22IOInterruptEventSource17interruptOccurredEPvP9IOServicei __ZN22IOInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_PS_iEP9IOServicei diff --git a/config/MasterVersion b/config/MasterVersion index 1ad776f4b..237d3331d 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -10.7.0 +10.8.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. diff --git a/config/Private.i386.exports b/config/Private.i386.exports index 63d85a3b0..5ff0653e9 100644 --- a/config/Private.i386.exports +++ b/config/Private.i386.exports @@ -7,8 +7,8 @@ _cpuid_features _cpuid_info _gOSKextUnresolved _lapic_end_of_interrupt +_lapic_unmask_perfcnt_interrupt _mp_broadcast _mp_cpus_call _need_fsevent _smp_initialized -_lapic_unmask_perfcnt_interrupt diff --git a/config/System6.0.exports b/config/System6.0.exports index ab5e6038b..75146568c 100644 --- a/config/System6.0.exports +++ b/config/System6.0.exports @@ -1431,6 +1431,7 @@ __ZN21IOSubMemoryDescriptorD0Ev __ZN21IOSubMemoryDescriptorD2Ev __ZN22IOInterruptEventSource10gMetaClassE __ZN22IOInterruptEventSource10superClassE +__ZN22IOInterruptEventSource11setWorkLoopEP10IOWorkLoop __ZN22IOInterruptEventSource12checkForWorkEv __ZN22IOInterruptEventSource17interruptOccurredEPvP9IOServicei __ZN22IOInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_PS_iEP9IOServicei diff --git a/config/Unsupported.exports b/config/Unsupported.exports index 0944c326c..8886533d8 100644 --- a/config/Unsupported.exports +++ b/config/Unsupported.exports @@ -156,6 +156,7 @@ _task_get_special_port _task_resume _task_suspend _thread_notrigger +_thread_tid _tsleep _vfs_context_current _vfs_setlocklocal diff --git a/config/Unsupported.i386.exports b/config/Unsupported.i386.exports index bf2cedbf7..66029e241 100644 --- a/config/Unsupported.i386.exports +++ b/config/Unsupported.i386.exports @@ -13,7 +13,6 @@ _in6addr_local _in_broadcast _inaddr_local _inet_domain_mutex -_io_map_spec _ip_mutex _ip_output _ip_protox @@ -21,7 +20,8 @@ _kdp_register_callout _kdp_set_ip_and_mac_addresses _kernel_flock _kernel_thread -_lapic_start +_lapic_set_perfcnt_interrupt_mask +_lapic_set_pmi_func _lo_ifp _m_adj _m_cat @@ -73,6 +73,7 @@ _pru_sense_null _pru_shutdown_notsupp _pru_sockaddr_notsupp _pru_sopoll_notsupp +_rdmsr_carefully _real_ncpus _rtc_clock_napped _sbappendaddr diff --git a/config/Unsupported.x86_64.exports b/config/Unsupported.x86_64.exports index f4dc69724..79dce8fdc 100644 --- a/config/Unsupported.x86_64.exports +++ b/config/Unsupported.x86_64.exports @@ -7,10 +7,10 @@ _cpu_number _dsmos_page_transform_hook _gPEEFIRuntimeServices _gPEEFISystemTable -_io_map_spec _kdp_register_callout _kdp_set_ip_and_mac_addresses -_lapic_start 
+_lapic_set_perfcnt_interrupt_mask +_lapic_set_pmi_func _ml_get_apicid _ml_get_maxbusdelay _ml_get_maxsnoop @@ -20,6 +20,7 @@ _mp_rendezvous_no_intrs _pmCPUControl _pmKextRegister _pm_init_lock +_rdmsr_carefully _real_ncpus _rtc_clock_napped _serial_getc diff --git a/iokit/IOKit/IOInterruptEventSource.h b/iokit/IOKit/IOInterruptEventSource.h index 0be7caf45..fe5d4ae12 100644 --- a/iokit/IOKit/IOInterruptEventSource.h +++ b/iokit/IOKit/IOInterruptEventSource.h @@ -112,6 +112,10 @@ protected: @result Return true if this function needs to be called again before all its outstanding events have been processed. */ virtual bool checkForWork(); +/*! @function setWorkLoop + @abstract Sub-class implementation of setWorkLoop method. */ + virtual void setWorkLoop(IOWorkLoop *inWorkLoop); + public: /*! @function interruptEventSource @@ -186,6 +190,9 @@ state when checkForWork is called. */ @param ind What is this interrupts index within 'nub'. */ virtual void disableInterruptOccurred(void *, IOService *nub, int ind); +private: + IOReturn registerInterruptHandler(IOService *inProvider, int inIntIndex); + private: OSMetaClassDeclareReservedUnused(IOInterruptEventSource, 0); OSMetaClassDeclareReservedUnused(IOInterruptEventSource, 1); diff --git a/iokit/IOKit/IOKitDebug.h b/iokit/IOKit/IOKitDebug.h index 499faa3c8..96fb7c5a0 100644 --- a/iokit/IOKit/IOKitDebug.h +++ b/iokit/IOKit/IOKitDebug.h @@ -71,7 +71,7 @@ enum { kIOLogPower = 0x00000080ULL, kIOLogMapping = 0x00000100ULL, kIOLogCatalogue = 0x00000200ULL, - kIOLogTracePower = 0x00000400ULL, + kIOLogTracePower = 0x00000400ULL, // Obsolete: Use iotrace=0x00000400ULL to enable now kIOLogDebugPower = 0x00000800ULL, kIOLogServiceTree = 0x00001000ULL, kIOLogDTree = 0x00002000ULL, @@ -80,7 +80,7 @@ enum { kOSLogRegistryMods = 0x00010000ULL, // Log attempts to modify registry collections kIOLogPMRootDomain = 0x00020000ULL, kOSRegistryModsMode = 0x00040000ULL, // Change default registry modification handling - panic vs. 
log - kIOTraceIOService = 0x00080000ULL, +// kIOTraceIOService = 0x00080000ULL, // Obsolete: Use iotrace=0x00080000ULL to enable now kIOLogHibernate = 0x00100000ULL, // debug aids - change behaviour @@ -91,7 +91,25 @@ enum { _kIODebugTopFlag = 0x8000000000000000ULL // force enum to be 64 bits }; +enum { + kIOTraceInterrupts = 0x00000001ULL, // Trace primary interrupts + kIOTraceWorkLoops = 0x00000002ULL, // Trace workloop activity + kIOTraceEventSources = 0x00000004ULL, // Trace non-passive event sources + kIOTraceIntEventSource = 0x00000008ULL, // Trace IOIES and IOFIES sources + kIOTraceCommandGates = 0x00000010ULL, // Trace command gate activity + kIOTraceTimers = 0x00000020ULL, // Trace timer event source activity + + kIOTracePowerMgmt = 0x00000400ULL, // Trace power management changes + + kIOTraceIOService = 0x00080000ULL, // registerService/termination + + kIOTraceCompatBootArgs = kIOTraceIOService | kIOTracePowerMgmt +}; + extern SInt64 gIOKitDebug; +extern SInt64 gIOKitTrace; +extern UInt64 gIOInterruptThresholdNS; + #ifdef __cplusplus extern "C" { diff --git a/iokit/IOKit/IOReturn.h b/iokit/IOKit/IOReturn.h index 38811b63a..9f1853785 100644 --- a/iokit/IOKit/IOReturn.h +++ b/iokit/IOKit/IOReturn.h @@ -62,6 +62,7 @@ typedef kern_return_t IOReturn; //#define sub_iokit_hidsystem err_sub(14) #define sub_iokit_scsi err_sub(16) //#define sub_iokit_pccard err_sub(21) +#define sub_iokit_thunderbolt err_sub(29) #define sub_iokit_vendor_specific err_sub(-2) #define sub_iokit_reserved err_sub(-1) diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h index b1b09057f..a1d22f4d3 100644 --- a/iokit/IOKit/IOTimeStamp.h +++ b/iokit/IOKit/IOTimeStamp.h @@ -107,6 +107,7 @@ IOTimeStamp(uintptr_t csc, /* IOKit infrastructure subclasses */ +#define IODBG_INTC(code) (KDBG_CODE(DBG_IOKIT, DBG_IOINTC, code)) #define IODBG_WORKLOOP(code) (KDBG_CODE(DBG_IOKIT, DBG_IOWORKLOOP, code)) #define IODBG_INTES(code) (KDBG_CODE(DBG_IOKIT, DBG_IOINTES, code)) #define IODBG_TIMES(code) (KDBG_CODE(DBG_IOKIT, DBG_IOCLKES, code)) @@ -132,6 +133,9 @@ IOTimeStamp(uintptr_t csc, /* DBG_IOKIT/DBG_IOTTY codes */ +/* DBG_IOKIT/DBG_IOINTC codes */ +#define IOINTC_HANDLER 1 /* 0x05000004 */ + /* DBG_IOKIT/DBG_IOWORKLOOP codes */ #define IOWL_CLIENT 1 /* 0x05010004 */ #define IOWL_WORK 2 /* 0x05010008 */ diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index fc8d7ca8f..804b9bbfd 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -230,14 +230,14 @@ enum { * false == Deep Sleep is disabled * not present == Deep Sleep is not supported on this hardware */ -#define kIOPMDeepSleepEnabledKey "DeepSleep Enabled" +#define kIOPMDeepSleepEnabledKey "Standby Enabled" /* kIOPMDeepSleepDelayKey * Key refers to a CFNumberRef that represents the delay in seconds before * entering Deep Sleep state. The property is not present if Deep Sleep is * unsupported. 
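The kIOTrace* bits and the gIOKitTrace global introduced above are runtime-switchable: IOKitDebug.cpp later in this patch publishes them with SYSCTL_QUAD(_debug, OID_AUTO, iotrace, ...). A sketch of toggling them from user space follows, assuming the resulting sysctl name is debug.iotrace (setting it requires root).

    /* Read, then extend, the debug.iotrace mask. The 0x8 bit is
     * kIOTraceIntEventSource from the enum above. */
    #include <stdio.h>
    #include <stdint.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        int64_t trace = 0;
        size_t len = sizeof(trace);

        if (sysctlbyname("debug.iotrace", &trace, &len, NULL, 0) != 0) {
            perror("debug.iotrace");
            return 1;
        }
        printf("gIOKitTrace = 0x%llx\n", (long long)trace);

        /* Additionally enable interrupt-event-source tracing: */
        trace |= 0x8;
        if (sysctlbyname("debug.iotrace", NULL, NULL, &trace, sizeof(trace)) != 0)
            perror("set debug.iotrace");
        return 0;
    }
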
*/ -#define kIOPMDeepSleepDelayKey "DeepSleep Delay" +#define kIOPMDeepSleepDelayKey "Standby Delay" /* kIOPMLowBatteryWakeThresholdKey * Key refers to a CFNumberRef that represents the percentage of battery diff --git a/iokit/Kernel/IOCommandGate.cpp b/iokit/Kernel/IOCommandGate.cpp index 55d6eee7f..0b823d2b6 100644 --- a/iokit/Kernel/IOCommandGate.cpp +++ b/iokit/Kernel/IOCommandGate.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #define super IOEventSource @@ -129,9 +130,6 @@ IOReturn IOCommandGate::runAction(Action inAction, if (!inAction) return kIOReturnBadArgument; - IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), - (uintptr_t) inAction, (uintptr_t) owner); - // closeGate is recursive needn't worry if we already hold the lock. closeGate(); @@ -158,8 +156,19 @@ IOReturn IOCommandGate::runAction(Action inAction, } } + bool trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false; + + if (trace) + IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION), + (uintptr_t) inAction, (uintptr_t) owner); + // Must be gated and on the work loop or enabled res = (*inAction)(owner, arg0, arg1, arg2, arg3); + + if (trace) + IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION), + (uintptr_t) inAction, (uintptr_t) owner); + openGate(); return res; @@ -182,10 +191,18 @@ IOReturn IOCommandGate::attemptAction(Action inAction, if (!workLoop->onThread() && !enabled) res = kIOReturnNotPermitted; else { - IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), + + bool trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false; + + if (trace) + IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION), (uintptr_t) inAction, (uintptr_t) owner); res = (*inAction)(owner, arg0, arg1, arg2, arg3); + + if (trace) + IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION), + (uintptr_t) inAction, (uintptr_t) owner); } openGate(); diff --git a/iokit/Kernel/IOCommandQueue.cpp b/iokit/Kernel/IOCommandQueue.cpp index e2cd65b4e..7d7249dee 100644 --- a/iokit/Kernel/IOCommandQueue.cpp +++ b/iokit/Kernel/IOCommandQueue.cpp @@ -25,19 +25,13 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* -Copyright (c) 1998 Apple Computer, Inc. All rights reserved. - -HISTORY - 1998-7-13 Godfrey van der Linden(gvdl) - Created. -]*/ #if !defined(__LP64__) #include #include #include +#include #include @@ -137,6 +131,7 @@ void IOCommandQueue::free() bool IOCommandQueue::checkForWork() { void *field0, *field1, *field2, *field3; + bool trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false; if (!enabled || consumerIndex == producerIndex) return false; @@ -153,11 +148,16 @@ bool IOCommandQueue::checkForWork() if (++consumerIndex >= size) consumerIndex = 0; - IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), + if (trace) + IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION), (uintptr_t) action, (uintptr_t) owner); (*(IOCommandQueueAction) action)(owner, field0, field1, field2, field3); + if (trace) + IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION), + (uintptr_t) action, (uintptr_t) owner); + return (consumerIndex != producerIndex); } diff --git a/iokit/Kernel/IOFilterInterruptEventSource.cpp b/iokit/Kernel/IOFilterInterruptEventSource.cpp index 47a3b8a14..f4f73e2b4 100644 --- a/iokit/Kernel/IOFilterInterruptEventSource.cpp +++ b/iokit/Kernel/IOFilterInterruptEventSource.cpp @@ -25,44 +25,13 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* -Copyright (c) 1999 Apple Computer, Inc. All rights reserved. -HISTORY - 1999-4-15 Godfrey van der Linden(gvdl) - Created. 
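A detail of the runAction/attemptAction changes below is worth spelling out: gIOKitTrace can change at any moment via sysctl, so the code latches the flag into a local `trace` once and tests that local for both timestamps, guaranteeing every IOTimeStampStartConstant has a matching IOTimeStampEndConstant. A self-contained sketch of the pattern, with printf stubs standing in for the IOTimeStamp macros:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static volatile int64_t gTraceFlags = 0x10;  /* stands in for gIOKitTrace */
    #define kTraceCommandGates 0x10

    /* Stubs standing in for IOTimeStampStartConstant/EndConstant. */
    static void trace_start(uintptr_t a) { printf("start %#lx\n", (unsigned long)a); }
    static void trace_end(uintptr_t a)   { printf("end   %#lx\n", (unsigned long)a); }

    static void run_action(void (*action)(void))
    {
        /* Latch the flag once: if the sysctl flipped between two separate
         * reads, a start could be emitted with no matching end (or vice
         * versa), corrupting the kdebug interval trace. */
        bool trace = (gTraceFlags & kTraceCommandGates) != 0;

        if (trace) trace_start((uintptr_t)action);
        (*action)();
        if (trace) trace_end((uintptr_t)action);
    }

    static void work(void) { puts("gated work"); }

    int main(void) { run_action(work); return 0; }
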
-*/ #include #include +#include #include #include -#if KDEBUG - -#define IOTimeTypeStampS(t) \ -do { \ - IOTimeStampStart(IODBG_INTES(t), \ - (uintptr_t) this, (uintptr_t) owner); \ -} while(0) - -#define IOTimeTypeStampE(t) \ -do { \ - IOTimeStampEnd(IODBG_INTES(t), \ - (uintptr_t) this, (uintptr_t) owner); \ -} while(0) - -#define IOTimeStampLatency() \ -do { \ - IOTimeStampEnd(IODBG_INTES(IOINTES_LAT), \ - (uintptr_t) this, (uintptr_t) owner); \ -} while(0) - -#else /* !KDEBUG */ -#define IOTimeTypeStampS(t) -#define IOTimeTypeStampE(t) -#define IOTimeStampLatency() -#endif /* KDEBUG */ - #define super IOInterruptEventSource OSDefineMetaClassAndStructors @@ -133,13 +102,18 @@ IOFilterInterruptEventSource *IOFilterInterruptEventSource void IOFilterInterruptEventSource::signalInterrupt() { -IOTimeStampLatency(); + bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; producerCount++; -IOTimeTypeStampS(IOINTES_SEMA); + if (trace) + IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); + signalWorkAvailable(); -IOTimeTypeStampE(IOINTES_SEMA); + + if (trace) + IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); + } @@ -156,38 +130,42 @@ void IOFilterInterruptEventSource::normalInterruptOccurred (void */*refcon*/, IOService */*prov*/, int /*source*/) { bool filterRes; + bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; -IOTimeTypeStampS(IOINTES_INTCTXT); + if (trace) + IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER), + (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); -IOTimeTypeStampS(IOINTES_INTFLTR); - IOTimeStampConstant(IODBG_INTES(IOINTES_FILTER), - (uintptr_t) filterAction, (uintptr_t) owner); + // Call the filter. filterRes = (*filterAction)(owner, this); -IOTimeTypeStampE(IOINTES_INTFLTR); + + if (trace) + IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER), + (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); if (filterRes) signalInterrupt(); - -IOTimeTypeStampE(IOINTES_INTCTXT); } void IOFilterInterruptEventSource::disableInterruptOccurred (void */*refcon*/, IOService *prov, int source) { bool filterRes; + bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; -IOTimeTypeStampS(IOINTES_INTCTXT); + if (trace) + IOTimeStampStartConstant(IODBG_INTES(IOINTES_FILTER), + (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); -IOTimeTypeStampS(IOINTES_INTFLTR); - IOTimeStampConstant(IODBG_INTES(IOINTES_FILTER), - (uintptr_t) filterAction, (uintptr_t) owner); + // Call the filter. 
filterRes = (*filterAction)(owner, this); -IOTimeTypeStampE(IOINTES_INTFLTR); + + if (trace) + IOTimeStampEndConstant(IODBG_INTES(IOINTES_FILTER), + (uintptr_t) filterAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); if (filterRes) { prov->disableInterrupt(source); /* disable the interrupt */ - signalInterrupt(); } -IOTimeTypeStampE(IOINTES_INTCTXT); } diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 6b906baa9..bc180fb5b 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -180,6 +180,12 @@ static IODTNVRAM * gIOOptionsEntry; static IORegistryEntry * gIOChosenEntry; #if defined(__i386__) || defined(__x86_64__) static const OSSymbol * gIOCreateEFIDevicePathSymbol; +static const OSSymbol * gIOHibernateRTCVariablesKey; +static const OSSymbol * gIOHibernateBoot0082Key; +static const OSSymbol * gIOHibernateBootNextKey; +static OSData * gIOHibernateBoot0082Data; +static OSData * gIOHibernateBootNextData; +static OSObject * gIOHibernateBootNextSave; #endif static IOPolledFileIOVars gFileVars; @@ -1210,29 +1216,73 @@ IOHibernateSystemSleep(void) data = OSData::withBytes(&rtcVars, sizeof(rtcVars)); if (data) { - IOService::getPMRootDomain()->setProperty(kIOHibernateRTCVariablesKey, data); - - if( gIOOptionsEntry ) + if (!gIOHibernateRTCVariablesKey) + gIOHibernateRTCVariablesKey = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey); + if (gIOHibernateRTCVariablesKey) + IOService::getPMRootDomain()->setProperty(gIOHibernateRTCVariablesKey, data); + + if( gIOOptionsEntry ) + { + if( gIOHibernateMode & kIOHibernateModeSwitch ) + { + const OSSymbol *sym; + sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSwitchVarsKey); + if( sym ) { - if( gIOHibernateMode & kIOHibernateModeSwitch ) - { - const OSSymbol *sym; - sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSwitchVarsKey); - if( sym ) - { - gIOOptionsEntry->setProperty(sym, data); /* intentional insecure backup of rtc boot vars */ - sym->release(); - } - } + gIOOptionsEntry->setProperty(sym, data); /* intentional insecure backup of rtc boot vars */ + sym->release(); } + } + } - data->release(); + data->release(); } if (gIOChosenEntry) { data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey)); if (data) gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); + { + // set BootNext + + if (!gIOHibernateBoot0082Data) + { + data = OSDynamicCast(OSData, gIOChosenEntry->getProperty("boot-device-path")); + if (data) + { + // AppleNVRAM_EFI_LOAD_OPTION + struct { + uint32_t Attributes; + uint16_t FilePathLength; + uint16_t Desc; + } loadOptionHeader; + loadOptionHeader.Attributes = 1; + loadOptionHeader.FilePathLength = data->getLength(); + loadOptionHeader.Desc = 0; + gIOHibernateBoot0082Data = OSData::withCapacity(sizeof(loadOptionHeader) + loadOptionHeader.FilePathLength); + if (gIOHibernateBoot0082Data) + { + gIOHibernateBoot0082Data->appendBytes(&loadOptionHeader, sizeof(loadOptionHeader)); + gIOHibernateBoot0082Data->appendBytes(data); + } + } + } + if (!gIOHibernateBoot0082Key) + gIOHibernateBoot0082Key = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:Boot0082"); + if (!gIOHibernateBootNextKey) + gIOHibernateBootNextKey = OSSymbol::withCString("8BE4DF61-93CA-11D2-AA0D-00E098032B8C:BootNext"); + if (!gIOHibernateBootNextData) + { + uint16_t bits = 0x0082; + gIOHibernateBootNextData = OSData::withBytes(&bits, sizeof(bits)); + } + if (gIOHibernateBoot0082Key && gIOHibernateBoot0082Data && 
gIOHibernateBootNextKey && gIOHibernateBootNextData) + { + gIOHibernateBootNextSave = gIOOptionsEntry->copyProperty(gIOHibernateBootNextKey); + gIOOptionsEntry->setProperty(gIOHibernateBoot0082Key, gIOHibernateBoot0082Data); + gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextData); + } + } } #else /* !i386 && !x86_64 */ if (kIOHibernateModeEncrypt & gIOHibernateMode) @@ -1589,22 +1639,32 @@ IOHibernateSystemWake(void) #endif #if defined(__i386__) || defined(__x86_64__) - IOService::getPMRootDomain()->removeProperty(kIOHibernateRTCVariablesKey); + IOService::getPMRootDomain()->removeProperty(gIOHibernateRTCVariablesKey); /* * Hibernate variable is written to NVRAM on platforms in which RtcRam * is not backed by coin cell. Remove Hibernate data from NVRAM. */ if (gIOOptionsEntry) { - const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateRTCVariablesKey); - if (sym) { - if (gIOOptionsEntry->getProperty(sym)) { - gIOOptionsEntry->removeProperty(sym); - gIOOptionsEntry->sync(); - } - sym->release(); + if (gIOHibernateRTCVariablesKey) { + if (gIOOptionsEntry->getProperty(gIOHibernateRTCVariablesKey)) { + gIOOptionsEntry->removeProperty(gIOHibernateRTCVariablesKey); + } + } + + if (gIOHibernateBootNextKey) + { + if (gIOHibernateBootNextSave) + { + gIOOptionsEntry->setProperty(gIOHibernateBootNextKey, gIOHibernateBootNextSave); + gIOHibernateBootNextSave->release(); + gIOHibernateBootNextSave = NULL; } + else + gIOOptionsEntry->removeProperty(gIOHibernateBootNextKey); + } + gIOOptionsEntry->sync(); } #endif diff --git a/iokit/Kernel/IOInterruptController.cpp b/iokit/Kernel/IOInterruptController.cpp index beedc1a8b..a8e04bddd 100644 --- a/iokit/Kernel/IOInterruptController.cpp +++ b/iokit/Kernel/IOInterruptController.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2008 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,12 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * DRI: Josh de Cesare - * - */ #if __ppc__ @@ -43,6 +37,9 @@ #include #include #include +#include +#include + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -83,7 +80,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, vector = &vectors[vectorNumber]; // Get the lock for this vector. - IOTakeLock(vector->interruptLock); + IOLockLock(vector->interruptLock); // Check if the interrupt source can/should be shared. canBeShared = vectorCanBeShared(vectorNumber, vector); @@ -102,7 +99,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, // If the vector is registered and can not be shared return error. 
if (wasAlreadyRegisterd && !canBeShared) { - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return kIOReturnNoResources; } @@ -115,7 +112,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, // Make the IOShareInterruptController instance vector->sharedController = new IOSharedInterruptController; if (vector->sharedController == 0) { - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return kIOReturnNoMemory; } @@ -139,7 +136,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, if (wasAlreadyRegisterd) enableInterrupt(originalNub, originalSource); vector->sharedController->release(); vector->sharedController = 0; - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return error; } @@ -167,7 +164,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, vector->sharedController->release(); vector->sharedController = 0; - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return error; } } @@ -199,7 +196,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, error = vector->sharedController->registerInterrupt(nub, source, target, handler, refCon); - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return error; } @@ -218,7 +215,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, vector->interruptDisabledSoft = 1; vector->interruptRegistered = 1; - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return kIOReturnSuccess; } @@ -235,11 +232,11 @@ IOReturn IOInterruptController::unregisterInterrupt(IOService *nub, int source) vector = &vectors[vectorNumber]; // Get the lock for this vector. - IOTakeLock(vector->interruptLock); + IOLockLock(vector->interruptLock); // Return success if it is not already registered if (!vector->interruptRegistered) { - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return kIOReturnSuccess; } @@ -260,7 +257,7 @@ IOReturn IOInterruptController::unregisterInterrupt(IOService *nub, int source) vector->target = 0; vector->refCon = 0; - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return kIOReturnSuccess; } @@ -512,13 +509,13 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub, vector = &vectors[vectorNumber]; // Get the lock for this vector. - IOTakeLock(vector->interruptLock); + IOLockLock(vector->interruptLock); // Is it unregistered? if (!vector->interruptRegistered) break; // Move along to the next one. - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); } if (vectorNumber != kIOSharedInterruptControllerDefaultVectors) break; @@ -555,7 +552,7 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub, if (++vectorsRegistered > numVectors) numVectors = vectorsRegistered; IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); return kIOReturnSuccess; } @@ -570,12 +567,12 @@ IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub, vector = &vectors[vectorNumber]; // Get the lock for this vector. 
- IOTakeLock(vector->interruptLock); + IOLockLock(vector->interruptLock); // Return success if it is not already registered if (!vector->interruptRegistered || (vector->nub != nub) || (vector->source != source)) { - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); continue; } @@ -598,7 +595,7 @@ IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub, IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); // Move along to the next one. - IOUnlock(vector->interruptLock); + IOLockUnlock(vector->interruptLock); } // Re-enable the controller if all vectors are enabled. @@ -713,9 +710,36 @@ IOReturn IOSharedInterruptController::handleInterrupt(void * /*refCon*/, // Call the handler if it exists. if (vector->interruptRegistered) { - vector->handler(vector->target, vector->refCon, - vector->nub, vector->source); + + bool trace = (gIOKitTrace & kIOTraceInterrupts) ? true : false; + bool timeHandler = gIOInterruptThresholdNS ? true : false; + uint64_t startTime = 0; + uint64_t endTime = 0; + + if (trace) + IOTimeStampStartConstant(IODBG_INTC(IOINTC_HANDLER), + (uintptr_t) vectorNumber, (uintptr_t) vector->handler, (uintptr_t)vector->target); + + if (timeHandler) + startTime = mach_absolute_time(); + + // Call handler. + vector->handler(vector->target, vector->refCon, vector->nub, vector->source); + + if (timeHandler) + { + endTime = mach_absolute_time(); + if ((endTime - startTime) > gIOInterruptThresholdNS) + panic("IOSIC::handleInterrupt: interrupt exceeded threshold, handlerTime = %qd, vectorNumber = %d, handler = %p, target = %p\n", + endTime - startTime, (int)vectorNumber, vector->handler, vector->target); + } + + if (trace) + IOTimeStampEndConstant(IODBG_INTC(IOINTC_HANDLER), + (uintptr_t) vectorNumber, (uintptr_t) vector->handler, (uintptr_t)vector->target); + } + } vector->interruptActive = 0; diff --git a/iokit/Kernel/IOInterruptEventSource.cpp b/iokit/Kernel/IOInterruptEventSource.cpp index 9694b1130..97d4c5957 100644 --- a/iokit/Kernel/IOInterruptEventSource.cpp +++ b/iokit/Kernel/IOInterruptEventSource.cpp @@ -33,38 +33,13 @@ HISTORY Created. 
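The timing logic added to IOSharedInterruptController::handleInterrupt above panics when a handler runs longer than gIOInterruptThresholdNS (settable through the debug.iointthreshold sysctl, or at boot via iointthreshold in microseconds). The kernel compares raw mach_absolute_time() deltas directly against nanoseconds, which is exact where the absolute timebase is 1:1 with nanoseconds, as on these Intel kernels; a portable user-space version of the same measurement needs the explicit timebase conversion, as sketched here:

    /* User-space sketch of the handler-duration check (Mac OS X). */
    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <mach/mach_time.h>

    static void slow_handler(void) { usleep(2000); }  /* pretend ISR work */

    int main(void)
    {
        const uint64_t threshold_ns = 1000000;        /* 1 ms */
        mach_timebase_info_data_t tb;
        mach_timebase_info(&tb);

        uint64_t start = mach_absolute_time();
        slow_handler();
        uint64_t elapsed = mach_absolute_time() - start;

        uint64_t ns = elapsed * tb.numer / tb.denom;
        if (ns > threshold_ns)
            printf("handler exceeded threshold: %llu ns (the kernel would panic)\n",
                   (unsigned long long)ns);
        return 0;
    }
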
*/ #include +#include #include #include #include #include #include -#if KDEBUG - -#define IOTimeTypeStampS(t) \ -do { \ - IOTimeStampStart(IODBG_INTES(t), \ - (uintptr_t) this, (uintptr_t) owner); \ -} while(0) - -#define IOTimeTypeStampE(t) \ -do { \ - IOTimeStampEnd(IODBG_INTES(t), \ - (uintptr_t) this, (uintptr_t) owner); \ -} while(0) - -#define IOTimeStampLatency() \ -do { \ - IOTimeStampEnd(IODBG_INTES(IOINTES_LAT), \ - (uintptr_t) this, (uintptr_t) owner); \ -} while(0) - -#else /* !KDEBUG */ -#define IOTimeTypeStampS(t) -#define IOTimeTypeStampE(t) -#define IOTimeStampLatency() -#endif /* KDEBUG */ - #define super IOEventSource OSDefineMetaClassAndStructors(IOInterruptEventSource, IOEventSource) @@ -90,36 +65,43 @@ bool IOInterruptEventSource::init(OSObject *inOwner, provider = inProvider; producerCount = consumerCount = 0; autoDisable = explicitDisable = false; - intIndex = -1; + intIndex = ~inIntIndex; // Assumes inOwner holds a reference(retain) on the provider if (inProvider) { - int intType; - - res = (kIOReturnSuccess - == inProvider->getInterruptType(inIntIndex, &intType)); - if (res) { - IOInterruptAction intHandler; - - autoDisable = (intType == kIOInterruptTypeLevel); - if (autoDisable) { - intHandler = OSMemberFunctionCast(IOInterruptAction, - this, &IOInterruptEventSource::disableInterruptOccurred); - } - else - intHandler = OSMemberFunctionCast(IOInterruptAction, - this, &IOInterruptEventSource::normalInterruptOccurred); - - res = (kIOReturnSuccess == inProvider->registerInterrupt - (inIntIndex, this, intHandler)); - if (res) - intIndex = inIntIndex; - } + res = (kIOReturnSuccess == registerInterruptHandler(inProvider, inIntIndex)); + if (res) + intIndex = inIntIndex; } return res; } +IOReturn IOInterruptEventSource::registerInterruptHandler(IOService *inProvider, + int inIntIndex) +{ + IOReturn ret; + int intType; + IOInterruptAction intHandler; + + ret = inProvider->getInterruptType(inIntIndex, &intType); + if (kIOReturnSuccess != ret) + return (ret); + + autoDisable = (intType == kIOInterruptTypeLevel); + if (autoDisable) { + intHandler = OSMemberFunctionCast(IOInterruptAction, + this, &IOInterruptEventSource::disableInterruptOccurred); + } + else + intHandler = OSMemberFunctionCast(IOInterruptAction, + this, &IOInterruptEventSource::normalInterruptOccurred); + + ret = provider->registerInterrupt(inIntIndex, this, intHandler); + + return (ret); +} + IOInterruptEventSource * IOInterruptEventSource::interruptEventSource(OSObject *inOwner, Action inAction, @@ -138,7 +120,7 @@ IOInterruptEventSource::interruptEventSource(OSObject *inOwner, void IOInterruptEventSource::free() { - if (provider && intIndex != -1) + if (provider && intIndex >= 0) provider->unregisterInterrupt(intIndex); super::free(); @@ -146,7 +128,7 @@ void IOInterruptEventSource::free() void IOInterruptEventSource::enable() { - if (provider && intIndex != -1) { + if (provider && intIndex >= 0) { provider->enableInterrupt(intIndex); explicitDisable = false; enabled = true; @@ -155,13 +137,30 @@ void IOInterruptEventSource::enable() void IOInterruptEventSource::disable() { - if (provider && intIndex != -1) { + if (provider && intIndex >= 0) { provider->disableInterrupt(intIndex); explicitDisable = true; enabled = false; } } +void IOInterruptEventSource::setWorkLoop(IOWorkLoop *inWorkLoop) +{ + super::setWorkLoop(inWorkLoop); + + if (!provider) + return; + + if ( !inWorkLoop ) { + if (intIndex >= 0) { + provider->unregisterInterrupt(intIndex); + intIndex = ~intIndex; + } + } else if ((intIndex < 0) && 
(kIOReturnSuccess == registerInterruptHandler(provider, ~intIndex))) { + intIndex = ~intIndex; + } +} + const IOService *IOInterruptEventSource::getProvider() const { return provider; @@ -182,27 +181,38 @@ bool IOInterruptEventSource::checkForWork() unsigned int cacheProdCount = producerCount; int numInts = cacheProdCount - consumerCount; IOInterruptEventAction intAction = (IOInterruptEventAction) action; + bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; - if (numInts > 0) { + if ( numInts > 0 ) + { + if (trace) + IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION), + (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); - IOTimeStampLatency(); - IOTimeTypeStampS(IOINTES_CLIENT); - IOTimeStampConstant(IODBG_INTES(IOINTES_ACTION), - (uintptr_t) intAction, (uintptr_t) owner); - (*intAction)(owner, this, numInts); - IOTimeTypeStampE(IOINTES_CLIENT); + // Call the handler + (*intAction)(owner, this, numInts); + + if (trace) + IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION), + (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); consumerCount = cacheProdCount; if (autoDisable && !explicitDisable) enable(); } - else if (numInts < 0) { - IOTimeStampLatency(); - IOTimeTypeStampS(IOINTES_CLIENT); - IOTimeStampConstant(IODBG_INTES(IOINTES_ACTION), - (uintptr_t) intAction, (uintptr_t) owner); - (*intAction)(owner, this, -numInts); - IOTimeTypeStampE(IOINTES_CLIENT); + + else if ( numInts < 0 ) + { + if (trace) + IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION), + (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); + + // Call the handler + (*intAction)(owner, this, -numInts); + + if (trace) + IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION), + (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop); consumerCount = cacheProdCount; if (autoDisable && !explicitDisable) @@ -215,33 +225,35 @@ bool IOInterruptEventSource::checkForWork() void IOInterruptEventSource::normalInterruptOccurred (void */*refcon*/, IOService */*prov*/, int /*source*/) { -IOTimeTypeStampS(IOINTES_INTCTXT); -IOTimeStampLatency(); + bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; producerCount++; -IOTimeTypeStampS(IOINTES_SEMA); + if (trace) + IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); + signalWorkAvailable(); -IOTimeTypeStampE(IOINTES_SEMA); -IOTimeTypeStampE(IOINTES_INTCTXT); + if (trace) + IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); } void IOInterruptEventSource::disableInterruptOccurred (void */*refcon*/, IOService *prov, int source) { -IOTimeTypeStampS(IOINTES_INTCTXT); -IOTimeStampLatency(); + bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false; prov->disableInterrupt(source); /* disable the interrupt */ producerCount++; -IOTimeTypeStampS(IOINTES_SEMA); + if (trace) + IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); + signalWorkAvailable(); -IOTimeTypeStampE(IOINTES_SEMA); -IOTimeTypeStampE(IOINTES_INTCTXT); + if (trace) + IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner); } void IOInterruptEventSource::interruptOccurred diff --git a/iokit/Kernel/IOKitDebug.cpp b/iokit/Kernel/IOKitDebug.cpp index f061b516c..31d681664 100644 --- a/iokit/Kernel/IOKitDebug.cpp +++ b/iokit/Kernel/IOKitDebug.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. 
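The intIndex bookkeeping above deserves a note: a registered event source stores the non-negative interrupt index, and an unregistered one stores its bitwise complement (always negative), so setWorkLoop can unregister with `intIndex = ~intIndex` and later recover the original index with another complement; `intIndex >= 0` therefore replaces the old `!= -1` test. A tiny demonstration:

    /* The one's-complement encoding IOInterruptEventSource now uses for
     * intIndex: negative means "not registered, but remember which
     * index", and ~ flips between the two states losslessly. */
    #include <stdio.h>

    int main(void)
    {
        int intIndex = ~3;        /* constructed unregistered, index 3 */

        printf("unregistered: %d (registered? %s)\n",
               intIndex, intIndex >= 0 ? "yes" : "no");

        intIndex = ~intIndex;     /* setWorkLoop(wl) re-registers */
        printf("registered:   %d (registered? %s)\n",
               intIndex, intIndex >= 0 ? "yes" : "no");

        intIndex = ~intIndex;     /* setWorkLoop(NULL) unregisters */
        printf("unregistered: %d, original index recoverable as %d\n",
               intIndex, ~intIndex);
        return 0;
    }
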
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,12 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - */ #include @@ -50,7 +44,12 @@ #endif SInt64 gIOKitDebug = DEBUG_INIT_VALUE; -SYSCTL_QUAD(_debug, OID_AUTO, iokit, CTLFLAG_RW, &gIOKitDebug, "boot_arg io"); +SInt64 gIOKitTrace = 0x3B; +UInt64 gIOInterruptThresholdNS = 0; + +SYSCTL_QUAD(_debug, OID_AUTO, iokit, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitDebug, "boot_arg io"); +SYSCTL_QUAD(_debug, OID_AUTO, iotrace, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitTrace, "trace io"); +SYSCTL_QUAD(_debug, OID_AUTO, iointthreshold, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOInterruptThresholdNS, "io interrupt threshold"); int debug_malloc_size; diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index 2deffc415..804c57f24 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -46,12 +46,10 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, vm_map_t IOPageableMapForAddress(uintptr_t address); kern_return_t -IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits options, bool pageable, +IOMemoryDescriptorMapMemEntry(vm_map_t * map, ipc_port_t entry, IOOptionBits options, bool pageable, mach_vm_size_t offset, mach_vm_address_t * address, mach_vm_size_t length); kern_return_t -IOMemoryDescriptorMapCopy(vm_map_t map, - vm_map_t src_map, - mach_vm_offset_t src_address, +IOMemoryDescriptorMapCopy(vm_map_t * map, IOOptionBits options, mach_vm_size_t offset, mach_vm_address_t * address, mach_vm_size_t length); diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index bd3c67176..a46021ede 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -2311,6 +2311,9 @@ IOReturn IOGeneralMemoryDescriptor::doMap( user_addr_t range0Addr = 0; IOByteCount range0Len = 0; + if ((offset >= _length) || ((offset + length) > _length)) + return( kIOReturnBadArgument ); + if (vec.v) getAddrLenForInd(range0Addr, range0Len, type, vec, 0); @@ -2339,38 +2342,110 @@ IOReturn IOGeneralMemoryDescriptor::doMap( else if (kIOMapDefaultCache != (options & kIOMapCacheMask)) prot |= VM_PROT_WRITE; - kr = mach_make_memory_entry_64(get_task_map(_task), - &actualSize, range0Addr, - prot, &sharedMem, - NULL ); - - if( (KERN_SUCCESS == kr) && (actualSize != round_page(size))) + if (_rangesCount == 1) + { + kr = mach_make_memory_entry_64(get_task_map(_task), + &actualSize, range0Addr, + prot, &sharedMem, + NULL); + } + if( (_rangesCount != 1) + || ((KERN_SUCCESS == kr) && (actualSize != round_page(size)))) + do { - // map will cross vm objects #if IOASSERT - IOLog("mach_make_memory_entry_64 (%08llx) size (%08llx:%08llx)\n", - range0Addr, (UInt64)actualSize, (UInt64)size); + IOLog("mach_vm_remap path for ranges %d size (%08llx:%08llx)\n", + _rangesCount, (UInt64)actualSize, (UInt64)size); #endif kr = kIOReturnVMError; - ipc_port_release_send( sharedMem ); - sharedMem = MACH_PORT_NULL; - - mach_vm_address_t address; - mach_vm_size_t pageOffset = (range0Addr & PAGE_MASK); + if (sharedMem) + { + ipc_port_release_send(sharedMem); + sharedMem = MACH_PORT_NULL; + } + mach_vm_address_t address, segDestAddr; + mach_vm_size_t mapLength; + unsigned rangesIndex; + IOOptionBits type = _flags & kIOMemoryTypeMask; + user_addr_t srcAddr; + IOPhysicalLength segLen = 0; + + // Find starting address within the vector 
of ranges + for (rangesIndex = 0; rangesIndex < _rangesCount; rangesIndex++) { + getAddrLenForInd(srcAddr, segLen, type, _ranges, rangesIndex); + if (offset < segLen) + break; + offset -= segLen; // (make offset relative) + } + + mach_vm_size_t pageOffset = (srcAddr & PAGE_MASK); address = trunc_page_64(mapping->fAddress); + if ((options & kIOMapAnywhere) || ((mapping->fAddress - address) == pageOffset)) { - kr = IOMemoryDescriptorMapCopy(mapping->fAddressMap, - get_task_map(_task), range0Addr, + vm_map_t map = mapping->fAddressMap; + kr = IOMemoryDescriptorMapCopy(&map, options, offset, &address, round_page_64(length + pageOffset)); - if (kr == KERN_SUCCESS) + if (kr == KERN_SUCCESS) + { + segDestAddr = address; + segLen -= offset; + mapLength = length; + + while (true) + { + vm_prot_t cur_prot, max_prot; + kr = mach_vm_remap(map, &segDestAddr, round_page_64(segLen), PAGE_MASK, + VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + get_task_map(_task), trunc_page_64(srcAddr), + FALSE /* copy */, + &cur_prot, + &max_prot, + VM_INHERIT_NONE); + if (KERN_SUCCESS == kr) + { + if ((!(VM_PROT_READ & cur_prot)) + || (!(kIOMapReadOnly & options) && !(VM_PROT_WRITE & cur_prot))) + { + kr = KERN_PROTECTION_FAILURE; + } + } + if (KERN_SUCCESS != kr) + break; + segDestAddr += segLen; + mapLength -= segLen; + if (!mapLength) + break; + rangesIndex++; + if (rangesIndex >= _rangesCount) + { + kr = kIOReturnBadArgument; + break; + } + getAddrLenForInd(srcAddr, segLen, type, vec, rangesIndex); + if (srcAddr & PAGE_MASK) + { + kr = kIOReturnBadArgument; + break; + } + if (segLen > mapLength) + segLen = mapLength; + } + if (KERN_SUCCESS != kr) + { + mach_vm_deallocate(mapping->fAddressMap, address, round_page_64(length + pageOffset)); + } + } + + if (KERN_SUCCESS == kr) mapping->fAddress = address + pageOffset; else mapping->fAddress = NULL; } } + while (false); } else do { // _task == 0, must be physical @@ -2553,8 +2628,7 @@ bool IOMemoryMap::setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_size struct IOMemoryDescriptorMapAllocRef { ipc_port_t sharedMem; - vm_map_t src_map; - mach_vm_offset_t src_address; + vm_map_t map; mach_vm_address_t mapped; mach_vm_size_t size; mach_vm_size_t sourceOffset; @@ -2624,40 +2698,20 @@ static kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) ref->mapped = 0; continue; } + ref->map = map; } - else if (ref->src_map) - { - vm_prot_t cur_prot, max_prot; - err = mach_vm_remap(map, &ref->mapped, ref->size, PAGE_MASK, - (ref->options & kIOMapAnywhere) ? TRUE : FALSE, - ref->src_map, ref->src_address, - FALSE /* copy */, - &cur_prot, - &max_prot, - VM_INHERIT_NONE); - if (KERN_SUCCESS == err) - { - if ((!(VM_PROT_READ & cur_prot)) - || (!(kIOMapReadOnly & ref->options) && !(VM_PROT_WRITE & cur_prot))) - { - mach_vm_deallocate(map, ref->mapped, ref->size); - err = KERN_PROTECTION_FAILURE; - } - } - if (KERN_SUCCESS != err) - ref->mapped = 0; - } else { - err = mach_vm_allocate( map, &ref->mapped, ref->size, + err = mach_vm_allocate(map, &ref->mapped, ref->size, ((ref->options & kIOMapAnywhere) ? VM_FLAGS_ANYWHERE : VM_FLAGS_FIXED) | VM_MAKE_TAG(VM_MEMORY_IOKIT) ); if( KERN_SUCCESS != err) { ref->mapped = 0; continue; } + ref->map = map; // we have to make sure that these guys don't get copied if we fork. 
- err = vm_inherit( map, ref->mapped, ref->size, VM_INHERIT_NONE); + err = vm_inherit(map, ref->mapped, ref->size, VM_INHERIT_NONE); assert( KERN_SUCCESS == err ); } } @@ -2667,15 +2721,14 @@ static kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) } kern_return_t -IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits options, bool pageable, +IOMemoryDescriptorMapMemEntry(vm_map_t * map, ipc_port_t entry, IOOptionBits options, bool pageable, mach_vm_size_t offset, mach_vm_address_t * address, mach_vm_size_t length) { IOReturn err; IOMemoryDescriptorMapAllocRef ref; - ref.sharedMem = entry; - ref.src_map = NULL; + ref.map = *map; ref.sharedMem = entry; ref.sourceOffset = trunc_page_64(offset); ref.options = options; @@ -2687,19 +2740,19 @@ IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits optio else ref.mapped = *address; - if( ref.sharedMem && (map == kernel_map) && pageable) + if( ref.sharedMem && (ref.map == kernel_map) && pageable) err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref ); else - err = IOMemoryDescriptorMapAlloc( map, &ref ); + err = IOMemoryDescriptorMapAlloc( ref.map, &ref ); *address = ref.mapped; + *map = ref.map; + return (err); } kern_return_t -IOMemoryDescriptorMapCopy(vm_map_t map, - vm_map_t src_map, - mach_vm_offset_t src_address, +IOMemoryDescriptorMapCopy(vm_map_t * map, IOOptionBits options, mach_vm_size_t offset, mach_vm_address_t * address, mach_vm_size_t length) @@ -2707,9 +2760,8 @@ IOMemoryDescriptorMapCopy(vm_map_t map, IOReturn err; IOMemoryDescriptorMapAllocRef ref; + ref.map = *map; ref.sharedMem = NULL; - ref.src_map = src_map; - ref.src_address = src_address; ref.sourceOffset = trunc_page_64(offset); ref.options = options; ref.size = length; @@ -2720,12 +2772,14 @@ IOMemoryDescriptorMapCopy(vm_map_t map, else ref.mapped = *address; - if (map == kernel_map) + if (ref.map == kernel_map) err = IOIteratePageableMaps(ref.size, &IOMemoryDescriptorMapAlloc, &ref); else - err = IOMemoryDescriptorMapAlloc(map, &ref); + err = IOMemoryDescriptorMapAlloc(ref.map, &ref); *address = ref.mapped; + *map = ref.map; + return (err); } @@ -2828,7 +2882,8 @@ IOReturn IOMemoryDescriptor::doMap( } } - err = IOMemoryDescriptorMapMemEntry(mapping->fAddressMap, (ipc_port_t) _memEntry, + vm_map_t map = mapping->fAddressMap; + err = IOMemoryDescriptorMapMemEntry(&map, (ipc_port_t) _memEntry, options, (kIOMemoryBufferPageable & _flags), offset, &address, round_page_64(length + pageOffset)); if( err != KERN_SUCCESS) @@ -3515,7 +3570,7 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( if (kIOMapUnique & options) { - IOPhysicalAddress phys; + addr64_t phys; IOByteCount physLen; // if (owner != this) continue; @@ -3717,7 +3772,7 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const user_addr_t addr = vcopy[index].address; IOByteCount len = (IOByteCount) vcopy[index].length; values[0] = - OSNumber::withNumber(addr, (((UInt64) addr) >> 32)? 
64 : 32); + OSNumber::withNumber(addr, sizeof(addr) * 8); if (values[0] == 0) { result = false; goto bail; @@ -3791,6 +3846,3 @@ OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 15); IOPhysicalAddress IOMemoryDescriptor::getPhysicalAddress() { return( getPhysicalSegment( 0, 0 )); } - - - diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 2144447ae..3ccda1a1b 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -3167,6 +3167,11 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) clamshellIsClosed = false; clamshellExists = true; + if (msg & kIOPMSetValue) + { + reportUserInput(); + } + // Tell PMCPU informCPUStateChange(kInformLid, 0); @@ -3585,8 +3590,8 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) // Notify legacy clients applyToInterested(gIOPriorityPowerStateInterest, platformHaltRestartApplier, &ctx); - // For UPS shutdown leave File Server Mode intact, otherwise turn it off. - if (kPEUPSDelayHaltCPU != pe_type) + // For normal shutdown, turn off File Server Mode. + if (kPEHaltCPU == pe_type) { const OSSymbol * setting = OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey); OSNumber * num = OSNumber::withNumber((unsigned long long) 0, 32); diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index 8109e190e..f00ffd725 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -25,9 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * HISTORY - */ #include #include @@ -373,6 +370,7 @@ PMLog(const char *who, unsigned long event, unsigned long param1, unsigned long param2) { UInt32 debugFlags = gIOKitDebug; + UInt32 traceFlags = gIOKitTrace; if (debugFlags & kIOLogPower) { @@ -385,7 +383,7 @@ PMLog(const char *who, unsigned long event, nowus, current_thread(), who, // Identity (int) event, (long) param1, (long) param2); // Args - if (debugFlags & kIOLogTracePower) { + if (traceFlags & kIOTracePowerMgmt) { static const UInt32 sStartStopBitField[] = { 0x00000000, 0x00000040 }; // Only Program Hardware so far diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index 62dda56fe..1a28626cf 100644 --- a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -216,7 +216,7 @@ bool IOService::isInactive( void ) const /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define IOServiceTrace(csc, a, b, c, d) { \ - if(kIOTraceIOService & gIOKitDebug) { \ + if(kIOTraceIOService & gIOKitTrace) { \ KERNEL_DEBUG_CONSTANT(IODBG_IOSERVICE(csc), a, b, c, d, 0); \ } \ } @@ -2124,8 +2124,8 @@ void IOService::terminateWorker( IOOptionBits options ) (uintptr_t) (regID2 >> 32)); } else { - // not ready for stop if it has clients, skip it - if( (client->__state[1] & kIOServiceTermPhase3State) && client->getClient()) { + // a terminated client is not ready for stop if it has clients, skip it + if( (kIOServiceInactiveState & client->__state[0]) && client->getClient()) { TLOG("%s::defer stop(%s)\n", client->getName(), provider->getName()); uint64_t regID1 = provider->getRegistryEntryID(); @@ -3065,8 +3065,8 @@ void IOService::doServiceMatch( IOOptionBits options ) __state[1] |= kIOServiceConfigState; __state[0] |= kIOServiceRegisteredState; - if( reRegistered && (0 == (__state[0] & kIOServiceInactiveState))) { - + keepGuessing &= (0 == (__state[0] & kIOServiceInactiveState)); + if (reRegistered && keepGuessing) { iter = OSCollectionIterator::withCollection( (OSOrderedSet *) 
gNotifications->getObject( gIOPublishNotification ) ); if( iter) { @@ -3084,7 +3084,7 @@ void IOService::doServiceMatch( IOOptionBits options ) UNLOCKNOTIFY(); unlockForArbitration(); - if( matches->getCount() && (kIOReturnSuccess == getResources())) + if (keepGuessing && matches->getCount() && (kIOReturnSuccess == getResources())) probeCandidates( matches ); else matches->release(); diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 57d40396f..fcecfbf00 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -1954,9 +1954,9 @@ IOReturn IOService::requestPowerDomainState( // at its current or impending power state. outputPowerFlags = fPowerStates[fCurrentPowerState].outputPowerCharacter; - if ((fMachineState != kIOPM_Finished) && (getPMRootDomain() != this)) + if (fMachineState != kIOPM_Finished) { - if (IS_POWER_DROP) + if (IS_POWER_DROP && (getPMRootDomain() != this)) { // Use the lower power state when dropping power. // Must be careful since a power drop can be canceled @@ -1987,7 +1987,7 @@ IOReturn IOService::requestPowerDomainState( fPowerStates[fHeadNotePowerState].outputPowerCharacter; } } - else + else if (IS_POWER_RISE) { // When raising power, must report the output power flags from // child's perspective. A child power request may arrive while @@ -5576,6 +5576,26 @@ bool IOService::servicePMFreeQueue( if (root && (root != request)) more = true; + if (fLockedFlags.PMStop && fPMWorkQueue && fPMWorkQueue->isEmpty()) + { + // Driver PMstop'ed and the work queue is empty. + // Detach and destroy the work queue to avoid the similar cleanup by + // PMfree(), which is deadlock prone. After PMstop() if driver calls PM, + // or a request from power parent or child arrives, it is possible to + // create/cleanup work queue more than once. Should be rare. + + gIOPMWorkLoop->removeEventSource(fPMWorkQueue); + fPMWorkQueue->release(); + fPMWorkQueue = 0; + + if ( fIdleTimerEventSource != NULL ) { + fIdleTimerEventSource->disable(); + gIOPMWorkLoop->removeEventSource(fIdleTimerEventSource); + fIdleTimerEventSource->release(); + fIdleTimerEventSource = NULL; + } + } + releasePMRequest( request ); return more; } diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h index 772ac518c..818285f8e 100644 --- a/iokit/Kernel/IOServicePMPrivate.h +++ b/iokit/Kernel/IOServicePMPrivate.h @@ -491,6 +491,11 @@ protected: public: static IOPMWorkQueue * create( IOService * inOwner, Action work, Action retire ); void queuePMRequest( IOPMRequest * request ); + + inline boolean_t isEmpty( void ) + { + return queue_empty(&fWorkQueue); + } }; class IOPMCompletionQueue : public IOEventSource diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp index b621622fa..7b70541d6 100644 --- a/iokit/Kernel/IOStartIOKit.cpp +++ b/iokit/Kernel/IOStartIOKit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2008 Apple Inc. All rights reserved. + * Copyright (c) 1998-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,12 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1998,1999 Apple Inc. All rights reserved. 
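StartIOKit, just below, wires three boot-args into the new globals: io feeds gIOKitDebug as before, iotrace feeds gIOKitTrace, and iointthreshold (in microseconds) feeds gIOInterruptThresholdNS. For compatibility, trace bits that used to live in gIOKitDebug are OR-ed into gIOKitTrace via kIOTraceCompatBootArgs. A sketch of that arithmetic; the boot-arg values are illustrative assumptions, while the compiled-in gIOKitTrace default 0x3B comes from IOKitDebug.cpp above:

    #include <stdio.h>
    #include <stdint.h>

    #define kIOTracePowerMgmt      0x00000400ULL
    #define kIOTraceIOService      0x00080000ULL
    #define kIOTraceCompatBootArgs (kIOTraceIOService | kIOTracePowerMgmt)

    int main(void)
    {
        uint64_t gIOKitDebug = 0x00080400;  /* e.g. boot-args io=0x80400 */
        uint64_t gIOKitTrace = 0x3B;        /* compiled-in default */

        /* Old io= trace bits keep working: */
        gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs);
        printf("gIOKitTrace = 0x%llx\n", (unsigned long long)gIOKitTrace);

        /* iointthreshold=N is taken in microseconds, stored in ns: */
        uint32_t intThreshold = 500;        /* boot-args iointthreshold=500 */
        uint64_t gIOInterruptThresholdNS = (uint64_t)intThreshold * 1000;
        printf("threshold = %llu ns\n",
               (unsigned long long)gIOInterruptThresholdNS);
        return 0;
    }
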
- * - * HISTORY - * - */ #include #include @@ -128,14 +122,24 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) { IOPlatformExpertDevice * rootNub; int debugFlags; + uint32_t intThreshold; if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) )) gIOKitDebug = debugFlags; + if( PE_parse_boot_argn( "iotrace", &debugFlags, sizeof (debugFlags) )) + gIOKitTrace = debugFlags; + + // Compat for boot-args + gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs); + + if( PE_parse_boot_argn( "iointthreshold", &intThreshold, sizeof (intThreshold) )) + gIOInterruptThresholdNS = intThreshold * 1000; + // Check for the log synchronous bit set in io if (gIOKitDebug & kIOLogSynchronous) debug_mode = true; - + // // Have to start IOKit environment before we attempt to start // the C++ runtime environment. At some stage we have to clean up diff --git a/iokit/Kernel/IOTimerEventSource.cpp b/iokit/Kernel/IOTimerEventSource.cpp index ed45f6ab9..112deeee7 100644 --- a/iokit/Kernel/IOTimerEventSource.cpp +++ b/iokit/Kernel/IOTimerEventSource.cpp @@ -25,17 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * IOTimerEventSource.cpp - * - * HISTORY - * 2-Feb-1999 Joe Liu (jliu) created. - * 1999-10-14 Godfrey van der Linden(gvdl) - * Revamped to use thread_call APIs - * - */ #include @@ -51,6 +40,7 @@ __END_DECLS #include #include +#include #define super IOEventSource OSDefineMetaClassAndStructors(IOTimerEventSource, IOEventSource) @@ -88,9 +78,17 @@ void IOTimerEventSource::timeout(void *self) doit = (Action) me->action; if (doit && me->enabled && AbsoluteTime_to_scalar(&me->abstime)) { - IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION), + bool trace = (gIOKitTrace & kIOTraceTimers) ? true : false; + + if (trace) + IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION), (uintptr_t) doit, (uintptr_t) me->owner); + (*doit)(me->owner, me); + + if (trace) + IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION), + (uintptr_t) doit, (uintptr_t) me->owner); } wl->openGate(); } @@ -115,9 +113,17 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c) doit = (Action) me->action; if (doit && (me->reserved->calloutGeneration == count)) { - IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION), + bool trace = (gIOKitTrace & kIOTraceTimers) ? true : false; + + if (trace) + IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION), (uintptr_t) doit, (uintptr_t) me->owner); + (*doit)(me->owner, me); + + if (trace) + IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION), + (uintptr_t) doit, (uintptr_t) me->owner); } wl->openGate(); } diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp index 688a7c013..c32a565f6 100644 --- a/iokit/Kernel/IOWorkLoop.cpp +++ b/iokit/Kernel/IOWorkLoop.cpp @@ -25,13 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* -Copyright (c) 1998 Apple Computer, Inc. All rights reserved. - -HISTORY - 1998-7-13 Godfrey van der Linden(gvdl) - Created. -*/ #include #include @@ -39,6 +32,7 @@ HISTORY #include #include #include +#include #include #define super OSObject @@ -300,11 +294,16 @@ do { \ /* virtual */ bool IOWorkLoop::runEventSources() { bool res = false; + bool traceWL = (gIOKitTrace & kIOTraceWorkLoops) ? true : false; + bool traceES = (gIOKitTrace & kIOTraceEventSources) ? 
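/*
 * StartIOKit() above gains two boot-args: "iotrace" seeds gIOKitTrace (with a
 * compatibility OR from the old gIOKitDebug bits), and "iointthreshold" is
 * given in microseconds and scaled to nanoseconds. A sketch of that parsing
 * pattern; PE_parse_boot_argn is the real pexpert call (prototype simplified
 * here), the variable names are illustrative:
 */
#include <stdint.h>

extern int PE_parse_boot_argn(const char *name, void *buf, int bufsize);

static uint64_t int_threshold_ns_example;

static void
parse_trace_args_example(void)
{
    uint32_t us;

    /* boot-arg value is microseconds; store nanoseconds, as the
     * gIOInterruptThresholdNS assignment above does with * 1000 */
    if (PE_parse_boot_argn("iointthreshold", &us, sizeof(us)))
        int_threshold_ns_example = (uint64_t)us * 1000;
}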
true : false; + closeGate(); if (ISSETP(&fFlags, kLoopTerminate)) goto abort; - IOTimeWorkS(); + if (traceWL) + IOTimeStampStartConstant(IODBG_WORKLOOP(IOWL_WORK), (uintptr_t) this); + bool more; do { CLRP(&fFlags, kLoopRestart); @@ -314,9 +313,13 @@ do { \ IOSimpleLockUnlockEnableInterrupt(workToDoLock, is); for (IOEventSource *evnt = eventChain; evnt; evnt = evnt->getNext()) { - IOTimeClientS(); + if (traceES) + IOTimeStampStartConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt); + more |= evnt->checkForWork(); - IOTimeClientE(); + + if (traceES) + IOTimeStampEndConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt); if (ISSETP(&fFlags, kLoopTerminate)) goto abort; @@ -328,7 +331,9 @@ do { \ } while (more); res = true; - IOTimeWorkE(); + + if (traceWL) + IOTimeStampEndConstant(IODBG_WORKLOOP(IOWL_WORK), (uintptr_t) this); abort: openGate(); diff --git a/kgmacros b/kgmacros index 53e0a769a..5c2205e24 100644 --- a/kgmacros +++ b/kgmacros @@ -589,12 +589,15 @@ define showactint set $stkmask = 0x3 end set $kgm_return = 0 + set $kgm_actint_framecount = 0 while ($mysp != 0) && (($mysp & $stkmask) == 0) \ && ($mysp != $prevsp) \ && ((((unsigned long) $mysp ^ (unsigned long) $prevsp) < 0x2000) \ || (((unsigned long)$mysp < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \ - && ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack)))) + && ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack)))) \ + && ($kgm_actint_framecount < 128) printf "\n " + set $kgm_actint_framecount = $kgm_actint_framecount + 1 showptrhdrpad printf " " showptr $mysp @@ -7129,7 +7132,7 @@ define showbootermemorymap set $kgm_mptr = (EfiMemoryRange *)((unsigned long)kernelBootArgs->MemoryMap + $kgm_voffset + $kgm_i * $kgm_msize) # p/x *$kgm_mptr if $kgm_mptr->Type == 0 - printf "reserved " + printf "Reserved " end if $kgm_mptr->Type == 1 printf "LoaderCode" @@ -7150,7 +7153,7 @@ define showbootermemorymap printf "RT_data " end if $kgm_mptr->Type == 7 - printf "available " + printf "Convention" end if $kgm_mptr->Type == 8 printf "Unusable " @@ -9254,6 +9257,8 @@ set $_ioapic_index_ver = 0x01 set $_ioapic_index_redir_base = 0x10 set $_apic_vector_mask = 0xFF +set $_apic_timer_tsc_deadline = 0x40000 +set $_apic_timer_periodic = 0x20000 set $_apic_masked = 0x10000 set $_apic_trigger_level = 0x08000 set $_apic_polarity_high = 0x02000 @@ -9301,30 +9306,39 @@ end define _apic_print set $value = $arg0 - printf "[VEC=%3d ", $value & $_apic_vector_mask + printf "[VEC=%3d", $value & $_apic_vector_mask if $value & $_apic_masked - printf "MASK=yes " + printf " MASK=yes" else - printf "MASK=no " + printf " MASK=no " end if $value & $_apic_trigger_level - printf "TRIG=level " + printf " TRIG=level" else - printf "TRIG=edge " + printf " TRIG=edge " end if $value & $_apic_polarity_high - printf "POL=high" + printf " POL=high" else - printf "POL=low " + printf " POL=low " end if $value & $_apic_pending - printf " PEND=yes]\n" + printf " PEND=yes" else - printf " PEND=no ]\n" + printf " PEND=no " end + + if $value & $_apic_timer_periodic + printf " PERIODIC" + end + if $value & $_apic_timer_tsc_deadline + printf " TSC_DEADLINE" + end + + printf "]\n" end define ioapic_read32 diff --git a/osfmk/conf/Makefile.x86_64 b/osfmk/conf/Makefile.x86_64 index 405c2089f..d24ace3bf 100644 --- a/osfmk/conf/Makefile.x86_64 +++ b/osfmk/conf/Makefile.x86_64 @@ -24,7 +24,6 @@ OBJS_NO_WERROR= \ security_server.o \ device_server.o \ gssd_mach.o \ - mp.o # This is blocked on 
6640051 OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff --git a/osfmk/console/i386/serial_console.c b/osfmk/console/i386/serial_console.c index 36cb50feb..234a022b8 100644 --- a/osfmk/console/i386/serial_console.c +++ b/osfmk/console/i386/serial_console.c @@ -307,7 +307,9 @@ cnputc(char c) */ while (cbp->buf_ptr-cbp->buf_base + 1 > console_ring_space()) { simple_unlock(&console_ring.write_lock); + ml_set_interrupts_enabled(state); console_ring_try_empty(); + state = ml_set_interrupts_enabled(FALSE); SIMPLE_LOCK_NO_INTRS(&console_ring.write_lock); } for (cp = cbp->buf_base; cp < cbp->buf_ptr; cp++) diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index ebd35c82b..4b088aa41 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -149,11 +149,12 @@ static struct { static unsigned char *gc_buffer_attributes; static unsigned char *gc_buffer_characters; static unsigned char *gc_buffer_colorcodes; +static unsigned char *gc_buffer_tab_stops; static uint32_t gc_buffer_columns; static uint32_t gc_buffer_rows; static uint32_t gc_buffer_size; -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) decl_simple_lock_data(static, vcputc_lock); #define VCPUTC_LOCK_INIT() \ @@ -225,8 +226,7 @@ static unsigned char gc_color_code; static unsigned int gc_x, gc_y, gc_savex, gc_savey; static unsigned int gc_par[MAXPARS], gc_numpars, gc_hanging_cursor, gc_attr, gc_saveattr; -/* VT100 tab stops & scroll region */ -static char gc_tab_stops[255]; +/* VT100 scroll region */ static unsigned int gc_scrreg_top, gc_scrreg_bottom; #ifdef CONFIG_VC_PROGRESS_WHITE @@ -261,6 +261,7 @@ static void gc_clear_screen(unsigned int xx, unsigned int yy, int top, static void gc_enable(boolean_t enable); static void gc_hide_cursor(unsigned int xx, unsigned int yy); static void gc_initialize(struct vc_info * info); +static boolean_t gc_is_tab_stop(unsigned int column); static void gc_paint_char(unsigned int xx, unsigned int yy, unsigned char ch, int attrs); static void gc_putchar(char ch); @@ -277,6 +278,7 @@ static void gc_reset_tabs(void); static void gc_reset_vt100(void); static void gc_scroll_down(int num, unsigned int top, unsigned int bottom); static void gc_scroll_up(int num, unsigned int top, unsigned int bottom); +static void gc_set_tab_stop(unsigned int column, boolean_t enabled); static void gc_show_cursor(unsigned int xx, unsigned int yy); static void gc_update_color(int color, boolean_t fore); @@ -318,7 +320,7 @@ static void gc_clear_screen(unsigned int xx, unsigned int yy, int top, unsigned int bottom, int which) { - if (!gc_buffer_size) return; + if (!gc_buffer_size) return; if ( xx < gc_buffer_columns && yy < gc_buffer_rows && bottom <= gc_buffer_rows ) { @@ -357,6 +359,7 @@ gc_enable( boolean_t enable ) unsigned char *buffer_attributes = NULL; unsigned char *buffer_characters = NULL; unsigned char *buffer_colorcodes = NULL; + unsigned char *buffer_tab_stops = NULL; uint32_t buffer_columns = 0; uint32_t buffer_rows = 0; uint32_t buffer_size = 0; @@ -379,11 +382,15 @@ gc_enable( boolean_t enable ) buffer_attributes = gc_buffer_attributes; buffer_characters = gc_buffer_characters; buffer_colorcodes = gc_buffer_colorcodes; + buffer_tab_stops = gc_buffer_tab_stops; + buffer_columns = gc_buffer_columns; + buffer_rows = gc_buffer_rows; buffer_size = gc_buffer_size; gc_buffer_attributes = NULL; gc_buffer_characters = NULL; gc_buffer_colorcodes = NULL; + gc_buffer_tab_stops = NULL; gc_buffer_columns = 0; gc_buffer_rows = 0; gc_buffer_size = 0; @@ -394,6 +401,7 
@@ gc_enable( boolean_t enable ) kfree( buffer_attributes, buffer_size ); kfree( buffer_characters, buffer_size ); kfree( buffer_colorcodes, buffer_size ); + kfree( buffer_tab_stops, buffer_columns ); } else { @@ -414,14 +422,17 @@ gc_enable( boolean_t enable ) buffer_attributes = (unsigned char *) kalloc( buffer_size ); buffer_characters = (unsigned char *) kalloc( buffer_size ); buffer_colorcodes = (unsigned char *) kalloc( buffer_size ); + buffer_tab_stops = (unsigned char *) kalloc( buffer_columns ); if ( buffer_attributes == NULL || buffer_characters == NULL || - buffer_colorcodes == NULL ) + buffer_colorcodes == NULL || + buffer_tab_stops == NULL ) { if ( buffer_attributes ) kfree( buffer_attributes, buffer_size ); if ( buffer_characters ) kfree( buffer_characters, buffer_size ); if ( buffer_colorcodes ) kfree( buffer_colorcodes, buffer_size ); + if ( buffer_tab_stops ) kfree( buffer_tab_stops, buffer_columns ); buffer_columns = 0; buffer_rows = 0; @@ -432,6 +443,7 @@ gc_enable( boolean_t enable ) memset( buffer_attributes, ATTR_NONE, buffer_size ); memset( buffer_characters, ' ', buffer_size ); memset( buffer_colorcodes, COLOR_CODE_SET( 0, COLOR_FOREGROUND, TRUE ), buffer_size ); + memset( buffer_tab_stops, 0, buffer_columns ); } } } @@ -442,6 +454,7 @@ gc_enable( boolean_t enable ) gc_buffer_attributes = buffer_attributes; gc_buffer_characters = buffer_characters; gc_buffer_colorcodes = buffer_colorcodes; + gc_buffer_tab_stops = buffer_tab_stops; gc_buffer_columns = buffer_columns; gc_buffer_rows = buffer_rows; gc_buffer_size = buffer_size; @@ -657,7 +670,7 @@ gc_putc_esc(unsigned char ch) if (ch == 'E') gc_x = 0; break; case 'H': /* Set tab stop */ - gc_tab_stops[gc_x] = 1; + gc_set_tab_stop(gc_x, TRUE); break; case 'M': /* Cursor up */ if (gc_y <= gc_scrreg_top) { @@ -797,11 +810,11 @@ gc_putc_gotpars(unsigned char ch) case 3: /* Clear every tabs */ { for (i = 0; i <= vinfo.v_columns; i++) - gc_tab_stops[i] = 0; + gc_set_tab_stop(i, FALSE); } break; case 0: - gc_tab_stops[gc_x] = 0; + gc_set_tab_stop(gc_x, FALSE); break; } break; @@ -881,7 +894,8 @@ gc_putc_normal(unsigned char ch) } break; case '\t': /* Tab */ - while (gc_x < vinfo.v_columns && !gc_tab_stops[++gc_x]); + if (gc_buffer_tab_stops) while (gc_x < vinfo.v_columns && !gc_is_tab_stop(++gc_x)); + if (gc_x >= vinfo.v_columns) gc_x = vinfo.v_columns-1; break; @@ -965,13 +979,33 @@ static void gc_reset_tabs(void) { unsigned int i; + + if (!gc_buffer_tab_stops) return; - for (i = 0; i<= vinfo.v_columns; i++) { - gc_tab_stops[i] = ((i % 8) == 0); + for (i = 0; i < vinfo.v_columns; i++) { + gc_buffer_tab_stops[i] = ((i % 8) == 0); } } +static void +gc_set_tab_stop(unsigned int column, boolean_t enabled) +{ + if (gc_buffer_tab_stops && (column < vinfo.v_columns)) { + gc_buffer_tab_stops[column] = enabled; + } +} + +static boolean_t gc_is_tab_stop(unsigned int column) +{ + if (gc_buffer_tab_stops == NULL) + return ((column % 8) == 0); + if (column < vinfo.v_columns) + return gc_buffer_tab_stops[column]; + else + return FALSE; +} + static void gc_reset_vt100(void) { @@ -990,7 +1024,7 @@ gc_reset_vt100(void) static void gc_scroll_down(int num, unsigned int top, unsigned int bottom) { - if (!gc_buffer_size) return; + if (!gc_buffer_size) return; if ( bottom <= gc_buffer_rows ) { @@ -1099,7 +1133,7 @@ gc_scroll_down(int num, unsigned int top, unsigned int bottom) static void gc_scroll_up(int num, unsigned int top, unsigned int bottom) { - if (!gc_buffer_size) return; + if (!gc_buffer_size) return; if ( bottom <= gc_buffer_rows ) { 
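/*
 * The tab-stop rework above replaces a fixed 255-entry static array with a
 * buffer sized to the actual column count, falling back to the classic
 * every-8-columns VT100 rule whenever no buffer has been allocated. A
 * condensed sketch of that lookup logic (illustrative names, not the
 * gc_buffer_* globals):
 */
#include <stdbool.h>
#include <stddef.h>

static unsigned char *tab_stops_example;  /* one byte per column, or NULL */
static unsigned int   columns_example;

static bool
is_tab_stop_example(unsigned int col)
{
    if (tab_stops_example == NULL)
        return (col % 8) == 0;            /* default VT100 stops */
    return (col < columns_example) ? tab_stops_example[col] != 0 : false;
}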
@@ -1240,18 +1274,25 @@ gc_update_color(int color, boolean_t fore) void vcputc(__unused int l, __unused int u, int c) { - if ( gc_initialized && ( gc_enabled || debug_mode ) ) + if ( gc_enabled || debug_mode ) { spl_t s; s = splhigh(); +#if defined(__i386__) || defined(__x86_64__) + x86_filter_TLB_coherency_interrupts(TRUE); +#endif VCPUTC_LOCK_LOCK(); - - gc_hide_cursor(gc_x, gc_y); - gc_putchar(c); - gc_show_cursor(gc_x, gc_y); - + if ( gc_enabled || debug_mode ) + { + gc_hide_cursor(gc_x, gc_y); + gc_putchar(c); + gc_show_cursor(gc_x, gc_y); + } VCPUTC_LOCK_UNLOCK(); +#if defined(__i386__) || defined(__x86_64__) + x86_filter_TLB_coherency_interrupts(FALSE); +#endif splx(s); } } @@ -1795,6 +1836,7 @@ static const unsigned char * vc_clut; static const unsigned char * vc_clut8; static unsigned char vc_revclut8[256]; static uint32_t vc_progress_interval; +static uint32_t vc_progress_count; static uint64_t vc_progress_deadline; static thread_call_data_t vc_progress_call; static boolean_t vc_needsave; @@ -2211,8 +2253,9 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay) vc_needsave = TRUE; vc_saveunder = saveBuf; vc_saveunder_len = saveLen; - saveBuf = NULL; - saveLen = 0; + saveBuf = NULL; + saveLen = 0; + vc_progress_count = 0; clock_interval_to_deadline(vc_delay, 1000 * 1000 * 1000 /*second scale*/, @@ -2240,10 +2283,9 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay) static void -vc_progress_task(__unused void *arg0, void *arg) +vc_progress_task(__unused void *arg0, __unused void *arg) { spl_t s; - int count = (int)(uintptr_t) arg; int x, y, width, height; const unsigned char * data; @@ -2252,18 +2294,18 @@ vc_progress_task(__unused void *arg0, void *arg) if( vc_progress_enable) { - KERNEL_DEBUG_CONSTANT(0x7020008, count, 0, 0, 0, 0); + KERNEL_DEBUG_CONSTANT(0x7020008, vc_progress_count, 0, 0, 0, 0); - count++; - if( count >= vc_progress->count) - count = 0; + vc_progress_count++; + if( vc_progress_count >= vc_progress->count) + vc_progress_count = 0; width = vc_progress->width; height = vc_progress->height; x = vc_progress->dx; y = vc_progress->dy; data = vc_progress_data; - data += count * width * height; + data += vc_progress_count * width * height; if( 1 & vc_progress->flags) { x += ((vinfo.v_width - width) / 2); y += ((vinfo.v_height - height) / 2); @@ -2275,7 +2317,7 @@ vc_progress_task(__unused void *arg0, void *arg) vc_needsave = FALSE; clock_deadline_for_periodic_event(vc_progress_interval, mach_absolute_time(), &vc_progress_deadline); - thread_call_enter1_delayed(&vc_progress_call, (void *)(uintptr_t)count, vc_progress_deadline); + thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline); } simple_unlock(&vc_progress_lock); splx(s); @@ -2294,11 +2336,34 @@ static boolean_t gc_acquired = FALSE; static boolean_t gc_graphics_boot = FALSE; static boolean_t gc_desire_text = FALSE; -static unsigned int lastVideoPhys = 0; +static uint64_t lastVideoPhys = 0; static vm_offset_t lastVideoVirt = 0; static vm_size_t lastVideoSize = 0; static boolean_t lastVideoMapped = FALSE; +static void +gc_pause( boolean_t pause, boolean_t graphics_now ) +{ + spl_t s; + + s = splhigh( ); + VCPUTC_LOCK_LOCK( ); + + disableConsoleOutput = (pause && !console_is_serial()); + gc_enabled = (!pause && !graphics_now); + + VCPUTC_LOCK_UNLOCK( ); + + simple_lock(&vc_progress_lock); + + vc_progress_enable = gc_graphics_boot && !gc_desire_text && !pause; + if (vc_progress_enable) + thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline); + + simple_unlock(&vc_progress_lock); + 
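/*
 * vc_progress_task() above now keeps the animation frame index in a static
 * counter guarded by vc_progress_lock, instead of threading it through the
 * thread_call argument; that lets vc_progress_set() reset the animation to
 * frame 0 on re-enable. A reduced sketch of the wrap-around, with _example
 * names standing in for vc_progress_count and vc_progress->count:
 */
static unsigned int frame_example;        /* current animation frame */
static unsigned int frame_count_example;  /* frames in the animation */

static unsigned int
next_frame_example(void)
{
    /* caller is assumed to hold the progress lock */
    frame_example++;
    if (frame_example >= frame_count_example)
        frame_example = 0;
    return frame_example;
}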
splx(s); +} + void initialize_screen(PE_Video * boot_vinfo, unsigned int op) { @@ -2310,26 +2375,29 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) if ( boot_vinfo ) { struct vc_info new_vinfo = vinfo; - -// bcopy((const void *)boot_vinfo, (void *)&boot_video_info, sizeof(boot_video_info)); - /* * First, check if we are changing the size and/or location of the framebuffer */ new_vinfo.v_name[0] = 0; - new_vinfo.v_width = (unsigned int)boot_vinfo->v_width; - new_vinfo.v_height = (unsigned int)boot_vinfo->v_height; - new_vinfo.v_depth = (unsigned int)boot_vinfo->v_depth; - new_vinfo.v_rowbytes = (unsigned int)boot_vinfo->v_rowBytes; - new_vinfo.v_physaddr = boot_vinfo->v_baseAddr; /* Get the physical address */ + new_vinfo.v_physaddr = boot_vinfo->v_baseAddr & ~3; /* Get the physical address */ +#ifndef __LP64__ + new_vinfo.v_physaddr |= (((uint64_t) boot_vinfo->v_baseAddrHigh) << 32); +#endif + if (kPEBaseAddressChange != op) + { + new_vinfo.v_width = (unsigned int)boot_vinfo->v_width; + new_vinfo.v_height = (unsigned int)boot_vinfo->v_height; + new_vinfo.v_depth = (unsigned int)boot_vinfo->v_depth; + new_vinfo.v_rowbytes = (unsigned int)boot_vinfo->v_rowBytes; #if defined(__i386__) || defined(__x86_64__) - new_vinfo.v_type = (unsigned int)boot_vinfo->v_display; + new_vinfo.v_type = (unsigned int)boot_vinfo->v_display; #else - new_vinfo.v_type = 0; + new_vinfo.v_type = 0; #endif + } if (!lastVideoMapped) - kprintf("initialize_screen: b=%08lX, w=%08X, h=%08X, r=%08X, d=%08X\n", /* (BRINGUP) */ + kprintf("initialize_screen: b=%08llX, w=%08X, h=%08X, r=%08X, d=%08X\n", /* (BRINGUP) */ new_vinfo.v_physaddr, new_vinfo.v_width, new_vinfo.v_height, new_vinfo.v_rowbytes, new_vinfo.v_type); /* (BRINGUP) */ if (!new_vinfo.v_physaddr) /* Check to see if we have a framebuffer */ @@ -2344,17 +2412,16 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) else { /* - * Note that for the first time only, boot_vinfo->v_baseAddr is physical. - */ - - if (kernel_map != VM_MAP_NULL) /* If VM is up, we are given a virtual address */ + * If VM is up, we are given a virtual address, unless b0 is set to indicate physical. + */ + if ((kernel_map != VM_MAP_NULL) && (0 == (1 & boot_vinfo->v_baseAddr))) { fbppage = pmap_find_phys(kernel_pmap, (addr64_t)boot_vinfo->v_baseAddr); /* Get the physical address of frame buffer */ if(!fbppage) /* Did we find it? */ { panic("initialize_screen: Strange framebuffer - addr = %08X\n", (uint32_t)boot_vinfo->v_baseAddr); } - new_vinfo.v_physaddr = (fbppage << 12) | (boot_vinfo->v_baseAddr & PAGE_MASK); /* Get the physical address */ + new_vinfo.v_physaddr = (((uint64_t)fbppage) << 12) | (boot_vinfo->v_baseAddr & PAGE_MASK); /* Get the physical address */ } if (boot_vinfo->v_length != 0) @@ -2366,7 +2433,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) if ((lastVideoPhys != new_vinfo.v_physaddr) || (fbsize > lastVideoSize)) /* Did framebuffer change location or get bigger? 
*/ { unsigned int flags = VM_WIMG_IO; - newVideoVirt = io_map_spec((vm_offset_t)new_vinfo.v_physaddr, fbsize, flags); /* Allocate address space for framebuffer */ + newVideoVirt = io_map_spec((vm_map_offset_t)new_vinfo.v_physaddr, fbsize, flags); /* Allocate address space for framebuffer */ } } @@ -2409,13 +2476,14 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) kmem_free(kernel_map, lastVideoVirt, lastVideoSize); /* Toss kernel addresses */ } } - lastVideoPhys = (unsigned int)new_vinfo.v_physaddr; /* Remember the framebuffer address */ + lastVideoPhys = new_vinfo.v_physaddr; /* Remember the framebuffer address */ lastVideoSize = fbsize; /* Remember the size */ lastVideoVirt = newVideoVirt; /* Remember the virtual framebuffer address */ lastVideoMapped = (NULL != kernel_map); } - { + if (kPEBaseAddressChange != op) + { // Graphics mode setup by the booter. gc_ops.initialize = vc_initialize; @@ -2427,15 +2495,15 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) gc_ops.hide_cursor = vc_reverse_cursor; gc_ops.show_cursor = vc_reverse_cursor; gc_ops.update_color = vc_update_color; + gc_initialize(&vinfo); } - gc_initialize(&vinfo); - #ifdef GRATEFULDEBUGGER GratefulDebInit((bootBumbleC *)boot_vinfo); /* Re-initialize GratefulDeb */ #endif /* GRATEFULDEBUGGER */ } + graphics_now = gc_graphics_boot && !gc_desire_text; switch ( op ) { case kPEGraphicsMode: @@ -2452,15 +2520,24 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) case kPEAcquireScreen: if ( gc_acquired ) break; - graphics_now = gc_graphics_boot && !gc_desire_text; vc_progress_set( graphics_now, kProgressAcquireDelay ); gc_enable( !graphics_now ); gc_acquired = TRUE; gc_desire_text = FALSE; break; + case kPEDisableScreen: + if (gc_acquired) + { + gc_pause( TRUE, graphics_now ); + } + break; + case kPEEnableScreen: - /* deprecated */ + if (gc_acquired) + { + gc_pause( FALSE, graphics_now ); + } break; case kPETextScreen: @@ -2482,10 +2559,6 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) gc_enable( TRUE ); break; - case kPEDisableScreen: - /* deprecated */ - /* skip break */ - case kPEReleaseScreen: gc_acquired = FALSE; gc_desire_text = FALSE; diff --git a/osfmk/console/video_console.h b/osfmk/console/video_console.h index 39f1a8640..f67778b4e 100644 --- a/osfmk/console/video_console.h +++ b/osfmk/console/video_console.h @@ -61,7 +61,7 @@ struct vc_info unsigned long v_baseaddr; unsigned int v_type; char v_name[32]; - unsigned long v_physaddr; + uint64_t v_physaddr; unsigned int v_rows; /* characters */ unsigned int v_columns; /* characters */ unsigned int v_rowscanbytes; /* Actualy number of bytes used for display per row*/ diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index 7e4196b02..6727bae26 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -758,7 +758,7 @@ Debugger( #endif /* Print backtrace - callee is internally synchronized */ - panic_i386_backtrace(stackptr, 32, NULL, FALSE, NULL); + panic_i386_backtrace(stackptr, 64, NULL, FALSE, NULL); /* everything should be printed now so copy to NVRAM */ diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index f90433cfb..fb2cbe334 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -199,6 +199,11 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) */ cpu_IA32e_disable(current_cpu_datap()); #endif + /* + * Enable FPU/SIMD unit for potential hibernate acceleration + */ + clear_ts(); + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_START, 0, 0, 0, 0, 0); 
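/*
 * The acpi_sleep_kernel() hunk below reorders the wake path so the local
 * APIC is reprogrammed before any timer is allowed to pop, and the FPU/SIMD
 * unit is reinitialized via init_fpu() rather than fpinit()/clear_fpu().
 * The function names in this sketch are the real ones from the hunk;
 * the extern declarations and the reduced call sequence are inferred from
 * the call sites and are illustrative only:
 */
#include <stdint.h>

extern int      lapic_probe(void);
extern void     lapic_configure(void);
extern void     rtc_sleep_wakeup(uint64_t base);
extern void     rtc_timer_start(void);
extern void     init_fpu(void);
extern uint64_t acpi_sleep_abstime;

static void
wake_sequence_sketch(void)
{
    /* 1. bring the LAPIC back up so timer interrupts can be delivered */
    if (lapic_probe())
        lapic_configure();
    /* 2. resynchronize the realtime clock from the sleep timestamp */
    rtc_sleep_wakeup(acpi_sleep_abstime);
    /* 3. restart timer interrupts, then reconfigure the FP/SIMD unit */
    rtc_timer_start();
    init_fpu();
}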
save_kdebug_enable = kdebug_enable; @@ -220,6 +225,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) #else acpi_sleep_cpu(func, refcon); #endif + #ifdef __x86_64__ x86_64_post_sleep(old_cr3); #endif @@ -278,6 +284,10 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) ml_get_timebase(&now); + /* re-enable and re-init local apic (prior to starting timers) */ + if (lapic_probe()) + lapic_configure(); + /* let the realtime clock reset */ rtc_sleep_wakeup(acpi_sleep_abstime); @@ -299,21 +309,17 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) } else KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0); - /* re-enable and re-init local apic */ - if (lapic_probe()) - lapic_configure(); - /* Restore power management register state */ pmCPUMarkRunning(current_cpu_datap()); /* Restore power management timer state */ pmTimerRestore(); - /* Restart tick interrupts from the LAPIC timer */ - rtc_lapic_start_ticking(); + /* Restart timer interrupts */ + rtc_timer_start(); - fpinit(); - clear_fpu(); + /* Reconfigure FP/SIMD unit */ + init_fpu(); #if HIBERNATION #ifdef __i386__ diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 9578cc80b..63eb4446b 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -128,7 +128,6 @@ typedef struct { addr64_t cu_user_gs_base; } cpu_uber_t; - /* * Per-cpu data. * @@ -157,6 +156,9 @@ typedef struct cpu_data int cpu_phys_number; /* Physical CPU */ cpu_id_t cpu_id; /* Platform Expert */ int cpu_signals; /* IPI events */ + int cpu_prior_signals; /* Last set of events, + * debugging + */ int cpu_mcount_off; /* mcount recursion */ ast_t cpu_pending_ast; int cpu_type; @@ -227,6 +229,17 @@ typedef struct cpu_data rtc_nanotime_t *cpu_nanotime; /* Nanotime info */ thread_t csw_old_thread; thread_t csw_new_thread; + uint64_t cpu_max_observed_int_latency; + int cpu_max_observed_int_latency_vector; + uint64_t debugger_entry_time; + volatile boolean_t cpu_NMI_acknowledged; + /* A separate nested interrupt stack flag, to account + * for non-nested interrupts arriving while on the interrupt stack + * Currently only occurs when AICPM enables interrupts on the + * interrupt stack during processor offlining. 
+ */ + uint32_t cpu_nested_istack; + uint32_t cpu_nested_istack_events; } cpu_data_t; extern cpu_data_t *cpu_data_ptr[]; diff --git a/osfmk/i386/cpu_topology.h b/osfmk/i386/cpu_topology.h index a68a75bad..77445d9b1 100644 --- a/osfmk/i386/cpu_topology.h +++ b/osfmk/i386/cpu_topology.h @@ -141,8 +141,8 @@ typedef struct x86_lcpu boolean_t primary; /* logical cpu is primary CPU in package */ volatile lcpu_state_t state; /* state of the logical CPU */ volatile boolean_t stopped; /* used to indicate that the CPU has "stopped" */ - uint64_t rtcPop; /* when etimer wants a timer pop */ - uint64_t rtcDeadline; + uint64_t rtcPop; /* next timer pop programmed */ + uint64_t rtcDeadline; /* next etimer-requested deadline */ x86_cpu_cache_t *caches[MAX_CACHE_DEPTH]; void *pmStats; /* Power management stats for lcpu */ void *pmState; /* Power management state for lcpu */ diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index c974a71ed..b836ba88a 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -203,7 +203,8 @@ static cpuid_cache_descriptor_t intel_cpuid_leaf2_descriptor_table[] = { { 0xE5, CACHE, L3, 16, 16*M, 64 }, { 0xE6, CACHE, L3, 16, 24*M, 64 }, { 0xF0, PREFETCH, NA, NA, 64, NA }, - { 0xF1, PREFETCH, NA, NA, 128, NA } + { 0xF1, PREFETCH, NA, NA, 128, NA }, + { 0xFF, CACHE, NA, NA, 0, NA } }; #define INTEL_LEAF2_DESC_NUM (sizeof(intel_cpuid_leaf2_descriptor_table) / \ sizeof(cpuid_cache_descriptor_t)) @@ -240,7 +241,10 @@ static void cpuid_fn(uint32_t selector, uint32_t *result) "=b" (result[1]), "=c" (result[2]), "=d" (result[3]) - : "a"(selector)); + : "a"(selector), + "b" (0), + "c" (0), + "d" (0)); } else { do_cpuid(selector, result); } @@ -574,8 +578,13 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) ctp->sensor = bitfield32(reg[eax], 0, 0); ctp->dynamic_acceleration = bitfield32(reg[eax], 1, 1); ctp->invariant_APIC_timer = bitfield32(reg[eax], 2, 2); + ctp->core_power_limits = bitfield32(reg[eax], 3, 3); + ctp->fine_grain_clock_mod = bitfield32(reg[eax], 4, 4); + ctp->package_thermal_intr = bitfield32(reg[eax], 5, 5); ctp->thresholds = bitfield32(reg[ebx], 3, 0); ctp->ACNT_MCNT = bitfield32(reg[ecx], 0, 0); + ctp->hardware_feedback = bitfield32(reg[ecx], 1, 1); + ctp->energy_policy = bitfield32(reg[ecx], 2, 2); info_p->cpuid_thermal_leafp = ctp; } @@ -596,6 +605,15 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) info_p->cpuid_arch_perf_leafp = capp; } + if (info_p->cpuid_max_basic >= 0xd) { + cpuid_xsave_leaf_t *xsp = &info_p->cpuid_xsave_leaf; + /* + * XSAVE Features: + */ + cpuid_fn(0xd, info_p->cpuid_xsave_leaf.extended_state); + info_p->cpuid_xsave_leafp = xsp; + } + return; } @@ -630,6 +648,10 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p) case CPUID_MODEL_WESTMERE_EX: cpufamily = CPUFAMILY_INTEL_WESTMERE; break; + case CPUID_MODEL_SANDYBRIDGE: + case CPUID_MODEL_JAKETOWN: + cpufamily = CPUFAMILY_INTEL_SANDYBRIDGE; + break; } break; } @@ -637,7 +659,10 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p) info_p->cpuid_cpufamily = cpufamily; return cpufamily; } - +/* + * Must be invoked either when executing single threaded, or with + * independent synchronization. + */ void cpuid_set_info(void) { @@ -664,17 +689,13 @@ cpuid_set_info(void) * (which determines whether SMT/Hyperthreading is active). */ switch (info_p->cpuid_cpufamily) { - /* - * This should be the same as Nehalem but an A0 silicon bug returns - * invalid data in the top 12 bits. Hence, we use only bits [19..16] - * rather than [31..16] for core count - which actually can't exceed 8. 
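/*
 * The cpuid_fn()/do_cpuid() change above zeroes %ebx/%ecx/%edx before
 * executing CPUID: leaves such as 4, 0xB and 0xD interpret %ecx as a
 * sub-leaf index, so an uninitialized %ecx can return data for an arbitrary
 * sub-leaf. A user-space illustration of the same fix, pinning the sub-leaf
 * explicitly (sketch only, not the kernel helper):
 */
#include <stdint.h>

static void
cpuid_subleaf_example(uint32_t leaf, uint32_t subleaf, uint32_t out[4])
{
    __asm__ volatile("cpuid"
        : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])
        : "a" (leaf), "c" (subleaf));   /* %ecx pinned, never garbage */
}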
- */ case CPUFAMILY_INTEL_WESTMERE: { uint64_t msr = rdmsr64(MSR_CORE_THREAD_COUNT); info_p->core_count = bitfield32((uint32_t)msr, 19, 16); info_p->thread_count = bitfield32((uint32_t)msr, 15, 0); break; } + case CPUFAMILY_INTEL_SANDYBRIDGE: case CPUFAMILY_INTEL_NEHALEM: { uint64_t msr = rdmsr64(MSR_CORE_THREAD_COUNT); info_p->core_count = bitfield32((uint32_t)msr, 31, 16); @@ -694,62 +715,71 @@ static struct { uint64_t mask; const char *name; } feature_map[] = { - {CPUID_FEATURE_FPU, "FPU",}, - {CPUID_FEATURE_VME, "VME",}, - {CPUID_FEATURE_DE, "DE",}, - {CPUID_FEATURE_PSE, "PSE",}, - {CPUID_FEATURE_TSC, "TSC",}, - {CPUID_FEATURE_MSR, "MSR",}, - {CPUID_FEATURE_PAE, "PAE",}, - {CPUID_FEATURE_MCE, "MCE",}, - {CPUID_FEATURE_CX8, "CX8",}, - {CPUID_FEATURE_APIC, "APIC",}, - {CPUID_FEATURE_SEP, "SEP",}, - {CPUID_FEATURE_MTRR, "MTRR",}, - {CPUID_FEATURE_PGE, "PGE",}, - {CPUID_FEATURE_MCA, "MCA",}, - {CPUID_FEATURE_CMOV, "CMOV",}, - {CPUID_FEATURE_PAT, "PAT",}, - {CPUID_FEATURE_PSE36, "PSE36",}, - {CPUID_FEATURE_PSN, "PSN",}, - {CPUID_FEATURE_CLFSH, "CLFSH",}, - {CPUID_FEATURE_DS, "DS",}, - {CPUID_FEATURE_ACPI, "ACPI",}, - {CPUID_FEATURE_MMX, "MMX",}, - {CPUID_FEATURE_FXSR, "FXSR",}, - {CPUID_FEATURE_SSE, "SSE",}, - {CPUID_FEATURE_SSE2, "SSE2",}, - {CPUID_FEATURE_SS, "SS",}, - {CPUID_FEATURE_HTT, "HTT",}, - {CPUID_FEATURE_TM, "TM",}, - {CPUID_FEATURE_SSE3, "SSE3"}, + {CPUID_FEATURE_FPU, "FPU"}, + {CPUID_FEATURE_VME, "VME"}, + {CPUID_FEATURE_DE, "DE"}, + {CPUID_FEATURE_PSE, "PSE"}, + {CPUID_FEATURE_TSC, "TSC"}, + {CPUID_FEATURE_MSR, "MSR"}, + {CPUID_FEATURE_PAE, "PAE"}, + {CPUID_FEATURE_MCE, "MCE"}, + {CPUID_FEATURE_CX8, "CX8"}, + {CPUID_FEATURE_APIC, "APIC"}, + {CPUID_FEATURE_SEP, "SEP"}, + {CPUID_FEATURE_MTRR, "MTRR"}, + {CPUID_FEATURE_PGE, "PGE"}, + {CPUID_FEATURE_MCA, "MCA"}, + {CPUID_FEATURE_CMOV, "CMOV"}, + {CPUID_FEATURE_PAT, "PAT"}, + {CPUID_FEATURE_PSE36, "PSE36"}, + {CPUID_FEATURE_PSN, "PSN"}, + {CPUID_FEATURE_CLFSH, "CLFSH"}, + {CPUID_FEATURE_DS, "DS"}, + {CPUID_FEATURE_ACPI, "ACPI"}, + {CPUID_FEATURE_MMX, "MMX"}, + {CPUID_FEATURE_FXSR, "FXSR"}, + {CPUID_FEATURE_SSE, "SSE"}, + {CPUID_FEATURE_SSE2, "SSE2"}, + {CPUID_FEATURE_SS, "SS"}, + {CPUID_FEATURE_HTT, "HTT"}, + {CPUID_FEATURE_TM, "TM"}, + {CPUID_FEATURE_PBE, "PBE"}, + {CPUID_FEATURE_SSE3, "SSE3"}, {CPUID_FEATURE_PCLMULQDQ, "PCLMULQDQ"}, - {CPUID_FEATURE_MONITOR, "MON"}, - {CPUID_FEATURE_DSCPL, "DSCPL"}, - {CPUID_FEATURE_VMX, "VMX"}, - {CPUID_FEATURE_SMX, "SMX"}, - {CPUID_FEATURE_EST, "EST"}, - {CPUID_FEATURE_TM2, "TM2"}, - {CPUID_FEATURE_SSSE3, "SSSE3"}, - {CPUID_FEATURE_CID, "CID"}, - {CPUID_FEATURE_CX16, "CX16"}, - {CPUID_FEATURE_xTPR, "TPR"}, - {CPUID_FEATURE_PDCM, "PDCM"}, - {CPUID_FEATURE_SSE4_1, "SSE4.1"}, - {CPUID_FEATURE_SSE4_2, "SSE4.2"}, - {CPUID_FEATURE_xAPIC, "xAPIC"}, - {CPUID_FEATURE_POPCNT, "POPCNT"}, - {CPUID_FEATURE_AES, "AES"}, - {CPUID_FEATURE_VMM, "VMM"}, + {CPUID_FEATURE_DTES64, "DTES64"}, + {CPUID_FEATURE_MONITOR, "MON"}, + {CPUID_FEATURE_DSCPL, "DSCPL"}, + {CPUID_FEATURE_VMX, "VMX"}, + {CPUID_FEATURE_SMX, "SMX"}, + {CPUID_FEATURE_EST, "EST"}, + {CPUID_FEATURE_TM2, "TM2"}, + {CPUID_FEATURE_SSSE3, "SSSE3"}, + {CPUID_FEATURE_CID, "CID"}, + {CPUID_FEATURE_CX16, "CX16"}, + {CPUID_FEATURE_xTPR, "TPR"}, + {CPUID_FEATURE_PDCM, "PDCM"}, + {CPUID_FEATURE_SSE4_1, "SSE4.1"}, + {CPUID_FEATURE_SSE4_2, "SSE4.2"}, + {CPUID_FEATURE_xAPIC, "xAPIC"}, + {CPUID_FEATURE_MOVBE, "MOVBE"}, + {CPUID_FEATURE_POPCNT, "POPCNT"}, + {CPUID_FEATURE_AES, "AES"}, + {CPUID_FEATURE_XSAVE, "XSAVE"}, + {CPUID_FEATURE_OSXSAVE, "OSXSAVE"}, + 
{CPUID_FEATURE_VMM, "VMM"}, + {CPUID_FEATURE_SEGLIM64, "SEGLIM64"}, + {CPUID_FEATURE_PCID, "PCID"}, + {CPUID_FEATURE_TSCTMR, "TSCTMR"}, + {CPUID_FEATURE_AVX1_0, "AVX1.0"}, {0, 0} }, extfeature_map[] = { {CPUID_EXTFEATURE_SYSCALL, "SYSCALL"}, {CPUID_EXTFEATURE_XD, "XD"}, {CPUID_EXTFEATURE_1GBPAGE, "1GBPAGE"}, - {CPUID_EXTFEATURE_RDTSCP, "RDTSCP"}, {CPUID_EXTFEATURE_EM64T, "EM64T"}, {CPUID_EXTFEATURE_LAHF, "LAHF"}, + {CPUID_EXTFEATURE_RDTSCP, "RDTSCP"}, {CPUID_EXTFEATURE_TSCI, "TSCI"}, {0, 0} }; @@ -768,15 +798,16 @@ cpuid_info(void) char * cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len) { - size_t len = -1; + size_t len = 0; char *p = buf; int i; for (i = 0; feature_map[i].mask != 0; i++) { if ((features & feature_map[i].mask) == 0) continue; - if (len > 0) + if (len && ((size_t)(p - buf) < (buf_len - 1))) *p++ = ' '; + len = min(strlen(feature_map[i].name), (size_t) ((buf_len-1) - (p-buf))); if (len == 0) break; @@ -790,14 +821,14 @@ cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len) char * cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len) { - size_t len = -1; + size_t len = 0; char *p = buf; int i; for (i = 0; extfeature_map[i].mask != 0; i++) { if ((extfeatures & extfeature_map[i].mask) == 0) continue; - if (len > 0) + if (len && ((size_t) (p - buf) < (buf_len - 1))) *p++ = ' '; len = min(strlen(extfeature_map[i].name), (size_t) ((buf_len-1)-(p-buf))); if (len == 0) diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index ce8b2a378..4c3c329c0 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -53,58 +53,65 @@ * The CPUID_FEATURE_XXX values define 64-bit values * returned in %ecx:%edx to a CPUID request with %eax of 1: */ -#define CPUID_FEATURE_FPU _Bit(0) /* Floating point unit on-chip */ -#define CPUID_FEATURE_VME _Bit(1) /* Virtual Mode Extension */ -#define CPUID_FEATURE_DE _Bit(2) /* Debugging Extension */ -#define CPUID_FEATURE_PSE _Bit(3) /* Page Size Extension */ -#define CPUID_FEATURE_TSC _Bit(4) /* Time Stamp Counter */ -#define CPUID_FEATURE_MSR _Bit(5) /* Model Specific Registers */ -#define CPUID_FEATURE_PAE _Bit(6) /* Physical Address Extension */ -#define CPUID_FEATURE_MCE _Bit(7) /* Machine Check Exception */ -#define CPUID_FEATURE_CX8 _Bit(8) /* CMPXCHG8B */ -#define CPUID_FEATURE_APIC _Bit(9) /* On-chip APIC */ -#define CPUID_FEATURE_SEP _Bit(11) /* Fast System Call */ -#define CPUID_FEATURE_MTRR _Bit(12) /* Memory Type Range Register */ -#define CPUID_FEATURE_PGE _Bit(13) /* Page Global Enable */ -#define CPUID_FEATURE_MCA _Bit(14) /* Machine Check Architecture */ -#define CPUID_FEATURE_CMOV _Bit(15) /* Conditional Move Instruction */ -#define CPUID_FEATURE_PAT _Bit(16) /* Page Attribute Table */ -#define CPUID_FEATURE_PSE36 _Bit(17) /* 36-bit Page Size Extension */ -#define CPUID_FEATURE_PSN _Bit(18) /* Processor Serial Number */ -#define CPUID_FEATURE_CLFSH _Bit(19) /* CLFLUSH Instruction supported */ -#define CPUID_FEATURE_DS _Bit(21) /* Debug Store */ -#define CPUID_FEATURE_ACPI _Bit(22) /* Thermal monitor and Clock Ctrl */ -#define CPUID_FEATURE_MMX _Bit(23) /* MMX supported */ -#define CPUID_FEATURE_FXSR _Bit(24) /* Fast floating pt save/restore */ -#define CPUID_FEATURE_SSE _Bit(25) /* Streaming SIMD extensions */ -#define CPUID_FEATURE_SSE2 _Bit(26) /* Streaming SIMD extensions 2 */ -#define CPUID_FEATURE_SS _Bit(27) /* Self-Snoop */ -#define CPUID_FEATURE_HTT _Bit(28) /* Hyper-Threading Technology */ -#define CPUID_FEATURE_TM _Bit(29) /* Thermal Monitor (TM1) */ -#define 
CPUID_FEATURE_PBE _Bit(31) /* Pend Break Enable */ - -#define CPUID_FEATURE_SSE3 _HBit(0) /* Streaming SIMD extensions 3 */ -#define CPUID_FEATURE_PCLMULQDQ _HBit(1) /* PCLMULQDQ Instruction */ - -#define CPUID_FEATURE_MONITOR _HBit(3) /* Monitor/mwait */ -#define CPUID_FEATURE_DSCPL _HBit(4) /* Debug Store CPL */ -#define CPUID_FEATURE_VMX _HBit(5) /* VMX */ -#define CPUID_FEATURE_SMX _HBit(6) /* SMX */ -#define CPUID_FEATURE_EST _HBit(7) /* Enhanced SpeedsTep (GV3) */ -#define CPUID_FEATURE_TM2 _HBit(8) /* Thermal Monitor 2 */ -#define CPUID_FEATURE_SSSE3 _HBit(9) /* Supplemental SSE3 instructions */ -#define CPUID_FEATURE_CID _HBit(10) /* L1 Context ID */ -#define CPUID_FEATURE_CX16 _HBit(13) /* CmpXchg16b instruction */ -#define CPUID_FEATURE_xTPR _HBit(14) /* Send Task PRiority msgs */ -#define CPUID_FEATURE_PDCM _HBit(15) /* Perf/Debug Capability MSR */ - -#define CPUID_FEATURE_DCA _HBit(18) /* Direct Cache Access */ -#define CPUID_FEATURE_SSE4_1 _HBit(19) /* Streaming SIMD extensions 4.1 */ -#define CPUID_FEATURE_SSE4_2 _HBit(20) /* Streaming SIMD extensions 4.2 */ -#define CPUID_FEATURE_xAPIC _HBit(21) /* Extended APIC Mode */ -#define CPUID_FEATURE_POPCNT _HBit(23) /* POPCNT instruction */ -#define CPUID_FEATURE_AES _HBit(25) /* AES instructions */ -#define CPUID_FEATURE_VMM _HBit(31) /* VMM (Hypervisor) present */ +#define CPUID_FEATURE_FPU _Bit(0) /* Floating point unit on-chip */ +#define CPUID_FEATURE_VME _Bit(1) /* Virtual Mode Extension */ +#define CPUID_FEATURE_DE _Bit(2) /* Debugging Extension */ +#define CPUID_FEATURE_PSE _Bit(3) /* Page Size Extension */ +#define CPUID_FEATURE_TSC _Bit(4) /* Time Stamp Counter */ +#define CPUID_FEATURE_MSR _Bit(5) /* Model Specific Registers */ +#define CPUID_FEATURE_PAE _Bit(6) /* Physical Address Extension */ +#define CPUID_FEATURE_MCE _Bit(7) /* Machine Check Exception */ +#define CPUID_FEATURE_CX8 _Bit(8) /* CMPXCHG8B */ +#define CPUID_FEATURE_APIC _Bit(9) /* On-chip APIC */ +#define CPUID_FEATURE_SEP _Bit(11) /* Fast System Call */ +#define CPUID_FEATURE_MTRR _Bit(12) /* Memory Type Range Register */ +#define CPUID_FEATURE_PGE _Bit(13) /* Page Global Enable */ +#define CPUID_FEATURE_MCA _Bit(14) /* Machine Check Architecture */ +#define CPUID_FEATURE_CMOV _Bit(15) /* Conditional Move Instruction */ +#define CPUID_FEATURE_PAT _Bit(16) /* Page Attribute Table */ +#define CPUID_FEATURE_PSE36 _Bit(17) /* 36-bit Page Size Extension */ +#define CPUID_FEATURE_PSN _Bit(18) /* Processor Serial Number */ +#define CPUID_FEATURE_CLFSH _Bit(19) /* CLFLUSH Instruction supported */ +#define CPUID_FEATURE_DS _Bit(21) /* Debug Store */ +#define CPUID_FEATURE_ACPI _Bit(22) /* Thermal monitor and Clock Ctrl */ +#define CPUID_FEATURE_MMX _Bit(23) /* MMX supported */ +#define CPUID_FEATURE_FXSR _Bit(24) /* Fast floating pt save/restore */ +#define CPUID_FEATURE_SSE _Bit(25) /* Streaming SIMD extensions */ +#define CPUID_FEATURE_SSE2 _Bit(26) /* Streaming SIMD extensions 2 */ +#define CPUID_FEATURE_SS _Bit(27) /* Self-Snoop */ +#define CPUID_FEATURE_HTT _Bit(28) /* Hyper-Threading Technology */ +#define CPUID_FEATURE_TM _Bit(29) /* Thermal Monitor (TM1) */ +#define CPUID_FEATURE_PBE _Bit(31) /* Pend Break Enable */ + +#define CPUID_FEATURE_SSE3 _HBit(0) /* Streaming SIMD extensions 3 */ +#define CPUID_FEATURE_PCLMULQDQ _HBit(1) /* PCLMULQDQ instruction */ +#define CPUID_FEATURE_DTES64 _HBit(2) /* 64-bit DS layout */ +#define CPUID_FEATURE_MONITOR _HBit(3) /* Monitor/mwait */ +#define CPUID_FEATURE_DSCPL _HBit(4) /* Debug Store CPL */ +#define 
CPUID_FEATURE_VMX _HBit(5) /* VMX */ +#define CPUID_FEATURE_SMX _HBit(6) /* SMX */ +#define CPUID_FEATURE_EST _HBit(7) /* Enhanced SpeedStep (GV3) */ +#define CPUID_FEATURE_TM2 _HBit(8) /* Thermal Monitor 2 */ +#define CPUID_FEATURE_SSSE3 _HBit(9) /* Supplemental SSE3 instructions */ +#define CPUID_FEATURE_CID _HBit(10) /* L1 Context ID */ +#define CPUID_FEATURE_CX16 _HBit(13) /* CmpXchg16b instruction */ +#define CPUID_FEATURE_xTPR _HBit(14) /* Send Task PRiority msgs */ +#define CPUID_FEATURE_PDCM _HBit(15) /* Perf/Debug Capability MSR */ + +#define CPUID_FEATURE_DCA _HBit(18) /* Direct Cache Access */ +#define CPUID_FEATURE_SSE4_1 _HBit(19) /* Streaming SIMD extensions 4.1 */ +#define CPUID_FEATURE_SSE4_2 _HBit(20) /* Streaming SIMD extensions 4.2 */ +#define CPUID_FEATURE_xAPIC _HBit(21) /* Extended APIC Mode */ +#define CPUID_FEATURE_MOVBE _HBit(22) /* MOVBE instruction */ +#define CPUID_FEATURE_POPCNT _HBit(23) /* POPCNT instruction */ +#define CPUID_FEATURE_AES _HBit(25) /* AES instructions */ +#define CPUID_FEATURE_XSAVE _HBit(26) /* XSAVE instructions */ +#define CPUID_FEATURE_OSXSAVE _HBit(27) /* XGETBV/XSETBV instructions */ +#define CPUID_FEATURE_VMM _HBit(31) /* VMM (Hypervisor) present */ +#define CPUID_FEATURE_SEGLIM64 _HBit(11) /* 64-bit segment limit checking */ +#define CPUID_FEATURE_PCID _HBit(17) /* ASID-PCID support */ +#define CPUID_FEATURE_TSCTMR _HBit(24) /* TSC deadline timer */ +#define CPUID_FEATURE_AVX1_0 _HBit(28) /* AVX 1.0 instructions */ /* * The CPUID_EXTFEATURE_XXX values define 64-bit values @@ -113,11 +120,11 @@ #define CPUID_EXTFEATURE_SYSCALL _Bit(11) /* SYSCALL/sysret */ #define CPUID_EXTFEATURE_XD _Bit(20) /* eXecute Disable */ -#define CPUID_EXTFEATURE_1GBPAGE _Bit(26) /* 1G-Byte Page support */ +#define CPUID_EXTFEATURE_1GBPAGE _Bit(26) /* 1GB pages */ #define CPUID_EXTFEATURE_RDTSCP _Bit(27) /* RDTSCP */ #define CPUID_EXTFEATURE_EM64T _Bit(29) /* Extended Mem 64 Technology */ -#define CPUID_EXTFEATURE_LAHF _HBit(0) /* LAHF/SAHF instructions */ +#define CPUID_EXTFEATURE_LAHF _HBit(0) /* LAHF/SAHF instructions */ /* * The CPUID_EXTFEATURE_XXX values define 64-bit values @@ -130,16 +137,19 @@ #define CPUID_MWAIT_EXTENSION _Bit(0) /* enumeration of MWAIT extensions */ #define CPUID_MWAIT_BREAK _Bit(1) /* interrupts are break events */ -#define CPUID_MODEL_YONAH 14 -#define CPUID_MODEL_MEROM 15 -#define CPUID_MODEL_PENRYN 23 -#define CPUID_MODEL_NEHALEM 26 -#define CPUID_MODEL_FIELDS 30 /* Lynnfield, Clarksfield, Jasper */ -#define CPUID_MODEL_DALES 31 /* Havendale, Auburndale */ -#define CPUID_MODEL_NEHALEM_EX 46 -#define CPUID_MODEL_DALES_32NM 37 /* Clarkdale, Arrandale */ -#define CPUID_MODEL_WESTMERE 44 /* Gulftown, Westmere-EP, Westmere-WS */ -#define CPUID_MODEL_WESTMERE_EX 47 +#define CPUID_MODEL_YONAH 0x0E +#define CPUID_MODEL_MEROM 0x0F +#define CPUID_MODEL_PENRYN 0x17 +#define CPUID_MODEL_NEHALEM 0x1A +#define CPUID_MODEL_FIELDS 0x1E /* Lynnfield, Clarksfield, Jasper */ +#define CPUID_MODEL_DALES 0x1F /* Havendale, Auburndale */ +#define CPUID_MODEL_NEHALEM_EX 0x2E +#define CPUID_MODEL_DALES_32NM 0x25 /* Clarkdale, Arrandale */ +#define CPUID_MODEL_WESTMERE 0x2C /* Gulftown, Westmere-EP, Westmere-WS */ +#define CPUID_MODEL_WESTMERE_EX 0x2F +/* Additional internal models go here */ +#define CPUID_MODEL_SANDYBRIDGE 0x2A +#define CPUID_MODEL_JAKETOWN 0x2D #ifndef ASSEMBLER #include @@ -162,6 +172,7 @@ cpuid(uint32_t *data) "c" (data[ecx]), "d" (data[edx])); } + static inline void do_cpuid(uint32_t selector, uint32_t *data) { @@ -170,7 +181,10 @@
do_cpuid(uint32_t selector, uint32_t *data) "=b" (data[1]), "=c" (data[2]), "=d" (data[3]) - : "a"(selector)); + : "a"(selector), + "b" (0), + "c" (0), + "d" (0)); } /* @@ -209,10 +223,22 @@ typedef struct { boolean_t sensor; boolean_t dynamic_acceleration; boolean_t invariant_APIC_timer; + boolean_t core_power_limits; + boolean_t fine_grain_clock_mod; + boolean_t package_thermal_intr; uint32_t thresholds; boolean_t ACNT_MCNT; + boolean_t hardware_feedback; + boolean_t energy_policy; } cpuid_thermal_leaf_t; + +/* XSAVE Feature Leaf: */ +typedef struct { + uint32_t extended_state[4]; /* eax .. edx */ +} cpuid_xsave_leaf_t; + + /* Architectural Performance Monitoring Leaf: */ typedef struct { uint8_t version; @@ -262,6 +288,7 @@ typedef struct { #define cpuid_mwait_sub_Cstates cpuid_mwait_leaf.sub_Cstates cpuid_thermal_leaf_t cpuid_thermal_leaf; cpuid_arch_perf_leaf_t cpuid_arch_perf_leaf; + cpuid_xsave_leaf_t cpuid_xsave_leaf; /* Cache details: */ uint32_t cpuid_cache_linesize; @@ -294,7 +321,7 @@ typedef struct { cpuid_mwait_leaf_t *cpuid_mwait_leafp; cpuid_thermal_leaf_t *cpuid_thermal_leafp; cpuid_arch_perf_leaf_t *cpuid_arch_perf_leafp; - + cpuid_xsave_leaf_t *cpuid_xsave_leafp; } i386_cpu_info_t; #ifdef __cplusplus diff --git a/osfmk/i386/etimer.c b/osfmk/i386/etimer.c index aacc02ebc..72d3c94b7 100644 --- a/osfmk/i386/etimer.c +++ b/osfmk/i386/etimer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -65,39 +65,51 @@ * XXX a better implementation would use a set of generic callouts and iterate over them */ void -etimer_intr( -__unused int inuser, -__unused uint64_t iaddr) +etimer_intr(int user_mode, + uint64_t rip) { uint64_t abstime; rtclock_timer_t *mytimer; cpu_data_t *pp; - x86_lcpu_t *lcpu; + int32_t latency; + uint64_t pmdeadline; pp = current_cpu_datap(); - lcpu = x86_lcpu(); - - mytimer = &pp->rtclock_timer; /* Point to the event timer */ - abstime = mach_absolute_time(); /* Get the time now */ - /* is it time for power management state change? */ - if (pmCPUGetDeadline(pp) <= abstime) { - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START, 0, 0, 0, 0, 0); - pmCPUDeadline(pp); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END, 0, 0, 0, 0, 0); - - abstime = mach_absolute_time(); /* Get the time again since we ran a bit */ - } + abstime = mach_absolute_time(); /* Get the time now */ /* has a pending clock timer expired? */ - if (mytimer->deadline <= abstime) { /* Have we expired the deadline? */ - mytimer->has_expired = TRUE; /* Remember that we popped */ + mytimer = &pp->rtclock_timer; + if (mytimer->deadline <= abstime) { + /* + * Log interrupt service latency (-ve value expected by tool) + * a non-PM event is expected next. + */ + latency = (int32_t) (abstime - mytimer->deadline); + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE, + -latency, + (uint32_t)rip, user_mode, 0, 0); + + mytimer->has_expired = TRUE; /* Remember that we popped */ mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime); mytimer->has_expired = FALSE; + + /* Get the time again since we ran for a bit */ + abstime = mach_absolute_time(); + } + + /* is it time for power management state change? 
*/ + if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START, + 0, 0, 0, 0, 0); + pmCPUDeadline(pp); + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END, + 0, 0, 0, 0, 0); } - /* schedule our next deadline */ - lcpu->rtcPop = EndOfAllTime; /* any real deadline will be earlier */ etimer_resync_deadlines(); } @@ -110,11 +122,11 @@ void etimer_set_deadline(uint64_t deadline) spl_t s; cpu_data_t *pp; - s = splclock(); /* no interruptions */ + s = splclock(); /* no interruptions */ pp = current_cpu_datap(); - mytimer = &pp->rtclock_timer; /* Point to the timer itself */ - mytimer->deadline = deadline; /* Set the new expiration time */ + mytimer = &pp->rtclock_timer; /* Point to the timer itself */ + mytimer->deadline = deadline; /* Set the new expiration time */ etimer_resync_deadlines(); @@ -134,44 +146,37 @@ etimer_resync_deadlines(void) rtclock_timer_t *mytimer; spl_t s = splclock(); cpu_data_t *pp; - x86_lcpu_t *lcpu; + uint32_t decr; pp = current_cpu_datap(); - lcpu = x86_lcpu(); - deadline = ~0ULL; + deadline = EndOfAllTime; /* - * If we have a clock timer set sooner, pop on that. + * If we have a clock timer set, pick that. */ mytimer = &pp->rtclock_timer; - if (!mytimer->has_expired && mytimer->deadline > 0) + if (!mytimer->has_expired && + 0 < mytimer->deadline && mytimer->deadline < EndOfAllTime) deadline = mytimer->deadline; /* * If we have a power management deadline, see if that's earlier. */ pmdeadline = pmCPUGetDeadline(pp); - if (pmdeadline > 0 && pmdeadline < deadline) + if (0 < pmdeadline && pmdeadline < deadline) deadline = pmdeadline; /* * Go and set the "pop" event. */ - if (deadline > 0) { - int decr; - uint64_t now; - - now = mach_absolute_time(); - decr = setPop(deadline); - - if (deadline < now) - lcpu->rtcPop = now + decr; - else - lcpu->rtcPop = deadline; - - lcpu->rtcDeadline = lcpu->rtcPop; - - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, decr, 2, 0, 0, 0); + decr = (uint32_t) setPop(deadline); + + /* Record non-PM deadline for latency tool */ + if (deadline != pmdeadline) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, + decr, 2, + deadline, (uint32_t)(deadline >> 32), 0); } splx(s); } @@ -185,10 +190,8 @@ __unused void *arg) rtclock_timer_t *mytimer; uint64_t abstime; cpu_data_t *pp; - x86_lcpu_t *lcpu; pp = current_cpu_datap(); - lcpu = x86_lcpu(); mytimer = &pp->rtclock_timer; abstime = mach_absolute_time(); @@ -197,7 +200,6 @@ __unused void *arg) mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime); mytimer->has_expired = FALSE; - lcpu->rtcPop = EndOfAllTime; etimer_resync_deadlines(); } diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 5c458843b..7b4be4ebe 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -70,6 +70,8 @@ #include #include +#include + #include #include #include @@ -91,59 +93,122 @@ extern void fp_save( extern void fp_load( thread_t thr_act); -static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps); +static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps); -struct x86_fpsave_state starting_fp_state; +struct x86_avx_thread_state initial_fp_state __attribute((aligned(64))); /* Global MXCSR capability bitmask */ static unsigned int mxcsr_capability_mask; +#define fninit() \ + __asm__ volatile("fninit") + +#define fnstcw(control) \ + __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control))) + +#define 
fldcw(control) \ + __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) ) + +#define fnclex() \ + __asm__ volatile("fnclex") + +#define fnsave(state) \ + __asm__ volatile("fnsave %0" : "=m" (*state)) + +#define frstor(state) \ + __asm__ volatile("frstor %0" : : "m" (state)) + +#define fwait() \ + __asm__("fwait"); + +#define fxrstor(addr) __asm__ __volatile__("fxrstor %0" : : "m" (*(addr))) +#define fxsave(addr) __asm__ __volatile__("fxsave %0" : "=m" (*(addr))) + +static uint32_t fp_register_state_size = 0; +static uint32_t fpu_YMM_present = FALSE; +static uint32_t cpuid_reevaluated = 0; + +static void fpu_store_registers(void *, boolean_t); +static void fpu_load_registers(void *); + +extern void xsave64o(void); +extern void xrstor64o(void); + +#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM)) + +/* DRK: TODO replace opcodes with mnemonics when assembler support available */ + +static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) { + __asm__ __volatile__(".short 0x010F\n\t.byte 0xD1" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0)); +} + +static inline void xsave(void *a) { + /* MOD 0x4, operand ECX 0x1 */ + __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x21" :: "a"(XMASK), "d"(0), "c" (a)); +} + +static inline void xrstor(void *a) { + /* MOD 0x5, operand ECX 0x1 */ + __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x29" :: "a"(XMASK), "d"(0), "c" (a)); +} + +static inline void xsave64(void *a) { + /* Out of line call that executes in 64-bit mode on K32 */ + __asm__ __volatile__("call _xsave64o" :: "a"(XMASK), "d"(0), "c" (a)); +} + +static inline void xrstor64(void *a) { + /* Out of line call that executes in 64-bit mode on K32 */ + __asm__ __volatile__("call _xrstor64o" :: "a"(XMASK), "d"(0), "c" (a)); +} + +static inline unsigned short +fnstsw(void) +{ + unsigned short status; + __asm__ volatile("fnstsw %0" : "=ma" (status)); + return(status); +} + /* + * Configure the initial FPU state presented to new threads. * Determine the MXCSR capability mask, which allows us to mask off any * potentially unsafe "reserved" bits before restoring the FPU context. * *Not* per-cpu, assumes symmetry. */ + static void -configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps) +configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps) { - /* FXSAVE requires a 16 byte aligned store */ - assert(ALIGNED(ifps,16)); + /* XSAVE requires a 64 byte aligned store */ + assert(ALIGNED(fps, 64)); /* Clear, to prepare for the diagnostic FXSAVE */ - bzero(ifps, sizeof(*ifps)); - /* Disable FPU/SSE Device Not Available exceptions */ - clear_ts(); - __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state)); - mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK; + bzero(fps, sizeof(*fps)); + + fpinit(); + fpu_store_registers(fps, FALSE); + + mxcsr_capability_mask = fps->fx_MXCSR_MASK; /* Set default mask value if necessary */ if (mxcsr_capability_mask == 0) mxcsr_capability_mask = 0xffbf; - /* Re-enable FPU/SSE DNA exceptions */ - set_ts(); -} - -/* - * Allocate and initialize FP state for current thread. - * Don't load state. - */ -static struct x86_fpsave_state * -fp_state_alloc(void) -{ - struct x86_fpsave_state *ifps; + /* Clear vector register store */ + bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg)); + bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg)); - ifps = (struct x86_fpsave_state *)zalloc(ifps_zone); - assert(ALIGNED(ifps,16)); - bzero((char *)ifps, sizeof *ifps); + fps->fp_valid = TRUE; + fps->fp_save_layout = fpu_YMM_present ? 
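/*
 * The .short/.byte sequences in the xsetbv/xsave/xrstor inlines below
 * hand-assemble the instructions because, as the in-code TODO notes, the
 * assembler of the period lacked those mnemonics. With a newer assembler
 * the same operations could be written directly; a hedged sketch, not the
 * kernel's code (XCR0 component mask: x87 = bit 0, SSE = bit 1, YMM = bit 2):
 */
#include <stdint.h>

static inline void
xsetbv_example(uint32_t xcr, uint64_t mask)
{
    /* XCR index in %ecx, mask in %edx:%eax */
    __asm__ volatile("xsetbv"
        : : "c" (xcr), "a" ((uint32_t)mask), "d" ((uint32_t)(mask >> 32)));
}

static inline void
xsave_example(void *area, uint64_t mask)   /* area must be 64-byte aligned */
{
    /* component mask in %edx:%eax selects which states are saved */
    __asm__ volatile("xsave (%2)"
        : /* save area written via the memory clobber */
        : "a" ((uint32_t)mask), "d" ((uint32_t)(mask >> 32)), "r" (area)
        : "memory");
}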
XSAVE32: FXSAVE32; + fpu_load_registers(fps); - return ifps; -} + /* Poison values to trap unsafe usage */ + fps->fp_valid = 0xFFFFFFFF; + fps->fp_save_layout = FP_UNUSED; -static inline void -fp_state_free(struct x86_fpsave_state *ifps) -{ - zfree(ifps_zone, ifps); + /* Re-enable FPU/SSE DNA exceptions */ + set_ts(); } @@ -154,81 +219,248 @@ fp_state_free(struct x86_fpsave_state *ifps) void init_fpu(void) { - unsigned short status, control; - +#if DEBUG + unsigned short status; + unsigned short control; +#endif /* * Check for FPU by initializing it, * then trying to read the correct bit patterns from * the control and status registers. */ set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */ - fninit(); +#if DEBUG status = fnstsw(); fnstcw(&control); + + assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f)); +#endif + /* Advertise SSE support */ + if (cpuid_features() & CPUID_FEATURE_FXSR) { + fp_kind = FP_FXSR; + set_cr4(get_cr4() | CR4_OSFXS); + /* And allow SIMD exceptions if present */ + if (cpuid_features() & CPUID_FEATURE_SSE) { + set_cr4(get_cr4() | CR4_OSXMM); + } + fp_register_state_size = sizeof(struct x86_fx_thread_state); - if ((status & 0xff) == 0 && - (control & 0x103f) == 0x3f) - { - /* Use FPU save/restore instructions if available */ - if (cpuid_features() & CPUID_FEATURE_FXSR) { - fp_kind = FP_FXSR; - set_cr4(get_cr4() | CR4_FXS); - /* And allow SIMD instructions if present */ - if (cpuid_features() & CPUID_FEATURE_SSE) { - set_cr4(get_cr4() | CR4_XMM); - } - } else - panic("fpu is not FP_FXSR"); + } else + panic("fpu is not FP_FXSR"); - /* - * initialze FPU to normal starting - * position so that we can take a snapshot - * of that state and store it for future use - * when we're asked for the FPU state of a - * thread, and it hasn't initiated any yet - */ - fpinit(); - fxsave(&starting_fp_state.fx_save_state); + /* Configure the XSAVE context mechanism if the processor supports + * AVX/YMM registers + */ + if (cpuid_features() & CPUID_FEATURE_XSAVE) { + cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf; + if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) { + assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE); + /* XSAVE container size for all features */ + assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state)); + fp_register_state_size = sizeof(struct x86_avx_thread_state); + fpu_YMM_present = TRUE; + set_cr4(get_cr4() | CR4_OSXSAVE); + xsetbv(0, XMASK); + /* Re-evaluate CPUID, once, to reflect OSXSAVE */ + if (OSCompareAndSwap(0, 1, &cpuid_reevaluated)) + cpuid_set_info(); + /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */ + } + } + else + fpu_YMM_present = FALSE; + + fpinit(); + + /* + * Trap wait instructions. Turn off FPU for now. + */ + set_cr0(get_cr0() | CR0_TS | CR0_MP); +} + +/* + * Allocate and initialize FP state for current thread. + * Don't load state. + */ +static void * +fp_state_alloc(void) +{ + void *ifps = zalloc(ifps_zone); - /* - * Trap wait instructions. Turn off FPU for now. 
- */ - set_cr0(get_cr0() | CR0_TS | CR0_MP); +#if DEBUG + if (!(ALIGNED(ifps,64))) { + panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size); } +#endif + return ifps; +} + +static inline void +fp_state_free(void *ifps) +{ + zfree(ifps_zone, ifps); +} + +void clear_fpu(void) +{ + set_ts(); +} + + +static void fpu_load_registers(void *fstate) { + struct x86_fx_thread_state *ifps = fstate; + fp_save_layout_t layout = ifps->fp_save_layout; + + assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64); + assert(ALIGNED(ifps, 64)); + assert(ml_get_interrupts_enabled() == FALSE); + +#if DEBUG + if (layout == XSAVE32 || layout == XSAVE64) { + struct x86_avx_thread_state *iavx = fstate; + unsigned i; + /* Verify reserved bits in the XSAVE header*/ + if (iavx->_xh.xsbv & ~7) + panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv); + for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++) + if (iavx->_xh.xhrsvd[i]) + panic("Reserved bit set"); + } + if (fpu_YMM_present) { + if (layout != XSAVE32 && layout != XSAVE64) + panic("Inappropriate layout: %u\n", layout); + } +#endif /* DEBUG */ + +#if defined(__i386__) + if (layout == FXSAVE32) { + /* Restore the compatibility/legacy mode XMM+x87 state */ + fxrstor(ifps); + } + else if (layout == FXSAVE64) { + fxrstor64(ifps); + } + else if (layout == XSAVE32) { + xrstor(ifps); + } + else if (layout == XSAVE64) { + xrstor64(ifps); + } +#elif defined(__x86_64__) + if ((layout == XSAVE64) || (layout == XSAVE32)) + xrstor(ifps); else - { - /* - * NO FPU. - */ - panic("fpu is not FP_FXSR"); + fxrstor(ifps); +#endif +} + +static void fpu_store_registers(void *fstate, boolean_t is64) { + struct x86_fx_thread_state *ifps = fstate; + assert(ALIGNED(ifps, 64)); +#if defined(__i386__) + if (!is64) { + if (fpu_YMM_present) { + xsave(ifps); + ifps->fp_save_layout = XSAVE32; + } + else { + /* save the compatibility/legacy mode XMM+x87 state */ + fxsave(ifps); + ifps->fp_save_layout = FXSAVE32; + } + } + else { + if (fpu_YMM_present) { + xsave64(ifps); + ifps->fp_save_layout = XSAVE64; + } + else { + fxsave64(ifps); + ifps->fp_save_layout = FXSAVE64; + } + } +#elif defined(__x86_64__) + if (fpu_YMM_present) { + xsave(ifps); + ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32; } + else { + fxsave(ifps); + ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32; + } +#endif } /* * Initialize FP handling. */ + void fpu_module_init(void) { - struct x86_fpsave_state *new_ifps; - - ifps_zone = zinit(sizeof(struct x86_fpsave_state), - thread_max * sizeof(struct x86_fpsave_state), - THREAD_CHUNK * sizeof(struct x86_fpsave_state), + if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) && + (fp_register_state_size != sizeof(struct x86_avx_thread_state))) + panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size); + + assert(fpu_YMM_present != 0xFFFFFFFF); + + /* We explicitly choose an allocation size of 64 + * to eliminate waste for the 832 byte sized + * AVX XSAVE register save area. + */ + ifps_zone = zinit(fp_register_state_size, + thread_max * fp_register_state_size, + 64 * fp_register_state_size, "x86 fpsave state"); - new_ifps = fp_state_alloc(); - /* Determine MXCSR reserved bits */ - configure_mxcsr_capability_mask(new_ifps); - fp_state_free(new_ifps); + +#if ZONE_DEBUG + /* To maintain the required alignment, disable + * zone debugging for this zone as that appends + * 16 bytes to each element. 
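fpu_load_registers() and fpu_store_registers() above assert 64-byte alignment because XSAVE/XRSTOR fault on a misaligned save area (FXSAVE needs only 16 bytes), and fpu_module_init() sizes the zone, and disables zone element debugging (which prepends 16 bytes per element), precisely to preserve that boundary. A hedged stand-alone sketch of the same allocation contract, with C11 aligned_alloc standing in for the zone allocator:

    /* Illustrative only: allocate an XSAVE-compatible save area.
     * aligned_alloc is C11 and requires the size to be a multiple of
     * the alignment, hence the round-up. */
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    #define FP_ALIGN 64
    #define ALIGNED(p, a) ((((uintptr_t)(p)) & ((a) - 1)) == 0)

    void *fp_area_alloc(size_t save_size) {
        size_t sz = (save_size + FP_ALIGN - 1) & ~(size_t)(FP_ALIGN - 1);
        void *p = aligned_alloc(FP_ALIGN, sz);
        if (p != NULL) {
            memset(p, 0, sz);
            /* mirrors the DEBUG panic in fp_state_alloc() above */
            if (!ALIGNED(p, FP_ALIGN)) { free(p); return NULL; }
        }
        return p;
    }
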
+ */ + zone_debug_disable(ifps_zone); +#endif + /* Determine MXCSR reserved bits and configure initial FPU state*/ + configure_mxcsr_capability_mask(&initial_fp_state); +} + +/* + * Save thread`s FPU context. + */ +void +fpu_save_context(thread_t thread) +{ + struct x86_fx_thread_state *ifps; + + assert(ml_get_interrupts_enabled() == FALSE); + ifps = (thread)->machine.pcb->ifps; +#if DEBUG + if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) { + panic("ifps->fp_valid: %u\n", ifps->fp_valid); + } +#endif + if (ifps != 0 && (ifps->fp_valid == FALSE)) { + /* Clear CR0.TS in preparation for the FP context save. In + * theory, this shouldn't be necessary since a live FPU should + * indicate that TS is clear. However, various routines + * (such as sendsig & sigreturn) manipulate TS directly. + */ + clear_ts(); + /* registers are in FPU - save to memory */ + fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.pcb->iss))); + ifps->fp_valid = TRUE; + } + set_ts(); } + /* * Free a FPU save area. * Called only when thread terminating - no locking necessary. */ void -fpu_free(struct x86_fpsave_state *fps) +fpu_free(void *fps) { fp_state_free(fps); } @@ -244,14 +476,16 @@ fpu_free(struct x86_fpsave_state *fps) */ kern_return_t fpu_set_fxstate( - thread_t thr_act, - thread_state_t tstate) + thread_t thr_act, + thread_state_t tstate, + thread_flavor_t f) { - struct x86_fpsave_state *ifps; - struct x86_fpsave_state *new_ifps; + struct x86_fx_thread_state *ifps; + struct x86_fx_thread_state *new_ifps; x86_float_state64_t *state; pcb_t pcb; - + size_t state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state); + boolean_t old_valid; if (fp_kind == FP_NO) return KERN_FAILURE; @@ -291,28 +525,46 @@ fpu_set_fxstate( } ifps = new_ifps; new_ifps = 0; - pcb->ifps = ifps; + pcb->ifps = ifps; } /* * now copy over the new data. */ - bcopy((char *)&state->fpu_fcw, - (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save)); + old_valid = ifps->fp_valid; - /* XXX The layout of the state set from user-space may need to be - * validated for consistency. - */ +#if DEBUG + if ((old_valid == FALSE) && (thr_act != current_thread())) { + panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act); + } +#endif + + bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size); + + if (fpu_YMM_present) { + struct x86_avx_thread_state *iavx = (void *) ifps; + iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32; + /* Sanitize XSAVE header */ + bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd)); + if (state_size == sizeof(struct x86_avx_thread_state)) + iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87); + else + iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87); + } + else ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; - /* Mark the thread's floating point status as non-live. */ - /* Temporarily disabled: radar 4647827 - * ifps->fp_valid = TRUE; - */ + ifps->fp_valid = old_valid; + if (old_valid == FALSE) { + boolean_t istate = ml_set_interrupts_enabled(FALSE); + ifps->fp_valid = TRUE; + set_ts(); + ml_set_interrupts_enabled(istate); + } /* * Clear any reserved bits in the MXCSR to prevent a GPF * when issuing an FXRSTOR. 
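configure_mxcsr_capability_mask(), now run against initial_fp_state during module init, derives the implemented-MXCSR-bit mask from a diagnostic register store: in the fixed FXSAVE image, MXCSR sits at byte offset 24 and MXCSR_MASK at offset 28, and a stored mask of 0 means the architectural default 0xFFBF applies. Since FXSAVE is unprivileged, the probe can be reproduced in user space; a hedged sketch (not xnu code):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        /* FXSAVE needs a 16-byte aligned, 512-byte area */
        uint8_t fxarea[512] __attribute__((aligned(16)));
        uint32_t mask;

        memset(fxarea, 0, sizeof(fxarea));
        __asm__ __volatile__("fxsave %0" : "=m"(fxarea));
        memcpy(&mask, &fxarea[28], sizeof(mask));  /* fx_MXCSR_MASK */
        if (mask == 0)
            mask = 0xFFBF;                         /* architectural default */
        printf("MXCSR capability mask: 0x%x\n", mask);
        return 0;
    }
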
*/ - ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask; + ifps->fx_MXCSR &= mxcsr_capability_mask; simple_unlock(&pcb->lock); @@ -330,13 +582,15 @@ fpu_set_fxstate( */ kern_return_t fpu_get_fxstate( - thread_t thr_act, - thread_state_t tstate) + thread_t thr_act, + thread_state_t tstate, + thread_flavor_t f) { - struct x86_fpsave_state *ifps; + struct x86_fx_thread_state *ifps; x86_float_state64_t *state; kern_return_t ret = KERN_FAILURE; pcb_t pcb; + size_t state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state); if (fp_kind == FP_NO) return KERN_FAILURE; @@ -353,8 +607,9 @@ fpu_get_fxstate( /* * No valid floating-point state. */ - bcopy((char *)&starting_fp_state.fx_save_state, - (char *)&state->fpu_fcw, sizeof(struct x86_fx_save)); + + bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw, + state_size); simple_unlock(&pcb->lock); @@ -376,8 +631,7 @@ fpu_get_fxstate( (void)ml_set_interrupts_enabled(intr); } if (ifps->fp_valid) { - bcopy((char *)&ifps->fx_save_state, - (char *)&state->fpu_fcw, sizeof(struct x86_fx_save)); + bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size); ret = KERN_SUCCESS; } simple_unlock(&pcb->lock); @@ -399,8 +653,8 @@ fpu_dup_fxstate( thread_t parent, thread_t child) { - struct x86_fpsave_state *new_ifps = NULL; - boolean_t intr; + struct x86_fx_thread_state *new_ifps = NULL; + boolean_t intr; pcb_t ppcb; ppcb = parent->machine.pcb; @@ -416,33 +670,35 @@ fpu_dup_fxstate( simple_lock(&ppcb->lock); if (ppcb->ifps != NULL) { + struct x86_fx_thread_state *ifps = ppcb->ifps; /* * Make sure we`ve got the latest fp state info */ intr = ml_set_interrupts_enabled(FALSE); - + assert(current_thread() == parent); clear_ts(); fp_save(parent); clear_fpu(); (void)ml_set_interrupts_enabled(intr); - if (ppcb->ifps->fp_valid) { - child->machine.pcb->ifps = new_ifps; + if (ifps->fp_valid) { + child->machine.pcb->ifps = new_ifps; + assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) || + (fp_register_state_size == sizeof(struct x86_avx_thread_state))); + bcopy((char *)(ppcb->ifps), + (char *)(child->machine.pcb->ifps), fp_register_state_size); - bcopy((char *)&(ppcb->ifps->fx_save_state), - (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save)); - - new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout; /* Mark the new fp saved state as non-live. */ /* Temporarily disabled: radar 4647827 * new_ifps->fp_valid = TRUE; */ + /* * Clear any reserved bits in the MXCSR to prevent a GPF * when issuing an FXRSTOR. */ - new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask; + new_ifps->fx_MXCSR &= mxcsr_capability_mask; new_ifps = NULL; } } @@ -457,6 +713,7 @@ fpu_dup_fxstate( * Initialize FPU. * */ + void fpinit(void) { @@ -477,7 +734,7 @@ fpinit(void) fldcw(control); /* Initialize SSE/SSE2 */ - __builtin_ia32_ldmxcsr(0x1f80); + __builtin_ia32_ldmxcsr(0x1f80); } /* @@ -490,14 +747,24 @@ fpnoextflt(void) boolean_t intr; thread_t thr_act; pcb_t pcb; - struct x86_fpsave_state *ifps = 0; + struct x86_fx_thread_state *ifps = 0; thr_act = current_thread(); pcb = thr_act->machine.pcb; - if (pcb->ifps == 0 && !get_interrupt_level()) - ifps = fp_state_alloc(); + assert(fp_register_state_size != 0); + if (pcb->ifps == 0 && !get_interrupt_level()) { + ifps = fp_state_alloc(); + bcopy((char *)&initial_fp_state, (char *)ifps, + fp_register_state_size); + if (!thread_is_64bit(thr_act)) { + ifps->fp_save_layout = fpu_YMM_present ? 
XSAVE32 : FXSAVE32; + } + else + ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64; + ifps->fp_valid = TRUE; + } intr = ml_set_interrupts_enabled(FALSE); clear_ts(); /* Enable FPU use */ @@ -535,7 +802,7 @@ fpextovrflt(void) { thread_t thr_act = current_thread(); pcb_t pcb; - struct x86_fpsave_state *ifps; + struct x86_fx_thread_state *ifps; boolean_t intr; intr = ml_set_interrupts_enabled(FALSE); @@ -586,7 +853,7 @@ void fpexterrflt(void) { thread_t thr_act = current_thread(); - struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps; boolean_t intr; intr = ml_set_interrupts_enabled(FALSE); @@ -610,7 +877,7 @@ fpexterrflt(void) */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - ifps->fx_save_state.fx_status); + ifps->fx_status); /*NOTREACHED*/ } @@ -630,27 +897,14 @@ fp_save( thread_t thr_act) { pcb_t pcb = thr_act->machine.pcb; - struct x86_fpsave_state *ifps = pcb->ifps; + struct x86_fx_thread_state *ifps = pcb->ifps; + assert(ifps != 0); if (ifps != 0 && !ifps->fp_valid) { assert((get_cr0() & CR0_TS) == 0); /* registers are in FPU */ ifps->fp_valid = TRUE; - -#if defined(__i386__) - if (!thread_is_64bit(thr_act)) { - /* save the compatibility/legacy mode XMM+x87 state */ - fxsave(&ifps->fx_save_state); - ifps->fp_save_layout = FXSAVE32; - } - else { - fxsave64(&ifps->fx_save_state); - ifps->fp_save_layout = FXSAVE64; - } -#elif defined(__x86_64__) - fxsave(&ifps->fx_save_state); - ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; -#endif + fpu_store_registers(ifps, thread_is_64bit(thr_act)); } } @@ -665,50 +919,19 @@ fp_load( thread_t thr_act) { pcb_t pcb = thr_act->machine.pcb; - struct x86_fpsave_state *ifps; + struct x86_fx_thread_state *ifps = pcb->ifps; - ifps = pcb->ifps; - if (ifps == 0 || ifps->fp_valid == FALSE) { - if (ifps == 0) { - /* FIXME: This allocation mechanism should be revised - * for scenarios where interrupts are disabled. - */ - ifps = fp_state_alloc(); - pcb->ifps = ifps; - } + assert(ifps); + assert(ifps->fp_valid == FALSE || ifps->fp_valid == TRUE); + + if (ifps->fp_valid == FALSE) { fpinit(); } else { - assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); -#if defined(__i386__) - if (ifps->fp_save_layout == FXSAVE32) { - /* Restore the compatibility/legacy mode XMM+x87 state */ - fxrstor(&ifps->fx_save_state); - } - else if (ifps->fp_save_layout == FXSAVE64) { - fxrstor64(&ifps->fx_save_state); - } -#elif defined(__x86_64__) - fxrstor(&ifps->fx_save_state); -#endif + fpu_load_registers(ifps); } ifps->fp_valid = FALSE; /* in FPU */ } - - -/* - * fpflush(thread_t) - * Flush the current act's state, if needed - * (used by thread_terminate_self to ensure fp faults - * aren't satisfied by overly general trap code in the - * context of the reaper thread) - */ -void -fpflush(__unused thread_t thr_act) -{ - /* not needed on MP x86s; fp not lazily evaluated */ -} - /* * SSE arithmetic exception handling code. 
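The ifps->fx_MXCSR &= mxcsr_capability_mask step above is the consumer of that mask: state copied in from user space is untrusted, and FXRSTOR/XRSTOR raise #GP if a reserved MXCSR bit is set. The same sanitize-before-restore rule applies anywhere MXCSR is reloaded; a hedged user-space illustration using the <xmmintrin.h> accessors:

    #include <stdio.h>
    #include <xmmintrin.h>

    static unsigned sanitize_mxcsr(unsigned user_mxcsr, unsigned capability_mask) {
        /* drop bits the CPU does not implement; 0xFFBF is the default mask */
        return user_mxcsr & (capability_mask ? capability_mask : 0xFFBFu);
    }

    int main(void) {
        unsigned hostile = 0xFFFFFFFFu;              /* every bit set */
        unsigned safe = sanitize_mxcsr(hostile, 0);
        _mm_setcsr(safe);          /* reserved bits here would fault */
        printf("MXCSR after sanitized restore: 0x%x\n", _mm_getcsr());
        return 0;
    }
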
* Basically the same as the x87 exception handler with a different subtype @@ -718,7 +941,7 @@ void fpSSEexterrflt(void) { thread_t thr_act = current_thread(); - struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps; boolean_t intr; intr = ml_set_interrupts_enabled(FALSE); @@ -742,20 +965,27 @@ fpSSEexterrflt(void) assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); i386_exception(EXC_ARITHMETIC, EXC_I386_SSEEXTERR, - ifps->fx_save_state.fx_MXCSR); + ifps->fx_MXCSR); /*NOTREACHED*/ } - void fp_setvalid(boolean_t value) { thread_t thr_act = current_thread(); - struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps; if (ifps) { ifps->fp_valid = value; - if (value == TRUE) + if (value == TRUE) { + boolean_t istate = ml_set_interrupts_enabled(FALSE); clear_fpu(); + ml_set_interrupts_enabled(istate); + } } } + +boolean_t +ml_fpu_avx_enabled(void) { + return (fpu_YMM_present == TRUE); +} diff --git a/osfmk/i386/fpu.h b/osfmk/i386/fpu.h index 7b6f86a82..a606aab41 100644 --- a/osfmk/i386/fpu.h +++ b/osfmk/i386/fpu.h @@ -73,13 +73,15 @@ extern int fp_kind; extern void init_fpu(void); extern void fpu_module_init(void); extern void fpu_free( - struct x86_fpsave_state * fps); + void * fps); extern kern_return_t fpu_set_fxstate( thread_t thr_act, - thread_state_t state); + thread_state_t state, + thread_flavor_t f); extern kern_return_t fpu_get_fxstate( thread_t thr_act, - thread_state_t state); + thread_state_t state, + thread_flavor_t f); extern void fpu_dup_fxstate( thread_t parent, thread_t child); @@ -90,96 +92,11 @@ extern void fpSSEexterrflt(void); extern void fpflush(thread_t); extern void fp_setvalid(boolean_t); #ifdef __i386__ -extern void fxsave64(struct x86_fx_save *); -extern void fxrstor64(struct x86_fx_save *); +extern void fxsave64(struct x86_fx_thread_state *); +extern void fxrstor64(struct x86_fx_thread_state *); #endif -/* - * FPU instructions. - */ -#define fninit() \ - __asm__ volatile("fninit") - -#define fnstcw(control) \ - __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control))) - -#define fldcw(control) \ - __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) ) - -static inline unsigned short -fnstsw(void) -{ - unsigned short status; - __asm__ volatile("fnstsw %0" : "=ma" (status)); - return(status); -} - -#define fnclex() \ - __asm__ volatile("fnclex") - -#define fnsave(state) \ - __asm__ volatile("fnsave %0" : "=m" (*state)) - -#define frstor(state) \ - __asm__ volatile("frstor %0" : : "m" (state)) - -#define fwait() \ - __asm__("fwait"); - -#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) -#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) - -#define FXSAFE() (fp_kind == FP_FXSR) - - -static inline void clear_fpu(void) -{ - set_ts(); -} - - -/* - * Save thread`s FPU context. - */ - -static inline void fpu_save_context(thread_t thread) -{ - struct x86_fpsave_state *ifps; - - assert(ml_get_interrupts_enabled() == FALSE); - ifps = (thread)->machine.pcb->ifps; - if (ifps != 0 && !ifps->fp_valid) { - /* Clear CR0.TS in preparation for the FP context save. In - * theory, this shouldn't be necessary since a live FPU should - * indicate that TS is clear. However, various routines - * (such as sendsig & sigreturn) manipulate TS directly. 
- */ - clear_ts(); - /* registers are in FPU - save to memory */ - ifps->fp_valid = TRUE; - -#if defined(__i386__) - if (!thread_is_64bit(thread) || is_saved_state32(thread->machine.pcb->iss)) { - /* save the compatibility/legacy mode XMM+x87 state */ - fxsave(&ifps->fx_save_state); - ifps->fp_save_layout = FXSAVE32; - } - else { - /* Execute a brief jump to 64-bit mode to save the 64 - * bit state - */ - fxsave64(&ifps->fx_save_state); - ifps->fp_save_layout = FXSAVE64; - } -#elif defined(__x86_64__) - /* for a 64-bit long mode kernel, we can always use plain fxsave */ - fxsave(&ifps->fx_save_state); - ifps->fp_save_layout = thread_is_64bit(thread) ? FXSAVE64 - : FXSAVE32; - -#endif - } - set_ts(); -} +extern void clear_fpu(void); +extern void fpu_save_context(thread_t thread); #endif /* _I386_FPU_H_ */ diff --git a/osfmk/i386/genassym.c b/osfmk/i386/genassym.c index a254013dd..0f5edf0e5 100644 --- a/osfmk/i386/genassym.c +++ b/osfmk/i386/genassym.c @@ -263,7 +263,7 @@ main( DECLARE("DS64_DR6", offsetof(struct x86_debug_state64 *, dr6)); DECLARE("DS64_DR7", offsetof(struct x86_debug_state64 *, dr7)); - DECLARE("FP_VALID", offsetof(struct x86_fpsave_state *,fp_valid)); + DECLARE("FP_VALID", offsetof(struct x86_fx_thread_state *,fp_valid)); DECLARE("SS_FLAVOR", offsetof(x86_saved_state_t *, flavor)); DECLARE("SS_32", x86_SAVED_STATE32); @@ -431,6 +431,8 @@ main( offsetof(cpu_data_t *, cpu_hibernate)); DECLARE("CPU_INTERRUPT_LEVEL", offsetof(cpu_data_t *, cpu_interrupt_level)); + DECLARE("CPU_NESTED_ISTACK", + offsetof(cpu_data_t *, cpu_nested_istack)); DECLARE("CPU_SIMPLE_LOCK_COUNT", offsetof(cpu_data_t *,cpu_simple_lock_count)); DECLARE("CPU_NUMBER_GS", diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 135f7a942..445c6afed 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -504,14 +504,14 @@ i386_init(vm_offset_t boot_args_start) if ( ! PE_parse_boot_argn("novmx", &noVMX, sizeof (noVMX))) noVMX = 0; /* OK to support Altivec in rosetta? */ - /* create the console for verbose or pretty mode */ - /* Note: doing this prior to tsc_init() allows for graceful panic! */ - PE_init_platform(TRUE, kernelBootArgs); - PE_create_console(); - tsc_init(); power_management_init(); + PE_init_platform(TRUE, kernelBootArgs); + + /* create the console for verbose or pretty mode */ + PE_create_console(); + processor_bootstrap(); thread_bootstrap(); diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index bddcb54c7..301d02274 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -106,6 +106,10 @@ ppnum_t lowest_lo = 0; ppnum_t lowest_hi = 0; ppnum_t highest_hi = 0; +uint32_t pmap_reserved_pages_allocated = 0; +uint32_t pmap_last_reserved_range = 0xFFFFFFFF; +uint32_t pmap_reserved_ranges = 0; + extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *); pmap_paddr_t avail_start, avail_end; @@ -288,7 +292,6 @@ i386_vm_init(uint64_t maxmem, sane_size += region_bytes; break; - case kEfiReservedMemoryType: firmware_Reserved_bytes += region_bytes; break; @@ -339,10 +342,31 @@ i386_vm_init(uint64_t maxmem, pmptr->base = base; else pmptr->base = I386_LOWMEM_RESERVED; + + pmptr->end = top; + /* - * mark as already mapped + * A range may be marked with with the + * EFI_MEMORY_KERN_RESERVED attribute + * on some systems, to indicate that the range + * must not be made available to devices. + * Simplifying assumptions are made regarding + * the placement of the range. 
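The hunks below act on that attribute: each EFI_MEMORY_KERN_RESERVED range is counted, a reserved range lying wholly below the kernel image keeps its allocation cursor at its base so that pmap_next_page_reserved() (added further down) can hand out those pages, and every other range is marked fully consumed. A hedged toy of the same bookkeeping; the field names and kernel-base constant are illustrative, though bit 59 matches xnu's EFI_MEMORY_KERN_RESERVED:

    #include <stdint.h>

    #define KERN_RESERVED (1ULL << 59)   /* per pexpert/pexpert/i386/efi.h */
    #define KERNEL_BASE_PAGE 0x100       /* placeholder for the image base */

    struct region { uint64_t base, end, alloc, attr; };

    uint32_t scan_reserved(struct region *r, uint32_t n, uint32_t *last_rsv) {
        uint32_t reserved = 0;
        for (uint32_t i = 0; i < n; i++) {
            if (r[i].attr & KERN_RESERVED)
                reserved++;
            if ((r[i].attr & KERN_RESERVED) && r[i].end < KERNEL_BASE_PAGE) {
                r[i].alloc = r[i].base;  /* pages stay claimable */
                *last_rsv = i;
            } else {
                r[i].alloc = r[i].end;   /* treat as already mapped */
            }
        }
        return reserved;
    }
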
*/ - pmptr->alloc = pmptr->end = top; + if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) + pmap_reserved_ranges++; + + if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) && + (top < I386_KERNEL_IMAGE_BASE_PAGE)) { + pmptr->alloc = pmptr->base; + pmap_last_reserved_range = pmap_memory_region_count; + } + else { + /* + * mark as already mapped + */ + pmptr->alloc = top; + } pmptr->type = pmap_type; } else if ( (base < fap) && (top > fap) ) { @@ -552,6 +576,38 @@ pmap_free_pages(void) return (unsigned int)avail_remaining; } +boolean_t pmap_next_page_reserved(ppnum_t *); + +/* + * Pick a page from a "kernel private" reserved range; works around + * errata on some hardware. + */ +boolean_t +pmap_next_page_reserved(ppnum_t *pn) { + if (pmap_reserved_ranges && pmap_last_reserved_range != 0xFFFFFFFF) { + uint32_t n; + pmap_memory_region_t *region; + for (n = 0; n <= pmap_last_reserved_range; n++) { + region = &pmap_memory_regions[n]; + if (region->alloc < region->end) { + *pn = region->alloc++; + avail_remaining--; + + if (*pn > max_ppnum) + max_ppnum = *pn; + + if (lowest_lo == 0 || *pn < lowest_lo) + lowest_lo = *pn; + + pmap_reserved_pages_allocated++; + return TRUE; + } + } + } + return FALSE; +} + + boolean_t pmap_next_page_hi( ppnum_t *pn) @@ -559,6 +615,9 @@ pmap_next_page_hi( pmap_memory_region_t *region; int n; + if (pmap_next_page_reserved(pn)) + return TRUE; + if (avail_remaining) { for (n = pmap_memory_region_count - 1; n >= 0; n--) { region = &pmap_memory_regions[n]; @@ -694,10 +753,15 @@ pmap_lowmem_finalize(void) * entry in the memory region table. However, the loop is retained * (with the intended termination criteria commented out) in the * hope that some day we can free all low-memory ranges. + * This loop assumes the first range does not span the kernel + * image base & avail_start. We skip this process on systems + * with "kernel reserved" ranges, as the low memory reclamation + * is handled in the initial memory map processing loop on + * such systems. */ for (i = 0; // pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE; - i < 1; + i < 1 && (pmap_reserved_ranges == 0); i++) { vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base); vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end); diff --git a/osfmk/i386/io_map.c b/osfmk/i386/io_map.c index 40892578b..1c141785a 100644 --- a/osfmk/i386/io_map.c +++ b/osfmk/i386/io_map.c @@ -69,7 +69,7 @@ extern vm_offset_t virtual_avail; * Mach VM is running. 
*/ vm_offset_t -io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) +io_map(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags) { vm_offset_t start; @@ -95,7 +95,7 @@ io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) /* just wrap this since io_map handles it */ -vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) +vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags) { return (io_map(phys_addr, size, flags)); } diff --git a/osfmk/i386/io_map_entries.h b/osfmk/i386/io_map_entries.h index 0913ba060..3dc373b47 100644 --- a/osfmk/i386/io_map_entries.h +++ b/osfmk/i386/io_map_entries.h @@ -39,10 +39,10 @@ #ifdef __APPLE_API_PRIVATE __BEGIN_DECLS extern vm_offset_t io_map( - vm_offset_t phys_addr, + vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags); -extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags); +extern vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags); __END_DECLS #endif /* __APPLE_API_PRIVATE */ diff --git a/osfmk/i386/lapic.c b/osfmk/i386/lapic.c index 21e974bff..e98665f04 100644 --- a/osfmk/i386/lapic.c +++ b/osfmk/i386/lapic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2009 Apple Inc. All rights reserved. + * Copyright (c) 2008-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,10 +48,10 @@ #include #include #include -#include #include #include #include +#include #if CONFIG_MCA #include #endif @@ -74,10 +74,17 @@ #define PAUSE #endif /* MP_DEBUG */ -/* Initialize lapic_id so cpu_number() works on non SMP systems */ -unsigned long lapic_id_initdata = 0; -unsigned long lapic_id = (unsigned long)&lapic_id_initdata; -vm_offset_t lapic_start; +/* Base vector for local APIC interrupt sources */ +int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE; + +lapic_ops_table_t *lapic_ops; /* Lapic operations switch */ + +#define MAX_LAPICIDS (LAPIC_ID_MAX+1) +int lapic_to_cpu[MAX_LAPICIDS]; +int cpu_to_lapic[MAX_CPUS]; + +static vm_offset_t lapic_pbase; /* Physical base memory-mapped regs */ +static vm_offset_t lapic_vbase; /* Virtual base memory-mapped regs */ static i386_intr_func_t lapic_intr_func[LAPIC_FUNC_TABLE_SIZE]; @@ -91,13 +98,6 @@ static unsigned lapic_master_error_count = 0; static unsigned lapic_error_count_threshold = 5; static boolean_t lapic_dont_panic = FALSE; -/* Base vector for local APIC interrupt sources */ -int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE; - -#define MAX_LAPICIDS (LAPIC_ID_MAX+1) -int lapic_to_cpu[MAX_LAPICIDS]; -int cpu_to_lapic[MAX_CPUS]; - static void lapic_cpu_map_init(void) { @@ -147,9 +147,8 @@ ml_get_cpuid(uint32_t lapic_index) } - #ifdef MP_DEBUG -static void +void lapic_cpu_map_dump(void) { int i; @@ -169,48 +168,105 @@ lapic_cpu_map_dump(void) } #endif /* MP_DEBUG */ -void -lapic_init(void) +static void +legacy_init(void) { int result; vm_map_entry_t entry; - uint32_t lo; - uint32_t hi; - boolean_t is_boot_processor; - boolean_t is_lapic_enabled; - vm_offset_t lapic_base; - - /* Examine the local APIC state */ - rdmsr(MSR_IA32_APIC_BASE, lo, hi); - is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0; - is_lapic_enabled = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0; - lapic_base = (lo & MSR_IA32_APIC_BASE_BASE); - kprintf("MSR_IA32_APIC_BASE %p %s %s\n", (void *) lapic_base, - is_lapic_enabled ? "enabled" : "disabled", - is_boot_processor ? 
"BSP" : "AP"); - if (!is_boot_processor || !is_lapic_enabled) - panic("Unexpected local APIC state\n"); /* Establish a map to the local apic */ - lapic_start = (vm_offset_t)vm_map_min(kernel_map); + lapic_vbase = (vm_offset_t)vm_map_min(kernel_map); result = vm_map_find_space(kernel_map, - (vm_map_address_t *) &lapic_start, + (vm_map_address_t *) &lapic_vbase, round_page(LAPIC_SIZE), 0, VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry); if (result != KERN_SUCCESS) { - panic("smp_init: vm_map_find_entry FAILED (err=%d)", result); + panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result); } vm_map_unlock(kernel_map); /* Map in the local APIC non-cacheable, as recommended by Intel * in section 8.4.1 of the "System Programming Guide". */ pmap_enter(pmap_kernel(), - lapic_start, - (ppnum_t) i386_btop(lapic_base), + lapic_vbase, + (ppnum_t) i386_btop(lapic_pbase), VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_IO, TRUE); - lapic_id = (unsigned long)(lapic_start + LAPIC_ID); +} + + +static uint32_t +legacy_read(lapic_register_t reg) +{ + return *LAPIC_MMIO(reg); +} + +static void +legacy_write(lapic_register_t reg, uint32_t value) +{ + *LAPIC_MMIO(reg) = value; +} + +static lapic_ops_table_t legacy_ops = { + legacy_init, + legacy_read, + legacy_write +}; + +static void +x2apic_init(void) +{ +} + +static uint32_t +x2apic_read(lapic_register_t reg) +{ + uint32_t lo; + uint32_t hi; + + rdmsr(LAPIC_MSR(reg), lo, hi); + return lo; +} + +static void +x2apic_write(lapic_register_t reg, uint32_t value) +{ + wrmsr(LAPIC_MSR(reg), value, 0); +} + +static lapic_ops_table_t x2apic_ops = { + x2apic_init, + x2apic_read, + x2apic_write +}; + + +void +lapic_init(void) +{ + uint32_t lo; + uint32_t hi; + boolean_t is_boot_processor; + boolean_t is_lapic_enabled; + boolean_t is_x2apic; + + /* Examine the local APIC state */ + rdmsr(MSR_IA32_APIC_BASE, lo, hi); + is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0; + is_lapic_enabled = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0; + is_x2apic = (lo & MSR_IA32_APIC_BASE_EXTENDED) != 0; + lapic_pbase = (lo & MSR_IA32_APIC_BASE_BASE); + kprintf("MSR_IA32_APIC_BASE %p %s %s mode %s\n", (void *) lapic_pbase, + is_lapic_enabled ? "enabled" : "disabled", + is_x2apic ? "extended" : "legacy", + is_boot_processor ? "BSP" : "AP"); + if (!is_boot_processor || !is_lapic_enabled) + panic("Unexpected local APIC state\n"); + + lapic_ops = is_x2apic ? 
&x2apic_ops : &legacy_ops; + + lapic_ops->init(); if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) { panic("Local APIC version 0x%x, 0x14 or more expected\n", @@ -249,6 +305,13 @@ static const char *DM_str[8] = { "Invalid", "ExtINT"}; +static const char *TMR_str[] = { + "OneShot", + "Periodic", + "TSC-Deadline", + "Illegal" +}; + void lapic_dump(void) { @@ -270,7 +333,7 @@ lapic_dump(void) kprintf("LAPIC %d at %p version 0x%x\n", (LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, - (void *) lapic_start, + (void *) lapic_vbase, LAPIC_READ(VERSION)&LAPIC_VERSION_MASK); kprintf("Priorities: Task 0x%x Arbitration 0x%x Processor 0x%x\n", LAPIC_READ(TPR)&LAPIC_TPR_MASK, @@ -295,7 +358,8 @@ lapic_dump(void) VEC(LVT_TIMER), DS(LVT_TIMER), MASK(LVT_TIMER), - (LAPIC_READ(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot"); + TMR_str[(LAPIC_READ(LVT_TIMER) >> LAPIC_LVT_TMR_SHIFT) + & LAPIC_LVT_TMR_MASK]); kprintf(" Initial Count: 0x%08x \n", LAPIC_READ(TIMER_INITIAL_COUNT)); kprintf(" Current Count: 0x%08x \n", LAPIC_READ(TIMER_CURRENT_COUNT)); kprintf(" Divide Config: 0x%08x \n", LAPIC_READ(TIMER_DIVIDE_CONFIG)); @@ -334,15 +398,15 @@ lapic_dump(void) kprintf("\n"); kprintf("TMR: 0x"); for(i=7; i>=0; i--) - kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i*0x10)); + kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i)); kprintf("\n"); kprintf("IRR: 0x"); for(i=7; i>=0; i--) - kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i*0x10)); + kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i)); kprintf("\n"); kprintf("ISR: 0x"); for(i=7; i >= 0; i--) - kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i*0x10)); + kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i)); kprintf("\n"); } @@ -501,10 +565,9 @@ lapic_set_timer( lapic_timer_divide_t divisor, lapic_timer_count_t initial_count) { - boolean_t state; uint32_t timer_vector; - state = ml_set_interrupts_enabled(FALSE); + mp_disable_preemption(); timer_vector = LAPIC_READ(LVT_TIMER); timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);; timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED; @@ -512,7 +575,73 @@ lapic_set_timer( LAPIC_WRITE(LVT_TIMER, timer_vector); LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor); LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count); - ml_set_interrupts_enabled(state); + mp_enable_preemption(); +} + +void +lapic_config_timer( + boolean_t interrupt_unmasked, + lapic_timer_mode_t mode, + lapic_timer_divide_t divisor) +{ + uint32_t timer_vector; + + mp_disable_preemption(); + timer_vector = LAPIC_READ(LVT_TIMER); + timer_vector &= ~(LAPIC_LVT_MASKED | + LAPIC_LVT_PERIODIC | + LAPIC_LVT_TSC_DEADLINE); + timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED; + timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0; + LAPIC_WRITE(LVT_TIMER, timer_vector); + LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor); + mp_enable_preemption(); +} + +/* + * Configure TSC-deadline timer mode. The lapic interrupt is always unmasked. 
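The structural change in this file is the lapic_ops switch above: lapic_init() selects &x2apic_ops or &legacy_ops once, and every LAPIC_READ/LAPIC_WRITE call site becomes mode-agnostic. That is also why the dump loops now pass plain register indices (i) rather than byte offsets (i*0x10): legacy mode folds the 16-byte register stride into LAPIC_MMIO_OFFSET(), while x2APIC folds the index into MSR 0x800+reg. A hedged toy of the dispatch shape (rdmsr/wrmsr are privileged, so the MSR backend is stubbed here):

    #include <stdint.h>

    typedef enum { REG_ID = 0x02, REG_VERSION = 0x03 } reg_t;

    typedef struct {
        uint32_t (*read)(reg_t);
        void     (*write)(reg_t, uint32_t);
    } apic_ops_t;

    static uint32_t mmio_page[0x40];   /* stand-in for the mapped LAPIC page */
    static uint32_t mmio_read(reg_t r)              { return mmio_page[r]; }
    static void     mmio_write(reg_t r, uint32_t v) { mmio_page[r] = v; }

    /* real code would issue rdmsr/wrmsr at 0x800 + reg */
    static uint32_t msr_read(reg_t r)               { (void)r; return 0; }
    static void     msr_write(reg_t r, uint32_t v)  { (void)r; (void)v; }

    static apic_ops_t legacy = { mmio_read, mmio_write };
    static apic_ops_t x2apic = { msr_read,  msr_write  };
    static apic_ops_t *ops;

    void apic_select(int is_x2apic) { ops = is_x2apic ? &x2apic : &legacy; }
    #define APIC_READ(r)     (ops->read(r))
    #define APIC_WRITE(r, v) (ops->write((r), (v)))
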
+ */ +void +lapic_config_tsc_deadline_timer(void) +{ + uint32_t timer_vector; + + DBG("lapic_config_tsc_deadline_timer()\n"); + mp_disable_preemption(); + timer_vector = LAPIC_READ(LVT_TIMER); + timer_vector &= ~(LAPIC_LVT_MASKED | + LAPIC_LVT_PERIODIC); + timer_vector |= LAPIC_LVT_TSC_DEADLINE; + LAPIC_WRITE(LVT_TIMER, timer_vector); + + /* Serialize writes per Intel OSWG */ + do { + lapic_set_tsc_deadline_timer(rdtsc64() + (1ULL<<32)); + } while (lapic_get_tsc_deadline_timer() == 0); + lapic_set_tsc_deadline_timer(0); + + mp_enable_preemption(); + DBG("lapic_config_tsc_deadline_timer() done\n"); +} + +void +lapic_set_timer_fast( + lapic_timer_count_t initial_count) +{ + LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) & ~LAPIC_LVT_MASKED); + LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count); +} + +void +lapic_set_tsc_deadline_timer(uint64_t deadline) +{ + wrmsr64(MSR_IA32_TSC_DEADLINE, deadline); +} + +uint64_t +lapic_get_tsc_deadline_timer(void) +{ + return rdmsr64(MSR_IA32_TSC_DEADLINE); } void @@ -522,9 +651,7 @@ lapic_get_timer( lapic_timer_count_t *initial_count, lapic_timer_count_t *current_count) { - boolean_t state; - - state = ml_set_interrupts_enabled(FALSE); + mp_disable_preemption(); if (mode) *mode = (LAPIC_READ(LVT_TIMER) & LAPIC_LVT_PERIODIC) ? periodic : one_shot; @@ -534,7 +661,7 @@ lapic_get_timer( *initial_count = LAPIC_READ(TIMER_INITIAL_COUNT); if (current_count) *current_count = LAPIC_READ(TIMER_CURRENT_COUNT); - ml_set_interrupts_enabled(state); + mp_enable_preemption(); } static inline void @@ -553,6 +680,11 @@ void lapic_unmask_perfcnt_interrupt(void) { LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT)); } +void lapic_set_perfcnt_interrupt_mask(boolean_t mask) { + uint32_t m = (mask ? LAPIC_LVT_MASKED : 0); + LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT) | m); +} + void lapic_set_intr_func(int vector, i386_intr_func_t func) { @@ -575,6 +707,10 @@ lapic_set_intr_func(int vector, i386_intr_func_t func) } } +void lapic_set_pmi_func(i386_intr_func_t func) { + lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func); +} + int lapic_interrupt(int interrupt_num, x86_saved_state_t *state) { @@ -586,7 +722,6 @@ lapic_interrupt(int interrupt_num, x86_saved_state_t *state) if (interrupt_num == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) && lapic_intr_func[LAPIC_NMI_INTERRUPT] != NULL) { retval = (*lapic_intr_func[LAPIC_NMI_INTERRUPT])(state); - _lapic_end_of_interrupt(); return retval; } else @@ -744,3 +879,88 @@ lapic_send_ipi(int cpu, int vector) (void) ml_set_interrupts_enabled(state); } + +/* + * The following interfaces are privately exported to AICPM. + */ + +boolean_t +lapic_is_interrupt_pending(void) +{ + int i; + + for (i = 0; i < 8; i += 1) { + if ((LAPIC_READ_OFFSET(IRR_BASE, i) != 0) || + (LAPIC_READ_OFFSET(ISR_BASE, i) != 0)) + return (TRUE); + } + + return (FALSE); +} + +boolean_t +lapic_is_interrupting(uint8_t vector) +{ + int i; + int bit; + uint32_t irr; + uint32_t isr; + + i = vector / 32; + bit = 1 << (vector % 32); + + irr = LAPIC_READ_OFFSET(IRR_BASE, i); + isr = LAPIC_READ_OFFSET(ISR_BASE, i); + + if ((irr | isr) & bit) + return (TRUE); + + return (FALSE); +} + +void +lapic_interrupt_counts(uint64_t intrs[256]) +{ + int i; + int j; + int bit; + uint32_t irr; + uint32_t isr; + + if (intrs == NULL) + return; + + for (i = 0; i < 8; i += 1) { + irr = LAPIC_READ_OFFSET(IRR_BASE, i); + isr = LAPIC_READ_OFFSET(ISR_BASE, i); + + if ((isr | irr) == 0) + continue; + + for (j = (i == 0) ? 
16 : 0; j < 32; j += 1) { + bit = (32 * i) + j; + if ((isr | irr) & (1 << j)) + intrs[bit] += 1; + } + } +} + +void +lapic_disable_timer(void) +{ + uint32_t lvt_timer; + + /* + * If we're in deadline timer mode, + * simply clear the deadline timer, otherwise + * mask the timer interrupt and clear the countdown. + */ + lvt_timer = LAPIC_READ(LVT_TIMER); + if (lvt_timer & LAPIC_LVT_TSC_DEADLINE) { + wrmsr64(MSR_IA32_TSC_DEADLINE, 0); + } else { + LAPIC_WRITE(LVT_TIMER, lvt_timer | LAPIC_LVT_MASKED); + LAPIC_WRITE(TIMER_INITIAL_COUNT, 0); + lvt_timer = LAPIC_READ(LVT_TIMER); + } +} diff --git a/osfmk/i386/lapic.h b/osfmk/i386/lapic.h index e8387bb21..655864230 100644 --- a/osfmk/i386/lapic.h +++ b/osfmk/i386/lapic.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,6 +32,10 @@ #ifndef _I386_LAPIC_H_ #define _I386_LAPIC_H_ +/* + * Legacy mode definitions. + * The register offsets are no longer used by XNU - see LAPIC_MMIO_OFFSET(). + */ #define LAPIC_START 0xFEE00000 #define LAPIC_SIZE 0x00000400 @@ -106,6 +110,9 @@ #define LAPIC_LVT_TM_LEVEL 0x08000 #define LAPIC_LVT_MASKED 0x10000 #define LAPIC_LVT_PERIODIC 0x20000 +#define LAPIC_LVT_TSC_DEADLINE 0x40000 +#define LAPIC_LVT_TMR_SHIFT 17 +#define LAPIC_LVT_TMR_MASK 3 #define LAPIC_TIMER_INITIAL_COUNT 0x00000380 #define LAPIC_TIMER_CURRENT_COUNT 0x00000390 #define LAPIC_TIMER_DIVIDE_CONFIG 0x000003E0 @@ -125,18 +132,58 @@ #define CPU_NUMBER(r) \ movl %gs:CPU_NUMBER_GS,r -#define CPU_NUMBER_FROM_LAPIC(r) \ - movl EXT(lapic_id),r; \ - movl 0(r),r; \ - shrl $(LAPIC_ID_SHIFT),r; \ - andl $(LAPIC_ID_MASK),r; \ - movl EXT(lapic_to_cpu)(,r,4),r - #ifndef ASSEMBLER -#include -#include -#include -#include +typedef enum { + ID = 0x02, + VERSION = 0x03, + TPR = 0x08, + APR = 0x09, + PPR = 0x0A, + EOI = 0x0B, + REMOTE_READ = 0x0C, + LDR = 0x0D, + DFR = 0x0E, + SVR = 0x0F, + ISR_BASE = 0x10, + TMR_BASE = 0x18, + IRR_BASE = 0x20, + ERROR_STATUS = 0x28, + LVT_CMCI = 0x2F, + ICR = 0x30, + ICRD = 0x31, + LVT_TIMER = 0x32, + LVT_THERMAL = 0x33, + LVT_PERFCNT = 0x34, + LVT_LINT0 = 0x35, + LVT_LINT1 = 0x36, + LVT_ERROR = 0x37, + TIMER_INITIAL_COUNT = 0x38, + TIMER_CURRENT_COUNT = 0x39, + TIMER_DIVIDE_CONFIG = 0x3E, +} lapic_register_t; + +#define LAPIC_MMIO_PBASE 0xFEE00000 /* Default physical MMIO addr */ +#define LAPIC_MMIO_VBASE lapic_vbase /* Actual virtual mapped addr */ +#define LAPIC_MSR_BASE 0x800 + +#define LAPIC_MMIO_OFFSET(reg) (reg << 4) +#define LAPIC_MSR_OFFSET(reg) (reg) + +#define LAPIC_MMIO(reg) ((volatile uint32_t *) \ + (LAPIC_MMIO_VBASE + LAPIC_MMIO_OFFSET(reg))) +#define LAPIC_MSR(reg) (LAPIC_MSR_BASE + LAPIC_MSR_OFFSET(reg)) + +typedef struct { + void (*init) (void); + uint32_t (*read) (lapic_register_t); + void (*write)(lapic_register_t, uint32_t); +} lapic_ops_table_t; +extern lapic_ops_table_t *lapic_ops; + +#define LAPIC_WRITE(reg,val) lapic_ops->write(reg, val) +#define LAPIC_READ(reg) lapic_ops->read(reg) +#define LAPIC_READ_OFFSET(reg,off) LAPIC_READ((reg)+(off)) + typedef enum { periodic, one_shot @@ -186,22 +233,13 @@ typedef uint32_t lapic_timer_count_t; #define LAPIC_NMI_INTERRUPT 0x2 #define LAPIC_FUNC_TABLE_SIZE (LAPIC_PERFCNT_INTERRUPT + 1) -#define LAPIC_WRITE(reg,val) \ - *((volatile uint32_t *)(lapic_start + LAPIC_##reg)) = (val) -#define LAPIC_READ(reg) \ - (*((volatile uint32_t *)(lapic_start + LAPIC_##reg))) -#define LAPIC_READ_OFFSET(reg,off) \ - (*((volatile uint32_t *)(lapic_start + 
LAPIC_##reg + (off)))) - #define LAPIC_VECTOR(src) \ (lapic_interrupt_base + LAPIC_##src##_INTERRUPT) #define LAPIC_ISR_IS_SET(base,src) \ - (LAPIC_READ_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) \ + (LAPIC_READ_OFFSET(ISR_BASE,(base+LAPIC_##src##_INTERRUPT)/32) \ & (1 <<((base + LAPIC_##src##_INTERRUPT)%32))) -extern vm_offset_t lapic_start; - extern void lapic_init(void); extern void lapic_configure(void); extern void lapic_shutdown(void); @@ -212,6 +250,7 @@ extern int lapic_interrupt( int interrupt, x86_saved_state_t *state); extern void lapic_end_of_interrupt(void); extern void lapic_unmask_perfcnt_interrupt(void); +extern void lapic_set_perfcnt_interrupt_mask(boolean_t); extern void lapic_send_ipi(int cpu, int interupt); extern int lapic_to_cpu[]; @@ -221,6 +260,14 @@ extern void lapic_cpu_map(int lapic, int cpu_num); extern uint32_t ml_get_apicid(uint32_t cpu); extern uint32_t ml_get_cpuid(uint32_t lapic_index); +extern void lapic_config_timer( + boolean_t interrupt, + lapic_timer_mode_t mode, + lapic_timer_divide_t divisor); + +extern void lapic_set_timer_fast( + lapic_timer_count_t initial_count); + extern void lapic_set_timer( boolean_t interrupt, lapic_timer_mode_t mode, @@ -233,17 +280,20 @@ extern void lapic_get_timer( lapic_timer_count_t *initial_count, lapic_timer_count_t *current_count); +extern void lapic_config_tsc_deadline_timer(void); +extern void lapic_set_tsc_deadline_timer(uint64_t deadline); +extern uint64_t lapic_get_tsc_deadline_timer(void); + typedef int (*i386_intr_func_t)(x86_saved_state_t *state); extern void lapic_set_intr_func(int intr, i386_intr_func_t func); +extern void lapic_set_pmi_func(i386_intr_func_t); + static inline void lapic_set_timer_func(i386_intr_func_t func) { lapic_set_intr_func(LAPIC_VECTOR(TIMER), func); } -static inline void lapic_set_pmi_func(i386_intr_func_t func) -{ - lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func); -} + static inline void lapic_set_thermal_func(i386_intr_func_t func) { lapic_set_intr_func(LAPIC_VECTOR(THERMAL), func); @@ -257,7 +307,13 @@ static inline void lapic_set_pm_func(i386_intr_func_t func) lapic_set_intr_func(LAPIC_VECTOR(PM), func); } +extern boolean_t lapic_is_interrupt_pending(void); +extern boolean_t lapic_is_interrupting(uint8_t vector); +extern void lapic_interrupt_counts(uint64_t intrs[256]); +extern void lapic_disable_timer(void); + #ifdef MP_DEBUG +extern void lapic_cpu_map_dump(void); #define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump() #define LAPIC_DUMP() lapic_dump() #else diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c index 3d3e5a09f..8c715d086 100644 --- a/osfmk/i386/locks_i386.c +++ b/osfmk/i386/locks_i386.c @@ -77,15 +77,15 @@ #include #include -#include /* mp_recent_debugger_activity() */ #if MACH_KDB #include #include #include #include #endif /* MACH_KDB */ - +#include /* machine_timeout_suspended() */ #include +#include #include @@ -319,6 +319,29 @@ usimple_lock_init( #endif } +volatile uint32_t spinlock_owner_cpu = ~0; +volatile usimple_lock_t spinlock_timed_out; + +static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) { + uint64_t deadline; + uint32_t i; + + for (i = 0; i < real_ncpus; i++) { + if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) { + spinlock_owner_cpu = i; + if ((uint32_t)cpu_number() == i) + break; + cpu_datap(i)->cpu_NMI_acknowledged = FALSE; + cpu_NMI_interrupt(i); + deadline = mach_absolute_time() + (LockTimeOut * 2); + while (mach_absolute_time() < deadline && cpu_datap(i)->cpu_NMI_acknowledged == FALSE) + cpu_pause(); 
+ break; + } + } + + return spinlock_owner_cpu; +} /* * Acquire a usimple_lock. @@ -336,14 +359,20 @@ usimple_lock( OBTAIN_PC(pc); USLDBG(usld_lock_pre(l, pc)); - - if(!hw_lock_to(&l->interlock, LockTimeOutTSC)) {/* Try to get the lock - * with a timeout */ +/* Try to get the lock with a timeout */ + if(!hw_lock_to(&l->interlock, LockTimeOutTSC)) { boolean_t uslock_acquired = FALSE; - while (mp_recent_debugger_activity() && - !(uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC))); - if (uslock_acquired == FALSE) - panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p", l, (uintptr_t)l->interlock.lock_data, current_thread()); + while (machine_timeout_suspended()) { + enable_preemption(); + if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC))) + break; + } + if (uslock_acquired == FALSE) { + uint32_t lock_cpu; + spinlock_timed_out = l; + lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data); + panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu); + } } USLDBG(usld_lock_post(l, pc)); #else diff --git a/osfmk/i386/locore.s b/osfmk/i386/locore.s index b58b7ece7..65f7006c6 100644 --- a/osfmk/i386/locore.s +++ b/osfmk/i386/locore.s @@ -800,13 +800,14 @@ Entry(lo_allintrs) int_from_intstack: incl %gs:CPU_PREEMPTION_LEVEL incl %gs:CPU_INTERRUPT_LEVEL + incl %gs:CPU_NESTED_ISTACK movl %esp, %edx /* x86_saved_state */ CCALL1(interrupt, %edx) decl %gs:CPU_INTERRUPT_LEVEL decl %gs:CPU_PREEMPTION_LEVEL - + decl %gs:CPU_NESTED_ISTACK jmp ret_to_kernel /* diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index dc50a1ed1..4525c8a31 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -46,6 +46,8 @@ #include #include #include +#include + #if MACH_KDB #include #include @@ -432,7 +434,7 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop) * Are we supporting MMX/SSE/SSE2/SSE3? * As distinct from whether the cpu has these capabilities. 
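The usimple_lock() change above replaces "retry while the debugger was recently active" with the broader machine_timeout_suspended() predicate and, on a final timeout, records the lock in spinlock_timed_out, sends an NMI to the owner's CPU via spinlock_timeout_NMI() so its state is captured, and only then panics with the owner's identity. A hedged user-space reduction of that bounded-acquire-then-diagnose shape, with C11 atomics standing in for hw_lock_to():

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    static uint64_t now_ns(void) {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    /* 0 == unlocked, otherwise the owner's id; returns false on timeout,
     * leaving the last-seen owner in *owner for the caller's diagnostics. */
    static bool lock_with_timeout(atomic_uintptr_t *lock, uintptr_t self,
                                  uint64_t timeout_ns, uintptr_t *owner) {
        uint64_t deadline = now_ns() + timeout_ns;
        do {
            uintptr_t expected = 0;
            if (atomic_compare_exchange_weak(lock, &expected, self))
                return true;
            *owner = expected;
        } while (now_ns() < deadline);
        return false;   /* caller escalates: capture owner state, then fail */
    }
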
*/ - os_supports_sse = !!(get_cr4() & CR4_XMM); + os_supports_sse = !!(get_cr4() & CR4_OSXMM); if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) cpu_infop->vector_unit = 8; else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) @@ -541,7 +543,8 @@ ml_init_lock_timeout(void) } MutexSpin = (unsigned int)abstime; - nanoseconds_to_absolutetime(2 * NSEC_PER_SEC, &LastDebuggerEntryAllowance); + nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance); + interrupt_latency_tracker_setup(); } /* @@ -646,6 +649,10 @@ vm_offset_t ml_stack_remaining(void) } } +boolean_t machine_timeout_suspended(void) { + return (mp_recent_debugger_activity() || panic_active() || pmap_tlb_flush_timeout || spinlock_timed_out); +} + #if MACH_KDB /* diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index 24c9aeca3..e222fb18d 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -128,10 +128,15 @@ vm_offset_t ml_io_map( void ml_get_bouncepool_info( vm_offset_t *phys_addr, vm_size_t *size); - - +/* Indicates if spinlock, IPI and other timeouts should be suspended */ +boolean_t machine_timeout_suspended(void); #endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */ + +void interrupt_latency_tracker_setup(void); +void interrupt_reset_latency_stats(void); +void interrupt_populate_latency_stats(char *, unsigned); +boolean_t ml_fpu_avx_enabled(void); #endif /* XNU_KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE @@ -299,4 +304,6 @@ void ml_get_csw_threads(thread_t * /*old*/, thread_t * /*new*/); __END_DECLS + + #endif /* _I386_MACHINE_ROUTINES_H_ */ diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index 3e54df7b5..71e707c07 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -128,7 +128,7 @@ extern void rtc_nanotime_init_commpage(void); extern void rtc_sleep_wakeup(uint64_t base); -extern void rtc_lapic_start_ticking(void); +extern void rtc_timer_start(void); extern void rtc_clock_stepping( uint32_t new_frequency, diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 4dd1e625d..021f0638f 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -163,6 +163,7 @@ static volatile long mp_bc_count; decl_lck_mtx_data(static, mp_bc_lock); lck_mtx_ext_t mp_bc_lock_ext; static volatile int debugger_cpu = -1; +volatile long NMIPI_acks = 0; static void mp_cpus_call_action(void); static void mp_call_PM(void); @@ -461,7 +462,12 @@ cpu_signal_handler(x86_saved_state_t *regs) mp_disable_preemption(); my_cpu = cpu_number(); - my_word = ¤t_cpu_datap()->cpu_signals; + my_word = &cpu_data_ptr[my_cpu]->cpu_signals; + /* Store the initial set of signals for diagnostics. New + * signals could arrive while these are being processed + * so it's no more than a hint. 
+ */ + cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word; do { #if MACH_KDB && MACH_ASSERT @@ -533,7 +539,8 @@ static int NMIInterruptHandler(x86_saved_state_t *regs) { void *stackptr; - + + atomic_incl(&NMIPI_acks, 1); sync_iss_to_iks_unconditionally(regs); #if defined (__i386__) __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); @@ -544,16 +551,22 @@ NMIInterruptHandler(x86_saved_state_t *regs) if (cpu_number() == debugger_cpu) goto NMExit; - if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) { + if (spinlock_timed_out) { + char pstr[160]; + snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu); + panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs); + + } else if (pmap_tlb_flush_timeout == TRUE) { char pstr[128]; - snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number()); - panic_i386_backtrace(stackptr, 16, &pstr[0], TRUE, regs); + snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, TLB state:%d\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid); + panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs); } #if MACH_KDP if (pmsafe_debug && !kdp_snapshot) pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); - mp_kdp_wait(FALSE, pmap_tlb_flush_timeout); + current_cpu_datap()->cpu_NMI_acknowledged = TRUE; + mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active()); if (pmsafe_debug && !kdp_snapshot) pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); #endif @@ -899,7 +912,7 @@ handle_pending_TLB_flushes(void) { volatile int *my_word = ¤t_cpu_datap()->cpu_signals; - if (i_bit(MP_TLB_FLUSH, my_word)) { + if (i_bit(MP_TLB_FLUSH, my_word) && (pmap_tlb_flush_timeout == FALSE)) { DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH); i_bit_clear(MP_TLB_FLUSH, my_word); pmap_update_interrupt(); @@ -1155,8 +1168,11 @@ mp_kdp_enter(void) * stopping others. */ mp_kdp_state = ml_set_interrupts_enabled(FALSE); + my_cpu = cpu_number(); + cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time(); + simple_lock(&mp_kdp_lock); - debugger_entry_time = mach_absolute_time(); + if (pmsafe_debug && !kdp_snapshot) pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); @@ -1170,8 +1186,10 @@ mp_kdp_enter(void) } my_cpu = cpu_number(); debugger_cpu = my_cpu; + ncpus = 1; mp_kdp_ncpus = 1; /* self */ mp_kdp_trap = TRUE; + debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time; simple_unlock(&mp_kdp_lock); /* @@ -1179,7 +1197,7 @@ mp_kdp_enter(void) */ DBG("mp_kdp_enter() signaling other processors\n"); if (force_immediate_debugger_NMI == FALSE) { - for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { + for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; ncpus++; @@ -1227,7 +1245,7 @@ mp_kdp_enter(void) cpu_NMI_interrupt(cpu); } - DBG("mp_kdp_enter() %u processors done %s\n", + DBG("mp_kdp_enter() %lu processors done %s\n", mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? 
"OK" : "timed out"); postcode(MP_KDP_ENTER); @@ -1343,8 +1361,9 @@ mp_kdp_exit(void) boolean_t mp_recent_debugger_activity() { - return (((mach_absolute_time() - debugger_entry_time) < LastDebuggerEntryAllowance) || - ((mach_absolute_time() - debugger_exit_time) < LastDebuggerEntryAllowance)); + uint64_t abstime = mach_absolute_time(); + return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) || + ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance)); } /*ARGSUSED*/ diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h index 694f7c179..8a2abbd0a 100644 --- a/osfmk/i386/mp.h +++ b/osfmk/i386/mp.h @@ -105,6 +105,9 @@ extern int kdb_active[]; extern volatile boolean_t mp_kdp_trap; extern volatile boolean_t force_immediate_debugger_NMI; extern volatile boolean_t pmap_tlb_flush_timeout; +extern volatile usimple_lock_t spinlock_timed_out; +extern volatile uint32_t spinlock_owner_cpu; + extern uint64_t LastDebuggerEntryAllowance; extern void mp_kdp_enter(void); diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c index fcf202e6b..421cc3f53 100644 --- a/osfmk/i386/pcb.c +++ b/osfmk/i386/pcb.c @@ -886,7 +886,7 @@ machine_thread_state_initialize( * And if we're target, re-arm the no-fpu trap. */ if (thread->machine.pcb->ifps) { - (void) fpu_set_fxstate(thread, NULL); + (void) fpu_set_fxstate(thread, NULL, x86_FLOAT_STATE64); if (thread == current_thread()) clear_fpu(); @@ -1355,7 +1355,7 @@ machine_thread_set_state( if (thread_is_64bit(thr_act)) return(KERN_INVALID_ARGUMENT); - return fpu_set_fxstate(thr_act, tstate); + return fpu_set_fxstate(thr_act, tstate, flavor); } case x86_FLOAT_STATE64: @@ -1366,7 +1366,7 @@ machine_thread_set_state( if ( !thread_is_64bit(thr_act)) return(KERN_INVALID_ARGUMENT); - return fpu_set_fxstate(thr_act, tstate); + return fpu_set_fxstate(thr_act, tstate, flavor); } case x86_FLOAT_STATE: @@ -1379,15 +1379,37 @@ machine_thread_set_state( state = (x86_float_state_t *)tstate; if (state->fsh.flavor == x86_FLOAT_STATE64 && state->fsh.count == x86_FLOAT_STATE64_COUNT && thread_is_64bit(thr_act)) { - return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs64); + return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs64, x86_FLOAT_STATE64); } if (state->fsh.flavor == x86_FLOAT_STATE32 && state->fsh.count == x86_FLOAT_STATE32_COUNT && !thread_is_64bit(thr_act)) { - return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs32); + return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs32, x86_FLOAT_STATE32); } return(KERN_INVALID_ARGUMENT); } + case x86_AVX_STATE32: + { + if (count != x86_AVX_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return fpu_set_fxstate(thr_act, tstate, flavor); + } + + case x86_AVX_STATE64: + { + if (count != x86_AVX_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return fpu_set_fxstate(thr_act, tstate, flavor); + } + case x86_THREAD_STATE32: { if (count != x86_THREAD_STATE32_COUNT) @@ -1596,7 +1618,7 @@ machine_thread_get_state( *count = x86_FLOAT_STATE32_COUNT; - return fpu_get_fxstate(thr_act, tstate); + return fpu_get_fxstate(thr_act, tstate, flavor); } case x86_FLOAT_STATE64: @@ -1609,7 +1631,7 @@ machine_thread_get_state( *count = x86_FLOAT_STATE64_COUNT; - return fpu_get_fxstate(thr_act, tstate); + return fpu_get_fxstate(thr_act, tstate, flavor); } case x86_FLOAT_STATE: @@ -1630,18 +1652,44 @@ machine_thread_get_state( state->fsh.flavor = x86_FLOAT_STATE64; 
state->fsh.count = x86_FLOAT_STATE64_COUNT; - kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs64); + kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs64, x86_FLOAT_STATE64); } else { state->fsh.flavor = x86_FLOAT_STATE32; state->fsh.count = x86_FLOAT_STATE32_COUNT; - kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs32); + kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs32, x86_FLOAT_STATE32); } *count = x86_FLOAT_STATE_COUNT; return(kret); } + case x86_AVX_STATE32: + { + if (*count != x86_AVX_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_AVX_STATE32_COUNT; + + return fpu_get_fxstate(thr_act, tstate, flavor); + } + + case x86_AVX_STATE64: + { + if (*count != x86_AVX_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_AVX_STATE64_COUNT; + + return fpu_get_fxstate(thr_act, tstate, flavor); + } + case x86_THREAD_STATE32: { if (*count < x86_THREAD_STATE32_COUNT) @@ -2500,7 +2548,6 @@ act_thread_csave(void) val = x86_FLOAT_STATE64_COUNT; kret = machine_thread_get_state(thr_act, x86_FLOAT_STATE64, (thread_state_t) &ic64->fs, &val); - if (kret != KERN_SUCCESS) { kfree(ic64, sizeof(struct x86_act_context64)); return((void *)0); @@ -2583,13 +2630,8 @@ act_thread_catt(void *ctx) kret = machine_thread_set_state(thr_act, x86_SAVED_STATE32, (thread_state_t) &ic32->ss, x86_SAVED_STATE32_COUNT); if (kret == KERN_SUCCESS) { - kret = machine_thread_set_state(thr_act, x86_FLOAT_STATE32, + (void) machine_thread_set_state(thr_act, x86_FLOAT_STATE32, (thread_state_t) &ic32->fs, x86_FLOAT_STATE32_COUNT); - if (kret == KERN_SUCCESS && thr_act->machine.pcb->ids) - machine_thread_set_state(thr_act, - x86_DEBUG_STATE32, - (thread_state_t)&ic32->ds, - x86_DEBUG_STATE32_COUNT); } kfree(ic32, sizeof(struct x86_act_context32)); } diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index 91e3799bb..c469d7a1c 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -56,9 +57,10 @@ decl_simple_lock_data(,pm_init_lock); /* * The following is set when the KEXT loads and initializes. 
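With the machine_thread_get_state()/machine_thread_set_state() plumbing above, the AVX flavors become reachable through the ordinary Mach thread-state interface, under the same count and bitness validation as the float flavors. A hedged user-space sketch, assuming an SDK whose Mach headers carry x86_AVX_STATE64 and x86_avx_state64_t (introduced alongside this change; the __-prefixed field names follow the UNIX03 convention):

    #include <mach/mach.h>
    #include <stdio.h>

    int main(void) {
        x86_avx_state64_t avx;
        mach_msg_type_number_t count = x86_AVX_STATE64_COUNT;
        thread_act_t self = mach_thread_self();

        kern_return_t kr = thread_get_state(self, x86_AVX_STATE64,
                                            (thread_state_t)&avx, &count);
        if (kr == KERN_SUCCESS)
            printf("MXCSR 0x%x, returned count %u\n",
                   avx.__fpu_mxcsr, count);
        mach_port_deallocate(mach_task_self(), self);  /* drop the self ref */
        return kr == KERN_SUCCESS ? 0 : 1;
    }
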
*/ -pmDispatch_t *pmDispatch = NULL; +pmDispatch_t *pmDispatch = NULL; static uint32_t pmInitDone = 0; +static boolean_t earlyTopology = FALSE; /* @@ -192,6 +194,9 @@ pmMarkAllCPUsOff(void) static void pmInitComplete(void) { + if (earlyTopology && pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) + (*pmDispatch->pmCPUStateInit)(); + pmInitDone = 1; } @@ -268,9 +273,9 @@ pmLockCPUTopology(int lock) uint64_t pmCPUGetDeadline(cpu_data_t *cpu) { - uint64_t deadline = EndOfAllTime; + uint64_t deadline = 0; - if (pmInitDone + if (pmInitDone && pmDispatch != NULL && pmDispatch->GetDeadline != NULL) deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu); @@ -357,6 +362,8 @@ pmCPUStateInit(void) { if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) (*pmDispatch->pmCPUStateInit)(); + else + earlyTopology = TRUE; } /* @@ -596,6 +603,40 @@ machine_choose_processor(processor_set_t pset, return(preferred); } +static int +pmThreadGetUrgency(__unused uint64_t *rt_period, __unused uint64_t *rt_deadline) +{ + + return(0); +} + +void +thread_tell_urgency(int urgency, + uint64_t rt_period, + uint64_t rt_deadline) +{ + KERNEL_DEBUG_CONSTANT(0x1400054, + urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0); + + if (!pmInitDone + || pmDispatch == NULL + || pmDispatch->pmThreadTellUrgency == NULL) + return; + + pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline); +} + +void +active_rt_threads(boolean_t active) +{ + if (!pmInitDone + || pmDispatch == NULL + || pmDispatch->pmActiveRTThreads == NULL) + return; + + pmDispatch->pmActiveRTThreads(active); +} + static uint32_t pmGetSavedRunCount(void) { @@ -645,10 +686,26 @@ pmSendIPI(int cpu) lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT); } -static rtc_nanotime_t * -pmGetNanotimeInfo(void) +static void +pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime) +{ + /* + * Make sure that nanotime didn't change while we were reading it. + */ + do { + rtc_nanotime->generation = rtc_nanotime_info.generation; /* must be first */ + rtc_nanotime->tsc_base = rtc_nanotime_info.tsc_base; + rtc_nanotime->ns_base = rtc_nanotime_info.ns_base; + rtc_nanotime->scale = rtc_nanotime_info.scale; + rtc_nanotime->shift = rtc_nanotime_info.shift; + } while(rtc_nanotime_info.generation != 0 + && rtc_nanotime->generation != rtc_nanotime_info.generation); +} + +static uint32_t +pmTimerQueueMigrate(__unused int target_cpu) { - return(&rtc_nanotime_info); + return (0); } /* @@ -681,10 +738,17 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, callbacks->GetSavedRunCount = pmGetSavedRunCount; callbacks->pmSendIPI = pmSendIPI; callbacks->GetNanotimeInfo = pmGetNanotimeInfo; - callbacks->RTCClockAdjust = rtc_clock_adjust; + callbacks->ThreadGetUrgency = pmThreadGetUrgency; + callbacks->RTCClockAdjust = rtc_clock_adjust; + callbacks->timerQueueMigrate = pmTimerQueueMigrate; callbacks->topoParms = &topoParms; + callbacks->InterruptPending = lapic_is_interrupt_pending; + callbacks->IsInterrupting = lapic_is_interrupting; + callbacks->InterruptStats = lapic_interrupt_counts; + callbacks->DisableApicTimer = lapic_disable_timer; } else { - panic("Version mis-match between Kernel and CPU PM"); + panic("Version mis-match between Kernel (%d) and CPU PM (%d)", + PM_DISPATCH_VERSION, version); } if (cpuFuncs != NULL) { diff --git a/osfmk/i386/pmCPU.h b/osfmk/i386/pmCPU.h index c6e36a616..55041fc10 100644 --- a/osfmk/i386/pmCPU.h +++ b/osfmk/i386/pmCPU.h @@ -38,7 +38,7 @@ * This value should be changed each time that pmDsipatch_t or pmCallBacks_t * changes. 
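pmGetNanotimeInfo() above replaces the old interface that handed the power-management kext a raw pointer into rtc_nanotime_info with a copy-out under a generation check: the writer publishes generation == 0 while updating, so a reader retries until it observes the same non-zero generation on both sides of its field copies, obtaining a consistent tuple without sharing a lock with the update path. A hedged C11 reduction of that read protocol; the kernel additionally leans on x86 store ordering where a portable version would want explicit fences, and its loop also tolerates a still-zero generation from before first publication:

    #include <stdatomic.h>
    #include <stdint.h>

    struct nanotime {
        _Atomic uint32_t generation;   /* 0 == being updated */
        uint64_t tsc_base, ns_base;
        uint32_t scale, shift;
    };

    struct nanotime_snap { uint64_t tsc_base, ns_base; uint32_t scale, shift; };

    void nanotime_snapshot(struct nanotime *src, struct nanotime_snap *dst) {
        uint32_t gen;
        do {
            gen = atomic_load(&src->generation);    /* must be read first */
            dst->tsc_base = src->tsc_base;
            dst->ns_base  = src->ns_base;
            dst->scale    = src->scale;
            dst->shift    = src->shift;
        } while (gen == 0 || gen != atomic_load(&src->generation));
    }
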
*/ -#define PM_DISPATCH_VERSION 21 +#define PM_DISPATCH_VERSION 23 /* * Dispatch table for functions that get installed when the power @@ -77,11 +77,25 @@ typedef struct boolean_t (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu); int (*pmChooseCPU)(int startCPU, int endCPU, int preferredCPU); int (*pmIPIHandler)(void *state); + void (*pmThreadTellUrgency)(int urgency, uint64_t rt_period, uint64_t rt_deadline); + void (*pmActiveRTThreads)(boolean_t active); } pmDispatch_t; +/* + * common time fields exported to PM code. This structure may be + * allocated on the stack, so avoid making it unnecessarily large. + */ +typedef struct pm_rtc_nanotime { + uint64_t tsc_base; /* timestamp */ + uint64_t ns_base; /* nanoseconds */ + uint32_t scale; /* tsc -> nanosec multiplier */ + uint32_t shift; /* tsc -> nanosec shift/div */ + uint32_t generation; /* 0 == being updated */ +} pm_rtc_nanotime_t; + typedef struct { - int (*setRTCPop)(uint64_t time); + uint64_t (*setRTCPop)(uint64_t time); void (*resyncDeadlines)(int cpu); void (*initComplete)(void); x86_lcpu_t *(*GetLCPU)(int cpu); @@ -99,9 +113,16 @@ typedef struct { processor_t (*ThreadBind)(processor_t proc); uint32_t (*GetSavedRunCount)(void); void (*pmSendIPI)(int cpu); - rtc_nanotime_t *(*GetNanotimeInfo)(void); + void (*GetNanotimeInfo)(pm_rtc_nanotime_t *); + int (*ThreadGetUrgency)(uint64_t *rt_period, uint64_t *rt_deadline); + uint32_t (*timeQueueMigrate)(int cpu); void (*RTCClockAdjust)(uint64_t adjustment); + uint32_t (*timerQueueMigrate)(int cpu); x86_topology_parameters_t *topoParms; + boolean_t (*InterruptPending)(void); + boolean_t (*IsInterrupting)(uint8_t vector); + void (*InterruptStats)(uint64_t intrs[256]); + void (*DisableApicTimer)(void); } pmCallBacks_t; extern pmDispatch_t *pmDispatch; @@ -123,6 +144,8 @@ void pmTimerSave(void); void pmTimerRestore(void); kern_return_t pmCPUExitHalt(int cpu); kern_return_t pmCPUExitHaltToOff(int cpu); +void thread_tell_urgency(int urgency, uint64_t rt_period, uint64_t rt_deadline); +void active_rt_threads(boolean_t active); #define PM_HALT_NORMAL 0 /* normal halt path */ #define PM_HALT_DEBUG 1 /* debug code wants to halt */ diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index fc2147d4b..3d12ba9f2 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -273,10 +273,6 @@ static vm_object_t kptobj; char *pmap_phys_attributes; unsigned int last_managed_page = 0; -extern ppnum_t lowest_lo; -extern ppnum_t lowest_hi; -extern ppnum_t highest_hi; - /* * Amount of virtual memory mapped by one * page-directory entry. 
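pmKextRegister() is the only coupling point between the kernel and the power-management kext, which is why PM_DISPATCH_VERSION must move in lockstep with these structs: the two sides exchange bare function-pointer tables, so a stale layout would be called through blindly. A sketch of the kext side of the handshake (the registration function name is real per this patch; the surrounding plugin code is hypothetical):

    static pmDispatch_t  pm_dispatch;    /* plugin fills in its entries */
    static pmCallBacks_t pm_callbacks;

    static void
    pm_plugin_register(void)
    {
        /* Both tables are raw function pointers; PM_DISPATCH_VERSION is
         * the only layout check, so it is bumped on every change and a
         * mismatch panics by design. */
        pmKextRegister(PM_DISPATCH_VERSION, &pm_dispatch, &pm_callbacks);

        /* Thereafter, e.g., timebase snapshots arrive via callback,
         * into a caller-provided (stack-safe) pm_rtc_nanotime_t: */
        pm_rtc_nanotime_t now;
        (*pm_callbacks.GetNanotimeInfo)(&now);
        (void) now;
    }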
@@ -392,6 +388,8 @@ extern char end; static int nkpt; +extern long NMIPI_acks; + pt_entry_t *DMAP1, *DMAP2; caddr_t DADDR1; caddr_t DADDR2; @@ -2988,7 +2986,7 @@ pmap_cpuset_NMIPI(cpu_set cpu_mask) { if (cpu_mask & cpu_bit) cpu_NMI_interrupt(cpu); } - deadline = mach_absolute_time() + (LockTimeOut); + deadline = mach_absolute_time() + (LockTimeOut * 2); while (mach_absolute_time() < deadline) cpu_pause(); } @@ -3057,18 +3055,7 @@ pmap_flush_tlbs(pmap_t pmap) * Wait for those other cpus to acknowledge */ while (cpus_to_respond != 0) { - if (mach_absolute_time() > deadline) { - if (mp_recent_debugger_activity()) - continue; - if (!panic_active()) { - pmap_tlb_flush_timeout = TRUE; - pmap_cpuset_NMIPI(cpus_to_respond); - } - panic("pmap_flush_tlbs() timeout: " - "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx", - pmap, cpus_to_respond); - } - + long orig_acks = 0; for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { if ((cpus_to_respond & cpu_bit) != 0) { if (!cpu_datap(cpu)->cpu_running || @@ -3081,6 +3068,17 @@ pmap_flush_tlbs(pmap_t pmap) if (cpus_to_respond == 0) break; } + if (mach_absolute_time() > deadline) { + if (machine_timeout_suspended()) + continue; + pmap_tlb_flush_timeout = TRUE; + orig_acks = NMIPI_acks; + pmap_cpuset_NMIPI(cpus_to_respond); + + panic("TLB invalidation IPI timeout: " + "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx", + cpus_to_respond, orig_acks, NMIPI_acks); + } } } /* diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index 0acf265d2..5d3ac764e 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -658,7 +658,7 @@ extern int pmap_list_resident_pages( struct pmap *pmap, vm_offset_t *listp, int space); - +extern void x86_filter_TLB_coherency_interrupts(boolean_t); #ifdef __i386__ extern void pmap_commpage32_init( vm_offset_t kernel, diff --git a/osfmk/i386/pmap_internal.h b/osfmk/i386/pmap_internal.h index eef4f7c4d..63bebc3ab 100644 --- a/osfmk/i386/pmap_internal.h +++ b/osfmk/i386/pmap_internal.h @@ -355,6 +355,10 @@ extern uint64_t pde_mapped_size; extern char *pmap_phys_attributes; extern unsigned int last_managed_page; +extern ppnum_t lowest_lo; +extern ppnum_t lowest_hi; +extern ppnum_t highest_hi; + /* * when spinning through pmap_remove * ensure that we don't spend too much diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c index 63ec071f4..a8c3423b4 100644 --- a/osfmk/i386/pmap_x86_common.c +++ b/osfmk/i386/pmap_x86_common.c @@ -668,6 +668,11 @@ Retry: if (pmap->stats.resident_count > pmap->stats.resident_max) { pmap->stats.resident_max = pmap->stats.resident_count; } + } else if (last_managed_page == 0) { + /* Account for early mappings created before "managed pages" + * are determined. Consider consulting the available DRAM map. + */ + OSAddAtomic(+1, &pmap->stats.resident_count); } /* * Step 3) Enter the mapping. @@ -1329,3 +1334,16 @@ pmap_clear_noencrypt(ppnum_t pn) } } +void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) { + assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); + + if (dofilter) { + CPU_CR3_MARK_INACTIVE(); + } else { + CPU_CR3_MARK_ACTIVE(); + __asm__ volatile("mfence"); + if (current_cpu_datap()->cpu_tlb_invalid) + process_pmap_updates(); + } +} + diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index a22ccd03b..54fca68e3 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -145,18 +145,31 @@ /* * CR4 */ -#define CR4_VMXE 0x00002000 /* Enable VMX operation */ -#define CR4_FXS 0x00000200 /* SSE/SSE2 OS supports FXSave */ -#define CR4_XMM 0x00000400 /* SSE/SSE2 instructions supported in OS */ -#define CR4_PGE 0x00000080 /* p6: Page Global Enable */ -#define CR4_MCE 0x00000040 /* p5: Machine Check Exceptions */ -#define CR4_PAE 0x00000020 /* p5: Physical Address Extensions */ -#define CR4_PSE 0x00000010 /* p5: Page Size Extensions */ -#define CR4_DE 0x00000008 /* p5: Debugging Extensions */ -#define CR4_TSD 0x00000004 /* p5: Time Stamp Disable */ -#define CR4_PVI 0x00000002 /* p5: Protected-mode Virtual Interrupts */ -#define CR4_VME 0x00000001 /* p5: Virtual-8086 Mode Extensions */ +#define CR4_OSXSAVE 0x00040000 /* OS supports XSAVE */ +#define CR4_SMXE 0x00004000 /* Enable SMX operation */ +#define CR4_VMXE 0x00002000 /* Enable VMX operation */ +#define CR4_OSXMM 0x00000400 /* SSE/SSE2 exceptions supported in OS */ +#define CR4_OSFXS 0x00000200 /* SSE/SSE2 OS supports FXSave */ +#define CR4_PCE 0x00000100 /* Performance-Monitor Count Enable */ +#define CR4_PGE 0x00000080 /* Page Global Enable */ +#define CR4_MCE 0x00000040 /* Machine Check Exceptions */ +#define CR4_PAE 0x00000020 /* Physical Address Extensions */ +#define CR4_PSE 0x00000010 /* Page Size Extensions */ +#define CR4_DE 0x00000008 /* Debugging Extensions */ +#define CR4_TSD 0x00000004 /* Time Stamp Disable */ +#define CR4_PVI 0x00000002 /* Protected-mode Virtual Interrupts */ +#define CR4_VME 0x00000001 /* Virtual-8086 Mode Extensions */ +/* + * XCR0 - XFEATURE_ENABLED_MASK (a.k.a. XFEM) register + */ +#define XCR0_YMM 0x0000000000000004ULL /* YMM state available */ +#define XFEM_YMM XCR0_YMM +#define XCR0_SSE 0x0000000000000002ULL /* SSE supported by XSAVE/XRESTORE */ +#define XCR0_X87 0x0000000000000001ULL /* x87, FPU/MMX (always set) */ +#define XFEM_SSE XCR0_SSE +#define XFEM_X87 XCR0_X87 +#define XCR0 (0) #ifndef ASSEMBLER #include @@ -392,108 +405,124 @@ __END_DECLS #endif /* ASSEMBLER */ -#define MSR_IA32_P5_MC_ADDR 0 -#define MSR_IA32_P5_MC_TYPE 1 -#define MSR_IA32_PLATFORM_ID 0x17 -#define MSR_IA32_EBL_CR_POWERON 0x2a - -#define MSR_IA32_APIC_BASE 0x1b -#define MSR_IA32_APIC_BASE_BSP (1<<8) -#define MSR_IA32_APIC_BASE_ENABLE (1<<11) -#define MSR_IA32_APIC_BASE_BASE (0xfffff<<12) - -#define MSR_IA32_FEATURE_CONTROL 0x3a -#define MSR_IA32_FEATCTL_LOCK (1<<0) -#define MSR_IA32_FEATCTL_VMXON_SMX (1<<1) -#define MSR_IA32_FEATCTL_VMXON (1<<2) -#define MSR_IA32_FEATCTL_CSTATE_SMI (1<<16) - -#define MSR_IA32_UCODE_WRITE 0x79 -#define MSR_IA32_UCODE_REV 0x8b - -#define MSR_IA32_PERFCTR0 0xc1 -#define MSR_IA32_PERFCTR1 0xc2 - -#define MSR_PMG_CST_CONFIG_CONTROL 0xe2 - -#define MSR_IA32_BBL_CR_CTL 0x119 - -#define MSR_IA32_SYSENTER_CS 0x174 -#define MSR_IA32_SYSENTER_ESP 0x175 -#define MSR_IA32_SYSENTER_EIP 0x176 - -#define MSR_IA32_MCG_CAP 0x179 -#define MSR_IA32_MCG_STATUS 0x17a -#define MSR_IA32_MCG_CTL 0x17b - -#define MSR_IA32_EVNTSEL0 0x186 -#define MSR_IA32_EVNTSEL1 0x187 - -#define MSR_IA32_PERF_STS 0x198 -#define MSR_IA32_PERF_CTL 0x199 - -#define MSR_IA32_MISC_ENABLE 0x1a0 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - -#define MSR_IA32_CR_PAT 0x277 - -#define MSR_IA32_MC0_CTL 0x400 -#define 
MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 - -#define MSR_IA32_MTRRCAP 0xfe -#define MSR_IA32_MTRR_DEF_TYPE 0x2ff -#define MSR_IA32_MTRR_PHYSBASE(n) (0x200 + 2*(n)) -#define MSR_IA32_MTRR_PHYSMASK(n) (0x200 + 2*(n) + 1) -#define MSR_IA32_MTRR_FIX64K_00000 0x250 -#define MSR_IA32_MTRR_FIX16K_80000 0x258 -#define MSR_IA32_MTRR_FIX16K_A0000 0x259 -#define MSR_IA32_MTRR_FIX4K_C0000 0x268 -#define MSR_IA32_MTRR_FIX4K_C8000 0x269 -#define MSR_IA32_MTRR_FIX4K_D0000 0x26a -#define MSR_IA32_MTRR_FIX4K_D8000 0x26b -#define MSR_IA32_MTRR_FIX4K_E0000 0x26c -#define MSR_IA32_MTRR_FIX4K_E8000 0x26d -#define MSR_IA32_MTRR_FIX4K_F0000 0x26e -#define MSR_IA32_MTRR_FIX4K_F8000 0x26f - -#define MSR_IA32_VMX_BASE 0x480 -#define MSR_IA32_VMX_BASIC MSR_IA32_VMX_BASE +#define MSR_IA32_P5_MC_ADDR 0 +#define MSR_IA32_P5_MC_TYPE 1 +#define MSR_IA32_PLATFORM_ID 0x17 +#define MSR_IA32_EBL_CR_POWERON 0x2a + +#define MSR_IA32_APIC_BASE 0x1b +#define MSR_IA32_APIC_BASE_BSP (1<<8) +#define MSR_IA32_APIC_BASE_EXTENDED (1<<10) +#define MSR_IA32_APIC_BASE_ENABLE (1<<11) +#define MSR_IA32_APIC_BASE_BASE (0xfffff<<12) + +#define MSR_CORE_THREAD_COUNT 0x35 + +#define MSR_IA32_FEATURE_CONTROL 0x3a +#define MSR_IA32_FEATCTL_LOCK (1<<0) +#define MSR_IA32_FEATCTL_VMXON_SMX (1<<1) +#define MSR_IA32_FEATCTL_VMXON (1<<2) +#define MSR_IA32_FEATCTL_CSTATE_SMI (1<<16) + +#define MSR_IA32_UPDT_TRIG 0x79 +#define MSR_IA32_BIOS_SIGN_ID 0x8b +#define MSR_IA32_UCODE_WRITE MSR_IA32_UPDT_TRIG +#define MSR_IA32_UCODE_REV MSR_IA32_BIOS_SIGN_ID + +#define MSR_IA32_PERFCTR0 0xc1 +#define MSR_IA32_PERFCTR1 0xc2 + +#define MSR_PLATFORM_INFO 0xce + +#define MSR_PMG_CST_CONFIG_CONTROL 0xe2 + +#define MSR_IA32_BBL_CR_CTL 0x119 + +#define MSR_IA32_SYSENTER_CS 0x174 +#define MSR_IA32_SYSENTER_ESP 0x175 +#define MSR_IA32_SYSENTER_EIP 0x176 + +#define MSR_IA32_MCG_CAP 0x179 +#define MSR_IA32_MCG_STATUS 0x17a +#define MSR_IA32_MCG_CTL 0x17b + +#define MSR_IA32_EVNTSEL0 0x186 +#define MSR_IA32_EVNTSEL1 0x187 + +#define MSR_FLEX_RATIO 0x194 +#define MSR_IA32_PERF_STS 0x198 +#define MSR_IA32_PERF_CTL 0x199 +#define MSR_IA32_CLOCK_MODULATION 0x19a + +#define MSR_IA32_MISC_ENABLE 0x1a0 + +#define MSR_IA32_ENERGY_PERFORMANCE_BIAS 0x1b0 +#define MSR_IA32_PACKAGE_THERM_STATUS 0x1b1 +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x1b2 + +#define MSR_IA32_DEBUGCTLMSR 0x1d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x1db +#define MSR_IA32_LASTBRANCHTOIP 0x1dc +#define MSR_IA32_LASTINTFROMIP 0x1dd +#define MSR_IA32_LASTINTTOIP 0x1de + +#define MSR_IA32_CR_PAT 0x277 + +#define MSR_IA32_MTRRCAP 0xfe +#define MSR_IA32_MTRR_DEF_TYPE 0x2ff +#define MSR_IA32_MTRR_PHYSBASE(n) (0x200 + 2*(n)) +#define MSR_IA32_MTRR_PHYSMASK(n) (0x200 + 2*(n) + 1) +#define MSR_IA32_MTRR_FIX64K_00000 0x250 +#define MSR_IA32_MTRR_FIX16K_80000 0x258 +#define MSR_IA32_MTRR_FIX16K_A0000 0x259 +#define MSR_IA32_MTRR_FIX4K_C0000 0x268 +#define MSR_IA32_MTRR_FIX4K_C8000 0x269 +#define MSR_IA32_MTRR_FIX4K_D0000 0x26a +#define MSR_IA32_MTRR_FIX4K_D8000 0x26b +#define MSR_IA32_MTRR_FIX4K_E0000 0x26c +#define MSR_IA32_MTRR_FIX4K_E8000 0x26d +#define MSR_IA32_MTRR_FIX4K_F0000 0x26e +#define MSR_IA32_MTRR_FIX4K_F8000 0x26f + +#define MSR_IA32_MC0_CTL 0x400 +#define MSR_IA32_MC0_STATUS 0x401 +#define MSR_IA32_MC0_ADDR 0x402 +#define MSR_IA32_MC0_MISC 0x403 + +#define MSR_IA32_VMX_BASE 0x480 +#define MSR_IA32_VMX_BASIC MSR_IA32_VMX_BASE #define MSR_IA32_VMXPINBASED_CTLS MSR_IA32_VMX_BASE+1 -#define MSR_IA32_PROCBASED_CTLS MSR_IA32_VMX_BASE+2 -#define MSR_IA32_VMX_EXIT_CTLS 
MSR_IA32_VMX_BASE+3 -#define MSR_IA32_VMX_ENTRY_CTLS MSR_IA32_VMX_BASE+4 -#define MSR_IA32_VMX_MISC MSR_IA32_VMX_BASE+5 -#define MSR_IA32_VMX_CR0_FIXED0 MSR_IA32_VMX_BASE+6 -#define MSR_IA32_VMX_CR0_FIXED1 MSR_IA32_VMX_BASE+7 -#define MSR_IA32_VMX_CR4_FIXED0 MSR_IA32_VMX_BASE+8 -#define MSR_IA32_VMX_CR4_FIXED1 MSR_IA32_VMX_BASE+9 - -#define MSR_IA32_EFER 0xC0000080 -#define MSR_IA32_EFER_SCE 0x00000001 -#define MSR_IA32_EFER_LME 0x00000100 -#define MSR_IA32_EFER_LMA 0x00000400 -#define MSR_IA32_EFER_NXE 0x00000800 - -#define MSR_IA32_STAR 0xC0000081 -#define MSR_IA32_LSTAR 0xC0000082 -#define MSR_IA32_CSTAR 0xC0000083 -#define MSR_IA32_FMASK 0xC0000084 - -#define MSR_IA32_FS_BASE 0xC0000100 -#define MSR_IA32_GS_BASE 0xC0000101 -#define MSR_IA32_KERNEL_GS_BASE 0xC0000102 - -#define MSR_IA32_BIOS_SIGN_ID 0x08B - -#define MSR_FLEX_RATIO 0x194 -#define MSR_PLATFORM_INFO 0x0ce -#define MSR_CORE_THREAD_COUNT 0x035 +#define MSR_IA32_PROCBASED_CTLS MSR_IA32_VMX_BASE+2 +#define MSR_IA32_VMX_EXIT_CTLS MSR_IA32_VMX_BASE+3 +#define MSR_IA32_VMX_ENTRY_CTLS MSR_IA32_VMX_BASE+4 +#define MSR_IA32_VMX_MISC MSR_IA32_VMX_BASE+5 +#define MSR_IA32_VMX_CR0_FIXED0 MSR_IA32_VMX_BASE+6 +#define MSR_IA32_VMX_CR0_FIXED1 MSR_IA32_VMX_BASE+7 +#define MSR_IA32_VMX_CR4_FIXED0 MSR_IA32_VMX_BASE+8 +#define MSR_IA32_VMX_CR4_FIXED1 MSR_IA32_VMX_BASE+9 + +#define MSR_IA32_DS_AREA 0x600 + +#define MSR_IA32_PACKAGE_POWER_SKU_UNIT 0x606 +#define MSR_IA32_PACKAGE_ENERY_STATUS 0x611 +#define MSR_IA32_PRIMARY_PLANE_ENERY_STATUS 0x639 +#define MSR_IA32_SECONDARY_PLANE_ENERY_STATUS 0x641 +#define MSR_IA32_TSC_DEADLINE 0x6e0 + +#define MSR_IA32_EFER 0xC0000080 +#define MSR_IA32_EFER_SCE 0x00000001 +#define MSR_IA32_EFER_LME 0x00000100 +#define MSR_IA32_EFER_LMA 0x00000400 +#define MSR_IA32_EFER_NXE 0x00000800 + +#define MSR_IA32_STAR 0xC0000081 +#define MSR_IA32_LSTAR 0xC0000082 +#define MSR_IA32_CSTAR 0xC0000083 +#define MSR_IA32_FMASK 0xC0000084 + +#define MSR_IA32_FS_BASE 0xC0000100 +#define MSR_IA32_GS_BASE 0xC0000101 +#define MSR_IA32_KERNEL_GS_BASE 0xC0000102 +#define MSR_IA32_TSC_AUX 0xC0000103 #endif /* _I386_PROC_REG_H_ */ diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 244e787e0..72b1f556f 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -73,26 +73,182 @@ #include #include -#define NSEC_PER_HZ (NSEC_PER_SEC / 100) /* nsec per tick */ - #define UI_CPUFREQ_ROUNDING_FACTOR 10000000 int rtclock_config(void); int rtclock_init(void); -uint64_t rtc_decrementer_min; - uint64_t tsc_rebase_abs_time = 0; -void rtclock_intr(x86_saved_state_t *regs); -static uint64_t maxDec; /* longest interval our hardware timer can handle (nsec) */ +void rtclock_intr(x86_saved_state_t *regs); static void rtc_set_timescale(uint64_t cycles); static uint64_t rtc_export_speed(uint64_t cycles); rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0}; +static uint64_t rtc_decrementer_min; +static uint64_t rtc_decrementer_max; + +static uint64_t +deadline_to_decrementer( + uint64_t deadline, + uint64_t now) +{ + uint64_t delta; + + if (deadline <= now) + return rtc_decrementer_min; + else { + delta = deadline - now; + return MIN(MAX(rtc_decrementer_min,delta),rtc_decrementer_max); + } +} + +static inline uint64_t +_absolutetime_to_tsc(uint64_t ns) +{ + uint32_t generation; + uint64_t tsc; + + do { + generation = rtc_nanotime_info.generation; + tsc = tmrCvt(ns - rtc_nanotime_info.ns_base, tscFCvtn2t) + + rtc_nanotime_info.tsc_base; + } while (generation == 0 || + generation != rtc_nanotime_info.generation); + + return tsc; +} + +/* + * 
Regular local APIC timer case: + */ +static void +rtc_lapic_config_timer(void) +{ + lapic_config_timer(TRUE, one_shot, divide_by_1); +} +static uint64_t +rtc_lapic_set_timer(uint64_t deadline, uint64_t now) +{ + uint64_t count; + uint64_t set = 0; + + if (deadline > 0) { + /* + * Convert delta to bus ticks + * - time now is not relevant + */ + count = deadline_to_decrementer(deadline, now); + set = now + count; + lapic_set_timer_fast((uint32_t) tmrCvt(count, busFCvtn2t)); + } else { + lapic_set_timer(FALSE, one_shot, divide_by_1, 0); + } + return set; +} + +/* + * TSC-deadline timer case: + */ +static void +rtc_lapic_config_tsc_deadline_timer(void) +{ + lapic_config_tsc_deadline_timer(); +} +static uint64_t +rtc_lapic_set_tsc_deadline_timer(uint64_t deadline, uint64_t now) +{ + uint64_t set = 0; + + if (deadline > 0) { + /* + * Convert to TSC + */ + set = now + deadline_to_decrementer(deadline, now); + lapic_set_tsc_deadline_timer(_absolutetime_to_tsc(set)); + } else { + lapic_set_tsc_deadline_timer(0); + } + return set; +} + +/* + * Definitions for timer operations table + */ +typedef struct { + void (*config)(void); + uint64_t (*set) (uint64_t, uint64_t); +} rtc_timer_t; + +rtc_timer_t rtc_timer_lapic = { + rtc_lapic_config_timer, + rtc_lapic_set_timer +}; + +rtc_timer_t rtc_timer_tsc_deadline = { + rtc_lapic_config_tsc_deadline_timer, + rtc_lapic_set_tsc_deadline_timer +}; + +rtc_timer_t *rtc_timer = &rtc_timer_lapic; /* defaults to LAPIC timer */ + +/* + * rtc_timer_init() is called at startup on the boot processor only. + */ +static void +rtc_timer_init(void) +{ + int TSC_deadline_timer = 0; + + /* See whether we can use the local apic in TSC-deadline mode */ + if ((cpuid_features() & CPUID_FEATURE_TSCTMR)) { + TSC_deadline_timer = 1; + PE_parse_boot_argn("TSC_deadline_timer", &TSC_deadline_timer, + sizeof(TSC_deadline_timer)); + printf("TSC Deadline Timer supported %s enabled\n", + TSC_deadline_timer ? "and" : "but not"); + } + + if (TSC_deadline_timer) { + rtc_timer = &rtc_timer_tsc_deadline; + rtc_decrementer_max = UINT64_MAX; /* effectively none */ + /* + * The min could be as low as 1nsec, + * but we're being conservative for now and making it the same + * as for the local apic timer. + */ + rtc_decrementer_min = 1*NSEC_PER_USEC; /* 1 usec */ + } else { + /* + * Compute the longest interval using LAPIC timer. + */ + rtc_decrementer_max = tmrCvt(0x7fffffffULL, busFCvtt2n); + kprintf("maxDec: %lld\n", rtc_decrementer_max); + rtc_decrementer_min = 1*NSEC_PER_USEC; /* 1 usec */ + } + + /* Point LAPIC interrupts to hardclock() */ + lapic_set_timer_func((i386_intr_func_t) rtclock_intr); +} + +static inline uint64_t +rtc_timer_set(uint64_t deadline, uint64_t now) +{ + return rtc_timer->set(deadline, now); +} + +void +rtc_timer_start(void) +{ + /* + * Force a complete re-evaluation of timer deadlines. 
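rtc_timer_init() picks the backend exactly once at boot: TSC-deadline mode is preferred when CPUID advertises it (CPUID_FEATURE_TSCTMR), and the TSC_deadline_timer boot-arg can override the choice either way, so booting with TSC_deadline_timer=0 forces the classic LAPIC one-shot path on capable hardware. The ops-table shape, reduced to a sketch with placeholder implementations:

    #include <stdint.h>

    typedef struct {
        void     (*config)(void);               /* one-time hardware setup */
        uint64_t (*set)(uint64_t deadline, uint64_t now); /* arm; return pop */
    } timer_ops_t;

    static void     lapic_config(void) { /* program one-shot mode */ }
    static uint64_t lapic_set(uint64_t deadline, uint64_t now)
    { /* convert delta to bus ticks and arm */ return deadline ? deadline : 0; }

    static const timer_ops_t lapic_ops  = { lapic_config, lapic_set };
    static const timer_ops_t *timer_ops = &lapic_ops;  /* boot-time choice */

    /* Callers never branch on the mode; they indirect through the table: */
    static uint64_t
    timer_set(uint64_t deadline, uint64_t now)
    {
        return timer_ops->set(deadline, now);
    }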
+ */ + etimer_resync_deadlines(); +} + /* * tsc_to_nanoseconds: * @@ -112,7 +268,7 @@ _tsc_to_nanoseconds(uint64_t value) "addl %%edi,%%eax ;" "adcl $0,%%edx " : "+A" (value) - : "c" (current_cpu_datap()->cpu_nanotime->scale) + : "c" (rtc_nanotime_info.scale) : "esi", "edi"); #elif defined(__x86_64__) asm volatile("mul %%rcx;" @@ -168,33 +324,6 @@ _absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nan #endif } -static uint32_t -deadline_to_decrementer( - uint64_t deadline, - uint64_t now) -{ - uint64_t delta; - - if (deadline <= now) - return (uint32_t)rtc_decrementer_min; - else { - delta = deadline - now; - return (uint32_t)MIN(MAX(rtc_decrementer_min,delta),maxDec); - } -} - -void -rtc_lapic_start_ticking(void) -{ - x86_lcpu_t *lcpu = x86_lcpu(); - - /* - * Force a complete re-evaluation of timer deadlines. - */ - lcpu->rtcPop = EndOfAllTime; - etimer_resync_deadlines(); -} - /* * Configure the real-time clock device. Return success (1) * or failure (0). @@ -251,10 +380,8 @@ _rtc_nanotime_init(rtc_nanotime_t *rntp, uint64_t base) static void rtc_nanotime_init(uint64_t base) { - rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; - - _rtc_nanotime_init(rntp, base); - rtc_nanotime_set_commpage(rntp); + _rtc_nanotime_init(&rtc_nanotime_info, base); + rtc_nanotime_set_commpage(&rtc_nanotime_info); } /* @@ -269,7 +396,7 @@ rtc_nanotime_init_commpage(void) { spl_t s = splclock(); - rtc_nanotime_set_commpage(current_cpu_datap()->cpu_nanotime); + rtc_nanotime_set_commpage(&rtc_nanotime_info); splx(s); } @@ -286,10 +413,10 @@ rtc_nanotime_read(void) #if CONFIG_EMBEDDED if (gPEClockFrequencyInfo.timebase_frequency_hz > SLOW_TSC_THRESHOLD) - return _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 1); /* slow processor */ + return _rtc_nanotime_read(&rtc_nanotime_info, 1); /* slow processor */ else #endif - return _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 0); /* assume fast processor */ + return _rtc_nanotime_read(&rtc_nanotime_info, 0); /* assume fast processor */ } /* @@ -302,7 +429,7 @@ rtc_nanotime_read(void) void rtc_clock_napped(uint64_t base, uint64_t tsc_base) { - rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; + rtc_nanotime_t *rntp = &rtc_nanotime_info; uint64_t oldnsecs; uint64_t newnsecs; uint64_t tsc; @@ -332,7 +459,7 @@ rtc_clock_napped(uint64_t base, uint64_t tsc_base) void rtc_clock_adjust(uint64_t tsc_base_delta) { - rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; + rtc_nanotime_t *rntp = &rtc_nanotime_info; assert(!ml_get_interrupts_enabled()); assert(tsc_base_delta < 100ULL); /* i.e. it's small */ @@ -368,6 +495,9 @@ void rtc_sleep_wakeup( uint64_t base) { + /* Set fixed configuration for lapic timers */ + rtc_timer->config(); + /* * Reset nanotime. * The timestamp counter will have been reset @@ -404,22 +534,15 @@ rtclock_init(void) gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles; gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles; - /* - * Compute the longest interval we can represent. 
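The mul/shift pairs above implement a 32.32 fixed-point conversion: rtc_set_timescale() (later in this file) computes scale = (NSEC_PER_SEC << 32) / tsc_frequency, and a TSC delta then converts as (delta * scale) >> 32. A worked sketch for a hypothetical 2.4 GHz part, using a compiler-provided 128-bit intermediate where the kernel uses the two-register assembly:

    #include <stdint.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Hypothetical 2.4 GHz TSC. */
    uint64_t tsc_freq = 2400000000ULL;
    uint32_t scale    = (uint32_t)((NSEC_PER_SEC << 32) / tsc_freq);
                        /* == 1789569706, i.e. 2^32 / 2.4 */

    /* One second's worth of cycles converts to ~1e9 ns (truncated by
     * a few ns from the rounded-down scale).  unsigned __int128 is a
     * GCC/Clang extension, standing in for the 64x32 mul/shr idiom. */
    uint64_t tsc_delta = 2400000000ULL;
    uint64_t ns = (uint64_t)(((unsigned __int128)tsc_delta * scale) >> 32);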
- */ - maxDec = tmrCvt(0x7fffffffULL, busFCvtt2n); - kprintf("maxDec: %lld\n", maxDec); - - /* Minimum interval is 1usec */ - rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC, 0ULL); - /* Point LAPIC interrupts to hardclock() */ - lapic_set_timer_func((i386_intr_func_t) rtclock_intr); - + rtc_timer_init(); clock_timebase_init(); ml_init_lock_timeout(); } - rtc_lapic_start_ticking(); + /* Set fixed configuration for lapic timers */ + rtc_timer->config(); + + rtc_timer_start(); return (1); } @@ -430,7 +553,7 @@ rtclock_init(void) static void rtc_set_timescale(uint64_t cycles) { - rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; + rtc_nanotime_t *rntp = &rtc_nanotime_info; rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles); if (cycles <= SLOW_TSC_THRESHOLD) @@ -522,18 +645,10 @@ rtclock_intr( { uint64_t rip; boolean_t user_mode = FALSE; - uint64_t abstime; - uint32_t latency; - x86_lcpu_t *lcpu = x86_lcpu(); assert(get_preemption_level() > 0); assert(!ml_get_interrupts_enabled()); - abstime = rtc_nanotime_read(); - latency = (uint32_t)(abstime - lcpu->rtcDeadline); - if (abstime < lcpu->rtcDeadline) - latency = 1; - if (is_saved_state64(tregs) == TRUE) { x86_saved_state64_t *regs; @@ -552,38 +667,42 @@ rtclock_intr( rip = regs->eip; } - /* Log the interrupt service latency (-ve value expected by tool) */ - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE, - -(int32_t)latency, (uint32_t)rip, user_mode, 0, 0); - /* call the generic etimer */ etimer_intr(user_mode, rip); } + /* * Request timer pop from the hardware */ - -int +uint64_t setPop( uint64_t time) { uint64_t now; - uint32_t decr; - uint64_t count; - - now = rtc_nanotime_read(); /* The time in nanoseconds */ - decr = deadline_to_decrementer(time, now); + uint64_t pop; + + /* 0 and EndOfAllTime are special-cases for "clear the timer" */ + if (time == 0 || time == EndOfAllTime) { + time = EndOfAllTime; + now = 0; + pop = rtc_timer_set(0, 0); + } else { + now = rtc_nanotime_read(); + pop = rtc_timer_set(time, now); + } - count = tmrCvt(decr, busFCvtn2t); - lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count); + /* Record actual deadline set */ + x86_lcpu()->rtcDeadline = time; + x86_lcpu()->rtcPop = pop; - return decr; /* Pass back what we set */ + /* + * Pass back the delta we set + */ + return pop - now; } - uint64_t mach_absolute_time(void) { diff --git a/osfmk/i386/rtclock.h b/osfmk/i386/rtclock.h index 82441c209..d98b8808f 100644 --- a/osfmk/i386/rtclock.h +++ b/osfmk/i386/rtclock.h @@ -42,15 +42,15 @@ #ifndef ASSEMBLER typedef struct rtc_nanotime { - uint64_t tsc_base; /* timestamp */ - uint64_t ns_base; /* nanoseconds */ - uint32_t scale; /* tsc -> nanosec multiplier */ - uint32_t shift; /* tsc -> nanosec shift/div */ + volatile uint64_t tsc_base; /* timestamp */ + volatile uint64_t ns_base; /* nanoseconds */ + uint32_t scale; /* tsc -> nanosec multiplier */ + uint32_t shift; /* tsc -> nanosec shift/div */ /* shift is overloaded with * lower 32bits of tsc_freq * on slower machines (SLOW_TSC_THRESHOLD) */ - uint32_t generation; /* 0 == being updated */ - uint32_t spare1; + volatile uint32_t generation; /* 0 == being updated */ + uint32_t spare1; } rtc_nanotime_t; #if 0 diff --git a/osfmk/i386/seg.h b/osfmk/i386/seg.h index 37d2b48ca..89643edf2 100644 --- a/osfmk/i386/seg.h +++ b/osfmk/i386/seg.h @@ -300,7 +300,6 @@ __END_DECLS #define SYSCALL_CS 0x2f /* 64-bit syscall pseudo-segment */ #define USER_CTHREAD 0x37 /* user cthread area */ #define USER_SETTABLE 0x3f 
/* start of user settable ldt entries */ -#define USLDTSZ 10 /* number of user settable entries */ /* * Kernel descriptors for MACH - 32-bit flat address space. @@ -327,13 +326,13 @@ __END_DECLS /* * Kernel descriptors for MACH - 64-bit flat address space. */ -#define KERNEL64_CS 0x08 /* 1: First entry */ -#define SYSENTER_CS 0x0b /* alias to KERNEL64_CS */ -#define KERNEL64_SS 0x10 /* 2: must be SYSENTER_CS + 8 */ -#define USER_CS 0x1b /* 3: must be SYSENTER_CS + 16 */ -#define USER_DS 0x23 /* 4: must be SYSENTER_CS + 24 */ -#define USER64_CS 0x2b /* 5: must be SYSENTER_CS + 32 */ -#define USER64_DS USER_DS /* nothing special about 64bit DS */ +#define KERNEL64_CS 0x08 /* 1: K64 code */ +#define SYSENTER_CS 0x0b /* U32 sysenter pseudo-segment */ +#define KERNEL64_SS 0x10 /* 2: KERNEL64_CS+8 for syscall */ +#define USER_CS 0x1b /* 3: U32 code */ +#define USER_DS 0x23 /* 4: USER_CS+8 for sysret */ +#define USER64_CS 0x2b /* 5: USER_CS+16 for sysret */ +#define USER64_DS USER_DS /* U64 data pseudo-segment */ #define KERNEL_LDT 0x30 /* 6: */ /* 7: other 8 bytes of KERNEL_LDT */ #define KERNEL_TSS 0x40 /* 8: */ @@ -341,8 +340,7 @@ __END_DECLS #define KERNEL32_CS 0x50 /* 10: */ #define USER_LDT 0x58 /* 11: */ /* 12: other 8 bytes of USER_LDT */ -#define KERNEL_DS 0x80 /* 16: */ -#define SYSCALL_CS 0x8f /* 17: 64-bit syscall pseudo-segment */ +#define KERNEL_DS 0x68 /* 13: 32-bit kernel data */ #endif @@ -365,9 +363,9 @@ __END_DECLS /* * 64-bit kernel LDT descriptors */ +#define SYSCALL_CS 0x07 /* syscall pseudo-segment */ #define USER_CTHREAD 0x0f /* user cthread area */ #define USER_SETTABLE 0x1f /* start of user settable ldt entries */ -#define USLDTSZ 10 /* number of user settable entries */ #endif #endif /* _I386_SEG_H_ */ diff --git a/osfmk/i386/start64.s b/osfmk/i386/start64.s index 9c7188711..bcabe2829 100644 --- a/osfmk/i386/start64.s +++ b/osfmk/i386/start64.s @@ -173,6 +173,13 @@ Entry(get64_cr3) EMARF ret +Entry(cpuid64) + ENTER_64BIT_MODE() + cpuid + ENTER_COMPAT_MODE() + ret + + /* FXSAVE and FXRSTOR operate in a mode dependent fashion, hence these variants. * Must be called with interrupts disabled. */ @@ -180,20 +187,29 @@ Entry(get64_cr3) Entry(fxsave64) movl S_ARG0,%eax ENTER_64BIT_MODE() - fxsave 0(%eax) + fxsave (%eax) ENTER_COMPAT_MODE() ret Entry(fxrstor64) movl S_ARG0,%eax ENTER_64BIT_MODE() - fxrstor 0(%rax) + fxrstor (%rax) ENTER_COMPAT_MODE() ret -Entry(cpuid64) +Entry(xsave64o) ENTER_64BIT_MODE() - cpuid + .short 0xAE0F + /* MOD 0x4, ECX, 0x1 */ + .byte 0x21 ENTER_COMPAT_MODE() ret +Entry(xrstor64o) + ENTER_64BIT_MODE() + .short 0xAE0F + /* MOD 0x5, ECX 0x1 */ + .byte 0x29 + ENTER_COMPAT_MODE() + ret diff --git a/osfmk/i386/thread.h b/osfmk/i386/thread.h index 0ac0ee06f..faab785af 100644 --- a/osfmk/i386/thread.h +++ b/osfmk/i386/thread.h @@ -99,14 +99,14 @@ * Allocated only when necessary. 
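The xsave64o/xrstor64o stubs hand-assemble their instructions because the assembler of the day didn't know them: `.short 0xAE0F` emits the opcode bytes 0F AE (little-endian), and the following ModRM byte selects operation and operand, 0x21 is mod=00, reg=/4, rm=%rcx (XSAVE) and 0x29 is mod=00, reg=/5, rm=%rcx (XRSTOR), with the caller supplying the requested-feature bitmap in EDX:EAX as the architecture requires. On a toolchain that knows the mnemonics, a C-level equivalent looks roughly like this (a sketch, not the kernel's code):

    #include <stdint.h>

    /* Sketch: save/restore extended state to a 64-byte-aligned area.
     * EDX:EAX carry the requested-feature mask (e.g. x87|SSE|YMM). */
    static inline void
    xsave_area(void *area, uint64_t mask)
    {
        __asm__ volatile("xsave (%0)"
                         : : "r" (area),
                             "a" ((uint32_t)mask), "d" ((uint32_t)(mask >> 32))
                         : "memory");
    }

    static inline void
    xrstor_area(void *area, uint64_t mask)
    {
        __asm__ volatile("xrstor (%0)"
                         : : "r" (area),
                             "a" ((uint32_t)mask), "d" ((uint32_t)(mask >> 32))
                         : "memory");
    }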
*/ -struct x86_fpsave_state { - boolean_t fp_valid; - enum { +typedef enum { FXSAVE32 = 1, - FXSAVE64 = 2 - } fp_save_layout; - struct x86_fx_save fx_save_state __attribute__ ((aligned (16))); -}; + FXSAVE64 = 2, + XSAVE32 = 3, + XSAVE64 = 4, + FP_UNUSED = 5 + } fp_save_layout_t; + /* @@ -148,7 +148,7 @@ struct x86_kernel_state { typedef struct pcb { void *sf; x86_saved_state_t *iss; - struct x86_fpsave_state *ifps; + void *ifps; #ifdef MACH_BSD uint64_t cthread_self; /* for use of cthread package */ struct real_descriptor cthread_desc; diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index 5cd9b390d..07b3cf479 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -128,6 +128,7 @@ static void user_page_fault_continue(kern_return_t kret); #ifdef __i386__ static void panic_trap(x86_saved_state32_t *saved_state); static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip); +static void panic_64(x86_saved_state_t *, int, const char *, boolean_t); #else static void panic_trap(x86_saved_state64_t *saved_state); static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip); @@ -397,6 +398,52 @@ panic_idt64(x86_saved_state_t *rsp) } #endif + + +/* + * Non-zero indicates latency assert is enabled and capped at valued + * absolute time units. + */ + +uint64_t interrupt_latency_cap = 0; +boolean_t ilat_assert = FALSE; + +void +interrupt_latency_tracker_setup(void) { + uint32_t ilat_cap_us; + if (PE_parse_boot_argn("interrupt_latency_cap_us", &ilat_cap_us, sizeof(ilat_cap_us))) { + interrupt_latency_cap = ilat_cap_us * NSEC_PER_USEC; + nanoseconds_to_absolutetime(interrupt_latency_cap, &interrupt_latency_cap); + } else { + interrupt_latency_cap = LockTimeOut; + } + PE_parse_boot_argn("-interrupt_latency_assert_enable", &ilat_assert, sizeof(ilat_assert)); +} + +void interrupt_reset_latency_stats(void) { + uint32_t i; + for (i = 0; i < real_ncpus; i++) { + cpu_data_ptr[i]->cpu_max_observed_int_latency = + cpu_data_ptr[i]->cpu_max_observed_int_latency_vector = 0; + } +} + +void interrupt_populate_latency_stats(char *buf, unsigned bufsize) { + uint32_t i, tcpu = ~0; + uint64_t cur_max = 0; + + for (i = 0; i < real_ncpus; i++) { + if (cur_max < cpu_data_ptr[i]->cpu_max_observed_int_latency) { + cur_max = cpu_data_ptr[i]->cpu_max_observed_int_latency; + tcpu = i; + } + } + + if (tcpu < real_ncpus) + snprintf(buf, bufsize, "0x%x 0x%x 0x%llx", tcpu, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency_vector, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency); +} + + extern void PE_incoming_interrupt(int interrupt); /* @@ -411,9 +458,9 @@ interrupt(x86_saved_state_t *state) uint64_t rsp; int interrupt_num; boolean_t user_mode = FALSE; + int cnum = cpu_number(); - - if (is_saved_state64(state) == TRUE) { + if (is_saved_state64(state) == TRUE) { x86_saved_state64_t *state64; state64 = saved_state64(state); @@ -443,18 +490,34 @@ interrupt(x86_saved_state_t *state) * Handle local APIC interrupts * else call platform expert for devices. 
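interrupt_populate_latency_stats() reports the worst observation across all CPUs as three hex fields: the CPU number, the offending vector, and the maximum latency in absolute-time units. A sketch of a consumer decoding that string (the buffer is assumed to arrive via whatever sysctl or debug channel exposes it; that plumbing is not part of this patch):

    #include <stdio.h>

    /* Parse "0x%x 0x%x 0x%llx": cpu, vector, max latency (abs ticks). */
    static int
    parse_latency_stats(const char *buf)
    {
        unsigned cpu, vector;
        unsigned long long latency;

        if (sscanf(buf, "0x%x 0x%x 0x%llx", &cpu, &vector, &latency) != 3)
            return -1;
        printf("cpu %u, vector 0x%x, max latency %llu abs ticks\n",
               cpu, vector, latency);
        return 0;
    }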
*/ - if (!lapic_interrupt(interrupt_num, state)) + if (!lapic_interrupt(interrupt_num, state)) { PE_incoming_interrupt(interrupt_num); + } KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0); + if (cpu_data_ptr[cnum]->cpu_nested_istack) { + cpu_data_ptr[cnum]->cpu_nested_istack_events++; + } + else { + uint64_t int_latency = mach_absolute_time() - cpu_data_ptr[cnum]->cpu_int_event_time; + if (ilat_assert && (int_latency > interrupt_latency_cap) && !machine_timeout_suspended()) { + panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x", interrupt_num, int_latency, cpu_data_ptr[cnum]->cpu_prior_signals); + } + if (int_latency > cpu_data_ptr[cnum]->cpu_max_observed_int_latency) { + cpu_data_ptr[cnum]->cpu_max_observed_int_latency = int_latency; + cpu_data_ptr[cnum]->cpu_max_observed_int_latency_vector = interrupt_num; + } + } + + /* * Having serviced the interrupt first, look at the interrupted stack depth. */ if (!user_mode) { - uint64_t depth = current_cpu_datap()->cpu_kernel_stack + uint64_t depth = cpu_data_ptr[cnum]->cpu_kernel_stack + sizeof(struct x86_kernel_state) + sizeof(struct i386_exception_link *) - rsp; @@ -516,8 +579,9 @@ kernel_trap( thread = current_thread(); #ifdef __i386__ - if (is_saved_state64(state)) - panic("kernel_trap(%p) with 64-bit state", state); + if (is_saved_state64(state)) { + panic_64(state, 0, "Kernel trap with 64-bit state", FALSE); + } saved_state = saved_state32(state); vaddr = (user_addr_t)saved_state->cr2; type = saved_state->trapno; diff --git a/osfmk/i386/tsc.c b/osfmk/i386/tsc.c index a5488bbc5..9e794797b 100644 --- a/osfmk/i386/tsc.c +++ b/osfmk/i386/tsc.c @@ -143,6 +143,7 @@ tsc_init(void) busFreq = EFI_FSB_frequency(); switch (cpuid_cpufamily()) { + case CPUFAMILY_INTEL_SANDYBRIDGE: case CPUFAMILY_INTEL_WESTMERE: case CPUFAMILY_INTEL_NEHALEM: { uint64_t cpu_mhz; diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c index 9f72fb57e..5536038e7 100644 --- a/osfmk/kdp/kdp.c +++ b/osfmk/kdp/kdp.c @@ -1047,7 +1047,7 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) { while (rem) { ppnum_t upn = pmap_find_phys(p, uaddr); - uint64_t phys_src = (upn << PAGE_SHIFT) | (uaddr & PAGE_MASK); + uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK); uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr); uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK); uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK); @@ -1085,12 +1085,12 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0); queue_iterate(&tasks, task, task_t, tasks) { - int task_pid = pid_from_task(task); - boolean_t task64 = task_has_64BitAddr(task); - if ((task == NULL) || (ml_nofault_copy((vm_offset_t) task, (vm_offset_t) &ctask, sizeof(struct task)) != sizeof(struct task))) goto error_exit; + int task_pid = pid_from_task(task); + boolean_t task64 = task_has_64BitAddr(task); + /* Trace everything, unless a process was specified */ if ((pid == -1) || (pid == task_pid)) { task_snapshot_t task_snap; diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c index 0a54c5f2e..3b298fe6e 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -1541,8 +1541,10 @@ kdp_get_xnu_version(char *versionbuf) char *vptr; strlcpy(vstr, "custom", 10); - if (strlcpy(versionbuf, version, 95) < 95) { - versionpos = strnstr(versionbuf, "xnu-", 90); + + if 
(kdp_machine_vm_read((mach_vm_address_t)(uintptr_t)version, versionbuf, 128)) { + versionbuf[127] = '\0'; + versionpos = strnstr(versionbuf, "xnu-", 115); if (versionpos) { strncpy(vstr, versionpos, sizeof(vstr)); vstr[sizeof(vstr)-1] = '\0'; @@ -1692,7 +1694,12 @@ kdp_panic_dump(void) } printf("Entering system dump routine\n"); - + + if (!kdp_en_recv_pkt || !kdp_en_send_pkt) { + printf("Error: No transport device registered for kernel crashdump\n"); + return; + } + if (!panicd_specified) { printf("A dump server was not specified in the boot-args, terminating kernel core dump.\n"); goto panic_dump_exit; diff --git a/osfmk/kern/etimer.h b/osfmk/kern/etimer.h index 48ec75e52..de66f9749 100644 --- a/osfmk/kern/etimer.h +++ b/osfmk/kern/etimer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 Apple Inc. All rights reserved. + * Copyright (c) 2004-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,7 +50,11 @@ extern int setTimerReq(void); extern void etimer_intr(int inuser, uint64_t iaddr); extern void etimer_set_deadline(uint64_t deadline); +#if defined(i386) || defined(x86_64) +extern uint64_t setPop(uint64_t time); +#else extern int setPop(uint64_t time); +#endif extern void etimer_resync_deadlines(void); diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index 2ed0f12e7..dddaf47d7 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -962,7 +962,7 @@ clear_wait_internal( wait_result_t wresult) { wait_queue_t wq = thread->wait_queue; - int i = LockTimeOut; + uint32_t i = LockTimeOut; do { if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT)) @@ -986,7 +986,7 @@ clear_wait_internal( } return (thread_go(thread, wresult)); - } while (--i > 0); + } while ((--i > 0) || machine_timeout_suspended()); panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n", thread, wq, cpu_number()); diff --git a/osfmk/kern/wait_queue.h b/osfmk/kern/wait_queue.h index 030e82d28..386bd093c 100644 --- a/osfmk/kern/wait_queue.h +++ b/osfmk/kern/wait_queue.h @@ -43,8 +43,9 @@ #include #include -#include +#include +#include /* machine_timeout_suspended() */ /* * wait_queue_t * This is the definition of the common event wait queue @@ -165,11 +166,25 @@ typedef struct _wait_queue_link { */ static inline void wait_queue_lock(wait_queue_t wq) { - if (!hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2)) - panic("wait queue deadlock - wq=%p, cpu=%d\n", wq, cpu_number( -)); + if (hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2) == 0) { + boolean_t wql_acquired = FALSE; + while (machine_timeout_suspended()) { +#if defined(__i386__) || defined(__x86_64__) +/* + * i386/x86_64 return with preemption disabled on a timeout for + * diagnostic purposes. 
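machine_timeout_suspended() appears in several timeout loops in this patch (clear_wait_internal() above, wait_queue_lock() here, and the TLB-flush wait earlier) and always carries the same meaning: when the machine has recently been stopped by a debugger, elapsed-time and spin-count budgets are meaningless, so keep waiting rather than declaring deadlock. The distilled pattern, as a sketch (the helper and its parameters are illustrative):

    /* Bounded spin that defers to the debugger: the budget only
     * expires while timeouts are trustworthy. */
    static boolean_t
    spin_until(boolean_t (*done)(void), uint32_t budget)
    {
        uint32_t i = budget;

        do {
            if (done())
                return TRUE;
            cpu_pause();
        } while (--i > 0 || machine_timeout_suspended());

        return FALSE;    /* genuine timeout: caller panics */
    }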
+ */ + mp_enable_preemption(); +#endif + if ((wql_acquired = hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2))) + break; + } + + if (wql_acquired == FALSE) + panic("wait queue deadlock - wq=%p, cpu=%d\n", wq, cpu_number()); + } } - + static inline void wait_queue_unlock(wait_queue_t wq) { assert(wait_queue_held(wq)); hw_lock_unlock(&(wq)->wq_interlock); diff --git a/osfmk/mach/i386/_structs.h b/osfmk/mach/i386/_structs.h index 927d12f63..bcac16be3 100644 --- a/osfmk/mach/i386/_structs.h +++ b/osfmk/mach/i386/_structs.h @@ -264,6 +264,53 @@ _STRUCT_X86_FLOAT_STATE32 char __fpu_rsrv4[14*16]; /* reserved */ int __fpu_reserved1; }; + +#define _STRUCT_X86_AVX_STATE32 struct __darwin_i386_avx_state +_STRUCT_X86_AVX_STATE32 +{ + int __fpu_reserved[2]; + _STRUCT_FP_CONTROL __fpu_fcw; /* x87 FPU control word */ + _STRUCT_FP_STATUS __fpu_fsw; /* x87 FPU status word */ + __uint8_t __fpu_ftw; /* x87 FPU tag word */ + __uint8_t __fpu_rsrv1; /* reserved */ + __uint16_t __fpu_fop; /* x87 FPU Opcode */ + __uint32_t __fpu_ip; /* x87 FPU Instruction Pointer offset */ + __uint16_t __fpu_cs; /* x87 FPU Instruction Pointer Selector */ + __uint16_t __fpu_rsrv2; /* reserved */ + __uint32_t __fpu_dp; /* x87 FPU Instruction Operand(Data) Pointer offset */ + __uint16_t __fpu_ds; /* x87 FPU Instruction Operand(Data) Pointer Selector */ + __uint16_t __fpu_rsrv3; /* reserved */ + __uint32_t __fpu_mxcsr; /* MXCSR Register state */ + __uint32_t __fpu_mxcsrmask; /* MXCSR mask */ + _STRUCT_MMST_REG __fpu_stmm0; /* ST0/MM0 */ + _STRUCT_MMST_REG __fpu_stmm1; /* ST1/MM1 */ + _STRUCT_MMST_REG __fpu_stmm2; /* ST2/MM2 */ + _STRUCT_MMST_REG __fpu_stmm3; /* ST3/MM3 */ + _STRUCT_MMST_REG __fpu_stmm4; /* ST4/MM4 */ + _STRUCT_MMST_REG __fpu_stmm5; /* ST5/MM5 */ + _STRUCT_MMST_REG __fpu_stmm6; /* ST6/MM6 */ + _STRUCT_MMST_REG __fpu_stmm7; /* ST7/MM7 */ + _STRUCT_XMM_REG __fpu_xmm0; /* XMM 0 */ + _STRUCT_XMM_REG __fpu_xmm1; /* XMM 1 */ + _STRUCT_XMM_REG __fpu_xmm2; /* XMM 2 */ + _STRUCT_XMM_REG __fpu_xmm3; /* XMM 3 */ + _STRUCT_XMM_REG __fpu_xmm4; /* XMM 4 */ + _STRUCT_XMM_REG __fpu_xmm5; /* XMM 5 */ + _STRUCT_XMM_REG __fpu_xmm6; /* XMM 6 */ + _STRUCT_XMM_REG __fpu_xmm7; /* XMM 7 */ + char __fpu_rsrv4[14*16]; /* reserved */ + int __fpu_reserved1; + char __avx_reserved1[64]; + _STRUCT_XMM_REG __fpu_ymmh0; /* YMMH 0 */ + _STRUCT_XMM_REG __fpu_ymmh1; /* YMMH 1 */ + _STRUCT_XMM_REG __fpu_ymmh2; /* YMMH 2 */ + _STRUCT_XMM_REG __fpu_ymmh3; /* YMMH 3 */ + _STRUCT_XMM_REG __fpu_ymmh4; /* YMMH 4 */ + _STRUCT_XMM_REG __fpu_ymmh5; /* YMMH 5 */ + _STRUCT_XMM_REG __fpu_ymmh6; /* YMMH 6 */ + _STRUCT_XMM_REG __fpu_ymmh7; /* YMMH 7 */ +}; + #else /* !__DARWIN_UNIX03 */ #define _STRUCT_X86_FLOAT_STATE32 struct i386_float_state _STRUCT_X86_FLOAT_STATE32 @@ -301,6 +348,53 @@ _STRUCT_X86_FLOAT_STATE32 char fpu_rsrv4[14*16]; /* reserved */ int fpu_reserved1; }; + +#define _STRUCT_X86_AVX_STATE32 struct i386_avx_state +_STRUCT_X86_AVX_STATE32 +{ + int fpu_reserved[2]; + _STRUCT_FP_CONTROL fpu_fcw; /* x87 FPU control word */ + _STRUCT_FP_STATUS fpu_fsw; /* x87 FPU status word */ + __uint8_t fpu_ftw; /* x87 FPU tag word */ + __uint8_t fpu_rsrv1; /* reserved */ + __uint16_t fpu_fop; /* x87 FPU Opcode */ + __uint32_t fpu_ip; /* x87 FPU Instruction Pointer offset */ + __uint16_t fpu_cs; /* x87 FPU Instruction Pointer Selector */ + __uint16_t fpu_rsrv2; /* reserved */ + __uint32_t fpu_dp; /* x87 FPU Instruction Operand(Data) Pointer offset */ + __uint16_t fpu_ds; /* x87 FPU Instruction Operand(Data) Pointer Selector */ + __uint16_t fpu_rsrv3; /* reserved */ + 
__uint32_t fpu_mxcsr; /* MXCSR Register state */ + __uint32_t fpu_mxcsrmask; /* MXCSR mask */ + _STRUCT_MMST_REG fpu_stmm0; /* ST0/MM0 */ + _STRUCT_MMST_REG fpu_stmm1; /* ST1/MM1 */ + _STRUCT_MMST_REG fpu_stmm2; /* ST2/MM2 */ + _STRUCT_MMST_REG fpu_stmm3; /* ST3/MM3 */ + _STRUCT_MMST_REG fpu_stmm4; /* ST4/MM4 */ + _STRUCT_MMST_REG fpu_stmm5; /* ST5/MM5 */ + _STRUCT_MMST_REG fpu_stmm6; /* ST6/MM6 */ + _STRUCT_MMST_REG fpu_stmm7; /* ST7/MM7 */ + _STRUCT_XMM_REG fpu_xmm0; /* XMM 0 */ + _STRUCT_XMM_REG fpu_xmm1; /* XMM 1 */ + _STRUCT_XMM_REG fpu_xmm2; /* XMM 2 */ + _STRUCT_XMM_REG fpu_xmm3; /* XMM 3 */ + _STRUCT_XMM_REG fpu_xmm4; /* XMM 4 */ + _STRUCT_XMM_REG fpu_xmm5; /* XMM 5 */ + _STRUCT_XMM_REG fpu_xmm6; /* XMM 6 */ + _STRUCT_XMM_REG fpu_xmm7; /* XMM 7 */ + char fpu_rsrv4[14*16]; /* reserved */ + int fpu_reserved1; + char __avx_reserved1[64]; + _STRUCT_XMM_REG __fpu_ymmh0; /* YMMH 0 */ + _STRUCT_XMM_REG __fpu_ymmh1; /* YMMH 1 */ + _STRUCT_XMM_REG __fpu_ymmh2; /* YMMH 2 */ + _STRUCT_XMM_REG __fpu_ymmh3; /* YMMH 3 */ + _STRUCT_XMM_REG __fpu_ymmh4; /* YMMH 4 */ + _STRUCT_XMM_REG __fpu_ymmh5; /* YMMH 5 */ + _STRUCT_XMM_REG __fpu_ymmh6; /* YMMH 6 */ + _STRUCT_XMM_REG __fpu_ymmh7; /* YMMH 7 */ +}; + #endif /* !__DARWIN_UNIX03 */ #if __DARWIN_UNIX03 @@ -459,6 +553,75 @@ _STRUCT_X86_FLOAT_STATE64 char __fpu_rsrv4[6*16]; /* reserved */ int __fpu_reserved1; }; + +#define _STRUCT_X86_AVX_STATE64 struct __darwin_x86_avx_state64 +_STRUCT_X86_AVX_STATE64 +{ + int __fpu_reserved[2]; + _STRUCT_FP_CONTROL __fpu_fcw; /* x87 FPU control word */ + _STRUCT_FP_STATUS __fpu_fsw; /* x87 FPU status word */ + __uint8_t __fpu_ftw; /* x87 FPU tag word */ + __uint8_t __fpu_rsrv1; /* reserved */ + __uint16_t __fpu_fop; /* x87 FPU Opcode */ + + /* x87 FPU Instruction Pointer */ + __uint32_t __fpu_ip; /* offset */ + __uint16_t __fpu_cs; /* Selector */ + + __uint16_t __fpu_rsrv2; /* reserved */ + + /* x87 FPU Instruction Operand(Data) Pointer */ + __uint32_t __fpu_dp; /* offset */ + __uint16_t __fpu_ds; /* Selector */ + + __uint16_t __fpu_rsrv3; /* reserved */ + __uint32_t __fpu_mxcsr; /* MXCSR Register state */ + __uint32_t __fpu_mxcsrmask; /* MXCSR mask */ + _STRUCT_MMST_REG __fpu_stmm0; /* ST0/MM0 */ + _STRUCT_MMST_REG __fpu_stmm1; /* ST1/MM1 */ + _STRUCT_MMST_REG __fpu_stmm2; /* ST2/MM2 */ + _STRUCT_MMST_REG __fpu_stmm3; /* ST3/MM3 */ + _STRUCT_MMST_REG __fpu_stmm4; /* ST4/MM4 */ + _STRUCT_MMST_REG __fpu_stmm5; /* ST5/MM5 */ + _STRUCT_MMST_REG __fpu_stmm6; /* ST6/MM6 */ + _STRUCT_MMST_REG __fpu_stmm7; /* ST7/MM7 */ + _STRUCT_XMM_REG __fpu_xmm0; /* XMM 0 */ + _STRUCT_XMM_REG __fpu_xmm1; /* XMM 1 */ + _STRUCT_XMM_REG __fpu_xmm2; /* XMM 2 */ + _STRUCT_XMM_REG __fpu_xmm3; /* XMM 3 */ + _STRUCT_XMM_REG __fpu_xmm4; /* XMM 4 */ + _STRUCT_XMM_REG __fpu_xmm5; /* XMM 5 */ + _STRUCT_XMM_REG __fpu_xmm6; /* XMM 6 */ + _STRUCT_XMM_REG __fpu_xmm7; /* XMM 7 */ + _STRUCT_XMM_REG __fpu_xmm8; /* XMM 8 */ + _STRUCT_XMM_REG __fpu_xmm9; /* XMM 9 */ + _STRUCT_XMM_REG __fpu_xmm10; /* XMM 10 */ + _STRUCT_XMM_REG __fpu_xmm11; /* XMM 11 */ + _STRUCT_XMM_REG __fpu_xmm12; /* XMM 12 */ + _STRUCT_XMM_REG __fpu_xmm13; /* XMM 13 */ + _STRUCT_XMM_REG __fpu_xmm14; /* XMM 14 */ + _STRUCT_XMM_REG __fpu_xmm15; /* XMM 15 */ + char __fpu_rsrv4[6*16]; /* reserved */ + int __fpu_reserved1; + char __avx_reserved1[64]; + _STRUCT_XMM_REG __fpu_ymmh0; /* YMMH 0 */ + _STRUCT_XMM_REG __fpu_ymmh1; /* YMMH 1 */ + _STRUCT_XMM_REG __fpu_ymmh2; /* YMMH 2 */ + _STRUCT_XMM_REG __fpu_ymmh3; /* YMMH 3 */ + _STRUCT_XMM_REG __fpu_ymmh4; /* YMMH 4 */ + _STRUCT_XMM_REG 
__fpu_ymmh5; /* YMMH 5 */ + _STRUCT_XMM_REG __fpu_ymmh6; /* YMMH 6 */ + _STRUCT_XMM_REG __fpu_ymmh7; /* YMMH 7 */ + _STRUCT_XMM_REG __fpu_ymmh8; /* YMMH 8 */ + _STRUCT_XMM_REG __fpu_ymmh9; /* YMMH 9 */ + _STRUCT_XMM_REG __fpu_ymmh10; /* YMMH 10 */ + _STRUCT_XMM_REG __fpu_ymmh11; /* YMMH 11 */ + _STRUCT_XMM_REG __fpu_ymmh12; /* YMMH 12 */ + _STRUCT_XMM_REG __fpu_ymmh13; /* YMMH 13 */ + _STRUCT_XMM_REG __fpu_ymmh14; /* YMMH 14 */ + _STRUCT_XMM_REG __fpu_ymmh15; /* YMMH 15 */ +}; + #else /* !__DARWIN_UNIX03 */ #define _STRUCT_X86_FLOAT_STATE64 struct x86_float_state64 _STRUCT_X86_FLOAT_STATE64 @@ -510,6 +673,75 @@ _STRUCT_X86_FLOAT_STATE64 char fpu_rsrv4[6*16]; /* reserved */ int fpu_reserved1; }; + +#define _STRUCT_X86_AVX_STATE64 struct x86_avx_state64 +_STRUCT_X86_AVX_STATE64 +{ + int fpu_reserved[2]; + _STRUCT_FP_CONTROL fpu_fcw; /* x87 FPU control word */ + _STRUCT_FP_STATUS fpu_fsw; /* x87 FPU status word */ + __uint8_t fpu_ftw; /* x87 FPU tag word */ + __uint8_t fpu_rsrv1; /* reserved */ + __uint16_t fpu_fop; /* x87 FPU Opcode */ + + /* x87 FPU Instruction Pointer */ + __uint32_t fpu_ip; /* offset */ + __uint16_t fpu_cs; /* Selector */ + + __uint16_t fpu_rsrv2; /* reserved */ + + /* x87 FPU Instruction Operand(Data) Pointer */ + __uint32_t fpu_dp; /* offset */ + __uint16_t fpu_ds; /* Selector */ + + __uint16_t fpu_rsrv3; /* reserved */ + __uint32_t fpu_mxcsr; /* MXCSR Register state */ + __uint32_t fpu_mxcsrmask; /* MXCSR mask */ + _STRUCT_MMST_REG fpu_stmm0; /* ST0/MM0 */ + _STRUCT_MMST_REG fpu_stmm1; /* ST1/MM1 */ + _STRUCT_MMST_REG fpu_stmm2; /* ST2/MM2 */ + _STRUCT_MMST_REG fpu_stmm3; /* ST3/MM3 */ + _STRUCT_MMST_REG fpu_stmm4; /* ST4/MM4 */ + _STRUCT_MMST_REG fpu_stmm5; /* ST5/MM5 */ + _STRUCT_MMST_REG fpu_stmm6; /* ST6/MM6 */ + _STRUCT_MMST_REG fpu_stmm7; /* ST7/MM7 */ + _STRUCT_XMM_REG fpu_xmm0; /* XMM 0 */ + _STRUCT_XMM_REG fpu_xmm1; /* XMM 1 */ + _STRUCT_XMM_REG fpu_xmm2; /* XMM 2 */ + _STRUCT_XMM_REG fpu_xmm3; /* XMM 3 */ + _STRUCT_XMM_REG fpu_xmm4; /* XMM 4 */ + _STRUCT_XMM_REG fpu_xmm5; /* XMM 5 */ + _STRUCT_XMM_REG fpu_xmm6; /* XMM 6 */ + _STRUCT_XMM_REG fpu_xmm7; /* XMM 7 */ + _STRUCT_XMM_REG fpu_xmm8; /* XMM 8 */ + _STRUCT_XMM_REG fpu_xmm9; /* XMM 9 */ + _STRUCT_XMM_REG fpu_xmm10; /* XMM 10 */ + _STRUCT_XMM_REG fpu_xmm11; /* XMM 11 */ + _STRUCT_XMM_REG fpu_xmm12; /* XMM 12 */ + _STRUCT_XMM_REG fpu_xmm13; /* XMM 13 */ + _STRUCT_XMM_REG fpu_xmm14; /* XMM 14 */ + _STRUCT_XMM_REG fpu_xmm15; /* XMM 15 */ + char fpu_rsrv4[6*16]; /* reserved */ + int fpu_reserved1; + char __avx_reserved1[64]; + _STRUCT_XMM_REG __fpu_ymmh0; /* YMMH 0 */ + _STRUCT_XMM_REG __fpu_ymmh1; /* YMMH 1 */ + _STRUCT_XMM_REG __fpu_ymmh2; /* YMMH 2 */ + _STRUCT_XMM_REG __fpu_ymmh3; /* YMMH 3 */ + _STRUCT_XMM_REG __fpu_ymmh4; /* YMMH 4 */ + _STRUCT_XMM_REG __fpu_ymmh5; /* YMMH 5 */ + _STRUCT_XMM_REG __fpu_ymmh6; /* YMMH 6 */ + _STRUCT_XMM_REG __fpu_ymmh7; /* YMMH 7 */ + _STRUCT_XMM_REG __fpu_ymmh8; /* YMMH 8 */ + _STRUCT_XMM_REG __fpu_ymmh9; /* YMMH 9 */ + _STRUCT_XMM_REG __fpu_ymmh10; /* YMMH 10 */ + _STRUCT_XMM_REG __fpu_ymmh11; /* YMMH 11 */ + _STRUCT_XMM_REG __fpu_ymmh12; /* YMMH 12 */ + _STRUCT_XMM_REG __fpu_ymmh13; /* YMMH 13 */ + _STRUCT_XMM_REG __fpu_ymmh14; /* YMMH 14 */ + _STRUCT_XMM_REG __fpu_ymmh15; /* YMMH 15 */ +}; + #endif /* !__DARWIN_UNIX03 */ #if __DARWIN_UNIX03 diff --git a/osfmk/mach/i386/fp_reg.h b/osfmk/mach/i386/fp_reg.h index 47a26a99d..92d504f3f 100644 --- a/osfmk/mach/i386/fp_reg.h +++ b/osfmk/mach/i386/fp_reg.h @@ -59,29 +59,65 @@ #ifndef _I386_FP_SAVE_H_ #define _I386_FP_SAVE_H_ 
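A useful property of all four new AVX structures (32- and 64-bit, UNIX03 and legacy spellings) is that each is a strict superset of the corresponding float-state structure: every field up to and including the final reserved word is laid out identically, with the YMM high halves appended after a 64-byte reserved gap. Code that only understands the float state can therefore read the prefix of an AVX buffer, as in this sketch (kernel-style type punning; user code may prefer memcpy):

    #include <mach/thread_status.h>

    /* The float-state fields of an AVX buffer alias exactly, so a
     * legacy consumer can inspect MXCSR, the XMMs, etc. unchanged. */
    x86_avx_state64_t avx;   /* assumed filled in by thread_get_state() */
    x86_float_state64_t *legacy = (x86_float_state64_t *)&avx;
    /* legacy->__fpu_mxcsr, legacy->__fpu_xmm0, ... are all valid. */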
+#ifdef MACH_KERNEL_PRIVATE -/* note when allocating this data structure, it must be 16 byte aligned. */ -struct x86_fx_save { - unsigned short fx_control; /* control */ - unsigned short fx_status; /* status */ - unsigned char fx_tag; /* register tags */ - unsigned char fx_bbz1; /* better be zero when calling fxrtstor */ - unsigned short fx_opcode; - unsigned int fx_eip; /* eip instruction */ - unsigned short fx_cs; /* cs instruction */ - unsigned short fx_bbz2; /* better be zero when calling fxrtstor */ - unsigned int fx_dp; /* data address */ - unsigned short fx_ds; /* data segment */ - unsigned short fx_bbz3; /* better be zero when calling fxrtstor */ - unsigned int fx_MXCSR; - unsigned int fx_MXCSR_MASK; - unsigned short fx_reg_word[8][8]; /* STx/MMx registers */ - unsigned short fx_XMM_reg[8][16]; /* XMM0-XMM15 on 64 bit processors */ + +struct x86_fx_thread_state { + unsigned short fx_control; /* control */ + unsigned short fx_status; /* status */ + unsigned char fx_tag; /* register tags */ + unsigned char fx_bbz1; /* better be zero when calling fxrtstor */ + unsigned short fx_opcode; + unsigned int fx_eip; /* eip instruction */ + unsigned short fx_cs; /* cs instruction */ + unsigned short fx_bbz2; /* better be zero when calling fxrtstor */ + unsigned int fx_dp; /* data address */ + unsigned short fx_ds; /* data segment */ + unsigned short fx_bbz3; /* better be zero when calling fxrtstor */ + unsigned int fx_MXCSR; + unsigned int fx_MXCSR_MASK; + unsigned short fx_reg_word[8][8]; /* STx/MMx registers */ + unsigned short fx_XMM_reg[8][16]; /* XMM0-XMM15 on 64 bit processors */ /* XMM0-XMM7 on 32 bit processors... unused storage reserved */ - unsigned char fx_reserved[16*6]; /* reserved by intel for future expansion */ -}; + unsigned char fx_reserved[16*5]; /* reserved by intel for future + * expansion */ + unsigned int fp_valid; + unsigned int fp_save_layout; + unsigned char fx_pad[8]; +}__attribute__ ((packed)); + +struct x86_avx_thread_state { + unsigned short fx_control; /* control */ + unsigned short fx_status; /* status */ + unsigned char fx_tag; /* register tags */ + unsigned char fx_bbz1; /* reserved zero */ + unsigned short fx_opcode; + unsigned int fx_eip; /* eip instruction */ + unsigned short fx_cs; /* cs instruction */ + unsigned short fx_bbz2; /* reserved zero */ + unsigned int fx_dp; /* data address */ + unsigned short fx_ds; /* data segment */ + unsigned short fx_bbz3; /* reserved zero */ + unsigned int fx_MXCSR; + unsigned int fx_MXCSR_MASK; + unsigned short fx_reg_word[8][8]; /* STx/MMx registers */ + unsigned short fx_XMM_reg[8][16]; /* XMM0-XMM15 on 64 bit processors */ + /* XMM0-XMM7 on 32 bit processors... 
unused storage reserved */ + unsigned char fx_reserved[16*5]; /* reserved */ + unsigned int fp_valid; + unsigned int fp_save_layout; + unsigned char fx_pad[8]; + + struct xsave_header { /* Offset 512, xsave header */ + uint64_t xsbv; + char xhrsvd[56]; + }_xh; + + unsigned int x_YMMH_reg[4][16]; /* Offset 576, high YMMs*/ +}__attribute__ ((packed)); +#endif /* MACH_KERNEL_PRIVATE */ /* * Control register */ diff --git a/osfmk/mach/i386/thread_status.h b/osfmk/mach/i386/thread_status.h index 558d1c071..501fc8df0 100644 --- a/osfmk/mach/i386/thread_status.h +++ b/osfmk/mach/i386/thread_status.h @@ -110,6 +110,10 @@ #define x86_DEBUG_STATE64 11 #define x86_DEBUG_STATE 12 #define THREAD_STATE_NONE 13 +/* 15 and 16 are used for the internal x86_SAVED_STATE flavours */ +#define x86_AVX_STATE32 16 +#define x86_AVX_STATE64 17 + /* * Largest state on this machine: @@ -136,6 +140,8 @@ (x == x86_FLOAT_STATE) || \ (x == x86_EXCEPTION_STATE) || \ (x == x86_DEBUG_STATE) || \ + (x == x86_AVX_STATE32) || \ + (x == x86_AVX_STATE64) || \ (x == THREAD_STATE_NONE)) struct x86_state_hdr { @@ -175,6 +181,10 @@ typedef _STRUCT_X86_FLOAT_STATE32 x86_float_state32_t; #define x86_FLOAT_STATE32_COUNT ((mach_msg_type_number_t) \ (sizeof(x86_float_state32_t)/sizeof(unsigned int))) +typedef _STRUCT_X86_AVX_STATE32 x86_avx_state32_t; +#define x86_AVX_STATE32_COUNT ((mach_msg_type_number_t) \ + (sizeof(x86_avx_state32_t)/sizeof(unsigned int))) + /* * to be deprecated in the future */ @@ -201,7 +211,11 @@ typedef _STRUCT_X86_THREAD_STATE64 x86_thread_state64_t; typedef _STRUCT_X86_FLOAT_STATE64 x86_float_state64_t; #define x86_FLOAT_STATE64_COUNT ((mach_msg_type_number_t) \ (sizeof(x86_float_state64_t)/sizeof(unsigned int))) - + +typedef _STRUCT_X86_AVX_STATE64 x86_avx_state64_t; +#define x86_AVX_STATE64_COUNT ((mach_msg_type_number_t) \ + (sizeof(x86_avx_state64_t)/sizeof(unsigned int))) + typedef _STRUCT_X86_EXCEPTION_STATE64 x86_exception_state64_t; #define x86_EXCEPTION_STATE64_COUNT ((mach_msg_type_number_t) \ ( sizeof (x86_exception_state64_t) / sizeof (int) )) diff --git a/osfmk/mach/mach_vm.defs b/osfmk/mach/mach_vm.defs index 8dbb71f66..0cd136c69 100644 --- a/osfmk/mach/mach_vm.defs +++ b/osfmk/mach/mach_vm.defs @@ -333,7 +333,7 @@ routine vm_remap( inout target_address : mach_vm_address_t; size : mach_vm_size_t; mask : mach_vm_offset_t; - anywhere : boolean_t; + flags : int; src_task : vm_map_t; src_address : mach_vm_address_t; copy : boolean_t; diff --git a/osfmk/mach/vm_map.defs b/osfmk/mach/vm_map.defs index 439c0145a..b59e795ef 100644 --- a/osfmk/mach/vm_map.defs +++ b/osfmk/mach/vm_map.defs @@ -293,7 +293,7 @@ routine vm_remap( inout target_address : vm_address_t; size : vm_size_t; mask : vm_address_t; - anywhere : boolean_t; + flags : int; src_task : vm_map_t; src_address : vm_address_t; copy : boolean_t; diff --git a/osfmk/mach/vm_statistics.h b/osfmk/mach/vm_statistics.h index 36a79fdd1..89ca4351e 100644 --- a/osfmk/mach/vm_statistics.h +++ b/osfmk/mach/vm_statistics.h @@ -252,7 +252,9 @@ typedef struct pmap_statistics *pmap_statistics_t; #define VM_FLAGS_SUBMAP 0x0800 /* mapping a VM submap */ #define VM_FLAGS_ALREADY 0x1000 /* OK if same mapping already exists */ #define VM_FLAGS_BEYOND_MAX 0x2000 /* map beyond the map's max offset */ +#endif /* KERNEL_PRIVATE */ #define VM_FLAGS_OVERWRITE 0x4000 /* delete any existing mappings first */ +#ifdef KERNEL_PRIVATE #define VM_FLAGS_NO_PMAP_CHECK 0x8000 /* do not check that pmap is empty */ #endif /* KERNEL_PRIVATE */ @@ -283,9 +285,13 @@ typedef struct 
pmap_statistics *pmap_statistics_t; VM_FLAGS_ANYWHERE | \ VM_FLAGS_PURGABLE | \ VM_FLAGS_NO_CACHE | \ + VM_FLAGS_OVERWRITE | \ VM_FLAGS_SUPERPAGE_MASK | \ VM_FLAGS_ALIAS_MASK) #define VM_FLAGS_USER_MAP VM_FLAGS_USER_ALLOCATE +#define VM_FLAGS_USER_REMAP (VM_FLAGS_FIXED | \ + VM_FLAGS_ANYWHERE | \ + VM_FLAGS_OVERWRITE) #define VM_MEMORY_MALLOC 1 #define VM_MEMORY_MALLOC_SMALL 2 diff --git a/osfmk/ppc/io_map.c b/osfmk/ppc/io_map.c index 25607c1cc..e30d357ce 100644 --- a/osfmk/ppc/io_map.c +++ b/osfmk/ppc/io_map.c @@ -52,7 +52,7 @@ extern vm_offset_t virtual_avail; * Note, this will onl */ vm_offset_t -io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) +io_map(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags) { vm_offset_t start; vm_size_t i; @@ -75,7 +75,7 @@ io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) mflags, /* Map with requested cache mode */ (size >> 12), VM_PROT_READ|VM_PROT_WRITE); - return (start + (phys_addr & PAGE_MASK)); /* Pass back the physical address */ + return (start + (phys_addr & PAGE_MASK)); /* Pass back the virtual address */ } else { @@ -107,7 +107,7 @@ io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) * Allocate and map memory for devices before the VM system comes alive. */ -vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) +vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags) { vm_offset_t start; unsigned int mflags; diff --git a/osfmk/ppc/io_map_entries.h b/osfmk/ppc/io_map_entries.h index eb0adf48a..8fceaaf86 100644 --- a/osfmk/ppc/io_map_entries.h +++ b/osfmk/ppc/io_map_entries.h @@ -35,10 +35,10 @@ #define _PPC_IO_MAP_ENTRIES_H_ extern vm_offset_t io_map( - vm_offset_t phys_addr, + vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags); -extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags); +extern vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags); #endif /* _PPC_IO_MAP_ENTRIES_H_ */ diff --git a/osfmk/ppc/machine_routines.c b/osfmk/ppc/machine_routines.c index bc79f0c7c..d4fb8e1ca 100644 --- a/osfmk/ppc/machine_routines.c +++ b/osfmk/ppc/machine_routines.c @@ -841,3 +841,7 @@ vm_offset_t ml_stack_remaining(void) return (local - current_thread()->kernel_stack); } } + +boolean_t machine_timeout_suspended(void) { + return FALSE; +} diff --git a/osfmk/ppc/machine_routines.h b/osfmk/ppc/machine_routines.h index 3fcaf77ef..47b12432d 100644 --- a/osfmk/ppc/machine_routines.h +++ b/osfmk/ppc/machine_routines.h @@ -261,6 +261,7 @@ extern void ml_init_lock_timeout( void ml_ppc_do_sleep(void); +boolean_t machine_timeout_suspended(void); #endif /* MACH_KERNEL_PRIVATE */ #endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 64ef466b6..68fece885 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -253,7 +253,7 @@ static kern_return_t vm_map_remap_range_allocate( vm_map_address_t *address, vm_map_size_t size, vm_map_offset_t mask, - boolean_t anywhere, + int flags, vm_map_entry_t *map_entry); static void vm_map_region_look_for_page( @@ -11155,7 +11155,7 @@ vm_map_remap( vm_map_address_t *address, vm_map_size_t size, vm_map_offset_t mask, - boolean_t anywhere, + int flags, vm_map_t src_map, vm_map_offset_t memory_address, boolean_t copy, @@ -11204,7 +11204,7 @@ vm_map_remap( *address = vm_map_trunc_page(*address); vm_map_lock(target_map); result = vm_map_remap_range_allocate(target_map, address, size, - mask, anywhere, 
diff --git a/osfmk/ppc/io_map.c b/osfmk/ppc/io_map.c
index 25607c1cc..e30d357ce 100644
--- a/osfmk/ppc/io_map.c
+++ b/osfmk/ppc/io_map.c
@@ -52,7 +52,7 @@ extern vm_offset_t virtual_avail;
  * Note, this will onl
  */
 vm_offset_t
-io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
+io_map(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
 {
         vm_offset_t     start;
         vm_size_t       i;
@@ -75,7 +75,7 @@ io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
                         mflags,         /* Map with requested cache mode */
                         (size >> 12),
                         VM_PROT_READ|VM_PROT_WRITE);
-        return (start + (phys_addr & PAGE_MASK));      /* Pass back the physical address */
+        return (start + (phys_addr & PAGE_MASK));      /* Pass back the virtual address */
 
         } else {
 
@@ -107,7 +107,7 @@
  * Allocate and map memory for devices before the VM system comes alive.
  */
 
-vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
+vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
 {
         vm_offset_t     start;
         unsigned int    mflags;
diff --git a/osfmk/ppc/io_map_entries.h b/osfmk/ppc/io_map_entries.h
index eb0adf48a..8fceaaf86 100644
--- a/osfmk/ppc/io_map_entries.h
+++ b/osfmk/ppc/io_map_entries.h
@@ -35,10 +35,10 @@
 #define _PPC_IO_MAP_ENTRIES_H_
 
 extern vm_offset_t      io_map(
-                                vm_offset_t     phys_addr,
+                                vm_map_offset_t phys_addr,
                                 vm_size_t       size,
                                 unsigned int    flags);
-extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags);
+extern vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags);
 
 #endif /* _PPC_IO_MAP_ENTRIES_H_ */
diff --git a/osfmk/ppc/machine_routines.c b/osfmk/ppc/machine_routines.c
index bc79f0c7c..d4fb8e1ca 100644
--- a/osfmk/ppc/machine_routines.c
+++ b/osfmk/ppc/machine_routines.c
@@ -841,3 +841,7 @@ vm_offset_t ml_stack_remaining(void)
                 return (local - current_thread()->kernel_stack);
         }
 }
+
+boolean_t machine_timeout_suspended(void) {
+        return FALSE;
+}
diff --git a/osfmk/ppc/machine_routines.h b/osfmk/ppc/machine_routines.h
index 3fcaf77ef..47b12432d 100644
--- a/osfmk/ppc/machine_routines.h
+++ b/osfmk/ppc/machine_routines.h
@@ -261,6 +261,7 @@ extern void ml_init_lock_timeout(
 
 void ml_ppc_do_sleep(void);
 
+boolean_t machine_timeout_suspended(void);
 #endif /* MACH_KERNEL_PRIVATE */
 
 #endif /* XNU_KERNEL_PRIVATE */
diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c
index 64ef466b6..68fece885 100644
--- a/osfmk/vm/vm_map.c
+++ b/osfmk/vm/vm_map.c
@@ -253,7 +253,7 @@ static kern_return_t vm_map_remap_range_allocate(
         vm_map_address_t        *address,
         vm_map_size_t           size,
         vm_map_offset_t         mask,
-        boolean_t               anywhere,
+        int                     flags,
         vm_map_entry_t          *map_entry);
 
 static void             vm_map_region_look_for_page(
@@ -11155,7 +11155,7 @@ vm_map_remap(
         vm_map_address_t        *address,
         vm_map_size_t           size,
         vm_map_offset_t         mask,
-        boolean_t               anywhere,
+        int                     flags,
         vm_map_t                src_map,
         vm_map_offset_t         memory_address,
         boolean_t               copy,
@@ -11204,7 +11204,7 @@ vm_map_remap(
         *address = vm_map_trunc_page(*address);
         vm_map_lock(target_map);
         result = vm_map_remap_range_allocate(target_map, address, size,
-                                             mask, anywhere, &insp_entry);
+                                             mask, flags, &insp_entry);
 
         for (entry = map_header.links.next;
              entry != (struct vm_map_entry *)&map_header.links;
@@ -11255,18 +11255,19 @@ vm_map_remap_range_allocate(
         vm_map_address_t        *address,       /* IN/OUT */
         vm_map_size_t           size,
         vm_map_offset_t         mask,
-        boolean_t               anywhere,
+        int                     flags,
         vm_map_entry_t          *map_entry)     /* OUT */
 {
-        register vm_map_entry_t entry;
-        register vm_map_offset_t        start;
-        register vm_map_offset_t        end;
+        vm_map_entry_t  entry;
+        vm_map_offset_t start;
+        vm_map_offset_t end;
+        kern_return_t   kr;
 
 StartAgain: ;
 
         start = *address;
 
-        if (anywhere)
+        if (flags & VM_FLAGS_ANYWHERE)
         {
                 /*
                  *      Calculate the first possible address.
@@ -11379,6 +11380,37 @@ StartAgain: ;
                         return(KERN_INVALID_ADDRESS);
                 }
 
+                /*
+                 * If we're asked to overwrite whatever was mapped in that
+                 * range, first deallocate that range.
+                 */
+                if (flags & VM_FLAGS_OVERWRITE) {
+                        vm_map_t zap_map;
+
+                        /*
+                         * We use a "zap_map" to avoid having to unlock
+                         * the "map" in vm_map_delete(), which would compromise
+                         * the atomicity of the "deallocate" and then "remap"
+                         * combination.
+                         */
+                        zap_map = vm_map_create(PMAP_NULL,
+                                                start,
+                                                end - start,
+                                                map->hdr.entries_pageable);
+                        if (zap_map == VM_MAP_NULL) {
+                                return KERN_RESOURCE_SHORTAGE;
+                        }
+
+                        kr = vm_map_delete(map, start, end,
+                                           VM_MAP_REMOVE_SAVE_ENTRIES,
+                                           zap_map);
+                        if (kr == KERN_SUCCESS) {
+                                vm_map_destroy(zap_map,
+                                               VM_MAP_REMOVE_NO_PMAP_CLEANUP);
+                                zap_map = VM_MAP_NULL;
+                        }
+                }
+
                 /*
                  *      ...     the starting address isn't allocated
                  */
diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h
index 09eaa7473..dd39abb5c 100644
--- a/osfmk/vm/vm_map.h
+++ b/osfmk/vm/vm_map.h
@@ -690,7 +690,7 @@ extern kern_return_t vm_map_remap(
         vm_map_offset_t         *address,
         vm_map_size_t           size,
         vm_map_offset_t         mask,
-        boolean_t               anywhere,
+        int                     flags,
         vm_map_t                src_map,
         vm_map_offset_t         memory_address,
         boolean_t               copy,
diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c
index dae49ac1a..979c81624 100644
--- a/osfmk/vm/vm_resident.c
+++ b/osfmk/vm/vm_resident.c
@@ -1708,7 +1708,6 @@ vm_page_grablo(void)
                 vm_lopages_allocated_cpm_success++;
                 vm_page_unlock_queues();
         }
-        assert(mem->gobbled);
         assert(mem->busy);
         assert(!mem->free);
         assert(!mem->pmapped);
diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c
index 59c26ff70..582e51fc0 100644
--- a/osfmk/vm/vm_user.c
+++ b/osfmk/vm/vm_user.c
@@ -977,7 +977,7 @@ mach_vm_remap(
         mach_vm_offset_t        *address,
         mach_vm_size_t          size,
         mach_vm_offset_t        mask,
-        boolean_t               anywhere,
+        int                     flags,
         vm_map_t                src_map,
         mach_vm_offset_t        memory_address,
         boolean_t               copy,
@@ -991,13 +991,17 @@ mach_vm_remap(
         if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
                 return KERN_INVALID_ARGUMENT;
 
+        /* filter out any kernel-only flags */
+        if (flags & ~VM_FLAGS_USER_REMAP)
+                return KERN_INVALID_ARGUMENT;
+
         map_addr = (vm_map_offset_t)*address;
 
         kr = vm_map_remap(target_map,
                           &map_addr,
                           size,
                           mask,
-                          anywhere,
+                          flags,
                           src_map,
                           memory_address,
                           copy,
@@ -1025,7 +1029,7 @@ vm_remap(
         vm_offset_t             *address,
         vm_size_t               size,
         vm_offset_t             mask,
-        boolean_t               anywhere,
+        int                     flags,
         vm_map_t                src_map,
         vm_offset_t             memory_address,
         boolean_t               copy,
@@ -1039,13 +1043,17 @@ vm_remap(
         if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
                 return KERN_INVALID_ARGUMENT;
 
+        /* filter out any kernel-only flags */
+        if (flags & ~VM_FLAGS_USER_REMAP)
+                return KERN_INVALID_ARGUMENT;
+
         map_addr = (vm_map_offset_t)*address;
 
         kr = vm_map_remap(target_map,
                           &map_addr,
                           size,
                           mask,
-                          anywhere,
+                          flags,
                           src_map,
                           memory_address,
                           copy,
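The user entry points now reject anything outside VM_FLAGS_USER_REMAP before touching the map, so kernel-only bits such as VM_FLAGS_NO_PMAP_CHECK can never arrive from user space. A small self-contained check of that behaviour (assumes a kernel with this patch; VM_FLAGS_NO_CACHE is a user-visible allocate flag that is deliberately absent from VM_FLAGS_USER_REMAP):

    #include <assert.h>
    #include <mach/mach.h>
    #include <mach/mach_vm.h>

    int main(void)
    {
        mach_vm_address_t src = 0, dst = 0;
        mach_vm_size_t size = vm_page_size;
        vm_prot_t cur, max;
        kern_return_t kr;

        kr = mach_vm_allocate(mach_task_self(), &src, size, VM_FLAGS_ANYWHERE);
        assert(kr == KERN_SUCCESS);

        /* any flag outside VM_FLAGS_USER_REMAP must now bounce */
        kr = mach_vm_remap(mach_task_self(), &dst, size, 0,
                           VM_FLAGS_ANYWHERE | VM_FLAGS_NO_CACHE,
                           mach_task_self(), src, FALSE,
                           &cur, &max, VM_INHERIT_NONE);
        assert(kr == KERN_INVALID_ARGUMENT);
        return 0;
    }
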
diff --git a/osfmk/x86_64/idt64.s b/osfmk/x86_64/idt64.s
index 0f96dcd72..a4ca1cecd 100644
--- a/osfmk/x86_64/idt64.s
+++ b/osfmk/x86_64/idt64.s
@@ -292,8 +292,8 @@ L_dispatch:
         swapgs
-        cmpl    $(USER_CS), ISF64_CS(%rsp)
-        je      L_32bit_dispatch        /* 32-bit user task */
+        cmpl    $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
+        je      L_32bit_dispatch        /* 32-bit user task */
         /* fall through to 64bit user dispatch */
 
 /*
@@ -1104,7 +1104,7 @@ Entry(hndl_allintrs)
         leaq    -INTSTACK_SIZE(%rcx),%rdx
         cmpq    %rsp,%rdx
         jb      int_from_intstack
-1:
+1:
         xchgq   %rcx,%rsp       /* switch to interrupt stack */
 
         mov     %cr0,%rax       /* get cr0 */
@@ -1208,13 +1208,13 @@ LEXT(return_to_iret)    /* (label for kdb_kintr and hardclock) */
 int_from_intstack:
         incl    %gs:CPU_PREEMPTION_LEVEL
         incl    %gs:CPU_INTERRUPT_LEVEL
-
+        incl    %gs:CPU_NESTED_ISTACK
         mov     %rsp, %rdi              /* x86_saved_state */
         CCALL(interrupt)
 
         decl    %gs:CPU_INTERRUPT_LEVEL
         decl    %gs:CPU_PREEMPTION_LEVEL
-
+        decl    %gs:CPU_NESTED_ISTACK
 #if     DEBUG_IDT64
         CCALL1(panic_idt64, %rsp)
         POSTCODE2(0x6411)
@@ -1352,15 +1352,16 @@ Entry(hndl_diag_scall)
         movq    ACT_TASK(%rcx),%rbx     /* point to current task */
         TASK_VTIMER_CHECK(%rbx,%rcx)
 
-        pushq   %rdi                    /* push pcb stack so we can pop it later */
+        pushq   %rdi                    /* push pcb stack */
 
-        CCALL(diagCall)                 // Call diagnostics
-        cli                             // Disable interruptions just in case they were enabled
-        popq    %rsp                    // Get back the original stack
-
+        CCALL(diagCall)                 // Call diagnostics
+
+        cli                             // Disable interruptions just in case
         cmpl    $0,%eax                 // What kind of return is this?
-        jne     EXT(return_to_user)     // Normal return, do not check asts...
-
+        je      1f                      // - branch if bad (zero)
+        popq    %rsp                    // Get back the original stack
+        jmp     EXT(return_to_user)     // Normal return, do not check asts...
+1:
         CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
                 // pass what would be the diag syscall
                 // error return - cause an exception
@@ -1441,14 +1442,16 @@ Entry(hndl_mdep_scall64)
 
 Entry(hndl_diag_scall64)
-        pushq   %rdi                    // Push the previous stack
-        CCALL(diagCall64)               // Call diagnostics
-        cli                             // Disable interruptions just in case
-        popq    %rsp                    // Get back the original stack
-
-        cmpl    $0,%eax                 // What kind of return is this?
-        jne     EXT(return_to_user)     // Normal return, do not check asts...
-
+        pushq   %rdi                    // Push the previous stack
+
+        CCALL(diagCall64)               // Call diagnostics
+
+        cli                             // Disable interruptions just in case
+        cmpl    $0,%eax                 // What kind of return is this?
+        je      1f                      // - branch if bad (zero)
+        popq    %rsp                    // Get back the original stack
+        jmp     EXT(return_to_user)     // Normal return, do not check asts...
+1:
         CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
                 /* no return */
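The L_dispatch change stops trusting the CS selector image in the interrupt frame and instead consults the per-CPU task map that the kernel itself maintains as it switches tasks. A self-contained C model of the new test (all types here are mocks for exposition; the kernel's real definitions live in osfmk/i386/cpu_data.h, and the real code is the assembly above):

    #include <stdio.h>

    /* mock of the kernel's task_map_t / cpu_data_t, for illustration only */
    typedef enum { TASK_MAP_32BIT, TASK_MAP_64BIT } task_map_t;
    typedef struct { task_map_t cpu_task_map; } cpu_data_t;

    static cpu_data_t cpu = { TASK_MAP_64BIT };

    int main(void)
    {
        /* old code keyed off the saved CS selector in the frame;
         * new code trusts only the kernel-maintained per-CPU task map */
        if (cpu.cpu_task_map == TASK_MAP_32BIT)
            puts("L_32bit_dispatch: build compat-mode saved state");
        else
            puts("64-bit dispatch: build full 64-bit saved state");
        return 0;
    }
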
diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c
index a7ad2bb9d..a8c8cbde4 100644
--- a/osfmk/x86_64/pmap.c
+++ b/osfmk/x86_64/pmap.c
@@ -212,8 +212,6 @@ int pv_hashed_kern_free_count = 0;
 
 zone_t          pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
 
-static zone_t pdpt_zone;
-
 /*
  *      Each entry in the pv_head_table is locked by a bit in the
  *      pv_lock_table.  The lock bits are accessed by the physical
@@ -644,7 +642,7 @@ pmap_init(void)
 {
         long                    npages;
         vm_offset_t             addr;
-        vm_size_t               s;
+        vm_size_t               s, vsize;
         vm_map_offset_t         vaddr;
         ppnum_t ppn;
 
@@ -683,6 +681,9 @@ pmap_init(void)
 
         memset((char *)addr, 0, s);
 
+        vaddr = addr;
+        vsize = s;
+
 #if PV_DEBUG
         if (0 == npvhash) panic("npvhash not initialized");
 #endif
@@ -714,23 +715,37 @@ pmap_init(void)
                         for (pn = pmptr->base; pn <= pmptr->end; pn++) {
                                 if (pn < last_pn) {
                                         pmap_phys_attributes[pn] |= PHYS_MANAGED;
+
                                         if (pn > last_managed_page)
                                                 last_managed_page = pn;
+
+                                        if (pn < lowest_lo)
+                                                pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
+                                        else if (pn >= lowest_hi && pn <= highest_hi)
+                                                pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
                                 }
                         }
                 }
         }
+        while (vsize) {
+                ppn = pmap_find_phys(kernel_pmap, vaddr);
+
+                pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT;
+
+                vaddr += PAGE_SIZE;
+                vsize -= PAGE_SIZE;
+        }
 
         /*
          *      Create the zone of physical maps,
          *      and of the physical-to-virtual entries.
          */
         s = (vm_size_t) sizeof(struct pmap);
         pmap_zone = zinit(s, 400*s, 4096, "pmap");      /* XXX */
+        zone_change(pmap_zone, Z_NOENCRYPT, TRUE);
+
         s = (vm_size_t) sizeof(struct pv_hashed_entry);
         pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
-        s = 63;
-        pdpt_zone = zinit(s, 400*s, 4096, "pdpt");      /* XXX */
-
+        zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);
 
         /* create pv entries for kernel pages mapped by low level
            startup code.  these have to exist so we can pmap_remove()
@@ -2119,6 +2134,8 @@ pt_fake_zone_info(
         *exhaustable = 0;
 }
 
+extern long NMIPI_acks;
+
 static inline void
 pmap_cpuset_NMIPI(cpu_set cpu_mask) {
         unsigned int cpu, cpu_bit;
@@ -2221,17 +2238,7 @@ pmap_flush_tlbs(pmap_t pmap)
          * Wait for those other cpus to acknowledge
          */
         while (cpus_to_respond != 0) {
-                if (mach_absolute_time() > deadline) {
-                        if (mp_recent_debugger_activity())
-                                continue;
-                        if (!panic_active()) {
-                                pmap_tlb_flush_timeout = TRUE;
-                                pmap_cpuset_NMIPI(cpus_to_respond);
-                        }
-                        panic("pmap_flush_tlbs() timeout: "
-                            "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
-                            pmap, cpus_to_respond);
-                }
+                long orig_acks = 0;
 
                 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
                         if ((cpus_to_respond & cpu_bit) != 0) {
@@ -2245,6 +2252,17 @@ pmap_flush_tlbs(pmap_t pmap)
                         if (cpus_to_respond == 0)
                                 break;
                 }
+                if (mach_absolute_time() > deadline) {
+                        if (machine_timeout_suspended())
+                                continue;
+                        pmap_tlb_flush_timeout = TRUE;
+                        orig_acks = NMIPI_acks;
+                        pmap_cpuset_NMIPI(cpus_to_respond);
+
+                        panic("TLB invalidation IPI timeout: "
+                            "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
+                            cpus_to_respond, orig_acks, NMIPI_acks);
+                }
         }
 }
diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c
index 5a8fa5eff..7b37f5eb2 100644
--- a/pexpert/i386/pe_init.c
+++ b/pexpert/i386/pe_init.c
@@ -75,12 +75,16 @@ int PE_initialize_console( PE_Video * info, int op )
 
         case kPEEnableScreen:
             initialize_screen(info, op);
-            if (info) PE_state.video = *info;
+            if (info) PE_state.video = *info;
             kprintf("kPEEnableScreen %d\n", last_console);
             if( last_console != -1)
                 switch_to_old_console( last_console);
             break;
 
+        case kPEBaseAddressChange:
+            if (info) PE_state.video = *info;
+            /* fall thru */
+
         default:
             initialize_screen(info, op);
             break;
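The rewritten wait loop in pmap_flush_tlbs() scans for acknowledgements before it looks at the deadline, and on a timeout it records NMIPI_acks before NMIing the stragglers so the panic string shows whether the NMIs themselves got through. A self-contained model of the loop's shape (every name here is a stand-in, not kernel API: now() models mach_absolute_time(), cpu_acked() the per-CPU flush word, suspended() machine_timeout_suspended()):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t clock_ticks;
    static uint64_t acks;                      /* bitmap of CPUs that acked */

    static uint64_t now(void)           { return ++clock_ticks; }
    static bool cpu_acked(uint64_t bit) { return (acks & bit) != 0; }
    static bool suspended(void)         { return false; }

    /* Returns true if every CPU in 'mask' acked before 'deadline'. */
    static bool wait_for_tlb_acks(uint64_t mask, uint64_t deadline)
    {
        while (mask != 0) {
            for (uint64_t bit = 1; bit != 0; bit <<= 1)
                if ((mask & bit) && cpu_acked(bit))
                    mask &= ~bit;              /* CPU completed its flush */
            if (mask != 0 && now() > deadline) {
                if (suspended())
                    continue;                  /* debugger held the clock */
                return false;                  /* caller records NMIPI acks,
                                                  sends NMIs, then panics */
            }
        }
        return true;
    }

    int main(void)
    {
        acks = 0x3;                            /* CPUs 0 and 1 acked */
        printf("%s\n", wait_for_tlb_acks(0x3, 100) ? "ok" : "timeout");
        printf("%s\n", wait_for_tlb_acks(0x7, 100) ? "ok" : "timeout");
        return 0;
    }
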
diff --git a/pexpert/pexpert/i386/efi.h b/pexpert/pexpert/i386/efi.h
index 5ef7a5bf6..08ff10f84 100644
--- a/pexpert/pexpert/i386/efi.h
+++ b/pexpert/pexpert/i386/efi.h
@@ -230,7 +230,7 @@ typedef struct {
 // range requires a runtime mapping
 //
 #define EFI_MEMORY_RUNTIME 0x8000000000000000ULL
-
+#define EFI_MEMORY_KERN_RESERVED (1ULL << 59)
 
 typedef EFI_UINT64  EFI_PHYSICAL_ADDRESS;
 typedef EFI_UINT64  EFI_VIRTUAL_ADDRESS;
diff --git a/pexpert/pexpert/pexpert.h b/pexpert/pexpert/pexpert.h
index c9adfe5eb..f3a539af8 100644
--- a/pexpert/pexpert/pexpert.h
+++ b/pexpert/pexpert/pexpert.h
@@ -185,7 +185,11 @@ struct PE_Video {
         unsigned char   v_rotate;       /* Rotation: 0:normal, 1:right 90, 2:left 180, 3:left 90 */
         unsigned char   v_scale;        /* Scale Factor for both X & Y */
         char            reserved1[2];
+#ifdef __LP64__
         long            reserved2;
+#else
+        long            v_baseAddrHigh;
+#endif
 };
 
 typedef struct PE_Video PE_Video;
@@ -211,6 +215,7 @@ extern int PE_initialize_console(
 #define kPEReleaseScreen      5
 #define kPEEnableScreen       6
 #define kPEDisableScreen      7
+#define kPEBaseAddressChange  8
 
 extern void PE_display_icon( unsigned int flags,
                              const char * name );
-- 
2.45.2