]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/commpage/commpage.c
xnu-1699.24.23.tar.gz
[apple/xnu.git] / osfmk / i386 / commpage / commpage.c
index 328e095ab65b6a0a38a5389f30fd364bcd778b49..cc52576c5bf3ebfc0a5452ad05f324065f5802c2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <mach/mach_types.h>
 #include <mach/machine.h>
 #include <mach/vm_map.h>
+#include <mach/mach_vm.h>
+#include <mach/machine.h>
+#include <i386/cpuid.h>
+#include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
+#include <i386/cpu_data.h>
 #include <i386/machine_routines.h>
 #include <i386/misc_protos.h>
+#include <i386/cpuid.h>
 #include <machine/cpu_capabilities.h>
 #include <machine/commpage.h>
 #include <machine/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
+
 #include <ipc/ipc_port.h>
 
 #include <kern/page_decrypt.h>
+#include <kern/processor.h>
 
 /* the lists of commpage routines are in commpage_asm.s  */
 extern commpage_descriptor*    commpage_32_routines[];
 extern commpage_descriptor*    commpage_64_routines[];
 
-/* translated commpage descriptors from commpage_sigs.c  */
-extern commpage_descriptor sigdata_descriptor;
-extern commpage_descriptor *ba_descriptors[];
-
-extern vm_map_t        com_region_map32;       // the shared submap, set up in vm init
-extern vm_map_t        com_region_map64;       // the shared submap, set up in vm init
+extern vm_map_t        commpage32_map; // the shared submap, set up in vm init
+extern vm_map_t        commpage64_map; // the shared submap, set up in vm init
 
 char   *commPagePtr32 = NULL;          // virtual addr in kernel map of 32-bit commpage
 char   *commPagePtr64 = NULL;          // ...and of 64-bit commpage
-int     _cpu_capabilities = 0;          // define the capability vector
+uint32_t     _cpu_capabilities = 0;          // define the capability vector
 
 int    noVMX = 0;              /* if true, do not set kHasAltivec in ppc _cpu_capabilities */
 
-void*  dsmos_blobs[3];         /* ptrs to the system integrity data in each commpage */
-int    dsmos_blob_count = 0;
+typedef uint32_t commpage_address_t;
 
-static uintptr_t next;                 // next available byte in comm page
-static int             cur_routine;            // comm page address of "current" routine
-static int             matched;                // true if we've found a match for "current" routine
+static commpage_address_t      next;                   // next available address in comm page
+static commpage_address_t      cur_routine;            // comm page address of "current" routine
+static boolean_t               matched;                // true if we've found a match for "current" routine
 
 static char    *commPagePtr;           // virtual addr in kernel map of commpage we are working on
-static size_t  commPageBaseOffset;     // add to 32-bit runtime address to get offset in commpage
+static commpage_address_t      commPageBaseOffset; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map
+
+static commpage_time_data      *time_data32 = NULL;
+static commpage_time_data      *time_data64 = NULL;
+
+decl_simple_lock_data(static,commpage_active_cpus_lock);
 
 /* Allocate the commpage and add to the shared submap created by vm:
  *     1. allocate a page in the kernel map (RW)
@@ -95,10 +104,10 @@ static size_t      commPageBaseOffset;     // add to 32-bit runtime address to get offset
 
 static  void*
 commpage_allocate( 
-       vm_map_t        submap,                 // com_region_map32 or com_region_map64
+       vm_map_t        submap,                 // commpage32_map or commpage64_map
        size_t          area_used )             // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
 {
-       vm_offset_t     kernel_addr;            // address of commpage in kernel map
+       vm_offset_t     kernel_addr = 0;        // address of commpage in kernel map
        vm_offset_t     zero = 0;
        vm_size_t       size = area_used;       // size actually populated
        vm_map_entry_t  entry;
@@ -107,7 +116,7 @@ commpage_allocate(
        if (submap == NULL)
                panic("commpage submap is null");
 
-       if (vm_allocate(kernel_map,&kernel_addr,area_used,VM_FLAGS_ANYWHERE))
+       if (vm_map(kernel_map,&kernel_addr,area_used,0,VM_FLAGS_ANYWHERE,NULL,0,FALSE,VM_PROT_ALL,VM_PROT_ALL,VM_INHERIT_NONE))
                panic("cannot allocate commpage");
 
        if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+area_used,VM_PROT_DEFAULT,FALSE))
@@ -128,7 +137,7 @@ commpage_allocate(
        if (mach_make_memory_entry( kernel_map,         // target map
                                    &size,              // size 
                                    kernel_addr,        // offset (address in kernel map)
-                                   VM_PROT_DEFAULT,    // map it RW
+                                   VM_PROT_ALL,        // map it RWX
                                    &handle,            // this is the object handle we get
                                    NULL ))             // parent_entry (what is this?)
                panic("cannot make entry for commpage");
@@ -141,23 +150,30 @@ commpage_allocate(
                        handle,                         // port is the memory entry we just made
                        0,                              // offset (map 1st page in memory entry)
                        FALSE,                          // copy
-                       VM_PROT_READ,                   // cur_protection (R-only in user map)
-                       VM_PROT_READ,                   // max_protection
+                       VM_PROT_READ|VM_PROT_EXECUTE,   // cur_protection (R-X in user map)
+                       VM_PROT_READ|VM_PROT_EXECUTE,   // max_protection
                        VM_INHERIT_SHARE ))             // inheritance
                panic("cannot map commpage");
 
        ipc_port_release(handle);
-
-       return (void*) kernel_addr;                     // return address in kernel map
+       
+       // Initialize the text section of the commpage with INT3
+       char *commpage_ptr = (char*)(intptr_t)kernel_addr;
+       vm_size_t i;
+       for( i = _COMM_PAGE_TEXT_START - _COMM_PAGE_START_ADDRESS; i < size; i++ )
+               // This is the hex for the X86 opcode INT3
+               commpage_ptr[i] = 0xCC;
+
+       return (void*)(intptr_t)kernel_addr;                     // return address in kernel map
 }
 
 /* Get address (in kernel map) of a commpage field. */
 
 static void*
 commpage_addr_of(
-    int     addr_at_runtime )
+    commpage_address_t     addr_at_runtime )
 {
-    return  (void*) ((uintptr_t)commPagePtr + addr_at_runtime - commPageBaseOffset);
+       return  (void*) ((uintptr_t)commPagePtr + (addr_at_runtime - commPageBaseOffset));
 }
 
 /* Determine number of CPUs on this system.  We cannot rely on
@@ -183,7 +199,7 @@ commpage_cpus( void )
 static void
 commpage_init_cpu_capabilities( void )
 {
-       int bits;
+       uint32_t bits;
        int cpus;
        ml_cpu_info_t cpu_info;
 
@@ -191,6 +207,15 @@ commpage_init_cpu_capabilities( void )
        ml_cpu_get_info(&cpu_info);
        
        switch (cpu_info.vector_unit) {
+               case 9:
+                       bits |= kHasAVX1_0;
+                       /* fall thru */
+               case 8:
+                       bits |= kHasSSE4_2;
+                       /* fall thru */
+               case 7:
+                       bits |= kHasSSE4_1;
+                       /* fall thru */
                case 6:
                        bits |= kHasSupplementalSSE3;
                        /* fall thru */
@@ -233,11 +258,17 @@ commpage_init_cpu_capabilities( void )
        if (cpu_mode_is64bit())                 // k64Bit means processor is 64-bit capable
                bits |= k64Bit;
 
+       if (tscFreq <= SLOW_TSC_THRESHOLD)      /* is TSC too slow for _commpage_nanotime?  */
+               bits |= kSlow;
+
+       if (cpuid_features() & CPUID_FEATURE_AES)
+               bits |= kHasAES;
+
        _cpu_capabilities = bits;               // set kernel version for use by drivers etc
 }
 
 int
-_get_cpu_capabilities()
+_get_cpu_capabilities(void)
 {
        return _cpu_capabilities;
 }
@@ -246,53 +277,18 @@ _get_cpu_capabilities()
 
 static void
 commpage_stuff(
-    int        address,
+    commpage_address_t         address,
     const void         *source,
     int        length  )
 {    
     void       *dest = commpage_addr_of(address);
     
-    if ((uintptr_t)dest < next)
-        panic("commpage overlap at address 0x%x, 0x%x < 0x%x", address, dest, next);
+    if (address < next)
+       panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest, address, next);
     
     bcopy(source,dest,length);
     
-    next = ((uintptr_t)dest + length);
-}
-
-static void
-commpage_stuff_swap(
-       int     address,
-       void    *source,
-       int     length,
-       int     legacy )
-{
-       if ( legacy ) {
-               void *dest = commpage_addr_of(address);
-               dest = (void *)((uintptr_t) dest + _COMM_PAGE_SIGS_OFFSET);
-               switch (length) {
-                       case 2:
-                               OSWriteSwapInt16(dest, 0, *(uint16_t *)source);
-                               break;
-                       case 4:
-                               OSWriteSwapInt32(dest, 0, *(uint32_t *)source);
-                               break;
-                       case 8:
-                               OSWriteSwapInt64(dest, 0, *(uint64_t *)source);
-                               break;
-               }
-       }
-}
-
-static void
-commpage_stuff2(
-       int     address,
-       void    *source,
-       int     length,
-       int     legacy )
-{
-       commpage_stuff_swap(address, source, length, legacy);
-       commpage_stuff(address, source, length);
+    next = address + length;
 }
 
 /* Copy a routine into comm page if it matches running machine.
@@ -301,7 +297,7 @@ static void
 commpage_stuff_routine(
     commpage_descriptor        *rd     )
 {
-    int                must,cant;
+    uint32_t           must,cant;
     
     if (rd->commpage_address != cur_routine) {
         if ((cur_routine!=0) && (matched==0))
@@ -323,65 +319,62 @@ commpage_stuff_routine(
 }
 
 /* Fill in the 32- or 64-bit commpage.  Called once for each.
- * The 32-bit ("legacy") commpage has a bunch of stuff added to it
- * for translated processes, some of which is byte-swapped.
  */
 
 static void
 commpage_populate_one( 
-       vm_map_t        submap,         // com_region_map32 or com_region_map64
+       vm_map_t        submap,         // commpage32_map or commpage64_map
        char **         kernAddressPtr, // &commPagePtr32 or &commPagePtr64
        size_t          area_used,      // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
-       size_t          base_offset,    // will become commPageBaseOffset
+       commpage_address_t base_offset, // will become commPageBaseOffset
        commpage_descriptor** commpage_routines, // list of routine ptrs for this commpage
-       boolean_t       legacy,         // true if 32-bit commpage
+       commpage_time_data** time_data, // &time_data32 or &time_data64
        const char*     signature )     // "commpage 32-bit" or "commpage 64-bit"
 {
+       uint8_t c1;
        short   c2;
-       static double   two52 = 1048576.0 * 1048576.0 * 4096.0; // 2**52
-       static double   ten6 = 1000000.0;                       // 10**6
+       int         c4;
+       uint64_t c8;
+       uint32_t        cfamily;
        commpage_descriptor **rd;
        short   version = _COMM_PAGE_THIS_VERSION;
-       int             swapcaps;
 
-       next = (uintptr_t) NULL;
+       next = 0;
        cur_routine = 0;
        commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used );
        *kernAddressPtr = commPagePtr;                          // save address either in commPagePtr32 or 64
        commPageBaseOffset = base_offset;
 
+       *time_data = commpage_addr_of( _COMM_PAGE_TIME_DATA_START );
+
        /* Stuff in the constants.  We move things into the comm page in strictly
        * ascending order, so we can check for overlap and panic if so.
        */
-       commpage_stuff(_COMM_PAGE_SIGNATURE,signature,strlen(signature));
-       commpage_stuff2(_COMM_PAGE_VERSION,&version,sizeof(short),legacy);
+       commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)strlen(signature));
+       commpage_stuff(_COMM_PAGE_VERSION,&version,sizeof(short));
        commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int));
 
-       /* excuse our magic constants, we cannot include ppc/cpu_capabilities.h */
-       /* always set kCache32 and kDcbaAvailable */
-       swapcaps =  0x44;
-       if ( _cpu_capabilities & kUP )
-               swapcaps |= (kUP + (1 << kNumCPUsShift));
-       else
-               swapcaps |= 2 << kNumCPUsShift; /* limit #cpus to 2 */
-       if ( ! noVMX )          /* if rosetta will be emulating altivec... */
-               swapcaps |= 0x101;      /* ...then set kHasAltivec and kDataStreamsAvailable too */
-       commpage_stuff_swap(_COMM_PAGE_CPU_CAPABILITIES, &swapcaps, sizeof(int), legacy);
-       c2 = 32;
-       commpage_stuff_swap(_COMM_PAGE_CACHE_LINESIZE,&c2,2,legacy);
-
-       if (_cpu_capabilities & kCache32)
-               c2 = 32;
-       else if (_cpu_capabilities & kCache64)
+       c2 = 32;  // default
+       if (_cpu_capabilities & kCache64)
                c2 = 64;
        else if (_cpu_capabilities & kCache128)
                c2 = 128;
        commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2);
+       
+       c4 = MP_SPIN_TRIES;
+       commpage_stuff(_COMM_PAGE_SPIN_COUNT,&c4,4);
 
-       if ( legacy ) {
-               commpage_stuff2(_COMM_PAGE_2_TO_52,&two52,8,legacy);
-               commpage_stuff2(_COMM_PAGE_10_TO_6,&ten6,8,legacy);
-       }
+       /* machine_info valid after ml_get_max_cpus() */
+       c1 = machine_info.physical_cpu_max;
+       commpage_stuff(_COMM_PAGE_PHYSICAL_CPUS,&c1,1);
+       c1 = machine_info.logical_cpu_max;
+       commpage_stuff(_COMM_PAGE_LOGICAL_CPUS,&c1,1);
+
+       c8 = ml_cpu_cache_size(0);
+       commpage_stuff(_COMM_PAGE_MEMORY_SIZE, &c8, 8);
+
+       cfamily = cpuid_info()->cpuid_cpufamily;
+       commpage_stuff(_COMM_PAGE_CPUFAMILY, &cfamily, 4);
 
        for( rd = commpage_routines; *rd != NULL ; rd++ )
                commpage_stuff_routine(*rd);
@@ -389,21 +382,9 @@ commpage_populate_one(
        if (!matched)
                panic("commpage no match on last routine");
 
-       if (next > (uintptr_t)_COMM_PAGE_END)
-               panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%08x", next, (uintptr_t)commPagePtr);
+       if (next > _COMM_PAGE_END)
+               panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next, commPagePtr);
 
-       if ( legacy ) {
-               next = (uintptr_t) NULL;
-               for( rd = ba_descriptors; *rd != NULL ; rd++ )
-                       commpage_stuff_routine(*rd);
-
-               next = (uintptr_t) NULL;
-               commpage_stuff_routine(&sigdata_descriptor);
-       }       
-
-       /* salt away a ptr to the system integrity data in this commpage */
-       dsmos_blobs[dsmos_blob_count++] = 
-               commpage_addr_of( _COMM_PAGE_SYSTEM_INTEGRITY );
 }
 
 
@@ -419,27 +400,241 @@ commpage_populate( void )
 {
        commpage_init_cpu_capabilities();
        
-       commpage_populate_one(  com_region_map32
+       commpage_populate_one(  commpage32_map
                                &commPagePtr32,
                                _COMM_PAGE32_AREA_USED,
                                _COMM_PAGE32_BASE_ADDRESS,
                                commpage_32_routines, 
-                               TRUE,                   /* legacy (32-bit) commpage */
+                               &time_data32,
                                "commpage 32-bit");
+#ifndef __LP64__
        pmap_commpage32_init((vm_offset_t) commPagePtr32, _COMM_PAGE32_BASE_ADDRESS, 
                           _COMM_PAGE32_AREA_USED/INTEL_PGBYTES);
+#endif                    
+       time_data64 = time_data32;                      /* if no 64-bit commpage, point to 32-bit */
 
        if (_cpu_capabilities & k64Bit) {
-               commpage_populate_one(  com_region_map64
+               commpage_populate_one(  commpage64_map
                                        &commPagePtr64,
                                        _COMM_PAGE64_AREA_USED,
-                                       _COMM_PAGE32_START_ADDRESS, /* because kernel is built 32-bit */
+                                       _COMM_PAGE32_START_ADDRESS, /* commpage addresses are relative to 32-bit commpage placement */
                                        commpage_64_routines, 
-                                       FALSE,          /* not a legacy commpage */
+                                       &time_data64,
                                        "commpage 64-bit");
+#ifndef __LP64__
                pmap_commpage64_init((vm_offset_t) commPagePtr64, _COMM_PAGE64_BASE_ADDRESS, 
                                   _COMM_PAGE64_AREA_USED/INTEL_PGBYTES);
+#endif
        }
 
+       simple_lock_init(&commpage_active_cpus_lock, 0);
+
+       commpage_update_active_cpus();
        rtc_nanotime_init_commpage();
 }
+
+
+/* Update commpage nanotime information.  Note that we interleave
+ * setting the 32- and 64-bit commpages, in order to keep nanotime more
+ * nearly in sync between the two environments.
+ *
+ * This routine must be serialized by some external means, ie a lock.
+ *
+ * tsc_base, ns_base, scale and shift are stored into the nt_tsc_base,
+ * nt_ns_base, nt_scale and nt_shift fields of both time-data areas.
+ * nt_generation is set to 0 before those fields are written and to a
+ * fresh non-zero value once they have all been written.
+ *
+ * Returns without doing anything while time_data32 is still NULL.
+ * Panics if the stored 32-bit generation differs from the local one, if
+ * ns_base is lower than the stored nt_ns_base, or if shift != 32 while
+ * kSlow is not set in _cpu_capabilities.
+ *
+ * NOTE(review): only time_data32 is tested for NULL; time_data64 is
+ * assumed to be valid whenever time_data32 is -- confirm.
+ */
+
+void
+commpage_set_nanotime(
+       uint64_t        tsc_base,
+       uint64_t        ns_base,
+       uint32_t        scale,
+       uint32_t        shift )
+{
+       commpage_time_data      *p32 = time_data32;
+       commpage_time_data      *p64 = time_data64;
+       static uint32_t generation = 0;
+       uint32_t        next_gen;
+       
+       if (p32 == NULL)                /* have commpages been allocated yet? */
+               return;
+               
+       if ( generation != p32->nt_generation )
+               panic("nanotime trouble 1");    /* possibly not serialized */
+       if ( ns_base < p32->nt_ns_base )        /* new base must not be below the one already stored */
+               panic("nanotime trouble 2");
+       if ((shift != 32) && ((_cpu_capabilities & kSlow)==0) )        /* a shift other than 32 is only accepted when kSlow is set */
+               panic("nanotime trouble 3");
+               
+       next_gen = ++generation;
+       if (next_gen == 0)              /* 0 means "invalid" below, so skip it when the counter wraps */
+               next_gen = ++generation;
+       
+       p32->nt_generation = 0;         /* mark invalid, so commpage won't try to use it */
+       p64->nt_generation = 0;
+       
+       p32->nt_tsc_base = tsc_base;
+       p64->nt_tsc_base = tsc_base;
+       
+       p32->nt_ns_base = ns_base;
+       p64->nt_ns_base = ns_base;
+       
+       p32->nt_scale = scale;
+       p64->nt_scale = scale;
+       
+       p32->nt_shift = shift;
+       p64->nt_shift = shift;
+       
+       p32->nt_generation = next_gen;  /* mark data as valid */
+       p64->nt_generation = next_gen;
+}
+
+
+/* Turn off the commpage gettimeofday() fast path by zeroing gtod_generation
+ * in both time-data areas, forcing commpage to call through to the kernel.
+ */
+
+void
+commpage_disable_timestamp( void )
+{
+       commpage_time_data      *p32 = time_data32;
+       commpage_time_data      *p64 = time_data64;
+
+       p32->gtod_generation = 0;       /* 32-bit area first, as in the other updaters */
+       p64->gtod_generation = 0;
+}
+
+
+/* Update commpage gettimeofday() information.  As with nanotime(), we interleave
+ * updates to the 32- and 64-bit commpage, in order to keep time more nearly in sync 
+ * between the two environments.
+ *
+ * This routine must be serialized by some external means, ie a lock.
+ *
+ * abstime is stored into gtod_ns_base and secs into gtod_sec_base of both
+ * time-data areas.  gtod_generation is set to 0 before those fields are
+ * written and to a fresh non-zero value once they have been written.
+ *
+ * NOTE(review): unlike commpage_set_nanotime(), there is no NULL check on
+ * time_data32/time_data64 here -- presumably callers only run after the
+ * commpages have been populated; confirm.
+ */
+ void
+ commpage_set_timestamp(
+       uint64_t        abstime,
+       uint64_t        secs )
+{
+       commpage_time_data      *p32 = time_data32;
+       commpage_time_data      *p64 = time_data64;
+       static uint32_t generation = 0;
+       uint32_t        next_gen;
+       
+       next_gen = ++generation;
+       if (next_gen == 0)              /* 0 means "invalid" below, so skip it when the counter wraps */
+               next_gen = ++generation;
+       
+       p32->gtod_generation = 0;               /* mark invalid, so commpage won't try to use it */
+       p64->gtod_generation = 0;
+       
+       p32->gtod_ns_base = abstime;
+       p64->gtod_ns_base = abstime;
+       
+       p32->gtod_sec_base = secs;
+       p64->gtod_sec_base = secs;
+       
+       p32->gtod_generation = next_gen;        /* mark data as valid */
+       p64->gtod_generation = next_gen;
+}
+
+
+/* Publish a new value in the _COMM_PAGE_MEMORY_PRESSURE field of every
+ * commpage that has been allocated.  Called periodically from vm's
+ * compute_memory_pressure().
+ */
+
+void
+commpage_set_memory_pressure(
+       unsigned int    pressure )
+{
+       char        *base32 = commPagePtr32;
+       char        *base64;
+
+       /* 32-bit commpage: field offset is taken relative to _COMM_PAGE32_BASE_ADDRESS */
+       if ( base32 != NULL ) {
+               uint32_t *field32 = (uint32_t*) (base32 + (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_BASE_ADDRESS));
+               *field32 = (uint32_t) pressure;
+       }
+
+       /* 64-bit commpage: field offset is taken relative to _COMM_PAGE32_START_ADDRESS */
+       base64 = commPagePtr64;
+       if ( base64 != NULL ) {
+               uint32_t *field64 = (uint32_t*) (base64 + (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_START_ADDRESS));
+               *field64 = (uint32_t) pressure;
+       }
+}
+
+
+/* Publish a new value in the _COMM_PAGE_SPIN_COUNT field of every commpage
+ * that has been allocated.  We might want to reduce it when running on a
+ * battery, etc.  A requested count of 0 is stored as 1.
+ */
+
+void
+commpage_set_spin_count(
+       unsigned int    count )
+{
+       char        *base32;
+       char        *base64;
+
+       /* we test for 0 after decrement, not before, so never publish 0 */
+       if (count == 0)
+               count = 1;
+
+       /* 32-bit commpage: field offset is taken relative to _COMM_PAGE32_BASE_ADDRESS */
+       base32 = commPagePtr32;
+       if ( base32 != NULL ) {
+               uint32_t *field32 = (uint32_t*) (base32 + (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_BASE_ADDRESS));
+               *field32 = (uint32_t) count;
+       }
+
+       /* 64-bit commpage: field offset is taken relative to _COMM_PAGE32_START_ADDRESS */
+       base64 = commPagePtr64;
+       if ( base64 != NULL ) {
+               uint32_t *field64 = (uint32_t*) (base64 + (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_START_ADDRESS));
+               *field64 = (uint32_t) count;
+       }
+}
+
+/* Store processor_avail_count, truncated to 8 bits, in the
+ * _COMM_PAGE_ACTIVE_CPUS field of the 32-bit commpage and, if it exists,
+ * of the 64-bit commpage.  Updated every time a logical CPU goes
+ * offline/online.  Does nothing until the 32-bit commpage exists; the
+ * stores are made while holding commpage_active_cpus_lock.
+ */
+void
+commpage_update_active_cpus(void)
+{
+       volatile uint8_t    *active32;
+       volatile uint8_t    *active64;
+       char                *base64;
+
+       /* At least 32-bit commpage must be initialized */
+       if (commPagePtr32 == NULL)
+               return;
+
+       simple_lock(&commpage_active_cpus_lock);
+
+       /* 32-bit commpage: field offset is taken relative to _COMM_PAGE32_BASE_ADDRESS */
+       active32 = (volatile uint8_t*) (commPagePtr32 + (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_BASE_ADDRESS));
+       *active32 = (uint8_t) processor_avail_count;
+
+       /* 64-bit commpage is optional: field offset is taken relative to _COMM_PAGE32_START_ADDRESS */
+       base64 = commPagePtr64;
+       if ( base64 != NULL ) {
+               active64 = (volatile uint8_t*) (base64 + (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_START_ADDRESS));
+               *active64 = (uint8_t) processor_avail_count;
+       }
+
+       simple_unlock(&commpage_active_cpus_lock);
+}
+
+
+/* Report whether a 32-bit address lies in the Preemption Free Zone (PFZ).
+ * Returns 1 when _COMM_PAGE_PFZ_START <= addr32 < _COMM_PAGE_PFZ_END,
+ * otherwise 0.
+ */
+
+uint32_t
+commpage_is_in_pfz32(uint32_t addr32)
+{
+       if (addr32 < _COMM_PAGE_PFZ_START)
+               return 0;
+       if (addr32 >= _COMM_PAGE_PFZ_END)
+               return 0;
+       return 1;
+}
+
+/* 64-bit counterpart of the PFZ check: the zone bounds are the 32-bit
+ * _COMM_PAGE_PFZ_START/_COMM_PAGE_PFZ_END values passed through
+ * _COMM_PAGE_32_TO_64().  Returns 1 when addr64 is inside [start, end),
+ * otherwise 0.
+ */
+uint32_t
+commpage_is_in_pfz64(addr64_t addr64)
+{
+       return ( (addr64 >= _COMM_PAGE_32_TO_64(_COMM_PAGE_PFZ_START))
+                && (addr64 <  _COMM_PAGE_32_TO_64(_COMM_PAGE_PFZ_END)) ) ? 1 : 0;
+}
+