]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/commpage/commpage.c
xnu-3789.1.32.tar.gz
[apple/xnu.git] / osfmk / i386 / commpage / commpage.c
index 62d0af6a5b9196e698e21f559c4910b0993231ae..6dae085679e3d1ecd2364c0aea225697bf3d0c14 100644 (file)
@@ -1,48 +1,46 @@
 /*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * This file contains Original Code and/or Modifications of Original Code 
- * as defined in and that are subject to the Apple Public Source License 
- * Version 2.0 (the 'License'). You may not use this file except in 
- * compliance with the License.  The rights granted to you under the 
- * License may not be used to create, or enable the creation or 
- * redistribution of, unlawful or unlicensed copies of an Apple operating 
- * system, or to circumvent, violate, or enable the circumvention or 
- * violation of, any terms of an Apple operating system software license 
- * agreement.
- *
- * Please obtain a copy of the License at 
- * http://www.opensource.apple.com/apsl/ and read it before using this 
- * file.
- *
- * The Original Code and all software distributed under the License are 
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
  * limitations under the License.
- *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
 /*
  *     Here's what to do if you want to add a new routine to the comm page:
  *
- *             1. Add a definition for it's address in osfmk/ppc/cpu_capabilities.h,
+ *             1. Add a definition for its address in osfmk/i386/cpu_capabilities.h,
  *                being careful to reserve room for future expansion.
  *
  *             2. Write one or more versions of the routine, each with it's own
  *                commpage_descriptor.  The tricky part is getting the "special",
  *                "musthave", and "canthave" fields right, so that exactly one
  *                version of the routine is selected for every machine.
- *                The source files should be in osfmk/ppc/commpage/.
+ *                The source files should be in osfmk/i386/commpage/.
  *
  *             3. Add a ptr to your new commpage_descriptor(s) in the "routines"
- *                array in commpage_populate().  Of course, you'll also have to
- *                declare them "extern" in commpage_populate().
+ *                array in osfmk/i386/commpage/commpage_asm.s.  There are two
+ *                arrays, one for the 32-bit and one for the 64-bit commpage.
  *
  *             4. Write the code in Libc to use the new routine.
  */
 #include <mach/mach_types.h>
 #include <mach/machine.h>
 #include <mach/vm_map.h>
+#include <mach/mach_vm.h>
+#include <mach/machine.h>
+#include <i386/cpuid.h>
+#include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
+#include <i386/cpu_data.h>
 #include <i386/machine_routines.h>
+#include <i386/misc_protos.h>
+#include <i386/cpuid.h>
 #include <machine/cpu_capabilities.h>
 #include <machine/commpage.h>
 #include <machine/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
+
 #include <ipc/ipc_port.h>
 
+#include <kern/page_decrypt.h>
+#include <kern/processor.h>
+
+#include <sys/kdebug.h>
+
+#if CONFIG_ATM
+#include <atm/atm_internal.h>
+#endif
+
+/* the lists of commpage routines are in commpage_asm.s  */
+extern commpage_descriptor*    commpage_32_routines[];
+extern commpage_descriptor*    commpage_64_routines[];
+
+extern vm_map_t        commpage32_map; // the shared submap, set up in vm init
+extern vm_map_t        commpage64_map; // the shared submap, set up in vm init
+extern vm_map_t        commpage_text32_map;    // the shared submap, set up in vm init
+extern vm_map_t        commpage_text64_map;    // the shared submap, set up in vm init
+
 
-extern vm_map_t        com_region_map32;       // the shared submap, set up in vm init
+char   *commPagePtr32 = NULL;          // virtual addr in kernel map of 32-bit commpage
+char   *commPagePtr64 = NULL;          // ...and of 64-bit commpage
+char   *commPageTextPtr32 = NULL;      // virtual addr in kernel map of 32-bit commpage text area
+char   *commPageTextPtr64 = NULL;      // ...and of 64-bit commpage text area
 
-static uintptr_t next = 0;             // next available byte in comm page
-static int             cur_routine = 0;        // comm page address of "current" routine
-static int             matched;                // true if we've found a match for "current" routine
+uint64_t     _cpu_capabilities = 0;     // define the capability vector
 
-int     _cpu_capabilities = 0;          // define the capability vector
+typedef uint32_t commpage_address_t;
 
-char    *commPagePtr = NULL;            // virtual address of comm page in kernel map
+static commpage_address_t      next;   // next available address in comm page
+
+static char    *commPagePtr;           // virtual addr in kernel map of commpage we are working on
+static commpage_address_t      commPageBaseOffset; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map
+
+static commpage_time_data      *time_data32 = NULL;
+static commpage_time_data      *time_data64 = NULL;
+
+decl_simple_lock_data(static,commpage_active_cpus_lock);
 
 /* Allocate the commpage and add to the shared submap created by vm:
  *     1. allocate a page in the kernel map (RW)
@@ -77,68 +111,92 @@ char    *commPagePtr = NULL;            // virtual address of comm page in kerne
  */
 
 static  void*
-commpage_allocate( void )
+commpage_allocate( 
+       vm_map_t        submap,                 // commpage32_map or commpage_map64
+       size_t          area_used,              // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
+       vm_prot_t       uperm)
 {
-    vm_offset_t         kernel_addr;                // address of commpage in kernel map
-    vm_offset_t         zero = 0;
-    vm_size_t           size = _COMM_PAGE_AREA_LENGTH;
-    vm_map_entry_t     entry;
-    ipc_port_t          handle;
-
-    if (com_region_map32 == NULL)
-        panic("commpage map is null");
-
-    if (vm_allocate(kernel_map,&kernel_addr,_COMM_PAGE_AREA_LENGTH,VM_FLAGS_ANYWHERE))
-        panic("cannot allocate commpage");
-
-    if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+_COMM_PAGE_AREA_LENGTH,VM_PROT_DEFAULT,FALSE))
-        panic("cannot wire commpage");
-
-    /* 
-     * Now that the object is created and wired into the kernel map, mark it so that no delay
-     * copy-on-write will ever be performed on it as a result of mapping it into user-space.
-     * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
-     * that would be a real disaster.
-     *
-     * JMM - What we really need is a way to create it like this in the first place.
-     */
-    if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map)
-       panic("cannot find commpage entry");
-    entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
-
-    if (mach_make_memory_entry( kernel_map,         // target map
-                                &size,              // size 
-                                kernel_addr,        // offset (address in kernel map)
-                                VM_PROT_DEFAULT,    // map it RW
-                                &handle,            // this is the object handle we get
-                                NULL ))             // parent_entry (what is this?)
-        panic("cannot make entry for commpage");
-
-    if (vm_map_64(  com_region_map32,               // target map (shared submap)
-                    &zero,                          // address (map into 1st page in submap)
-                    _COMM_PAGE_AREA_LENGTH,         // size
-                    0,                              // mask
-                    VM_FLAGS_FIXED,                 // flags (it must be 1st page in submap)
-                    handle,                         // port is the memory entry we just made
-                    0,                              // offset (map 1st page in memory entry)
-                    FALSE,                          // copy
-                    VM_PROT_READ,                   // cur_protection (R-only in user map)
-                    VM_PROT_READ,                   // max_protection
-                    VM_INHERIT_SHARE ))             // inheritance
-        panic("cannot map commpage");
-
-    ipc_port_release(handle);
-
-    return (void*) kernel_addr;                     // return address in kernel map
+       vm_offset_t     kernel_addr = 0;        // address of commpage in kernel map
+       vm_offset_t     zero = 0;
+       vm_size_t       size = area_used;       // size actually populated
+       vm_map_entry_t  entry;
+       ipc_port_t      handle;
+       kern_return_t   kr;
+
+       if (submap == NULL)
+               panic("commpage submap is null");
+
+       /* Allocate area_used bytes anywhere in the kernel map. */
+       if ((kr = vm_map(kernel_map,
+                        &kernel_addr,
+                        area_used,
+                        0,
+                        VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
+                        NULL,
+                        0,
+                        FALSE,
+                        VM_PROT_ALL,
+                        VM_PROT_ALL,
+                        VM_INHERIT_NONE)))
+               panic("cannot allocate commpage %d", kr);
+
+       if ((kr = vm_map_wire(kernel_map,
+                             kernel_addr,
+                             kernel_addr+area_used,
+                             VM_PROT_DEFAULT|VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
+                             FALSE)))
+               panic("cannot wire commpage: %d", kr);
+
+       /* 
+        * Now that the object is created and wired into the kernel map, mark it so that no delay
+        * copy-on-write will ever be performed on it as a result of mapping it into user-space.
+        * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
+        * that would be a real disaster.
+        *
+        * JMM - What we really need is a way to create it like this in the first place.
+        *
+        * The lookup result is tested on its own so that "entry" is only dereferenced
+        * after a successful lookup, and so that a submap entry also panics.  The
+        * lookup's result is used as a truth value, not a kern_return_t, so no code
+        * is printed.
+        */
+       if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr, VM_MAP_PAGE_MASK(kernel_map)), &entry) || entry->is_sub_map)
+               panic("cannot find commpage entry");
+       VME_OBJECT(entry)->copy_strategy = MEMORY_OBJECT_COPY_NONE;
+
+       if ((kr = mach_make_memory_entry( kernel_map,           // target map
+                                   &size,              // size 
+                                   kernel_addr,        // offset (address in kernel map)
+                                   uperm,      // protections as specified
+                                   &handle,            // this is the object handle we get
+                                   NULL )))            // parent_entry (what is this?)
+               panic("cannot make entry for commpage %d", kr);
+
+       if ((kr = vm_map_64(    submap,                         // target map (shared submap)
+                       &zero,                          // address (map into 1st page in submap)
+                       area_used,                      // size
+                       0,                              // mask
+                       VM_FLAGS_FIXED,                 // flags (it must be 1st page in submap)
+                       handle,                         // port is the memory entry we just made
+                       0,                              // offset (map 1st page in memory entry)
+                       FALSE,                          // copy
+                       uperm,   // cur_protection (as requested by the caller)
+                       uperm,   // max_protection (as requested by the caller)
+                       VM_INHERIT_SHARE )))             // inheritance
+               panic("cannot map commpage %d", kr);
+
+       ipc_port_release(handle);
+       /* Make the kernel mapping non-executable. This cannot be done
+        * at the time of map entry creation as mach_make_memory_entry
+        * cannot handle disjoint permissions at this time.
+        */
+       kr = vm_protect(kernel_map, kernel_addr, area_used, FALSE, VM_PROT_READ | VM_PROT_WRITE);
+       assert (kr == KERN_SUCCESS);
+
+       return (void*)(intptr_t)kernel_addr;                     // return address in kernel map
 }
 
 /* Get address (in kernel map) of a commpage field. */
 
 static void*
 commpage_addr_of(
-    int     addr_at_runtime )
+    commpage_address_t     addr_at_runtime )
 {
-    return  (void*) ((uintptr_t)commPagePtr + addr_at_runtime - _COMM_PAGE_BASE_ADDRESS);
+       return  (void*) ((uintptr_t)commPagePtr + (addr_at_runtime - commPageBaseOffset));
 }
 
 /* Determine number of CPUs on this system.  We cannot rely on
@@ -164,7 +222,7 @@ commpage_cpus( void )
 static void
 commpage_init_cpu_capabilities( void )
 {
-       int bits;
+       uint64_t bits;
        int cpus;
        ml_cpu_info_t cpu_info;
 
@@ -172,6 +230,18 @@ commpage_init_cpu_capabilities( void )
        ml_cpu_get_info(&cpu_info);
        
        switch (cpu_info.vector_unit) {
+               case 9:
+                       bits |= kHasAVX1_0;
+                       /* fall thru */
+               case 8:
+                       bits |= kHasSSE4_2;
+                       /* fall thru */
+               case 7:
+                       bits |= kHasSSE4_1;
+                       /* fall thru */
+               case 6:
+                       bits |= kHasSupplementalSSE3;
+                       /* fall thru */
                case 5:
                        bits |= kHasSSE3;
                        /* fall thru */
@@ -201,211 +271,624 @@ commpage_init_cpu_capabilities( void )
        }
        cpus = commpage_cpus();                 // how many CPUs do we have
 
-       if (cpus == 1)
-               bits |= kUP;
-
        bits |= (cpus << kNumCPUsShift);
 
        bits |= kFastThreadLocalStorage;        // we use %gs for TLS
 
+/* Set _bit in _bits when _condition is non-zero.  Wrapped in do/while(0) so
+ * the macro expands to exactly one statement and is safe in unbraced if/else.
+ */
+#define setif(_bits, _bit, _condition) \
+       do { if (_condition) (_bits) |= (_bit); } while (0)
+
+       setif(bits, kUP,         cpus == 1);
+       setif(bits, k64Bit,      cpu_mode_is64bit());
+       setif(bits, kSlow,       tscFreq <= SLOW_TSC_THRESHOLD);
+
+       /* Features tested against the cpuid_features() word. */
+       setif(bits, kHasAES,     cpuid_features() &
+                                       CPUID_FEATURE_AES);
+       setif(bits, kHasF16C,    cpuid_features() &
+                                       CPUID_FEATURE_F16C);
+       setif(bits, kHasRDRAND,  cpuid_features() &
+                                       CPUID_FEATURE_RDRAND);
+       setif(bits, kHasFMA,     cpuid_features() &
+                                       CPUID_FEATURE_FMA);
+
+       /* Features tested against the cpuid_leaf7_features() word.  Every
+        * CPUID_LEAF7_FEATURE_* mask must be applied to this word, never to
+        * cpuid_features(): the same bit position means something different there.
+        */
+       setif(bits, kHasBMI1,    cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_BMI1);
+       setif(bits, kHasBMI2,    cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_BMI2);
+       setif(bits, kHasRTM,     cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_RTM);
+       setif(bits, kHasHLE,     cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_HLE);
+       setif(bits, kHasAVX2_0,  cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_AVX2);
+       setif(bits, kHasRDSEED,  cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_RDSEED);
+       setif(bits, kHasADX,     cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_ADX);
+       
+       setif(bits, kHasMPX,     cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_MPX);
+       setif(bits, kHasSGX,     cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_SGX);
+       /* kHasENFSTRG needs both the leaf-7 ERMS bit and bit 0 of
+        * IA32_MISC_ENABLE to be set.
+        */
+       uint64_t misc_enable = rdmsr64(MSR_IA32_MISC_ENABLE);
+       setif(bits, kHasENFSTRG, (misc_enable & 1ULL) &&
+                                (cpuid_leaf7_features() &
+                                       CPUID_LEAF7_FEATURE_ERMS));
+       
        _cpu_capabilities = bits;               // set kernel version for use by drivers etc
 }
 
+/* initialize the approx_time_supported flag and set the approx time to 0.
+ * Called during initial commpage population.
+ */
+static void
+commpage_mach_approximate_time_init(void)
+{
+       uint8_t is_supported;
+       char *flag_addr;
+
+       /* Whether approximate time is available is decided at build time. */
+#ifdef CONFIG_MACH_APPROXIMATE_TIME
+       is_supported = 1;
+#else
+       is_supported = 0;
+#endif
+
+       /* Publish the flag in the 32-bit commpage, if that page exists. */
+       flag_addr = commPagePtr32;
+       if (flag_addr != NULL) {
+               flag_addr += (_COMM_PAGE_APPROX_TIME_SUPPORTED - _COMM_PAGE32_BASE_ADDRESS);
+               *(boolean_t *)flag_addr = is_supported;
+       }
+
+       /* ...and in the 64-bit commpage, if that page exists. */
+       flag_addr = commPagePtr64;
+       if (flag_addr != NULL) {
+               flag_addr += (_COMM_PAGE_APPROX_TIME_SUPPORTED - _COMM_PAGE32_START_ADDRESS);
+               *(boolean_t *)flag_addr = is_supported;
+       }
+
+       /* Start the published approximate time at 0. */
+       commpage_update_mach_approximate_time(0);
+}
+
+static void
+commpage_mach_continuous_time_init(void)
+{
+       /* Publish an initial value of 0 through
+        * commpage_update_mach_continuous_time() (defined outside this view).
+        */
+       commpage_update_mach_continuous_time(0);
+}
+
+static void
+commpage_boottime_init(void)
+{
+       clock_sec_t secs;
+       clock_usec_t microsecs;
+       /* Fetch the boot time as a (seconds, microseconds) pair... */
+       clock_get_boottime_microtime(&secs, &microsecs);
+       /* ...and hand it to commpage_update_boottime() as one microsecond count. */
+       commpage_update_boottime(secs * USEC_PER_SEC + microsecs);
+}
+
+uint64_t
+_get_cpu_capabilities(void)
+{
+       /* Accessor for the 64-bit capability vector held in _cpu_capabilities. */
+       return _cpu_capabilities;
+}
+
 /* Copy data into commpage. */
 
 static void
 commpage_stuff(
-    int        address,
-    void       *source,
+    commpage_address_t         address,
+    const void         *source,
     int        length  )
 {    
     void       *dest = commpage_addr_of(address);
     
-    if ((uintptr_t)dest < next)
-        panic("commpage overlap at address 0x%x, 0x%x < 0x%x", address, dest, next);
+    if (address < next)
+       panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest, address, next);
     
     bcopy(source,dest,length);
     
-    next = ((uintptr_t)dest + length);
-}
-
-
-static void
-commpage_stuff2(
-       int address,
-       void *source,
-       int length )
-{
-       commpage_stuff(address, source, length);
+    next = address + length;
 }
 
 /* Copy a routine into comm page if it matches running machine.
  */
 static void
 commpage_stuff_routine(
-    commpage_descriptor        *rd     )
+    commpage_descriptor *rd     )
 {
-    int                must,cant;
-    
-    if (rd->commpage_address != cur_routine) {
-        if ((cur_routine!=0) && (matched==0))
-            panic("commpage no match");
-        cur_routine = rd->commpage_address;
-        matched = 0;
-    }
-    
-    must = _cpu_capabilities & rd->musthave;
-    cant = _cpu_capabilities & rd->canthave;
-    
-    if ((must == rd->musthave) && (cant == 0)) {
-        if (matched)
-            panic("commpage duplicate matches");
-        matched = 1;
-        
-        commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length);
-       }
+       commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length);
 }
 
-#define COMMPAGE_DESC(name)    commpage_ ## name
-#define EXTERN_COMMPAGE_DESC(name)                             \
-       extern commpage_descriptor COMMPAGE_DESC(name)
-
-EXTERN_COMMPAGE_DESC(compare_and_swap32_mp);
-EXTERN_COMMPAGE_DESC(compare_and_swap32_up);
-EXTERN_COMMPAGE_DESC(compare_and_swap64_mp);
-EXTERN_COMMPAGE_DESC(compare_and_swap64_up);
-EXTERN_COMMPAGE_DESC(atomic_add32_mp);
-EXTERN_COMMPAGE_DESC(atomic_add32_up);
-EXTERN_COMMPAGE_DESC(mach_absolute_time);
-EXTERN_COMMPAGE_DESC(spin_lock_try_mp);
-EXTERN_COMMPAGE_DESC(spin_lock_try_up);
-EXTERN_COMMPAGE_DESC(spin_lock_mp);
-EXTERN_COMMPAGE_DESC(spin_lock_up);
-EXTERN_COMMPAGE_DESC(spin_unlock);
-EXTERN_COMMPAGE_DESC(pthread_getspecific);
-EXTERN_COMMPAGE_DESC(gettimeofday);
-EXTERN_COMMPAGE_DESC(sys_flush_dcache);
-EXTERN_COMMPAGE_DESC(sys_icache_invalidate);
-EXTERN_COMMPAGE_DESC(pthread_self);
-EXTERN_COMMPAGE_DESC(relinquish);
-EXTERN_COMMPAGE_DESC(bit_test_and_set_mp);
-EXTERN_COMMPAGE_DESC(bit_test_and_set_up);
-EXTERN_COMMPAGE_DESC(bit_test_and_clear_mp);
-EXTERN_COMMPAGE_DESC(bit_test_and_clear_up);
-EXTERN_COMMPAGE_DESC(bzero_scalar);
-EXTERN_COMMPAGE_DESC(bcopy_scalar);
-EXTERN_COMMPAGE_DESC(nanotime);
-
-static  commpage_descriptor *routines[] = {
-       &COMMPAGE_DESC(compare_and_swap32_mp),
-       &COMMPAGE_DESC(compare_and_swap32_up),
-       &COMMPAGE_DESC(compare_and_swap64_mp),
-       &COMMPAGE_DESC(compare_and_swap64_up),
-       &COMMPAGE_DESC(atomic_add32_mp),
-       &COMMPAGE_DESC(atomic_add32_up),
-       &COMMPAGE_DESC(mach_absolute_time),
-       &COMMPAGE_DESC(spin_lock_try_mp),
-       &COMMPAGE_DESC(spin_lock_try_up),
-       &COMMPAGE_DESC(spin_lock_mp),
-       &COMMPAGE_DESC(spin_lock_up),
-       &COMMPAGE_DESC(spin_unlock),
-       &COMMPAGE_DESC(pthread_getspecific),
-       &COMMPAGE_DESC(gettimeofday),
-       &COMMPAGE_DESC(sys_flush_dcache),
-       &COMMPAGE_DESC(sys_icache_invalidate),
-       &COMMPAGE_DESC(pthread_self),
-       &COMMPAGE_DESC(relinquish),
-       &COMMPAGE_DESC(bit_test_and_set_mp),
-       &COMMPAGE_DESC(bit_test_and_set_up),
-       &COMMPAGE_DESC(bit_test_and_clear_mp),
-       &COMMPAGE_DESC(bit_test_and_clear_up),
-       &COMMPAGE_DESC(bzero_scalar),
-       &COMMPAGE_DESC(bcopy_scalar),
-       &COMMPAGE_DESC(nanotime),
-       NULL
-};
-
-
-/* Fill in commpage: called once, during kernel initialization, from the
- * startup thread before user-mode code is running.
- * See the top of this file for a list of what you have to do to add
- * a new routine to the commpage.
+/* Fill in the 32- or 64-bit commpage.  Called once for each.
  */
 
-void
-commpage_populate( void )
+static void
+commpage_populate_one( 
+       vm_map_t        submap,         // commpage32_map or compage64_map
+       char **         kernAddressPtr, // &commPagePtr32 or &commPagePtr64
+       size_t          area_used,      // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
+       commpage_address_t base_offset, // will become commPageBaseOffset
+       commpage_time_data** time_data, // &time_data32 or &time_data64
+       const char*     signature,      // "commpage 32-bit" or "commpage 64-bit"
+       vm_prot_t       uperm)
 {
-       short   c2;
-       static double   two52 = 1048576.0 * 1048576.0 * 4096.0; // 2**52
-       static double   ten6 = 1000000.0;                       // 10**6
-       commpage_descriptor **rd;
+       uint8_t         c1;
+       uint16_t        c2;
+       int             c4;
+       uint64_t        c8;
+       uint32_t        cfamily;
        short   version = _COMM_PAGE_THIS_VERSION;
 
-       commPagePtr = (char *)commpage_allocate();
+       next = 0;
+       commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used, uperm );
+       *kernAddressPtr = commPagePtr;                          // save address either in commPagePtr32 or 64
+       commPageBaseOffset = base_offset;
 
-       commpage_init_cpu_capabilities();
+       *time_data = commpage_addr_of( _COMM_PAGE_TIME_DATA_START );
 
        /* Stuff in the constants.  We move things into the comm page in strictly
        * ascending order, so we can check for overlap and panic if so.
+       * Note: the 32-bit cpu_capabilities vector is retained in addition to
+       * the expanded 64-bit vector.
        */
+       commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)MIN(_COMM_PAGE_SIGNATURELEN, strlen(signature)));
+       commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES64,&_cpu_capabilities,sizeof(_cpu_capabilities));
+       commpage_stuff(_COMM_PAGE_VERSION,&version,sizeof(short));
+       commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(uint32_t));
 
-       commpage_stuff2(_COMM_PAGE_VERSION,&version,sizeof(short));
-       commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,
-               sizeof(int));
-
-       if (_cpu_capabilities & kCache32)
-               c2 = 32;
-       else if (_cpu_capabilities & kCache64)
+       c2 = 32;  // default
+       if (_cpu_capabilities & kCache64)
                c2 = 64;
        else if (_cpu_capabilities & kCache128)
                c2 = 128;
        commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2);
 
-       c2 = 32;
+       c4 = MP_SPIN_TRIES;
+       commpage_stuff(_COMM_PAGE_SPIN_COUNT,&c4,4);
+
+       /* machine_info valid after ml_get_max_cpus() */
+       c1 = machine_info.physical_cpu_max;
+       commpage_stuff(_COMM_PAGE_PHYSICAL_CPUS,&c1,1);
+       c1 = machine_info.logical_cpu_max;
+       commpage_stuff(_COMM_PAGE_LOGICAL_CPUS,&c1,1);
+
+       c8 = ml_cpu_cache_size(0);
+       commpage_stuff(_COMM_PAGE_MEMORY_SIZE, &c8, 8);
+
+       cfamily = cpuid_info()->cpuid_cpufamily;
+       commpage_stuff(_COMM_PAGE_CPUFAMILY, &cfamily, 4);
+
+       if (next > _COMM_PAGE_END)
+               panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next, commPagePtr);
+
+}
+
+
+/* Fill in commpages: called once, during kernel initialization, from the
+ * startup thread before user-mode code is running.
+ *
+ * See the top of this file for a list of what you have to do to add
+ * a new routine to the commpage.
+ */  
 
-       commpage_stuff2(_COMM_PAGE_2_TO_52,&two52,8);
+void
+commpage_populate( void )
+{
+       commpage_init_cpu_capabilities();
+       
+       commpage_populate_one(  commpage32_map, 
+                               &commPagePtr32,
+                               _COMM_PAGE32_AREA_USED,
+                               _COMM_PAGE32_BASE_ADDRESS,
+                               &time_data32,
+                               "commpage 32-bit",
+                               VM_PROT_READ);
+#ifndef __LP64__
+       pmap_commpage32_init((vm_offset_t) commPagePtr32, _COMM_PAGE32_BASE_ADDRESS, 
+                          _COMM_PAGE32_AREA_USED/INTEL_PGBYTES);
+#endif                    
+       time_data64 = time_data32;                      /* if no 64-bit commpage, point to 32-bit */
+
+       if (_cpu_capabilities & k64Bit) {
+               commpage_populate_one(  commpage64_map, 
+                                       &commPagePtr64,
+                                       _COMM_PAGE64_AREA_USED,
+                                       _COMM_PAGE32_START_ADDRESS, /* commpage address are relative to 32-bit commpage placement */
+                                       &time_data64,
+                                       "commpage 64-bit",
+                                       VM_PROT_READ);
+#ifndef __LP64__
+               pmap_commpage64_init((vm_offset_t) commPagePtr64, _COMM_PAGE64_BASE_ADDRESS, 
+                                  _COMM_PAGE64_AREA_USED/INTEL_PGBYTES);
+#endif
+       }
 
-       commpage_stuff2(_COMM_PAGE_10_TO_6,&ten6,8);
+       simple_lock_init(&commpage_active_cpus_lock, 0);
+
+       commpage_update_active_cpus();
+       commpage_mach_approximate_time_init();
+       commpage_mach_continuous_time_init();
+       commpage_boottime_init();
+       rtc_nanotime_init_commpage();
+       commpage_update_kdebug_state();
+#if CONFIG_ATM
+       commpage_update_atm_diagnostic_config(atm_get_diagnostic_config());
+#endif
+}
 
-       for( rd = routines; *rd != NULL ; rd++ )
+/* Fill in the common routines during kernel initialization. 
+ * This is called before user-mode code is running.
+ */
+void commpage_text_populate( void ){
+       commpage_descriptor **rd;
+       
+       /* 32-bit text area: allocate it (user mapping is read+execute). */
+       next = 0;
+       commPagePtr = (char *) commpage_allocate(commpage_text32_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
+       commPageTextPtr32 = commPagePtr;
+       
+       /* Pre-fill the whole area with 0xCC so that bytes not covered by a
+        * routine hold a known pattern (0xCC is the x86 INT3 opcode).
+        */
+       char *cptr = commPagePtr;
+       int i=0;
+       for(; i< _COMM_PAGE_TEXT_AREA_USED; i++){
+               cptr[i]=0xCC;
+       }
+       
+       commPageBaseOffset = _COMM_PAGE_TEXT_START;
+       for (rd = commpage_32_routines; *rd != NULL; rd++) {
                commpage_stuff_routine(*rd);
+       }
+
+       /* Check the 32-bit fill now: "next" is reset below for the 64-bit pass,
+        * so a check made only at the end would never see a 32-bit overflow on
+        * 64-bit-capable machines.
+        */
+       if (next > _COMM_PAGE_TEXT_END) 
+               panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next, commPagePtr); 
+
+#ifndef __LP64__
+       pmap_commpage32_init((vm_offset_t) commPageTextPtr32, _COMM_PAGE_TEXT_START, 
+                          _COMM_PAGE_TEXT_AREA_USED/INTEL_PGBYTES);
+#endif 
+
+       if (_cpu_capabilities & k64Bit) {
+               /* 64-bit text area: same procedure with the 64-bit routine list. */
+               next = 0;
+               commPagePtr = (char *) commpage_allocate(commpage_text64_map, (vm_size_t) _COMM_PAGE_TEXT_AREA_USED, VM_PROT_READ | VM_PROT_EXECUTE);
+               commPageTextPtr64 = commPagePtr;
+
+               cptr=commPagePtr;
+               for(i=0; i<_COMM_PAGE_TEXT_AREA_USED; i++){
+                       cptr[i]=0xCC;
+               }
+
+               for (rd = commpage_64_routines; *rd !=NULL; rd++) {
+                       commpage_stuff_routine(*rd);
+               }
+
+#ifndef __LP64__
+       pmap_commpage64_init((vm_offset_t) commPageTextPtr64, _COMM_PAGE_TEXT_START, 
+                          _COMM_PAGE_TEXT_AREA_USED/INTEL_PGBYTES);
+#endif 
+       }
+
+       /* Final check: covers the 64-bit fill when it ran (otherwise it simply
+        * repeats the 32-bit check above).
+        */
+       if (next > _COMM_PAGE_TEXT_END) 
+               panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next, commPagePtr); 
+
+}
+
+/* Update commpage nanotime information.
+ *
+ * This routine must be serialized by some external means, ie a lock.
+ */
+
+void
+commpage_set_nanotime(
+       uint64_t        tsc_base,
+       uint64_t        ns_base,
+       uint32_t        scale,
+       uint32_t        shift )
+{
+       commpage_time_data      *p32 = time_data32;
+       commpage_time_data      *p64 = time_data64;
+       /* Last generation number this routine published; persists across calls. */
+       static uint32_t generation = 0;
+       uint32_t        next_gen;
+       
+       if (p32 == NULL)                /* have commpages been allocated yet? */
+               return;
+               
+       /* Sanity checks: the published generation must be the one we wrote last,
+        * ns_base must not move backwards, and a non-zero shift is only accepted
+        * when the kSlow capability bit is set.
+        */
+       if ( generation != p32->nt_generation )
+               panic("nanotime trouble 1");    /* possibly not serialized */
+       if ( ns_base < p32->nt_ns_base )
+               panic("nanotime trouble 2");
+       if ((shift != 0) && ((_cpu_capabilities & kSlow)==0) )
+               panic("nanotime trouble 3");
+               
+       /* Advance the generation, skipping 0 on wrap-around because 0 is the
+        * "invalid" marker written below.
+        */
+       next_gen = ++generation;
+       if (next_gen == 0)
+               next_gen = ++generation;
+       
+       p32->nt_generation = 0;         /* mark invalid, so commpage won't try to use it */
+       p64->nt_generation = 0;
+       
+       /* Each field is written to both pages back to back. */
+       p32->nt_tsc_base = tsc_base;
+       p64->nt_tsc_base = tsc_base;
+       
+       p32->nt_ns_base = ns_base;
+       p64->nt_ns_base = ns_base;
+       
+       p32->nt_scale = scale;
+       p64->nt_scale = scale;
+       
+       p32->nt_shift = shift;
+       p64->nt_shift = shift;
+       
+       p32->nt_generation = next_gen;  /* mark data as valid */
+       p64->nt_generation = next_gen;
+}
+
+
+/* Disable commpage gettimeofday(), forcing commpage to call through to the kernel.
+ *
+ * Implemented by storing 0 in gtod_generation of both time records
+ * (time_data32 and time_data64).
+ *
+ * NOTE(review): neither pointer is checked for NULL here, unlike
+ * commpage_set_nanotime() -- presumably this is only called once the
+ * commpages exist; confirm against callers.
+ */
+
+void
+commpage_disable_timestamp( void )
+{
+       time_data32->gtod_generation = 0;
+       time_data64->gtod_generation = 0;
+}
+
+
+/* Update commpage gettimeofday() information.  As with nanotime(), we interleave
+ * updates to the 32- and 64-bit commpage, in order to keep time more nearly in sync 
+ * between the two environments.
+ *
+ * abstime is stored in gtod_ns_base and secs in gtod_sec_base of both time
+ * records.  gtod_generation is held at 0 while the fields are rewritten and
+ * is set to a fresh non-zero value only after both fields have been stored.
+ *
+ * NOTE(review): time_data32 and time_data64 are dereferenced without a NULL
+ * check, unlike commpage_set_nanotime() -- presumably this is only called
+ * once the commpages exist; confirm against callers.
+ *
+ * This routine must be serialized by some external means, i.e. a lock.
+ */
+ void
+ commpage_set_timestamp(
+       uint64_t        abstime,
+       uint64_t        secs )
+{
+       commpage_time_data      *p32 = time_data32;
+       commpage_time_data      *p64 = time_data64;
+       static uint32_t generation = 0;         /* last generation value written by this routine */
+       uint32_t        next_gen;
+       
+       next_gen = ++generation;
+       if (next_gen == 0)                      /* 0 is the "invalid" marker, so skip it when the counter wraps */
+               next_gen = ++generation;
+       
+       p32->gtod_generation = 0;               /* mark invalid, so commpage won't try to use it */
+       p64->gtod_generation = 0;
+       
+       p32->gtod_ns_base = abstime;
+       p64->gtod_ns_base = abstime;
+       
+       p32->gtod_sec_base = secs;
+       p64->gtod_sec_base = secs;
+       
+       p32->gtod_generation = next_gen;        /* mark data as valid */
+       p64->gtod_generation = next_gen;
+}
+
+
+/* Update _COMM_PAGE_MEMORY_PRESSURE.  Called periodically from vm's compute_memory_pressure()
+ *
+ * Stores pressure, converted to uint32_t, in the _COMM_PAGE_MEMORY_PRESSURE
+ * slot of each commpage whose pointer (commPagePtr32 / commPagePtr64) is
+ * non-NULL; a NULL pointer causes that commpage to be skipped.
+ *
+ * NOTE(review): the 32-bit path subtracts _COMM_PAGE32_BASE_ADDRESS while the
+ * 64-bit path subtracts _COMM_PAGE32_START_ADDRESS -- presumably the slot
+ * addresses are defined relative to the 32-bit layout and the two macros
+ * are equal; confirm in the header that defines them.
+ */
+
+void
+commpage_set_memory_pressure(
+       unsigned int    pressure )
+{
+       char        *cp;        /* byte cursor into the commpage being updated */
+       uint32_t    *ip;        /* the 32-bit slot inside that commpage */
+       
+       cp = commPagePtr32;
+       if ( cp ) {
+               cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_BASE_ADDRESS); /* slot address -> offset from start of mapping */
+               ip = (uint32_t*) (void *) cp;
+               *ip = (uint32_t) pressure;
+       }
+       
+       cp = commPagePtr64;
+       if ( cp ) {
+               cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_START_ADDRESS);
+               ip = (uint32_t*) (void *) cp;
+               *ip = (uint32_t) pressure;
+       }
+
+}
+
+
+/* Update _COMM_PAGE_SPIN_COUNT.  We might want to reduce when running on a battery, etc.
+ *
+ * Stores count, converted to uint32_t, in the _COMM_PAGE_SPIN_COUNT slot of
+ * each commpage whose pointer (commPagePtr32 / commPagePtr64) is non-NULL.
+ * A count of 0 is replaced by 1 before it is stored.
+ */
+
+void
+commpage_set_spin_count(
+       unsigned int    count )
+{
+       char        *cp;        /* byte cursor into the commpage being updated */
+       uint32_t    *ip;        /* the 32-bit slot inside that commpage */
+       
+       if (count == 0)     /* we test for 0 after decrement, not before */
+           count = 1;
+           
+       cp = commPagePtr32;
+       if ( cp ) {
+               cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_BASE_ADDRESS);      /* slot address -> offset from start of mapping */
+               ip = (uint32_t*) (void *) cp;
+               *ip = (uint32_t) count;
+       }
+       
+       cp = commPagePtr64;
+       if ( cp ) {
+               cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_START_ADDRESS);
+               ip = (uint32_t*) (void *) cp;
+               *ip = (uint32_t) count;
+       }
+
+}
 
-       if (!matched)
-               panic("commpage no match on last routine");
+/* Updated every time a logical CPU goes offline/online.
+ *
+ * Stores processor_avail_count, truncated to uint8_t, in the
+ * _COMM_PAGE_ACTIVE_CPUS slot of the 32-bit commpage and, if commPagePtr64
+ * is non-NULL, of the 64-bit commpage as well.  Both stores are made while
+ * holding commpage_active_cpus_lock.  Returns without taking the lock if
+ * commPagePtr32 is NULL.
+ */
+void
+commpage_update_active_cpus(void)
+{
+       char        *cp;
+       volatile uint8_t    *ip;
+       
+       /* At least 32-bit commpage must be initialized */
+       if (!commPagePtr32)
+               return;
 
-       if (next > (uintptr_t)_COMM_PAGE_END)
-               panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%08x", next, (uintptr_t)commPagePtr);
+       simple_lock(&commpage_active_cpus_lock);
 
+       cp = commPagePtr32;
+       cp += (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_BASE_ADDRESS);     /* slot address -> offset from start of mapping */
+       ip = (volatile uint8_t*) cp;
+       *ip = (uint8_t) processor_avail_count;  /* only the low 8 bits are kept */
+       
+       cp = commPagePtr64;
+       if ( cp ) {
+               cp += (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_START_ADDRESS);
+               ip = (volatile uint8_t*) cp;
+               *ip = (uint8_t) processor_avail_count;
+       }
 
-       pmap_commpage_init((vm_offset_t) commPagePtr, _COMM_PAGE_BASE_ADDRESS, 
-                          _COMM_PAGE_AREA_LENGTH/INTEL_PGBYTES);
+       simple_unlock(&commpage_active_cpus_lock);
 }
 
 /*
- * This macro prevents compiler instruction scheduling:
+ * Update the commpage with current kdebug state. This currently has bits for
+ * global trace state, and typefilter enablement. It is likely additional state
+ * will be tracked in the future.
+ *
+ * INVARIANT: This value will always be 0 if global tracing is disabled. This
+ * allows simple guard tests of "if (*_COMM_PAGE_KDEBUG_ENABLE) { ... }"
+ *
+ * The value returned by kdebug_commpage_state() is stored as a 32-bit word
+ * in the _COMM_PAGE_KDEBUG_ENABLE slot of each commpage whose pointer
+ * (commPagePtr32 / commPagePtr64) is non-NULL.  kdebug_commpage_state() is
+ * called separately for each commpage that is written.
  */
-#define NO_REORDERING  asm volatile("" : : : "memory")
+void
+commpage_update_kdebug_state(void)
+{
+       volatile uint32_t *saved_data_ptr;      /* the 32-bit slot being written */
+       char *cp;                               /* byte cursor into the commpage being updated */
+
+       cp = commPagePtr32;
+       if (cp) {
+               cp += (_COMM_PAGE_KDEBUG_ENABLE - _COMM_PAGE32_BASE_ADDRESS);   /* slot address -> offset from start of mapping */
+               saved_data_ptr = (volatile uint32_t *)cp;
+               *saved_data_ptr = kdebug_commpage_state();
+       }
+
+       cp = commPagePtr64;
+       if (cp) {
+               cp += (_COMM_PAGE_KDEBUG_ENABLE - _COMM_PAGE32_START_ADDRESS);
+               saved_data_ptr = (volatile uint32_t *)cp;
+               *saved_data_ptr = kdebug_commpage_state();
+       }
+}
 
+/* Update the commpage copy of atm_diagnostic_config.
+ *
+ * Stores diagnostic_config as a 32-bit word in the
+ * _COMM_PAGE_ATM_DIAGNOSTIC_CONFIG slot of each commpage whose pointer
+ * (commPagePtr32 / commPagePtr64) is non-NULL.
+ */
 void
-commpage_set_nanotime(commpage_nanotime_t *newp)
+commpage_update_atm_diagnostic_config(uint32_t diagnostic_config)
 {
-       commpage_nanotime_t     *cnp;
+       volatile uint32_t *saved_data_ptr;      /* the 32-bit slot being written */
+       char *cp;                               /* byte cursor into the commpage being updated */
+
+       cp = commPagePtr32;
+       if (cp) {
+               cp += (_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG - _COMM_PAGE32_BASE_ADDRESS);   /* slot address -> offset from start of mapping */
+               saved_data_ptr = (volatile uint32_t *)cp;
+               *saved_data_ptr = diagnostic_config;
+       }
 
-       /* Nop if commpage not set up yet */
-       if (commPagePtr == NULL)
-               return;
+       cp = commPagePtr64;
+       if ( cp ) {
+               cp += (_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG - _COMM_PAGE32_START_ADDRESS);
+               saved_data_ptr = (volatile uint32_t *)cp;
+               *saved_data_ptr = diagnostic_config;
+       }
+}
 
-       cnp = (commpage_nanotime_t *)commpage_addr_of(_COMM_PAGE_NANOTIME_INFO);
+/*
+ * update the commpage data for last known value of mach_absolute_time()
+ *
+ * The body is compiled only when CONFIG_MACH_APPROXIMATE_TIME is defined;
+ * otherwise the function does nothing and abstime is marked unused.
+ *
+ * For each commpage whose pointer (commPagePtr32 / commPagePtr64) is
+ * non-NULL, the 64-bit value in the _COMM_PAGE_APPROX_TIME slot is read and,
+ * only if it is smaller than abstime, replaced with abstime through
+ * OSCompareAndSwap64().  The stored value is therefore never replaced by a
+ * smaller one from this routine.
+ */
 
-       /*
-        * Update in reverse order:
-        * check_tsc first - it's read and compared with base_tsc last.
-        */
-       cnp->nt_check_tsc = newp->nt_base_tsc;  NO_REORDERING;
-       cnp->nt_shift     = newp->nt_shift;     NO_REORDERING;
-       cnp->nt_scale     = newp->nt_scale;     NO_REORDERING;
-       cnp->nt_base_ns   = newp->nt_base_ns;   NO_REORDERING;
-       cnp->nt_base_tsc  = newp->nt_base_tsc;
+void
+commpage_update_mach_approximate_time(uint64_t abstime)
+{
+#ifdef CONFIG_MACH_APPROXIMATE_TIME
+       uint64_t saved_data;    /* value found in the slot before the update attempt */
+       char *cp;               /* byte cursor into the commpage being updated */
+       
+       cp = commPagePtr32;
+       if ( cp ) {
+               cp += (_COMM_PAGE_APPROX_TIME - _COMM_PAGE32_BASE_ADDRESS);     /* slot address -> offset from start of mapping */
+               saved_data = *(uint64_t *)cp;
+               if (saved_data < abstime) {
+                       /* ignoring the success/fail return value assuming that
+                        * if the value has been updated since we last read it,
+                        * "someone" has a newer timestamp than us and ours is
+                        * now invalid. */
+                       OSCompareAndSwap64(saved_data, abstime, (uint64_t *)cp);
+               }
+       }
+       cp = commPagePtr64;
+       if ( cp ) {
+               cp += (_COMM_PAGE_APPROX_TIME - _COMM_PAGE32_START_ADDRESS);
+               saved_data = *(uint64_t *)cp;
+               if (saved_data < abstime) {
+                       /* ignoring the success/fail return value assuming that
+                        * if the value has been updated since we last read it,
+                        * "someone" has a newer timestamp than us and ours is
+                        * now invalid. */
+                       OSCompareAndSwap64(saved_data, abstime, (uint64_t *)cp);
+               }
+       }
+#else
+#pragma unused (abstime)
+#endif
+}
+/* Store sleeptime as a 64-bit value in the _COMM_PAGE_CONT_TIMEBASE slot of
+ * each commpage whose pointer (commPagePtr32 / commPagePtr64) is non-NULL.
+ *
+ * NOTE(review): the 32-bit path here subtracts _COMM_PAGE32_START_ADDRESS,
+ * whereas other updaters in this file (e.g. commpage_set_memory_pressure)
+ * subtract _COMM_PAGE32_BASE_ADDRESS for commPagePtr32 -- this is only
+ * equivalent if the two macros have the same value; confirm.
+ */
+void
+commpage_update_mach_continuous_time(uint64_t sleeptime)
+{
+       char *cp;       /* byte cursor into the commpage being updated */
+       cp = commPagePtr32;
+       if (cp) {
+               cp += (_COMM_PAGE_CONT_TIMEBASE - _COMM_PAGE32_START_ADDRESS);  /* slot address -> offset from start of mapping */
+               *(uint64_t *)cp = sleeptime;
+       }
+       
+       cp = commPagePtr64;
+       if (cp) {
+               cp += (_COMM_PAGE_CONT_TIMEBASE - _COMM_PAGE32_START_ADDRESS);
+               *(uint64_t *)cp = sleeptime;
+       }
+}
+/* Store boottime as a 64-bit value in the _COMM_PAGE_BOOTTIME_USEC slot of
+ * each commpage whose pointer (commPagePtr32 / commPagePtr64) is non-NULL.
+ *
+ * NOTE(review): the slot name suggests the value is in microseconds, but
+ * nothing here converts or checks units -- confirm with callers.  As in
+ * commpage_update_mach_continuous_time(), the 32-bit path subtracts
+ * _COMM_PAGE32_START_ADDRESS rather than _COMM_PAGE32_BASE_ADDRESS; confirm
+ * the two macros are equal.
+ */
+void
+commpage_update_boottime(uint64_t boottime)
+{
+       char *cp;       /* byte cursor into the commpage being updated */
+       cp = commPagePtr32;
+       if (cp) {
+               cp += (_COMM_PAGE_BOOTTIME_USEC - _COMM_PAGE32_START_ADDRESS);  /* slot address -> offset from start of mapping */
+               *(uint64_t *)cp = boottime;
+       }
+
+       cp = commPagePtr64;
+       if (cp) {
+               cp += (_COMM_PAGE_BOOTTIME_USEC - _COMM_PAGE32_START_ADDRESS);
+               *(uint64_t *)cp = boottime;
+       }
+}
+
+/* Defined elsewhere.  In this file they serve as the bases to which the
+ * _COMM_TEXT_PFZ_START_OFFSET / _COMM_TEXT_PFZ_END_OFFSET values are added
+ * by commpage_is_in_pfz32() and commpage_is_in_pfz64(). */
+extern user32_addr_t commpage_text32_location;
+extern user64_addr_t commpage_text64_location;
+
+/* Check to see if a given address is in the Preemption Free Zone (PFZ)
+ *
+ * 32-bit variant.  Returns 1 when addr32 lies in the half-open range
+ *   [commpage_text32_location + _COMM_TEXT_PFZ_START_OFFSET,
+ *    commpage_text32_location + _COMM_TEXT_PFZ_END_OFFSET)
+ * and 0 otherwise.
+ */
+
+uint32_t
+commpage_is_in_pfz32(uint32_t addr32)
+{
+       if ( (addr32 >= (commpage_text32_location + _COMM_TEXT_PFZ_START_OFFSET)) 
+               && (addr32 < (commpage_text32_location+_COMM_TEXT_PFZ_END_OFFSET))) {   /* end offset is exclusive */
+               return 1;
+       }
+       else
+               return 0;
+}
+/* 64-bit counterpart of the PFZ check.  Returns 1 when addr64 lies in the
+ * half-open range
+ *   [commpage_text64_location + _COMM_TEXT_PFZ_START_OFFSET,
+ *    commpage_text64_location + _COMM_TEXT_PFZ_END_OFFSET)
+ * and 0 otherwise.
+ */
+uint32_t
+commpage_is_in_pfz64(addr64_t addr64)
+{
+       if ( (addr64 >= (commpage_text64_location + _COMM_TEXT_PFZ_START_OFFSET))
+            && (addr64 <  (commpage_text64_location + _COMM_TEXT_PFZ_END_OFFSET))) {   /* end offset is exclusive */
+               return 1;
+       }
+       else
+               return 0;
 }