2 * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * Here's what to do if you want to add a new routine to the comm page:
32 * 1. Add a definition for its address in osfmk/i386/cpu_capabilities.h,
33 * being careful to reserve room for future expansion.
35 * 2. Write one or more versions of the routine, each with its own
36 * commpage_descriptor. The tricky part is getting the "special",
37 * "musthave", and "canthave" fields right, so that exactly one
38 * version of the routine is selected for every machine.
39 * The source files should be in osfmk/i386/commpage/.
41 * 3. Add a ptr to your new commpage_descriptor(s) in the "routines"
42 * array in osfmk/i386/commpage/commpage_asm.s. There are two
43 * arrays, one for the 32-bit and one for the 64-bit commpage.
45 * 4. Write the code in Libc to use the new routine.
48 #include <mach/mach_types.h>
49 #include <mach/machine.h>
50 #include <mach/vm_map.h>
51 #include <mach/mach_vm.h>
52 #include <mach/machine.h>
53 #include <i386/cpuid.h>
55 #include <i386/rtclock_protos.h>
56 #include <i386/cpu_data.h>
57 #include <i386/machine_routines.h>
58 #include <i386/misc_protos.h>
59 #include <i386/cpuid.h>
60 #include <machine/cpu_capabilities.h>
61 #include <machine/commpage.h>
62 #include <machine/pmap.h>
63 #include <vm/vm_kern.h>
64 #include <vm/vm_map.h>
66 #include <ipc/ipc_port.h>
68 #include <kern/page_decrypt.h>
69 #include <kern/processor.h>
71 /* the lists of commpage routines are in commpage_asm.s */
// NULL-terminated arrays of routine descriptors; walked by commpage_text_populate()
72 extern commpage_descriptor
* commpage_32_routines
[];
73 extern commpage_descriptor
* commpage_64_routines
[];
75 extern vm_map_t commpage32_map
; // the shared submap for the 32-bit commpage (data), set up in vm init
76 extern vm_map_t commpage64_map
; // the shared submap for the 64-bit commpage (data), set up in vm init
77 extern vm_map_t commpage_text32_map
; // the shared submap for the 32-bit commpage text, set up in vm init
78 extern vm_map_t commpage_text64_map
; // the shared submap for the 64-bit commpage text, set up in vm init
81 char *commPagePtr32
= NULL
; // virtual addr in kernel map of 32-bit commpage
82 char *commPagePtr64
= NULL
; // ...and of 64-bit commpage
83 char *commPageTextPtr32
= NULL
; // virtual addr in kernel map of 32-bit commpage text (set in commpage_text_populate)
84 char *commPageTextPtr64
= NULL
; // ...and of 64-bit commpage text
86 uint64_t _cpu_capabilities
= 0; // define the capability vector; filled in by commpage_init_cpu_capabilities()
// a commpage address as seen at runtime; commpage_addr_of() converts it to a kernel-map pointer
88 typedef uint32_t commpage_address_t
;
90 static commpage_address_t next
; // next available address in comm page; advanced by commpage_stuff() after each copy
92 static char *commPagePtr
; // virtual addr in kernel map of commpage we are working on
93 static commpage_address_t commPageBaseOffset
; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map
95 static commpage_time_data
*time_data32
= NULL
; // time data area of the 32-bit commpage; set in commpage_populate_one()
96 static commpage_time_data
*time_data64
= NULL
; // ...and of the 64-bit commpage; pointed at time_data32 when there is no 64-bit commpage
98 decl_simple_lock_data(static,commpage_active_cpus_lock
); // held while commpage_update_active_cpus() writes the active CPU count
100 /* Allocate the commpage and add to the shared submap created by vm:
101 * 1. allocate a page in the kernel map (RW)
102 * 2. wire it down
103 * 3. make a memory entry out of it
104 * 4. map that entry into the shared comm region map (R-only)
109 vm_map_t submap
, // commpage32_map or commpage64_map
110 size_t area_used
, // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
113 vm_offset_t kernel_addr
= 0; // address of commpage in kernel map
114 vm_offset_t zero
= 0;
115 vm_size_t size
= area_used
; // size actually populated
116 vm_map_entry_t entry
;
121 panic("commpage submap is null");
123 if ((kr
= vm_map(kernel_map
,&kernel_addr
,area_used
,0,VM_FLAGS_ANYWHERE
,NULL
,0,FALSE
,VM_PROT_ALL
,VM_PROT_ALL
,VM_INHERIT_NONE
)))
124 panic("cannot allocate commpage %d", kr
);
126 if ((kr
= vm_map_wire(kernel_map
,kernel_addr
,kernel_addr
+area_used
,VM_PROT_DEFAULT
,FALSE
)))
127 panic("cannot wire commpage: %d", kr
);
130 * Now that the object is created and wired into the kernel map, mark it so that no delayed
131 * copy-on-write will ever be performed on it as a result of mapping it into user-space.
132 * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
133 * that would be a real disaster.
135 * JMM - What we really need is a way to create it like this in the first place.
137 if (!(kr
= vm_map_lookup_entry( kernel_map
, vm_map_trunc_page(kernel_addr
, VM_MAP_PAGE_MASK(kernel_map
)), &entry
) || entry
->is_sub_map
))
138 panic("cannot find commpage entry %d", kr
);
139 entry
->object
.vm_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
141 if ((kr
= mach_make_memory_entry( kernel_map
, // target map
143 kernel_addr
, // offset (address in kernel map)
144 uperm
, // protections as specified
145 &handle
, // this is the object handle we get
146 NULL
))) // parent_entry (what is this?)
147 panic("cannot make entry for commpage %d", kr
);
149 if ((kr
= vm_map_64( submap
, // target map (shared submap)
150 &zero
, // address (map into 1st page in submap)
153 VM_FLAGS_FIXED
, // flags (it must be 1st page in submap)
154 handle
, // port is the memory entry we just made
155 0, // offset (map 1st page in memory entry)
157 uperm
, // cur_protection (R-only in user map)
158 uperm
, // max_protection
159 VM_INHERIT_SHARE
))) // inheritance
160 panic("cannot map commpage %d", kr
);
162 ipc_port_release(handle
);
163 /* Make the kernel mapping non-executable. This cannot be done
164 * at the time of map entry creation as mach_make_memory_entry
165 * cannot handle disjoint permissions at this time.
167 kr
= vm_protect(kernel_map
, kernel_addr
, area_used
, FALSE
, VM_PROT_READ
| VM_PROT_WRITE
);
168 assert (kr
== KERN_SUCCESS
);
170 return (void*)(intptr_t)kernel_addr
; // return address in kernel map
173 /* Get address (in kernel map) of a commpage field. */
177 commpage_address_t addr_at_runtime
)
179 return (void*) ((uintptr_t)commPagePtr
+ (addr_at_runtime
- commPageBaseOffset
));
182 /* Determine number of CPUs on this system. We cannot rely on
183 * machine_info.max_cpus this early in the boot.
186 commpage_cpus( void )
190 cpus
= ml_get_max_cpus(); // NB: this call can block
193 panic("commpage cpus==0");
200 /* Initialize kernel version of _cpu_capabilities vector (used by KEXTs.) */
203 commpage_init_cpu_capabilities( void )
207 ml_cpu_info_t cpu_info
;
210 ml_cpu_get_info(&cpu_info
);
212 switch (cpu_info
.vector_unit
) {
223 bits
|= kHasSupplementalSSE3
;
239 switch (cpu_info
.cache_line_size
) {
252 cpus
= commpage_cpus(); // how many CPUs do we have
254 bits
|= (cpus
<< kNumCPUsShift
);
256 bits
|= kFastThreadLocalStorage
; // we use %gs for TLS
258 #define setif(_bits, _bit, _condition) \
259 if (_condition) _bits |= _bit
261 setif(bits
, kUP
, cpus
== 1);
262 setif(bits
, k64Bit
, cpu_mode_is64bit());
263 setif(bits
, kSlow
, tscFreq
<= SLOW_TSC_THRESHOLD
);
265 setif(bits
, kHasAES
, cpuid_features() &
267 setif(bits
, kHasF16C
, cpuid_features() &
269 setif(bits
, kHasRDRAND
, cpuid_features() &
270 CPUID_FEATURE_RDRAND
);
271 setif(bits
, kHasFMA
, cpuid_features() &
274 setif(bits
, kHasBMI1
, cpuid_leaf7_features() &
275 CPUID_LEAF7_FEATURE_BMI1
);
276 setif(bits
, kHasBMI2
, cpuid_leaf7_features() &
277 CPUID_LEAF7_FEATURE_BMI2
);
278 setif(bits
, kHasRTM
, cpuid_leaf7_features() &
279 CPUID_LEAF7_FEATURE_RTM
);
280 setif(bits
, kHasHLE
, cpuid_leaf7_features() &
281 CPUID_LEAF7_FEATURE_HLE
);
282 setif(bits
, kHasAVX2_0
, cpuid_leaf7_features() &
283 CPUID_LEAF7_FEATURE_AVX2
);
285 uint64_t misc_enable
= rdmsr64(MSR_IA32_MISC_ENABLE
);
286 setif(bits
, kHasENFSTRG
, (misc_enable
& 1ULL) &&
287 (cpuid_leaf7_features() &
288 CPUID_LEAF7_FEATURE_ERMS
));
290 _cpu_capabilities
= bits
; // set kernel version for use by drivers etc
293 /* initialize the approx_time_supported flag and set the approx time to 0.
294 * Called during initial commpage population.
297 commpage_mach_approximate_time_init(void)
299 char *cp
= commPagePtr32
;
302 #ifdef CONFIG_MACH_APPROXIMATE_TIME
308 cp
+= (_COMM_PAGE_APPROX_TIME_SUPPORTED
- _COMM_PAGE32_BASE_ADDRESS
);
309 *(boolean_t
*)cp
= supported
;
313 cp
+= (_COMM_PAGE_APPROX_TIME_SUPPORTED
- _COMM_PAGE32_START_ADDRESS
);
314 *(boolean_t
*)cp
= supported
;
316 commpage_update_mach_approximate_time(0);
321 _get_cpu_capabilities(void)
323 return _cpu_capabilities
;
326 /* Copy data into commpage. */
330 commpage_address_t address
,
334 void *dest
= commpage_addr_of(address
);
337 panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest
, address
, next
);
339 bcopy(source
,dest
,length
);
341 next
= address
+ length
;
344 /* Copy a routine into comm page if it matches running machine.
347 commpage_stuff_routine(
348 commpage_descriptor
*rd
)
350 commpage_stuff(rd
->commpage_address
,rd
->code_address
,rd
->code_length
);
353 /* Fill in the 32- or 64-bit commpage. Called once for each.
357 commpage_populate_one(
358 vm_map_t submap
, // commpage32_map or commpage64_map
359 char ** kernAddressPtr
, // &commPagePtr32 or &commPagePtr64
360 size_t area_used
, // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
361 commpage_address_t base_offset
, // will become commPageBaseOffset
362 commpage_time_data
** time_data
, // &time_data32 or &time_data64
363 const char* signature
, // "commpage 32-bit" or "commpage 64-bit"
371 short version
= _COMM_PAGE_THIS_VERSION
;
374 commPagePtr
= (char *)commpage_allocate( submap
, (vm_size_t
) area_used
, uperm
);
375 *kernAddressPtr
= commPagePtr
; // save address either in commPagePtr32 or 64
376 commPageBaseOffset
= base_offset
;
378 *time_data
= commpage_addr_of( _COMM_PAGE_TIME_DATA_START
);
380 /* Stuff in the constants. We move things into the comm page in strictly
381 * ascending order, so we can check for overlap and panic if so.
382 * Note: the 32-bit cpu_capabilities vector is retained in addition to
383 * the expanded 64-bit vector.
385 commpage_stuff(_COMM_PAGE_SIGNATURE
,signature
,(int)MIN(_COMM_PAGE_SIGNATURELEN
, strlen(signature
)));
386 commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES64
,&_cpu_capabilities
,sizeof(_cpu_capabilities
));
387 commpage_stuff(_COMM_PAGE_VERSION
,&version
,sizeof(short));
388 commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES
,&_cpu_capabilities
,sizeof(uint32_t));
391 if (_cpu_capabilities
& kCache64
)
393 else if (_cpu_capabilities
& kCache128
)
395 commpage_stuff(_COMM_PAGE_CACHE_LINESIZE
,&c2
,2);
398 commpage_stuff(_COMM_PAGE_SPIN_COUNT
,&c4
,4);
400 /* machine_info valid after ml_get_max_cpus() */
401 c1
= machine_info
.physical_cpu_max
;
402 commpage_stuff(_COMM_PAGE_PHYSICAL_CPUS
,&c1
,1);
403 c1
= machine_info
.logical_cpu_max
;
404 commpage_stuff(_COMM_PAGE_LOGICAL_CPUS
,&c1
,1);
406 c8
= ml_cpu_cache_size(0);
407 commpage_stuff(_COMM_PAGE_MEMORY_SIZE
, &c8
, 8);
409 cfamily
= cpuid_info()->cpuid_cpufamily
;
410 commpage_stuff(_COMM_PAGE_CPUFAMILY
, &cfamily
, 4);
412 if (next
> _COMM_PAGE_END
)
413 panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next
, commPagePtr
);
418 /* Fill in commpages: called once, during kernel initialization, from the
419 * startup thread before user-mode code is running.
421 * See the top of this file for a list of what you have to do to add
422 * a new routine to the commpage.
426 commpage_populate( void )
428 commpage_init_cpu_capabilities();
430 commpage_populate_one( commpage32_map
,
432 _COMM_PAGE32_AREA_USED
,
433 _COMM_PAGE32_BASE_ADDRESS
,
438 pmap_commpage32_init((vm_offset_t
) commPagePtr32
, _COMM_PAGE32_BASE_ADDRESS
,
439 _COMM_PAGE32_AREA_USED
/INTEL_PGBYTES
);
441 time_data64
= time_data32
; /* if no 64-bit commpage, point to 32-bit */
443 if (_cpu_capabilities
& k64Bit
) {
444 commpage_populate_one( commpage64_map
,
446 _COMM_PAGE64_AREA_USED
,
447 _COMM_PAGE32_START_ADDRESS
, /* commpage address are relative to 32-bit commpage placement */
452 pmap_commpage64_init((vm_offset_t
) commPagePtr64
, _COMM_PAGE64_BASE_ADDRESS
,
453 _COMM_PAGE64_AREA_USED
/INTEL_PGBYTES
);
457 simple_lock_init(&commpage_active_cpus_lock
, 0);
459 commpage_update_active_cpus();
460 commpage_mach_approximate_time_init();
461 rtc_nanotime_init_commpage();
464 /* Fill in the common routines during kernel initialization.
465 * This is called before user-mode code is running.
467 void commpage_text_populate( void ){
468 commpage_descriptor
**rd
;
471 commPagePtr
= (char *) commpage_allocate(commpage_text32_map
, (vm_size_t
) _COMM_PAGE_TEXT_AREA_USED
, VM_PROT_READ
| VM_PROT_EXECUTE
);
472 commPageTextPtr32
= commPagePtr
;
474 char *cptr
= commPagePtr
;
476 for(; i
< _COMM_PAGE_TEXT_AREA_USED
; i
++){
480 commPageBaseOffset
= _COMM_PAGE_TEXT_START
;
481 for (rd
= commpage_32_routines
; *rd
!= NULL
; rd
++) {
482 commpage_stuff_routine(*rd
);
486 pmap_commpage32_init((vm_offset_t
) commPageTextPtr32
, _COMM_PAGE_TEXT_START
,
487 _COMM_PAGE_TEXT_AREA_USED
/INTEL_PGBYTES
);
490 if (_cpu_capabilities
& k64Bit
) {
492 commPagePtr
= (char *) commpage_allocate(commpage_text64_map
, (vm_size_t
) _COMM_PAGE_TEXT_AREA_USED
, VM_PROT_READ
| VM_PROT_EXECUTE
);
493 commPageTextPtr64
= commPagePtr
;
496 for(i
=0; i
<_COMM_PAGE_TEXT_AREA_USED
; i
++){
500 for (rd
= commpage_64_routines
; *rd
!=NULL
; rd
++) {
501 commpage_stuff_routine(*rd
);
505 pmap_commpage64_init((vm_offset_t
) commPageTextPtr64
, _COMM_PAGE_TEXT_START
,
506 _COMM_PAGE_TEXT_AREA_USED
/INTEL_PGBYTES
);
510 if (next
> _COMM_PAGE_TEXT_END
)
511 panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next
, commPagePtr
);
515 /* Update commpage nanotime information.
517 * This routine must be serialized by some external means, i.e. a lock.
521 commpage_set_nanotime(
527 commpage_time_data
*p32
= time_data32
;
528 commpage_time_data
*p64
= time_data64
;
529 static uint32_t generation
= 0;
532 if (p32
== NULL
) /* have commpages been allocated yet? */
535 if ( generation
!= p32
->nt_generation
)
536 panic("nanotime trouble 1"); /* possibly not serialized */
537 if ( ns_base
< p32
->nt_ns_base
)
538 panic("nanotime trouble 2");
539 if ((shift
!= 0) && ((_cpu_capabilities
& kSlow
)==0) )
540 panic("nanotime trouble 3");
542 next_gen
= ++generation
;
544 next_gen
= ++generation
;
546 p32
->nt_generation
= 0; /* mark invalid, so commpage won't try to use it */
547 p64
->nt_generation
= 0;
549 p32
->nt_tsc_base
= tsc_base
;
550 p64
->nt_tsc_base
= tsc_base
;
552 p32
->nt_ns_base
= ns_base
;
553 p64
->nt_ns_base
= ns_base
;
555 p32
->nt_scale
= scale
;
556 p64
->nt_scale
= scale
;
558 p32
->nt_shift
= shift
;
559 p64
->nt_shift
= shift
;
561 p32
->nt_generation
= next_gen
; /* mark data as valid */
562 p64
->nt_generation
= next_gen
;
566 /* Disable commpage gettimeofday(), forcing commpage to call through to the kernel. */
569 commpage_disable_timestamp( void )
571 time_data32
->gtod_generation
= 0;
572 time_data64
->gtod_generation
= 0;
576 /* Update commpage gettimeofday() information. As with nanotime(), we interleave
577 * updates to the 32- and 64-bit commpage, in order to keep time more nearly in sync
578 * between the two environments.
580 * This routine must be serialized by some external means, i.e. a lock.
584 commpage_set_timestamp(
588 commpage_time_data
*p32
= time_data32
;
589 commpage_time_data
*p64
= time_data64
;
590 static uint32_t generation
= 0;
593 next_gen
= ++generation
;
595 next_gen
= ++generation
;
597 p32
->gtod_generation
= 0; /* mark invalid, so commpage won't try to use it */
598 p64
->gtod_generation
= 0;
600 p32
->gtod_ns_base
= abstime
;
601 p64
->gtod_ns_base
= abstime
;
603 p32
->gtod_sec_base
= secs
;
604 p64
->gtod_sec_base
= secs
;
606 p32
->gtod_generation
= next_gen
; /* mark data as valid */
607 p64
->gtod_generation
= next_gen
;
611 /* Update _COMM_PAGE_MEMORY_PRESSURE. Called periodically from vm's compute_memory_pressure() */
614 commpage_set_memory_pressure(
615 unsigned int pressure
)
622 cp
+= (_COMM_PAGE_MEMORY_PRESSURE
- _COMM_PAGE32_BASE_ADDRESS
);
623 ip
= (uint32_t*) (void *) cp
;
624 *ip
= (uint32_t) pressure
;
629 cp
+= (_COMM_PAGE_MEMORY_PRESSURE
- _COMM_PAGE32_START_ADDRESS
);
630 ip
= (uint32_t*) (void *) cp
;
631 *ip
= (uint32_t) pressure
;
637 /* Update _COMM_PAGE_SPIN_COUNT. We might want to reduce when running on a battery, etc. */
640 commpage_set_spin_count(
646 if (count
== 0) /* we test for 0 after decrement, not before */
651 cp
+= (_COMM_PAGE_SPIN_COUNT
- _COMM_PAGE32_BASE_ADDRESS
);
652 ip
= (uint32_t*) (void *) cp
;
653 *ip
= (uint32_t) count
;
658 cp
+= (_COMM_PAGE_SPIN_COUNT
- _COMM_PAGE32_START_ADDRESS
);
659 ip
= (uint32_t*) (void *) cp
;
660 *ip
= (uint32_t) count
;
665 /* Updated every time a logical CPU goes offline/online */
667 commpage_update_active_cpus(void)
670 volatile uint8_t *ip
;
672 /* At least 32-bit commpage must be initialized */
676 simple_lock(&commpage_active_cpus_lock
);
679 cp
+= (_COMM_PAGE_ACTIVE_CPUS
- _COMM_PAGE32_BASE_ADDRESS
);
680 ip
= (volatile uint8_t*) cp
;
681 *ip
= (uint8_t) processor_avail_count
;
685 cp
+= (_COMM_PAGE_ACTIVE_CPUS
- _COMM_PAGE32_START_ADDRESS
);
686 ip
= (volatile uint8_t*) cp
;
687 *ip
= (uint8_t) processor_avail_count
;
690 simple_unlock(&commpage_active_cpus_lock
);
694 * update the commpage data for last known value of mach_absolute_time()
698 commpage_update_mach_approximate_time(uint64_t abstime
)
700 #ifdef CONFIG_MACH_APPROXIMATE_TIME
706 cp
+= (_COMM_PAGE_APPROX_TIME
- _COMM_PAGE32_BASE_ADDRESS
);
707 saved_data
= *(uint64_t *)cp
;
708 if (saved_data
< abstime
) {
709 /* ignoring the success/fail return value assuming that
710 * if the value has been updated since we last read it,
711 * "someone" has a newer timestamp than us and ours is
713 OSCompareAndSwap64(saved_data
, abstime
, (uint64_t *)cp
);
718 cp
+= (_COMM_PAGE_APPROX_TIME
- _COMM_PAGE32_START_ADDRESS
);
719 saved_data
= *(uint64_t *)cp
;
720 if (saved_data
< abstime
) {
721 /* ignoring the success/fail return value assuming that
722 * if the value has been updated since we last read it,
723 * "someone" has a newer timestamp than us and ours is
725 OSCompareAndSwap64(saved_data
, abstime
, (uint64_t *)cp
);
729 #pragma unused (abstime)
734 extern user32_addr_t commpage_text32_location
;
735 extern user64_addr_t commpage_text64_location
;
737 /* Check to see if a given address is in the Preemption Free Zone (PFZ) */
740 commpage_is_in_pfz32(uint32_t addr32
)
742 if ( (addr32
>= (commpage_text32_location
+ _COMM_TEXT_PFZ_START_OFFSET
))
743 && (addr32
< (commpage_text32_location
+_COMM_TEXT_PFZ_END_OFFSET
))) {
751 commpage_is_in_pfz64(addr64_t addr64
)
753 if ( (addr64
>= (commpage_text64_location
+ _COMM_TEXT_PFZ_START_OFFSET
))
754 && (addr64
< (commpage_text64_location
+ _COMM_TEXT_PFZ_END_OFFSET
))) {