2 * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * Here's what to do if you want to add a new routine to the comm page:
32 * 1. Add a definition for its address in osfmk/i386/cpu_capabilities.h,
33 * being careful to reserve room for future expansion.
35 * 2. Write one or more versions of the routine, each with its own
36 * commpage_descriptor. The tricky part is getting the "special",
37 * "musthave", and "canthave" fields right, so that exactly one
38 * version of the routine is selected for every machine.
39 * The source files should be in osfmk/i386/commpage/.
41 * 3. Add a ptr to your new commpage_descriptor(s) in the "routines"
42 * array in osfmk/i386/commpage/commpage_asm.s. There are two
43 * arrays, one for the 32-bit and one for the 64-bit commpage.
45 * 4. Write the code in Libc to use the new routine.
48 #include <mach/mach_types.h>
49 #include <mach/machine.h>
50 #include <mach/vm_map.h>
51 #include <mach/mach_vm.h>
52 #include <mach/machine.h>
53 #include <i386/cpuid.h>
55 #include <i386/rtclock_protos.h>
56 #include <i386/cpu_data.h>
57 #include <i386/machine_routines.h>
58 #include <i386/misc_protos.h>
59 #include <i386/cpuid.h>
60 #include <machine/cpu_capabilities.h>
61 #include <machine/commpage.h>
62 #include <machine/pmap.h>
63 #include <vm/vm_kern.h>
64 #include <vm/vm_map.h>
66 #include <ipc/ipc_port.h>
68 #include <kern/page_decrypt.h>
69 #include <kern/processor.h>
71 /* the lists of commpage routines are in commpage_asm.s */
72 extern commpage_descriptor
* commpage_32_routines
[];
73 extern commpage_descriptor
* commpage_64_routines
[];
75 extern vm_map_t commpage32_map
; // the shared submap, set up in vm init
76 extern vm_map_t commpage64_map
; // the shared submap, set up in vm init
77 extern vm_map_t commpage_text32_map
; // the shared submap, set up in vm init
78 extern vm_map_t commpage_text64_map
; // the shared submap, set up in vm init
81 char *commPagePtr32
= NULL
; // virtual addr in kernel map of 32-bit commpage
82 char *commPagePtr64
= NULL
; // ...and of 64-bit commpage
83 char *commPageTextPtr32
= NULL
; // virtual addr in kernel map of 32-bit text commpage (set by commpage_text_populate)
84 char *commPageTextPtr64
= NULL
; // virtual addr in kernel map of 64-bit text commpage (set only when k64Bit is present)
86 uint64_t _cpu_capabilities
= 0; // define the capability vector
88 typedef uint32_t commpage_address_t
;
90 static commpage_address_t next
; // next available address in comm page
92 static char *commPagePtr
; // virtual addr in kernel map of commpage we are working on
93 static commpage_address_t commPageBaseOffset
; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map
95 static commpage_time_data
*time_data32
= NULL
;
96 static commpage_time_data
*time_data64
= NULL
;
98 decl_simple_lock_data(static,commpage_active_cpus_lock
);
100 /* Allocate the commpage and add to the shared submap created by vm:
101 * 1. allocate a page in the kernel map (RW)
102 * 2. wire it down
103 * 3. make a memory entry out of it
104 * 4. map that entry into the shared comm region map (R-only)
109 vm_map_t submap
, // commpage32_map or commpage_map64
110 size_t area_used
, // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
113 vm_offset_t kernel_addr
= 0; // address of commpage in kernel map
114 vm_offset_t zero
= 0;
115 vm_size_t size
= area_used
; // size actually populated
116 vm_map_entry_t entry
;
121 panic("commpage submap is null");
123 if ((kr
= vm_map(kernel_map
,&kernel_addr
,area_used
,0,VM_FLAGS_ANYWHERE
,NULL
,0,FALSE
,VM_PROT_ALL
,VM_PROT_ALL
,VM_INHERIT_NONE
)))
124 panic("cannot allocate commpage %d", kr
);
126 if ((kr
= vm_map_wire(kernel_map
,kernel_addr
,kernel_addr
+area_used
,VM_PROT_DEFAULT
,FALSE
)))
127 panic("cannot wire commpage: %d", kr
);
130 * Now that the object is created and wired into the kernel map, mark it so that no delayed
131 * copy-on-write will ever be performed on it as a result of mapping it into user-space.
132 * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
133 * that would be a real disaster.
135 * JMM - What we really need is a way to create it like this in the first place.
137 if (!(kr
= vm_map_lookup_entry( kernel_map
, vm_map_trunc_page(kernel_addr
, VM_MAP_PAGE_MASK(kernel_map
)), &entry
) || entry
->is_sub_map
))
138 panic("cannot find commpage entry %d", kr
);
139 entry
->object
.vm_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
141 if ((kr
= mach_make_memory_entry( kernel_map
, // target map
143 kernel_addr
, // offset (address in kernel map)
144 uperm
, // protections as specified
145 &handle
, // this is the object handle we get
146 NULL
))) // parent_entry (what is this?)
147 panic("cannot make entry for commpage %d", kr
);
149 if ((kr
= vm_map_64( submap
, // target map (shared submap)
150 &zero
, // address (map into 1st page in submap)
153 VM_FLAGS_FIXED
, // flags (it must be 1st page in submap)
154 handle
, // port is the memory entry we just made
155 0, // offset (map 1st page in memory entry)
157 uperm
, // cur_protection (R-only in user map)
158 uperm
, // max_protection
159 VM_INHERIT_SHARE
))) // inheritance
160 panic("cannot map commpage %d", kr
);
162 ipc_port_release(handle
);
163 /* Make the kernel mapping non-executable. This cannot be done
164 * at the time of map entry creation as mach_make_memory_entry
165 * cannot handle disjoint permissions at this time.
167 kr
= vm_protect(kernel_map
, kernel_addr
, area_used
, FALSE
, VM_PROT_READ
| VM_PROT_WRITE
);
168 assert (kr
== KERN_SUCCESS
);
170 return (void*)(intptr_t)kernel_addr
; // return address in kernel map
173 /* Get address (in kernel map) of a commpage field. */
177 commpage_address_t addr_at_runtime
)
179 return (void*) ((uintptr_t)commPagePtr
+ (addr_at_runtime
- commPageBaseOffset
));
182 /* Determine number of CPUs on this system. We cannot rely on
183 * machine_info.max_cpus this early in the boot.
186 commpage_cpus( void )
190 cpus
= ml_get_max_cpus(); // NB: this call can block
193 panic("commpage cpus==0");
200 /* Initialize kernel version of _cpu_capabilities vector (used by KEXTs.) */
203 commpage_init_cpu_capabilities( void )
207 ml_cpu_info_t cpu_info
;
210 ml_cpu_get_info(&cpu_info
);
212 switch (cpu_info
.vector_unit
) {
223 bits
|= kHasSupplementalSSE3
;
239 switch (cpu_info
.cache_line_size
) {
252 cpus
= commpage_cpus(); // how many CPUs do we have
254 bits
|= (cpus
<< kNumCPUsShift
);
256 bits
|= kFastThreadLocalStorage
; // we use %gs for TLS
258 #define setif(_bits, _bit, _condition) \
259 if (_condition) _bits |= _bit
261 setif(bits
, kUP
, cpus
== 1);
262 setif(bits
, k64Bit
, cpu_mode_is64bit());
263 setif(bits
, kSlow
, tscFreq
<= SLOW_TSC_THRESHOLD
);
265 setif(bits
, kHasAES
, cpuid_features() &
267 setif(bits
, kHasF16C
, cpuid_features() &
269 setif(bits
, kHasRDRAND
, cpuid_features() &
270 CPUID_FEATURE_RDRAND
);
271 setif(bits
, kHasFMA
, cpuid_features() &
274 setif(bits
, kHasBMI1
, cpuid_leaf7_features() &
275 CPUID_LEAF7_FEATURE_BMI1
);
276 setif(bits
, kHasBMI2
, cpuid_leaf7_features() &
277 CPUID_LEAF7_FEATURE_BMI2
);
278 setif(bits
, kHasRTM
, cpuid_leaf7_features() &
279 CPUID_LEAF7_FEATURE_RTM
);
280 setif(bits
, kHasHLE
, cpuid_leaf7_features() &
281 CPUID_LEAF7_FEATURE_HLE
);
282 setif(bits
, kHasAVX2_0
, cpuid_leaf7_features() &
283 CPUID_LEAF7_FEATURE_AVX2
);
285 uint64_t misc_enable
= rdmsr64(MSR_IA32_MISC_ENABLE
);
286 setif(bits
, kHasENFSTRG
, (misc_enable
& 1ULL) &&
287 (cpuid_leaf7_features() &
288 CPUID_LEAF7_FEATURE_ENFSTRG
));
290 _cpu_capabilities
= bits
; // set kernel version for use by drivers etc
294 _get_cpu_capabilities(void)
296 return _cpu_capabilities
;
299 /* Copy data into commpage. */
303 commpage_address_t address
,
307 void *dest
= commpage_addr_of(address
);
310 panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest
, address
, next
);
312 bcopy(source
,dest
,length
);
314 next
= address
+ length
;
317 /* Copy a routine into comm page if it matches running machine.
320 commpage_stuff_routine(
321 commpage_descriptor
*rd
)
323 commpage_stuff(rd
->commpage_address
,rd
->code_address
,rd
->code_length
);
326 /* Fill in the 32- or 64-bit commpage. Called once for each.
330 commpage_populate_one(
331 vm_map_t submap
, // commpage32_map or compage64_map
332 char ** kernAddressPtr
, // &commPagePtr32 or &commPagePtr64
333 size_t area_used
, // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
334 commpage_address_t base_offset
, // will become commPageBaseOffset
335 commpage_time_data
** time_data
, // &time_data32 or &time_data64
336 const char* signature
, // "commpage 32-bit" or "commpage 64-bit"
344 short version
= _COMM_PAGE_THIS_VERSION
;
347 commPagePtr
= (char *)commpage_allocate( submap
, (vm_size_t
) area_used
, uperm
);
348 *kernAddressPtr
= commPagePtr
; // save address either in commPagePtr32 or 64
349 commPageBaseOffset
= base_offset
;
351 *time_data
= commpage_addr_of( _COMM_PAGE_TIME_DATA_START
);
353 /* Stuff in the constants. We move things into the comm page in strictly
354 * ascending order, so we can check for overlap and panic if so.
355 * Note: the 32-bit cpu_capabilities vector is retained in addition to
356 * the expanded 64-bit vector.
358 commpage_stuff(_COMM_PAGE_SIGNATURE
,signature
,(int)MIN(_COMM_PAGE_SIGNATURELEN
, strlen(signature
)));
359 commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES64
,&_cpu_capabilities
,sizeof(_cpu_capabilities
));
360 commpage_stuff(_COMM_PAGE_VERSION
,&version
,sizeof(short));
361 commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES
,&_cpu_capabilities
,sizeof(uint32_t));
364 if (_cpu_capabilities
& kCache64
)
366 else if (_cpu_capabilities
& kCache128
)
368 commpage_stuff(_COMM_PAGE_CACHE_LINESIZE
,&c2
,2);
371 commpage_stuff(_COMM_PAGE_SPIN_COUNT
,&c4
,4);
373 /* machine_info valid after ml_get_max_cpus() */
374 c1
= machine_info
.physical_cpu_max
;
375 commpage_stuff(_COMM_PAGE_PHYSICAL_CPUS
,&c1
,1);
376 c1
= machine_info
.logical_cpu_max
;
377 commpage_stuff(_COMM_PAGE_LOGICAL_CPUS
,&c1
,1);
379 c8
= ml_cpu_cache_size(0);
380 commpage_stuff(_COMM_PAGE_MEMORY_SIZE
, &c8
, 8);
382 cfamily
= cpuid_info()->cpuid_cpufamily
;
383 commpage_stuff(_COMM_PAGE_CPUFAMILY
, &cfamily
, 4);
385 if (next
> _COMM_PAGE_END
)
386 panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next
, commPagePtr
);
391 /* Fill in commpages: called once, during kernel initialization, from the
392 * startup thread before user-mode code is running.
394 * See the top of this file for a list of what you have to do to add
395 * a new routine to the commpage.
399 commpage_populate( void )
401 commpage_init_cpu_capabilities();
403 commpage_populate_one( commpage32_map
,
405 _COMM_PAGE32_AREA_USED
,
406 _COMM_PAGE32_BASE_ADDRESS
,
411 pmap_commpage32_init((vm_offset_t
) commPagePtr32
, _COMM_PAGE32_BASE_ADDRESS
,
412 _COMM_PAGE32_AREA_USED
/INTEL_PGBYTES
);
414 time_data64
= time_data32
; /* if no 64-bit commpage, point to 32-bit */
416 if (_cpu_capabilities
& k64Bit
) {
417 commpage_populate_one( commpage64_map
,
419 _COMM_PAGE64_AREA_USED
,
420 _COMM_PAGE32_START_ADDRESS
, /* commpage address are relative to 32-bit commpage placement */
425 pmap_commpage64_init((vm_offset_t
) commPagePtr64
, _COMM_PAGE64_BASE_ADDRESS
,
426 _COMM_PAGE64_AREA_USED
/INTEL_PGBYTES
);
430 simple_lock_init(&commpage_active_cpus_lock
, 0);
432 commpage_update_active_cpus();
433 rtc_nanotime_init_commpage();
436 /* Fill in the common routines during kernel initialization.
437 * This is called before user-mode code is running.
439 void commpage_text_populate( void ){
440 commpage_descriptor
**rd
;
443 commPagePtr
= (char *) commpage_allocate(commpage_text32_map
, (vm_size_t
) _COMM_PAGE_TEXT_AREA_USED
, VM_PROT_READ
| VM_PROT_EXECUTE
);
444 commPageTextPtr32
= commPagePtr
;
446 char *cptr
= commPagePtr
;
448 for(; i
< _COMM_PAGE_TEXT_AREA_USED
; i
++){
452 commPageBaseOffset
= _COMM_PAGE_TEXT_START
;
453 for (rd
= commpage_32_routines
; *rd
!= NULL
; rd
++) {
454 commpage_stuff_routine(*rd
);
458 pmap_commpage32_init((vm_offset_t
) commPageTextPtr32
, _COMM_PAGE_TEXT_START
,
459 _COMM_PAGE_TEXT_AREA_USED
/INTEL_PGBYTES
);
462 if (_cpu_capabilities
& k64Bit
) {
464 commPagePtr
= (char *) commpage_allocate(commpage_text64_map
, (vm_size_t
) _COMM_PAGE_TEXT_AREA_USED
, VM_PROT_READ
| VM_PROT_EXECUTE
);
465 commPageTextPtr64
= commPagePtr
;
468 for(i
=0; i
<_COMM_PAGE_TEXT_AREA_USED
; i
++){
472 for (rd
= commpage_64_routines
; *rd
!=NULL
; rd
++) {
473 commpage_stuff_routine(*rd
);
477 pmap_commpage64_init((vm_offset_t
) commPageTextPtr64
, _COMM_PAGE_TEXT_START
,
478 _COMM_PAGE_TEXT_AREA_USED
/INTEL_PGBYTES
);
482 if (next
> _COMM_PAGE_TEXT_END
)
483 panic("commpage text overflow: next=0x%08x, commPagePtr=%p", next
, commPagePtr
);
487 /* Update commpage nanotime information.
489 * This routine must be serialized by some external means, ie a lock.
493 commpage_set_nanotime(
499 commpage_time_data
*p32
= time_data32
;
500 commpage_time_data
*p64
= time_data64
;
501 static uint32_t generation
= 0;
504 if (p32
== NULL
) /* have commpages been allocated yet? */
507 if ( generation
!= p32
->nt_generation
)
508 panic("nanotime trouble 1"); /* possibly not serialized */
509 if ( ns_base
< p32
->nt_ns_base
)
510 panic("nanotime trouble 2");
511 if ((shift
!= 0) && ((_cpu_capabilities
& kSlow
)==0) )
512 panic("nanotime trouble 3");
514 next_gen
= ++generation
;
516 next_gen
= ++generation
;
518 p32
->nt_generation
= 0; /* mark invalid, so commpage won't try to use it */
519 p64
->nt_generation
= 0;
521 p32
->nt_tsc_base
= tsc_base
;
522 p64
->nt_tsc_base
= tsc_base
;
524 p32
->nt_ns_base
= ns_base
;
525 p64
->nt_ns_base
= ns_base
;
527 p32
->nt_scale
= scale
;
528 p64
->nt_scale
= scale
;
530 p32
->nt_shift
= shift
;
531 p64
->nt_shift
= shift
;
533 p32
->nt_generation
= next_gen
; /* mark data as valid */
534 p64
->nt_generation
= next_gen
;
538 /* Disable commpage gettimeofday(), forcing commpage to call through to the kernel. */
541 commpage_disable_timestamp( void )
543 time_data32
->gtod_generation
= 0;
544 time_data64
->gtod_generation
= 0;
548 /* Update commpage gettimeofday() information. As with nanotime(), we interleave
549 * updates to the 32- and 64-bit commpage, in order to keep time more nearly in sync
550 * between the two environments.
552 * This routine must be serialized by some external means, ie a lock.
556 commpage_set_timestamp(
560 commpage_time_data
*p32
= time_data32
;
561 commpage_time_data
*p64
= time_data64
;
562 static uint32_t generation
= 0;
565 next_gen
= ++generation
;
567 next_gen
= ++generation
;
569 p32
->gtod_generation
= 0; /* mark invalid, so commpage won't try to use it */
570 p64
->gtod_generation
= 0;
572 p32
->gtod_ns_base
= abstime
;
573 p64
->gtod_ns_base
= abstime
;
575 p32
->gtod_sec_base
= secs
;
576 p64
->gtod_sec_base
= secs
;
578 p32
->gtod_generation
= next_gen
; /* mark data as valid */
579 p64
->gtod_generation
= next_gen
;
583 /* Update _COMM_PAGE_MEMORY_PRESSURE. Called periodically from vm's compute_memory_pressure() */
586 commpage_set_memory_pressure(
587 unsigned int pressure
)
594 cp
+= (_COMM_PAGE_MEMORY_PRESSURE
- _COMM_PAGE32_BASE_ADDRESS
);
595 ip
= (uint32_t*) (void *) cp
;
596 *ip
= (uint32_t) pressure
;
601 cp
+= (_COMM_PAGE_MEMORY_PRESSURE
- _COMM_PAGE32_START_ADDRESS
);
602 ip
= (uint32_t*) (void *) cp
;
603 *ip
= (uint32_t) pressure
;
609 /* Update _COMM_PAGE_SPIN_COUNT. We might want to reduce when running on a battery, etc. */
612 commpage_set_spin_count(
618 if (count
== 0) /* we test for 0 after decrement, not before */
623 cp
+= (_COMM_PAGE_SPIN_COUNT
- _COMM_PAGE32_BASE_ADDRESS
);
624 ip
= (uint32_t*) (void *) cp
;
625 *ip
= (uint32_t) count
;
630 cp
+= (_COMM_PAGE_SPIN_COUNT
- _COMM_PAGE32_START_ADDRESS
);
631 ip
= (uint32_t*) (void *) cp
;
632 *ip
= (uint32_t) count
;
637 /* Updated every time a logical CPU goes offline/online */
639 commpage_update_active_cpus(void)
642 volatile uint8_t *ip
;
644 /* At least 32-bit commpage must be initialized */
648 simple_lock(&commpage_active_cpus_lock
);
651 cp
+= (_COMM_PAGE_ACTIVE_CPUS
- _COMM_PAGE32_BASE_ADDRESS
);
652 ip
= (volatile uint8_t*) cp
;
653 *ip
= (uint8_t) processor_avail_count
;
657 cp
+= (_COMM_PAGE_ACTIVE_CPUS
- _COMM_PAGE32_START_ADDRESS
);
658 ip
= (volatile uint8_t*) cp
;
659 *ip
= (uint8_t) processor_avail_count
;
662 simple_unlock(&commpage_active_cpus_lock
);
665 extern user32_addr_t commpage_text32_location
;
666 extern user64_addr_t commpage_text64_location
;
668 /* Check to see if a given address is in the Preemption Free Zone (PFZ) */
671 commpage_is_in_pfz32(uint32_t addr32
)
673 if ( (addr32
>= (commpage_text32_location
+ _COMM_TEXT_PFZ_START_OFFSET
))
674 && (addr32
< (commpage_text32_location
+_COMM_TEXT_PFZ_END_OFFSET
))) {
682 commpage_is_in_pfz64(addr64_t addr64
)
684 if ( (addr64
>= (commpage_text64_location
+ _COMM_TEXT_PFZ_START_OFFSET
))
685 && (addr64
< (commpage_text64_location
+ _COMM_TEXT_PFZ_END_OFFSET
))) {