2 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
32 * Here's what to do if you want to add a new routine to the comm page:
34 * 1. Add a definition for its address in osfmk/ppc/cpu_capabilities.h,
35 * being careful to reserve room for future expansion.
37 * 2. Write one or more versions of the routine, each with its own
38 * commpage_descriptor. The tricky part is getting the "special",
39 * "musthave", and "canthave" fields right, so that exactly one
40 * version of the routine is selected for every machine.
41 * The source files should be in osfmk/ppc/commpage/.
43 * 3. Add a ptr to your new commpage_descriptor(s) in the "routines"
44 * static array below. Of course, you'll also have to declare them
47 * 4. Write the code in Libc to use the new routine.
50 #include <mach/mach_types.h>
51 #include <mach/machine.h>
52 #include <mach/vm_map.h>
53 #include <ppc/exception.h>
54 #include <ppc/machine_routines.h>
55 #include <machine/cpu_capabilities.h>
56 #include <machine/commpage.h>
57 #include <machine/pmap.h>
58 #include <vm/vm_kern.h>
59 #include <vm/vm_map.h>
60 #include <ipc/ipc_port.h>
62 extern vm_map_t com_region_map32
; // the 32-bit shared submap, set up in vm init
63 extern vm_map_t com_region_map64
; // the 64-bit shared submap
65 char *commPagePtr32
= NULL
; // virtual address of 32-bit comm page in kernel map
66 char *commPagePtr64
= NULL
; // and 64-bit commpage
67 int _cpu_capabilities
= 0; // define the capability vector
69 static char *next
; // next available byte in comm page
70 static int cur_routine
; // comm page address of "current" routine
71 static int matched
; // true if we've found a match for "current" routine
72 static char *commPagePtr
; // virtual address in kernel of commpage we are working on
74 extern commpage_descriptor compare_and_swap32_on32
;
75 extern commpage_descriptor compare_and_swap32_on64
;
76 extern commpage_descriptor compare_and_swap64
;
77 extern commpage_descriptor atomic_enqueue32
;
78 extern commpage_descriptor atomic_enqueue64
;
79 extern commpage_descriptor atomic_dequeue32_on32
;
80 extern commpage_descriptor atomic_dequeue32_on64
;
81 extern commpage_descriptor atomic_dequeue64
;
82 extern commpage_descriptor memory_barrier_up
;
83 extern commpage_descriptor memory_barrier_mp32
;
84 extern commpage_descriptor memory_barrier_mp64
;
85 extern commpage_descriptor atomic_add32
;
86 extern commpage_descriptor atomic_add64
;
87 extern commpage_descriptor mach_absolute_time_32
;
88 extern commpage_descriptor mach_absolute_time_64
;
89 extern commpage_descriptor mach_absolute_time_lp64
;
90 extern commpage_descriptor spinlock_32_try_mp
;
91 extern commpage_descriptor spinlock_32_try_up
;
92 extern commpage_descriptor spinlock_64_try_mp
;
93 extern commpage_descriptor spinlock_64_try_up
;
94 extern commpage_descriptor spinlock_32_lock_mp
;
95 extern commpage_descriptor spinlock_32_lock_up
;
96 extern commpage_descriptor spinlock_64_lock_mp
;
97 extern commpage_descriptor spinlock_64_lock_up
;
98 extern commpage_descriptor spinlock_32_unlock_mp
;
99 extern commpage_descriptor spinlock_32_unlock_up
;
100 extern commpage_descriptor spinlock_64_unlock_mp
;
101 extern commpage_descriptor spinlock_64_unlock_up
;
102 extern commpage_descriptor pthread_getspecific_sprg3_32
;
103 extern commpage_descriptor pthread_getspecific_sprg3_64
;
104 extern commpage_descriptor pthread_getspecific_uftrap
;
105 extern commpage_descriptor gettimeofday_32
;
106 extern commpage_descriptor gettimeofday_g5_32
;
107 extern commpage_descriptor gettimeofday_g5_64
;
108 extern commpage_descriptor commpage_flush_dcache
;
109 extern commpage_descriptor commpage_flush_icache
;
110 extern commpage_descriptor pthread_self_sprg3
;
111 extern commpage_descriptor pthread_self_uftrap
;
112 extern commpage_descriptor spinlock_relinquish
;
113 extern commpage_descriptor bzero_32
;
114 extern commpage_descriptor bzero_128
;
115 extern commpage_descriptor bcopy_g3
;
116 extern commpage_descriptor bcopy_g4
;
117 extern commpage_descriptor bcopy_970
;
118 extern commpage_descriptor bcopy_64
;
119 extern commpage_descriptor compare_and_swap32_on32b
;
120 extern commpage_descriptor compare_and_swap32_on64b
;
121 extern commpage_descriptor compare_and_swap64b
;
122 extern commpage_descriptor memset_64
;
123 extern commpage_descriptor memset_g3
;
124 extern commpage_descriptor memset_g4
;
125 extern commpage_descriptor memset_g5
;
126 extern commpage_descriptor bigcopy_970
;
128 /* The list of all possible commpage routines. WARNING: the check for overlap
129 * assumes that these routines are in strictly ascending order, sorted by address
130 * in the commpage. We panic if not.
132 static commpage_descriptor
*routines
[] = {
133 &compare_and_swap32_on32
,
134 &compare_and_swap32_on64
,
138 &atomic_dequeue32_on32
,
139 &atomic_dequeue32_on64
,
142 &memory_barrier_mp32
,
143 &memory_barrier_mp64
,
146 &mach_absolute_time_32
,
147 &mach_absolute_time_64
,
148 &mach_absolute_time_lp64
,
153 &spinlock_32_lock_mp
,
154 &spinlock_32_lock_up
,
155 &spinlock_64_lock_mp
,
156 &spinlock_64_lock_up
,
157 &spinlock_32_unlock_mp
,
158 &spinlock_32_unlock_up
,
159 &spinlock_64_unlock_mp
,
160 &spinlock_64_unlock_up
,
161 &pthread_getspecific_sprg3_32
,
162 &pthread_getspecific_sprg3_64
,
163 &pthread_getspecific_uftrap
,
167 &commpage_flush_dcache
,
168 &commpage_flush_icache
,
170 &pthread_self_uftrap
,
171 &spinlock_relinquish
,
178 &compare_and_swap32_on32b
,
179 &compare_and_swap32_on64b
,
180 &compare_and_swap64b
,
189 /* Allocate the commpages and add to one of the shared submaps created by vm.
190 * Called once each for the 32 and 64-bit submaps.
191 * 1. allocate pages in the kernel map (RW)
193 * 3. make a memory entry out of them
194 * 4. map that entry into the shared comm region map (R-only)
198 vm_map_t submap
) // com_region_map32 or com_region_map64
200 vm_offset_t kernel_addr
; // address of commpage in kernel map
201 vm_offset_t zero
= 0;
202 vm_size_t size
= _COMM_PAGE_AREA_USED
; // size actually populated
203 vm_map_entry_t entry
;
207 panic("commpage submap is null");
209 if (vm_allocate(kernel_map
,&kernel_addr
,_COMM_PAGE_AREA_USED
,VM_FLAGS_ANYWHERE
))
210 panic("cannot allocate commpage");
212 if (vm_map_wire(kernel_map
,kernel_addr
,kernel_addr
+_COMM_PAGE_AREA_USED
,VM_PROT_DEFAULT
,FALSE
))
213 panic("cannot wire commpage");
216 * Now that the object is created and wired into the kernel map, mark it so that no delayed
217 * copy-on-write will ever be performed on it as a result of mapping it into user-space.
218 * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
219 * that would be a real disaster.
221 * JMM - What we really need is a way to create it like this in the first place.
223 if (!vm_map_lookup_entry( kernel_map
, vm_map_trunc_page(kernel_addr
), &entry
) || entry
->is_sub_map
)
224 panic("cannot find commpage entry");
225 entry
->object
.vm_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
227 if (mach_make_memory_entry( kernel_map
, // target map
229 kernel_addr
, // offset (address in kernel map)
230 VM_PROT_DEFAULT
, // map it RW
231 &handle
, // this is the object handle we get
232 NULL
)) // parent_entry
233 panic("cannot make entry for commpage");
235 if (vm_map_64( submap
, // target map (shared submap)
236 &zero
, // address (map into 1st page in submap)
237 _COMM_PAGE_AREA_USED
, // size
239 VM_FLAGS_FIXED
, // flags (it must be 1st page in submap)
240 handle
, // port is the memory entry we just made
241 0, // offset (map 1st page in memory entry)
243 VM_PROT_READ
, // cur_protection (R-only in user map)
244 VM_PROT_READ
, // max_protection
245 VM_INHERIT_SHARE
)) // inheritance
246 panic("cannot map commpage");
248 ipc_port_release(handle
);
250 return (void*) kernel_addr
; // return address in kernel map
254 /* Get address (in kernel map) of a commpage field. */
258 int addr_at_runtime
)
260 return (void*) (commPagePtr
+ addr_at_runtime
- _COMM_PAGE_BASE_ADDRESS
);
264 /* Determine number of CPUs on this system. We cannot rely on
265 * machine_info.max_cpus this early in the boot.
268 commpage_cpus( void )
272 cpus
= ml_get_max_cpus(); // NB: this call can block
275 panic("commpage cpus==0");
283 /* Initialize kernel version of _cpu_capabilities vector (used by KEXTs.) */
286 commpage_init_cpu_capabilities( void )
292 pfp
= &(PerProcTable
[0].ppe_vaddr
->pf
); // point to features in per-proc
293 available
= pfp
->Available
;
295 // If AltiVec is disabled make sure it is not reported as available.
296 if ((available
& pfAltivec
) == 0) {
297 _cpu_capabilities
&= ~kHasAltivec
;
300 if (_cpu_capabilities
& kDcbaAvailable
) { // if this processor has DCBA, time it...
301 _cpu_capabilities
|= commpage_time_dcba(); // ...and set kDcbaRecommended if it helps.
304 cpus
= commpage_cpus(); // how many CPUs do we have
305 if (cpus
== 1) _cpu_capabilities
|= kUP
;
306 _cpu_capabilities
|= (cpus
<< kNumCPUsShift
);
308 if (_cpu_capabilities
& k64Bit
) // 64-bit processors use SPRG3 for TLS
309 _cpu_capabilities
|= kFastThreadLocalStorage
;
313 /* Copy data into commpage. */
321 char *dest
= commpage_addr_of(address
);
// Overlap diagnostic: "dest" is where this item would be copied and "next" is
// the first free byte after the previously copied item. Both are pointers, so
// print them with %p (the earlier "%08" conversion lacked a type character).
324 panic("commpage overlap: %p - %p", dest
, next
);
326 bcopy((const char*)source
,dest
,length
);
328 next
= (dest
+ length
);
332 /* Modify commpage code in-place for this specific platform. */
338 uint32_t search_mask
,
339 uint32_t search_pattern
,
341 uint32_t new_pattern
,
342 int (*check
)(uint32_t instruction
) )
344 int words
= bytes
>> 2;
347 while( (--words
) >= 0 ) {
349 if ((word
& search_mask
)==search_pattern
) {
350 if ((check
==NULL
) || (check(word
))) { // check instruction if necessary
361 /* Check to see if exactly one bit is set in a MTCRF instruction's FXM field.
367 int x
= (mtcrf
>> 12) & 0xFF; // isolate the FXM field of the MTCRF
370 panic("commpage bad mtcrf");
372 return (x
& (x
-1))==0 ? 1 : 0; // return 1 iff exactly 1 bit set in FXM field
376 /* Check to see if a RLWINM (whose ME is 31) is a SRWI. Since to shift right n bits
377 * you must "RLWINM ra,rs,32-n,n,31", if (SH+MB)==32 then we have a SRWI.
383 int sh
= (rlwinm
>> 11) & 0x1F; // extract SH field of RLWINM, ie bits 16-20
384 int mb
= (rlwinm
>> 6 ) & 0x1F; // extract MB field of RLWINM, ie bits 21-25
386 return (sh
+ mb
) == 32; // it is a SRWI if (SH+MB)==32
390 /* Handle kCommPageDCBA bit: the commpage routine uses DCBA. If the machine we're
391 * running on doesn't benefit from use of that instruction, map them to NOPs
395 commpage_handle_dcbas(
399 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
401 if ( (_cpu_capabilities
& kDcbaRecommended
) == 0 ) {
402 ptr
= commpage_addr_of(address
);
404 search_mask
= 0xFC0007FE; // search x-form opcode bits
405 search
= 0x7C0005EC; // for a DCBA
406 replace_mask
= 0xFFFFFFFF; // replace all bits...
407 replace
= 0x60000000; // ...with a NOP
409 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
414 /* Handle kCommPageSYNC bit: this routine uses SYNC, LWSYNC, or EIEIO. If we're
415 * running on a UP machine, map them to NOPs.
418 commpage_handle_syncs(
422 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
424 if (_NumCPUs() == 1) {
425 ptr
= commpage_addr_of(address
);
427 search_mask
= 0xFC0005FE; // search x-form opcode bits (but ignore bit 0x00000200)
428 search
= 0x7C0004AC; // for a SYNC, LWSYNC, or EIEIO
429 replace_mask
= 0xFFFFFFFF; // replace all bits...
430 replace
= 0x60000000; // ...with a NOP
432 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
437 /* Handle kCommPageISYNC bit: this routine uses ISYNCs. If we're running on a UP machine,
441 commpage_handle_isyncs(
445 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
447 if (_NumCPUs() == 1) {
448 ptr
= commpage_addr_of(address
);
450 search_mask
= 0xFC0007FE; // search xl-form opcode bits
451 search
= 0x4C00012C; // for an ISYNC
452 replace_mask
= 0xFFFFFFFF; // replace all bits...
453 replace
= 0x60000000; // ...with a NOP
455 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
460 /* Handle kCommPageMTCRF bit. When this was written (3/03), the assembler did not
461 * recognize the special form of MTCRF instructions, in which exactly one bit is set
462 * in the 8-bit mask field. Bit 11 of the instruction should be set in this case,
463 * since the 970 and probably other 64-bit processors optimize it. Once the assembler
464 * has been updated this code can be removed, though it need not be.
467 commpage_handle_mtcrfs(
471 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
473 if (_cpu_capabilities
& k64Bit
) {
474 ptr
= commpage_addr_of(address
);
476 search_mask
= 0xFC0007FE; // search x-form opcode bits
477 search
= 0x7C000120; // for a MTCRF
478 replace_mask
= 0x00100000; // replace bit 11...
479 replace
= 0x00100000; // ...with a 1-bit
481 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,commpage_onebit
);
486 /* Port 32-bit code to 64-bit for use in the 64-bit commpage. This sounds fancier than
487 * it is. We do the following:
488 * - map "cmpw*" into "cmpd*"
489 * - map "srwi" into "srdi"
490 * Perhaps surprisingly, this is enough to permit lots of code to run in 64-bit mode, as
491 * long as it is written with this in mind.
494 commpage_port_32_to_64(
498 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
500 ptr
= commpage_addr_of(address
);
502 search_mask
= 0xFC2007FE; // search x-form opcode bits (and L bit)
503 search
= 0x7C000000; // for a CMPW
504 replace_mask
= 0x00200000; // replace bit 10 (L)...
505 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
506 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
508 search_mask
= 0xFC2007FE; // search x-form opcode bits (and L bit)
509 search
= 0x7C000040; // for a CMPLW
510 replace_mask
= 0x00200000; // replace bit 10 (L)...
511 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
512 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
514 search_mask
= 0xFC200000; // search d-form opcode bits (and L bit)
515 search
= 0x28000000; // for a CMPLWI
516 replace_mask
= 0x00200000; // replace bit 10 (L)...
517 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
518 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
520 search_mask
= 0xFC200000; // search d-form opcode bits (and L bit)
521 search
= 0x2C000000; // for a CMPWI
522 replace_mask
= 0x00200000; // replace bit 10 (L)...
523 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
524 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
526 search_mask
= 0xFC00003E; // search d-form opcode bits and ME (mask end) field
527 search
= 0x5400003E; // for an RLWINM with ME=31 (which might be a "srwi")
528 replace_mask
= 0xFC00003E; // then replace RLWINM's opcode and ME field to make a RLDICL
529 replace
= 0x78000002; // opcode is 30, ME is 0, except we add 32 to SH amount
530 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,commpage_srwi
);
534 /* Copy a routine into comm page if it matches running machine.
537 commpage_stuff_routine(
538 commpage_descriptor
*rd
,
539 int mode
) // kCommPage32 or kCommPage64
544 if ( (rd
->special
& mode
) == 0 ) // is this routine useable in this mode?
547 if (rd
->commpage_address
!= cur_routine
) {
548 if ((cur_routine
!=0) && (matched
==0))
549 panic("commpage no match for last, next address %08x", rd
->commpage_address
);
550 cur_routine
= rd
->commpage_address
;
554 must
= _cpu_capabilities
& rd
->musthave
;
555 cant
= _cpu_capabilities
& rd
->canthave
;
557 if ((must
== rd
->musthave
) && (cant
== 0)) {
559 panic("commpage multiple matches for address %08x", rd
->commpage_address
);
561 routine_code
= ((char*)rd
) + rd
->code_offset
;
563 commpage_stuff(rd
->commpage_address
,routine_code
,rd
->code_length
);
565 if (rd
->special
& kCommPageDCBA
)
566 commpage_handle_dcbas(rd
->commpage_address
,rd
->code_length
);
568 if (rd
->special
& kCommPageSYNC
)
569 commpage_handle_syncs(rd
->commpage_address
,rd
->code_length
);
571 if (rd
->special
& kCommPageISYNC
)
572 commpage_handle_isyncs(rd
->commpage_address
,rd
->code_length
);
574 if (rd
->special
& kCommPageMTCRF
)
575 commpage_handle_mtcrfs(rd
->commpage_address
,rd
->code_length
);
577 if ((mode
== kCommPage64
) && (rd
->special
& kPort32to64
))
578 commpage_port_32_to_64(rd
->commpage_address
,rd
->code_length
);
583 /* Fill in the 32- or 64-bit commpage. Called once for each. */
586 commpage_populate_one(
587 vm_map_t submap
, // the map to populate
588 char ** kernAddressPtr
, // address within kernel of this commpage
589 int mode
, // either kCommPage32 or kCommPage64
590 const char* signature
) // "commpage 32-bit" or "commpage 64-bit"
595 static double two52
= 1048576.0 * 1048576.0 * 4096.0; // 2**52
596 static double ten6
= 1000000.0; // 10**6
597 static uint64_t magicFE
= 0xFEFEFEFEFEFEFEFFLL
; // used to find 0s in strings
598 static uint64_t magic80
= 0x8080808080808080LL
; // also used to find 0s
599 commpage_descriptor
**rd
;
600 short version
= _COMM_PAGE_THIS_VERSION
;
602 next
= NULL
; // initialize next available byte in the commpage
603 cur_routine
= 0; // initialize comm page address of "current" routine
605 commPagePtr
= (char*) commpage_allocate( submap
);
606 *kernAddressPtr
= commPagePtr
; // save address either in commPagePtr32 or 64
608 /* Stuff in the constants. We move things into the comm page in strictly
609 * ascending order, so we can check for overlap and panic if so.
612 commpage_stuff(_COMM_PAGE_SIGNATURE
,signature
,strlen(signature
));
614 commpage_stuff(_COMM_PAGE_VERSION
,&version
,2);
616 commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES
,&_cpu_capabilities
,sizeof(int));
618 c1
= (_cpu_capabilities
& kHasAltivec
) ? -1 : 0;
619 commpage_stuff(_COMM_PAGE_ALTIVEC
,&c1
,1);
621 c1
= (_cpu_capabilities
& k64Bit
) ? -1 : 0;
622 commpage_stuff(_COMM_PAGE_64_BIT
,&c1
,1);
624 if (_cpu_capabilities
& kCache32
)
626 else if (_cpu_capabilities
& kCache64
)
628 else if (_cpu_capabilities
& kCache128
)
630 commpage_stuff(_COMM_PAGE_CACHE_LINESIZE
,&c2
,2);
632 commpage_stuff(_COMM_PAGE_2_TO_52
,&two52
,8);
633 commpage_stuff(_COMM_PAGE_10_TO_6
,&ten6
,8);
634 commpage_stuff(_COMM_PAGE_MAGIC_FE
,&magicFE
,8);
635 commpage_stuff(_COMM_PAGE_MAGIC_80
,&magic80
,8);
637 c8
= 0; // 0 timestamp means "disabled"
638 commpage_stuff(_COMM_PAGE_TIMEBASE
,&c8
,8);
639 commpage_stuff(_COMM_PAGE_TIMESTAMP
,&c8
,8);
640 commpage_stuff(_COMM_PAGE_SEC_PER_TICK
,&c8
,8);
642 /* Now the routines. We try each potential routine in turn,
643 * and copy in any that "match" the platform we are running on.
644 * We require that exactly one routine match for each slot in the
645 * comm page, and panic if not.
648 for( rd
= routines
; *rd
!= NULL
; rd
++ )
649 commpage_stuff_routine(*rd
,mode
);
652 panic("commpage no match on last routine");
654 if (next
> (commPagePtr
+ _COMM_PAGE_AREA_USED
))
655 panic("commpage overflow");
658 // make all that new code executable
660 sync_cache_virtual((vm_offset_t
) commPagePtr
,_COMM_PAGE_AREA_USED
);
664 /* Fill in commpage: called once, during kernel initialization, from the
665 * startup thread before user-mode code is running.
667 * See the top of this file for a list of what you have to do to add
668 * a new routine to the commpage.
672 commpage_populate( void )
674 commpage_init_cpu_capabilities();
675 commpage_populate_one( com_region_map32
, &commPagePtr32
, kCommPage32
, "commpage 32-bit");
676 if (_cpu_capabilities
& k64Bit
) {
677 commpage_populate_one( com_region_map64
, &commPagePtr64
, kCommPage64
, "commpage 64-bit");
678 pmap_init_sharedpage((vm_offset_t
)commPagePtr64
); // Do the 64-bit version