2 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
/*
 *  Here's what to do if you want to add a new routine to the comm page:
 *
 *      1. Add a definition for its address in osfmk/ppc/cpu_capabilities.h,
 *         being careful to reserve room for future expansion.
 *
 *      2. Write one or more versions of the routine, each with its own
 *         commpage_descriptor.  The tricky part is getting the "special",
 *         "musthave", and "canthave" fields right, so that exactly one
 *         version of the routine is selected for every machine.
 *         The source files should be in osfmk/ppc/commpage/.
 *
 *      3. Add a ptr to your new commpage_descriptor(s) in the "routines"
 *         static array below.  Of course, you'll also have to declare them
 *         "extern".
 *
 *      4. Write the code in Libc to use the new routine.
 */
43 #include <mach/mach_types.h>
44 #include <mach/machine.h>
45 #include <mach/vm_map.h>
46 #include <ppc/exception.h>
47 #include <ppc/machine_routines.h>
48 #include <machine/cpu_capabilities.h>
49 #include <machine/commpage.h>
50 #include <machine/pmap.h>
51 #include <vm/vm_kern.h>
52 #include <vm/vm_map.h>
53 #include <ipc/ipc_port.h>
55 extern vm_map_t com_region_map32
; // the 32-bit shared submap, set up in vm init
56 extern vm_map_t com_region_map64
; // the 64-bit shared submap
58 char *commPagePtr32
= NULL
; // virtual address of 32-bit comm page in kernel map
59 char *commPagePtr64
= NULL
; // and 64-bit commpage
60 int _cpu_capabilities
= 0; // define the capability vector
62 static char *next
; // next available byte in comm page
63 static int cur_routine
; // comm page address of "current" routine
64 static int matched
; // true if we've found a match for "current" routine
65 static char *commPagePtr
; // virtual address in kernel of commpage we are working on
67 extern commpage_descriptor compare_and_swap32_on32
;
68 extern commpage_descriptor compare_and_swap32_on64
;
69 extern commpage_descriptor compare_and_swap64
;
70 extern commpage_descriptor atomic_enqueue32
;
71 extern commpage_descriptor atomic_enqueue64
;
72 extern commpage_descriptor atomic_dequeue32_on32
;
73 extern commpage_descriptor atomic_dequeue32_on64
;
74 extern commpage_descriptor atomic_dequeue64
;
75 extern commpage_descriptor memory_barrier_up
;
76 extern commpage_descriptor memory_barrier_mp32
;
77 extern commpage_descriptor memory_barrier_mp64
;
78 extern commpage_descriptor atomic_add32
;
79 extern commpage_descriptor atomic_add64
;
80 extern commpage_descriptor mach_absolute_time_32
;
81 extern commpage_descriptor mach_absolute_time_64
;
82 extern commpage_descriptor mach_absolute_time_lp64
;
83 extern commpage_descriptor spinlock_32_try_mp
;
84 extern commpage_descriptor spinlock_32_try_up
;
85 extern commpage_descriptor spinlock_64_try_mp
;
86 extern commpage_descriptor spinlock_64_try_up
;
87 extern commpage_descriptor spinlock_32_lock_mp
;
88 extern commpage_descriptor spinlock_32_lock_up
;
89 extern commpage_descriptor spinlock_64_lock_mp
;
90 extern commpage_descriptor spinlock_64_lock_up
;
91 extern commpage_descriptor spinlock_32_unlock_mp
;
92 extern commpage_descriptor spinlock_32_unlock_up
;
93 extern commpage_descriptor spinlock_64_unlock_mp
;
94 extern commpage_descriptor spinlock_64_unlock_up
;
95 extern commpage_descriptor pthread_getspecific_sprg3_32
;
96 extern commpage_descriptor pthread_getspecific_sprg3_64
;
97 extern commpage_descriptor pthread_getspecific_uftrap
;
98 extern commpage_descriptor gettimeofday_32
;
99 extern commpage_descriptor gettimeofday_g5_32
;
100 extern commpage_descriptor gettimeofday_g5_64
;
101 extern commpage_descriptor commpage_flush_dcache
;
102 extern commpage_descriptor commpage_flush_icache
;
103 extern commpage_descriptor pthread_self_sprg3
;
104 extern commpage_descriptor pthread_self_uftrap
;
105 extern commpage_descriptor spinlock_relinquish
;
106 extern commpage_descriptor bzero_32
;
107 extern commpage_descriptor bzero_128
;
108 extern commpage_descriptor bcopy_g3
;
109 extern commpage_descriptor bcopy_g4
;
110 extern commpage_descriptor bcopy_970
;
111 extern commpage_descriptor bcopy_64
;
112 extern commpage_descriptor compare_and_swap32_on32b
;
113 extern commpage_descriptor compare_and_swap32_on64b
;
114 extern commpage_descriptor compare_and_swap64b
;
115 extern commpage_descriptor memset_64
;
116 extern commpage_descriptor memset_g3
;
117 extern commpage_descriptor memset_g4
;
118 extern commpage_descriptor memset_g5
;
119 extern commpage_descriptor bigcopy_970
;
121 /* The list of all possible commpage routines. WARNING: the check for overlap
122 * assumes that these routines are in strictly ascending order, sorted by address
123 * in the commpage. We panic if not.
125 static commpage_descriptor
*routines
[] = {
126 &compare_and_swap32_on32
,
127 &compare_and_swap32_on64
,
131 &atomic_dequeue32_on32
,
132 &atomic_dequeue32_on64
,
135 &memory_barrier_mp32
,
136 &memory_barrier_mp64
,
139 &mach_absolute_time_32
,
140 &mach_absolute_time_64
,
141 &mach_absolute_time_lp64
,
146 &spinlock_32_lock_mp
,
147 &spinlock_32_lock_up
,
148 &spinlock_64_lock_mp
,
149 &spinlock_64_lock_up
,
150 &spinlock_32_unlock_mp
,
151 &spinlock_32_unlock_up
,
152 &spinlock_64_unlock_mp
,
153 &spinlock_64_unlock_up
,
154 &pthread_getspecific_sprg3_32
,
155 &pthread_getspecific_sprg3_64
,
156 &pthread_getspecific_uftrap
,
160 &commpage_flush_dcache
,
161 &commpage_flush_icache
,
163 &pthread_self_uftrap
,
164 &spinlock_relinquish
,
171 &compare_and_swap32_on32b
,
172 &compare_and_swap32_on64b
,
173 &compare_and_swap64b
,
182 /* Allocate the commpages and add to one of the shared submaps created by vm.
183 * Called once each for the 32 and 64-bit submaps.
184 * 1. allocate pages in the kernel map (RW)
186 * 3. make a memory entry out of them
187 * 4. map that entry into the shared comm region map (R-only)
191 vm_map_t submap
) // com_region_map32 or com_region_map64
193 vm_offset_t kernel_addr
; // address of commpage in kernel map
194 vm_offset_t zero
= 0;
195 vm_size_t size
= _COMM_PAGE_AREA_USED
; // size actually populated
196 vm_map_entry_t entry
;
200 panic("commpage submap is null");
202 if (vm_allocate(kernel_map
,&kernel_addr
,_COMM_PAGE_AREA_USED
,VM_FLAGS_ANYWHERE
))
203 panic("cannot allocate commpage");
205 if (vm_map_wire(kernel_map
,kernel_addr
,kernel_addr
+_COMM_PAGE_AREA_USED
,VM_PROT_DEFAULT
,FALSE
))
206 panic("cannot wire commpage");
209 * Now that the object is created and wired into the kernel map, mark it so that no delay
210 * copy-on-write will ever be performed on it as a result of mapping it into user-space.
211 * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
212 * that would be a real disaster.
214 * JMM - What we really need is a way to create it like this in the first place.
216 if (!vm_map_lookup_entry( kernel_map
, vm_map_trunc_page(kernel_addr
), &entry
) || entry
->is_sub_map
)
217 panic("cannot find commpage entry");
218 entry
->object
.vm_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
220 if (mach_make_memory_entry( kernel_map
, // target map
222 kernel_addr
, // offset (address in kernel map)
223 VM_PROT_DEFAULT
, // map it RW
224 &handle
, // this is the object handle we get
225 NULL
)) // parent_entry
226 panic("cannot make entry for commpage");
228 if (vm_map_64( submap
, // target map (shared submap)
229 &zero
, // address (map into 1st page in submap)
230 _COMM_PAGE_AREA_USED
, // size
232 VM_FLAGS_FIXED
, // flags (it must be 1st page in submap)
233 handle
, // port is the memory entry we just made
234 0, // offset (map 1st page in memory entry)
236 VM_PROT_READ
, // cur_protection (R-only in user map)
237 VM_PROT_READ
, // max_protection
238 VM_INHERIT_SHARE
)) // inheritance
239 panic("cannot map commpage");
241 ipc_port_release(handle
);
243 return (void*) kernel_addr
; // return address in kernel map
247 /* Get address (in kernel map) of a commpage field. */
251 int addr_at_runtime
)
253 return (void*) (commPagePtr
+ addr_at_runtime
- _COMM_PAGE_BASE_ADDRESS
);
/* Determine number of CPUs on this system.  We cannot rely on
 * machine_info.max_cpus this early in the boot.
 *
 * Panics if the platform reports zero CPUs.
 */
static int
commpage_cpus( void )
{
    int cpus;

    cpus = ml_get_max_cpus();           // NB: this call can block

    if (cpus == 0)
        panic("commpage cpus==0");

    // NOTE(review): the clamp assumes the CPU-count field of _cpu_capabilities
    // is 8 bits wide - confirm against cpu_capabilities.h.
    if (cpus > 0xFF)
        cpus = 0xFF;

    return cpus;
}
276 /* Initialize kernel version of _cpu_capabilities vector (used by KEXTs.) */
279 commpage_init_cpu_capabilities( void )
285 pfp
= &(PerProcTable
[0].ppe_vaddr
->pf
); // point to features in per-proc
286 available
= pfp
->Available
;
288 // If AltiVec is disabled make sure it is not reported as available.
289 if ((available
& pfAltivec
) == 0) {
290 _cpu_capabilities
&= ~kHasAltivec
;
293 if (_cpu_capabilities
& kDcbaAvailable
) { // if this processor has DCBA, time it...
294 _cpu_capabilities
|= commpage_time_dcba(); // ...and set kDcbaRecomended if it helps.
297 cpus
= commpage_cpus(); // how many CPUs do we have
298 if (cpus
== 1) _cpu_capabilities
|= kUP
;
299 _cpu_capabilities
|= (cpus
<< kNumCPUsShift
);
301 if (_cpu_capabilities
& k64Bit
) // 64-bit processors use SPRG3 for TLS
302 _cpu_capabilities
|= kFastThreadLocalStorage
;
306 /* Copy data into commpage. */
314 char *dest
= commpage_addr_of(address
);
317 panic("commpage overlap: %08 - %08X", dest
, next
);
319 bcopy((const char*)source
,dest
,length
);
321 next
= (dest
+ length
);
/* Modify commpage code in-place for this specific platform.
 *
 * Scans bytes/4 instruction words starting at ptr.  Each word for which
 * (word & search_mask) == search_pattern, and for which check() returns
 * nonzero (when check is not NULL), has the bits in new_mask cleared and
 * new_pattern ORed in.  Other words are left untouched.
 */
static void
commpage_change(
    uint32_t    *ptr,
    int         bytes,
    uint32_t    search_mask,
    uint32_t    search_pattern,
    uint32_t    new_mask,
    uint32_t    new_pattern,
    int         (*check)(uint32_t instruction) )
{
    int         words = bytes >> 2;
    uint32_t    word;

    while( (--words) >= 0 ) {
        word = *ptr;
        if ((word & search_mask)==search_pattern) {
            if ((check==NULL) || (check(word))) {   // check instruction if necessary
                word &= ~new_mask;
                word |= new_pattern;
                *ptr = word;
            }
        }
        ptr++;
    }
}
/* Check to see if exactly one bit is set in a MTCRF instruction's FXM field.
 *
 * Used as the "check" callback of commpage_change().  Panics if the FXM
 * field is zero; otherwise returns 1 when a single bit is set and 0 when
 * more than one is.
 */
static int
commpage_onebit(
    uint32_t mtcrf)
{
    int x = (mtcrf >> 12) & 0xFF;       // isolate the FXM field of the MTCRF

    if (x == 0)
        panic("commpage bad mtcrf");

    return (x & (x-1))==0 ? 1 : 0;      // return 1 iff exactly 1 bit set in FXM field
}
/* Check to see if a RLWINM (whose ME is 31) is a SRWI.  Since to shift right n bits
 * you must "RLWINM ra,rs,32-n,n,31", if (SH+MB)==32 then we have a SRWI.
 *
 * Used as the "check" callback of commpage_change(); returns nonzero for a SRWI.
 */
static int
commpage_srwi(
    uint32_t rlwinm)
{
    int sh = (rlwinm >> 11) & 0x1F;     // extract SH field of RLWINM, ie bits 16-20
    int mb = (rlwinm >> 6 ) & 0x1F;     // extract MB field of RLWINM, ie bits 21-25

    return (sh + mb) == 32;             // it is a SRWI if (SH+MB)==32
}
383 /* Handle kCommPageDCBA bit: the commpage routine uses DCBA. If the machine we're
384 * running on doesn't benefit from use of that instruction, map them to NOPs
388 commpage_handle_dcbas(
392 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
394 if ( (_cpu_capabilities
& kDcbaRecommended
) == 0 ) {
395 ptr
= commpage_addr_of(address
);
397 search_mask
= 0xFC0007FE; // search x-form opcode bits
398 search
= 0x7C0005EC; // for a DCBA
399 replace_mask
= 0xFFFFFFFF; // replace all bits...
400 replace
= 0x60000000; // ...with a NOP
402 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
/* Handle kCommPageSYNC bit: this routine uses SYNC, LWSYNC, or EIEIO.  If we're
 * running on a UP machine, map them to NOPs.
 *
 * address/length describe the already-copied routine, as a commpage address
 * and a byte count.
 */
static void
commpage_handle_syncs(
    int address,
    int length)
{
    uint32_t *ptr, search_mask, search, replace_mask, replace;

    if (_NumCPUs() == 1) {
        ptr = commpage_addr_of(address);

        search_mask  = 0xFC0005FE;      // search x-form opcode bits (but ignore bit 0x00000200)
        search       = 0x7C0004AC;      // for a SYNC, LWSYNC, or EIEIO
        replace_mask = 0xFFFFFFFF;      // replace all bits...
        replace      = 0x60000000;      // ...with a NOP

        commpage_change(ptr, length, search_mask, search, replace_mask, replace, NULL);
    }
}
/* Handle kCommPageISYNC bit: this routine uses ISYNCs.  If we're running on a UP machine,
 * map them to NOPs.
 *
 * address/length describe the already-copied routine, as a commpage address
 * and a byte count.
 */
static void
commpage_handle_isyncs(
    int address,
    int length)
{
    uint32_t *ptr, search_mask, search, replace_mask, replace;

    if (_NumCPUs() == 1) {
        ptr = commpage_addr_of(address);

        search_mask  = 0xFC0007FE;      // search xl-form opcode bits
        search       = 0x4C00012C;      // for an ISYNC
        replace_mask = 0xFFFFFFFF;      // replace all bits...
        replace      = 0x60000000;      // ...with a NOP

        commpage_change(ptr, length, search_mask, search, replace_mask, replace, NULL);
    }
}
453 /* Handle kCommPageMTCRF bit. When this was written (3/03), the assembler did not
454 * recognize the special form of MTCRF instructions, in which exactly one bit is set
455 * in the 8-bit mask field. Bit 11 of the instruction should be set in this case,
456 * since the 970 and probably other 64-bit processors optimize it. Once the assembler
457 * has been updated this code can be removed, though it need not be.
460 commpage_handle_mtcrfs(
464 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
466 if (_cpu_capabilities
& k64Bit
) {
467 ptr
= commpage_addr_of(address
);
469 search_mask
= 0xFC0007FE; // search x-form opcode bits
470 search
= 0x7C000120; // for a MTCRF
471 replace_mask
= 0x00100000; // replace bit 11...
472 replace
= 0x00100000; // ...with a 1-bit
474 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,commpage_onebit
);
479 /* Port 32-bit code to 64-bit for use in the 64-bit commpage. This sounds fancier than
480 * it is. We do the following:
481 * - map "cmpw*" into "cmpd*"
482 * - map "srwi" into "srdi"
483 * Perhaps surprisingly, this is enough to permit lots of code to run in 64-bit mode, as
484 * long as it is written with this in mind.
487 commpage_port_32_to_64(
491 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
493 ptr
= commpage_addr_of(address
);
495 search_mask
= 0xFC2007FE; // search x-form opcode bits (and L bit)
496 search
= 0x7C000000; // for a CMPW
497 replace_mask
= 0x00200000; // replace bit 10 (L)...
498 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
499 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
501 search_mask
= 0xFC2007FE; // search x-form opcode bits (and L bit)
502 search
= 0x7C000040; // for a CMPLW
503 replace_mask
= 0x00200000; // replace bit 10 (L)...
504 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
505 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
507 search_mask
= 0xFC200000; // search d-form opcode bits (and L bit)
508 search
= 0x28000000; // for a CMPLWI
509 replace_mask
= 0x00200000; // replace bit 10 (L)...
510 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
511 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
513 search_mask
= 0xFC200000; // search d-form opcode bits (and L bit)
514 search
= 0x2C000000; // for a CMPWI
515 replace_mask
= 0x00200000; // replace bit 10 (L)...
516 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
517 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
519 search_mask
= 0xFC00003E; // search d-form opcode bits and ME (mask end) field
520 search
= 0x5400003E; // for an RLWINM with ME=31 (which might be a "srwi")
521 replace_mask
= 0xFC00003E; // then replace RLWINM's opcode and ME field to make a RLDICL
522 replace
= 0x78000002; // opcode is 30, ME is 0, except we add 32 to SH amount
523 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,commpage_srwi
);
527 /* Copy a routine into comm page if it matches running machine.
530 commpage_stuff_routine(
531 commpage_descriptor
*rd
,
532 int mode
) // kCommPage32 or kCommPage64
537 if ( (rd
->special
& mode
) == 0 ) // is this routine useable in this mode?
540 if (rd
->commpage_address
!= cur_routine
) {
541 if ((cur_routine
!=0) && (matched
==0))
542 panic("commpage no match for last, next address %08x", rd
->commpage_address
);
543 cur_routine
= rd
->commpage_address
;
547 must
= _cpu_capabilities
& rd
->musthave
;
548 cant
= _cpu_capabilities
& rd
->canthave
;
550 if ((must
== rd
->musthave
) && (cant
== 0)) {
552 panic("commpage multiple matches for address %08x", rd
->commpage_address
);
554 routine_code
= ((char*)rd
) + rd
->code_offset
;
556 commpage_stuff(rd
->commpage_address
,routine_code
,rd
->code_length
);
558 if (rd
->special
& kCommPageDCBA
)
559 commpage_handle_dcbas(rd
->commpage_address
,rd
->code_length
);
561 if (rd
->special
& kCommPageSYNC
)
562 commpage_handle_syncs(rd
->commpage_address
,rd
->code_length
);
564 if (rd
->special
& kCommPageISYNC
)
565 commpage_handle_isyncs(rd
->commpage_address
,rd
->code_length
);
567 if (rd
->special
& kCommPageMTCRF
)
568 commpage_handle_mtcrfs(rd
->commpage_address
,rd
->code_length
);
570 if ((mode
== kCommPage64
) && (rd
->special
& kPort32to64
))
571 commpage_port_32_to_64(rd
->commpage_address
,rd
->code_length
);
576 /* Fill in the 32- or 64-bit commpage. Called once for each. */
579 commpage_populate_one(
580 vm_map_t submap
, // the map to populate
581 char ** kernAddressPtr
, // address within kernel of this commpage
582 int mode
, // either kCommPage32 or kCommPage64
583 const char* signature
) // "commpage 32-bit" or "commpage 64-bit"
588 static double two52
= 1048576.0 * 1048576.0 * 4096.0; // 2**52
589 static double ten6
= 1000000.0; // 10**6
590 static uint64_t magicFE
= 0xFEFEFEFEFEFEFEFFLL
; // used to find 0s in strings
591 static uint64_t magic80
= 0x8080808080808080LL
; // also used to find 0s
592 commpage_descriptor
**rd
;
593 short version
= _COMM_PAGE_THIS_VERSION
;
595 next
= NULL
; // initialize next available byte in the commpage
596 cur_routine
= 0; // initialize comm page address of "current" routine
598 commPagePtr
= (char*) commpage_allocate( submap
);
599 *kernAddressPtr
= commPagePtr
; // save address either in commPagePtr32 or 64
601 /* Stuff in the constants. We move things into the comm page in strictly
602 * ascending order, so we can check for overlap and panic if so.
605 commpage_stuff(_COMM_PAGE_SIGNATURE
,signature
,strlen(signature
));
607 commpage_stuff(_COMM_PAGE_VERSION
,&version
,2);
609 commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES
,&_cpu_capabilities
,sizeof(int));
611 c1
= (_cpu_capabilities
& kHasAltivec
) ? -1 : 0;
612 commpage_stuff(_COMM_PAGE_ALTIVEC
,&c1
,1);
614 c1
= (_cpu_capabilities
& k64Bit
) ? -1 : 0;
615 commpage_stuff(_COMM_PAGE_64_BIT
,&c1
,1);
617 if (_cpu_capabilities
& kCache32
)
619 else if (_cpu_capabilities
& kCache64
)
621 else if (_cpu_capabilities
& kCache128
)
623 commpage_stuff(_COMM_PAGE_CACHE_LINESIZE
,&c2
,2);
625 commpage_stuff(_COMM_PAGE_2_TO_52
,&two52
,8);
626 commpage_stuff(_COMM_PAGE_10_TO_6
,&ten6
,8);
627 commpage_stuff(_COMM_PAGE_MAGIC_FE
,&magicFE
,8);
628 commpage_stuff(_COMM_PAGE_MAGIC_80
,&magic80
,8);
630 c8
= 0; // 0 timestamp means "disabled"
631 commpage_stuff(_COMM_PAGE_TIMEBASE
,&c8
,8);
632 commpage_stuff(_COMM_PAGE_TIMESTAMP
,&c8
,8);
633 commpage_stuff(_COMM_PAGE_SEC_PER_TICK
,&c8
,8);
635 /* Now the routines. We try each potential routine in turn,
636 * and copy in any that "match" the platform we are running on.
637 * We require that exactly one routine match for each slot in the
638 * comm page, and panic if not.
641 for( rd
= routines
; *rd
!= NULL
; rd
++ )
642 commpage_stuff_routine(*rd
,mode
);
645 panic("commpage no match on last routine");
647 if (next
> (commPagePtr
+ _COMM_PAGE_AREA_USED
))
648 panic("commpage overflow");
651 // make all that new code executable
653 sync_cache_virtual((vm_offset_t
) commPagePtr
,_COMM_PAGE_AREA_USED
);
657 /* Fill in commpage: called once, during kernel initialization, from the
658 * startup thread before user-mode code is running.
660 * See the top of this file for a list of what you have to do to add
661 * a new routine to the commpage.
665 commpage_populate( void )
667 commpage_init_cpu_capabilities();
668 commpage_populate_one( com_region_map32
, &commPagePtr32
, kCommPage32
, "commpage 32-bit");
669 if (_cpu_capabilities
& k64Bit
) {
670 commpage_populate_one( com_region_map64
, &commPagePtr64
, kCommPage64
, "commpage 64-bit");
671 pmap_init_sharedpage((vm_offset_t
)commPagePtr64
); // Do the 64-bit version