2 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
24 * Here's what to do if you want to add a new routine to the comm page:
26 * 1. Add a definition for its address in osfmk/ppc/cpu_capabilities.h,
27 * being careful to reserve room for future expansion.
29 * 2. Write one or more versions of the routine, each with its own
30 * commpage_descriptor. The tricky part is getting the "special",
31 * "musthave", and "canthave" fields right, so that exactly one
32 * version of the routine is selected for every machine.
33 * The source files should be in osfmk/ppc/commpage/.
35 * 3. Add a ptr to your new commpage_descriptor(s) in the "routines"
36 * static array below. Of course, you'll also have to declare them
39 * 4. Write the code in Libc to use the new routine.
42 #include <mach/mach_types.h>
43 #include <mach/machine.h>
44 #include <mach/vm_map.h>
45 #include <ppc/exception.h>
46 #include <ppc/machine_routines.h>
47 #include <machine/cpu_capabilities.h>
48 #include <machine/commpage.h>
49 #include <machine/pmap.h>
50 #include <vm/vm_kern.h>
51 #include <vm/vm_map.h>
52 #include <ipc/ipc_port.h>
54 extern vm_map_t com_region_map32
; // the 32-bit shared submap, set up in vm init
55 extern vm_map_t com_region_map64
; // the 64-bit shared submap
57 char *commPagePtr32
= NULL
; // virtual address of 32-bit comm page in kernel map
58 char *commPagePtr64
= NULL
; // and 64-bit commpage
59 int _cpu_capabilities
= 0; // define the capability vector
61 static char *next
; // next available byte in comm page
62 static int cur_routine
; // comm page address of "current" routine
63 static int matched
; // true if we've found a match for "current" routine
64 static char *commPagePtr
; // virtual address in kernel of commpage we are working on
66 extern commpage_descriptor compare_and_swap32_on32
;
67 extern commpage_descriptor compare_and_swap32_on64
;
68 extern commpage_descriptor compare_and_swap64
;
69 extern commpage_descriptor atomic_enqueue32
;
70 extern commpage_descriptor atomic_enqueue64
;
71 extern commpage_descriptor atomic_dequeue32_on32
;
72 extern commpage_descriptor atomic_dequeue32_on64
;
73 extern commpage_descriptor atomic_dequeue64
;
74 extern commpage_descriptor memory_barrier_up
;
75 extern commpage_descriptor memory_barrier_mp32
;
76 extern commpage_descriptor memory_barrier_mp64
;
77 extern commpage_descriptor atomic_add32
;
78 extern commpage_descriptor atomic_add64
;
79 extern commpage_descriptor mach_absolute_time_32
;
80 extern commpage_descriptor mach_absolute_time_64
;
81 extern commpage_descriptor mach_absolute_time_lp64
;
82 extern commpage_descriptor spinlock_32_try_mp
;
83 extern commpage_descriptor spinlock_32_try_up
;
84 extern commpage_descriptor spinlock_64_try_mp
;
85 extern commpage_descriptor spinlock_64_try_up
;
86 extern commpage_descriptor spinlock_32_lock_mp
;
87 extern commpage_descriptor spinlock_32_lock_up
;
88 extern commpage_descriptor spinlock_64_lock_mp
;
89 extern commpage_descriptor spinlock_64_lock_up
;
90 extern commpage_descriptor spinlock_32_unlock_mp
;
91 extern commpage_descriptor spinlock_32_unlock_up
;
92 extern commpage_descriptor spinlock_64_unlock_mp
;
93 extern commpage_descriptor spinlock_64_unlock_up
;
94 extern commpage_descriptor pthread_getspecific_sprg3_32
;
95 extern commpage_descriptor pthread_getspecific_sprg3_64
;
96 extern commpage_descriptor pthread_getspecific_uftrap
;
97 extern commpage_descriptor gettimeofday_32
;
98 extern commpage_descriptor gettimeofday_g5_32
;
99 extern commpage_descriptor gettimeofday_g5_64
;
100 extern commpage_descriptor commpage_flush_dcache
;
101 extern commpage_descriptor commpage_flush_icache
;
102 extern commpage_descriptor pthread_self_sprg3
;
103 extern commpage_descriptor pthread_self_uftrap
;
104 extern commpage_descriptor spinlock_relinquish
;
105 extern commpage_descriptor bzero_32
;
106 extern commpage_descriptor bzero_128
;
107 extern commpage_descriptor bcopy_g3
;
108 extern commpage_descriptor bcopy_g4
;
109 extern commpage_descriptor bcopy_970
;
110 extern commpage_descriptor bcopy_64
;
111 extern commpage_descriptor compare_and_swap32_on32b
;
112 extern commpage_descriptor compare_and_swap32_on64b
;
113 extern commpage_descriptor compare_and_swap64b
;
114 extern commpage_descriptor memset_64
;
115 extern commpage_descriptor memset_g3
;
116 extern commpage_descriptor memset_g4
;
117 extern commpage_descriptor memset_g5
;
118 extern commpage_descriptor bigcopy_970
;
120 /* The list of all possible commpage routines. WARNING: the check for overlap
121 * assumes that these routines are in strictly ascending order, sorted by address
122 * in the commpage. We panic if not.
124 static commpage_descriptor
*routines
[] = {
125 &compare_and_swap32_on32
,
126 &compare_and_swap32_on64
,
130 &atomic_dequeue32_on32
,
131 &atomic_dequeue32_on64
,
134 &memory_barrier_mp32
,
135 &memory_barrier_mp64
,
138 &mach_absolute_time_32
,
139 &mach_absolute_time_64
,
140 &mach_absolute_time_lp64
,
145 &spinlock_32_lock_mp
,
146 &spinlock_32_lock_up
,
147 &spinlock_64_lock_mp
,
148 &spinlock_64_lock_up
,
149 &spinlock_32_unlock_mp
,
150 &spinlock_32_unlock_up
,
151 &spinlock_64_unlock_mp
,
152 &spinlock_64_unlock_up
,
153 &pthread_getspecific_sprg3_32
,
154 &pthread_getspecific_sprg3_64
,
155 &pthread_getspecific_uftrap
,
159 &commpage_flush_dcache
,
160 &commpage_flush_icache
,
162 &pthread_self_uftrap
,
163 &spinlock_relinquish
,
170 &compare_and_swap32_on32b
,
171 &compare_and_swap32_on64b
,
172 &compare_and_swap64b
,
181 /* Allocate the commpages and add to one of the shared submaps created by vm.
182 * Called once each for the 32 and 64-bit submaps.
183 * 1. allocate pages in the kernel map (RW)
185 * 3. make a memory entry out of them
186 * 4. map that entry into the shared comm region map (R-only)
190 vm_map_t submap
) // com_region_map32 or com_region_map64
192 vm_offset_t kernel_addr
; // address of commpage in kernel map
193 vm_offset_t zero
= 0;
194 vm_size_t size
= _COMM_PAGE_AREA_USED
; // size actually populated
195 vm_map_entry_t entry
;
199 panic("commpage submap is null");
201 if (vm_allocate(kernel_map
,&kernel_addr
,_COMM_PAGE_AREA_USED
,VM_FLAGS_ANYWHERE
))
202 panic("cannot allocate commpage");
204 if (vm_map_wire(kernel_map
,kernel_addr
,kernel_addr
+_COMM_PAGE_AREA_USED
,VM_PROT_DEFAULT
,FALSE
))
205 panic("cannot wire commpage");
208 * Now that the object is created and wired into the kernel map, mark it so that no delayed
209 * copy-on-write will ever be performed on it as a result of mapping it into user-space.
210 * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
211 * that would be a real disaster.
213 * JMM - What we really need is a way to create it like this in the first place.
215 if (!vm_map_lookup_entry( kernel_map
, vm_map_trunc_page(kernel_addr
), &entry
) || entry
->is_sub_map
)
216 panic("cannot find commpage entry");
217 entry
->object
.vm_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
219 if (mach_make_memory_entry( kernel_map
, // target map
221 kernel_addr
, // offset (address in kernel map)
222 VM_PROT_DEFAULT
, // map it RW
223 &handle
, // this is the object handle we get
224 NULL
)) // parent_entry
225 panic("cannot make entry for commpage");
227 if (vm_map_64( submap
, // target map (shared submap)
228 &zero
, // address (map into 1st page in submap)
229 _COMM_PAGE_AREA_USED
, // size
231 VM_FLAGS_FIXED
, // flags (it must be 1st page in submap)
232 handle
, // port is the memory entry we just made
233 0, // offset (map 1st page in memory entry)
235 VM_PROT_READ
, // cur_protection (R-only in user map)
236 VM_PROT_READ
, // max_protection
237 VM_INHERIT_SHARE
)) // inheritance
238 panic("cannot map commpage");
240 ipc_port_release(handle
);
242 return (void*) kernel_addr
; // return address in kernel map
246 /* Get address (in kernel map) of a commpage field. */
250 int addr_at_runtime
)
252 return (void*) (commPagePtr
+ addr_at_runtime
- _COMM_PAGE_BASE_ADDRESS
);
256 /* Determine number of CPUs on this system. We cannot rely on
257 * machine_info.max_cpus this early in the boot.
260 commpage_cpus( void )
264 cpus
= ml_get_max_cpus(); // NB: this call can block
267 panic("commpage cpus==0");
275 /* Initialize kernel version of _cpu_capabilities vector (used by KEXTs.) */
278 commpage_init_cpu_capabilities( void )
284 pfp
= &(PerProcTable
[0].ppe_vaddr
->pf
); // point to features in per-proc
285 available
= pfp
->Available
;
287 // If AltiVec is disabled make sure it is not reported as available.
288 if ((available
& pfAltivec
) == 0) {
289 _cpu_capabilities
&= ~kHasAltivec
;
292 if (_cpu_capabilities
& kDcbaAvailable
) { // if this processor has DCBA, time it...
293 _cpu_capabilities
|= commpage_time_dcba(); // ...and set kDcbaRecommended if it helps.
296 cpus
= commpage_cpus(); // how many CPUs do we have
297 if (cpus
== 1) _cpu_capabilities
|= kUP
;
298 _cpu_capabilities
|= (cpus
<< kNumCPUsShift
);
300 if (_cpu_capabilities
& k64Bit
) // 64-bit processors use SPRG3 for TLS
301 _cpu_capabilities
|= kFastThreadLocalStorage
;
305 /* Copy data into commpage. */
313 char *dest
= commpage_addr_of(address
);
316 panic("commpage overlap: %08 - %08X", dest
, next
);
318 bcopy((const char*)source
,dest
,length
);
320 next
= (dest
+ length
);
324 /* Modify commpage code in-place for this specific platform. */
330 uint32_t search_mask
,
331 uint32_t search_pattern
,
333 uint32_t new_pattern
,
334 int (*check
)(uint32_t instruction
) )
336 int words
= bytes
>> 2;
339 while( (--words
) >= 0 ) {
341 if ((word
& search_mask
)==search_pattern
) {
342 if ((check
==NULL
) || (check(word
))) { // check instruction if necessary
353 /* Check to see if exactly one bit is set in a MTCRF instruction's FXM field.
359 int x
= (mtcrf
>> 12) & 0xFF; // isolate the FXM field of the MTCRF
362 panic("commpage bad mtcrf");
364 return (x
& (x
-1))==0 ? 1 : 0; // return 1 iff exactly 1 bit set in FXM field
368 /* Check to see if a RLWINM (whose ME is 31) is a SRWI. Since to shift right n bits
369 * you must "RLWINM ra,rs,32-n,n,31", if (SH+MB)==32 then we have a SRWI.
375 int sh
= (rlwinm
>> 11) & 0x1F; // extract SH field of RLWINM, ie bits 16-20
376 int mb
= (rlwinm
>> 6 ) & 0x1F; // extract MB field of RLWINM, ie bits 21-25
378 return (sh
+ mb
) == 32; // it is a SRWI if (SH+MB)==32
382 /* Handle kCommPageDCBA bit: the commpage routine uses DCBA. If the machine we're
383 * running on doesn't benefit from use of that instruction, map them to NOPs
387 commpage_handle_dcbas(
391 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
393 if ( (_cpu_capabilities
& kDcbaRecommended
) == 0 ) {
394 ptr
= commpage_addr_of(address
);
396 search_mask
= 0xFC0007FE; // search x-form opcode bits
397 search
= 0x7C0005EC; // for a DCBA
398 replace_mask
= 0xFFFFFFFF; // replace all bits...
399 replace
= 0x60000000; // ...with a NOP
401 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
406 /* Handle kCommPageSYNC bit: this routine uses SYNC, LWSYNC, or EIEIO. If we're
407 * running on a UP machine, map them to NOPs.
410 commpage_handle_syncs(
414 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
416 if (_NumCPUs() == 1) {
417 ptr
= commpage_addr_of(address
);
419 search_mask
= 0xFC0005FE; // search x-form opcode bits (but ignore bit 0x00000200)
420 search
= 0x7C0004AC; // for a SYNC, LWSYNC, or EIEIO
421 replace_mask
= 0xFFFFFFFF; // replace all bits...
422 replace
= 0x60000000; // ...with a NOP
424 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
429 /* Handle kCommPageISYNC bit: this routine uses ISYNCs. If we're running on a UP machine,
433 commpage_handle_isyncs(
437 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
439 if (_NumCPUs() == 1) {
440 ptr
= commpage_addr_of(address
);
442 search_mask
= 0xFC0007FE; // search xl-form opcode bits
443 search
= 0x4C00012C; // for an ISYNC
444 replace_mask
= 0xFFFFFFFF; // replace all bits...
445 replace
= 0x60000000; // ...with a NOP
447 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
452 /* Handle kCommPageMTCRF bit. When this was written (3/03), the assembler did not
453 * recognize the special form of MTCRF instructions, in which exactly one bit is set
454 * in the 8-bit mask field. Bit 11 of the instruction should be set in this case,
455 * since the 970 and probably other 64-bit processors optimize it. Once the assembler
456 * has been updated this code can be removed, though it need not be.
459 commpage_handle_mtcrfs(
463 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
465 if (_cpu_capabilities
& k64Bit
) {
466 ptr
= commpage_addr_of(address
);
468 search_mask
= 0xFC0007FE; // search x-form opcode bits
469 search
= 0x7C000120; // for a MTCRF
470 replace_mask
= 0x00100000; // replace bit 11...
471 replace
= 0x00100000; // ...with a 1-bit
473 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,commpage_onebit
);
478 /* Port 32-bit code to 64-bit for use in the 64-bit commpage. This sounds fancier than
479 * it is. We do the following:
480 * - map "cmpw*" into "cmpd*"
481 * - map "srwi" into "srdi"
482 * Perhaps surprisingly, this is enough to permit lots of code to run in 64-bit mode, as
483 * long as it is written with this in mind.
486 commpage_port_32_to_64(
490 uint32_t *ptr
, search_mask
, search
, replace_mask
, replace
;
492 ptr
= commpage_addr_of(address
);
494 search_mask
= 0xFC2007FE; // search x-form opcode bits (and L bit)
495 search
= 0x7C000000; // for a CMPW
496 replace_mask
= 0x00200000; // replace bit 10 (L)...
497 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
498 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
500 search_mask
= 0xFC2007FE; // search x-form opcode bits (and L bit)
501 search
= 0x7C000040; // for a CMPLW
502 replace_mask
= 0x00200000; // replace bit 10 (L)...
503 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
504 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
506 search_mask
= 0xFC200000; // search d-form opcode bits (and L bit)
507 search
= 0x28000000; // for a CMPLWI
508 replace_mask
= 0x00200000; // replace bit 10 (L)...
509 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
510 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
512 search_mask
= 0xFC200000; // search d-form opcode bits (and L bit)
513 search
= 0x2C000000; // for a CMPWI
514 replace_mask
= 0x00200000; // replace bit 10 (L)...
515 replace
= 0x00200000; // ...with a 1-bit, converting word to doubleword compares
516 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,NULL
);
518 search_mask
= 0xFC00003E; // search d-form opcode bits and ME (mask end) field
519 search
= 0x5400003E; // for an RLWINM with ME=31 (which might be a "srwi")
520 replace_mask
= 0xFC00003E; // then replace RLWINM's opcode and ME field to make a RLDICL
521 replace
= 0x78000002; // opcode is 30, ME is 0, except we add 32 to SH amount
522 commpage_change(ptr
,length
,search_mask
,search
,replace_mask
,replace
,commpage_srwi
);
526 /* Copy a routine into comm page if it matches running machine.
529 commpage_stuff_routine(
530 commpage_descriptor
*rd
,
531 int mode
) // kCommPage32 or kCommPage64
536 if ( (rd
->special
& mode
) == 0 ) // is this routine useable in this mode?
539 if (rd
->commpage_address
!= cur_routine
) {
540 if ((cur_routine
!=0) && (matched
==0))
541 panic("commpage no match for last, next address %08x", rd
->commpage_address
);
542 cur_routine
= rd
->commpage_address
;
546 must
= _cpu_capabilities
& rd
->musthave
;
547 cant
= _cpu_capabilities
& rd
->canthave
;
549 if ((must
== rd
->musthave
) && (cant
== 0)) {
551 panic("commpage multiple matches for address %08x", rd
->commpage_address
);
553 routine_code
= ((char*)rd
) + rd
->code_offset
;
555 commpage_stuff(rd
->commpage_address
,routine_code
,rd
->code_length
);
557 if (rd
->special
& kCommPageDCBA
)
558 commpage_handle_dcbas(rd
->commpage_address
,rd
->code_length
);
560 if (rd
->special
& kCommPageSYNC
)
561 commpage_handle_syncs(rd
->commpage_address
,rd
->code_length
);
563 if (rd
->special
& kCommPageISYNC
)
564 commpage_handle_isyncs(rd
->commpage_address
,rd
->code_length
);
566 if (rd
->special
& kCommPageMTCRF
)
567 commpage_handle_mtcrfs(rd
->commpage_address
,rd
->code_length
);
569 if ((mode
== kCommPage64
) && (rd
->special
& kPort32to64
))
570 commpage_port_32_to_64(rd
->commpage_address
,rd
->code_length
);
575 /* Fill in the 32- or 64-bit commpage. Called once for each. */
578 commpage_populate_one(
579 vm_map_t submap
, // the map to populate
580 char ** kernAddressPtr
, // address within kernel of this commpage
581 int mode
, // either kCommPage32 or kCommPage64
582 const char* signature
) // "commpage 32-bit" or "commpage 64-bit"
587 static double two52
= 1048576.0 * 1048576.0 * 4096.0; // 2**52
588 static double ten6
= 1000000.0; // 10**6
589 static uint64_t magicFE
= 0xFEFEFEFEFEFEFEFFLL
; // used to find 0s in strings
590 static uint64_t magic80
= 0x8080808080808080LL
; // also used to find 0s
591 commpage_descriptor
**rd
;
592 short version
= _COMM_PAGE_THIS_VERSION
;
594 next
= NULL
; // initialize next available byte in the commpage
595 cur_routine
= 0; // initialize comm page address of "current" routine
597 commPagePtr
= (char*) commpage_allocate( submap
);
598 *kernAddressPtr
= commPagePtr
; // save address either in commPagePtr32 or 64
600 /* Stuff in the constants. We move things into the comm page in strictly
601 * ascending order, so we can check for overlap and panic if so.
604 commpage_stuff(_COMM_PAGE_SIGNATURE
,signature
,strlen(signature
));
606 commpage_stuff(_COMM_PAGE_VERSION
,&version
,2);
608 commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES
,&_cpu_capabilities
,sizeof(int));
610 c1
= (_cpu_capabilities
& kHasAltivec
) ? -1 : 0;
611 commpage_stuff(_COMM_PAGE_ALTIVEC
,&c1
,1);
613 c1
= (_cpu_capabilities
& k64Bit
) ? -1 : 0;
614 commpage_stuff(_COMM_PAGE_64_BIT
,&c1
,1);
616 if (_cpu_capabilities
& kCache32
)
618 else if (_cpu_capabilities
& kCache64
)
620 else if (_cpu_capabilities
& kCache128
)
622 commpage_stuff(_COMM_PAGE_CACHE_LINESIZE
,&c2
,2);
624 commpage_stuff(_COMM_PAGE_2_TO_52
,&two52
,8);
625 commpage_stuff(_COMM_PAGE_10_TO_6
,&ten6
,8);
626 commpage_stuff(_COMM_PAGE_MAGIC_FE
,&magicFE
,8);
627 commpage_stuff(_COMM_PAGE_MAGIC_80
,&magic80
,8);
629 c8
= 0; // 0 timestamp means "disabled"
630 commpage_stuff(_COMM_PAGE_TIMEBASE
,&c8
,8);
631 commpage_stuff(_COMM_PAGE_TIMESTAMP
,&c8
,8);
632 commpage_stuff(_COMM_PAGE_SEC_PER_TICK
,&c8
,8);
634 /* Now the routines. We try each potential routine in turn,
635 * and copy in any that "match" the platform we are running on.
636 * We require that exactly one routine match for each slot in the
637 * comm page, and panic if not.
640 for( rd
= routines
; *rd
!= NULL
; rd
++ )
641 commpage_stuff_routine(*rd
,mode
);
644 panic("commpage no match on last routine");
646 if (next
> (commPagePtr
+ _COMM_PAGE_AREA_USED
))
647 panic("commpage overflow");
650 // make all that new code executable
652 sync_cache_virtual((vm_offset_t
) commPagePtr
,_COMM_PAGE_AREA_USED
);
656 /* Fill in commpage: called once, during kernel initialization, from the
657 * startup thread before user-mode code is running.
659 * See the top of this file for a list of what you have to do to add
660 * a new routine to the commpage.
664 commpage_populate( void )
666 commpage_init_cpu_capabilities();
667 commpage_populate_one( com_region_map32
, &commPagePtr32
, kCommPage32
, "commpage 32-bit");
668 if (_cpu_capabilities
& k64Bit
) {
669 commpage_populate_one( com_region_map64
, &commPagePtr64
, kCommPage64
, "commpage 64-bit");
670 pmap_init_sharedpage((vm_offset_t
)commPagePtr64
); // Do the 64-bit version