/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
#include <ppc/proc_reg.h>
#include <mach/ppc/vm_param.h>
#include <sys/errno.h>
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * void pmap_zero_page(vm_offset_t pa)
 *
 * Zero a page of physical memory.  This routine runs in 32- or 64-bit mode,
 * and handles both 32- and 128-byte cache lines.
 */
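
// Illustrative note (a sketch of the effect, with hypothetical C names): with data
// translation (DR) turned off by ml_set_physical_disabled, this is a physical-address
// bzero of one page -- it clears the PPC_PGBYTES page whose page number arrives in r3,
// i.e. bytes (r3<<12) .. (r3<<12)+PPC_PGBYTES-1, one dcbz/dcbz128 cache line at a time,
// then branches to ml_restore to put the MSR back.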
        .globl  EXT(pmap_zero_page)

        mflr    r12                             // save return address
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        mtlr    r12                             // restore return address
        andi.   r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size
        subfic  r4,r9,PPC_PGBYTES               // r4 <- starting offset in page
        bt++    pf64Bitb,page0S4                // Go do the big guys...
        slwi    r3,r3,12                        // get page address from page num
        b       page_zero_1                     // Jump to line aligned loop...

page0S4:
        sldi    r3,r3,12                        // get page address from page num

page_zero_1:                                    // loop zeroing cache lines
        sub.    r5,r4,r9                        // more to go?
        dcbz128 r3,r4                           // zero either 32 or 128 bytes
        sub     r4,r5,r9                        // generate next offset

        b       EXT(ml_restore)                 // restore MSR and do the isync
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * phys_copy(src, dst, bytecount)
 *
 * This routine will copy bytecount bytes from physical address src to physical
 * address dst.  It runs in 64-bit mode if necessary, but does not handle
 * overlap or make any attempt to be optimal.  Length must be a signed word.
 * Not performance critical.
 */
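
// Roughly equivalent C (an illustrative sketch only; with translation disabled,
// src and dst here are physical addresses):
//      while (bytecount >= 4) { *(uint32_t *)dst = *(uint32_t *)src; src += 4; dst += 4; bytecount -= 4; }
//      while (bytecount-- > 0) { *(uint8_t *)dst = *(uint8_t *)src; src++; dst++; }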
        .globl  EXT(phys_copy)

        rlwinm  r3,r3,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        mflr    r12                             // get return address
        rlwimi  r3,r4,0,0,31                    ; Combine bottom of long long to full 64-bits
        rlwinm  r4,r5,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        rlwimi  r4,r6,0,0,31                    ; Combine bottom of long long to full 64-bits
        mtlr    r12                             // restore return address
        subic.  r5,r7,4                         // a word to copy?

phys_copy_1:                                    // loop copying words
        subic.  r5,r5,4                         // more to go?

        addic.  r5,r5,4                         // restore count
        ble     phys_copy_4                     // no more

        // Loop is aligned here

phys_copy_3:                                    // loop copying bytes
        subic.  r5,r5,1                         // more to go?

        b       EXT(ml_restore)                 // restore MSR and do the isync
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * pmap_copy_page(src, dst)
 *
 * This routine will copy the physical page src to physical page dst.
 *
 * This routine assumes that src and dst are page numbers and that the
 * destination is cached.  It runs on 32- and 64-bit processors, with and
 * without altivec, and with 32- and 128-byte cache lines.
 * We must also assume that no one will be executing within the destination
 * page, and that this will be used for paging.  Because this
 * is a common routine, we have tuned loops for each processor class.
 */
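
// Conceptually (illustrative sketch only): copy PPC_PGBYTES bytes from page (src<<12)
// to page (dst<<12) with data translation off, using FPRs on G3, VRs on G4 and on
// 64-bit parts with altivec, or 8-byte ld/std on 64-bit parts without VMX, and then
// sweep the destination page out of the instruction cache with icbi.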
#define kSFSize (FM_SIZE+160)

ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)

        lis     r2,hi16(MASK(MSR_VEC))          ; Get the vector flag
        mflr    r0                              // get return
        ori     r2,r2,lo16(MASK(MSR_FP))        ; Add the FP flag

        stwu    r1,-kSFSize(r1)                 // set up a stack frame for VRs or FPRs
        mfmsr   r11                             // save MSR at entry
        mfsprg  r10,2                           // get feature flags
        andc    r11,r11,r2                      // Clear out vec and fp
        ori     r2,r2,lo16(MASK(MSR_EE))        // Get EE on also
        andc    r2,r11,r2                       // Clear out EE as well
        mtcrf   0x02,r10                        // we need to test pf64Bit
        ori     r2,r2,MASK(MSR_FP)              // must enable FP for G3...
        mtcrf   0x80,r10                        // we need to test pfAltivec too
        oris    r2,r2,hi16(MASK(MSR_VEC))       // enable altivec for G4 (ignored if G3)
        mtmsr   r2                              // turn EE off, FP and VEC on

        bt++    pf64Bitb,pmap_copy_64           // skip if 64-bit processor (only they take hint)
        slwi    r3,r3,12                        // get page address from page num
        slwi    r4,r4,12                        // get page address from page num
        rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1  // get ready to turn off DR
        bt      pfAltivecb,pmap_copy_g4         // altivec but not 64-bit means G4

        // G3 -- copy using FPRs

        stfd    f0,FM_SIZE+0(r1)                // save the 4 FPRs we use to copy
        stfd    f1,FM_SIZE+8(r1)
        li      r5,PPC_PGBYTES/32               // count of cache lines in a page
        stfd    f2,FM_SIZE+16(r1)
        stfd    f3,FM_SIZE+24(r1)
        mtmsr   r12                             // turn off DR after saving FPRs on stack

pmap_g3_copy_loop:                              // loop over 32-byte cache lines
        dcbz    0,r4                            // avoid read of dest line

        dcbst   0,r4                            // flush dest line to RAM

        bdnz    pmap_g3_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r6,PPC_PGBYTES-32               // point to last line in page
pmap_g3_icache_flush:
        subic.  r5,r6,32                        // more to go?
        icbi    r4,r6                           // flush another line in icache
        subi    r6,r5,32                        // get offset to next line

        bne     pmap_g3_icache_flush

        mtmsr   r2                              // turn DR back on

        lfd     f0,FM_SIZE+0(r1)                // restore the FPRs
        lfd     f2,FM_SIZE+16(r1)
        lfd     f3,FM_SIZE+24(r1)

        b       pmap_g4_restore                 // restore MSR and done
        // G4 -- copy using VRs

pmap_copy_g4:                                   // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
        la      r9,FM_SIZE+16(r1)               // r9 <- base of the VR save area on stack
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        stvx    v0,0,r9                         // save some VRs so we can use them to copy
        li      r7,48                           // another offset

        li      r0,PPC_PGBYTES/64               // we loop over 64-byte chunks

        li      r8,96                           // get look-ahead for touch

        mtmsr   r12                             // now we've saved VRs on stack, turn off DR
        isync                                   // wait for it to happen

        .align  5                               // align inner loops
pmap_g4_copy_loop:                              // loop over 64-byte chunks
        dcbt    r3,r8                           // touch 3 lines ahead
        nop                                     // avoid a 17-word loop...
        dcbt    r3,r9                           // touch 4 lines ahead

        dcba    0,r4                            // avoid pre-fetch of 1st dest line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48

        dcba    r6,r4                           // avoid pre-fetch of 2nd line
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        dcbf    0,r4                            // push line 1
        dcbf    r6,r4                           // and line 2

        bdnz    pmap_g4_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-32               // point to last line in page
pmap_g4_icache_flush:
        subic.  r9,r8,32                        // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,32                        // get offset to next line

        bne     pmap_g4_icache_flush

        mtmsr   r2                              // turn DR back on

        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        lvx     v0,0,r9                         // restore the VRs

pmap_g4_restore:                                // r11=MSR
        mtmsr   r11                             // turn EE on, VEC and FP off
        isync                                   // wait for it to happen
        addi    r1,r1,kSFSize                   // pop off our stack frame
        lwz     r0,8(r1)                        // restore return address
        // 64-bit/128-byte processor: copy using VRs

pmap_copy_64:                                   // r10=features, r11=old MSR
        sldi    r3,r3,12                        // get page address from page num
        sldi    r4,r4,12                        // get page address from page num
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        bf      pfAltivecb,pmap_novmx_copy      // altivec suppressed...
        stvx    v0,0,r9                         // save 8 VRs so we can copy without bubbles
        li      r7,48                           // another offset
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks

        addi    r9,r9,64                        // advance base ptr so we can store another 4

        li      r0,MASK(MSR_DR)                 // get DR bit

        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF

        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        li      r8,-128                         // offset so we can reach back one line
        mtmsrd  r12                             // now we've saved VRs, turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream

        .align  5                               // align inner loops
pmap_64_copy_loop:                              // loop over 128-byte chunks
        dcbz128 0,r4                            // avoid read of destination line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64                        // don't have enough GPRs so add 64 2x
        lvx     v4,0,r3                         // offset 64
        lvx     v5,r5,r3                        // offset 80
        lvx     v6,r6,r3                        // offset 96
        lvx     v7,r7,r3                        // offset 112

        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48

        stvx    v4,0,r4                         // offset 64
        stvx    v5,r5,r4                        // offset 80
        stvx    v6,r6,r4                        // offset 96
        stvx    v7,r7,r4                        // offset 112

        dcbf    r8,r4                           // flush the line we just wrote
        bdnz    pmap_64_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page
pmap_64_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line

        bne     pmap_64_icache_flush

        mtmsrd  r2                              // turn DR back on, SF off

        la      r9,FM_SIZE+16(r1)               // get base address of VR save area on stack
        lvx     v0,0,r9                         // restore the VRs

        b       pmap_g4_restore                 // restore lower half of MSR and return
        // Copy on 64-bit without VMX

pmap_novmx_copy:
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks

        li      r0,MASK(MSR_DR)                 // get DR bit
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        mtmsrd  r12                             // turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream

pmap_novmx_copy_loop:                           // loop over 128-byte cache lines
        dcbz128 0,r4                            // avoid read of dest line

        ld      r0,0(r3)                        // Load half a line

        std     r0,0(r4)                        // Store half a line

        ld      r0,64(r3)                       // Load half a line

        std     r0,64(r4)                       // Store half a line

        dcbf    0,r4                            // flush the line we just wrote

        bdnz    pmap_novmx_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page

pmap_novmx_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line

        bne     pmap_novmx_icache_flush

        mtmsrd  r2                              // turn DR back on, SF off

        b       pmap_g4_restore                 // restore lower half of MSR and return
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
//
// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
// These routines all run on both 32- and 64-bit machines, though because they are called
// by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
// by MapUserAddressSpace will, however, be 64 bits on 64-bit machines.  Beware of
// using compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
// is no need to store or load it, which are mode-dependent operations since it could be
// a 64-bit pointer.
#define kkFrameSize (FM_SIZE+32)

#define kkBufSize   (FM_SIZE+0)
#define kkCR        (FM_SIZE+4)
#define kkSource    (FM_SIZE+8)
#define kkDest      (FM_SIZE+12)
#define kkCountPtr  (FM_SIZE+16)
#define kkR31Save   (FM_SIZE+20)
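
// Illustrative view of the frame's local area (a sketch; offsets are from FM_SIZE):
//      +0  saved buffer length    +4  caller's CR      +8  source arg
//      +12 dest arg               +16 count ptr arg    +20 caller's r31
// (24 bytes are used; the frame is padded out to FM_SIZE+32.)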
// nonvolatile CR bits we use as flags in cr3
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * copyoutstr(src, dst, maxcount, count)
 *      vm_size_t maxcount;
 *
 * Set *count to the number of bytes copied.
 */

ENTRY(copyoutstr, TAG_NO_FRAME_USED)
        mfcr    r2                              // we use nonvolatile cr3
        crset   kkString                        // flag as a string op
        mr      r10,r4                          // for copyout, dest ptr (r4) is in user space
        stw     r0,0(r6)                        // initialize #bytes moved
        crclr   kkIn                            // flag as copyout
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * copyinstr(src, dst, maxcount, count)
 *      vm_size_t maxcount;
 *
 * Set *count to the number of bytes copied.
 * If dst == NULL, don't copy, just count bytes.
 * Only currently called from klcopyinstr.
 */

ENTRY(copyinstr, TAG_NO_FRAME_USED)
        mfcr    r2                              // we use nonvolatile cr3
        cmplwi  r4,0                            // dst==NULL?
        crset   kkString                        // flag as a string op
        mr      r10,r3                          // for copyin, source ptr (r3) is in user space
        crmove  kkNull,cr0_eq                   // remember if (dst==NULL)
        stw     r0,0(r6)                        // initialize #bytes moved
        crset   kkIn                            // flag as copyin (rather than copyout)
        b       copyJoin1                       // skip over the "crclr kkNull"
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * copyout(src, dst, count)
 */

        .globl  EXT(copyoutmsg)

        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
        stw     r12,0x6100+(12*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0xC(0)       ; INSTRUMENT - Save it

        mfcr    r2                              // save caller's CR
        crclr   kkString                        // not a string version
        mr      r10,r4                          // dest (r4) is user-space ptr
        crclr   kkIn                            // flag as copyout
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * copyin(src, dst, count)
 */

        .globl  EXT(copyinmsg)

        mfcr    r2                              // save caller's CR
        crclr   kkString                        // not a string version
        mr      r10,r3                          // source (r3) is user-space ptr in copyin
        crset   kkIn                            // flag as copyin
// Common code to handle setup for all the copy variants:
//      r2 = caller's CR, since we use cr3
//      r3-r6 = parameters
//      r10 = user-space ptr (r3 if copyin, r4 if copyout)
//      cr3 = kkIn, kkString, kkNull flags

        crclr   kkNull                          // (dst==NULL) convention not used with this call
copyJoin1:                                      // enter from copyinstr with kkNull set
        mflr    r0                              // get return address
        cmplwi  r5,0                            // buffer length 0?
        lis     r9,0x1000                       // r9 <- 0x10000000 (256MB)
        stw     r0,FM_LR_SAVE(r1)               // save return
        cmplw   cr1,r5,r9                       // buffer length > 256MB ?
        mfsprg  r8,2                            // get the features
        beq--   copyinout_0                     // 0 length is degenerate case
        stwu    r1,-kkFrameSize(r1)             // set up stack frame
        stw     r2,kkCR(r1)                     // save caller's CR since we use cr3
        mtcrf   0x02,r8                         // move pf64Bit to cr6
        stw     r3,kkSource(r1)                 // save args across MapUserAddressSpace

        crmove  kk64bit,pf64Bitb                // remember if this is a 64-bit processor
        stw     r6,kkCountPtr(r1)
        stw     r31,kkR31Save(r1)               // we use r31 globally for mapped user ptr
        li      r31,0                           // no mapped ptr yet
// Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
// the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
// (as most are!), but if it is >256MB we eventually return ENAMETOOLONG.  This restriction
// is due to MapUserAddressSpace; we don't want to consume more than two segments for
// the mapping.
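// In effect (illustrative sketch): if (len > 0x10000000) { if (!string_op) return ENAMETOOLONG; len = 0x10000000; }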

        ble++   cr1,copyin0                     // skip if buffer length <= 256MB
        bf      kkString,copyinout_too_big      // error if not a string op
        mr      r5,r9                           // silently clamp buffer length to 256MB
        stw     r9,kkBufSize(r1)                // update saved copy too
// Set up thread_recover in case we hit an illegal address.

        mfsprg  r8,1                            /* Get the current act */
        lis     r2,hi16(copyinout_error)
        lwz     r7,ACT_THREAD(r8)
        ori     r2,r2,lo16(copyinout_error)
        lwz     r3,ACT_VMMAP(r8)                // r3 <- vm_map virtual address
        stw     r2,THREAD_RECOVER(r7)
// Map user segment into kernel map, turn on 64-bit mode.
//      r5 = buffer length
//      r10 = user space ptr (r3 if copyin, r4 if copyout)
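// In effect (illustrative sketch): (r3,r4) <- MapUserAddressSpace(vm_map, (addr64_t)user_ptr, length);
// the 64-bit result is the kernel-visible alias of the user buffer, or 0 if the mapping failed.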

        mr      r6,r5                           // Set length to map
        li      r4,0                            // Note: we only do this 32-bit for now
        mr      r5,r10                          // arg2 <- user space ptr

        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[13] - Take stamp before MapUserAddressSpace
        stw     r12,0x6100+(13*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0xC(0)       ; INSTRUMENT - Save it

        bl      EXT(MapUserAddressSpace)        // set r3 <- address in kernel map of user operand

        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[14] - Take stamp after MapUserAddressSpace
        stw     r12,0x6100+(14*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0xC(0)       ; INSTRUMENT - Save it

        or.     r0,r3,r4                        // Did we fail the mapping?
        mr      r31,r4                          // r31 <- mapped ptr into user space (may be 64-bit)
        beq--   copyinout_error                 // was 0, so there was an error making the mapping
        bf--    kk64bit,copyin1                 // skip if a 32-bit processor

        rldimi  r31,r3,32,0                     // slam high-order bits into mapped ptr
        mfmsr   r4                              // if 64-bit, turn on SF so we can use returned ptr

        rldimi  r4,r0,63,MSR_SF_BIT             // light bit 0
        mtmsrd  r4                              // turn on 64-bit mode
        isync                                   // wait for mode to change
// Load r3-r5, substituting the mapped ptr as appropriate.

        lwz     r5,kkBufSize(r1)                // restore length to copy
        bf      kkIn,copyin2                    // skip if copyout
        lwz     r4,kkDest(r1)                   // copyin: source is mapped, dest is r4 at entry
        mr      r3,r31                          // source is mapped ptr

copyin2:                                        // handle copyout
        lwz     r3,kkSource(r1)                 // source is kernel buffer (r3 at entry)
        mr      r4,r31                          // dest is mapped ptr into user space
// Finally, all set up to copy:
//      r3 = source ptr (mapped if copyin)
//      r4 = dest ptr (mapped if copyout)
//      r31 = mapped ptr returned by MapUserAddressSpace
//      cr3 = kkIn, kkString, kk64bit, and kkNull flags

        bt      kkString,copyString             // handle copyinstr and copyoutstr
        bl      EXT(bcopy)                      // copyin and copyout: let bcopy do the work
        li      r3,0                            // return success
// Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
// from error recovery if we get a DSI accessing user space.  Clear the recovery ptr
// and pop off the frame.  Note that we have kept
// the mapped ptr into user space in r31, as a reg64_t type (ie, a 64-bit ptr on
// 64-bit machines).  We must unpack r31 into an addr64_t in (r3,r4) before passing
// it to ReleaseUserAddressSpace.
//      r3 = 0, EFAULT, or ENAMETOOLONG
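// e.g. (illustrative sketch): ReleaseUserAddressSpace((addr64_t)r31), with the 64-bit value
// split across the (r3,r4) register pair -- presumably high half in r3, low half in r4.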

copyinx:
        lwz     r2,kkCR(r1)                     // get caller's cr3
        mfsprg  r6,1                            // Get the current act
        lwz     r10,ACT_THREAD(r6)

        bf--    kk64bit,copyinx1                // skip if 32-bit processor

        rldicl  r12,r12,0,MSR_SF_BIT+1          // if 64-bit processor, turn 64-bit mode off
        mtmsrd  r12                             // turn SF off and EE back on
        isync                                   // wait for the mode to change

copyinx1:
        lwz     r31,kkR31Save(r1)               // restore caller's r31
        addi    r1,r1,kkFrameSize               // pop off our stack frame
        lwz     r0,FM_LR_SAVE(r1)

        stw     r4,THREAD_RECOVER(r10)          // Clear recovery

        mtcrf   0x10,r2                         // restore cr3
/* We get here via the exception handler if an illegal
 * user memory reference was made.  This error handler is used by
 * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
 * they were at the point of fault, so for example the cr3 flags are valid.
 */

copyinout_error:
        li      r3,EFAULT                       // return error

copyinout_0:                                    // degenerate case: 0-length copy
        mtcrf   0x10,r2                         // restore cr3
        li      r3,0                            // return success

copyinout_too_big:                              // degenerate case
        mtcrf   0x10,r2                         // restore cr3
        lwz     r1,0(r1)                        // pop off stack frame
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
//
// Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
// if necessary, and:
//      r3 = source ptr, mapped if copyinstr
//      r4 = dest ptr, mapped if copyoutstr
//      r5 = buffer length
//      r31 = mapped ptr returned by MapUserAddressSpace
//      cr3 = kkIn, kkString, kkNull, and kk64bit flags
//
// We do word copies unless the buffer is very short, then use a byte copy loop
// for the leftovers if necessary.

copyString:
        li      r12,0                           // Set header bytes count to zero
        cmplwi  cr1,r5,20                       // is buffer very short?
        mtctr   r5                              // assuming short, set up loop count for bytes
        blt     cr1,copyinstr8                  // too short for word loop
        andi.   r12,r3,0x3                      // is source ptr word aligned?
        bne     copyinstr11                     // bytes loop

copyinstr1:
        srwi    r6,r5,2                         // get #words in buffer
        mtctr   r6                              // set up word loop count
        lis     r10,hi16(0xFEFEFEFF)            // load magic constants into r10 and r11
        lis     r11,hi16(0x80808080)
        ori     r10,r10,lo16(0xFEFEFEFF)
        ori     r11,r11,lo16(0x80808080)
        bf      kkNull,copyinstr6               // enter loop that copies
        b       copyinstr5                      // use loop that just counts
// Word loop(s).  They do a word-parallel search for 0s, using the following
// inobvious but very efficient test:
//      y = data + 0xFEFEFEFF
//      z = ~data & 0x80808080
// If (y & z)==0, then all bytes in the data word are nonzero.  We need two copies of
// this loop, since if we test kkNull in the loop then it becomes 9 words long.
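//
// Worked example (illustrative): data = 0x64630061, ie bytes 'd','c',0x00,'a' in
// big-endian order:
//      y = 0x64630061 + 0xFEFEFEFF = 0x6361FF60
//      z = ~0x64630061 & 0x80808080 = 0x80808080
//      y & z = 0x00008000      (nonzero; the 0x80 sits in the zero byte's position)
// whereas data = 0x64636261 (no zero byte) gives y & z = 0.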

        .align  5                               // align inner loops for speed
copyinstr5:                                     // version that counts but does not copy
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // increment source ptr
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7
        bdnzt   cr0_eq,copyinstr5               // if r7==0, then all bytes are nonzero

        .align  5                               // align inner loops for speed
copyinstr6:                                     // version that counts and copies
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // increment source ptr
        addi    r4,r4,4                         // increment dest ptr while we wait for data
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7
        stw     r8,-4(r4)                       // pack all 4 bytes into buffer
        bdnzt   cr0_eq,copyinstr6               // if r7==0, then all bytes are nonzero
// Either a 0 was found or the buffer was filled.  The above algorithm has mapped nonzero bytes to 0
// and 0 bytes to 0x80, with one exception: 0x01 bytes preceding the first 0 are also
// mapped to 0x80.  We must mask out these false hits before searching for the 0x80 byte.
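//
// Worked example (illustrative): data = 0x63010041, ie bytes 'c',0x01,0x00,'A':
//      r7 = (data + 0xFEFEFEFF) & ~data & 0x80808080 = 0x00808000
// so both the 0x01 byte and the real 0 byte are flagged.  Rotating data left by 7
// moves its 0x01 bits onto the 0x80 positions (here giving 0x808020B1), and
// r7 & ~0x808020B1 = 0x00008000 keeps only the true 0 byte.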

        crnot   kkZero,cr0_eq                   // 0 found iff cr0_eq is off
        mfctr   r6                              // get #words remaining in buffer
        rlwinm  r2,r8,7,0,31                    // move 0x01 bits to 0x80 position
        slwi    r6,r6,2                         // convert to #bytes remaining
        andc    r7,r7,r2                        // turn off false hits from 0x0100 worst case
        rlwimi  r6,r5,0,30,31                   // add in odd bytes leftover in buffer
        srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
        addi    r6,r6,4                         // don't count last word xferred (yet)
        cntlzw  r7,r7                           // now we can find the 0 byte (ie, the 0x80)
        srwi    r7,r7,3                         // convert 8,16,24,32 to 1,2,3,4
        sub.    r6,r6,r7                        // account for nonzero bytes in last word
        bt++    kkZero,copyinstr10              // 0 found, so done

        beq     copyinstr10                     // r6==0, so buffer truly full
        mtctr   r6                              // 0 not found, loop over r6 bytes
        b       copyinstr8                      // enter byte loop for last 1-3 leftover bytes
// Byte loop.  This is used for very small buffers and for the odd bytes left over
// after searching and copying words at a time.

        .align  5                               // align inner loops for speed
copyinstr8:                                     // loop over bytes of source
        lbz     r0,0(r3)                        // get next byte of source
        addi    r4,r4,1                         // increment dest addr whether we store or not
        bt--    kkNull,copyinstr9               // don't store (was copyinstr with NULL ptr)
        bdnzf   cr0_eq,copyinstr8               // loop if byte not 0 and more room in buffer

        mfctr   r6                              // get #bytes left in buffer
        crmove  kkZero,cr0_eq                   // remember if 0 found or buffer filled
// Buffer filled or 0 found.  Unwind and return.
//      r5 = kkBufSize, ie the buffer length
//      r6 = untransferred bytes remaining in buffer
//      r31 = mapped ptr returned by MapUserAddressSpace
//      cr3 = kkZero set iff 0 found
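//
// Net effect (illustrative sketch): *countPtr = (buffer_len - bytes_left) + header_bytes;
// return 0 if a terminating 0 was found, else ENAMETOOLONG.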

copyinstr10:
        lwz     r9,kkCountPtr(r1)               // get ptr to place to store count of bytes moved
        sub     r2,r5,r6                        // get #bytes we moved, counting the 0 iff any
        add     r2,r2,r12                       // add the header bytes count
        li      r3,0                            // assume 0 return status
        stw     r2,0(r9)                        // store #bytes moved
        bt++    kkZero,copyinx                  // we did find the 0 so return 0
        li      r3,ENAMETOOLONG                 // buffer filled
        b       copyinx                         // join main exit routine
// Byte loop.  This is used on the header bytes for an unaligned source.

        .align  5                               // align inner loops for speed
copyinstr11:
        li      r10,4                           // load word size
        sub     r12,r10,r12                     // set the header bytes count
        mtctr   r12                             // set up bytes loop count
copyinstr12:                                    // loop over bytes of source
        lbz     r0,0(r3)                        // get next byte of source
        addi    r4,r4,1                         // increment dest addr whether we store or not
        bt--    kkNull,copyinstr13              // don't store (was copyinstr with NULL ptr)
        bdnzf   cr0_eq,copyinstr12              // loop if byte not 0 and more room in buffer

        sub     r5,r5,r12                       // subtract the bytes copied
        bne     cr0_eq,copyinstr1               // branch to word loop

        mr      r5,r12                          // Get the header bytes count
        li      r12,0                           // Clear the header bytes count
        mfctr   r6                              // get #bytes left in buffer
        crmove  kkZero,cr0_eq                   // remember if 0 found or buffer filled