2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
33 #include <ppc/proc_reg.h>
34 #include <mach/ppc/vm_param.h>
36 #include <sys/errno.h>
40 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
42 * void pmap_zero_page(vm_offset_t pa)
44 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
45 * and handles 32 and 128-byte cache lines.
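/*
 * Illustration only (not assembled): a minimal C sketch of the zeroing loop
 * below, assuming a page of PPC_PGBYTES bytes and a cache-line size of
 * "line" bytes; each dcbz/dcbz128 of a line is modeled here as a memset.
 *
 *	#include <string.h>
 *	static void zero_page_sketch(char *page, size_t pagebytes, size_t line)
 *	{
 *		for (size_t off = 0; off < pagebytes; off += line)
 *			memset(page + off, 0, line);	// zero one cache line per pass
 *	}
 */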
50 .globl EXT(pmap_zero_page)
54 mflr r12 // save return address
55 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
56 mtlr r12 // restore return address
57 andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
59 subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page
61 bt++ pf64Bitb,page0S4 // Go do the big guys...
63 slwi r3,r3,12 // get page address from page num
64 b page_zero_1 // Jump to line aligned loop...
77 sldi r3,r3,12 // get page address from page num
79 page_zero_1: // loop zeroing cache lines
80 sub. r5,r4,r9 // more to go?
81 dcbz128 r3,r4 // zero either 32 or 128 bytes
82 sub r4,r5,r9 // generate next offset
86 b EXT(ml_restore) // restore MSR and do the isync
89 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
91 * phys_copy(src, dst, bytecount)
96 * This routine will copy bytecount bytes from physical address src to physical
97 * address dst. It runs in 64-bit mode if necessary, but does not handle
98 * overlap or make any attempt to be optimal. Length must be a signed word.
99 * Not performance critical.
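/*
 * Illustration only: a hedged C sketch of the word-then-byte copy that
 * phys_copy performs (no overlap handling, no attempt at optimality).
 * The function name and signature here are ours, not the kernel interface.
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *	static void phys_copy_sketch(const char *src, char *dst, int32_t count)
 *	{
 *		while (count >= 4) {			// copy whole words first
 *			memcpy(dst, src, 4);		// one word, alignment-safe
 *			src += 4; dst += 4; count -= 4;
 *		}
 *		while (count-- > 0)			// then any leftover bytes
 *			*dst++ = *src++;
 *	}
 */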
104 .globl EXT(phys_copy)
108 rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
109 mflr r12 // get return address
110 rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
111 rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
112 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
113 rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits
114 mtlr r12 // restore return address
115 subic. r5,r7,4 // a word to copy?
120 phys_copy_1: // loop copying words
121 subic. r5,r5,4 // more to go?
128 addic. r5,r5,4 // restore count
129 ble phys_copy_4 // no more
131 // Loop is aligned here
133 phys_copy_3: // loop copying bytes
134 subic. r5,r5,1 // more to go?
141 b EXT(ml_restore) // restore MSR and do the isync
144 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
146 * pmap_copy_page(src, dst)
150 * This routine will copy the physical page src to physical page dst
152 * This routine assumes that the src and dst are page numbers and that the
153 * destination is cached. It runs on 32 and 64 bit processors, with and
154 * without altivec, and with 32 and 128 byte cache lines.
155 * We also must assume that no-one will be executing within the destination
156 * page, and that this will be used for paging. Because this
157 * is a common routine, we have tuned loops for each processor class.
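/*
 * Illustration only: the common shape of every copy loop below, restated in
 * C -- copy the page, then make the destination's instruction cache
 * consistent in case someone later executes out of the copied page.
 * __builtin___clear_cache (a GCC/Clang builtin) merely stands in for the
 * dcbst/sync/icbi/isync sequences used by the assembly.
 *
 *	#include <string.h>
 *	static void copy_page_sketch(char *dst, const char *src, size_t pagebytes)
 *	{
 *		memcpy(dst, src, pagebytes);
 *		__builtin___clear_cache(dst, dst + pagebytes);
 *	}
 */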
160 #define kSFSize (FM_SIZE+160)
162 ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
164 lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
165 mflr r0 // get return
166 ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
168 stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
169 mfmsr r11 // save MSR at entry
170 mfsprg r10,2 // get feature flags
171 andc r11,r11,r2 // Clear out vec and fp
172 ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
173 andc r2,r11,r2 // Clear out EE as well
174 mtcrf 0x02,r10 // we need to test pf64Bit
175 ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
176 mtcrf 0x80,r10 // we need to test pfAltivec too
177 oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
178 mtmsr r2 // turn EE off, FP and VEC on
180 bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
181 slwi r3,r3,12 // get page address from page num
182 slwi r4,r4,12 // get page address from page num
183 rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
184 bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
187 // G3 -- copy using FPRs
189 stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
190 stfd f1,FM_SIZE+8(r1)
191 li r5,PPC_PGBYTES/32 // count of cache lines in a page
192 stfd f2,FM_SIZE+16(r1)
194 stfd f3,FM_SIZE+24(r1)
195 mtmsr r12 // turn off DR after saving FPRs on stack
198 pmap_g3_copy_loop: // loop over 32-byte cache lines
199 dcbz 0,r4 // avoid read of dest line
209 dcbst 0,r4 // flush dest line to RAM
211 bdnz pmap_g3_copy_loop
213 sync // wait for stores to take
214 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
215 li r6,PPC_PGBYTES-32 // point to last line in page
216 pmap_g3_icache_flush:
217 subic. r5,r6,32 // more to go?
218 icbi r4,r6 // flush another line in icache
219 subi r6,r5,32 // get offset to next line
221 bne pmap_g3_icache_flush
224 mtmsr r2 // turn DR back on
226 lfd f0,FM_SIZE+0(r1) // restore the FPRs
228 lfd f2,FM_SIZE+16(r1)
229 lfd f3,FM_SIZE+24(r1)
231 b pmap_g4_restore // restore MSR and done
234 // G4 -- copy using VRs
236 pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
237 la r9,FM_SIZE+16(r1) // r9 <- place where we will save the VRs
238 li r5,16 // load x-form offsets into r5-r9
239 li r6,32 // another offset
240 stvx v0,0,r9 // save some VRs so we can use to copy
241 li r7,48 // another offset
243 li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
246 li r8,96 // get look-ahead for touch
249 mtmsr r12 // now we've saved VRs on stack, turn off DR
250 isync // wait for it to happen
253 .align 5 // align inner loops
254 pmap_g4_copy_loop: // loop over 64-byte chunks
255 dcbt r3,r8 // touch 3 lines ahead
256 nop // avoid a 17-word loop...
257 dcbt r3,r9 // touch 4 lines ahead
259 dcba 0,r4 // avoid pre-fetch of 1st dest line
260 lvx v0,0,r3 // offset 0
261 lvx v1,r5,r3 // offset 16
262 lvx v2,r6,r3 // offset 32
263 lvx v3,r7,r3 // offset 48
265 dcba r6,r4 // avoid pre-fetch of 2nd line
266 stvx v0,0,r4 // offset 0
267 stvx v1,r5,r4 // offset 16
268 stvx v2,r6,r4 // offset 32
269 stvx v3,r7,r4 // offset 48
270 dcbf 0,r4 // push line 1
271 dcbf r6,r4 // and line 2
273 bdnz pmap_g4_copy_loop
275 sync // wait for stores to take
276 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
277 li r8,PPC_PGBYTES-32 // point to last line in page
278 pmap_g4_icache_flush:
279 subic. r9,r8,32 // more to go?
280 icbi r4,r8 // flush from icache
281 subi r8,r9,32 // get offset to next line
283 bne pmap_g4_icache_flush
286 mtmsr r2 // turn DR back on
288 la r9,FM_SIZE+16(r1) // get base of VR save area
289 lvx v0,0,r9 // restore the VRs
294 pmap_g4_restore: // r11=MSR
295 mtmsr r11 // turn EE on, VEC and FP off
296 isync // wait for it to happen
297 addi r1,r1,kSFSize // pop off our stack frame
298 lwz r0,8(r1) // restore return address
303 // 64-bit/128-byte processor: copy using VRs
305 pmap_copy_64: // r10=features, r11=old MSR
306 sldi r3,r3,12 // get page address from page num
307 sldi r4,r4,12 // get page address from page num
308 la r9,FM_SIZE+16(r1) // get base of VR save area
309 li r5,16 // load x-form offsets into r5-r9
310 li r6,32 // another offset
311 bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
312 stvx v0,0,r9 // save 8 VRs so we can copy w/o bubbles
314 li r7,48 // another offset
315 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
318 addi r9,r9,64 // advance base ptr so we can store another 4
320 li r0,MASK(MSR_DR) // get DR bit
323 andc r12,r2,r0 // turn off DR bit
324 li r0,1 // get a 1 to slam into SF
327 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
328 li r8,-128 // offset so we can reach back one line
329 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
330 isync // wait for it to happen
331 dcbt128 0,r3,1 // start a forward stream
334 .align 5 // align inner loops
335 pmap_64_copy_loop: // loop over 128-byte chunks
336 dcbz128 0,r4 // avoid read of destination line
337 lvx v0,0,r3 // offset 0
338 lvx v1,r5,r3 // offset 16
339 lvx v2,r6,r3 // offset 32
340 lvx v3,r7,r3 // offset 48
341 addi r3,r3,64 // don't have enough GPRs so add 64 2x
342 lvx v4,0,r3 // offset 64
343 lvx v5,r5,r3 // offset 80
344 lvx v6,r6,r3 // offset 96
345 lvx v7,r7,r3 // offset 112
347 stvx v0,0,r4 // offset 0
348 stvx v1,r5,r4 // offset 16
349 stvx v2,r6,r4 // offset 32
350 stvx v3,r7,r4 // offset 48
352 stvx v4,0,r4 // offset 64
353 stvx v5,r5,r4 // offset 80
354 stvx v6,r6,r4 // offset 96
355 stvx v7,r7,r4 // offset 112
357 dcbf r8,r4 // flush the line we just wrote
358 bdnz pmap_64_copy_loop
360 sync // wait for stores to take
361 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
362 li r8,PPC_PGBYTES-128 // point to last line in page
363 pmap_64_icache_flush:
364 subic. r9,r8,128 // more to go?
365 icbi r4,r8 // flush from icache
366 subi r8,r9,128 // get offset to next line
368 bne pmap_64_icache_flush
371 mtmsrd r2 // turn DR back on, SF off
373 la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
374 lvx v0,0,r9 // restore the VRs
384 b pmap_g4_restore // restore lower half of MSR and return
387 // Copy on 64-bit without VMX
391 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
393 li r0,MASK(MSR_DR) // get DR bit
394 andc r12,r2,r0 // turn off DR bit
395 li r0,1 // get a 1 to slam into SF
396 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
397 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
398 isync // wait for it to happen
399 dcbt128 0,r3,1 // start a forward stream
401 pmap_novmx_copy_loop: // loop over 128-byte cache lines
402 dcbz128 0,r4 // avoid read of dest line
404 ld r0,0(r3) // Load half a line
413 std r0,0(r4) // Store half a line
422 ld r0,64(r3) // Load half a line
433 std r0,64(r4) // Store half a line
442 dcbf 0,r4 // flush the line we just wrote
444 bdnz pmap_novmx_copy_loop
446 sync // wait for stores to take
447 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
448 li r8,PPC_PGBYTES-128 // point to last line in page
450 pmap_novmx_icache_flush:
451 subic. r9,r8,128 // more to go?
452 icbi r4,r8 // flush from icache
453 subi r8,r9,128 // get offset to next line
455 bne pmap_novmx_icache_flush
458 mtmsrd r2 // turn DR back on, SF off
461 b pmap_g4_restore // restore lower half of MSR and return
465 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
467 // Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
468 // These routines all run both on 32 and 64-bit machines, though because they are called
469 // by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
470 // by MapUserMemoryWindow will, however, be 64 bits on 64-bit machines. Take care to avoid
471 // using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
472 // is no need to store or load it, which are mode-dependent operations since it could be a 64-bit pointer.
475 #define kkFrameSize (FM_SIZE+32)
477 #define kkBufSize (FM_SIZE+0)
478 #define kkCR3 (FM_SIZE+4)
479 #define kkSource (FM_SIZE+8)
480 #define kkDest (FM_SIZE+12)
481 #define kkCountPtr (FM_SIZE+16)
482 #define kkR31Save (FM_SIZE+20)
483 #define kkThrErrJmp (FM_SIZE+24)
486 // nonvolatile CR bits we use as flags in cr3
495 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
498 * copyoutstr(src, dst, maxcount, count)
499 * vm_offset_t src; // r3
500 * addr64_t dst; // r4 and r5
501 * vm_size_t maxcount; // r6
502 * vm_size_t* count; // r7
504 * Set *count to the number of bytes copied.
507 ENTRY(copyoutstr, TAG_NO_FRAME_USED)
508 mfcr r2,0x10 // save caller's cr3, which we use for flags
509 mr r10,r4 // move high word of 64-bit user address to r10
511 crset kkString // flag as a string op
512 mr r11,r5 // move low word of 64-bit user address to r11
513 stw r0,0(r7) // initialize #bytes moved
514 crclr kkIn // flag as copyout
518 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
521 * copyinstr(src, dst, maxcount, count)
522 * addr64_t src; // r3 and r4
523 * vm_offset_t dst; // r5
524 * vm_size_t maxcount; // r6
525 * vm_size_t* count; // r7
527 * Set *count to the number of bytes copied
528 * If dst == NULL, don't copy, just count bytes.
529 * Only currently called from klcopyinstr.
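/*
 * Illustration only: a C model of the contract the string variants follow
 * (the name and shape are hypothetical, not the kernel's prototype). At
 * most maxcount bytes are examined; *count gets the number of bytes moved,
 * including the terminating zero when one is found; ENAMETOOLONG is
 * returned when the buffer fills before a zero is seen.
 *
 *	#include <stddef.h>
 *	#include <errno.h>
 *	static int copystr_model(const char *src, char *dst,
 *	                         size_t maxcount, size_t *count)
 *	{
 *		size_t n;
 *		for (n = 0; n < maxcount; n++) {
 *			char c = src[n];
 *			if (dst != NULL)		// dst==NULL: just count
 *				dst[n] = c;
 *			if (c == 0) {
 *				*count = n + 1;		// count includes the zero
 *				return 0;
 *			}
 *		}
 *		*count = n;				// buffer filled, no zero
 *		return ENAMETOOLONG;
 *	}
 */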
532 ENTRY(copyinstr, TAG_NO_FRAME_USED)
533 mfcr r2,0x10 // save caller's cr3, which we use for flags
534 cmplwi r5,0 // dst==NULL?
535 mr r10,r3 // move high word of 64-bit user address to r10
537 crset kkString // flag as a string op
538 mr r11,r4 // move low word of 64-bit user address to r11
539 crmove kkNull,cr0_eq // remember if (dst==NULL)
540 stw r0,0(r7) // initialize #bytes moved
541 crset kkIn // flag as copyin (rather than copyout)
542 b copyJoin1 // skip over the "crclr kkNull"
545 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
548 * copyout(src, dst, count)
549 * vm_offset_t src; // r3
550 * addr64_t dst; // r4 and r5
551 * size_t count; // r6
556 .globl EXT(copyoutmsg)
562 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
563 stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
564 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
565 stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
566 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
567 stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
568 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
569 stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
571 mfcr r2,0x10 // save caller's cr3, which we use for flags
572 mr r10,r4 // move high word of 64-bit user address to r10
573 crclr kkString // not a string version
574 mr r11,r5 // move low word of 64-bit user address to r11
575 crclr kkIn // flag as copyout
579 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
582 * copyin(src, dst, count)
583 * addr64_t src; // r3 and r4
584 * vm_offset_t dst; // r5
585 * size_t count; // r6
591 .globl EXT(copyinmsg)
596 mfcr r2,0x10 // save caller's cr3, which we use for flags
597 mr r10,r3 // move high word of 64-bit user address to r10
598 crclr kkString // not a string version
599 mr r11,r4 // move low word of 64-bit user address to r11
600 crset kkIn // flag as copyin
603 // Common code to handle setup for all the copy variants:
605 // r3 = source if copyout
606 // r5 = dest if copyin
607 // r6 = buffer length or count
608 // r7 = count output ptr (if kkString set)
609 // r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
610 // r11 = low word of 64-bit user-space address
611 // cr3 = kkIn, kkString, kkNull flags
614 crclr kkNull // (dst==NULL) convention not used with this call
615 copyJoin1: // enter from copyinstr with kkNull set
616 mflr r0 // get return address
617 cmplwi r6,0 // buffer length 0?
618 lis r9,0x1000 // r9 <- 0x10000000 (256MB)
619 stw r0,FM_LR_SAVE(r1) // save return
620 cmplw cr1,r6,r9 // buffer length > 256MB ?
621 mfsprg r8,2 // get the features
622 beq-- copyinout_0 // 0 length is degenerate case
623 stwu r1,-kkFrameSize(r1) // set up stack frame
624 stw r2,kkCR3(r1) // save caller's cr3, which we use for flags
625 mtcrf 0x02,r8 // move pf64Bit to cr6
626 stw r3,kkSource(r1) // save args across MapUserMemoryWindow
629 crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
630 stw r7,kkCountPtr(r1)
631 stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
635 // Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
636 // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
637 // the buffer length to 256MB. This isn't an issue if the string is less than 256MB
638 // (as most are!), but if it is >256MB we eventually return ENAMETOOLONG. This restriction
639 // is due to MapUserMemoryWindow; we don't want to consume more than two segments for the mapping.
642 ble++ cr1,copyin0 // skip if buffer length <= 256MB
643 bf kkString,copyinout_too_big // error if not string op
644 mr r6,r9 // silently clamp buffer length to 256MB
645 stw r9,kkBufSize(r1) // update saved copy too
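/*
 * Illustration only: the length policy above, restated in C (names are
 * hypothetical). Plain copyin/copyout fail outright on oversized lengths,
 * while the string variants clamp and may report ENAMETOOLONG later.
 *
 *	if (len > WINDOW_MAX) {			// 256MB window limit
 *		if (!is_string_op)
 *			return ENAMETOOLONG;	// copyin/copyout: error out now
 *		len = WINDOW_MAX;		// string ops: silently clamp
 *	}
 */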
648 // Set up thread_recover in case we hit an illegal address.
651 li r31,0 // no mapped ptr yet
652 mfsprg r8,1 // Get the current thread
653 lis r2,hi16(copyinout_error)
654 ori r2,r2,lo16(copyinout_error)
655 lwz r4,THREAD_RECOVER(r8)
656 lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
657 stw r2,THREAD_RECOVER(r8)
658 stw r4,kkThrErrJmp(r1)
661 // Map user segment into kernel map, turn on 64-bit mode. At this point:
663 // r6 = buffer length
664 // r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
666 // When we call MapUserMemoryWindow, we pass:
668 // r4/r5 = 64-bit user space address as an addr64_t
670 mr r4,r10 // copy user ptr into r4/r5
673 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
674 stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
675 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
676 stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
677 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
678 stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
679 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
680 stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
682 bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand
684 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
685 stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
686 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
687 stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
688 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
689 stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
690 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
691 stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
693 mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
694 bf-- kk64bit,copyin1 // skip if a 32-bit processor
696 rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
697 mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
699 rldimi r4,r0,63,MSR_SF_BIT // light bit 0
700 mtmsrd r4 // turn on 64-bit mode
701 isync // wait for mode to change
704 // Load r3-r5, substituting mapped ptr as appropriate.
707 lwz r5,kkBufSize(r1) // restore length to copy
708 bf kkIn,copyin2 // skip if copyout
709 lwz r4,kkDest(r1) // copyin: dest is kernel ptr
710 mr r3,r31 // source is mapped ptr
712 copyin2: // handle copyout
713 lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
714 mr r4,r31 // dest is mapped ptr into user space
717 // Finally, all set up to copy:
718 // r3 = source ptr (mapped if copyin)
719 // r4 = dest ptr (mapped if copyout)
721 // r31 = mapped ptr returned by MapUserMemoryWindow
722 // cr3 = kkIn, kkString, kk64bit, and kkNull flags
725 bt kkString,copyString // handle copyinstr and copyoutstr
726 bl EXT(bcopy) // copyin and copyout: let bcopy do the work
727 li r3,0 // return success
730 // Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
731 // from error recovery if we get a DSI accessing user space. Clear recovery ptr,
732 // and pop off frame.
733 // r3 = 0, EFAULT, or ENAMETOOLONG
736 lwz r2,kkCR3(r1) // get callers cr3
737 mfsprg r6,1 // Get the current thread
738 bf-- kk64bit,copyinx1 // skip if 32-bit processor
740 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
741 mtmsrd r12 // turn SF off
742 isync // wait for the mode to change
744 lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address
745 lwz r31,kkR31Save(r1) // restore callers r31
746 lwz r4,kkThrErrJmp(r1) // load saved thread recover
747 addi r1,r1,kkFrameSize // pop off our stack frame
749 stw r4,THREAD_RECOVER(r6) // restore thread recover
750 mtcrf 0x10,r2 // restore cr3
754 /* We get here via the exception handler if an illegal
755 * user memory reference was made. This error handler is used by
756 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
757 * they were at point of fault, so for example cr3 flags are valid.
761 li r3,EFAULT // return error
764 copyinout_0: // degenerate case: 0-length copy
765 mtcrf 0x10,r2 // restore cr3
766 li r3,0 // return success
769 copyinout_too_big: // degenerate case
770 mtcrf 0x10,r2 // restore cr3
771 lwz r1,0(r1) // pop off stack frame
776 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
777 // Handle copyinstr and copyoutstr. At this point the stack frame is set up,
778 // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
779 // if necessary, and:
780 // r3 = source ptr, mapped if copyinstr
781 // r4 = dest ptr, mapped if copyoutstr
782 // r5 = buffer length
783 // r31 = mapped ptr returned by MapUserMemoryWindow
784 // cr3 = kkIn, kkString, kkNull, and kk64bit flags
785 // We do word copies unless the buffer is very short, then use a byte copy loop
786 // for the leftovers if necessary. The crossover at which the word loop becomes
787 // faster is about seven bytes, counting the zero.
789 // We first must word-align the source ptr, in order to avoid taking a spurious page fault.
793 cmplwi cr1,r5,15 // is buffer very short?
794 mr r12,r3 // remember ptr to 1st source byte
795 mtctr r5 // assuming short, set up loop count for bytes
796 blt-- cr1,copyinstr8 // too short for word loop
797 rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word
798 rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word
800 sub r3,r3,r2 // word-align source address
801 add r6,r5,r2 // get length starting at byte 0 in word
802 srw r7,r7,r9 // get mask for bytes in first word
803 srwi r0,r6,2 // get #words in buffer
804 lwz r5,0(r3) // get aligned word with first source byte
805 lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
806 lis r11,hi16(0x80808080)
807 mtctr r0 // set up word loop count
808 addi r3,r3,4 // advance past the source word
809 ori r10,r10,lo16(0xFEFEFEFF)
810 ori r11,r11,lo16(0x80808080)
811 orc r8,r5,r7 // map bytes preceding first source byte into 0xFF
812 bt-- kkNull,copyinstr5enter // enter loop that just counts
814 // Special case 1st word, which has been 0xFF filled on left. Note that we use
815 // "and.", even though we execute both in 32 and 64-bit mode. This is OK.
817 slw r5,r5,r9 // left justify payload bytes
818 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
819 andc r7,r11,r8 // r7 = ~data & 0x80808080
820 subfic r0,r2,4 // get r0 <- #payload bytes in 1st word
821 and. r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero
822 stw r5,0(r4) // copy payload bytes to dest buffer
823 add r4,r4,r0 // then point to next byte in dest buffer
824 bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found
826 b copyinstr7 // 0 found (buffer can't be full)
829 // Word loop(s). They do a word-parallel search for 0s, using the following
830 // inobvious but very efficient test:
831 // y = data + 0xFEFEFEFF
832 // z = ~data & 0x80808080
833 // If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
834 // of this loop, one that just counts and another that copies.
835 // r3 = ptr to next word of source (word aligned)
836 // r4 = ptr to next byte in buffer
837 // r6 = original buffer length (adjusted to be word origin)
840 // r12 = ptr to 1st source byte (used to determine string length)
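/*
 * Illustration only: the word-parallel zero-byte test in C, for 32-bit
 * words. 0xFEFEFEFF is -0x01010101 modulo 2**32, so y is data - 0x01010101;
 * (y & z) is nonzero exactly when some byte of data is zero. The individual
 * 0x80 flags can additionally mark 0x01 bytes that precede the first zero,
 * which the code further below masks out before locating the zero.
 *
 *	#include <stdint.h>
 *	static int word_has_zero_byte(uint32_t data)
 *	{
 *		uint32_t y = data + 0xFEFEFEFFu;	// i.e. data - 0x01010101
 *		uint32_t z = ~data & 0x80808080u;
 *		return (y & z) != 0;			// 0 => all bytes nonzero
 *	}
 */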
842 .align 5 // align inner loops for speed
843 copyinstr5: // version that counts but does not copy
844 lwz r8,0(r3) // get next word of source
845 addi r3,r3,4 // advance past it
847 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
848 andc r7,r11,r8 // r7 = ~data & 0x80808080
849 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
850 bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes in r8 are nonzero
854 .align 5 // align inner loops for speed
855 copyinstr6: // version that counts and copies
856 lwz r8,0(r3) // get next word of source
857 addi r3,r3,4 // advance past it
858 addi r4,r4,4 // increment dest ptr while we wait for data
859 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
860 andc r7,r11,r8 // r7 = ~data & 0x80808080
861 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
862 stw r8,-4(r4) // pack all 4 bytes into buffer
863 bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
866 // Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
867 // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
868 // mapped to 0x80. We must mask out these false hits before searching for a 0x80 byte.
869 // r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
870 // r6 = original buffer length (adjusted to be word origin)
871 // r7 = computed vector of 0x00 and 0x80 bytes
872 // r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
873 // r12 = ptr to 1st source byte (used to determine string length)
874 // cr0 = beq set iff 0 not found
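/*
 * Illustration only: how the 0x00/0x80 flag vector is converted to a byte
 * count, assuming big-endian byte order within a 32-bit word. Shifting the
 * vector right by 8 before counting leading zeros makes the zero byte
 * itself count as a transferred byte; cntlzw returns 32 for a zero input,
 * which the guard below reproduces for __builtin_clz.
 *
 *	#include <stdint.h>
 *	static unsigned bytes_through_zero(uint32_t flags)	// 0x80 marks zero byte(s)
 *	{
 *		flags >>= 8;					// count the 0 itself
 *		unsigned lz = flags ? (unsigned)__builtin_clz(flags) : 32;
 *		return lz >> 3;					// 8,16,24,32 -> 1,2,3,4
 *	}
 */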
877 rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
878 rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word
879 andc r7,r7,r2 // turn off false hits from 0x0100 worst case
880 crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
881 srwi r7,r7,8 // we want to count the 0 as a byte xferred
882 cmpwi r6,0 // any bytes left over in last word?
883 cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
884 subi r3,r3,4 // back up r3 to point to 1st byte in r8
885 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
886 add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred
887 bt++ kkZero,copyinstr10 // 0 found, so done
889 beq copyinstr10 // r6==0, so buffer truly full
890 mtctr r6 // 0 not found, loop over r6 bytes
891 b copyinstr8 // enter byte loop for last 1-3 leftover bytes
894 // Byte loop. This is used for very small buffers and for the odd bytes left over
895 // after searching and copying words at a time.
896 // r3 = ptr to next byte of source
897 // r4 = ptr to next dest byte
898 // r12 = ptr to first byte of source
899 // ctr = count of bytes to check
901 .align 5 // align inner loops for speed
902 copyinstr8: // loop over bytes of source
903 lbz r0,0(r3) // get next byte of source
905 addi r4,r4,1 // increment dest addr whether we store or not
907 bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr
910 bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
912 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
915 // Buffer filled or 0 found. Unwind and return.
916 // r3 = ptr to 1st source byte not transferred
917 // r12 = ptr to 1st source byte
918 // r31 = mapped ptr returned by MapUserMemoryWindow
919 // cr3 = kkZero set iff 0 found
922 lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
923 sub r2,r3,r12 // compute #bytes copied (including the 0)
924 li r3,0 // assume success return status
925 stw r2,0(r9) // store #bytes moved
926 bt++ kkZero,copyinx // we did find the 0 so return 0
927 li r3,ENAMETOOLONG // buffer filled
928 b copyinx // join main exit routine
930 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
933 * copypv(source, sink, size, which)
934 * addr64_t src; // r3 and r4
935 * addr64_t dst; // r5 and r6
939 * Operand size bytes are copied from operand src into operand dst. The source and
940 * destination operand addresses are given as addr64_t, and may designate starting
941 * locations in physical or virtual memory in any combination except where both are
942 * virtual. Virtual memory locations may be in either the kernel or the current thread's
943 * address space. Operand size may be up to 256MB.
945 * Operation is controlled by the operand "which", which offers these options:
946 * cppvPsrc : source operand is (1) physical or (0) virtual
947 * cppvPsnk : destination operand is (1) physical or (0) virtual
948 * cppvKmap : virtual operand is in (1) kernel or (0) current thread
949 * cppvFsnk : (1) flush destination before and after transfer
950 * cppvFsrc : (1) flush source before and after transfer
951 * cppvNoModSnk : (1) don't set destination operand's changed bit(s)
952 * cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
954 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
955 * This section describes the operation of the new 64-bit path.
957 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
958 * window in the kernel address space into all of physical RAM plus the I/O hole. Since
959 * the window's mappings specify the proper access policies for the underlying memory,
960 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
961 * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
962 * memory window, and are accessed with data relocation on. Virtual addresses are either
963 * within the kernel, or are mapped into the kernel address space through the user memory
964 * window. Because accesses to a virtual operand are performed with data relocation on,
965 * the new path does not have to translate the address, disable/enable interrupts, lock
966 * the mapping, or update referenced and changed bits.
968 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
969 * a substantial performance penalty for copypv operating in real mode. Utilizing the
970 * new 64-bit path, transfer performance increases >100% on the G5.
972 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
973 * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
974 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer required.
977 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
978 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
979 * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
980 * in our own stack frame across calls to 32-bit functions.
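/*
 * Illustration only: a hypothetical call showing how the "which" bits
 * described above combine. Here the source is a physical address, the sink
 * is a kernel-virtual buffer, and the source's referenced bit is left
 * alone; the actual prototype and return type live in the kernel headers.
 *
 *	int rc = copypv(src_paddr,			// addr64_t physical source
 *	                (addr64_t)kernel_vaddr,		// addr64_t virtual sink
 *	                length,				// up to 256MB
 *	                cppvPsrc | cppvKmap | cppvNoRefSrc);
 *	if (rc != 0)
 *		;					// EINVAL or EFAULT
 */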
984 // Map operand which bits into non-volatile CR2 and CR3 bits.
985 #define whichAlign ((3+1)*4)
986 #define whichMask 0x007F0000
987 #define pvPsnk (cppvPsnkb - whichAlign)
988 #define pvPsrc (cppvPsrcb - whichAlign)
989 #define pvFsnk (cppvFsnkb - whichAlign)
990 #define pvFsrc (cppvFsrcb - whichAlign)
991 #define pvNoModSnk (cppvNoModSnkb - whichAlign)
992 #define pvNoRefSrc (cppvNoRefSrcb - whichAlign)
993 #define pvKmap (cppvKmapb - whichAlign)
994 #define pvNoCache cr2_lt
1000 mfsprg r10,2 // get feature flags
1001 mtcrf 0x02,r10 // we need to test pf64Bit
1002 bt++ pf64Bitb,copypv_64 // skip if 64-bit processor (only they take hint)
1004 b EXT(hw_copypv_32) // carry on with 32-bit copypv
1006 // Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
1008 mfsprg r9,1 // get current thread
1009 stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
1010 // allocate stack frame and link it
1011 mflr r0 // get return address
1012 mfcr r10 // get cr2 and cr3
1013 lwz r12,THREAD_RECOVER(r9) // get error callback
1014 stw r26,FM_ARG0+0x00(r1) // save non-volatile r26
1015 stw r27,FM_ARG0+0x04(r1) // save non-volatile r27
1016 stw r28,FM_ARG0+0x08(r1) // save non-volatile r28
1017 stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29
1018 stw r30,FM_ARG0+0x10(r1) // save non-volatile r30
1019 stw r31,FM_ARG0+0x14(r1) // save non-volatile r31
1020 stw r12,FM_ARG0+0x20(r1) // save error callback
1021 stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1022 // save return address
1023 stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1024 // save non-volatile cr2 and cr3
1026 // Non-volatile register usage in this routine is:
1027 // r26: saved msr image
1028 // r27: current pmap_t / virtual source address
1029 // r28: destination virtual address
1030 // r29: source address
1031 // r30: destination address
1032 // r31: byte count to copy
1033 // cr2/3: parameter 'which' bits
1035 rlwinm r8,r8,whichAlign,whichMask // align and mask which bits
1036 mr r31,r7 // copy size to somewhere non-volatile
1037 mtcrf 0x20,r8 // insert which bits into cr2 and cr3
1038 mtcrf 0x10,r8 // insert which bits into cr2 and cr3
1039 rlwinm r29,r3,0,1,0 // form source address high-order bits
1040 rlwinm r30,r5,0,1,0 // form destination address high-order bits
1041 rlwimi r29,r4,0,0,31 // form source address low-order bits
1042 rlwimi r30,r6,0,0,31 // form destination address low-order bits
1043 crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical?
1044 cntlzw r0,r31 // count leading zeroes in byte count
1045 cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical
1046 bf-- cr7_eq,copypv_einval // both operands may not be virtual
1047 cmplwi r0,4 // byte count greater than or equal 256M (2**28)?
1048 blt-- copypv_einval // byte count too big, give EINVAL
1049 cmplwi r31,0 // byte count zero?
1050 beq-- copypv_zero // early out
1051 bt cr7_lt,copypv_phys // both operand addresses are physical
1052 mr r28,r30 // assume destination is virtual
1053 bf pvPsnk,copypv_dv // is destination virtual?
1054 mr r28,r29 // no, so source must be virtual
1056 lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order
1057 lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
1058 bt pvKmap,copypv_kern // virtual address in kernel map?
1059 lwz r3,ACT_VMMAP(r9) // get user's vm_map *
1060 rldicl r4,r28,32,32 // r4, r5 <- addr64_t virtual address
1062 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call
1063 std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call
1064 bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space
1065 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1066 ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30
1067 rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address
1068 rlwimi r28,r4,0,0,31 // into a single 64-bit scalar
1071 // Since we'll be accessing the virtual operand with data-relocation on, we won't need to
1072 // update the referenced and changed bits manually after the copy. So, force the appropriate
1073 // flag bit on for the virtual operand.
1074 crorc pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits
1075 crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit
1077 // We'll be finding a mapping and looking at it, so we need to disable 'rupts.
1078 lis r0,hi16(MASK(MSR_VEC)) // get vector mask
1079 ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask
1080 mfmsr r26 // save current msr
1081 andc r26,r26,r0 // turn off VEC and FP in saved copy
1082 ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask
1083 andc r0,r26,r0 // disable EE in our new msr image
1084 mtmsrd r0 // introduce new msr image
1086 // We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
1087 // try to find a mapping corresponding to this address in order to determine whether the address
1088 // is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
1089 // (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
1090 // examine the mapping's caching-inhibited bit.
1091 mr r3,r27 // r3 <- pmap_t pmap
1092 rldicl r4,r28,32,32 // r4, r5 <- addr64_t va
1094 la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva
1095 li r7,1 // r7 <- int full, search nested mappings
1096 std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls
1097 std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls
1098 std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls
1099 std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls
1100 bl EXT(mapping_find) // find mapping for virtual operand
1101 mr. r3,r3 // did we find it?
1102 beq copypv_nomapping // nope, so we'll assume it's cacheable
1103 lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags
1104 rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set?
1105 crnot pvNoCache,cr0_eq // if it is, use bcopy_nc
1106 bl EXT(mapping_drop_busy) // drop busy on the mapping
1108 ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26
1109 ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28
1110 ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29
1111 ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30
1112 mtmsrd r26 // restore msr to its previous state
1114 // Set both the source and destination virtual addresses to the virtual operand's address --
1115 // we'll overlay one of them with the physical operand's address.
1116 mr r27,r28 // make virtual operand BOTH source AND destination
1118 // Now we're ready to relocate the physical operand address(es) into the physical memory window.
1119 // Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
1120 // space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
1121 // we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
1123 ld r6,lgPMWvaddr(0) // get physical memory window virtual address
1124 bf pvPsnk,copypv_dstvirt // is destination address virtual?
1125 cntlzd r4,r30 // count leading zeros in destination address
1126 cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1127 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1128 add r28,r30,r6 // relocate physical destination into physical window
1130 bf pvPsrc,copypv_srcvirt // is source address virtual?
1131 cntlzd r4,r29 // count leading zeros in source address
1132 cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1133 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1134 add r27,r29,r6 // relocate physical source into physical window
1137 // Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
1138 // funny happens during the copy. So, we set a pointer to our error handler in the per-thread recovery slot (THREAD_RECOVER).
1140 mfsprg r8,1 // get current threads stuff
1141 lis r3,hi16(copypv_error) // get our error callback's address, high
1142 ori r3,r3,lo16(copypv_error) // get our error callback's address, low
1143 stw r3,THREAD_RECOVER(r8) // set our error callback
1145 // Since our physical operand(s) are relocated at or above the 2**32 line, we must enter 64-bit mode.
1147 li r0,1 // get a handy one bit
1148 mfmsr r3 // get current msr
1149 rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy
1150 mtmsrd r3 // enter 64-bit mode
1152 // If requested, flush data cache
1153 // Note that we don't flush; the code is being kept "just in case".
1155 bf pvFsrc,copypv_nfs // do we flush the source?
1156 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1158 mr r5,r31 // r5 <- count (in bytes)
1159 li r6,0 // r6 <- boolean phys (false, not physical)
1160 bl EXT(flush_dcache) // flush the source operand
1162 bf pvFsnk,copypv_nfdx // do we flush the destination?
1163 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1165 mr r5,r31 // r5 <- count (in bytes)
1166 li r6,0 // r6 <- boolean phys (false, not physical)
1167 bl EXT(flush_dcache) // flush the destination operand
1171 // Call bcopy or bcopy_nc to perform the copy.
1172 mr r3,r27 // r3 <- source virtual address
1173 mr r4,r28 // r4 <- destination virtual address
1174 mr r5,r31 // r5 <- bytes to copy
1175 bt pvNoCache,copypv_nc // take non-caching route
1176 bl EXT(bcopy) // call bcopy to do the copying
1179 bl EXT(bcopy_nc) // call bcopy_nc to do the copying
1182 // If requested, flush data cache
1183 // Note that we don't flush; the code is being kept "just in case".
1185 bf pvFsrc,copypv_nfsx // do we flush the source?
1186 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1188 mr r5,r31 // r5 <- count (in bytes)
1189 li r6,0 // r6 <- boolean phys (false, not physical)
1190 bl EXT(flush_dcache) // flush the source operand
1192 bf pvFsnk,copypv_nfd // do we flush the destination?
1193 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1195 mr r5,r31 // r5 <- count (in bytes)
1196 li r6,0 // r6 <- boolean phys (false, not physical)
1197 bl EXT(flush_dcache) // flush the destination operand
1201 // Leave 64-bit mode.
1202 mfmsr r3 // get current msr
1203 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1204 mtmsrd r3 // leave 64-bit mode
1206 // If requested, set ref/chg on source/dest physical operand(s). It is possible that the copy is
1207 // from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
1208 // mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
1209 // Note that this code is page-size sensitive, so it should probably be a part of our low-level ref/chg code.
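/*
 * Illustration only: the 4K-page count computed by the loop set-up below,
 * restated in C. A transfer of "count" bytes starting at "addr" touches
 * this many 4K pages (count is nonzero here, thanks to the early-out above).
 *
 *	#include <stdint.h>
 *	static uint64_t pages_touched(uint64_t addr, uint64_t count)
 *	{
 *		uint64_t first = addr >> 12;			// first 4K page index
 *		uint64_t last  = (addr + count - 1) >> 12;	// last 4K page index
 *		return last - first + 1;
 *	}
 */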
1211 bt pvNoModSnk,copypv_nomod // skip destination update if not requested
1212 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit calls
1213 li r26,1 // r26 <- 4K-page count
1214 mr r27,r31 // r27 <- byte count
1215 rlwinm r3,r30,0,20,31 // does destination cross a page boundary?
1216 subfic r3,r3,4096 //
1218 blt copypv_modnox // skip if not crossing case
1219 subf r27,r3,r27 // r27 <- byte count less initial fragment
1220 addi r26,r26,1 // increment page count
1222 srdi r3,r27,12 // pages to update (not including crosser)
1223 add r26,r26,r3 // add in crosser
1224 srdi r27,r30,12 // r27 <- destination page number
1226 mr r3,r27 // r3 <- destination page number
1227 la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
1228 bl EXT(mapping_phys_lookup) // see if page is really there
1230 beq-- copypv_modend // nope, break out of modify loop
1231 mr r3,r27 // r3 <- destination page number
1232 bl EXT(mapping_set_mod) // set page changed status
1233 subi r26,r26,1 // decrement page count
1234 cmpwi r26,0 // done yet?
1235 bgt copypv_modloop // nope, iterate
1237 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1239 bt pvNoRefSrc,copypv_done // skip source update if not requested
1241 li r26,1 // r26 <- 4K-page count
1242 mr r27,r31 // r27 <- byte count
1243 rlwinm r3,r29,0,20,31 // does source cross a page boundary?
1244 subfic r3,r3,4096 //
1246 blt copypv_refnox // skip if not crossing case
1247 subf r27,r3,r27 // r27 <- byte count less initial fragment
1248 addi r26,r26,1 // increment page count
1250 srdi r3,r27,12 // pages to update (not including crosser)
1251 add r26,r26,r3 // add in crosser
1252 srdi r27,r29,12 // r27 <- source page number
1254 mr r3,r27 // r3 <- source page number
1255 la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
1256 bl EXT(mapping_phys_lookup) // see if page is really there
1258 beq-- copypv_done // nope, break out of modify loop
1259 mr r3,r27 // r3 <- source page number
1260 bl EXT(mapping_set_ref) // set page referenced status
1261 subi r26,r26,1 // decrement page count
1262 cmpwi r26,0 // done yet?
1263 bgt copypv_refloop // nope, iterate
1265 // Return, indicating success.
1268 li r3,0 // our efforts were crowned with success
1270 // Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
1272 mfsprg r9,1 // get current threads stuff
1273 lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1274 // get return address
1275 lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1276 // get non-volatile cr2 and cr3
1277 lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26
1278 lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27
1279 mtlr r0 // restore return address
1280 lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28
1281 mtcrf 0x20,r4 // restore non-volatile cr2
1282 mtcrf 0x10,r4 // restore non-volatile cr3
1283 lwz r11,FM_ARG0+0x20(r1) // get saved error callback
1284 lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29
1285 lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30
1286 lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31
1287 stw r11,THREAD_RECOVER(r9) // restore our error callback
1288 lwz r1,0(r1) // release stack frame
1290 blr // y'all come back now
1292 // Invalid argument handler.
1294 li r3,EINVAL // invalid argument
1295 b copypv_return // return
1297 // Error encountered during bcopy or bcopy_nc.
1299 mfmsr r3 // get current msr
1300 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1301 mtmsrd r3 // leave 64-bit mode
1302 li r3,EFAULT // it was all his fault
1303 b copypv_return // return