1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * @OSF_COPYRIGHT@
25 */
26 #include <debug.h>
27 #include <ppc/asm.h>
28 #include <ppc/proc_reg.h>
29 #include <mach/ppc/vm_param.h>
30 #include <assym.s>
31 #include <sys/errno.h>
32
33 #define INSTRUMENT 0
34
35 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
36 /*
37 * void pmap_zero_page(vm_offset_t pa)
38 *
39 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
40 * and handles 32 and 128-byte cache lines.
41 */
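// As a rough C sketch of what the loop below achieves (illustrative only; the real code
// uses dcbz/dcbz128 to zero whole cache lines without first reading them, which plain
// memset cannot express, and it walks the offsets downward, two lines per iteration):
//
//      #include <string.h>
//      static void zero_page_sketch(char *page, unsigned linesize) {  // linesize: 32 or 128
//          for (unsigned off = 0; off < PPC_PGBYTES; off += linesize)
//              memset(page + off, 0, linesize);                        // one dcbz per line
//      }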
42
43
44 .align 5
45 .globl EXT(pmap_zero_page)
46
47 LEXT(pmap_zero_page)
48
49 mflr r12 // save return address
50 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
51 mtlr r12 // restore return address
52 andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
53
54 subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page
55
56 bt++ pf64Bitb,page0S4 // Go do the big guys...
57
58 slwi r3,r3,12 // get page address from page num
59 b page_zero_1 // Jump to line aligned loop...
60
61 .align 5
62
63 nop
64 nop
65 nop
66 nop
67 nop
68 nop
69 nop
70
71 page0S4:
72 sldi r3,r3,12 // get page address from page num
73
74 page_zero_1: // loop zeroing cache lines
75 sub. r5,r4,r9 // more to go?
76 dcbz128 r3,r4 // zero either 32 or 128 bytes
77 sub r4,r5,r9 // generate next offset
78 dcbz128 r3,r5
79 bne-- page_zero_1
80
81 b EXT(ml_restore) // restore MSR and do the isync
82
83
84 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
85 /* void
86 * phys_copy(src, dst, bytecount)
87 * addr64_t src;
88 * addr64_t dst;
89 * int bytecount
90 *
91 * This routine will copy bytecount bytes from physical address src to physical
92 * address dst. It runs in 64-bit mode if necessary, but does not handle
93 * overlap or make any attempt to be optimal. Length must be a signed word.
94 * Not performance critical.
95 */
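// A hedged C sketch of the copy strategy below (illustrative only, not the kernel
// interface): move 4-byte words while at least a word remains, then finish byte by
// byte. Like the assembly, it makes no attempt to handle overlap.
//
//      #include <string.h>
//      #include <stdint.h>
//      static void phys_copy_sketch(const uint8_t *src, uint8_t *dst, int n) {
//          while (n >= 4) {                    // word loop
//              memcpy(dst, src, 4);            // 4-byte load/store
//              src += 4; dst += 4; n -= 4;
//          }
//          while (n > 0) {                     // byte loop for the remainder
//              *dst++ = *src++; n--;
//          }
//      }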
96
97
98 .align 5
99 .globl EXT(phys_copy)
100
101 LEXT(phys_copy)
102
103 rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
104 mflr r12 // get return address
105 rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
106 rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
107 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
108 rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits
109 mtlr r12 // restore return address
110 subic. r5,r7,4 // a word to copy?
111 b phys_copy_2
112
113 .align 5
114
115 phys_copy_1: // loop copying words
116 subic. r5,r5,4 // more to go?
117 lwz r0,0(r3)
118 addi r3,r3,4
119 stw r0,0(r4)
120 addi r4,r4,4
121 phys_copy_2:
122 bge phys_copy_1
123 addic. r5,r5,4 // restore count
124 ble phys_copy_4 // no more
125
126 // Loop is aligned here
127
128 phys_copy_3: // loop copying bytes
129 subic. r5,r5,1 // more to go?
130 lbz r0,0(r3)
131 addi r3,r3,1
132 stb r0,0(r4)
133 addi r4,r4,1
134 bgt phys_copy_3
135 phys_copy_4:
136 b EXT(ml_restore) // restore MSR and do the isync
137
138
139 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
140 /* void
141 * pmap_copy_page(src, dst)
142 * ppnum_t src;
143 * ppnum_t dst;
144 *
145 * This routine will copy the physical page src to physical page dst
146 *
147 * This routine assumes that the src and dst are page numbers and that the
148  * destination is cached. It runs on 32- and 64-bit processors, with and
149  * without altivec, and with 32- and 128-byte cache lines.
150  * We must also assume that no one will be executing within the destination
151 * page, and that this will be used for paging. Because this
152 * is a common routine, we have tuned loops for each processor class.
153 *
154 */
155 #define kSFSize (FM_SIZE+160)
156
157 ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
158
159 lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
160 mflr r0 // get return
161 ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
162 stw r0,8(r1) // save
163 stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
164 mfmsr r11 // save MSR at entry
165 mfsprg r10,2 // get feature flags
166 andc r11,r11,r2 // Clear out vec and fp
167 ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
168 andc r2,r11,r2 // Clear out EE as well
169 mtcrf 0x02,r10 // we need to test pf64Bit
170 ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
171 mtcrf 0x80,r10 // we need to test pfAltivec too
172 oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
173 mtmsr r2 // turn EE off, FP and VEC on
174 isync
175 bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
176 slwi r3,r3,12 // get page address from page num
177 slwi r4,r4,12 // get page address from page num
178 rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
179 bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
180
181
182 // G3 -- copy using FPRs
183
184 stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
185 stfd f1,FM_SIZE+8(r1)
186 li r5,PPC_PGBYTES/32 // count of cache lines in a page
187 stfd f2,FM_SIZE+16(r1)
188 mtctr r5
189 stfd f3,FM_SIZE+24(r1)
190 mtmsr r12 // turn off DR after saving FPRs on stack
191 isync
192
193 pmap_g3_copy_loop: // loop over 32-byte cache lines
194 dcbz 0,r4 // avoid read of dest line
195 lfd f0,0(r3)
196 lfd f1,8(r3)
197 lfd f2,16(r3)
198 lfd f3,24(r3)
199 addi r3,r3,32
200 stfd f0,0(r4)
201 stfd f1,8(r4)
202 stfd f2,16(r4)
203 stfd f3,24(r4)
204 dcbst 0,r4 // flush dest line to RAM
205 addi r4,r4,32
206 bdnz pmap_g3_copy_loop
207
208 sync // wait for stores to take
209 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
210 li r6,PPC_PGBYTES-32 // point to last line in page
211 pmap_g3_icache_flush:
212 subic. r5,r6,32 // more to go?
213 icbi r4,r6 // flush another line in icache
214 subi r6,r5,32 // get offset to next line
215 icbi r4,r5
216 bne pmap_g3_icache_flush
217
218 sync
219 mtmsr r2 // turn DR back on
220 isync
221 lfd f0,FM_SIZE+0(r1) // restore the FPRs
222 lfd f1,FM_SIZE+8(r1)
223 lfd f2,FM_SIZE+16(r1)
224 lfd f3,FM_SIZE+24(r1)
225
226 b pmap_g4_restore // restore MSR and done
227
228
229 // G4 -- copy using VRs
230
231 pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
232 la r9,FM_SIZE+16(r1) // point r9 at the place where we save VRs
233 li r5,16 // load x-form offsets into r5-r9
234 li r6,32 // another offset
235 stvx v0,0,r9 // save some VRs so we can use to copy
236 li r7,48 // another offset
237 stvx v1,r5,r9
238 li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
239 stvx v2,r6,r9
240 mtctr r0
241 li r8,96 // get look-ahead for touch
242 stvx v3,r7,r9
243 li r9,128
244 mtmsr r12 // now we've saved VRs on stack, turn off DR
245 isync // wait for it to happen
246 b pmap_g4_copy_loop
247
248 .align 5 // align inner loops
249 pmap_g4_copy_loop: // loop over 64-byte chunks
250 dcbt r3,r8 // touch 3 lines ahead
251 nop // avoid a 17-word loop...
252 dcbt r3,r9 // touch 4 lines ahead
253 nop // more padding
254 dcba 0,r4 // avoid pre-fetch of 1st dest line
255 lvx v0,0,r3 // offset 0
256 lvx v1,r5,r3 // offset 16
257 lvx v2,r6,r3 // offset 32
258 lvx v3,r7,r3 // offset 48
259 addi r3,r3,64
260 dcba r6,r4 // avoid pre-fetch of 2nd line
261 stvx v0,0,r4 // offset 0
262 stvx v1,r5,r4 // offset 16
263 stvx v2,r6,r4 // offset 32
264 stvx v3,r7,r4 // offset 48
265 dcbf 0,r4 // push line 1
266 dcbf r6,r4 // and line 2
267 addi r4,r4,64
268 bdnz pmap_g4_copy_loop
269
270 sync // wait for stores to take
271 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
272 li r8,PPC_PGBYTES-32 // point to last line in page
273 pmap_g4_icache_flush:
274 subic. r9,r8,32 // more to go?
275 icbi r4,r8 // flush from icache
276 subi r8,r9,32 // get offset to next line
277 icbi r4,r9
278 bne pmap_g4_icache_flush
279
280 sync
281 mtmsr r2 // turn DR back on
282 isync
283 la r9,FM_SIZE+16(r1) // get base of VR save area
284 lvx v0,0,r9 // restore the VRs
285 lvx v1,r5,r9
286 lvx v2,r6,r9
287 lvx v3,r7,r9
288
289 pmap_g4_restore: // r11=MSR
290 mtmsr r11 // turn EE on, VEC and FR off
291 isync // wait for it to happen
292 addi r1,r1,kSFSize // pop off our stack frame
293 lwz r0,8(r1) // restore return address
294 mtlr r0
295 blr
296
297
298 // 64-bit/128-byte processor: copy using VRs
299
300 pmap_copy_64: // r10=features, r11=old MSR
301 sldi r3,r3,12 // get page address from page num
302 sldi r4,r4,12 // get page address from page num
303 la r9,FM_SIZE+16(r1) // get base of VR save area
304 li r5,16 // load x-form offsets into r5-r9
305 li r6,32 // another offset
306 bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
307 stvx v0,0,r9 // save 8 VRs so we can copy without bubbles
308 stvx v1,r5,r9
309 li r7,48 // another offset
310 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
311 stvx v2,r6,r9
312 stvx v3,r7,r9
313 addi r9,r9,64 // advance base ptr so we can store another 4
314 mtctr r0
315 li r0,MASK(MSR_DR) // get DR bit
316 stvx v4,0,r9
317 stvx v5,r5,r9
318 andc r12,r2,r0 // turn off DR bit
319 li r0,1 // get a 1 to slam into SF
320 stvx v6,r6,r9
321 stvx v7,r7,r9
322 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
323 li r8,-128 // offset so we can reach back one line
324 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
325 isync // wait for it to happen
326 dcbt128 0,r3,1 // start a forward stream
327 b pmap_64_copy_loop
328
329 .align 5 // align inner loops
330 pmap_64_copy_loop: // loop over 128-byte chunks
331 dcbz128 0,r4 // avoid read of destination line
332 lvx v0,0,r3 // offset 0
333 lvx v1,r5,r3 // offset 16
334 lvx v2,r6,r3 // offset 32
335 lvx v3,r7,r3 // offset 48
336 addi r3,r3,64 // don't have enough GPRs so add 64 2x
337 lvx v4,0,r3 // offset 64
338 lvx v5,r5,r3 // offset 80
339 lvx v6,r6,r3 // offset 96
340 lvx v7,r7,r3 // offset 112
341 addi r3,r3,64
342 stvx v0,0,r4 // offset 0
343 stvx v1,r5,r4 // offset 16
344 stvx v2,r6,r4 // offset 32
345 stvx v3,r7,r4 // offset 48
346 addi r4,r4,64
347 stvx v4,0,r4 // offset 64
348 stvx v5,r5,r4 // offset 80
349 stvx v6,r6,r4 // offset 96
350 stvx v7,r7,r4 // offset 112
351 addi r4,r4,64
352 dcbf r8,r4 // flush the line we just wrote
353 bdnz pmap_64_copy_loop
354
355 sync // wait for stores to take
356 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
357 li r8,PPC_PGBYTES-128 // point to last line in page
358 pmap_64_icache_flush:
359 subic. r9,r8,128 // more to go?
360 icbi r4,r8 // flush from icache
361 subi r8,r9,128 // get offset to next line
362 icbi r4,r9
363 bne pmap_64_icache_flush
364
365 sync
366 mtmsrd r2 // turn DR back on, SF off
367 isync
368 la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
369 lvx v0,0,r9 // restore the VRs
370 lvx v1,r5,r9
371 lvx v2,r6,r9
372 lvx v3,r7,r9
373 addi r9,r9,64
374 lvx v4,0,r9
375 lvx v5,r5,r9
376 lvx v6,r6,r9
377 lvx v7,r7,r9
378
379 b pmap_g4_restore // restore lower half of MSR and return
380
381 //
382 // Copy on 64-bit without VMX
383 //
384
385 pmap_novmx_copy:
386 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
387 mtctr r0
388 li r0,MASK(MSR_DR) // get DR bit
389 andc r12,r2,r0 // turn off DR bit
390 li r0,1 // get a 1 to slam into SF
391 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
392 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
393 isync // wait for it to happen
394 dcbt128 0,r3,1 // start a forward stream
395
396 pmap_novmx_copy_loop: // loop over 128-byte cache lines
397 dcbz128 0,r4 // avoid read of dest line
398
399 ld r0,0(r3) // Load half a line
400 ld r12,8(r3)
401 ld r5,16(r3)
402 ld r6,24(r3)
403 ld r7,32(r3)
404 ld r8,40(r3)
405 ld r9,48(r3)
406 ld r10,56(r3)
407
408 std r0,0(r4) // Store half a line
409 std r12,8(r4)
410 std r5,16(r4)
411 std r6,24(r4)
412 std r7,32(r4)
413 std r8,40(r4)
414 std r9,48(r4)
415 std r10,56(r4)
416
417 ld r0,64(r3) // Load half a line
418 ld r12,72(r3)
419 ld r5,80(r3)
420 ld r6,88(r3)
421 ld r7,96(r3)
422 ld r8,104(r3)
423 ld r9,112(r3)
424 ld r10,120(r3)
425
426 addi r3,r3,128
427
428 std r0,64(r4) // Store half a line
429 std r12,72(r4)
430 std r5,80(r4)
431 std r6,88(r4)
432 std r7,96(r4)
433 std r8,104(r4)
434 std r9,112(r4)
435 std r10,120(r4)
436
437 dcbf 0,r4 // flush the line we just wrote
438 addi r4,r4,128
439 bdnz pmap_novmx_copy_loop
440
441 sync // wait for stores to take
442 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
443 li r8,PPC_PGBYTES-128 // point to last line in page
444
445 pmap_novmx_icache_flush:
446 subic. r9,r8,128 // more to go?
447 icbi r4,r8 // flush from icache
448 subi r8,r9,128 // get offset to next line
449 icbi r4,r9
450 bne pmap_novmx_icache_flush
451
452 sync
453 mtmsrd r2 // turn DR back on, SF off
454 isync
455
456 b pmap_g4_restore // restore lower half of MSR and return
457
458
459
460 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
461
462 // Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
463 // These routines all run on both 32- and 64-bit machines, though because they are called
464 // by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
465 // by MapUserMemoryWindow will, however, be 64 bits on 64-bit machines. Beware of using
466 // compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
467 // is no need to store or load it, which would be mode-dependent operations since it could be
468 // 32 or 64 bits.
469
470 #define kkFrameSize (FM_SIZE+32)
471
472 #define kkBufSize (FM_SIZE+0)
473 #define kkCR3 (FM_SIZE+4)
474 #define kkSource (FM_SIZE+8)
475 #define kkDest (FM_SIZE+12)
476 #define kkCountPtr (FM_SIZE+16)
477 #define kkR31Save (FM_SIZE+20)
478 #define kkThrErrJmp (FM_SIZE+24)
479
480
481 // nonvolatile CR bits we use as flags in cr3
482
483 #define kk64bit 12
484 #define kkNull 13
485 #define kkIn 14
486 #define kkString 15
487 #define kkZero 15
488
489
490 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
491 /*
492 * int
493 * copyoutstr(src, dst, maxcount, count)
494 * vm_offset_t src; // r3
495 * addr64_t dst; // r4 and r5
496 * vm_size_t maxcount; // r6
497 * vm_size_t* count; // r7
498 *
499 * Set *count to the number of bytes copied.
500 */
501
502 ENTRY(copyoutstr, TAG_NO_FRAME_USED)
503 mfcr r2,0x10 // save caller's cr3, which we use for flags
504 mr r10,r4 // move high word of 64-bit user address to r10
505 li r0,0
506 crset kkString // flag as a string op
507 mr r11,r5 // move low word of 64-bit user address to r11
508 stw r0,0(r7) // initialize #bytes moved
509 crclr kkIn // flag as copyout
510 b copyJoin
511
512
513 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
514 /*
515 * int
516 * copyinstr(src, dst, maxcount, count)
517 * addr64_t src; // r3 and r4
518 * vm_offset_t dst; // r5
519 * vm_size_t maxcount; // r6
520 * vm_size_t* count; // r7
521 *
522 * Set *count to the number of bytes copied
523 * If dst == NULL, don't copy, just count bytes.
524 * Only currently called from klcopyinstr.
525 */
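// A hedged usage sketch (hypothetical kernel caller; as noted above, copyinstr is
// currently reached only via klcopyinstr). Types follow the prototype sketched above.
//
//      char namebuf[128];
//      vm_size_t done = 0;
//      int err = copyinstr(user_string_addr,            // addr64_t user address (hypothetical)
//                          (vm_offset_t)namebuf,        // kernel destination buffer
//                          sizeof(namebuf),             // maxcount
//                          &done);                      // bytes moved, counting the final 0
//      // err is 0 on success, EFAULT on a bad user address, or ENAMETOOLONG if the
//      // buffer fills before a terminating 0 is found (see copyinstr10 below).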
526
527 ENTRY(copyinstr, TAG_NO_FRAME_USED)
528 mfcr r2,0x10 // save caller's cr3, which we use for flags
529 cmplwi r5,0 // dst==NULL?
530 mr r10,r3 // move high word of 64-bit user address to r10
531 li r0,0
532 crset kkString // flag as a string op
533 mr r11,r4 // move low word of 64-bit user address to r11
534 crmove kkNull,cr0_eq // remember if (dst==NULL)
535 stw r0,0(r7) // initialize #bytes moved
536 crset kkIn // flag as copyin (rather than copyout)
537 b copyJoin1 // skip over the "crclr kkNull"
538
539
540 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
541 /*
542 * int
543 * copyout(src, dst, count)
544 * vm_offset_t src; // r3
545 * addr64_t dst; // r4 and r5
546 * size_t count; // r6
547 */
548
549 .align 5
550 .globl EXT(copyout)
551 .globl EXT(copyoutmsg)
552
553 LEXT(copyout)
554 LEXT(copyoutmsg)
555
556 #if INSTRUMENT
557 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
558 stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
559 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
560 stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
561 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
562 stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
563 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
564 stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
565 #endif
566 mfcr r2,0x10 // save caller's cr3, which we use for flags
567 mr r10,r4 // move high word of 64-bit user address to r10
568 crclr kkString // not a string version
569 mr r11,r5 // move low word of 64-bit user address to r11
570 crclr kkIn // flag as copyout
571 b copyJoin
572
573
574 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
575 /*
576 * int
577 * copyin(src, dst, count)
578 * addr64_t src; // r3 and r4
579 * vm_offset_t dst; // r5
580 * size_t count; // r6
581 */
582
583
584 .align 5
585 .globl EXT(copyin)
586 .globl EXT(copyinmsg)
587
588 LEXT(copyin)
589 LEXT(copyinmsg)
590
591 mfcr r2,0x10 // save caller's cr3, which we use for flags
592 mr r10,r3 // move high word of 64-bit user address to r10
593 crclr kkString // not a string version
594 mr r11,r4 // move low word of 64-bit user address to r11
595 crset kkIn // flag as copyin
596
597
598 // Common code to handle setup for all the copy variants:
599 // r2 = caller's cr3
600 // r3 = source if copyout
601 // r5 = dest if copyin
602 // r6 = buffer length or count
603 // r7 = count output ptr (if kkString set)
604 // r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
605 // r11 = low word of 64-bit user-space address
606 // cr3 = kkIn, kkString, kkNull flags
607
608 copyJoin:
609 crclr kkNull // (dst==NULL) convention not used with this call
610 copyJoin1: // enter from copyinstr with kkNull set
611 mflr r0 // get return address
612 cmplwi r6,0 // buffer length 0?
613 lis r9,0x1000 // r9 <- 0x10000000 (256MB)
614 stw r0,FM_LR_SAVE(r1) // save return
615 cmplw cr1,r6,r9 // buffer length > 256MB ?
616 mfsprg r8,2 // get the features
617 beq-- copyinout_0 // 0 length is degenerate case
618 stwu r1,-kkFrameSize(r1) // set up stack frame
619 stw r2,kkCR3(r1) // save caller's cr3, which we use for flags
620 mtcrf 0x02,r8 // move pf64Bit to cr6
621 stw r3,kkSource(r1) // save args across MapUserMemoryWindow
622 stw r5,kkDest(r1)
623 stw r6,kkBufSize(r1)
624 crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
625 stw r7,kkCountPtr(r1)
626 stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
627 li r31,0 // no mapped ptr yet
628
629
630 // Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
631 // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
632 // the buffer length to 256MB. This isn't an issue for strings shorter than 256MB
633 // (as most are!), but for longer ones we eventually return ENAMETOOLONG. This restriction
634 // is due to MapUserMemoryWindow; we don't want to consume more than two segments for
635 // the mapping.
636
637 ble++ cr1,copyin0 // skip if buffer length <= 256MB
638 bf kkString,copyinout_too_big // error if not string op
639 mr r6,r9 // silently clamp buffer length to 256MB
640 stw r9,kkBufSize(r1) // update saved copy too
641
642
643 // Set up thread_recover in case we hit an illegal address.
644
645 copyin0:
646 mfsprg r8,1 // Get the current thread
647 lis r2,hi16(copyinout_error)
648 ori r2,r2,lo16(copyinout_error)
649 lwz r4,THREAD_RECOVER(r8)
650 lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
651 stw r2,THREAD_RECOVER(r8)
652 stw r4,kkThrErrJmp(r1)
653
654
655 // Map user segment into kernel map, turn on 64-bit mode. At this point:
656 // r3 = vm map
657 // r6 = buffer length
658 // r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
659 //
660 // When we call MapUserMemoryWindow, we pass:
661 // r3 = vm map ptr
662 // r4/r5 = 64-bit user space address as an addr64_t
663
664 mr r4,r10 // copy user ptr into r4/r5
665 mr r5,r11
666 #if INSTRUMENT
667 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
668 stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
669 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
670 stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
671 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
672 stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
673 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
674 stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
675 #endif
676 bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand
677 #if INSTRUMENT
678 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
679 stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
680 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
681 stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
682 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
683 stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
684 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
685 stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
686 #endif
687 mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
688 bf-- kk64bit,copyin1 // skip if a 32-bit processor
689
690 rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
691 mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
692 li r0,1
693 rldimi r4,r0,63,MSR_SF_BIT // light bit 0
694 mtmsrd r4 // turn on 64-bit mode
695 isync // wait for mode to change
696
697
698 // Load r3-r5, substituting mapped ptr as appropriate.
699
700 copyin1:
701 lwz r5,kkBufSize(r1) // restore length to copy
702 bf kkIn,copyin2 // skip if copyout
703 lwz r4,kkDest(r1) // copyin: dest is kernel ptr
704 mr r3,r31 // source is mapped ptr
705 b copyin3
706 copyin2: // handle copyout
707 lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
708 mr r4,r31 // dest is mapped ptr into user space
709
710
711 // Finally, all set up to copy:
712 // r3 = source ptr (mapped if copyin)
713 // r4 = dest ptr (mapped if copyout)
714 // r5 = length
715 // r31 = mapped ptr returned by MapUserMemoryWindow
716 // cr3 = kkIn, kkString, kk64bit, and kkNull flags
717
718 copyin3:
719 bt kkString,copyString // handle copyinstr and copyoutstr
720 bl EXT(bcopy) // copyin and copyout: let bcopy do the work
721 li r3,0 // return success
722
723
724 // Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
725 // from error recovery if we get a DSI accessing user space. Clear recovery ptr,
726 // and pop off frame.
727 // r3 = 0, EFAULT, or ENAMETOOLONG
728
729 copyinx:
730 lwz r2,kkCR3(r1) // get caller's cr3
731 mfsprg r6,1 // Get the current thread
732 bf-- kk64bit,copyinx1 // skip if 32-bit processor
733 mfmsr r12
734 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
735 mtmsrd r12 // turn SF off
736 isync // wait for the mode to change
737 copyinx1:
738 lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address
739 lwz r31,kkR31Save(r1) // restore caller's r31
740 lwz r4,kkThrErrJmp(r1) // load saved thread recover
741 addi r1,r1,kkFrameSize // pop off our stack frame
742 mtlr r0
743 stw r4,THREAD_RECOVER(r6) // restore thread recover
744 mtcrf 0x10,r2 // restore cr3
745 blr
746
747
748 /* We get here via the exception handler if an illegal
749 * user memory reference was made. This error handler is used by
750 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
751 * they were at point of fault, so for example cr3 flags are valid.
752 */
753
754 copyinout_error:
755 li r3,EFAULT // return error
756 b copyinx
757
758 copyinout_0: // degenerate case: 0-length copy
759 mtcrf 0x10,r2 // restore cr3
760 li r3,0 // return success
761 blr
762
763 copyinout_too_big: // degenerate case
764 mtcrf 0x10,r2 // restore cr3
765 lwz r1,0(r1) // pop off stack frame
766 li r3,ENAMETOOLONG
767 blr
768
769
770 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
771 // Handle copyinstr and copyoutstr. At this point the stack frame is set up,
772 // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
773 // if necessary, and:
774 // r3 = source ptr, mapped if copyinstr
775 // r4 = dest ptr, mapped if copyoutstr
776 // r5 = buffer length
777 // r31 = mapped ptr returned by MapUserMemoryWindow
778 // cr3 = kkIn, kkString, kkNull, and kk64bit flags
779 // We do word copies unless the buffer is very short, then use a byte copy loop
780 // for the leftovers if necessary. The crossover at which the word loop becomes
781 // faster is about seven bytes, counting the zero.
782 //
783 // We first must word-align the source ptr, in order to avoid taking a spurious
784 // page fault.
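// A small C sketch (illustrative only; big-endian 32-bit words, as on PowerPC) of how the
// first, possibly misaligned, source word is prepared below: the word is loaded from the
// rounded-down address, and the bytes that precede the first real source byte are forced
// to 0xFF so the zero-byte test cannot see spurious zeros in them.
//
//      uint32_t byteoff = addr & 3;                     // offset of 1st byte within its word
//      uint32_t mask    = 0xFFFFFFFFu >> (byteoff * 8); // covers the payload bytes
//      uint32_t padded  = aligned_word | ~mask;         // preceding bytes become 0xFF (the orc)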
785
786 copyString:
787 cmplwi cr1,r5,15 // is buffer very short?
788 mr r12,r3 // remember ptr to 1st source byte
789 mtctr r5 // assuming short, set up loop count for bytes
790 blt-- cr1,copyinstr8 // too short for word loop
791 rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word
792 rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word
793 li r7,-1
794 sub r3,r3,r2 // word-align source address
795 add r6,r5,r2 // get length starting at byte 0 in word
796 srw r7,r7,r9 // get mask for bytes in first word
797 srwi r0,r6,2 // get #words in buffer
798 lwz r5,0(r3) // get aligned word with first source byte
799 lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
800 lis r11,hi16(0x80808080)
801 mtctr r0 // set up word loop count
802 addi r3,r3,4 // advance past the source word
803 ori r10,r10,lo16(0xFEFEFEFF)
804 ori r11,r11,lo16(0x80808080)
805 orc r8,r5,r7 // map bytes preceding first source byte into 0xFF
806 bt-- kkNull,copyinstr5enter // enter loop that just counts
807
808 // Special case 1st word, which has been 0xFF filled on left. Note that we use
809 // "and.", even though we execute both in 32 and 64-bit mode. This is OK.
810
811 slw r5,r5,r9 // left justify payload bytes
812 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
813 andc r7,r11,r8 // r7 = ~data & 0x80808080
814 subfic r0,r2,4 // get r0 <- #payload bytes in 1st word
815 and. r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero
816 stw r5,0(r4) // copy payload bytes to dest buffer
817 add r4,r4,r0 // then point to next byte in dest buffer
818 bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found
819
820 b copyinstr7 // 0 found (buffer can't be full)
821
822
823 // Word loop(s). They do a word-parallel search for 0s, using the following
824 // inobvious but very efficient test:
825 // y = data + 0xFEFEFEFF
826 // z = ~data & 0x80808080
827 // If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
828 // of this loop, one that just counts and another that copies.
829 // r3 = ptr to next word of source (word aligned)
830 // r4 = ptr to next byte in buffer
831 // r6 = original buffer length (adjusted to be word origin)
832 // r10 = 0xFEFEFEFF
833 // r11 = 0x80808080
834 // r12 = ptr to 1st source byte (used to determine string length)
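// A minimal C sketch of this test (illustrative only), for one 32-bit word:
//
//      #include <stdint.h>
//      static int word_has_zero_byte(uint32_t data) {
//          uint32_t y = data + 0xFEFEFEFFu;     // same as data - 0x01010101
//          uint32_t z = ~data & 0x80808080u;    // high bit of each byte that was < 0x80
//          return (y & z) != 0;                 // nonzero iff some byte of data is 0x00
//      }
//
// As the copyinstr7 comments below explain, y & z can also light bits for 0x01 bytes that
// precede the first zero byte, so the position of the zero is found only after masking
// those false hits; the yes/no answer above is still exact.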
835
836 .align 5 // align inner loops for speed
837 copyinstr5: // version that counts but does not copy
838 lwz r8,0(r3) // get next word of source
839 addi r3,r3,4 // advance past it
840 copyinstr5enter:
841 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
842 andc r7,r11,r8 // r7 = ~data & 0x80808080
843 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
844 bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes in r8 are nonzero
845
846 b copyinstr7
847
848 .align 5 // align inner loops for speed
849 copyinstr6: // version that counts and copies
850 lwz r8,0(r3) // get next word of source
851 addi r3,r3,4 // advance past it
852 addi r4,r4,4 // increment dest ptr while we wait for data
853 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
854 andc r7,r11,r8 // r7 = ~data & 0x80808080
855 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
856 stw r8,-4(r4) // pack all 4 bytes into buffer
857 bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
858
859
860 // Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
861 // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
862 // mapped to 0x80. We must mask out these false hits before searching for a 0x80 byte.
863 // r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
864 // r6 = original buffer length (adjusted to be word origin)
865 // r7 = computed vector of 0x00 and 0x80 bytes
866 // r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
867 // r12 = ptr to 1st source byte (used to determine string length)
868 // cr0 = beq set iff 0 not found
869
870 copyinstr7:
871 rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
872 rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word
873 andc r7,r7,r2 // turn off false hits from 0x0100 worst case
874 crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
875 srwi r7,r7,8 // we want to count the 0 as a byte xferred
876 cmpwi r6,0 // any bytes left over in last word?
877 cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
878 subi r3,r3,4 // back up r3 to point to 1st byte in r8
879 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
880 add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred
881 bt++ kkZero,copyinstr10 // 0 found, so done
882
883 beq copyinstr10 // r6==0, so buffer truly full
884 mtctr r6 // 0 not found, loop over r6 bytes
885 b copyinstr8 // enter byte loop for last 1-3 leftover bytes
886
887
888 // Byte loop. This is used for very small buffers and for the odd bytes left over
889 // after searching and copying words at a time.
890 // r3 = ptr to next byte of source
891 // r4 = ptr to next dest byte
892 // r12 = ptr to first byte of source
893 // ctr = count of bytes to check
894
895 .align 5 // align inner loops for speed
896 copyinstr8: // loop over bytes of source
897 lbz r0,0(r3) // get next byte of source
898 addi r3,r3,1
899 addi r4,r4,1 // increment dest addr whether we store or not
900 cmpwi r0,0 // the 0?
901 bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr
902 stb r0,-1(r4)
903 copyinstr9:
904 bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
905
906 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
907
908
909 // Buffer filled or 0 found. Unwind and return.
910 // r3 = ptr to 1st source byte not transferred
911 // r12 = ptr to 1st source byte
912 // r31 = mapped ptr returned by MapUserMemoryWindow
913 // cr3 = kkZero set iff 0 found
914
915 copyinstr10:
916 lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
917 sub r2,r3,r12 // compute #bytes copied (including the 0)
918 li r3,0 // assume success return status
919 stw r2,0(r9) // store #bytes moved
920 bt++ kkZero,copyinx // we did find the 0 so return 0
921 li r3,ENAMETOOLONG // buffer filled
922 b copyinx // join main exit routine
923
924 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
925 /*
926 * int
927 * copypv(source, sink, size, which)
928 * addr64_t src; // r3 and r4
929 * addr64_t dst; // r5 and r6
930 * size_t size; // r7
931 * int which; // r8
932 *
933 * Operand size bytes are copied from operand src into operand dst. The source and
934 * destination operand addresses are given as addr64_t, and may designate starting
935 * locations in physical or virtual memory in any combination except where both are
936 * virtual. Virtual memory locations may be in either the kernel or the current thread's
937 * address space. Operand size may be up to 256MB.
938 *
939 * Operation is controlled by operand which, which offers these options:
940 * cppvPsrc : source operand is (1) physical or (0) virtual
941 * cppvPsnk : destination operand is (1) physical or (0) virtual
942 * cppvKmap : virtual operand is in (1) kernel or (0) current thread
943 * cppvFsnk : (1) flush destination before and after transfer
944 * cppvFsrc : (1) flush source before and after transfer
945 * cppvNoModSnk : (1) don't set destination operand's changed bit(s)
946 * cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
947 *
948 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
949 * This section describes the operation of the new 64-bit path.
950 *
951 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
952 * window in the kernel address space into all of physical RAM plus the I/O hole. Since
953 * the window's mappings specify the proper access policies for the underlying memory,
954 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
955 * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
956 * memory window, and are accessed with data relocation on. Virtual addresses are either
957 * within the kernel, or are mapped into the kernel address space through the user memory
958 * window. Because accesses to a virtual operand are performed with data relocation on,
959 * the new path does not have to translate the address, disable/enable interrupts, lock
960 * the mapping, or update referenced and changed bits.
961 *
962 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
963 * a substantial performance penalty for copypv operating in real mode. Utilizing the
964 * new 64-bit path, transfer performance increases >100% on the G5.
965 *
966 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
967 * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
968 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
969 * required.
970 *
971 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
972 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
973 * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
974 * in our own stack frame across calls to 32-bit functions.
975 *
976 */
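// A hedged usage sketch (hypothetical caller; argument names are for illustration only,
// the prototype and flag names follow the comment above, and the cppvXxx identifiers are
// assumed to be the flag masks for the option bits listed there). It copies len bytes from
// a physical source into a kernel-virtual sink, suppressing the source referenced-bit update:
//
//      extern int copypv(addr64_t source, addr64_t sink, size_t size, int which);
//
//      int rc = copypv(src_paddr,                        // addr64_t physical source
//                      (addr64_t)(uintptr_t)kern_buf,    // kernel-virtual sink
//                      len,                              // operand size, less than 256MB
//                      cppvPsrc | cppvKmap | cppvNoRefSrc);
//      // rc is 0 on success, EINVAL for bad arguments, or EFAULT if a fault occurs
//      // during the copy (see copypv_einval and copypv_error below).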
977
978 // Map operand which bits into non-volatile CR2 and CR3 bits.
979 #define whichAlign ((3+1)*4)
980 #define whichMask 0x007F0000
981 #define pvPsnk (cppvPsnkb - whichAlign)
982 #define pvPsrc (cppvPsrcb - whichAlign)
983 #define pvFsnk (cppvFsnkb - whichAlign)
984 #define pvFsrc (cppvFsrcb - whichAlign)
985 #define pvNoModSnk (cppvNoModSnkb - whichAlign)
986 #define pvNoRefSrc (cppvNoRefSrcb - whichAlign)
987 #define pvKmap (cppvKmapb - whichAlign)
988 #define pvNoCache cr2_lt
989
990 .align 5
991 .globl EXT(copypv)
992
993 LEXT(copypv)
994 mfsprg r10,2 // get feature flags
995 mtcrf 0x02,r10 // we need to test pf64Bit
996 bt++ pf64Bitb,copypv_64 // skip if 64-bit processor (only they take hint)
997
998 b EXT(hw_copypv_32) // carry on with 32-bit copypv
999
1000 // Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
1001 copypv_64:
1002 mfsprg r9,1 // get current thread
1003 stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
1004 // allocate stack frame and link it
1005 mflr r0 // get return address
1006 mfcr r10 // get cr2 and cr3
1007 lwz r12,THREAD_RECOVER(r9) // get error callback
1008 stw r26,FM_ARG0+0x00(r1) // save non-volatile r26
1009 stw r27,FM_ARG0+0x04(r1) // save non-volatile r27
1010 stw r28,FM_ARG0+0x08(r1) // save non-volatile r28
1011 stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29
1012 stw r30,FM_ARG0+0x10(r1) // save non-volatile r30
1013 stw r31,FM_ARG0+0x14(r1) // save non-volatile r31
1014 stw r12,FM_ARG0+0x20(r1) // save error callback
1015 stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1016 // save return address
1017 stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1018 // save non-volatile cr2 and cr3
1019
1020 // Non-volatile register usage in this routine is:
1021 // r26: saved msr image
1022 // r27: current pmap_t / virtual source address
1023 // r28: destination virtual address
1024 // r29: source address
1025 // r30: destination address
1026 // r31: byte count to copy
1027 // cr2/3: parameter 'which' bits
1028
1029 rlwinm r8,r8,whichAlign,whichMask // align and mask which bits
1030 mr r31,r7 // copy size to somewhere non-volatile
1031 mtcrf 0x20,r8 // insert which bits into cr2 and cr3
1032 mtcrf 0x10,r8 // insert which bits into cr2 and cr3
1033 rlwinm r29,r3,0,1,0 // form source address high-order bits
1034 rlwinm r30,r5,0,1,0 // form destination address high-order bits
1035 rlwimi r29,r4,0,0,31 // form source address low-order bits
1036 rlwimi r30,r6,0,0,31 // form destination address low-order bits
1037 crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical?
1038 cntlzw r0,r31 // count leading zeroes in byte count
1039 cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical
1040 bf-- cr7_eq,copypv_einval // both operands may not be virtual
1041 cmplwi r0,4 // byte count greater than or equal 256M (2**28)?
1042 blt-- copypv_einval // byte count too big, give EINVAL
1043 cmplwi r31,0 // byte count zero?
1044 beq-- copypv_zero // early out
1045 bt cr7_lt,copypv_phys // both operand addresses are physical
1046 mr r28,r30 // assume destination is virtual
1047 bf pvPsnk,copypv_dv // is destination virtual?
1048 mr r28,r29 // no, so source must be virtual
1049 copypv_dv:
1050 lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order
1051 lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
1052 bt pvKmap,copypv_kern // virtual address in kernel map?
1053 lwz r3,ACT_VMMAP(r9) // get user's vm_map *
1054 rldicl r4,r28,32,32 // r4, r5 <- addr64_t virtual address
1055 rldicl r5,r28,0,32
1056 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call
1057 std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call
1058 bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space
1059 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1060 ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30
1061 rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address
1062 rlwimi r28,r4,0,0,31 // into a single 64-bit scalar
1063 copypv_kern:
1064
1065 // Since we'll be accessing the virtual operand with data-relocation on, we won't need to
1066 // update the referenced and changed bits manually after the copy. So, force the appropriate
1067 // flag bit on for the virtual operand.
1068 crorc pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits
1069 crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit
1070
1071 // We'll be finding a mapping and looking at it, so we need to disable 'rupts.
1072 lis r0,hi16(MASK(MSR_VEC)) // get vector mask
1073 ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask
1074 mfmsr r26 // save current msr
1075 andc r26,r26,r0 // turn off VEC and FP in saved copy
1076 ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask
1077 andc r0,r26,r0 // disable EE in our new msr image
1078 mtmsrd r0 // introduce new msr image
1079
1080 // We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
1081 // try to find a mapping corresponding to this address in order to determine whether the address
1082 // is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
1083 // (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
1084 // examine the mapping's caching-inhibited bit.
1085 mr r3,r27 // r3 <- pmap_t pmap
1086 rldicl r4,r28,32,32 // r4, r5 <- addr64_t va
1087 rldicl r5,r28,0,32
1088 la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva
1089 li r7,1 // r7 <- int full, search nested mappings
1090 std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls
1091 std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls
1092 std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls
1093 std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls
1094 bl EXT(mapping_find) // find mapping for virtual operand
1095 mr. r3,r3 // did we find it?
1096 beq copypv_nomapping // nope, so we'll assume it's cacheable
1097 lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags
1098 rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set?
1099 crnot pvNoCache,cr0_eq // if it is, use bcopy_nc
1100 bl EXT(mapping_drop_busy) // drop busy on the mapping
1101 copypv_nomapping:
1102 ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26
1103 ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28
1104 ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29
1105 ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30
1106 mtmsrd r26 // restore msr to its previous state
1107
1108 // Set both the source and destination virtual addresses to the virtual operand's address --
1109 // we'll overlay one of them with the physical operand's address.
1110 mr r27,r28 // make virtual operand BOTH source AND destination
1111
1112 // Now we're ready to relocate the physical operand address(es) into the physical memory window.
1113 // Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
1114 // space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
1115 // we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
1116 copypv_phys:
1117 ld r6,lgPMWvaddr(0) // get physical memory window virtual address
1118 bf pvPsnk,copypv_dstvirt // is destination address virtual?
1119 cntlzd r4,r30 // count leading zeros in destination address
1120 cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1121 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1122 add r28,r30,r6 // relocate physical destination into physical window
1123 copypv_dstvirt:
1124 bf pvPsrc,copypv_srcvirt // is source address virtual?
1125 cntlzd r4,r29 // count leading zeros in source address
1126 cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1127 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1128 add r27,r29,r6 // relocate physical source into physical window
1129 copypv_srcvirt:
1130
1131 // Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
1132 // funny happens during the copy. So, we set a pointer to our error handler in the per-thread
1133 // control block.
1134 mfsprg r8,1 // get current threads stuff
1135 lis r3,hi16(copypv_error) // get our error callback's address, high
1136 ori r3,r3,lo16(copypv_error) // get our error callback's address, low
1137 stw r3,THREAD_RECOVER(r8) // set our error callback
1138
1139 // Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
1140 // 64-bit mode.
1141 li r0,1 // get a handy one bit
1142 mfmsr r3 // get current msr
1143 rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy
1144 mtmsrd r3 // enter 64-bit mode
1145
1146 // If requested, flush data cache
1147 // Note that we don't flush, the code is being saved "just in case".
1148 #if 0
1149 bf pvFsrc,copypv_nfs // do we flush the source?
1150 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1151 rldicl r4,r27,0,32
1152 mr r5,r31 // r5 <- count (in bytes)
1153 li r6,0 // r6 <- boolean phys (false, not physical)
1154 bl EXT(flush_dcache) // flush the source operand
1155 copypv_nfs:
1156 bf pvFsnk,copypv_nfdx // do we flush the destination?
1157 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1158 rldicl r4,r28,0,32
1159 mr r5,r31 // r5 <- count (in bytes)
1160 li r6,0 // r6 <- boolean phys (false, not physical)
1161 bl EXT(flush_dcache) // flush the destination operand
1162 copypv_nfdx:
1163 #endif
1164
1165 // Call bcopy or bcopy_nc to perform the copy.
1166 mr r3,r27 // r3 <- source virtual address
1167 mr r4,r28 // r4 <- destination virtual address
1168 mr r5,r31 // r5 <- bytes to copy
1169 bt pvNoCache,copypv_nc // take non-caching route
1170 bl EXT(bcopy) // call bcopy to do the copying
1171 b copypv_copydone
1172 copypv_nc:
1173 bl EXT(bcopy_nc) // call bcopy_nc to do the copying
1174 copypv_copydone:
1175
1176 // If requested, flush data cache
1177 // Note that we don't flush, the code is being saved "just in case".
1178 #if 0
1179 bf pvFsrc,copypv_nfsx // do we flush the source?
1180 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1181 rldicl r4,r27,0,32
1182 mr r5,r31 // r5 <- count (in bytes)
1183 li r6,0 // r6 <- boolean phys (false, not physical)
1184 bl EXT(flush_dcache) // flush the source operand
1185 copypv_nfsx:
1186 bf pvFsnk,copypv_nfd // do we flush the destination?
1187 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1188 rldicl r4,r28,0,32
1189 mr r5,r31 // r5 <- count (in bytes)
1190 li r6,0 // r6 <- boolean phys (false, not physical)
1191 bl EXT(flush_dcache) // flush the destination operand
1192 copypv_nfd:
1193 #endif
1194
1195 // Leave 64-bit mode.
1196 mfmsr r3 // get current msr
1197 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1198 mtmsrd r3 // leave 64-bit mode
1199
1200 // If requested, set ref/chg on source/dest physical operand(s). It is possible that the copy is
1201 // from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
1202 // mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
1203 // Note that this code is page-size sensitive, so it should probably be a part of our low-level
1204 // code in hw_vm.s.
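// For reference, a closed-form C sketch (illustrative only) of counting the 4K pages
// spanned by a byte range, which is the quantity the page count below is meant to track
// (the code builds it from the initial page fragment plus whole 4K chunks):
//
//      static unsigned pages_spanned(uint64_t addr, uint64_t nbytes) {   // nbytes > 0
//          return (unsigned)(((addr + nbytes - 1) >> 12) - (addr >> 12) + 1);
//      }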
1205 bt pvNoModSnk,copypv_nomod // skip destination update if not requested
1206 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit calls
1207 li r26,1 // r26 <- 4K-page count
1208 mr r27,r31 // r27 <- byte count
1209 rlwinm r3,r30,0,20,31 // does destination cross a page boundary?
1210 subfic r3,r3,4096 // r3 <- bytes left in destination's first 4K page
1211 cmplw r3,r27 // compare with total byte count
1212 blt copypv_modnox // skip if not crossing case
1213 subf r27,r3,r27 // r27 <- byte count less initial fragment
1214 addi r26,r26,1 // increment page count
1215 copypv_modnox:
1216 srdi r3,r27,12 // pages to update (not including crosser)
1217 add r26,r26,r3 // add in crosser
1218 srdi r27,r30,12 // r27 <- destination page number
1219 copypv_modloop:
1220 mr r3,r27 // r3 <- destination page number
1221 la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
1222 bl EXT(mapping_phys_lookup) // see if page is really there
1223 mr. r3,r3 // is it?
1224 beq-- copypv_modend // nope, break out of modify loop
1225 mr r3,r27 // r3 <- destination page number
1226 bl EXT(mapping_set_mod) // set page changed status
1227 subi r26,r26,1 // decrement page count
1228 cmpwi r26,0 // done yet?
1229 bgt copypv_modloop // nope, iterate
1230 copypv_modend:
1231 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1232 copypv_nomod:
1233 bt pvNoRefSrc,copypv_done // skip source update if not requested
1234 copypv_debugref:
1235 li r26,1 // r26 <- 4K-page count
1236 mr r27,r31 // r27 <- byte count
1237 rlwinm r3,r29,0,20,31 // does source cross a page boundary?
1238 subfic r3,r3,4096 // r3 <- bytes left in source's first 4K page
1239 cmplw r3,r27 // compare with total byte count
1240 blt copypv_refnox // skip if not crossing case
1241 subf r27,r3,r27 // r27 <- byte count less initial fragment
1242 addi r26,r26,1 // increment page count
1243 copypv_refnox:
1244 srdi r3,r27,12 // pages to update (not including crosser)
1245 add r26,r26,r3 // add in crosser
1246 srdi r27,r29,12 // r27 <- source page number
1247 copypv_refloop:
1248 mr r3,r27 // r3 <- source page number
1249 la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
1250 bl EXT(mapping_phys_lookup) // see if page is really there
1251 mr. r3,r3 // is it?
1252 beq-- copypv_done // nope, break out of reference loop
1253 mr r3,r27 // r3 <- source page number
1254 bl EXT(mapping_set_ref) // set page referenced status
1255 subi r26,r26,1 // decrement page count
1256 cmpwi r26,0 // done yet?
1257 bgt copypv_refloop // nope, iterate
1258
1259 // Return, indicating success.
1260 copypv_done:
1261 copypv_zero:
1262 li r3,0 // our efforts were crowned with success
1263
1264 // Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
1265 copypv_return:
1266 mfsprg r9,1 // get current threads stuff
1267 lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1268 // get return address
1269 lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1270 // get non-volatile cr2 and cr3
1271 lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26
1272 lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27
1273 mtlr r0 // restore return address
1274 lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28
1275 mtcrf 0x20,r4 // restore non-volatile cr2
1276 mtcrf 0x10,r4 // restore non-volatile cr3
1277 lwz r11,FM_ARG0+0x20(r1) // get saved error callback
1278 lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29
1279 lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30
1280 lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31
1281 stw r11,THREAD_RECOVER(r9) // restore our error callback
1282 lwz r1,0(r1) // release stack frame
1283
1284 blr // y'all come back now
1285
1286 // Invalid argument handler.
1287 copypv_einval:
1288 li r3,EINVAL // invalid argument
1289 b copypv_return // return
1290
1291 // Error encountered during bcopy or bcopy_nc.
1292 copypv_error:
1293 mfmsr r3 // get current msr
1294 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1295 mtmsrd r3 // leave 64-bit mode
1296 li r3,EFAULT // it was all his fault
1297 b copypv_return // return