]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/ppc/movc.s
xnu-792.tar.gz
[apple/xnu.git] / osfmk / ppc / movc.s
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25#include <debug.h>
26#include <ppc/asm.h>
27#include <ppc/proc_reg.h>
28#include <mach/ppc/vm_param.h>
29#include <assym.s>
30#include <sys/errno.h>
31
32#define INSTRUMENT 0
33
34//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
35/*
36 * void pmap_zero_page(vm_offset_t pa)
37 *
38 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
39 * and handles 32 and 128-byte cache lines.
40 */
41
42
43 .align 5
44 .globl EXT(pmap_zero_page)
45
46LEXT(pmap_zero_page)
47
48 mflr r12 // save return address (the bl below clobbers LR)
49 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
50 mtlr r12 // restore return address
51 andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size (32 or 128 bytes)
52
53 subfic r4,r9,PPC_PGBYTES // r4 <- offset of last cache line in page (PPC_PGBYTES - linesize)
54
55 bt++ pf64Bitb,page0S4 // Go do the big guys...
56
57 slwi r3,r3,12 // get page address from page num
58 b page_zero_1 // Jump to line aligned loop...
59
60 .align 5
61
62 nop
63 nop
64 nop
65 nop
66 nop
67 nop
68 nop
69
70page0S4:
71 sldi r3,r3,12 // get page address from page num
72
73page_zero_1: // loop zeroing cache lines, two per iteration, back to front
74 sub. r5,r4,r9 // more to go? (r5 <- offset of next-lower line)
75 dcbz128 r3,r4 // zero either 32 or 128 bytes
76 sub r4,r5,r9 // generate next offset
77 dcbz128 r3,r5
78 bne-- page_zero_1
79
80 b EXT(ml_restore) // restore MSR and do the isync
81
82
83//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
84/* void
85 * phys_copy(src, dst, bytecount)
86 * addr64_t src;
87 * addr64_t dst;
88 * int bytecount
89 *
90 * This routine will copy bytecount bytes from physical address src to physical
91 * address dst. It runs in 64-bit mode if necessary, but does not handle
92 * overlap or make any attempt to be optimal. Length must be a signed word.
93 * Not performance critical.
94 */
95
96
97 .align 5
98 .globl EXT(phys_copy)
99
100LEXT(phys_copy)
101
102 rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
103 mflr r12 // get return address (bl below clobbers LR)
104 rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits (r3 <- src)
105 rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
106 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
107 rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits (r4 <- dst)
108 mtlr r12 // restore return address
109 subic. r5,r7,4 // r5 <- count-4; at least one full word to copy?
110 b phys_copy_2
111
112 .align 5
113
114phys_copy_1: // loop copying words
115 subic. r5,r5,4 // more to go?
116 lwz r0,0(r3)
117 addi r3,r3,4
118 stw r0,0(r4)
119 addi r4,r4,4
120phys_copy_2:
121 bge phys_copy_1 // loop while r5 >= 0 (a full word remains)
122 addic. r5,r5,4 // restore count (r5 <- bytes left, 0..3)
123 ble phys_copy_4 // no more
124
125 // Loop is aligned here
126
127phys_copy_3: // loop copying trailing bytes
128 subic. r5,r5,1 // more to go?
129 lbz r0,0(r3)
130 addi r3,r3,1
131 stb r0,0(r4)
132 addi r4,r4,1
133 bgt phys_copy_3
134phys_copy_4:
135 b EXT(ml_restore) // restore MSR and do the isync
136
137
138//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
139/* void
140 * pmap_copy_page(src, dst)
141 * ppnum_t src;
142 * ppnum_t dst;
143 *
144 * This routine will copy the physical page src to physical page dst
145 *
146 * This routine assumes that the src and dst are page numbers and that the
147 * destination is cached. It runs on 32 and 64 bit processors, with and
148 * without altivec, and with 32 and 128 byte cache lines.
149 * We also must assume that no-one will be executing within the destination
150 * page, and that this will be used for paging. Because this
151 * is a common routine, we have tuned loops for each processor class.
152 *
153 */
154#define kSFSize (FM_SIZE+160)
155
156ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
157
158 lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
159 mflr r0 // get return
160 ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
161 stw r0,8(r1) // save
162 stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
163 mfmsr r11 // save MSR at entry
164 mfsprg r10,2 // get feature flags
165 andc r11,r11,r2 // Clear out vec and fp
166 ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
167 andc r2,r11,r2 // Clear out EE as well
168 mtcrf 0x02,r10 // we need to test pf64Bit
169 ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
170 mtcrf 0x80,r10 // we need to test pfAltivec too
171 oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
172 mtmsr r2 // turn EE off, FP and VEC on
173 isync
174 bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
175 slwi r3,r3,12 // get page address from page num
176 slwi r4,r4,12 // get page address from page num
177 rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
178 bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
179
180
181 // G3 -- copy using FPRs
182
183 stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
184 stfd f1,FM_SIZE+8(r1)
185 li r5,PPC_PGBYTES/32 // count of cache lines in a page
186 stfd f2,FM_SIZE+16(r1)
187 mtctr r5
188 stfd f3,FM_SIZE+24(r1)
189 mtmsr r12 // turn off DR after saving FPRs on stack
190 isync
191
192pmap_g3_copy_loop: // loop over 32-byte cache lines
193 dcbz 0,r4 // avoid read of dest line
194 lfd f0,0(r3)
195 lfd f1,8(r3)
196 lfd f2,16(r3)
197 lfd f3,24(r3)
198 addi r3,r3,32
199 stfd f0,0(r4)
200 stfd f1,8(r4)
201 stfd f2,16(r4)
202 stfd f3,24(r4)
203 dcbst 0,r4 // flush dest line to RAM
204 addi r4,r4,32
205 bdnz pmap_g3_copy_loop
206
207 sync // wait for stores to take
208 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
209 li r6,PPC_PGBYTES-32 // point to last line in page
210pmap_g3_icache_flush:
211 subic. r5,r6,32 // more to go?
212 icbi r4,r6 // flush another line in icache
213 subi r6,r5,32 // get offset to next line
214 icbi r4,r5
215 bne pmap_g3_icache_flush
216
217 sync
218 mtmsr r2 // turn DR back on
219 isync
220 lfd f0,FM_SIZE+0(r1) // restore the FPRs
221 lfd f1,FM_SIZE+8(r1)
222 lfd f2,FM_SIZE+16(r1)
223 lfd f3,FM_SIZE+24(r1)
224
225 b pmap_g4_restore // restore MSR and done
226
227
228 // G4 -- copy using VRs
229
230pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
231 la r9,FM_SIZE+16(r1) // r9 <- base of VR save area on stack
232 li r5,16 // load x-form offsets into r5-r9
233 li r6,32 // another offset
234 stvx v0,0,r9 // save some VRs so we can use to copy
235 li r7,48 // another offset
236 stvx v1,r5,r9
237 li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
238 stvx v2,r6,r9
239 mtctr r0
240 li r8,96 // get look-ahead for touch
241 stvx v3,r7,r9
242 li r9,128
243 mtmsr r12 // now we've saved VRs on stack, turn off DR
244 isync // wait for it to happen
245 b pmap_g4_copy_loop
246
247 .align 5 // align inner loops
248pmap_g4_copy_loop: // loop over 64-byte chunks
249 dcbt r3,r8 // touch 3 lines ahead
250 nop // avoid a 17-word loop...
251 dcbt r3,r9 // touch 4 lines ahead
252 nop // more padding
253 dcba 0,r4 // avoid pre-fetch of 1st dest line
254 lvx v0,0,r3 // offset 0
255 lvx v1,r5,r3 // offset 16
256 lvx v2,r6,r3 // offset 32
257 lvx v3,r7,r3 // offset 48
258 addi r3,r3,64
259 dcba r6,r4 // avoid pre-fetch of 2nd line
260 stvx v0,0,r4 // offset 0
261 stvx v1,r5,r4 // offset 16
262 stvx v2,r6,r4 // offset 32
263 stvx v3,r7,r4 // offset 48
264 dcbf 0,r4 // push line 1
265 dcbf r6,r4 // and line 2
266 addi r4,r4,64
267 bdnz pmap_g4_copy_loop
268
269 sync // wait for stores to take
270 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
271 li r8,PPC_PGBYTES-32 // point to last line in page
272pmap_g4_icache_flush:
273 subic. r9,r8,32 // more to go?
274 icbi r4,r8 // flush from icache
275 subi r8,r9,32 // get offset to next line
276 icbi r4,r9
277 bne pmap_g4_icache_flush
278
279 sync
280 mtmsr r2 // turn DR back on
281 isync
282 la r9,FM_SIZE+16(r1) // get base of VR save area
283 lvx v0,0,r9 // restore the VRs
284 lvx v1,r5,r9
285 lvx v2,r6,r9
286 lvx v3,r7,r9
287
288pmap_g4_restore: // r11=MSR
289 mtmsr r11 // turn EE on, VEC and FP off
290 isync // wait for it to happen
291 addi r1,r1,kSFSize // pop off our stack frame
292 lwz r0,8(r1) // restore return address
293 mtlr r0
294 blr
295
296
297 // 64-bit/128-byte processor: copy using VRs
298
299pmap_copy_64: // r10=features, r11=old MSR
300 sldi r3,r3,12 // get page address from page num
301 sldi r4,r4,12 // get page address from page num
302 la r9,FM_SIZE+16(r1) // get base of VR save area
303 li r5,16 // load x-form offsets into r5-r9
304 li r6,32 // another offset
305 bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
306 stvx v0,0,r9 // save 8 VRs so we can copy without bubbles
307 stvx v1,r5,r9
308 li r7,48 // another offset
309 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
310 stvx v2,r6,r9
311 stvx v3,r7,r9
312 addi r9,r9,64 // advance base ptr so we can store another 4
313 mtctr r0
314 li r0,MASK(MSR_DR) // get DR bit
315 stvx v4,0,r9
316 stvx v5,r5,r9
317 andc r12,r2,r0 // turn off DR bit
318 li r0,1 // get a 1 to slam into SF
319 stvx v6,r6,r9
320 stvx v7,r7,r9
321 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
322 li r8,-128 // offset so we can reach back one line
323 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
324 isync // wait for it to happen
325 dcbt128 0,r3,1 // start a forward stream
326 b pmap_64_copy_loop
327
328 .align 5 // align inner loops
329pmap_64_copy_loop: // loop over 128-byte chunks
330 dcbz128 0,r4 // avoid read of destination line
331 lvx v0,0,r3 // offset 0
332 lvx v1,r5,r3 // offset 16
333 lvx v2,r6,r3 // offset 32
334 lvx v3,r7,r3 // offset 48
335 addi r3,r3,64 // don't have enough GPRs so add 64 2x
336 lvx v4,0,r3 // offset 64
337 lvx v5,r5,r3 // offset 80
338 lvx v6,r6,r3 // offset 96
339 lvx v7,r7,r3 // offset 112
340 addi r3,r3,64
341 stvx v0,0,r4 // offset 0
342 stvx v1,r5,r4 // offset 16
343 stvx v2,r6,r4 // offset 32
344 stvx v3,r7,r4 // offset 48
345 addi r4,r4,64
346 stvx v4,0,r4 // offset 64
347 stvx v5,r5,r4 // offset 80
348 stvx v6,r6,r4 // offset 96
349 stvx v7,r7,r4 // offset 112
350 addi r4,r4,64
351 dcbf r8,r4 // flush the line we just wrote
352 bdnz pmap_64_copy_loop
353
354 sync // wait for stores to take
355 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
356 li r8,PPC_PGBYTES-128 // point to last line in page
357pmap_64_icache_flush:
358 subic. r9,r8,128 // more to go?
359 icbi r4,r8 // flush from icache
360 subi r8,r9,128 // get offset to next line
361 icbi r4,r9
362 bne pmap_64_icache_flush
363
364 sync
365 mtmsrd r2 // turn DR back on, SF off
366 isync
367 la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
368 lvx v0,0,r9 // restore the VRs
369 lvx v1,r5,r9
370 lvx v2,r6,r9
371 lvx v3,r7,r9
372 addi r9,r9,64
373 lvx v4,0,r9
374 lvx v5,r5,r9
375 lvx v6,r6,r9
376 lvx v7,r7,r9
377
378 b pmap_g4_restore // restore lower half of MSR and return
379
380 //
381 // Copy on 64-bit without VMX
382 //
383
384pmap_novmx_copy:
385 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
386 mtctr r0
387 li r0,MASK(MSR_DR) // get DR bit
388 andc r12,r2,r0 // turn off DR bit
389 li r0,1 // get a 1 to slam into SF
390 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
391 mtmsrd r12 // turn DR off and SF on (no VRs to save in this path)
392 isync // wait for it to happen
393 dcbt128 0,r3,1 // start a forward stream
394
395pmap_novmx_copy_loop: // loop over 128-byte cache lines
396 dcbz128 0,r4 // avoid read of dest line
397
398 ld r0,0(r3) // Load half a line
399 ld r12,8(r3)
400 ld r5,16(r3)
401 ld r6,24(r3)
402 ld r7,32(r3)
403 ld r8,40(r3)
404 ld r9,48(r3)
405 ld r10,56(r3)
406
407 std r0,0(r4) // Store half a line
408 std r12,8(r4)
409 std r5,16(r4)
410 std r6,24(r4)
411 std r7,32(r4)
412 std r8,40(r4)
413 std r9,48(r4)
414 std r10,56(r4)
415
416 ld r0,64(r3) // Load half a line
417 ld r12,72(r3)
418 ld r5,80(r3)
419 ld r6,88(r3)
420 ld r7,96(r3)
421 ld r8,104(r3)
422 ld r9,112(r3)
423 ld r10,120(r3)
424
425 addi r3,r3,128
426
427 std r0,64(r4) // Store half a line
428 std r12,72(r4)
429 std r5,80(r4)
430 std r6,88(r4)
431 std r7,96(r4)
432 std r8,104(r4)
433 std r9,112(r4)
434 std r10,120(r4)
435
436 dcbf 0,r4 // flush the line we just wrote
437 addi r4,r4,128
438 bdnz pmap_novmx_copy_loop
439
440 sync // wait for stores to take
441 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
442 li r8,PPC_PGBYTES-128 // point to last line in page
443
444pmap_novmx_icache_flush:
445 subic. r9,r8,128 // more to go?
446 icbi r4,r8 // flush from icache
447 subi r8,r9,128 // get offset to next line
448 icbi r4,r9
449 bne pmap_novmx_icache_flush
450
451 sync
452 mtmsrd r2 // turn DR back on, SF off
453 isync
454
455 b pmap_g4_restore // restore lower half of MSR and return
456
457
458
459//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
460
461// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
462// These routines all run both on 32 and 64-bit machines, though because they are called
463// by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
464// by MapUserMemoryWindow will be 64 bits however on 64-bit machines. Beware to avoid
465// using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
466// is no need to store or load it, which are mode-dependent operations since it could be
467// 32 or 64 bits.
468
469#define kkFrameSize (FM_SIZE+32)
470
471#define kkBufSize (FM_SIZE+0)
472#define kkCR3 (FM_SIZE+4)
473#define kkSource (FM_SIZE+8)
474#define kkDest (FM_SIZE+12)
475#define kkCountPtr (FM_SIZE+16)
476#define kkR31Save (FM_SIZE+20)
477#define kkThrErrJmp (FM_SIZE+24)
478
479
480// nonvolatile CR bits we use as flags in cr3
481
482#define kk64bit 12
483#define kkNull 13
484#define kkIn 14
485#define kkString 15
486#define kkZero 15
487
488
489//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
490/*
491 * int
492 * copyoutstr(src, dst, maxcount, count)
493 * vm_offset_t src; // r3
494 * addr64_t dst; // r4 and r5
495 * vm_size_t maxcount; // r6
496 * vm_size_t* count; // r7
497 *
498 * Set *count to the number of bytes copied.
499 */
500
501ENTRY(copyoutstr, TAG_NO_FRAME_USED)
502 mfcr r2,0x10 // save caller's cr3, which we use for flags
503 mr r10,r4 // move high word of 64-bit user address to r10
504 li r0,0 // r0 <- 0, used to preset *count below
505 crset kkString // flag as a string op
506 mr r11,r5 // move low word of 64-bit user address to r11
507 stw r0,0(r7) // initialize #bytes moved
508 crclr kkIn // flag as copyout
509 b copyJoin // join common copy setup code
510
511
512//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
513/*
514 * int
515 * copyinstr(src, dst, maxcount, count)
516 * addr64_t src; // r3 and r4
517 * vm_offset_t dst; // r5
518 * vm_size_t maxcount; // r6
519 * vm_size_t* count; // r7
520 *
521 * Set *count to the number of bytes copied
522 * If dst == NULL, don't copy, just count bytes.
523 * Only currently called from klcopyinstr.
524 */
525
526ENTRY(copyinstr, TAG_NO_FRAME_USED)
527 mfcr r2,0x10 // save caller's cr3, which we use for flags
528 cmplwi r5,0 // dst==NULL?
529 mr r10,r3 // move high word of 64-bit user address to r10
530 li r0,0 // r0 <- 0, used to preset *count below
531 crset kkString // flag as a string op
532 mr r11,r4 // move low word of 64-bit user address to r11
533 crmove kkNull,cr0_eq // remember if (dst==NULL)
534 stw r0,0(r7) // initialize #bytes moved
535 crset kkIn // flag as copyin (rather than copyout)
536 b copyJoin1 // skip over the "crclr kkNull"
537
538
539//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
540/*
541 * int
542 * copyout(src, dst, count)
543 * vm_offset_t src; // r3
544 * addr64_t dst; // r4 and r5
545 * size_t count; // r6
546 */
547
548 .align 5
549 .globl EXT(copyout)
550 .globl EXT(copyoutmsg)
551
552LEXT(copyout)
553LEXT(copyoutmsg)
554
555#if INSTRUMENT
556 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
557 stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
558 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
559 stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
560 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
561 stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
562 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
563 stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
564#endif
565 mfcr r2,0x10 // save caller's cr3, which we use for flags
566 mr r10,r4 // move high word of 64-bit user address to r10
567 crclr kkString // not a string version
568 mr r11,r5 // move low word of 64-bit user address to r11
569 crclr kkIn // flag as copyout
570 b copyJoin // join common copy setup code
571
572
573//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
574/*
575 * int
576 * copyin(src, dst, count)
577 * addr64_t src; // r3 and r4
578 * vm_offset_t dst; // r5
579 * size_t count; // r6
580 */
581
582
583 .align 5
584 .globl EXT(copyin)
585 .globl EXT(copyinmsg)
586
587LEXT(copyin)
588LEXT(copyinmsg)
589
590 mfcr r2,0x10 // save caller's cr3, which we use for flags
591 mr r10,r3 // move high word of 64-bit user address to r10
592 crclr kkString // not a string version
593 mr r11,r4 // move low word of 64-bit user address to r11
594 crset kkIn // flag as copyin (falls through into copyJoin)
595
596
597// Common code to handle setup for all the copy variants:
598// r2 = caller's cr3
599// r3 = source if copyout
600// r5 = dest if copyin
601// r6 = buffer length or count
602// r7 = count output ptr (if kkString set)
603// r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
604// r11 = low word of 64-bit user-space address
605// cr3 = kkIn, kkString, kkNull flags
606
607copyJoin:
608 crclr kkNull // (dst==NULL) convention not used with this call
609copyJoin1: // enter from copyinstr, which has already set kkNull appropriately
610 mflr r0 // get return address
611 cmplwi r6,0 // buffer length 0?
612 lis r9,0x1000 // r9 <- 0x10000000 (256MB)
613 stw r0,FM_LR_SAVE(r1) // save return
614 cmplw cr1,r6,r9 // buffer length > 256MB ?
615 mfsprg r8,2 // get the features
616 beq-- copyinout_0 // 0 length is degenerate case
617 stwu r1,-kkFrameSize(r1) // set up stack frame
618 stw r2,kkCR3(r1) // save caller's cr3, which we use for flags
619 mtcrf 0x02,r8 // move pf64Bit to cr6
620 stw r3,kkSource(r1) // save args across MapUserMemoryWindow
621 stw r5,kkDest(r1)
622 stw r6,kkBufSize(r1)
623 crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
624 stw r7,kkCountPtr(r1)
625 stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
626 li r31,0 // no mapped ptr yet
627
628
629// Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
630// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
631// the buffer length to 256MB. This isn't an issue if the string is less than 256MB
632// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction
633// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
634// the mapping.
635
636 ble++ cr1,copyin0 // skip if buffer length <= 256MB
637 bf kkString,copyinout_too_big // error if not string op
638 mr r6,r9 // silently clamp buffer length to 256MB
639 stw r9,kkBufSize(r1) // update saved copy too
640
641
642// Set up thread_recover in case we hit an illegal address.
643
644copyin0:
645 mfsprg r8,1 // Get the current thread
646 lis r2,hi16(copyinout_error)
647 ori r2,r2,lo16(copyinout_error)
648 lwz r4,THREAD_RECOVER(r8)
649 lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
650 stw r2,THREAD_RECOVER(r8) // install our recovery handler
651 stw r4,kkThrErrJmp(r1) // save previous recovery handler for restore at exit
652
653
654// Map user segment into kernel map, turn on 64-bit mode. At this point:
655// r3 = vm map
656// r6 = buffer length
657// r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
658//
659// When we call MapUserMemoryWindow, we pass:
660// r3 = vm map ptr
661// r4/r5 = 64-bit user space address as an addr64_t
662
663 mr r4,r10 // copy user ptr into r4/r5
664 mr r5,r11
665#if INSTRUMENT
666 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
667 stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
668 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
669 stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
670 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
671 stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
672 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
673 stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
674#endif
675 bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand
676#if INSTRUMENT
677 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
678 stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
679 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
680 stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
681 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
682 stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
683 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
684 stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
685#endif
686 mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
687 bf-- kk64bit,copyin1 // skip if a 32-bit processor
688
689 rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
690 mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
691 li r0,1 // r0 <- 1, to set SF
692 rldimi r4,r0,63,MSR_SF_BIT // light bit 0
693 mtmsrd r4 // turn on 64-bit mode
694 isync // wait for mode to change
695
696
697// Load r3-r5, substituting mapped ptr as appropriate.
698
699copyin1:
700 lwz r5,kkBufSize(r1) // restore length to copy
701 bf kkIn,copyin2 // skip if copyout
702 lwz r4,kkDest(r1) // copyin: dest is kernel ptr
703 mr r3,r31 // source is mapped ptr
704 b copyin3
705copyin2: // handle copyout
706 lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
707 mr r4,r31 // dest is mapped ptr into user space
708
709
710// Finally, all set up to copy:
711// r3 = source ptr (mapped if copyin)
712// r4 = dest ptr (mapped if copyout)
713// r5 = length
714// r31 = mapped ptr returned by MapUserMemoryWindow
715// cr3 = kkIn, kkString, kk64bit, and kkNull flags
716
717copyin3:
718 bt kkString,copyString // handle copyinstr and copyoutstr
719 bl EXT(bcopy) // copyin and copyout: let bcopy do the work
720 li r3,0 // return success
721
722
723// Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
724// from error recovery if we get a DSI accessing user space. Clear recovery ptr,
725// and pop off frame.
726// r3 = 0, EFAULT, or ENAMETOOLONG
727
728copyinx:
729 lwz r2,kkCR3(r1) // get caller's cr3
730 mfsprg r6,1 // Get the current thread
731 bf-- kk64bit,copyinx1 // skip if 32-bit processor
732 mfmsr r12
733 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
734 mtmsrd r12 // turn SF off
735 isync // wait for the mode to change
736copyinx1:
737 lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address
738 lwz r31,kkR31Save(r1) // restore caller's r31
739 lwz r4,kkThrErrJmp(r1) // load saved thread recover
740 addi r1,r1,kkFrameSize // pop off our stack frame
741 mtlr r0
742 stw r4,THREAD_RECOVER(r6) // restore thread recover
743 mtcrf 0x10,r2 // restore cr3
744 blr
745
746
747/* We get here via the exception handler if an illegal
748 * user memory reference was made. This error handler is used by
749 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
750 * they were at point of fault, so for example cr3 flags are valid.
751 */
752
753copyinout_error:
754 li r3,EFAULT // return error
755 b copyinx
756
757copyinout_0: // degenerate case: 0-length copy
758 mtcrf 0x10,r2 // restore cr3
759 li r3,0 // return success
760 blr
761
762copyinout_too_big: // degenerate case: non-string copy > 256MB
763 mtcrf 0x10,r2 // restore cr3
764 lwz r1,0(r1) // pop off stack frame
765 li r3,ENAMETOOLONG
766 blr
767
768
769//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
770// Handle copyinstr and copyoutstr. At this point the stack frame is set up,
771// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
772// if necessary, and:
773// r3 = source ptr, mapped if copyinstr
774// r4 = dest ptr, mapped if copyoutstr
775// r5 = buffer length
776// r31 = mapped ptr returned by MapUserMemoryWindow
777// cr3 = kkIn, kkString, kkNull, and kk64bit flags
778// We do word copies unless the buffer is very short, then use a byte copy loop
779// for the leftovers if necessary. The crossover at which the word loop becomes
780// faster is about seven bytes, counting the zero.
781//
782// We first must word-align the source ptr, in order to avoid taking a spurious
783// page fault.
784
785copyString:
786 cmplwi cr1,r5,15 // is buffer very short?
787 mr r12,r3 // remember ptr to 1st source byte
788 mtctr r5 // assuming short, set up loop count for bytes
789 blt-- cr1,copyinstr8 // too short for word loop
790 rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word
791 rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word
792 li r7,-1 // r7 <- all-ones, for building the first-word mask
793 sub r3,r3,r2 // word-align source address
794 add r6,r5,r2 // get length starting at byte 0 in word
795 srw r7,r7,r9 // get mask for bytes in first word
796 srwi r0,r6,2 // get #words in buffer
797 lwz r5,0(r3) // get aligned word with first source byte
798 lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
799 lis r11,hi16(0x80808080)
800 mtctr r0 // set up word loop count
801 addi r3,r3,4 // advance past the source word
802 ori r10,r10,lo16(0xFEFEFEFF)
803 ori r11,r11,lo16(0x80808080)
804 orc r8,r5,r7 // map bytes preceding first source byte into 0xFF
805 bt-- kkNull,copyinstr5enter // enter loop that just counts
806
807// Special case 1st word, which has been 0xFF filled on left. Note that we use
808// "and.", even though we execute both in 32 and 64-bit mode. This is OK.
809
810 slw r5,r5,r9 // left justify payload bytes
811 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
812 andc r7,r11,r8 // r7 = ~data & 0x80808080
813 subfic r0,r2,4 // get r0 <- #payload bytes in 1st word
814 and. r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero
815 stw r5,0(r4) // copy payload bytes to dest buffer
816 add r4,r4,r0 // then point to next byte in dest buffer
817 bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found
818
819 b copyinstr7 // 0 found (buffer can't be full)
820
821
822// Word loop(s). They do a word-parallel search for 0s, using the following
823// non-obvious but very efficient test:
824// y = data + 0xFEFEFEFF
825// z = ~data & 0x80808080
826// If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
827// of this loop, one that just counts and another that copies.
828// r3 = ptr to next word of source (word aligned)
829// r4 = ptr to next byte in buffer
830// r6 = original buffer length (adjusted to be word origin)
831// r10 = 0xFEFEFEFF
832// r11 = 0x80808080
833// r12 = ptr to 1st source byte (used to determine string length)
834
835 .align 5 // align inner loops for speed
836copyinstr5: // version that counts but does not copy
837 lwz r8,0(r3) // get next word of source
838 addi r3,r3,4 // advance past it
839copyinstr5enter:
840 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
841 andc r7,r11,r8 // r7 = ~data & 0x80808080
842 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
843 bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes in r8 are nonzero
844
845 b copyinstr7
846
847 .align 5 // align inner loops for speed
848copyinstr6: // version that counts and copies
849 lwz r8,0(r3) // get next word of source
850 addi r3,r3,4 // advance past it
851 addi r4,r4,4 // increment dest ptr while we wait for data
852 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
853 andc r7,r11,r8 // r7 = ~data & 0x80808080
854 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
855 stw r8,-4(r4) // pack all 4 bytes into buffer
856 bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
857
858
859// Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
860// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
861// mapped to 0x80. We must mask out these false hits before searching for a 0x80 byte.
862// r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
863// r6 = original buffer length (adjusted to be word origin)
864// r7 = computed vector of 0x00 and 0x80 bytes
865// r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
866// r12 = ptr to 1st source byte (used to determine string length)
867// cr0 = beq set iff 0 not found
868
869copyinstr7:
870 rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
871 rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word
872 andc r7,r7,r2 // turn off false hits from 0x0100 worst case
873 crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
874 srwi r7,r7,8 // we want to count the 0 as a byte xferred
875 cmpwi r6,0 // any bytes left over in last word?
876 cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
877 subi r3,r3,4 // back up r3 to point to 1st byte in r8
878 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
879 add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred
880 bt++ kkZero,copyinstr10 // 0 found, so done
881
882 beq copyinstr10 // r6==0, so buffer truly full
883 mtctr r6 // 0 not found, loop over r6 bytes
884 b copyinstr8 // enter byte loop for last 1-3 leftover bytes
885
886
887// Byte loop. This is used for very small buffers and for the odd bytes left over
888// after searching and copying words at a time.
889// r3 = ptr to next byte of source
890// r4 = ptr to next dest byte
891// r12 = ptr to first byte of source
892// ctr = count of bytes to check
893
894 .align 5 // align inner loops for speed
895copyinstr8: // loop over bytes of source
896 lbz r0,0(r3) // get next byte of source
897 addi r3,r3,1
898 addi r4,r4,1 // increment dest addr whether we store or not
899 cmpwi r0,0 // is this the terminating 0?
900 bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr
901 stb r0,-1(r4)
902copyinstr9:
903 bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
904
905 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
906
907
908// Buffer filled or 0 found. Unwind and return.
909// r3 = ptr to 1st source byte not transferred
910// r12 = ptr to 1st source byte
911// r31 = mapped ptr returned by MapUserMemoryWindow
912// cr3 = kkZero set iff 0 found
913
914copyinstr10:
915 lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
916 sub r2,r3,r12 // compute #bytes copied (including the 0)
917 li r3,0 // assume success return status
918 stw r2,0(r9) // store #bytes moved
919 bt++ kkZero,copyinx // we did find the 0 so return success (0)
920 li r3,ENAMETOOLONG // buffer filled before a 0 was found
921 b copyinx // join main exit routine
922
923//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
924/*
925 * int
926 * copypv(source, sink, size, which)
927 * addr64_t src; // r3 and r4
928 * addr64_t dst; // r5 and r6
929 * size_t size; // r7
930 * int which; // r8
931 *
932 * Operand size bytes are copied from operand src into operand dst. The source and
933 * destination operand addresses are given as addr64_t, and may designate starting
934 * locations in physical or virtual memory in any combination except where both are
935 * virtual. Virtual memory locations may be in either the kernel or the current thread's
936 * address space. Operand size may be up to 256MB.
937 *
938 * Operation is controlled by operand which, which offers these options:
939 * cppvPsrc : source operand is (1) physical or (0) virtual
940 * cppvPsnk : destination operand is (1) physical or (0) virtual
941 * cppvKmap : virtual operand is in (1) kernel or (0) current thread
942 * cppvFsnk : (1) flush destination before and after transfer
943 * cppvFsrc : (1) flush source before and after transfer
 * cppvNoModSnk : (1) don't set destination (sink) operand's changed bit(s)
 * cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
946 *
947 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
948 * This section describes the operation of the new 64-bit path.
949 *
950 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
951 * window in the kernel address space into all of physical RAM plus the I/O hole. Since
952 * the window's mappings specify the proper access policies for the underlying memory,
953 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
 * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
955 * memory window, and are accessed with data relocation on. Virtual addresses are either
956 * within the kernel, or are mapped into the kernel address space through the user memory
957 * window. Because accesses to a virtual operand are performed with data relocation on,
958 * the new path does not have to translate the address, disable/enable interrupts, lock
959 * the mapping, or update referenced and changed bits.
960 *
961 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
962 * a substantial performance penalty for copypv operating in real mode. Utilizing the
963 * new 64-bit path, transfer performance increases >100% on the G5.
964 *
965 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
966 * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
967 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
968 * required.
969 *
970 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
971 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
972 * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
973 * in our own stack frame across calls to 32-bit functions.
974 *
975 */
976
// Map operand which bits into non-volatile CR2 and CR3 bits.
// 'which' is rotated left by whichAlign (see rlwinm at copypv_64) so the cppv*
// flag bits land in the CR2/CR3 field positions, then installed with mtcrf 0x20/0x10.
#define whichAlign ((3+1)*4)			// rotate distance aligning cppv bits with cr2/cr3
#define whichMask 0x007F0000			// mask of the 'which' bits after rotation
#define pvPsnk (cppvPsnkb - whichAlign)		// destination (sink) operand is physical
#define pvPsrc (cppvPsrcb - whichAlign)		// source operand is physical
#define pvFsnk (cppvFsnkb - whichAlign)		// flush destination (not acted on in 64-bit path)
#define pvFsrc (cppvFsrcb - whichAlign)		// flush source (not acted on in 64-bit path)
#define pvNoModSnk (cppvNoModSnkb - whichAlign)	// don't set destination's changed bit(s)
#define pvNoRefSrc (cppvNoRefSrcb - whichAlign)	// don't set source's referenced bit(s)
#define pvKmap (cppvKmapb - whichAlign)		// virtual operand is in the kernel map
#define pvNoCache cr2_lt			// scratch cr bit: (1) copy with bcopy_nc, not bcopy
988
	.align	5
	.globl	EXT(copypv)

LEXT(copypv)
	mfsprg	r10,2					// get feature flags
	mtcrf	0x02,r10				// we need to test pf64Bit
	bt++	pf64Bitb,copypv_64			// skip if 64-bit processor (only they take hint)

	b	EXT(hw_copypv_32)			// carry on with 32-bit copypv

// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
copypv_64:
	mfsprg	r9,1					// get current thread
	stwu	r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
							// allocate stack frame and link it
	mflr	r0					// get return address
	mfcr	r10					// get cr2 and cr3
	lwz	r12,THREAD_RECOVER(r9)			// get error callback
	stw	r26,FM_ARG0+0x00(r1)			// save non-volatile r26
	stw	r27,FM_ARG0+0x04(r1)			// save non-volatile r27
	stw	r28,FM_ARG0+0x08(r1)			// save non-volatile r28
	stw	r29,FM_ARG0+0x0C(r1)			// save non-volatile r29
	stw	r30,FM_ARG0+0x10(r1)			// save non-volatile r30
	stw	r31,FM_ARG0+0x14(r1)			// save non-volatile r31
	stw	r12,FM_ARG0+0x20(r1)			// save error callback
	stw	r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
							// save return address
	stw	r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
							// save non-volatile cr2 and cr3

// Non-volatile register usage in this routine is:
// r26: saved msr image
// r27: current pmap_t / virtual source address
// r28: destination virtual address
// r29: source address
// r30: destination address
// r31: byte count to copy
// cr2/3: parameter 'which' bits

	rlwinm	r8,r8,whichAlign,whichMask		// align and mask which bits
	mr	r31,r7					// copy size to somewhere non-volatile
	mtcrf	0x20,r8					// insert which bits into cr2 and cr3
	mtcrf	0x10,r8					// insert which bits into cr2 and cr3
	rlwinm	r29,r3,0,1,0				// form source address high-order bits
	rlwinm	r30,r5,0,1,0				// form destination address high-order bits
	rlwimi	r29,r4,0,0,31				// form source address low-order bits
	rlwimi	r30,r6,0,0,31				// form destination address low-order bits
	crand	cr7_lt,pvPsnk,pvPsrc			// are both operand addresses physical?
	cntlzw	r0,r31					// count leading zeroes in byte count
	cror	cr7_eq,pvPsnk,pvPsrc			// cr7_eq <- source or destination is physical
	bf--	cr7_eq,copypv_einval			// both operands may not be virtual
	cmplwi	r0,4					// byte count greater than or equal 256M (2**28)?
	blt--	copypv_einval				// byte count too big, give EINVAL
	cmplwi	r31,0					// byte count zero?
	beq--	copypv_zero				// early out
	bt	cr7_lt,copypv_phys			// both operand addresses are physical
	mr	r28,r30					// assume destination is virtual
	bf	pvPsnk,copypv_dv			// is destination virtual?
	mr	r28,r29					// no, so source must be virtual
copypv_dv:
	lis	r27,ha16(EXT(kernel_pmap))		// get kernel's pmap_t *, high-order
	lwz	r27,lo16(EXT(kernel_pmap))(r27)		// get kernel's pmap_t
	bt	pvKmap,copypv_kern			// virtual address in kernel map?
	lwz	r3,ACT_VMMAP(r9)			// get user's vm_map *
	rldicl	r4,r28,32,32				// r4, r5 <- addr64_t virtual address
	rldicl	r5,r28,0,32
	std	r29,FM_ARG0+0x30(r1)			// preserve 64-bit r29 across 32-bit call
	std	r30,FM_ARG0+0x38(r1)			// preserve 64-bit r30 across 32-bit call
	bl	EXT(MapUserMemoryWindow)		// map slice of user space into kernel space
	ld	r29,FM_ARG0+0x30(r1)			// restore 64-bit r29
	ld	r30,FM_ARG0+0x38(r1)			// restore 64-bit r30
	rlwinm	r28,r3,0,1,0				// convert relocated addr64_t virtual address
	rlwimi	r28,r4,0,0,31				// into a single 64-bit scalar
copypv_kern:

// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
// update the referenced and changed bits manually after the copy. So, force the appropriate
// flag bit on for the virtual operand.
	crorc	pvNoModSnk,pvNoModSnk,pvPsnk		// for virtual dest, let hardware do ref/chg bits
	crorc	pvNoRefSrc,pvNoRefSrc,pvPsrc		// for virtual source, let hardware do ref bit

// We'll be finding a mapping and looking at it, so we need to disable interrupts ('rupts).
	lis	r0,hi16(MASK(MSR_VEC))			// get vector mask
	ori	r0,r0,lo16(MASK(MSR_FP))		// insert fp mask
	mfmsr	r26					// save current msr
	andc	r26,r26,r0				// turn off VEC and FP in saved copy
	ori	r0,r0,lo16(MASK(MSR_EE))		// add EE to our mask
	andc	r0,r26,r0				// disable EE in our new msr image
	mtmsrd	r0					// introduce new msr image

// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
// try to find a mapping corresponding to this address in order to determine whether the address
// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
// examine the mapping's caching-inhibited bit.
	mr	r3,r27					// r3 <- pmap_t pmap
	rldicl	r4,r28,32,32				// r4, r5 <- addr64_t va
	rldicl	r5,r28,0,32
	la	r6,FM_ARG0+0x18(r1)			// r6 <- addr64_t *nextva
	li	r7,1					// r7 <- int full, search nested mappings
	std	r26,FM_ARG0+0x28(r1)			// preserve 64-bit r26 across 32-bit calls
	std	r28,FM_ARG0+0x30(r1)			// preserve 64-bit r28 across 32-bit calls
	std	r29,FM_ARG0+0x38(r1)			// preserve 64-bit r29 across 32-bit calls
	std	r30,FM_ARG0+0x40(r1)			// preserve 64-bit r30 across 32-bit calls
	bl	EXT(mapping_find)			// find mapping for virtual operand
	mr.	r3,r3					// did we find it?
	beq	copypv_nomapping			// nope, so we'll assume it's cacheable
	lwz	r4,mpVAddr+4(r3)			// get low half of virtual addr for hw flags
	rlwinm.	r4,r4,0,mpIb-32,mpIb-32			// caching-inhibited bit set?
	crnot	pvNoCache,cr0_eq			// if it is, use bcopy_nc
	bl	EXT(mapping_drop_busy)			// drop busy on the mapping
copypv_nomapping:
	ld	r26,FM_ARG0+0x28(r1)			// restore 64-bit r26
	ld	r28,FM_ARG0+0x30(r1)			// restore 64-bit r28
	ld	r29,FM_ARG0+0x38(r1)			// restore 64-bit r29
	ld	r30,FM_ARG0+0x40(r1)			// restore 64-bit r30
	mtmsrd	r26					// restore msr to its previous state

// Set both the source and destination virtual addresses to the virtual operand's address --
// we'll overlay one of them with the physical operand's address.
	mr	r27,r28					// make virtual operand BOTH source AND destination

// Now we're ready to relocate the physical operand address(es) into the physical memory window.
// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
copypv_phys:
	ld	r6,lgPMWvaddr(0)			// get physical memory window virtual address
	bf	pvPsnk,copypv_dstvirt			// is destination address virtual?
	cntlzd	r4,r30					// count leading zeros in destination address
	cmplwi	r4,32					// 32 leading zeros => addr in 2**31..2**32-1, the I/O hole
							// NOTE(review): original comment said 2**30..2**31-1; clz==32
							// actually implies bit 31 set -- confirm intended hole bounds
	cror	pvNoCache,cr0_eq,pvNoCache		// use bcopy_nc for I/O hole locations
	add	r28,r30,r6				// relocate physical destination into physical window
copypv_dstvirt:
	bf	pvPsrc,copypv_srcvirt			// is source address virtual?
	cntlzd	r4,r29					// count leading zeros in source address
	cmplwi	r4,32					// 32 leading zeros => addr in 2**31..2**32-1, the I/O hole
	cror	pvNoCache,cr0_eq,pvNoCache		// use bcopy_nc for I/O hole locations
	add	r27,r29,r6				// relocate physical source into physical window
copypv_srcvirt:

// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
// funny happens during the copy. So, we set a pointer to our error handler in the per-thread
// control block.
	mfsprg	r8,1					// get current threads stuff
	lis	r3,hi16(copypv_error)			// get our error callback's address, high
	ori	r3,r3,lo16(copypv_error)		// get our error callback's address, low
	stw	r3,THREAD_RECOVER(r8)			// set our error callback

// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
// 64-bit mode.
	li	r0,1					// get a handy one bit
	mfmsr	r3					// get current msr
	rldimi	r3,r0,63,MSR_SF_BIT			// set SF bit on in our msr copy
	mtmsrd	r3					// enter 64-bit mode

// If requested, flush data cache
// Note that we don't flush, the code is being saved "just in case".
#if 0
	bf	pvFsrc,copypv_nfs			// do we flush the source?
	rldicl	r3,r27,32,32				// r3, r4 <- addr64_t source virtual address
	rldicl	r4,r27,0,32
	mr	r5,r31					// r5 <- count (in bytes)
	li	r6,0					// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)			// flush the source operand
copypv_nfs:
	bf	pvFsnk,copypv_nfdx			// do we flush the destination?
	rldicl	r3,r28,32,32				// r3, r4 <- addr64_t destination virtual address
	rldicl	r4,r28,0,32
	mr	r5,r31					// r5 <- count (in bytes)
	li	r6,0					// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)			// flush the destination operand
copypv_nfdx:
#endif

// Call bcopy or bcopy_nc to perform the copy.
	mr	r3,r27					// r3 <- source virtual address
	mr	r4,r28					// r4 <- destination virtual address
	mr	r5,r31					// r5 <- bytes to copy
	bt	pvNoCache,copypv_nc			// take non-caching route
	bl	EXT(bcopy)				// call bcopy to do the copying
	b	copypv_copydone
copypv_nc:
	bl	EXT(bcopy_nc)				// call bcopy_nc to do the copying
copypv_copydone:

// If requested, flush data cache
// Note that we don't flush, the code is being saved "just in case".
#if 0
	bf	pvFsrc,copypv_nfsx			// do we flush the source?
	rldicl	r3,r27,32,32				// r3, r4 <- addr64_t source virtual address
	rldicl	r4,r27,0,32
	mr	r5,r31					// r5 <- count (in bytes)
	li	r6,0					// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)			// flush the source operand
copypv_nfsx:
	bf	pvFsnk,copypv_nfd			// do we flush the destination?
	rldicl	r3,r28,32,32				// r3, r4 <- addr64_t destination virtual address
	rldicl	r4,r28,0,32
	mr	r5,r31					// r5 <- count (in bytes)
	li	r6,0					// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)			// flush the destination operand
copypv_nfd:
#endif

// Leave 64-bit mode.
	mfmsr	r3					// get current msr
	rldicl	r3,r3,0,MSR_SF_BIT+1			// clear SF bit in our copy
	mtmsrd	r3					// leave 64-bit mode

// If requested, set ref/chg on source/dest physical operand(s). It is possible that copy is
// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
// Note that this code is page-size sensitive, so it should probably be a part of our low-level
// code in hw_vm.s.
	bt	pvNoModSnk,copypv_nomod			// skip destination update if not requested
	std	r29,FM_ARG0+0x30(r1)			// preserve 64-bit r29 across 32-bit calls
	li	r26,1					// r26 <- 4K-page count
	mr	r27,r31					// r27 <- byte count
	rlwinm	r3,r30,0,20,31				// does destination cross a page boundary?
	subfic	r3,r3,4096				// r3 <- bytes remaining in destination's first 4K page
	cmplw	r3,r27					// does the whole copy fit in that first page?
	blt	copypv_modnox				// skip if not crossing case
	subf	r27,r3,r27				// r27 <- byte count less initial fragment
	addi	r26,r26,1				// increment page count
copypv_modnox:
	srdi	r3,r27,12				// pages to update (not including crosser)
	add	r26,r26,r3				// add in crosser
	srdi	r27,r30,12				// r27 <- destination page number
copypv_modloop:
	mr	r3,r27					// r3 <- destination page number
	la	r4,FM_ARG0+0x18(r1)			// r4 <- unsigned int *pindex
	bl	EXT(mapping_phys_lookup)		// see if page is really there
	mr.	r3,r3					// is it?
	beq--	copypv_modend				// nope, break out of modify loop
	mr	r3,r27					// r3 <- destination page number
	bl	EXT(mapping_set_mod)			// set page changed status
	subi	r26,r26,1				// decrement page count
							// NOTE(review): r27 is never advanced to the next page,
							// so a multi-page copy re-marks the same page -- verify
	cmpwi	r26,0					// done yet?
	bgt	copypv_modloop				// nope, iterate
copypv_modend:
	ld	r29,FM_ARG0+0x30(r1)			// restore 64-bit r29
copypv_nomod:
	bt	pvNoRefSrc,copypv_done			// skip source update if not requested
copypv_debugref:
	li	r26,1					// r26 <- 4K-page count
	mr	r27,r31					// r27 <- byte count
	rlwinm	r3,r29,0,20,31				// does source cross a page boundary?
	subfic	r3,r3,4096				// r3 <- bytes remaining in source's first 4K page
	cmplw	r3,r27					// does the whole copy fit in that first page?
	blt	copypv_refnox				// skip if not crossing case
	subf	r27,r3,r27				// r27 <- byte count less initial fragment
	addi	r26,r26,1				// increment page count
copypv_refnox:
	srdi	r3,r27,12				// pages to update (not including crosser)
	add	r26,r26,r3				// add in crosser
	srdi	r27,r29,12				// r27 <- source page number
copypv_refloop:
	mr	r3,r27					// r3 <- source page number
	la	r4,FM_ARG0+0x18(r1)			// r4 <- unsigned int *pindex
	bl	EXT(mapping_phys_lookup)		// see if page is really there
	mr.	r3,r3					// is it?
	beq--	copypv_done				// nope, break out of reference loop
	mr	r3,r27					// r3 <- source page number
	bl	EXT(mapping_set_ref)			// set page referenced status
	subi	r26,r26,1				// decrement page count
							// NOTE(review): r27 is never advanced here either -- see
							// the matching note in copypv_modloop above
	cmpwi	r26,0					// done yet?
	bgt	copypv_refloop				// nope, iterate

// Return, indicating success.
copypv_done:
copypv_zero:
	li	r3,0					// our efforts were crowned with success

// Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
copypv_return:
	mfsprg	r9,1					// get current threads stuff
	lwz	r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
							// get return address
	lwz	r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
							// get non-volatile cr2 and cr3
	lwz	r26,FM_ARG0+0x00(r1)			// restore non-volatile r26
	lwz	r27,FM_ARG0+0x04(r1)			// restore non-volatile r27
	mtlr	r0					// restore return address
	lwz	r28,FM_ARG0+0x08(r1)			// restore non-volatile r28
	mtcrf	0x20,r4					// restore non-volatile cr2
	mtcrf	0x10,r4					// restore non-volatile cr3
	lwz	r11,FM_ARG0+0x20(r1)			// reload caller's saved error callback
	lwz	r29,FM_ARG0+0x0C(r1)			// restore non-volatile r29
	lwz	r30,FM_ARG0+0x10(r1)			// restore non-volatile r30
	lwz	r31,FM_ARG0+0x14(r1)			// restore non-volatile r31
	stw	r11,THREAD_RECOVER(r9)			// restore our error callback
	lwz	r1,0(r1)				// release stack frame

	blr						// y'all come back now

// Invalid argument handler.
copypv_einval:
	li	r3,EINVAL				// invalid argument
	b	copypv_return				// return

// Error encountered during bcopy or bcopy_nc.
copypv_error:
	mfmsr	r3					// get current msr
	rldicl	r3,r3,0,MSR_SF_BIT+1			// clear SF bit in our copy
	mtmsrd	r3					// leave 64-bit mode
	li	r3,EFAULT				// it was all his fault
	b	copypv_return				// return