]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/ppc/movc.s
xnu-1228.12.14.tar.gz
[apple/xnu.git] / osfmk / ppc / movc.s
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31#include <debug.h>
32#include <ppc/asm.h>
33#include <ppc/proc_reg.h>
34#include <mach/ppc/vm_param.h>
35#include <assym.s>
36#include <sys/errno.h>
37
38#define INSTRUMENT 0
39
40//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
41/*
42 * void pmap_zero_page(vm_offset_t pa)
43 *
44 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
45 * and handles 32 and 128-byte cache lines.
46 */
47
48
49 .align 5
50 .globl EXT(pmap_zero_page)
51
52LEXT(pmap_zero_page)
53
// On entry r3 = physical page number (shifted left by 12 below to form the page address).
// NOTE(review): relies on ml_set_physical_disabled leaving r3 and r12 intact -- confirm against that routine.
54 mflr r12 // save return address
55 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
56 mtlr r12 // restore return address
57 andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
58
59 subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page (PPC_PGBYTES - line size, i.e. the last line)
60
61 bt++ pf64Bitb,page0S4 // Go do the big guys...
62
63 slwi r3,r3,12 // get page address from page num
64 b page_zero_1 // Jump to line aligned loop...
65
66 .align 5
67
// Padding: the seven nops plus the sldi at page0S4 fill eight instruction slots after the
// .align above, so page_zero_1 is expected to start on the following aligned boundary.
68 nop
69 nop
70 nop
71 nop
72 nop
73 nop
74 nop
75
76page0S4:
77 sldi r3,r3,12 // get page address from page num
78
// The loop walks offsets downward from the last line of the page, zeroing two lines per
// pass (offsets r4 and r5 = r4 - line size). cr0 is set by the "sub." before the dcbz's,
// and the loop exits once r5 has reached offset 0.
79page_zero_1: // loop zeroing cache lines
80 sub. r5,r4,r9 // more to go?
81 dcbz128 r3,r4 // zero either 32 or 128 bytes
82 sub r4,r5,r9 // generate next offset
83 dcbz128 r3,r5 // zero the line one line-size below the previous one
84 bne-- page_zero_1 // repeat until the line at offset 0 has been zeroed
85
86 b EXT(ml_restore) // restore MSR and do the isync
87
88
89//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
90/* void
91 * phys_copy(src, dst, bytecount)
92 * addr64_t src;
93 * addr64_t dst;
94 * int bytecount
95 *
96 * This routine will copy bytecount bytes from physical address src to physical
97 * address dst. It runs in 64-bit mode if necessary, but does not handle
98 * overlap or make any attempt to be optimal. Length must be a signed word.
99 * Not performance critical.
100 */
101
102
103 .align 5
104 .globl EXT(phys_copy)
105
106LEXT(phys_copy)
107
// On entry: r3/r4 = high/low words of src, r5/r6 = high/low words of dst, r7 = bytecount.
// The halves are merged into full 64-bit addresses in r3 (src) and r4 (dst).
// NOTE(review): r6, r7 and r12 are read after the call below, so ml_set_physical_disabled
// is assumed to preserve them -- confirm against that routine.
108 rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
109 mflr r12 // get return address
110 rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
111 rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
112 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
113 rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits
114 mtlr r12 // restore return address
115 subic. r5,r7,4 // a word to copy?
116 b phys_copy_2 // enter the word loop at its test
117
118 .align 5
119
120phys_copy_1: // loop copying words
121 subic. r5,r5,4 // more to go?
122 lwz r0,0(r3) // load next source word
123 addi r3,r3,4 // advance source
124 stw r0,0(r4) // store it to the destination
125 addi r4,r4,4 // advance destination
126phys_copy_2:
127 bge phys_copy_1 // at least 4 bytes were left before the last subtract
128 addic. r5,r5,4 // restore count
129 ble phys_copy_4 // no more
130
131 // Loop is aligned here
132
133phys_copy_3: // loop copying bytes
134 subic. r5,r5,1 // more to go?
135 lbz r0,0(r3) // load next source byte
136 addi r3,r3,1 // advance source
137 stb r0,0(r4) // store it to the destination
138 addi r4,r4,1 // advance destination
139 bgt phys_copy_3 // loop while leftover bytes remain
140phys_copy_4:
141 b EXT(ml_restore) // restore MSR and do the isync
142
143
144//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
145/* void
146 * pmap_copy_page(src, dst)
147 * ppnum_t src;
148 * ppnum_t dst;
149 *
150 * This routine will copy the physical page src to physical page dst
151 *
152 * This routine assumes that the src and dst are page numbers and that the
153 * destination is cached. It runs on 32 and 64 bit processors, with and
154 * without altivec, and with 32 and 128 byte cache lines.
155 * We also must assume that no-one will be executing within the destination
156 * page, and that this will be used for paging. Because this
157 * is a common routine, we have tuned loops for each processor class.
158 *
159 */
160#define kSFSize (FM_SIZE+160)
161
162ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
163
// Entry and dispatch. r3 = source page number, r4 = destination page number.
// After this prologue:
//   r10 = feature flags (from sprg2), also copied into CR fields for the bt/bf tests
//   r11 = MSR at entry with VEC and FP cleared (written back on exit)
//   r2  = working MSR: r11 with EE cleared and FP+VEC set
//   r12 = r2 with DR cleared (32-bit paths only; the 64-bit paths build their own)
164 lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
165 mflr r0 // get return
166 ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
167 stw r0,8(r1) // save
168 stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
169 mfmsr r11 // save MSR at entry
170 mfsprg r10,2 // get feature flags
171 andc r11,r11,r2 // Clear out vec and fp
172 ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
173 andc r2,r11,r2 // Clear out EE as well
174 mtcrf 0x02,r10 // we need to test pf64Bit
175 ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
176 mtcrf 0x80,r10 // we need to test pfAltivec too
177 oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
178 mtmsr r2 // turn EE off, FP and VEC on
179 isync // wait for the MSR change to take effect
180 bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
181 slwi r3,r3,12 // get page address from page num
182 slwi r4,r4,12 // get page address from page num
183 rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
184 bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
185
186
187 // G3 -- copy using FPRs
188
// f0-f3 are saved in the frame at FM_SIZE+0..+24 while translation is still on, then DR is
// turned off (r12) so that r3/r4 are used as physical addresses by the copy loop.
189 stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
190 stfd f1,FM_SIZE+8(r1)
191 li r5,PPC_PGBYTES/32 // count of cache lines in a page
192 stfd f2,FM_SIZE+16(r1)
193 mtctr r5 // ctr <- number of 32-byte lines to copy
194 stfd f3,FM_SIZE+24(r1)
195 mtmsr r12 // turn off DR after saving FPRs on stack
196 isync // wait for the MSR change to take effect
197
198pmap_g3_copy_loop: // loop over 32-byte cache lines
199 dcbz 0,r4 // avoid read of dest line
200 lfd f0,0(r3) // load 32 source bytes into f0-f3
201 lfd f1,8(r3)
202 lfd f2,16(r3)
203 lfd f3,24(r3)
204 addi r3,r3,32 // advance source to next line
205 stfd f0,0(r4) // store the 32 bytes to the destination line
206 stfd f1,8(r4)
207 stfd f2,16(r4)
208 stfd f3,24(r4)
209 dcbst 0,r4 // flush dest line to RAM
210 addi r4,r4,32 // advance destination to next line
211 bdnz pmap_g3_copy_loop // loop until all lines in the page are copied
212
213 sync // wait for stores to take
214 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
215 li r6,PPC_PGBYTES-32 // point to last line in page
// Invalidate the destination page in the icache, two lines per pass, walking offsets
// downward; the loop ends when r5 (the second offset of the pass) is 0.
216pmap_g3_icache_flush:
217 subic. r5,r6,32 // more to go?
218 icbi r4,r6 // flush another line in icache
219 subi r6,r5,32 // get offset to next line
220 icbi r4,r5 // flush the line one below
221 bne pmap_g3_icache_flush // continue until offset 0 has been flushed
222
223 sync
224 mtmsr r2 // turn DR back on
225 isync
226 lfd f0,FM_SIZE+0(r1) // restore the FPRs
227 lfd f1,FM_SIZE+8(r1)
228 lfd f2,FM_SIZE+16(r1)
229 lfd f3,FM_SIZE+24(r1)
230
231 b pmap_g4_restore // restore MSR and done
232
233
234 // G4 -- copy using VRs
235
236pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
// v0-v3 are saved at FM_SIZE+16 in the frame while translation is still on. r5/r6/r7 hold
// the x-form offsets 16/32/48 for the whole routine (they are reused for the VR restore at
// the end); r8/r9 hold the dcbt look-ahead offsets until the icache flush reuses them.
237 la r9,FM_SIZE+16(r1) // place where we save VRs to r9
238 li r5,16 // load x-form offsets into r5-r9
239 li r6,32 // another offset
240 stvx v0,0,r9 // save some VRs so we can use to copy
241 li r7,48 // another offset
242 stvx v1,r5,r9
243 li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
244 stvx v2,r6,r9
245 mtctr r0 // ctr <- number of 64-byte chunks in the page
246 li r8,96 // get look-ahead for touch
247 stvx v3,r7,r9
248 li r9,128 // second look-ahead offset (r9 no longer needed as save-area base)
249 mtmsr r12 // now we've saved VRs on stack, turn off DR
250 isync // wait for it to happen
251 b pmap_g4_copy_loop
252
253 .align 5 // align inner loops
254pmap_g4_copy_loop: // loop over 64-byte chunks
255 dcbt r3,r8 // touch 3 lines ahead
256 nop // avoid a 17-word loop...
257 dcbt r3,r9 // touch 4 lines ahead
258 nop // more padding
259 dcba 0,r4 // avoid pre-fetch of 1st dest line
260 lvx v0,0,r3 // offset 0
261 lvx v1,r5,r3 // offset 16
262 lvx v2,r6,r3 // offset 32
263 lvx v3,r7,r3 // offset 48
264 addi r3,r3,64 // advance source to next chunk
265 dcba r6,r4 // avoid pre-fetch of 2nd line
266 stvx v0,0,r4 // offset 0
267 stvx v1,r5,r4 // offset 16
268 stvx v2,r6,r4 // offset 32
269 stvx v3,r7,r4 // offset 48
270 dcbf 0,r4 // push line 1
271 dcbf r6,r4 // and line 2
272 addi r4,r4,64 // advance destination to next chunk
273 bdnz pmap_g4_copy_loop // loop until the whole page is copied
274
275 sync // wait for stores to take
276 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
277 li r8,PPC_PGBYTES-32 // point to last line in page
// Invalidate the destination page in the icache, two 32-byte lines per pass, walking
// offsets downward; the loop ends when r9 (the second offset of the pass) is 0.
278pmap_g4_icache_flush:
279 subic. r9,r8,32 // more to go?
280 icbi r4,r8 // flush from icache
281 subi r8,r9,32 // get offset to next line
282 icbi r4,r9 // flush the line one below
283 bne pmap_g4_icache_flush // continue until offset 0 has been flushed
284
285 sync
286 mtmsr r2 // turn DR back on
287 isync
288 la r9,FM_SIZE+16(r1) // get base of VR save area
289 lvx v0,0,r9 // restore the VRs
290 lvx v1,r5,r9
291 lvx v2,r6,r9
292 lvx v3,r7,r9
293
// Common exit for every pmap_copy_page path (G3, G4 and both 64-bit variants branch here).
294pmap_g4_restore: // r11=MSR
295 mtmsr r11 // restore entry MSR (EE as it was at entry), with VEC and FP off
296 isync // wait for it to happen
297 addi r1,r1,kSFSize // pop off our stack frame
298 lwz r0,8(r1) // restore return address
299 mtlr r0
300 blr
301
302
303 // 64-bit/128-byte processor: copy using VRs
304
305pmap_copy_64: // r10=features, r11=old MSR
// r2 still holds the working MSR (EE off, FP+VEC on) built in the common prologue.
// v0-v7 are saved at FM_SIZE+16 .. FM_SIZE+16+127 before DR is turned off.
306 sldi r3,r3,12 // get page address from page num
307 sldi r4,r4,12 // get page address from page num
308 la r9,FM_SIZE+16(r1) // get base of VR save area
309 li r5,16 // load x-form offsets into r5-r9
310 li r6,32 // another offset
311 bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
312 stvx v0,0,r9 // save 8 VRs so we can copy wo bubbles
313 stvx v1,r5,r9
314 li r7,48 // another offset
315 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
316 stvx v2,r6,r9
317 stvx v3,r7,r9
318 addi r9,r9,64 // advance base ptr so we can store another 4
319 mtctr r0 // ctr <- number of 128-byte lines in the page
320 li r0,MASK(MSR_DR) // get DR bit
321 stvx v4,0,r9
322 stvx v5,r5,r9
323 andc r12,r2,r0 // turn off DR bit
324 li r0,1 // get a 1 to slam into SF
325 stvx v6,r6,r9
326 stvx v7,r7,r9
327 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
328 li r8,-128 // offset so we can reach back one line
329 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
330 isync // wait for it to happen
331 dcbt128 0,r3,1 // start a forward stream
332 b pmap_64_copy_loop
333
334 .align 5 // align inner loops
335pmap_64_copy_loop: // loop over 128-byte chunks
336 dcbz128 0,r4 // avoid read of destination line
337 lvx v0,0,r3 // offset 0
338 lvx v1,r5,r3 // offset 16
339 lvx v2,r6,r3 // offset 32
340 lvx v3,r7,r3 // offset 48
341 addi r3,r3,64 // don't have enough GPRs so add 64 2x
342 lvx v4,0,r3 // offset 64
343 lvx v5,r5,r3 // offset 80
344 lvx v6,r6,r3 // offset 96
345 lvx v7,r7,r3 // offset 112
346 addi r3,r3,64 // source now points at the next 128-byte line
347 stvx v0,0,r4 // offset 0
348 stvx v1,r5,r4 // offset 16
349 stvx v2,r6,r4 // offset 32
350 stvx v3,r7,r4 // offset 48
351 addi r4,r4,64 // step destination to the second half of the line
352 stvx v4,0,r4 // offset 64
353 stvx v5,r5,r4 // offset 80
354 stvx v6,r6,r4 // offset 96
355 stvx v7,r7,r4 // offset 112
356 addi r4,r4,64 // destination now points at the next 128-byte line
357 dcbf r8,r4 // flush the line we just wrote (r8 = -128 reaches back from the advanced r4)
358 bdnz pmap_64_copy_loop // loop until the whole page is copied
359
360 sync // wait for stores to take
361 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
362 li r8,PPC_PGBYTES-128 // point to last line in page
// Invalidate the destination page in the icache, two 128-byte lines per pass, walking
// offsets downward; the loop ends when r9 (the second offset of the pass) is 0.
363pmap_64_icache_flush:
364 subic. r9,r8,128 // more to go?
365 icbi r4,r8 // flush from icache
366 subi r8,r9,128 // get offset to next line
367 icbi r4,r9 // flush the line one below
368 bne pmap_64_icache_flush // continue until offset 0 has been flushed
369
370 sync
371 mtmsrd r2 // turn DR back on, SF off
372 isync
373 la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
374 lvx v0,0,r9 // restore the VRs
375 lvx v1,r5,r9
376 lvx v2,r6,r9
377 lvx v3,r7,r9
378 addi r9,r9,64 // second group of four saved VRs
379 lvx v4,0,r9
380 lvx v5,r5,r9
381 lvx v6,r6,r9
382 lvx v7,r7,r9
383
384 b pmap_g4_restore // restore lower half of MSR and return
385
386 //
387 // Copy on 64-bit without VMX
388 //
389
390pmap_novmx_copy:
// Reached from pmap_copy_64 when pfAltivecb is clear: r3/r4 already hold the source and
// destination page addresses, r2 = working MSR, r11 = MSR to restore on exit.
// This path copies through GPRs only, so no vector registers are saved or restored.
// The copy loop reuses r10 and r12 as data registers; r2 and r11 are left intact for the exit.
391 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
392 mtctr r0 // ctr <- number of 128-byte lines in the page
393 li r0,MASK(MSR_DR) // get DR bit
394 andc r12,r2,r0 // turn off DR bit
395 li r0,1 // get a 1 to slam into SF
396 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
397 mtmsrd r12 // turn DR off and SF on (no VRs are saved on this path)
398 isync // wait for it to happen
399 dcbt128 0,r3,1 // start a forward stream
400
401pmap_novmx_copy_loop: // loop over 128-byte cache lines
402 dcbz128 0,r4 // avoid read of dest line
403
404 ld r0,0(r3) // Load half a line
405 ld r12,8(r3)
406 ld r5,16(r3)
407 ld r6,24(r3)
408 ld r7,32(r3)
409 ld r8,40(r3)
410 ld r9,48(r3)
411 ld r10,56(r3)
412
413 std r0,0(r4) // Store half a line
414 std r12,8(r4)
415 std r5,16(r4)
416 std r6,24(r4)
417 std r7,32(r4)
418 std r8,40(r4)
419 std r9,48(r4)
420 std r10,56(r4)
421
422 ld r0,64(r3) // Load half a line
423 ld r12,72(r3)
424 ld r5,80(r3)
425 ld r6,88(r3)
426 ld r7,96(r3)
427 ld r8,104(r3)
428 ld r9,112(r3)
429 ld r10,120(r3)
430
431 addi r3,r3,128 // advance source to next line
432
433 std r0,64(r4) // Store half a line
434 std r12,72(r4)
435 std r5,80(r4)
436 std r6,88(r4)
437 std r7,96(r4)
438 std r8,104(r4)
439 std r9,112(r4)
440 std r10,120(r4)
441
442 dcbf 0,r4 // flush the line we just wrote
443 addi r4,r4,128 // advance destination to next line
444 bdnz pmap_novmx_copy_loop // loop until the whole page is copied
445
446 sync // wait for stores to take
447 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
448 li r8,PPC_PGBYTES-128 // point to last line in page
449
// Invalidate the destination page in the icache, two 128-byte lines per pass, walking
// offsets downward; the loop ends when r9 (the second offset of the pass) is 0.
450pmap_novmx_icache_flush:
451 subic. r9,r8,128 // more to go?
452 icbi r4,r8 // flush from icache
453 subi r8,r9,128 // get offset to next line
454 icbi r4,r9 // flush the line one below
455 bne pmap_novmx_icache_flush // continue until offset 0 has been flushed
456
457 sync
458 mtmsrd r2 // turn DR back on, SF off
459 isync
460
461 b pmap_g4_restore // restore lower half of MSR and return
462
463
464
465//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
466
467// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
468// These routines all run both on 32 and 64-bit machines, though because they are called
469// by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
470// by MapUserMemoryWindow will be 64 bits however on 64-bit machines. Beware to avoid
471// using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
472// is no need to store or load it, which are mode-dependent operations since it could be
473// 32 or 64 bits.
474
// Frame layout for copyin/copyout/copyinstr/copyoutstr. Slots, as used by the common code:
//   kkBufSize   - buffer length from r6 (after clamping to 256MB for the string ops)
//   kkCR3       - caller's cr3 field, restored on exit
//   kkSource    - r3 at entry (kernel source ptr for the copyout variants)
//   kkDest      - r5 at entry (kernel dest ptr for the copyin variants)
//   kkCountPtr  - r7 at entry (where the string ops store the byte count)
//   kkR31Save   - caller's r31
//   kkThrErrJmp - previous THREAD_RECOVER value, put back on exit
475#define kkFrameSize (FM_SIZE+32)
476
477#define kkBufSize (FM_SIZE+0)
478#define kkCR3 (FM_SIZE+4)
479#define kkSource (FM_SIZE+8)
480#define kkDest (FM_SIZE+12)
481#define kkCountPtr (FM_SIZE+16)
482#define kkR31Save (FM_SIZE+20)
483#define kkThrErrJmp (FM_SIZE+24)
484
485
486// nonvolatile CR bits we use as flags in cr3
// kkZero shares bit 15 with kkString. In the code visible here kkString is last tested at
// copyin3 (to dispatch to copyString); kkZero is only written after that, in the string path.
487
488#define kk64bit 12
489#define kkNull 13
490#define kkIn 14
491#define kkString 15
492#define kkZero 15
493
494
495//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
496/*
497 * int
498 * copyoutstr(src, dst, maxcount, count)
499 * vm_offset_t src; // r3
500 * addr64_t dst; // r4 and r5
501 * vm_size_t maxcount; // r6
502 * vm_size_t* count; // r7
503 *
504 * Set *count to the number of bytes copied.
505 */
506
507ENTRY(copyoutstr, TAG_NO_FRAME_USED)
// Sets up the cr3 flags (kkString=1, kkIn=0) and the user address in r10/r11, zeroes
// *count, then joins the common path at copyJoin (which clears kkNull).
508 mfcr r2,0x10 // save caller's cr3, which we use for flags
509 mr r10,r4 // move high word of 64-bit user address to r10
510 li r0,0 // r0 <- 0, stored to *count below
511 crset kkString // flag as a string op
512 mr r11,r5 // move low word of 64-bit user address to r11
513 stw r0,0(r7) // initialize #bytes moved
514 crclr kkIn // flag as copyout
515 b copyJoin // common setup; kkNull is cleared there
516
517
518//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
519/*
520 * int
521 * copyinstr(src, dst, maxcount, count)
522 * addr64_t src; // r3 and r4
523 * vm_offset_t dst; // r5
524 * vm_size_t maxcount; // r6
525 * vm_size_t* count; // r7
526 *
527 * Set *count to the number of bytes copied
528 * If dst == NULL, don't copy, just count bytes.
529 * Only currently called from klcopyinstr.
530 */
531
532ENTRY(copyinstr, TAG_NO_FRAME_USED)
// Sets up the cr3 flags (kkString=1, kkIn=1, kkNull=(dst==NULL)) and the user address in
// r10/r11, zeroes *count, then joins the common path at copyJoin1 so kkNull is preserved.
533 mfcr r2,0x10 // save caller's cr3, which we use for flags
534 cmplwi r5,0 // dst==NULL?
535 mr r10,r3 // move high word of 64-bit user address to r10
536 li r0,0 // r0 <- 0, stored to *count below
537 crset kkString // flag as a string op
538 mr r11,r4 // move low word of 64-bit user address to r11
539 crmove kkNull,cr0_eq // remember if (dst==NULL)
540 stw r0,0(r7) // initialize #bytes moved
541 crset kkIn // flag as copyin (rather than copyout)
542 b copyJoin1 // skip over the "crclr kkNull"
543
544
545//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
546/*
547 * int
548 * copyout(src, dst, count)
549 * vm_offset_t src; // r3
550 * addr64_t dst; // r4 and r5
551 * size_t count; // r6
552 */
553
554 .align 5
555 .globl EXT(copyout)
556 .globl EXT(copyoutmsg)
557
// copyout and copyoutmsg are two names for the same entry point.
558LEXT(copyout)
559LEXT(copyoutmsg)
560
561#if INSTRUMENT
562 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
563 stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
564 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
565 stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
566 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
567 stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
568 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
569 stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
570#endif
// Sets up the cr3 flags (kkString=0, kkIn=0) and the user address in r10/r11, then joins
// the common path at copyJoin (which clears kkNull).
571 mfcr r2,0x10 // save caller's cr3, which we use for flags
572 mr r10,r4 // move high word of 64-bit user address to r10
573 crclr kkString // not a string version
574 mr r11,r5 // move low word of 64-bit user address to r11
575 crclr kkIn // flag as copyout
576 b copyJoin // common setup; kkNull is cleared there
577
578
579//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
580/*
581 * int
582 * copyin(src, dst, count)
583 * addr64_t src; // r3 and r4
584 * vm_offset_t dst; // r5
585 * size_t count; // r6
586 */
587
588
589 .align 5
590 .globl EXT(copyin)
591 .globl EXT(copyinmsg)
592
// copyin and copyinmsg are two names for the same entry point.
// Sets up the cr3 flags (kkString=0, kkIn=1) and the user address in r10/r11, then falls
// through into the common code at copyJoin below (no branch is needed).
593LEXT(copyin)
594LEXT(copyinmsg)
595
596 mfcr r2,0x10 // save caller's cr3, which we use for flags
597 mr r10,r3 // move high word of 64-bit user address to r10
598 crclr kkString // not a string version
599 mr r11,r4 // move low word of 64-bit user address to r11
600 crset kkIn // flag as copyin
601
602
603// Common code to handle setup for all the copy variants:
604// r2 = caller's cr3
605// r3 = source if copyout
606// r5 = dest if copyin
607// r6 = buffer length or count
608// r7 = count output ptr (if kkString set)
609// r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
610// r11 = low word of 64-bit user-space address
611// cr3 = kkIn, kkString, kkNull flags
612
613copyJoin:
614 crclr kkNull // (dst==NULL) convention not used with this call
615copyJoin1: // enter here from copyinstr, which has already set kkNull to (dst==NULL)
// Two compares are issued up front: cr0 <- (length == 0), cr1 <- (length vs 256MB, unsigned).
// The zero-length exit is taken before any stack frame is built.
616 mflr r0 // get return address
617 cmplwi r6,0 // buffer length 0?
618 lis r9,0x1000 // r9 <- 0x10000000 (256MB)
619 stw r0,FM_LR_SAVE(r1) // save return
620 cmplw cr1,r6,r9 // buffer length > 256MB ?
621 mfsprg r8,2 // get the features
622 beq-- copyinout_0 // 0 length is degenerate case
623 stwu r1,-kkFrameSize(r1) // set up stack frame
624 stw r2,kkCR3(r1) // save caller's cr3, which we use for flags
625 mtcrf 0x02,r8 // move pf64Bit to cr6
626 stw r3,kkSource(r1) // save args across MapUserMemoryWindow
627 stw r5,kkDest(r1) // r5 at entry (kernel dest if copyin)
628 stw r6,kkBufSize(r1) // buffer length (may be overwritten by the clamp below)
629 crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
630 stw r7,kkCountPtr(r1) // count output ptr (meaningful only for the string ops)
631 stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
632
633
634
635// Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
636// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
637// the buffer length to 256MB. This isn't an issue if the string is less than 256MB
638// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction
639// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
640// the mapping.
641
642 ble++ cr1,copyin0 // skip if buffer length <= 256MB
643 bf kkString,copyinout_too_big // error if not string op
644 mr r6,r9 // silently clamp buffer length to 256MB
645 stw r9,kkBufSize(r1) // update saved copy too
646
647
648// Set up thread_recover in case we hit an illegal address.
649
650copyin0:
// Install copyinout_error as this thread's recovery address, keeping the previous value in
// the frame (kkThrErrJmp) so the exit path can put it back.
651 li r31,0 // no mapped ptr yet
652 mfsprg r8,1 // Get the current thread
653 lis r2,hi16(copyinout_error) // r2 <- address of the fault recovery handler (high half)
654 ori r2,r2,lo16(copyinout_error) // ... low half
655 lwz r4,THREAD_RECOVER(r8) // r4 <- recovery address in effect at entry
656 lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
657 stw r2,THREAD_RECOVER(r8) // faults now resume at copyinout_error
658 stw r4,kkThrErrJmp(r1) // remember the old recovery address
659
660
661// Map user segment into kernel map, turn on 64-bit mode. At this point:
662// r3 = vm map
663// r6 = buffer length
664// r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
665//
666// When we call MapUserMemoryWindow, we pass:
667// r3 = vm map ptr
668// r4/r5 = 64-bit user space address as an addr64_t
669
670 mr r4,r10 // copy user ptr into r4/r5
671 mr r5,r11
672#if INSTRUMENT
673 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
674 stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
675 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
676 stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
677 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
678 stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
679 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
680 stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
681#endif
682 bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand
683#if INSTRUMENT
684 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
685 stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
686 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
687 stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
688 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
689 stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
690 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
691 stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
692#endif
693 mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
694 bf-- kk64bit,copyin1 // skip if a 32-bit processor
695
696 rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
697 mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
698 li r0,1 // the 1 bit to insert at the SF position
699 rldimi r4,r0,63,MSR_SF_BIT // light bit 0
700 mtmsrd r4 // turn on 64-bit mode
701 isync // wait for mode to change
702
703
704// Load r3-r5, substituting mapped ptr as appropriate.
705
706copyin1:
// Reload the arguments saved in the frame, substituting the mapped pointer (r31) for
// whichever operand lives in user space.
707 lwz r5,kkBufSize(r1) // restore length to copy
708 bf kkIn,copyin2 // skip if copyout
709 lwz r4,kkDest(r1) // copyin: dest is kernel ptr
710 mr r3,r31 // source is mapped ptr
711 b copyin3
712copyin2: // handle copyout
713 lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
714 mr r4,r31 // dest is mapped ptr into user space
715
716
717// Finally, all set up to copy:
718// r3 = source ptr (mapped if copyin)
719// r4 = dest ptr (mapped if copyout)
720// r5 = length
721// r31 = mapped ptr returned by MapUserMemoryWindow
722// cr3 = kkIn, kkString, kk64bit, and kkNull flags
723
724copyin3:
725 bt kkString,copyString // handle copyinstr and copyoutstr
726 bl EXT(bcopy) // copyin and copyout: let bcopy do the work
727 li r3,0 // return success (execution continues into the common exit code that follows)
728
729
730// Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
731// from error recovery if we get a DSI accessing user space. Clear recovery ptr,
732// and pop off frame.
733// r3 = 0, EFAULT, or ENAMETOOLONG
734
735copyinx:
// r3 (the return status) is left untouched by everything below.
736 lwz r2,kkCR3(r1) // get callers cr3
737 mfsprg r6,1 // Get the current thread
738 bf-- kk64bit,copyinx1 // skip if 32-bit processor
739 mfmsr r12
740 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
741 mtmsrd r12 // turn SF off
742 isync // wait for the mode to change
743copyinx1:
744 lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address (saved in the caller's frame before ours was pushed)
745 lwz r31,kkR31Save(r1) // restore callers r31
746 lwz r4,kkThrErrJmp(r1) // load saved thread recover
747 addi r1,r1,kkFrameSize // pop off our stack frame
748 mtlr r0
749 stw r4,THREAD_RECOVER(r6) // restore thread recover
750 mtcrf 0x10,r2 // restore cr3
751 blr
752
753
754/* We get here via the exception handler if an illegal
755 * user memory reference was made. This error handler is used by
756 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
757 * they were at point of fault, so for example cr3 flags are valid.
758 */
759
760copyinout_error:
761 li r3,EFAULT // return error
762 b copyinx // unwind through the normal exit (frame and cr3 flags are still valid)
763
// Taken before the stack frame is created, so there is nothing to pop; r2 still holds the
// caller's cr3 from the entry stub and LR has not been changed.
764copyinout_0: // degenerate case: 0-length copy
765 mtcrf 0x10,r2 // restore cr3
766 li r3,0 // return success
767 blr
768
// Taken after the frame is created but before r31, LR or THREAD_RECOVER have been changed,
// so only the frame and cr3 need to be undone.
769copyinout_too_big: // degenerate case
770 mtcrf 0x10,r2 // restore cr3
771 lwz r1,0(r1) // pop off stack frame
772 li r3,ENAMETOOLONG
773 blr
774
775
776//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
777// Handle copyinstr and copyoutstr. At this point the stack frame is set up,
778// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
779// if necessary, and:
780// r3 = source ptr, mapped if copyinstr
781// r4 = dest ptr, mapped if copyoutstr
782// r5 = buffer length
783// r31 = mapped ptr returned by MapUserMemoryWindow
784// cr3 = kkIn, kkString, kkNull, and kk64bit flags
785// We do word copies unless the buffer is very short, then use a byte copy loop
786// for the leftovers if necessary. The crossover at which the word loop becomes
787// faster is about seven bytes, counting the zero.
788//
789// We first must word-align the source ptr, in order to avoid taking a spurious
790// page fault.
791
792copyString:
793 cmplwi cr1,r5,15 // is buffer very short? (fewer than 15 bytes goes straight to the byte loop)
794 mr r12,r3 // remember ptr to 1st source byte
795 mtctr r5 // assuming short, set up loop count for bytes
796 blt-- cr1,copyinstr8 // too short for word loop
797 rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word
798 rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word
799 li r7,-1 // all ones; shifted below to form the first-word byte mask
800 sub r3,r3,r2 // word-align source address
801 add r6,r5,r2 // get length starting at byte 0 in word
802 srw r7,r7,r9 // get mask for bytes in first word
803 srwi r0,r6,2 // get #words in buffer
804 lwz r5,0(r3) // get aligned word with first source byte
805 lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
806 lis r11,hi16(0x80808080)
807 mtctr r0 // set up word loop count
808 addi r3,r3,4 // advance past the source word
809 ori r10,r10,lo16(0xFEFEFEFF)
810 ori r11,r11,lo16(0x80808080)
811 orc r8,r5,r7 // map bytes preceding first source byte into 0xFF
812 bt-- kkNull,copyinstr5enter // enter loop that just counts
813
814// Special case 1st word, which has been 0xFF filled on left. Note that we use
815// "and.", even though we execute both in 32 and 64-bit mode. This is OK.
816
817 slw r5,r5,r9 // left justify payload bytes
818 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
819 andc r7,r11,r8 // r7 = ~data & 0x80808080
820 subfic r0,r2,4 // get r0 <- #payload bytes in 1st word
821 and. r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero
822 stw r5,0(r4) // copy payload bytes to dest buffer
823 add r4,r4,r0 // then point to next byte in dest buffer
824 bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found
825
826 b copyinstr7 // 0 found (buffer can't be full)
827
828
829// Word loop(s). They do a word-parallel search for 0s, using the following
830// non-obvious but very efficient test:
831// y = data + 0xFEFEFEFF
832// z = ~data & 0x80808080
833// If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
834// of this loop, one that just counts and another that copies.
835// r3 = ptr to next word of source (word aligned)
836// r4 = ptr to next byte in buffer
837// r6 = original buffer length (adjusted to be word origin)
838// r10 = 0xFEFEFEFF
839// r11 = 0x80808080
840// r12 = ptr to 1st source byte (used to determine string length)
841
842 .align 5 // align inner loops for speed
843copyinstr5: // version that counts but does not copy
844 lwz r8,0(r3) // get next word of source
845 addi r3,r3,4 // advance past it
846copyinstr5enter:
847 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
848 andc r7,r11,r8 // r7 = ~data & 0x80808080
849 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
850 bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes in r8 are nonzero
851
852 b copyinstr7 // 0 found or word count exhausted
853
854 .align 5 // align inner loops for speed
855copyinstr6: // version that counts and copies
856 lwz r8,0(r3) // get next word of source
857 addi r3,r3,4 // advance past it
858 addi r4,r4,4 // increment dest ptr while we wait for data
859 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
860 andc r7,r11,r8 // r7 = ~data & 0x80808080
861 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
862 stw r8,-4(r4) // pack all 4 bytes into buffer
863 bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
864
865
866// Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
867// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
868// mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte.
869// r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
870// r6 = original buffer length (adjusted to be word origin)
871// r7 = computed vector of 0x00 and 0x80 bytes
872// r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
873// r12 = ptr to 1st source byte (used to determine string length)
874// cr0 = beq set iff 0 not found
875
876copyinstr7:
877 rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
878 rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word
879 andc r7,r7,r2 // turn off false hits from 0x0100 worst case
880 crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
881 srwi r7,r7,8 // we want to count the 0 as a byte xferred
882 cmpwi r6,0 // any bytes left over in last word?
883 cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
884 subi r3,r3,4 // back up r3 to point to 1st byte in r8
885 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
886 add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred
887 bt++ kkZero,copyinstr10 // 0 found, so done
888
889 beq copyinstr10 // r6==0, so buffer truly full
890 mtctr r6 // 0 not found, loop over r6 bytes
891 b copyinstr8 // enter byte loop for last 1-3 leftover bytes
892
893
894// Byte loop. This is used for very small buffers and for the odd bytes left over
895// after searching and copying words at a time.
896// r3 = ptr to next byte of source
897// r4 = ptr to next dest byte
898// r12 = ptr to first byte of source
899// ctr = count of bytes to check
900
901 .align 5 // align inner loops for speed
902copyinstr8: // loop over bytes of source
903 lbz r0,0(r3) // get next byte of source
904 addi r3,r3,1 // advance source ptr
905 addi r4,r4,1 // increment dest addr whether we store or not
906 cmpwi r0,0 // the 0?
907 bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr
908 stb r0,-1(r4) // store the byte (r4 was already advanced)
909copyinstr9:
910 bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
911
912 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
913
914
915// Buffer filled or 0 found. Unwind and return.
916// r3 = ptr to 1st source byte not transferred
917// r12 = ptr to 1st source byte
918// r31 = mapped ptr returned by MapUserMemoryWindow
919// cr3 = kkZero set iff 0 found
920
921copyinstr10:
// The byte count is stored through the saved count pointer in both the success and the
// buffer-full case; only the status in r3 differs.
922 lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
923 sub r2,r3,r12 // compute #bytes copied (including the 0)
924 li r3,0 // assume success return status
925 stw r2,0(r9) // store #bytes moved
926 bt++ kkZero,copyinx // we did find the 0 so return 0
927 li r3,ENAMETOOLONG // buffer filled
928 b copyinx // join main exit routine
929
930//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
931/*
932 * int
933 * copypv(source, sink, size, which)
934 * addr64_t src; // r3 and r4
935 * addr64_t dst; // r5 and r6
936 * size_t size; // r7
937 * int which; // r8
938 *
939 * Operand size bytes are copied from operand src into operand dst. The source and
940 * destination operand addresses are given as addr64_t, and may designate starting
941 * locations in physical or virtual memory in any combination except where both are
942 * virtual. Virtual memory locations may be in either the kernel or the current thread's
943 * address space. Operand size may be up to 256MB.
944 *
945 * Operation is controlled by operand which, which offers these options:
946 * cppvPsrc : source operand is (1) physical or (0) virtual
947 * cppvPsnk : destination operand is (1) physical or (0) virtual
948 * cppvKmap : virtual operand is in (1) kernel or (0) current thread
949 * cppvFsnk : (1) flush destination before and after transfer
950 * cppvFsrc : (1) flush source before and after transfer
951 * cppvNoModSnk : (1) don't set destination operand's changed bit(s)
952 * cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
953 *
954 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
955 * This section describes the operation of the new 64-bit path.
956 *
957 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
958 * window in the kernel address space into all of physical RAM plus the I/O hole. Since
959 * the window's mappings specify the proper access policies for the underlying memory,
960 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
961 * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
962 * memory window, and are accessed with data relocation on. Virtual addresses are either
963 * within the kernel, or are mapped into the kernel address space through the user memory
964 * window. Because accesses to a virtual operand are performed with data relocation on,
965 * the new path does not have to translate the address, disable/enable interrupts, lock
966 * the mapping, or update referenced and changed bits.
967 *
968 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
969 * a substantial performance penalty for copypv operating in real mode. Utilizing the
970 * new 64-bit path, transfer performance increases >100% on the G5.
971 *
972 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
973 * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
974 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
975 * required.
976 *
977 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
978 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
979 * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
980 * in our own stack frame across calls to 32-bit functions.
981 *
982 */
983
984// Map operand which bits into non-volatile CR2 and CR3 bits.
// copypv rotates 'which' left by whichAlign bits and masks it with whichMask
// (rlwinm r8,r8,whichAlign,whichMask) before moving it into cr2 and cr3 with mtcrf.
// A rotate left by whichAlign moves bit number b to bit number b - whichAlign, which is
// why every pvXxx below is the matching cppvXxxb minus whichAlign.
// NOTE(review): the cppvXxxb values are defined outside this part of the file; they are
// presumably big-endian bit numbers within 'which' -- confirm against their definition.
985#define whichAlign ((3+1)*4)
// whichMask has CR bits 9-15 set, so bit 8 (cr2_lt) is always zero after the mask.
986#define whichMask 0x007F0000
987#define pvPsnk (cppvPsnkb - whichAlign)
988#define pvPsrc (cppvPsrcb - whichAlign)
989#define pvFsnk (cppvFsnkb - whichAlign)
990#define pvFsrc (cppvFsrcb - whichAlign)
991#define pvNoModSnk (cppvNoModSnkb - whichAlign)
992#define pvNoRefSrc (cppvNoRefSrcb - whichAlign)
993#define pvKmap (cppvKmapb - whichAlign)
// pvNoCache is not a caller-supplied flag: copypv sets it itself when the copy has to be
// done by bcopy_nc instead of bcopy.
994#define pvNoCache cr2_lt
995
996 .align 5
997 .globl EXT(copypv)
998
// copypv entry point.
//   In:  r3:r4 = source addr64_t, r5:r6 = destination addr64_t, r7 = byte count, r8 = which
//   Out: r3 = 0 on success (including a zero byte count), EINVAL if both operands are
//        virtual or the byte count is 2**28 or more, EFAULT if bcopy/bcopy_nc faulted
// Processors without the pf64Bit feature branch straight to hw_copypv_32(); everything
// from copypv_64 onward is the 64-bit path.
999LEXT(copypv)
1000 mfsprg r10,2 // get feature flags
1001 mtcrf 0x02,r10 // we need to test pf64Bit
1002 bt++ pf64Bitb,copypv_64 // skip if 64-bit processor (only they take hint)
1003
1004 b EXT(hw_copypv_32) // carry on with 32-bit copypv
1005
1006// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
// Frame slots used by this routine (offsets from FM_ARG0):
//   0x00-0x14  caller's r26-r31 (32-bit saves)
//   0x18       scratch output word(s) for mapping_find (nextva) and mapping_phys_lookup (pindex)
//   0x20       caller's THREAD_RECOVER value, put back at copypv_return
//   0x28-0x40  64-bit saves of r26/r28/r29/r30 around calls to 32-bit functions
1007copypv_64:
1008 mfsprg r9,1 // get current thread
1009 stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
1010 // allocate stack frame and link it
1011 mflr r0 // get return address
1012 mfcr r10 // get cr2 and cr3
1013 lwz r12,THREAD_RECOVER(r9) // get caller's error callback so it can be restored on exit
1014 stw r26,FM_ARG0+0x00(r1) // save non-volatile r26
1015 stw r27,FM_ARG0+0x04(r1) // save non-volatile r27
1016 stw r28,FM_ARG0+0x08(r1) // save non-volatile r28
1017 stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29
1018 stw r30,FM_ARG0+0x10(r1) // save non-volatile r30
1019 stw r31,FM_ARG0+0x14(r1) // save non-volatile r31
1020 stw r12,FM_ARG0+0x20(r1) // save caller's error callback
1021 stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1022 // save return address
1023 stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1024 // save non-volatile cr2 and cr3
1025
1026// Non-volatile register usage in this routine is:
1027// r26: saved msr image (reused as a 4K-page counter once the copy is done)
1028// r27: current pmap_t / virtual source address (reused as a page number once the copy is done)
1029// r28: destination virtual address
1030// r29: source address
1031// r30: destination address
1032// r31: byte count to copy
1033// cr2/3: parameter 'which' bits
1034
// Decode the arguments, reject invalid combinations, and (for a user-space virtual operand)
// map it into the kernel through the user memory window.
1035 rlwinm r8,r8,whichAlign,whichMask // align and mask which bits
1036 mr r31,r7 // copy size to somewhere non-volatile
1037 mtcrf 0x20,r8 // insert which bits into cr2
1038 mtcrf 0x10,r8 // insert which bits into cr3
1039 rlwinm r29,r3,0,1,0 // form source address high-order bits
1040 rlwinm r30,r5,0,1,0 // form destination address high-order bits
1041 rlwimi r29,r4,0,0,31 // form source address low-order bits
1042 rlwimi r30,r6,0,0,31 // form destination address low-order bits
1043 crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical?
1044 cntlzw r0,r31 // count leading zeroes in byte count
1045 cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical
1046 bf-- cr7_eq,copypv_einval // both operands may not be virtual
1047 cmplwi r0,4 // fewer than 4 leading zeroes means byte count >= 256M (2**28)
1048 blt-- copypv_einval // byte count too big, give EINVAL
1049 cmplwi r31,0 // byte count zero?
1050 beq-- copypv_zero // early out
1051 bt cr7_lt,copypv_phys // both operand addresses are physical
1052 mr r28,r30 // assume destination is virtual
1053 bf pvPsnk,copypv_dv // is destination virtual?
1054 mr r28,r29 // no, so source must be virtual
// From here r28 holds the address of the single virtual operand.
1055copypv_dv:
1056 lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order
1057 lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
1058 bt pvKmap,copypv_kern // virtual address in kernel map?
1059 lwz r3,ACT_VMMAP(r9) // get user's vm_map * (r9 is still the current thread from the prologue)
1060 rldicl r4,r28,32,32 // r4, r5 <- addr64_t virtual address
1061 rldicl r5,r28,0,32
1062 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call
1063 std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call
1064 bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space
1065 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1066 ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30
1067 rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address (returned in r3:r4)
1068 rlwimi r28,r4,0,0,31 // into a single 64-bit scalar
1069copypv_kern:
1070
1071// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
1072// update the referenced and changed bits manually after the copy. So, force the appropriate
1073// flag bit on for the virtual operand.
// (crorc ORs the flag with the complement of the "is physical" bit, so the flag is forced
// on exactly when that operand is virtual.)
1074 crorc pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits
1075 crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit
1076
1077// We'll be finding a mapping and looking at it, so we need to disable 'rupts.
1078 lis r0,hi16(MASK(MSR_VEC)) // get vector mask
1079 ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask
1080 mfmsr r26 // save current msr
1081 andc r26,r26,r0 // turn off VEC and FP in saved copy
1082 ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask
1083 andc r0,r26,r0 // disable EE in our new msr image
1084 mtmsrd r0 // introduce new msr image
1085
1086// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
1087// try to find a mapping corresponding to this address in order to determine whether the address
1088// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
1089// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
1090// examine the mapping's caching-inhibited bit.
1091 mr r3,r27 // r3 <- pmap_t pmap
1092 rldicl r4,r28,32,32 // r4, r5 <- addr64_t va
1093 rldicl r5,r28,0,32
1094 la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva
1095 li r7,1 // r7 <- int full, search nested mappings
1096 std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls
1097 std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls
1098 std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls
1099 std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls
1100 bl EXT(mapping_find) // find mapping for virtual operand
1101 mr. r3,r3 // did we find it?
1102 beq copypv_nomapping // nope, so we'll assume it's cacheable
1103 lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags
1104 rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set?
1105 crnot pvNoCache,cr0_eq // if it is, use bcopy_nc
1106 bl EXT(mapping_drop_busy) // drop busy on the mapping (r3 still holds the mapping)
1107copypv_nomapping:
1108 ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26
1109 ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28
1110 ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29
1111 ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30
1112 mtmsrd r26 // restore msr to its previous state, except VEC and FP stay off
1113
1114// Set both the source and destination virtual addresses to the virtual operand's address --
1115// we'll overlay one of them with the physical operand's address.
1116 mr r27,r28 // make virtual operand BOTH source AND destination
1117
1118// Now we're ready to relocate the physical operand address(es) into the physical memory window.
1119// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
1120// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
1121// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
// On exit from this section r27 is the source and r28 the destination address to hand to
// bcopy/bcopy_nc; a virtual operand keeps the address already placed in r27/r28.
1122copypv_phys:
1123 ld r6,lgPMWvaddr(0) // get physical memory window virtual address
1124 bf pvPsnk,copypv_dstvirt // is destination address virtual?
1125 cntlzd r4,r30 // count leading zeros in destination address
1126 cmplwi r4,32 // exactly 32 means 2**31 <= address <= 2**32-1, which is treated as the I/O hole
1127 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1128 add r28,r30,r6 // relocate physical destination into physical window
1129copypv_dstvirt:
1130 bf pvPsrc,copypv_srcvirt // is source address virtual?
1131 cntlzd r4,r29 // count leading zeros in source address
1132 cmplwi r4,32 // exactly 32 means 2**31 <= address <= 2**32-1, which is treated as the I/O hole
1133 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1134 add r27,r29,r6 // relocate physical source into physical window
1135copypv_srcvirt:
1136
1137// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
1138// funny happens during the copy. So, we set a pointer to our error handler in the per-thread
1139// control block.
// NOTE(review): presumably the fault path resumes at the address stored in THREAD_RECOVER;
// that mechanism is outside this file section -- confirm.
1140 mfsprg r8,1 // get current thread
1141 lis r3,hi16(copypv_error) // get our error callback's address, high
1142 ori r3,r3,lo16(copypv_error) // get our error callback's address, low
1143 stw r3,THREAD_RECOVER(r8) // set our error callback
1144
1145// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
1146// 64-bit mode.
1147 li r0,1 // get a handy one bit
1148 mfmsr r3 // get current msr
1149 rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy
1150 mtmsrd r3 // enter 64-bit mode
1151
1152// If requested, flush data cache
1153// Note that we don't flush, the code is being saved "just in case".
1154#if 0
1155 bf pvFsrc,copypv_nfs // do we flush the source?
1156 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1157 rldicl r4,r27,0,32
1158 mr r5,r31 // r5 <- count (in bytes)
1159 li r6,0 // r6 <- boolean phys (false, not physical)
1160 bl EXT(flush_dcache) // flush the source operand
1161copypv_nfs:
1162 bf pvFsnk,copypv_nfdx // do we flush the destination?
1163 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1164 rldicl r4,r28,0,32
1165 mr r5,r31 // r5 <- count (in bytes)
1166 li r6,0 // r6 <- boolean phys (false, not physical)
1167 bl EXT(flush_dcache) // flush the destination operand
1168copypv_nfdx:
1169#endif
1170
1171// Call bcopy or bcopy_nc to perform the copy.
// Both are called with the same (source, destination, length) register arguments; pvNoCache
// selects between them.
1172 mr r3,r27 // r3 <- source virtual address
1173 mr r4,r28 // r4 <- destination virtual address
1174 mr r5,r31 // r5 <- bytes to copy
1175 bt pvNoCache,copypv_nc // take non-caching route
1176 bl EXT(bcopy) // call bcopy to do the copying
1177 b copypv_copydone
1178copypv_nc:
1179 bl EXT(bcopy_nc) // call bcopy_nc to do the copying
1180copypv_copydone:
1181
1182// If requested, flush data cache
1183// Note that we don't flush, the code is being saved "just in case".
1184#if 0
1185 bf pvFsrc,copypv_nfsx // do we flush the source?
1186 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1187 rldicl r4,r27,0,32
1188 mr r5,r31 // r5 <- count (in bytes)
1189 li r6,0 // r6 <- boolean phys (false, not physical)
1190 bl EXT(flush_dcache) // flush the source operand
1191copypv_nfsx:
1192 bf pvFsnk,copypv_nfd // do we flush the destination?
1193 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1194 rldicl r4,r28,0,32
1195 mr r5,r31 // r5 <- count (in bytes)
1196 li r6,0 // r6 <- boolean phys (false, not physical)
1197 bl EXT(flush_dcache) // flush the destination operand
1198copypv_nfd:
1199#endif
1200
1201// Leave 64-bit mode.
1202 mfmsr r3 // get current msr
1203 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1204 mtmsrd r3 // leave 64-bit mode
1205
// If requested, set ref/chg on source/dest physical operand(s). It is possible that copy is
// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
// Note that this code is page-size sensitive, so it should probably be a part of our low-level
// code in hw_vm.s.
//
// Page walk, used for both operands:
//     pages touched = (((addr & 0xFFF) + count - 1) >> 12) + 1
// count (r31) is in 1..2**28-1 here (zero and oversized counts were rejected on entry), so the
// sum fits in 32 bits; srwi is used so that only the low word of the sum matters.
//     r26 = pages still to update
//     r27 = current physical page number, advanced by one on every iteration
// Either loop ends early at the first page that mapping_phys_lookup() reports as absent.
//
// This replaces an earlier computation whose "crossing" test was inverted (it took the
// non-crossing path exactly when the operand did cross a page boundary) and whose loops never
// advanced the page number, so only the first page ever had its bits set.
        bt      pvNoModSnk,copypv_nomod         // skip destination update if not requested
        std     r29,FM_ARG0+0x30(r1)            // preserve 64-bit r29 across 32-bit calls
        rlwinm  r3,r30,0,20,31                  // r3 <- offset of destination within its first page
        add     r3,r3,r31                       // r3 <- offset + byte count
        subi    r3,r3,1                         // r3 <- offset of last byte, relative to first page
        srwi    r26,r3,12                       // r26 <- index of last page touched (0-based)
        addi    r26,r26,1                       // r26 <- number of 4K pages to update
        srdi    r27,r30,12                      // r27 <- first destination page number
copypv_modloop:
        mr      r3,r27                          // r3 <- destination page number
        la      r4,FM_ARG0+0x18(r1)             // r4 <- unsigned int *pindex
        bl      EXT(mapping_phys_lookup)        // see if page is really there
        mr.     r3,r3                           // is it?
        beq--   copypv_modend                   // nope, break out of modify loop
        mr      r3,r27                          // r3 <- destination page number
        bl      EXT(mapping_set_mod)            // set page changed status
        addi    r27,r27,1                       // step to next destination page
        subi    r26,r26,1                       // decrement page count
        cmpwi   r26,0                           // done yet?
        bgt     copypv_modloop                  // nope, iterate
copypv_modend:
        ld      r29,FM_ARG0+0x30(r1)            // restore 64-bit r29
copypv_nomod:
        bt      pvNoRefSrc,copypv_done          // skip source update if not requested
copypv_debugref:
        rlwinm  r3,r29,0,20,31                  // r3 <- offset of source within its first page
        add     r3,r3,r31                       // r3 <- offset + byte count
        subi    r3,r3,1                         // r3 <- offset of last byte, relative to first page
        srwi    r26,r3,12                       // r26 <- index of last page touched (0-based)
        addi    r26,r26,1                       // r26 <- number of 4K pages to update
        srdi    r27,r29,12                      // r27 <- first source page number
copypv_refloop:
        mr      r3,r27                          // r3 <- source page number
        la      r4,FM_ARG0+0x18(r1)             // r4 <- unsigned int *pindex
        bl      EXT(mapping_phys_lookup)        // see if page is really there
        mr.     r3,r3                           // is it?
        beq--   copypv_done                     // nope, break out of reference loop
        mr      r3,r27                          // r3 <- source page number
        bl      EXT(mapping_set_ref)            // set page referenced status
        addi    r27,r27,1                       // step to next source page
        subi    r26,r26,1                       // decrement page count
        cmpwi   r26,0                           // done yet?
        bgt     copypv_refloop                  // nope, iterate
1264
1265// Return, indicating success.
1266copypv_done:
1267copypv_zero:
1268 li r3,0 // our efforts were crowned with success
1269
1270// Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
// Common exit for every path. r3 already holds the return value and is not touched here.
// THREAD_RECOVER is set back to the value it had on entry, which was saved in the frame by
// the prologue.
1271copypv_return:
1272 mfsprg r9,1 // get current thread
1273 lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1274 // get return address
1275 lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1276 // get non-volatile cr2 and cr3
1277 lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26
1278 lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27
1279 mtlr r0 // restore return address
1280 lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28
1281 mtcrf 0x20,r4 // restore non-volatile cr2
1282 mtcrf 0x10,r4 // restore non-volatile cr3
1283 lwz r11,FM_ARG0+0x20(r1) // get caller's error callback saved on entry
1284 lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29
1285 lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30
1286 lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31
1287 stw r11,THREAD_RECOVER(r9) // put caller's error callback back
1288 lwz r1,0(r1) // release stack frame
1289
1290 blr // y'all come back now
1291
1292// Invalid argument handler.
1293copypv_einval:
1294 li r3,EINVAL // invalid argument
1295 b copypv_return // return
1296
1297// Error encountered during bcopy or bcopy_nc.
// This is the address installed in THREAD_RECOVER before the copy. The copy runs in 64-bit
// mode, so SF is cleared here before taking the common exit.
1298copypv_error:
1299 mfmsr r3 // get current msr
1300 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1301 mtmsrd r3 // leave 64-bit mode
1302 li r3,EFAULT // it was all his fault
1303 b copypv_return // return