/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
#include <debug.h>
#include <ppc/asm.h>
#include <ppc/proc_reg.h>
#include <mach/ppc/vm_param.h>
#include <assym.s>
#include <sys/errno.h>

#define INSTRUMENT 0

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * void pmap_zero_page(vm_offset_t pa)
 *
 * Zero a page of physical memory.  This routine runs in 32 or 64-bit mode,
 * and handles 32 and 128-byte cache lines.
 */


        .align  5
        .globl  EXT(pmap_zero_page)

LEXT(pmap_zero_page)

        mflr    r12                             // save return address
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        mtlr    r12                             // restore return address
        andi.   r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size

        subfic  r4,r9,PPC_PGBYTES               // r4 <- starting offset in page

        bt++    pf64Bitb,page0S4                // Go do the big guys...

        slwi    r3,r3,12                        // get page address from page num
        b       page_zero_1                     // Jump to line aligned loop...

        .align  5

        nop
        nop
        nop
        nop
        nop
        nop
        nop

page0S4:
        sldi    r3,r3,12                        // get page address from page num

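// Note on the loop below: it walks down from the top of the page, zeroing two
// cache lines per pass (at offsets r4 and r5 = r4 - linesize) and stepping the
// offset down by the line size held in r9 until it reaches zero.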
page_zero_1:                                    // loop zeroing cache lines
        sub.    r5,r4,r9                        // more to go?
        dcbz128 r3,r4                           // zero either 32 or 128 bytes
        sub     r4,r5,r9                        // generate next offset
        dcbz128 r3,r5
        bne--   page_zero_1

        b       EXT(ml_restore)                 // restore MSR and do the isync


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * phys_copy(src, dst, bytecount)
 *      addr64_t        src;
 *      addr64_t        dst;
 *      int             bytecount
 *
 * This routine will copy bytecount bytes from physical address src to physical
 * address dst.  It runs in 64-bit mode if necessary, but does not handle
 * overlap or make any attempt to be optimal.  Length must be a signed word.
 * Not performance critical.
 */

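// Structure note: phys_copy copies whole words while at least four bytes
// remain (phys_copy_1), then finishes any remaining 1-3 bytes with the byte
// loop at phys_copy_3.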
        .align  5
        .globl  EXT(phys_copy)

LEXT(phys_copy)

        rlwinm  r3,r3,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        mflr    r12                             // get return address
        rlwimi  r3,r4,0,0,31                    ; Combine bottom of long long to full 64-bits
        rlwinm  r4,r5,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        rlwimi  r4,r6,0,0,31                    ; Combine bottom of long long to full 64-bits
        mtlr    r12                             // restore return address
        subic.  r5,r7,4                         // a word to copy?
        b       phys_copy_2

        .align  5

phys_copy_1:                                    // loop copying words
        subic.  r5,r5,4                         // more to go?
        lwz     r0,0(r3)
        addi    r3,r3,4
        stw     r0,0(r4)
        addi    r4,r4,4
phys_copy_2:
        bge     phys_copy_1
        addic.  r5,r5,4                         // restore count
        ble     phys_copy_4                     // no more

        // Loop is aligned here

phys_copy_3:                                    // loop copying bytes
        subic.  r5,r5,1                         // more to go?
        lbz     r0,0(r3)
        addi    r3,r3,1
        stb     r0,0(r4)
        addi    r4,r4,1
        bgt     phys_copy_3
phys_copy_4:
        b       EXT(ml_restore)                 // restore MSR and do the isync


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * pmap_copy_page(src, dst)
 *      ppnum_t          src;
 *      ppnum_t          dst;
 *
 * This routine will copy the physical page src to physical page dst
 *
 * This routine assumes that the src and dst are page numbers and that the
 * destination is cached.  It runs on 32 and 64 bit processors, with and
 * without altivec, and with 32 and 128 byte cache lines.
 * We also must assume that no-one will be executing within the destination
 * page, and that this will be used for paging.  Because this
 * is a common routine, we have tuned loops for each processor class.
 *
 */
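// There are four tuned paths below: a G3 path that copies 32-byte lines
// through FPRs, a G4 path that copies 64-byte chunks through AltiVec VRs,
// a 64-bit path that copies 128-byte lines through VRs, and a 64-bit path
// that falls back to GPR ld/std pairs when AltiVec is suppressed.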
#define kSFSize (FM_SIZE+160)

ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)

        lis     r2,hi16(MASK(MSR_VEC))          ; Get the vector flag
        mflr    r0                              // get return
        ori     r2,r2,lo16(MASK(MSR_FP))        ; Add the FP flag
        stw     r0,8(r1)                        // save
        stwu    r1,-kSFSize(r1)                 // set up a stack frame for VRs or FPRs
        mfmsr   r11                             // save MSR at entry
        mfsprg  r10,2                           // get feature flags
        andc    r11,r11,r2                      // Clear out vec and fp
        ori     r2,r2,lo16(MASK(MSR_EE))        // Get EE on also
        andc    r2,r11,r2                       // Clear out EE as well
        mtcrf   0x02,r10                        // we need to test pf64Bit
        ori     r2,r2,MASK(MSR_FP)              // must enable FP for G3...
        mtcrf   0x80,r10                        // we need to test pfAltivec too
        oris    r2,r2,hi16(MASK(MSR_VEC))       // enable altivec for G4 (ignored if G3)
        mtmsr   r2                              // turn EE off, FP and VEC on
        isync
        bt++    pf64Bitb,pmap_copy_64           // skip if 64-bit processor (only they take hint)
        slwi    r3,r3,12                        // get page address from page num
        slwi    r4,r4,12                        // get page address from page num
        rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1      // get ready to turn off DR
        bt      pfAltivecb,pmap_copy_g4         // altivec but not 64-bit means G4


        // G3 -- copy using FPRs

        stfd    f0,FM_SIZE+0(r1)                // save the 4 FPRs we use to copy
        stfd    f1,FM_SIZE+8(r1)
        li      r5,PPC_PGBYTES/32               // count of cache lines in a page
        stfd    f2,FM_SIZE+16(r1)
        mtctr   r5
        stfd    f3,FM_SIZE+24(r1)
        mtmsr   r12                             // turn off DR after saving FPRs on stack
        isync

pmap_g3_copy_loop:                              // loop over 32-byte cache lines
        dcbz    0,r4                            // avoid read of dest line
        lfd     f0,0(r3)
        lfd     f1,8(r3)
        lfd     f2,16(r3)
        lfd     f3,24(r3)
        addi    r3,r3,32
        stfd    f0,0(r4)
        stfd    f1,8(r4)
        stfd    f2,16(r4)
        stfd    f3,24(r4)
        dcbst   0,r4                            // flush dest line to RAM
        addi    r4,r4,32
        bdnz    pmap_g3_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r6,PPC_PGBYTES-32               // point to last line in page
pmap_g3_icache_flush:
        subic.  r5,r6,32                        // more to go?
        icbi    r4,r6                           // flush another line in icache
        subi    r6,r5,32                        // get offset to next line
        icbi    r4,r5
        bne     pmap_g3_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        lfd     f0,FM_SIZE+0(r1)                // restore the FPRs
        lfd     f1,FM_SIZE+8(r1)
        lfd     f2,FM_SIZE+16(r1)
        lfd     f3,FM_SIZE+24(r1)

        b       pmap_g4_restore                 // restore MSR and done


        // G4 -- copy using VRs

pmap_copy_g4:                                   // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
        la      r9,FM_SIZE+16(r1)               // place where we save VRs to r9
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        stvx    v0,0,r9                         // save some VRs so we can use them to copy
        li      r7,48                           // another offset
        stvx    v1,r5,r9
        li      r0,PPC_PGBYTES/64               // we loop over 64-byte chunks
        stvx    v2,r6,r9
        mtctr   r0
        li      r8,96                           // get look-ahead for touch
        stvx    v3,r7,r9
        li      r9,128
        mtmsr   r12                             // now we've saved VRs on stack, turn off DR
        isync                                   // wait for it to happen
        b       pmap_g4_copy_loop

        .align  5                               // align inner loops
pmap_g4_copy_loop:                              // loop over 64-byte chunks
        dcbt    r3,r8                           // touch 3 lines ahead
        nop                                     // avoid a 17-word loop...
        dcbt    r3,r9                           // touch 4 lines ahead
        nop                                     // more padding
        dcba    0,r4                            // avoid pre-fetch of 1st dest line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64
        dcba    r6,r4                           // avoid pre-fetch of 2nd line
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        dcbf    0,r4                            // push line 1
        dcbf    r6,r4                           // and line 2
        addi    r4,r4,64
        bdnz    pmap_g4_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-32               // point to last line in page
pmap_g4_icache_flush:
        subic.  r9,r8,32                        // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,32                        // get offset to next line
        icbi    r4,r9
        bne     pmap_g4_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9

pmap_g4_restore:                                // r11=MSR
        mtmsr   r11                             // turn EE on, VEC and FP off
        isync                                   // wait for it to happen
        addi    r1,r1,kSFSize                   // pop off our stack frame
        lwz     r0,8(r1)                        // restore return address
        mtlr    r0
        blr


        // 64-bit/128-byte processor: copy using VRs

pmap_copy_64:                                   // r10=features, r11=old MSR
        sldi    r3,r3,12                        // get page address from page num
        sldi    r4,r4,12                        // get page address from page num
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        bf      pfAltivecb,pmap_novmx_copy      // altivec suppressed...
        stvx    v0,0,r9                         // save 8 VRs so we can copy wo bubbles
        stvx    v1,r5,r9
        li      r7,48                           // another offset
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        stvx    v2,r6,r9
        stvx    v3,r7,r9
        addi    r9,r9,64                        // advance base ptr so we can store another 4
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        stvx    v4,0,r9
        stvx    v5,r5,r9
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        stvx    v6,r6,r9
        stvx    v7,r7,r9
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        li      r8,-128                         // offset so we can reach back one line
        mtmsrd  r12                             // now we've saved VRs, turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream
        b       pmap_64_copy_loop

        .align  5                               // align inner loops
pmap_64_copy_loop:                              // loop over 128-byte chunks
        dcbz128 0,r4                            // avoid read of destination line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64                        // don't have enough GPRs so add 64 2x
        lvx     v4,0,r3                         // offset 64
        lvx     v5,r5,r3                        // offset 80
        lvx     v6,r6,r3                        // offset 96
        lvx     v7,r7,r3                        // offset 112
        addi    r3,r3,64
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        addi    r4,r4,64
        stvx    v4,0,r4                         // offset 64
        stvx    v5,r5,r4                        // offset 80
        stvx    v6,r6,r4                        // offset 96
        stvx    v7,r7,r4                        // offset 112
        addi    r4,r4,64
        dcbf    r8,r4                           // flush the line we just wrote
        bdnz    pmap_64_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page
pmap_64_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_64_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync
        la      r9,FM_SIZE+16(r1)               // get base address of VR save area on stack
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9
        addi    r9,r9,64
        lvx     v4,0,r9
        lvx     v5,r5,r9
        lvx     v6,r6,r9
        lvx     v7,r7,r9

        b       pmap_g4_restore                 // restore lower half of MSR and return

        //
        // Copy on 64-bit without VMX
        //

pmap_novmx_copy:
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        mtmsrd  r12                             // turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream

pmap_novmx_copy_loop:                           // loop over 128-byte cache lines
        dcbz128 0,r4                            // avoid read of dest line

        ld      r0,0(r3)                        // Load half a line
        ld      r12,8(r3)
        ld      r5,16(r3)
        ld      r6,24(r3)
        ld      r7,32(r3)
        ld      r8,40(r3)
        ld      r9,48(r3)
        ld      r10,56(r3)

        std     r0,0(r4)                        // Store half a line
        std     r12,8(r4)
        std     r5,16(r4)
        std     r6,24(r4)
        std     r7,32(r4)
        std     r8,40(r4)
        std     r9,48(r4)
        std     r10,56(r4)

        ld      r0,64(r3)                       // Load half a line
        ld      r12,72(r3)
        ld      r5,80(r3)
        ld      r6,88(r3)
        ld      r7,96(r3)
        ld      r8,104(r3)
        ld      r9,112(r3)
        ld      r10,120(r3)

        addi    r3,r3,128

        std     r0,64(r4)                       // Store half a line
        std     r12,72(r4)
        std     r5,80(r4)
        std     r6,88(r4)
        std     r7,96(r4)
        std     r8,104(r4)
        std     r9,112(r4)
        std     r10,120(r4)

        dcbf    0,r4                            // flush the line we just wrote
        addi    r4,r4,128
        bdnz    pmap_novmx_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page

pmap_novmx_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_novmx_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync

        b       pmap_g4_restore                 // restore lower half of MSR and return


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>

// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
// These routines all run both on 32 and 64-bit machines, though because they are called
// by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
// by MapUserMemoryWindow will, however, be 64 bits on 64-bit machines.  Be careful to avoid
// using compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
// is no need to store or load it, which are mode-dependent operations since it could be
// 32 or 64 bits.

#define kkFrameSize     (FM_SIZE+32)

#define kkBufSize       (FM_SIZE+0)
#define kkCR3           (FM_SIZE+4)
#define kkSource        (FM_SIZE+8)
#define kkDest          (FM_SIZE+12)
#define kkCountPtr      (FM_SIZE+16)
#define kkR31Save       (FM_SIZE+20)
#define kkThrErrJmp     (FM_SIZE+24)


// nonvolatile CR bits we use as flags in cr3

#define kk64bit         12
#define kkNull          13
#define kkIn            14
#define kkString        15
#define kkZero          15

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyoutstr(src, dst, maxcount, count)
 *      vm_offset_t     src;        // r3
 *      addr64_t        dst;        // r4 and r5
 *      vm_size_t       maxcount;   // r6
 *      vm_size_t*      count;      // r7
 *
 * Set *count to the number of bytes copied.
 */

ENTRY(copyoutstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r4                          // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r11,r5                          // move low word of 64-bit user address to r11
        stw     r0,0(r7)                        // initialize #bytes moved
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyinstr(src, dst, maxcount, count)
 *      addr64_t        src;        // r3 and r4
 *      vm_offset_t     dst;        // r5
 *      vm_size_t       maxcount;   // r6
 *      vm_size_t*      count;      // r7
 *
 * Set *count to the number of bytes copied
 * If dst == NULL, don't copy, just count bytes.
 * Only currently called from klcopyinstr.
 */

ENTRY(copyinstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        cmplwi  r5,0                            // dst==NULL?
        mr      r10,r3                          // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r11,r4                          // move low word of 64-bit user address to r11
        crmove  kkNull,cr0_eq                   // remember if (dst==NULL)
        stw     r0,0(r7)                        // initialize #bytes moved
        crset   kkIn                            // flag as copyin (rather than copyout)
        b       copyJoin1                       // skip over the "crclr kkNull"


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyout(src, dst, count)
 *      vm_offset_t     src;        // r3
 *      addr64_t        dst;        // r4 and r5
 *      size_t          count;      // r6
 */

        .align  5
        .globl  EXT(copyout)
        .globl  EXT(copyoutmsg)

LEXT(copyout)
LEXT(copyoutmsg)

#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
        stw     r12,0x6100+(12*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r4                          // move high word of 64-bit user address to r10
        crclr   kkString                        // not a string version
        mr      r11,r5                          // move low word of 64-bit user address to r11
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyin(src, dst, count)
 *      addr64_t        src;        // r3 and r4
 *      vm_offset_t     dst;        // r5
 *      size_t          count;      // r6
 */


        .align  5
        .globl  EXT(copyin)
        .globl  EXT(copyinmsg)

LEXT(copyin)
LEXT(copyinmsg)

        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r3                          // move high word of 64-bit user address to r10
        crclr   kkString                        // not a string version
        mr      r11,r4                          // move low word of 64-bit user address to r11
        crset   kkIn                            // flag as copyin


// Common code to handle setup for all the copy variants:
//      r2 = caller's cr3
//      r3 = source if copyout
//      r5 = dest if copyin
//      r6 = buffer length or count
//      r7 = count output ptr (if kkString set)
//      r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
//      r11 = low word of 64-bit user-space address
//      cr3 = kkIn, kkString, kkNull flags

copyJoin:
        crclr   kkNull                          // (dst==NULL) convention not used with this call
copyJoin1:                                      // enter from copyinstr with kkNull set
        mflr    r0                              // get return address
        cmplwi  r6,0                            // buffer length 0?
        lis     r9,0x1000                       // r9 <- 0x10000000 (256MB)
        stw     r0,FM_LR_SAVE(r1)               // save return
        cmplw   cr1,r6,r9                       // buffer length > 256MB ?
        mfsprg  r8,2                            // get the features
        beq--   copyinout_0                     // 0 length is degenerate case
        stwu    r1,-kkFrameSize(r1)             // set up stack frame
        stw     r2,kkCR3(r1)                    // save caller's cr3, which we use for flags
        mtcrf   0x02,r8                         // move pf64Bit to cr6
        stw     r3,kkSource(r1)                 // save args across MapUserMemoryWindow
        stw     r5,kkDest(r1)
        stw     r6,kkBufSize(r1)
        crmove  kk64bit,pf64Bitb                // remember if this is a 64-bit processor
        stw     r7,kkCountPtr(r1)
        stw     r31,kkR31Save(r1)               // we use r31 globally for mapped user ptr
        li      r31,0                           // no mapped ptr yet


// Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
// the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
// the mapping.

        ble++   cr1,copyin0                     // skip if buffer length <= 256MB
        bf      kkString,copyinout_too_big      // error if not string op
        mr      r6,r9                           // silently clamp buffer length to 256MB
        stw     r9,kkBufSize(r1)                // update saved copy too


// Set up thread_recover in case we hit an illegal address.

copyin0:
        mfsprg  r8,1                            // Get the current thread
        lis     r2,hi16(copyinout_error)
        ori     r2,r2,lo16(copyinout_error)
        lwz     r4,THREAD_RECOVER(r8)
        lwz     r3,ACT_VMMAP(r8)                // r3 <- vm_map virtual address
        stw     r2,THREAD_RECOVER(r8)
        stw     r4,kkThrErrJmp(r1)


// Map user segment into kernel map, turn on 64-bit mode.  At this point:
//      r3 = vm map
//      r6 = buffer length
//      r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
//
// When we call MapUserMemoryWindow, we pass:
//      r3 = vm map ptr
//      r4/r5 = 64-bit user space address as an addr64_t

        mr      r4,r10                          // copy user ptr into r4/r5
        mr      r5,r11
#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
        stw     r12,0x6100+(13*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        bl      EXT(MapUserMemoryWindow)        // get r3/r4 <- 64-bit address in kernel map of user operand
#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
        stw     r12,0x6100+(14*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mr      r31,r4                          // r31 <- mapped ptr into user space (may be 64-bit)
        bf--    kk64bit,copyin1                 // skip if a 32-bit processor

        rldimi  r31,r3,32,0                     // slam high-order bits into mapped ptr
        mfmsr   r4                              // if 64-bit, turn on SF so we can use returned ptr
        li      r0,1
        rldimi  r4,r0,63,MSR_SF_BIT             // light bit 0
        mtmsrd  r4                              // turn on 64-bit mode
        isync                                   // wait for mode to change


// Load r3-r5, substituting mapped ptr as appropriate.

copyin1:
        lwz     r5,kkBufSize(r1)                // restore length to copy
        bf      kkIn,copyin2                    // skip if copyout
        lwz     r4,kkDest(r1)                   // copyin: dest is kernel ptr
        mr      r3,r31                          // source is mapped ptr
        b       copyin3
copyin2:                                        // handle copyout
        lwz     r3,kkSource(r1)                 // source is kernel buffer (r3 at entry)
        mr      r4,r31                          // dest is mapped ptr into user space


// Finally, all set up to copy:
//      r3 = source ptr (mapped if copyin)
//      r4 = dest ptr (mapped if copyout)
//      r5 = length
//      r31 = mapped ptr returned by MapUserMemoryWindow
//      cr3 = kkIn, kkString, kk64bit, and kkNull flags

copyin3:
        bt      kkString,copyString             // handle copyinstr and copyoutstr
        bl      EXT(bcopy)                      // copyin and copyout: let bcopy do the work
        li      r3,0                            // return success


// Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
// from error recovery if we get a DSI accessing user space.  Restore the caller's
// recovery ptr and pop off the frame.
//      r3 = 0, EFAULT, or ENAMETOOLONG

copyinx:
        lwz     r2,kkCR3(r1)                    // get caller's cr3
        mfsprg  r6,1                            // Get the current thread
        bf--    kk64bit,copyinx1                // skip if 32-bit processor
        mfmsr   r12
        rldicl  r12,r12,0,MSR_SF_BIT+1          // if 64-bit processor, turn 64-bit mode off
        mtmsrd  r12                             // turn SF off
        isync                                   // wait for the mode to change
copyinx1:
        lwz     r0,FM_LR_SAVE+kkFrameSize(r1)   // get return address
        lwz     r31,kkR31Save(r1)               // restore caller's r31
        lwz     r4,kkThrErrJmp(r1)              // load saved thread recover
        addi    r1,r1,kkFrameSize               // pop off our stack frame
        mtlr    r0
        stw     r4,THREAD_RECOVER(r6)           // restore thread recover
        mtcrf   0x10,r2                         // restore cr3
        blr


/* We get here via the exception handler if an illegal
 * user memory reference was made.  This error handler is used by
 * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
 * they were at point of fault, so for example cr3 flags are valid.
 */

copyinout_error:
        li      r3,EFAULT                       // return error
        b       copyinx

copyinout_0:                                    // degenerate case: 0-length copy
        mtcrf   0x10,r2                         // restore cr3
        li      r3,0                            // return success
        blr

copyinout_too_big:                              // degenerate case
        mtcrf   0x10,r2                         // restore cr3
        lwz     r1,0(r1)                        // pop off stack frame
        li      r3,ENAMETOOLONG
        blr

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
// Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
// if necessary, and:
//      r3 = source ptr, mapped if copyinstr
//      r4 = dest ptr, mapped if copyoutstr
//      r5 = buffer length
//      r31 = mapped ptr returned by MapUserMemoryWindow
//      cr3 = kkIn, kkString, kkNull, and kk64bit flags
// We do word copies unless the buffer is very short, then use a byte copy loop
// for the leftovers if necessary.  The crossover at which the word loop becomes
// faster is about seven bytes, counting the zero.
//
// We first must word-align the source ptr, in order to avoid taking a spurious
// page fault.

copyString:
        cmplwi  cr1,r5,15                       // is buffer very short?
        mr      r12,r3                          // remember ptr to 1st source byte
        mtctr   r5                              // assuming short, set up loop count for bytes
        blt--   cr1,copyinstr8                  // too short for word loop
        rlwinm  r2,r3,0,0x3                     // get byte offset of 1st byte within word
        rlwinm  r9,r3,3,0x18                    // get bit offset of 1st byte within word
        li      r7,-1
        sub     r3,r3,r2                        // word-align source address
        add     r6,r5,r2                        // get length starting at byte 0 in word
        srw     r7,r7,r9                        // get mask for bytes in first word
        srwi    r0,r6,2                         // get #words in buffer
        lwz     r5,0(r3)                        // get aligned word with first source byte
        lis     r10,hi16(0xFEFEFEFF)            // load magic constants into r10 and r11
        lis     r11,hi16(0x80808080)
        mtctr   r0                              // set up word loop count
        addi    r3,r3,4                         // advance past the source word
        ori     r10,r10,lo16(0xFEFEFEFF)
        ori     r11,r11,lo16(0x80808080)
        orc     r8,r5,r7                        // map bytes preceding first source byte into 0xFF
        bt--    kkNull,copyinstr5enter          // enter loop that just counts

// Special case 1st word, which has been 0xFF filled on left.  Note that we use
// "and.", even though we execute both in 32 and 64-bit mode.  This is OK.

        slw     r5,r5,r9                        // left justify payload bytes
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        subfic  r0,r2,4                         // get r0 <- #payload bytes in 1st word
        and.    r7,r9,r7                        // if r7==0, then all bytes in r8 are nonzero
        stw     r5,0(r4)                        // copy payload bytes to dest buffer
        add     r4,r4,r0                        // then point to next byte in dest buffer
        bdnzt   cr0_eq,copyinstr6               // use loop that copies if 0 not found

        b       copyinstr7                      // 0 found (buffer can't be full)


// Word loop(s).  They do a word-parallel search for 0s, using the following
// inobvious but very efficient test:
//      y = data + 0xFEFEFEFF
//      z = ~data & 0x80808080
// If (y & z)==0, then all bytes in dataword are nonzero.  There are two copies
// of this loop, one that just counts and another that copies.
//      r3 = ptr to next word of source (word aligned)
//      r4 = ptr to next byte in buffer
//      r6 = original buffer length (adjusted to be word origin)
//      r10 = 0xFEFEFEFF
//      r11 = 0x80808080
//      r12 = ptr to 1st source byte (used to determine string length)

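// Worked example of the test above (for illustration only):
//      data = 0x61626364 ("abcd"):  y = 0x60616263, z = 0x80808080, y & z = 0
//              -> no zero byte in this word
//      data = 0x61006263 ("a",0,"bc"): y = 0x5FFF6162, z = 0x80808080,
//              y & z = 0x00800000 -> the 0x80 marks the zero byte in byte 1
//              (0x01 false hits are cleaned up at copyinstr7 below)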
        .align  5                               // align inner loops for speed
copyinstr5:                                     // version that counts but does not copy
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // advance past it
copyinstr5enter:
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
        bdnzt   cr0_eq,copyinstr5               // if r7==0, then all bytes in r8 are nonzero

        b       copyinstr7

        .align  5                               // align inner loops for speed
copyinstr6:                                     // version that counts and copies
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // advance past it
        addi    r4,r4,4                         // increment dest ptr while we wait for data
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
        stw     r8,-4(r4)                       // pack all 4 bytes into buffer
        bdnzt   cr0_eq,copyinstr6               // if r7==0, then all bytes are nonzero


// Either 0 found or buffer filled.  The above algorithm has mapped nonzero bytes to 0
// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
// mapped to 0x80.  We must mask out these false hits before searching for an 0x80 byte.
//      r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
//      r6 = original buffer length (adjusted to be word origin)
//      r7 = computed vector of 0x00 and 0x80 bytes
//      r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
//      r12 = ptr to 1st source byte (used to determine string length)
//      cr0 = beq set iff 0 not found

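// Illustration (not from the original source): for r8 = 0x01006263 the test
// yields r7 = 0x80800000 -- the 0x80 in byte 0 is a false hit caused by the
// 0x01, which the rlwinm/andc pair below removes, leaving 0x00800000.  The
// srwi/cntlzw/srwi sequence then converts the position of that 0x80 (8, 16,
// 24, or 32 leading zeros after the shift) into a transferred-byte count of
// 1-4, counting the terminating zero itself.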
copyinstr7:
        rlwinm  r2,r8,7,0,31                    // move 0x01 bits to 0x80 position
        rlwinm  r6,r6,0,0x3                     // mask down to partial byte count in last word
        andc    r7,r7,r2                        // turn off false hits from 0x0100 worst case
        crnot   kkZero,cr0_eq                   // 0 found iff cr0_eq is off
        srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
        cmpwi   r6,0                            // any bytes left over in last word?
        cntlzw  r7,r7                           // now we can find the 0 byte (ie, the 0x80)
        subi    r3,r3,4                         // back up r3 to point to 1st byte in r8
        srwi    r7,r7,3                         // convert 8,16,24,32 to 1,2,3,4
        add     r3,r3,r7                        // now r3 points one past 0 byte, or at 1st byte not xferred
        bt++    kkZero,copyinstr10              // 0 found, so done

        beq     copyinstr10                     // r6==0, so buffer truly full
        mtctr   r6                              // 0 not found, loop over r6 bytes
        b       copyinstr8                      // enter byte loop for last 1-3 leftover bytes


// Byte loop.  This is used for very small buffers and for the odd bytes left over
// after searching and copying words at a time.
//      r3 = ptr to next byte of source
//      r4 = ptr to next dest byte
//      r12 = ptr to first byte of source
//      ctr = count of bytes to check

        .align  5                               // align inner loops for speed
copyinstr8:                                     // loop over bytes of source
        lbz     r0,0(r3)                        // get next byte of source
        addi    r3,r3,1
        addi    r4,r4,1                         // increment dest addr whether we store or not
        cmpwi   r0,0                            // the 0?
        bt--    kkNull,copyinstr9               // don't store if copyinstr with NULL ptr
        stb     r0,-1(r4)
copyinstr9:
        bdnzf   cr0_eq,copyinstr8               // loop if byte not 0 and more room in buffer

        crmove  kkZero,cr0_eq                   // remember if 0 found or buffer filled


// Buffer filled or 0 found.  Unwind and return.
//      r3 = ptr to 1st source byte not transferred
//      r12 = ptr to 1st source byte
//      r31 = mapped ptr returned by MapUserMemoryWindow
//      cr3 = kkZero set iff 0 found

copyinstr10:
        lwz     r9,kkCountPtr(r1)               // get ptr to place to store count of bytes moved
        sub     r2,r3,r12                       // compute #bytes copied (including the 0)
        li      r3,0                            // assume success return status
        stw     r2,0(r9)                        // store #bytes moved
        bt++    kkZero,copyinx                  // we did find the 0 so return 0
        li      r3,ENAMETOOLONG                 // buffer filled
        b       copyinx                         // join main exit routine

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copypv(source, sink, size, which)
 *      addr64_t        src;        // r3 and r4
 *      addr64_t        dst;        // r5 and r6
 *      size_t          size;       // r7
 *      int             which;      // r8
 *
 * Operand size bytes are copied from operand src into operand dst.  The source and
 * destination operand addresses are given as addr64_t, and may designate starting
 * locations in physical or virtual memory in any combination except where both are
 * virtual.  Virtual memory locations may be in either the kernel or the current thread's
 * address space.  Operand size may be up to 256MB.
 *
 * Operation is controlled by operand which, which offers these options:
 *      cppvPsrc : source operand is (1) physical or (0) virtual
 *      cppvPsnk : destination operand is (1) physical or (0) virtual
 *      cppvKmap : virtual operand is in (1) kernel or (0) current thread
 *      cppvFsnk : (1) flush destination before and after transfer
 *      cppvFsrc : (1) flush source before and after transfer
 *      cppvNoModSnk : (1) don't set destination operand's changed bit(s)
 *      cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
 *
 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
 * This section describes the operation of the new 64-bit path.
 *
 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
 * window in the kernel address space into all of physical RAM plus the I/O hole.  Since
 * the window's mappings specify the proper access policies for the underlying memory,
 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
 * and cppvFsrc are ignored.  Physical operand addresses are relocated into the physical
 * memory window, and are accessed with data relocation on.  Virtual addresses are either
 * within the kernel, or are mapped into the kernel address space through the user memory
 * window.  Because accesses to a virtual operand are performed with data relocation on,
 * the new path does not have to translate the address, disable/enable interrupts, lock
 * the mapping, or update referenced and changed bits.
 *
 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
 * a substantial performance penalty for copypv operating in real mode.  Utilizing the
 * new 64-bit path, transfer performance increases >100% on the G5.
 *
 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
 * might be expected.  The 970 follows PowerPC architecture version 2.01, which defines
 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
 * required.
 *
 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
 * getting clobbered unless we do something special.  So, we preserve our 64-bit non-volatiles
 * in our own stack frame across calls to 32-bit functions.
 */

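// Illustrative (hypothetical) call, not part of this file: to copy len bytes
// from a physical page into a kernel-virtual buffer, a caller would pass
// which = cppvPsrc | cppvKmap, e.g.
//      copypv(src_paddr, (addr64_t)kbuf, len, cppvPsrc | cppvKmap);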
// Map operand which bits into non-volatile CR2 and CR3 bits.
#define whichAlign      ((3+1)*4)
#define whichMask       0x007F0000
#define pvPsnk          (cppvPsnkb - whichAlign)
#define pvPsrc          (cppvPsrcb - whichAlign)
#define pvFsnk          (cppvFsnkb - whichAlign)
#define pvFsrc          (cppvFsrcb - whichAlign)
#define pvNoModSnk      (cppvNoModSnkb - whichAlign)
#define pvNoRefSrc      (cppvNoRefSrcb - whichAlign)
#define pvKmap          (cppvKmapb - whichAlign)
#define pvNoCache       cr2_lt

        .align  5
        .globl  EXT(copypv)

LEXT(copypv)
        mfsprg  r10,2                           // get feature flags
        mtcrf   0x02,r10                        // we need to test pf64Bit
        bt++    pf64Bitb,copypv_64              // skip if 64-bit processor (only they take hint)

        b       EXT(hw_copypv_32)               // carry on with 32-bit copypv

// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
copypv_64:
        mfsprg  r9,1                            // get current thread
        stwu    r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
                                                // allocate stack frame and link it
        mflr    r0                              // get return address
        mfcr    r10                             // get cr2 and cr3
        lwz     r12,THREAD_RECOVER(r9)          // get error callback
        stw     r26,FM_ARG0+0x00(r1)            // save non-volatile r26
        stw     r27,FM_ARG0+0x04(r1)            // save non-volatile r27
        stw     r28,FM_ARG0+0x08(r1)            // save non-volatile r28
        stw     r29,FM_ARG0+0x0C(r1)            // save non-volatile r29
        stw     r30,FM_ARG0+0x10(r1)            // save non-volatile r30
        stw     r31,FM_ARG0+0x14(r1)            // save non-volatile r31
        stw     r12,FM_ARG0+0x20(r1)            // save error callback
        stw     r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
                                                // save return address
        stw     r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
                                                // save non-volatile cr2 and cr3

// Non-volatile register usage in this routine is:
//      r26: saved msr image
//      r27: current pmap_t / virtual source address
//      r28: destination virtual address
//      r29: source address
//      r30: destination address
//      r31: byte count to copy
//      cr2/3: parameter 'which' bits

        rlwinm  r8,r8,whichAlign,whichMask      // align and mask which bits
        mr      r31,r7                          // copy size to somewhere non-volatile
        mtcrf   0x20,r8                         // insert which bits into cr2 and cr3
        mtcrf   0x10,r8                         // insert which bits into cr2 and cr3
        rlwinm  r29,r3,0,1,0                    // form source address high-order bits
        rlwinm  r30,r5,0,1,0                    // form destination address high-order bits
        rlwimi  r29,r4,0,0,31                   // form source address low-order bits
        rlwimi  r30,r6,0,0,31                   // form destination address low-order bits
        crand   cr7_lt,pvPsnk,pvPsrc            // are both operand addresses physical?
        cntlzw  r0,r31                          // count leading zeroes in byte count
        cror    cr7_eq,pvPsnk,pvPsrc            // cr7_eq <- source or destination is physical
        bf--    cr7_eq,copypv_einval            // both operands may not be virtual
        cmplwi  r0,4                            // byte count greater than or equal 256M (2**28)?
        blt--   copypv_einval                   // byte count too big, give EINVAL
        cmplwi  r31,0                           // byte count zero?
        beq--   copypv_zero                     // early out
        bt      cr7_lt,copypv_phys              // both operand addresses are physical
        mr      r28,r30                         // assume destination is virtual
        bf      pvPsnk,copypv_dv                // is destination virtual?
        mr      r28,r29                         // no, so source must be virtual
copypv_dv:
        lis     r27,ha16(EXT(kernel_pmap))      // get kernel's pmap_t *, high-order
        lwz     r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
        bt      pvKmap,copypv_kern              // virtual address in kernel map?
        lwz     r3,ACT_VMMAP(r9)                // get user's vm_map *
        rldicl  r4,r28,32,32                    // r4, r5 <- addr64_t virtual address
        rldicl  r5,r28,0,32
        std     r29,FM_ARG0+0x30(r1)            // preserve 64-bit r29 across 32-bit call
        std     r30,FM_ARG0+0x38(r1)            // preserve 64-bit r30 across 32-bit call
        bl      EXT(MapUserMemoryWindow)        // map slice of user space into kernel space
        ld      r29,FM_ARG0+0x30(r1)            // restore 64-bit r29
        ld      r30,FM_ARG0+0x38(r1)            // restore 64-bit r30
        rlwinm  r28,r3,0,1,0                    // convert relocated addr64_t virtual address
        rlwimi  r28,r4,0,0,31                   //  into a single 64-bit scalar
copypv_kern:

// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
// update the referenced and changed bits manually after the copy.  So, force the appropriate
// flag bit on for the virtual operand.
        crorc   pvNoModSnk,pvNoModSnk,pvPsnk    // for virtual dest, let hardware do ref/chg bits
        crorc   pvNoRefSrc,pvNoRefSrc,pvPsrc    // for virtual source, let hardware do ref bit

// We'll be finding a mapping and looking at it, so we need to disable 'rupts.
        lis     r0,hi16(MASK(MSR_VEC))          // get vector mask
        ori     r0,r0,lo16(MASK(MSR_FP))        // insert fp mask
        mfmsr   r26                             // save current msr
        andc    r26,r26,r0                      // turn off VEC and FP in saved copy
        ori     r0,r0,lo16(MASK(MSR_EE))        // add EE to our mask
        andc    r0,r26,r0                       // disable EE in our new msr image
        mtmsrd  r0                              // introduce new msr image

// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28.  We now
// try to find a mapping corresponding to this address in order to determine whether the address
// is cacheable.  If we don't find a mapping, we can safely assume that the operand is cacheable
// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
// examine the mapping's caching-inhibited bit.
        mr      r3,r27                          // r3 <- pmap_t pmap
        rldicl  r4,r28,32,32                    // r4, r5 <- addr64_t va
        rldicl  r5,r28,0,32
        la      r6,FM_ARG0+0x18(r1)             // r6 <- addr64_t *nextva
        li      r7,1                            // r7 <- int full, search nested mappings
        std     r26,FM_ARG0+0x28(r1)            // preserve 64-bit r26 across 32-bit calls
        std     r28,FM_ARG0+0x30(r1)            // preserve 64-bit r28 across 32-bit calls
        std     r29,FM_ARG0+0x38(r1)            // preserve 64-bit r29 across 32-bit calls
        std     r30,FM_ARG0+0x40(r1)            // preserve 64-bit r30 across 32-bit calls
        bl      EXT(mapping_find)               // find mapping for virtual operand
        mr.     r3,r3                           // did we find it?
        beq     copypv_nomapping                // nope, so we'll assume it's cacheable
        lwz     r4,mpVAddr+4(r3)                // get low half of virtual addr for hw flags
        rlwinm. r4,r4,0,mpIb-32,mpIb-32         // caching-inhibited bit set?
        crnot   pvNoCache,cr0_eq                // if it is, use bcopy_nc
        bl      EXT(mapping_drop_busy)          // drop busy on the mapping
copypv_nomapping:
        ld      r26,FM_ARG0+0x28(r1)            // restore 64-bit r26
        ld      r28,FM_ARG0+0x30(r1)            // restore 64-bit r28
        ld      r29,FM_ARG0+0x38(r1)            // restore 64-bit r29
        ld      r30,FM_ARG0+0x40(r1)            // restore 64-bit r30
        mtmsrd  r26                             // restore msr to its previous state

// Set both the source and destination virtual addresses to the virtual operand's address --
// we'll overlay one of them with the physical operand's address.
        mr      r27,r28                         // make virtual operand BOTH source AND destination

// Now we're ready to relocate the physical operand address(es) into the physical memory window.
// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
// space somewhere at or over the 2**32 line.  If one or both of the operands are in the I/O hole,
// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
copypv_phys:
        ld      r6,lgPMWvaddr(0)                // get physical memory window virtual address
        bf      pvPsnk,copypv_dstvirt           // is destination address virtual?
        cntlzd  r4,r30                          // count leading zeros in destination address
        cmplwi  r4,32                           // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
        cror    pvNoCache,cr0_eq,pvNoCache      // use bcopy_nc for I/O hole locations
        add     r28,r30,r6                      // relocate physical destination into physical window
copypv_dstvirt:
        bf      pvPsrc,copypv_srcvirt           // is source address virtual?
        cntlzd  r4,r29                          // count leading zeros in source address
        cmplwi  r4,32                           // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
        cror    pvNoCache,cr0_eq,pvNoCache      // use bcopy_nc for I/O hole locations
        add     r27,r29,r6                      // relocate physical source into physical window
copypv_srcvirt:

// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
// funny happens during the copy.  So, we set a pointer to our error handler in the per-thread
// control block.
        mfsprg  r8,1                            // get current thread's stuff
        lis     r3,hi16(copypv_error)           // get our error callback's address, high
        ori     r3,r3,lo16(copypv_error)        // get our error callback's address, low
        stw     r3,THREAD_RECOVER(r8)           // set our error callback

// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
// 64-bit mode.
        li      r0,1                            // get a handy one bit
        mfmsr   r3                              // get current msr
        rldimi  r3,r0,63,MSR_SF_BIT             // set SF bit on in our msr copy
        mtmsrd  r3                              // enter 64-bit mode

// If requested, flush data cache.
// Note that we don't flush; the code is being kept "just in case".
#if 0
        bf      pvFsrc,copypv_nfs               // do we flush the source?
        rldicl  r3,r27,32,32                    // r3, r4 <- addr64_t source virtual address
        rldicl  r4,r27,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the source operand
copypv_nfs:
        bf      pvFsnk,copypv_nfdx              // do we flush the destination?
        rldicl  r3,r28,32,32                    // r3, r4 <- addr64_t destination virtual address
        rldicl  r4,r28,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the destination operand
copypv_nfdx:
#endif

// Call bcopy or bcopy_nc to perform the copy.
        mr      r3,r27                          // r3 <- source virtual address
        mr      r4,r28                          // r4 <- destination virtual address
        mr      r5,r31                          // r5 <- bytes to copy
        bt      pvNoCache,copypv_nc             // take non-caching route
        bl      EXT(bcopy)                      // call bcopy to do the copying
        b       copypv_copydone
copypv_nc:
        bl      EXT(bcopy_nc)                   // call bcopy_nc to do the copying
copypv_copydone:

// If requested, flush data cache.
// Note that we don't flush; the code is being kept "just in case".
#if 0
        bf      pvFsrc,copypv_nfsx              // do we flush the source?
        rldicl  r3,r27,32,32                    // r3, r4 <- addr64_t source virtual address
        rldicl  r4,r27,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the source operand
copypv_nfsx:
        bf      pvFsnk,copypv_nfd               // do we flush the destination?
        rldicl  r3,r28,32,32                    // r3, r4 <- addr64_t destination virtual address
        rldicl  r4,r28,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the destination operand
copypv_nfd:
#endif

// Leave 64-bit mode.
        mfmsr   r3                              // get current msr
        rldicl  r3,r3,0,MSR_SF_BIT+1            // clear SF bit in our copy
        mtmsrd  r3                              // leave 64-bit mode

// If requested, set ref/chg on source/dest physical operand(s).  It is possible that copy is
// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
// Note that this code is page-size sensitive, so it should probably be a part of our low-level
// code in hw_vm.s.
        bt      pvNoModSnk,copypv_nomod         // skip destination update if not requested
        std     r29,FM_ARG0+0x30(r1)            // preserve 64-bit r29 across 32-bit calls
        li      r26,1                           // r26 <- 4K-page count
        mr      r27,r31                         // r27 <- byte count
        rlwinm  r3,r30,0,20,31                  // does destination cross a page boundary?
        subfic  r3,r3,4096                      //
        cmplw   r3,r27                          //
        blt     copypv_modnox                   // skip if not crossing case
        subf    r27,r3,r27                      // r27 <- byte count less initial fragment
        addi    r26,r26,1                       // increment page count
copypv_modnox:
        srdi    r3,r27,12                       // pages to update (not including crosser)
        add     r26,r26,r3                      // add in crosser
        srdi    r27,r30,12                      // r27 <- destination page number
copypv_modloop:
        mr      r3,r27                          // r3 <- destination page number
        la      r4,FM_ARG0+0x18(r1)             // r4 <- unsigned int *pindex
        bl      EXT(mapping_phys_lookup)        // see if page is really there
        mr.     r3,r3                           // is it?
        beq--   copypv_modend                   // nope, break out of modify loop
        mr      r3,r27                          // r3 <- destination page number
        bl      EXT(mapping_set_mod)            // set page changed status
        subi    r26,r26,1                       // decrement page count
        cmpwi   r26,0                           // done yet?
        bgt     copypv_modloop                  // nope, iterate
copypv_modend:
        ld      r29,FM_ARG0+0x30(r1)            // restore 64-bit r29
copypv_nomod:
        bt      pvNoRefSrc,copypv_done          // skip source update if not requested
copypv_debugref:
        li      r26,1                           // r26 <- 4K-page count
        mr      r27,r31                         // r27 <- byte count
        rlwinm  r3,r29,0,20,31                  // does source cross a page boundary?
        subfic  r3,r3,4096                      //
        cmplw   r3,r27                          //
        blt     copypv_refnox                   // skip if not crossing case
        subf    r27,r3,r27                      // r27 <- byte count less initial fragment
        addi    r26,r26,1                       // increment page count
copypv_refnox:
        srdi    r3,r27,12                       // pages to update (not including crosser)
        add     r26,r26,r3                      // add in crosser
        srdi    r27,r29,12                      // r27 <- source page number
copypv_refloop:
        mr      r3,r27                          // r3 <- source page number
        la      r4,FM_ARG0+0x18(r1)             // r4 <- unsigned int *pindex
        bl      EXT(mapping_phys_lookup)        // see if page is really there
        mr.     r3,r3                           // is it?
        beq--   copypv_done                     // nope, break out of reference loop
        mr      r3,r27                          // r3 <- source page number
        bl      EXT(mapping_set_ref)            // set page referenced status
        subi    r26,r26,1                       // decrement page count
        cmpwi   r26,0                           // done yet?
        bgt     copypv_refloop                  // nope, iterate

// Return, indicating success.
copypv_done:
copypv_zero:
        li      r3,0                            // our efforts were crowned with success

// Pop frame, restore caller's non-volatiles, restore recovery routine pointer.
copypv_return:
        mfsprg  r9,1                            // get current thread's stuff
        lwz     r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
                                                // get return address
        lwz     r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
                                                // get non-volatile cr2 and cr3
        lwz     r26,FM_ARG0+0x00(r1)            // restore non-volatile r26
        lwz     r27,FM_ARG0+0x04(r1)            // restore non-volatile r27
        mtlr    r0                              // restore return address
        lwz     r28,FM_ARG0+0x08(r1)            // restore non-volatile r28
        mtcrf   0x20,r4                         // restore non-volatile cr2
        mtcrf   0x10,r4                         // restore non-volatile cr3
        lwz     r11,FM_ARG0+0x20(r1)            // get saved error callback
        lwz     r29,FM_ARG0+0x0C(r1)            // restore non-volatile r29
        lwz     r30,FM_ARG0+0x10(r1)            // restore non-volatile r30
        lwz     r31,FM_ARG0+0x14(r1)            // restore non-volatile r31
        stw     r11,THREAD_RECOVER(r9)          // restore our error callback
        lwz     r1,0(r1)                        // release stack frame

        blr                                     // y'all come back now

// Invalid argument handler.
copypv_einval:
        li      r3,EINVAL                       // invalid argument
        b       copypv_return                   // return

// Error encountered during bcopy or bcopy_nc.
copypv_error:
        mfmsr   r3                              // get current msr
        rldicl  r3,r3,0,MSR_SF_BIT+1            // clear SF bit in our copy
        mtmsrd  r3                              // leave 64-bit mode
        li      r3,EFAULT                       // it was all his fault
        b       copypv_return                   // return