]> git.saurik.com Git - apple/xnu.git/blame - osfmk/ppc/movc.s
xnu-1228.3.13.tar.gz
[apple/xnu.git] / osfmk / ppc / movc.s
CommitLineData
1c79356b 1/*
55e303ae 2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31#include <debug.h>
32#include <ppc/asm.h>
33#include <ppc/proc_reg.h>
34#include <mach/ppc/vm_param.h>
35#include <assym.s>
36#include <sys/errno.h>
37
55e303ae
A
38#define INSTRUMENT 0
39
40//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
1c79356b
A
41/*
42 * void pmap_zero_page(ppnum_t pn)
43 *
55e303ae
A
44 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
45 * and handles 32 and 128-byte cache lines.
1c79356b
A
46 */
47
1c79356b 48
55e303ae
A
49 .align 5
// pmap_zero_page
//   In:   r3 = physical page number (shifted left by 12 below to form the page address)
//   Uses: r4, r5, r9, r10, r12 in this block; ml_set_physical_disabled and ml_restore
//         are defined elsewhere.
//   The page is zeroed two cache lines per loop pass, walking from the last line
//   of the page down to offset 0.
50 .globl EXT(pmap_zero_page)
51
52LEXT(pmap_zero_page)
53
54 mflr r12 // save return address
55 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
56 mtlr r12 // restore return address
57 andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
d7e50217 58
55e303ae
A
59 subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page (offset of the last line)
60
61 bt++ pf64Bitb,page0S4 // Go do the big guys...
62
63 slwi r3,r3,12 // get page address from page num
64 b page_zero_1 // Jump to line aligned loop...
65
66 .align 5
67
// Seven nops (28 bytes) plus the one-instruction page0S4 stub (4 bytes) place
// page_zero_1 exactly 32 bytes past the .align 5 boundary above.
68 nop
69 nop
70 nop
71 nop
72 nop
73 nop
74 nop
75
76page0S4:
77 sldi r3,r3,12 // get page address from page num
78
79page_zero_1: // loop zeroing cache lines
80 sub. r5,r4,r9 // more to go? (r5 <- offset of next lower line, sets cr0)
81 dcbz128 r3,r4 // zero either 32 or 128 bytes
82 sub r4,r5,r9 // generate next offset
83 dcbz128 r3,r5 // zero the line at the lower offset
84 bne-- page_zero_1 // loop until r5 reached offset 0 (cr0 from the sub. above)
85
86 b EXT(ml_restore) // restore MSR and do the isync
87
88
89//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
1c79356b
A
90/* void
91 * phys_copy(src, dst, bytecount)
55e303ae
A
92 * addr64_t src;
93 * addr64_t dst;
1c79356b
A
94 * int bytecount
95 *
96 * This routine will copy bytecount bytes from physical address src to physical
55e303ae
A
97 * address dst. It runs in 64-bit mode if necessary, but does not handle
98 * overlap or make any attempt to be optimal. Length must be a signed word.
99 * Not performance critical.
1c79356b
A
100 */
101
1c79356b 102
55e303ae
A
103 .align 5
// phys_copy
//   In:   r3:r4 = src physical address (high:low words of an addr64_t)
//         r5:r6 = dst physical address (high:low words of an addr64_t)
//         r7    = byte count (compared as a signed value)
//   After the merges below: r3 = src, r4 = dst, r5 = remaining count.
//   NOTE(review): the merge of r4 straddles the call to ml_set_physical_disabled,
//   so that routine presumably preserves r3-r7 -- confirm against its definition.
104 .globl EXT(phys_copy)
105
106LEXT(phys_copy)
107
108 rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
109 mflr r12 // get return address
110 rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
111 rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
112 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
113 rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits
114 mtlr r12 // restore return address
115 subic. r5,r7,4 // a word to copy?
116 b phys_copy_2
117
118 .align 5
119
120phys_copy_1: // loop copying words
121 subic. r5,r5,4 // more to go?
122 lwz r0,0(r3)
123 addi r3,r3,4
124 stw r0,0(r4)
125 addi r4,r4,4
126phys_copy_2:
127 bge phys_copy_1 // count-4 >= 0, so at least one more word remains
128 addic. r5,r5,4 // restore count
129 ble phys_copy_4 // no more
130
131 // Loop is aligned here
132
133phys_copy_3: // loop copying bytes
134 subic. r5,r5,1 // more to go?
135 lbz r0,0(r3)
136 addi r3,r3,1
137 stb r0,0(r4)
138 addi r4,r4,1
139 bgt phys_copy_3
140phys_copy_4:
141 b EXT(ml_restore) // restore MSR and do the isync
142
143
144//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
1c79356b
A
145/* void
146 * pmap_copy_page(src, dst)
55e303ae
A
147 * ppnum_t src;
148 * ppnum_t dst;
1c79356b
A
149 *
150 * This routine will copy the physical page src to physical page dst
151 *
55e303ae
A
152 * This routine assumes that the src and dst are page numbers and that the
153 * destination is cached. It runs on 32 and 64 bit processors, with and
154 * without altivec, and with 32 and 128 byte cache lines.
155 * We also must assume that no-one will be executing within the destination
156 * page, and that this will be used for paging. Because this
157 * is a common routine, we have tuned loops for each processor class.
1c79356b
A
158 *
159 */
55e303ae 160#define kSFSize (FM_SIZE+160)
1c79356b 161
1c79356b 162ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
// pmap_copy_page entry and G3 (no AltiVec, 32-bit) path.
//   In:  r3 = source physical page number, r4 = destination physical page number
//   Register roles set up here and relied on by the other paths:
//     r11 = MSR at entry with VEC and FP cleared (restored at pmap_g4_restore)
//     r2  = r11 with EE cleared and FP/VEC set (the MSR used while copying, DR still on)
//     r12 = r2 with DR cleared (32-bit paths only)
//     r10 = feature flags
//   A kSFSize-byte frame is pushed to hold the FPRs or VRs borrowed for the copy.
1c79356b 163
55e303ae
A
164 lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
165 mflr r0 // get return
166 ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
167 stw r0,8(r1) // save
168 stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
169 mfmsr r11 // save MSR at entry
170 mfsprg r10,2 // get feature flags
171 andc r11,r11,r2 // Clear out vec and fp
172 ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
173 andc r2,r11,r2 // Clear out EE as well
174 mtcrf 0x02,r10 // we need to test pf64Bit
175 ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
176 mtcrf 0x80,r10 // we need to test pfAltivec too
177 oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
178 mtmsr r2 // turn EE off, FP and VEC on
179 isync
180 bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
181 slwi r3,r3,12 // get page address from page num
182 slwi r4,r4,12 // get page address from page num
183 rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
184 bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
185
186
187 // G3 -- copy using FPRs
188
189 stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
190 stfd f1,FM_SIZE+8(r1)
191 li r5,PPC_PGBYTES/32 // count of cache lines in a page
192 stfd f2,FM_SIZE+16(r1)
193 mtctr r5
194 stfd f3,FM_SIZE+24(r1)
195 mtmsr r12 // turn off DR after saving FPRs on stack
196 isync
197
198pmap_g3_copy_loop: // loop over 32-byte cache lines
199 dcbz 0,r4 // avoid read of dest line
200 lfd f0,0(r3)
201 lfd f1,8(r3)
202 lfd f2,16(r3)
203 lfd f3,24(r3)
204 addi r3,r3,32
205 stfd f0,0(r4)
206 stfd f1,8(r4)
207 stfd f2,16(r4)
208 stfd f3,24(r4)
209 dcbst 0,r4 // flush dest line to RAM
210 addi r4,r4,32
211 bdnz pmap_g3_copy_loop
212
213 sync // wait for stores to take
214 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
215 li r6,PPC_PGBYTES-32 // point to last line in page
216pmap_g3_icache_flush: // two icbi per pass, walking offsets down to 0
217 subic. r5,r6,32 // more to go?
218 icbi r4,r6 // flush another line in icache
219 subi r6,r5,32 // get offset to next line
220 icbi r4,r5
221 bne pmap_g3_icache_flush
222
223 sync
224 mtmsr r2 // turn DR back on
225 isync
226 lfd f0,FM_SIZE+0(r1) // restore the FPRs
227 lfd f1,FM_SIZE+8(r1)
228 lfd f2,FM_SIZE+16(r1)
229 lfd f3,FM_SIZE+24(r1)
230
231 b pmap_g4_restore // restore MSR and done
232
233
234 // G4 -- copy using VRs
235
236pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
// G4 path: r3/r4 already hold the source/destination page addresses.
// v0-v3 are saved in the frame at FM_SIZE+16 and restored before falling
// through to pmap_g4_restore. r5/r6/r7 keep the offsets 16/32/48 for the
// whole path, because the VR restore at the end reuses them.
237 la r9,FM_SIZE+16(r1) // place where we save VRs to r9
238 li r5,16 // load x-form offsets into r5-r9
239 li r6,32 // another offset
240 stvx v0,0,r9 // save some VRs so we can use to copy
241 li r7,48 // another offset
242 stvx v1,r5,r9
243 li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
244 stvx v2,r6,r9
245 mtctr r0
246 li r8,96 // get look-ahead for touch
247 stvx v3,r7,r9
248 li r9,128 // second look-ahead offset (r9 is no longer the save-area base)
249 mtmsr r12 // now we've saved VRs on stack, turn off DR
250 isync // wait for it to happen
251 b pmap_g4_copy_loop
252
253 .align 5 // align inner loops
254pmap_g4_copy_loop: // loop over 64-byte chunks
255 dcbt r3,r8 // touch 3 lines ahead
256 nop // avoid a 17-word loop...
257 dcbt r3,r9 // touch 4 lines ahead
258 nop // more padding
259 dcba 0,r4 // avoid pre-fetch of 1st dest line
260 lvx v0,0,r3 // offset 0
261 lvx v1,r5,r3 // offset 16
262 lvx v2,r6,r3 // offset 32
263 lvx v3,r7,r3 // offset 48
264 addi r3,r3,64
265 dcba r6,r4 // avoid pre-fetch of 2nd line
266 stvx v0,0,r4 // offset 0
267 stvx v1,r5,r4 // offset 16
268 stvx v2,r6,r4 // offset 32
269 stvx v3,r7,r4 // offset 48
270 dcbf 0,r4 // push line 1
271 dcbf r6,r4 // and line 2
272 addi r4,r4,64
273 bdnz pmap_g4_copy_loop
274
275 sync // wait for stores to take
276 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
277 li r8,PPC_PGBYTES-32 // point to last line in page
278pmap_g4_icache_flush: // two icbi per pass, walking offsets down to 0
279 subic. r9,r8,32 // more to go?
280 icbi r4,r8 // flush from icache
281 subi r8,r9,32 // get offset to next line
282 icbi r4,r9
283 bne pmap_g4_icache_flush
284
285 sync
286 mtmsr r2 // turn DR back on
287 isync
288 la r9,FM_SIZE+16(r1) // get base of VR save area
289 lvx v0,0,r9 // restore the VRs
290 lvx v1,r5,r9
291 lvx v2,r6,r9
292 lvx v3,r7,r9
293
294pmap_g4_restore: // r11=MSR
// Common exit for every pmap_copy_page path. r11 is the MSR captured at entry
// with the VEC and FP bits cleared, so EE returns to its entry state.
295 mtmsr r11 // restore entry MSR: EE as at entry, VEC and FP off
296 isync // wait for it to happen
297 addi r1,r1,kSFSize // pop off our stack frame
298 lwz r0,8(r1) // restore return address
299 mtlr r0
300 blr
301
302
303 // 64-bit/128-byte processor: copy using VRs
304
305pmap_copy_64: // r10=features, r11=old MSR
// 64-bit path with AltiVec: r3/r4 still hold page numbers on entry here.
// v0-v7 are saved in the frame starting at FM_SIZE+16 and restored before
// branching to pmap_g4_restore. r5/r6/r7 keep the offsets 16/32/48 for the
// whole path; r8 = -128 is used to reach back to the line just written.
306 sldi r3,r3,12 // get page address from page num
307 sldi r4,r4,12 // get page address from page num
308 la r9,FM_SIZE+16(r1) // get base of VR save area
309 li r5,16 // load x-form offsets into r5-r9
310 li r6,32 // another offset
311 bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
312 stvx v0,0,r9 // save 8 VRs so we can copy wo bubbles
313 stvx v1,r5,r9
314 li r7,48 // another offset
315 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
316 stvx v2,r6,r9
317 stvx v3,r7,r9
318 addi r9,r9,64 // advance base ptr so we can store another 4
319 mtctr r0
320 li r0,MASK(MSR_DR) // get DR bit
321 stvx v4,0,r9
322 stvx v5,r5,r9
323 andc r12,r2,r0 // turn off DR bit
324 li r0,1 // get a 1 to slam into SF
325 stvx v6,r6,r9
326 stvx v7,r7,r9
327 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
328 li r8,-128 // offset so we can reach back one line
329 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
330 isync // wait for it to happen
331 dcbt128 0,r3,1 // start a forward stream
332 b pmap_64_copy_loop
333
334 .align 5 // align inner loops
335pmap_64_copy_loop: // loop over 128-byte chunks
336 dcbz128 0,r4 // avoid read of destination line
337 lvx v0,0,r3 // offset 0
338 lvx v1,r5,r3 // offset 16
339 lvx v2,r6,r3 // offset 32
340 lvx v3,r7,r3 // offset 48
341 addi r3,r3,64 // don't have enough GPRs so add 64 2x
342 lvx v4,0,r3 // offset 64
343 lvx v5,r5,r3 // offset 80
344 lvx v6,r6,r3 // offset 96
345 lvx v7,r7,r3 // offset 112
346 addi r3,r3,64
347 stvx v0,0,r4 // offset 0
348 stvx v1,r5,r4 // offset 16
349 stvx v2,r6,r4 // offset 32
350 stvx v3,r7,r4 // offset 48
351 addi r4,r4,64
352 stvx v4,0,r4 // offset 64
353 stvx v5,r5,r4 // offset 80
354 stvx v6,r6,r4 // offset 96
355 stvx v7,r7,r4 // offset 112
356 addi r4,r4,64
357 dcbf r8,r4 // flush the line we just wrote
358 bdnz pmap_64_copy_loop
359
360 sync // wait for stores to take
361 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
362 li r8,PPC_PGBYTES-128 // point to last line in page
363pmap_64_icache_flush: // two icbi per pass, walking offsets down to 0
364 subic. r9,r8,128 // more to go?
365 icbi r4,r8 // flush from icache
366 subi r8,r9,128 // get offset to next line
367 icbi r4,r9
368 bne pmap_64_icache_flush
369
370 sync
371 mtmsrd r2 // turn DR back on, SF off
372 isync
373 la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
374 lvx v0,0,r9 // restore the VRs
375 lvx v1,r5,r9
376 lvx v2,r6,r9
377 lvx v3,r7,r9
378 addi r9,r9,64
379 lvx v4,0,r9
380 lvx v5,r5,r9
381 lvx v6,r6,r9
382 lvx v7,r7,r9
383
384 b pmap_g4_restore // restore lower half of MSR and return
385
386 //
387 // Copy on 64-bit without VMX
388 //
389
390pmap_novmx_copy:
// 64-bit path without AltiVec: copies with GPRs only, so nothing is saved in
// the frame. r3/r4 already hold the page addresses. Inside the loop r0, r5-r10
// and r12 are data registers, so the feature flags in r10 and the value in r12
// are gone after the first iteration; r2 and r11 survive for the exit.
391 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
392 mtctr r0
393 li r0,MASK(MSR_DR) // get DR bit
394 andc r12,r2,r0 // turn off DR bit
395 li r0,1 // get a 1 to slam into SF
396 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
397 mtmsrd r12 // turn DR off and SF on (no VRs to save on this path)
398 isync // wait for it to happen
399 dcbt128 0,r3,1 // start a forward stream
400
401pmap_novmx_copy_loop: // loop over 128-byte cache lines
402 dcbz128 0,r4 // avoid read of dest line
403
404 ld r0,0(r3) // Load half a line
405 ld r12,8(r3)
406 ld r5,16(r3)
407 ld r6,24(r3)
408 ld r7,32(r3)
409 ld r8,40(r3)
410 ld r9,48(r3)
411 ld r10,56(r3)
412
413 std r0,0(r4) // Store half a line
414 std r12,8(r4)
415 std r5,16(r4)
416 std r6,24(r4)
417 std r7,32(r4)
418 std r8,40(r4)
419 std r9,48(r4)
420 std r10,56(r4)
421
422 ld r0,64(r3) // Load half a line
423 ld r12,72(r3)
424 ld r5,80(r3)
425 ld r6,88(r3)
426 ld r7,96(r3)
427 ld r8,104(r3)
428 ld r9,112(r3)
429 ld r10,120(r3)
430
431 addi r3,r3,128
432
433 std r0,64(r4) // Store half a line
434 std r12,72(r4)
435 std r5,80(r4)
436 std r6,88(r4)
437 std r7,96(r4)
438 std r8,104(r4)
439 std r9,112(r4)
440 std r10,120(r4)
441
442 dcbf 0,r4 // flush the line we just wrote
443 addi r4,r4,128
444 bdnz pmap_novmx_copy_loop
445
446 sync // wait for stores to take
447 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
448 li r8,PPC_PGBYTES-128 // point to last line in page
449
450pmap_novmx_icache_flush: // two icbi per pass, walking offsets down to 0
451 subic. r9,r8,128 // more to go?
452 icbi r4,r8 // flush from icache
453 subi r8,r9,128 // get offset to next line
454 icbi r4,r9
455 bne pmap_novmx_icache_flush
456
457 sync
458 mtmsrd r2 // turn DR back on, SF off
459 isync
460
461 b pmap_g4_restore // restore lower half of MSR and return
462
463
464
465//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
de355530 466
55e303ae
A
467// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
468// These routines all run both on 32 and 64-bit machines, though because they are called
469// by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
91447636 470// by MapUserMemoryWindow will be 64 bits however on 64-bit machines. Beware to avoid
55e303ae
A
471// using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
472// is no need to store or load it, which are mode-dependent operations since it could be
473// 32 or 64 bits.
474
475#define kkFrameSize (FM_SIZE+32)
476
// Offsets of the save slots within the kkFrameSize-byte frame. Each slot is one
// 32-bit word (every access in this file uses stw/lwz).
477#define kkBufSize (FM_SIZE+0)
91447636 478#define kkCR3 (FM_SIZE+4)
55e303ae
A
479#define kkSource (FM_SIZE+8)
480#define kkDest (FM_SIZE+12)
481#define kkCountPtr (FM_SIZE+16)
482#define kkR31Save (FM_SIZE+20)
91447636 483#define kkThrErrJmp (FM_SIZE+24)
55e303ae
A
484
485
486// nonvolatile CR bits we use as flags in cr3
// kkString and kkZero deliberately share bit 15: kkString is only tested before
// copyString is entered (the clamp check and copyin3), and kkZero is only
// written afterwards (copyinstr7 and the byte loop).
487
488#define kk64bit 12
489#define kkNull 13
490#define kkIn 14
491#define kkString 15
492#define kkZero 15
493
494
495//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
1c79356b 496/*
d7e50217 497 * int
55e303ae 498 * copyoutstr(src, dst, maxcount, count)
91447636
A
499 * vm_offset_t src; // r3
500 * addr64_t dst; // r4 and r5
501 * vm_size_t maxcount; // r6
502 * vm_size_t* count; // r7
de355530 503 *
55e303ae 504 * Set *count to the number of bytes copied.
de355530
A
505 */
506
55e303ae 507ENTRY(copyoutstr, TAG_NO_FRAME_USED)
// Sets the cr3 flags kkString=1, kkIn=0, zeroes *count, moves the user address
// into r10/r11 and joins the common path at copyJoin (which clears kkNull).
91447636
A
508 mfcr r2,0x10 // save caller's cr3, which we use for flags
509 mr r10,r4 // move high word of 64-bit user address to r10
55e303ae
A
510 li r0,0
511 crset kkString // flag as a string op
91447636
A
512 mr r11,r5 // move low word of 64-bit user address to r11
513 stw r0,0(r7) // initialize #bytes moved
55e303ae
A
514 crclr kkIn // flag as copyout
515 b copyJoin
de355530 516
de355530 517
55e303ae 518//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
de355530 519/*
55e303ae
A
520 * int
521 * copyinstr(src, dst, maxcount, count)
91447636
A
522 * addr64_t src; // r3 and r4
523 * vm_offset_t dst; // r5
524 * vm_size_t maxcount; // r6
525 * vm_size_t* count; // r7
1c79356b
A
526 *
527 * Set *count to the number of bytes copied
1c79356b
A
528 * If dst == NULL, don't copy, just count bytes.
529 * Only currently called from klcopyinstr.
530 */
531
532ENTRY(copyinstr, TAG_NO_FRAME_USED)
// Sets the cr3 flags kkString=1, kkIn=1 and kkNull=(dst==NULL), zeroes *count,
// moves the user address into r10/r11 and joins the common path at copyJoin1
// so that kkNull is not cleared.
91447636
A
533 mfcr r2,0x10 // save caller's cr3, which we use for flags
534 cmplwi r5,0 // dst==NULL?
535 mr r10,r3 // move high word of 64-bit user address to r10
55e303ae
A
536 li r0,0
537 crset kkString // flag as a string op
91447636 538 mr r11,r4 // move low word of 64-bit user address to r11
55e303ae 539 crmove kkNull,cr0_eq // remember if (dst==NULL)
91447636 540 stw r0,0(r7) // initialize #bytes moved
55e303ae
A
541 crset kkIn // flag as copyin (rather than copyout)
542 b copyJoin1 // skip over the "crclr kkNull"
543
544
545//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
546/*
547 * int
548 * copyout(src, dst, count)
91447636
A
549 * vm_offset_t src; // r3
550 * addr64_t dst; // r4 and r5
551 * size_t count; // r6
1c79356b 552 */
1c79356b 553
55e303ae
A
554 .align 5
// copyout and copyoutmsg are two names for the same entry point. It sets the
// cr3 flags kkString=0, kkIn=0, moves the user address into r10/r11 and joins
// the common path at copyJoin.
555 .globl EXT(copyout)
556 .globl EXT(copyoutmsg)
557
558LEXT(copyout)
559LEXT(copyoutmsg)
560
561#if INSTRUMENT
91447636
A
562 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
563 stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
564 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
565 stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
566 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
567 stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
568 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
569 stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
55e303ae 570#endif
91447636
A
571 mfcr r2,0x10 // save caller's cr3, which we use for flags
572 mr r10,r4 // move high word of 64-bit user address to r10
55e303ae 573 crclr kkString // not a string version
91447636 574 mr r11,r5 // move low word of 64-bit user address to r11
55e303ae
A
575 crclr kkIn // flag as copyout
576 b copyJoin
577
578
579//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
580/*
581 * int
582 * copyin(src, dst, count)
91447636
A
583 * addr64_t src; // r3 and r4
584 * vm_offset_t dst; // r5
585 * size_t count; // r6
de355530 586 */
1c79356b 587
1c79356b 588
55e303ae
A
589 .align 5
// copyin and copyinmsg are two names for the same entry point. It sets the cr3
// flags kkString=0, kkIn=1 and moves the user address into r10/r11. There is no
// branch at the end: execution falls through into the common code at copyJoin.
590 .globl EXT(copyin)
591 .globl EXT(copyinmsg)
592
593LEXT(copyin)
594LEXT(copyinmsg)
595
91447636
A
596 mfcr r2,0x10 // save caller's cr3, which we use for flags
597 mr r10,r3 // move high word of 64-bit user address to r10
55e303ae 598 crclr kkString // not a string version
91447636 599 mr r11,r4 // move low word of 64-bit user address to r11
55e303ae
A
600 crset kkIn // flag as copyin
601
602
603// Common code to handle setup for all the copy variants:
91447636
A
604// r2 = caller's cr3
605// r3 = source if copyout
606// r5 = dest if copyin
607// r6 = buffer length or count
608// r7 = count output ptr (if kkString set)
609// r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
610// r11 = low word of 64-bit user-space address
55e303ae
A
611// cr3 = kkIn, kkString, kkNull flags
612
613copyJoin:
614 crclr kkNull // (dst==NULL) convention not used with this call
615copyJoin1: // enter from copyinstr with kkNull already valid (set iff dst==NULL)
// cr0 carries the "length == 0" test and cr1 the "length vs 256MB" test; cr1 is
// not consumed until the clamp check at the bottom of this block.
616 mflr r0 // get return address
91447636 617 cmplwi r6,0 // buffer length 0?
55e303ae
A
618 lis r9,0x1000 // r9 <- 0x10000000 (256MB)
619 stw r0,FM_LR_SAVE(r1) // save return
91447636 620 cmplw cr1,r6,r9 // buffer length > 256MB ?
55e303ae
A
621 mfsprg r8,2 // get the features
622 beq-- copyinout_0 // 0 length is degenerate case (taken before any frame is pushed)
623 stwu r1,-kkFrameSize(r1) // set up stack frame
91447636 624 stw r2,kkCR3(r1) // save caller's cr3, which we use for flags
55e303ae 625 mtcrf 0x02,r8 // move pf64Bit to cr6
91447636
A
626 stw r3,kkSource(r1) // save args across MapUserMemoryWindow
627 stw r5,kkDest(r1)
628 stw r6,kkBufSize(r1)
55e303ae 629 crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
91447636 630 stw r7,kkCountPtr(r1)
55e303ae
A
631 stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
632 li r31,0 // no mapped ptr yet
// NOTE: r31 has now been saved and zeroed, so every exit taken from here on
// must reload it from kkR31Save before popping the frame.
633
634
635// Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
636// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
637// the buffer length to 256MB. This isn't an issue if the string is less than 256MB
638// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction
91447636 639// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
55e303ae
A
640// the mapping.
641
642 ble++ cr1,copyin0 // skip if buffer length <= 256MB
643 bf kkString,copyinout_too_big // error if not string op
91447636 644 mr r6,r9 // silently clamp buffer length to 256MB
55e303ae
A
645 stw r9,kkBufSize(r1) // update saved copy too
646
647
648// Set up thread_recover in case we hit an illegal address.
649
650copyin0:
// Install copyinout_error as the thread's recovery address, keeping the
// previous value in the frame (kkThrErrJmp) so that copyinx can put it back.
// r2 is reused as a scratch register here; the caller's cr3 is already in kkCR3.
91447636 651 mfsprg r8,1 // Get the current thread
55e303ae 652 lis r2,hi16(copyinout_error)
55e303ae 653 ori r2,r2,lo16(copyinout_error)
91447636 654 lwz r4,THREAD_RECOVER(r8) // r4 <- previous recovery address
55e303ae 655 lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
91447636
A
656 stw r2,THREAD_RECOVER(r8) // recovery address <- copyinout_error
657 stw r4,kkThrErrJmp(r1) // remember the previous one
55e303ae
A
658
659
91447636 660// Map user segment into kernel map, turn on 64-bit mode. At this point:
55e303ae 661// r3 = vm map
91447636
A
662// r6 = buffer length
663// r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
664//
665// When we call MapUserMemoryWindow, we pass:
666// r3 = vm map ptr
667// r4/r5 = 64-bit user space address as an addr64_t
55e303ae 668
91447636
A
669 mr r4,r10 // copy user ptr into r4/r5
670 mr r5,r11
55e303ae 671#if INSTRUMENT
91447636
A
672 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
673 stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
674 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
675 stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
676 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
677 stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
678 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
679 stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
55e303ae 680#endif
91447636 681 bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand
55e303ae 682#if INSTRUMENT
91447636
A
683 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
684 stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
685 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
686 stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
687 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
688 stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
689 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
690 stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
55e303ae 691#endif
55e303ae 692 mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
55e303ae
A
693 bf-- kk64bit,copyin1 // skip if a 32-bit processor
694
// 64-bit processors only: merge the high word returned in r3 into r31, then
// switch to 64-bit mode so the full mapped pointer can be used as an address.
695 rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
696 mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
697 li r0,1
698 rldimi r4,r0,63,MSR_SF_BIT // light bit 0
699 mtmsrd r4 // turn on 64-bit mode
700 isync // wait for mode to change
701
702
703// Load r3-r5, substituting mapped ptr as appropriate.
704
705copyin1:
// Rebuild the (source, dest, length) triple in r3/r4/r5 from the frame,
// substituting the mapped pointer in r31 for whichever side is in user space.
706 lwz r5,kkBufSize(r1) // restore length to copy
707 bf kkIn,copyin2 // skip if copyout
91447636 708 lwz r4,kkDest(r1) // copyin: dest is kernel ptr
55e303ae
A
709 mr r3,r31 // source is mapped ptr
710 b copyin3
711copyin2: // handle copyout
712 lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
713 mr r4,r31 // dest is mapped ptr into user space
714
715
716// Finally, all set up to copy:
717// r3 = source ptr (mapped if copyin)
718// r4 = dest ptr (mapped if copyout)
719// r5 = length
91447636 720// r31 = mapped ptr returned by MapUserMemoryWindow
55e303ae
A
721// cr3 = kkIn, kkString, kk64bit, and kkNull flags
722
723copyin3:
// String variants branch away to copyString. Plain copyin/copyout call bcopy
// and then fall through into the common exit that follows with r3 = 0.
724 bt kkString,copyString // handle copyinstr and copyoutstr
725 bl EXT(bcopy) // copyin and copyout: let bcopy do the work
726 li r3,0 // return success
727
728
729// Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
730// from error recovery if we get a DSI accessing user space. Clear recovery ptr,
91447636 731// and pop off frame.
55e303ae
A
732// r3 = 0, EFAULT, or ENAMETOOLONG
733
734copyinx:
// Common exit. r3 (the return status) is left untouched. Undoes, in order:
// 64-bit mode (if it was turned on), the saved LR, r31, the thread recovery
// address, the stack frame, and finally the caller's cr3.
91447636
A
735 lwz r2,kkCR3(r1) // get callers cr3
736 mfsprg r6,1 // Get the current thread
55e303ae
A
737 bf-- kk64bit,copyinx1 // skip if 32-bit processor
738 mfmsr r12
739 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
91447636 740 mtmsrd r12 // turn SF off
55e303ae
A
741 isync // wait for the mode to change
742copyinx1:
91447636 743 lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address (saved in the caller's frame before ours was pushed)
55e303ae 744 lwz r31,kkR31Save(r1) // restore callers r31
91447636 745 lwz r4,kkThrErrJmp(r1) // load saved thread recover
55e303ae 746 addi r1,r1,kkFrameSize // pop off our stack frame
55e303ae 747 mtlr r0
91447636 748 stw r4,THREAD_RECOVER(r6) // restore thread recover
55e303ae
A
749 mtcrf 0x10,r2 // restore cr3 (the kk* flags are dead from here on)
750 blr
de355530 751
1c79356b 752
55e303ae
A
753/* We get here via the exception handler if an illegal
754 * user memory reference was made. This error handler is used by
755 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
756 * they were at point of fault, so for example cr3 flags are valid.
de355530 757 */
d7e50217 758
55e303ae
A
759copyinout_error:
// Address installed in THREAD_RECOVER by copyin0. The frame is still in place
// here, so the normal exit at copyinx does the unwinding.
760 li r3,EFAULT // return error
761 b copyinx
762
763copyinout_0: // degenerate case: 0-length copy
// Taken from copyJoin1 before the frame is pushed and before r31 is touched,
// so only cr3 needs restoring; r2 still holds the caller's CR from the mfcr at entry.
764 mtcrf 0x10,r2 // restore cr3
765 li r3,0 // return success
766 blr
767
768copyinout_too_big: // degenerate case: copyin/copyout longer than 256MB
// Reached from the length check in the common setup code, after the frame has
// been pushed and r31 has been saved to kkR31Save and then zeroed, but before
// r2 is reused and before THREAD_RECOVER is changed. So r2 still holds the
// caller's CR and LR is untouched; only cr3, r31 and the frame need undoing.
// The exit at copyinx restores r31 as the caller's register, so this exit
// must reload it too, and must do so before the frame is popped.
769 mtcrf 0x10,r2 // restore cr3
 lwz r31,kkR31Save(r1) // restore caller's r31 (was zeroed during setup)
770 lwz r1,0(r1) // pop off stack frame
771 li r3,ENAMETOOLONG
772 blr
773
774
775//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
776// Handle copyinstr and copyoutstr. At this point the stack frame is set up,
777// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
778// if necessary, and:
779// r3 = source ptr, mapped if copyinstr
780// r4 = dest ptr, mapped if copyoutstr
781// r5 = buffer length
91447636 782// r31 = mapped ptr returned by MapUserMemoryWindow
55e303ae
A
783// cr3 = kkIn, kkString, kkNull, and kk64bit flags
784// We do word copies unless the buffer is very short, then use a byte copy loop
91447636
A
785// for the leftovers if necessary. The crossover at which the word loop becomes
786// faster is about seven bytes, counting the zero.
787//
788// We first must word-align the source ptr, in order to avoid taking a spurious
789// page fault.
55e303ae
A
790
791copyString:
// Buffers shorter than 15 bytes go straight to the byte loop with ctr = length.
// Otherwise the source is rounded down to a word boundary, the bytes that
// precede the real first byte are forced to 0xFF so they cannot look like a
// terminator, and ctr is set to the number of whole words in the buffer.
91447636
A
792 cmplwi cr1,r5,15 // is buffer very short?
793 mr r12,r3 // remember ptr to 1st source byte
55e303ae 794 mtctr r5 // assuming short, set up loop count for bytes
91447636
A
795 blt-- cr1,copyinstr8 // too short for word loop
796 rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word
797 rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word
798 li r7,-1
799 sub r3,r3,r2 // word-align source address
800 add r6,r5,r2 // get length starting at byte 0 in word
801 srw r7,r7,r9 // get mask for bytes in first word
802 srwi r0,r6,2 // get #words in buffer
803 lwz r5,0(r3) // get aligned word with first source byte
55e303ae
A
804 lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
805 lis r11,hi16(0x80808080)
91447636
A
806 mtctr r0 // set up word loop count
807 addi r3,r3,4 // advance past the source word
55e303ae
A
808 ori r10,r10,lo16(0xFEFEFEFF)
809 ori r11,r11,lo16(0x80808080)
91447636
A
810 orc r8,r5,r7 // map bytes preceding first source byte into 0xFF
811 bt-- kkNull,copyinstr5enter // enter loop that just counts
812
813// Special case 1st word, which has been 0xFF filled on left. Note that we use
814// "and.", even though we execute both in 32 and 64-bit mode. This is OK.
815
816 slw r5,r5,r9 // left justify payload bytes
817 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
818 andc r7,r11,r8 // r7 = ~data & 0x80808080
819 subfic r0,r2,4 // get r0 <- #payload bytes in 1st word
820 and. r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero
821 stw r5,0(r4) // copy payload bytes to dest buffer (always stores a full word)
822 add r4,r4,r0 // then point to next byte in dest buffer
823 bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found
824
825 b copyinstr7 // 0 found (buffer can't be full)
55e303ae
A
826
827
828// Word loop(s). They do a word-parallel search for 0s, using the following
829// non-obvious but very efficient test:
830// y = data + 0xFEFEFEFF
831// z = ~data & 0x80808080
91447636
A
832// If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
833// of this loop, one that just counts and another that copies.
834// r3 = ptr to next word of source (word aligned)
835// r4 = ptr to next byte in buffer
836// r6 = original buffer length (adjusted to be word origin)
837// r10 = 0xFEFEFEFF
838// r11 = 0x80808080
839// r12 = ptr to 1st source byte (used to determine string length)
55e303ae
A
840
841 .align 5 // align inner loops for speed
// Both loops are driven by bdnzt: they continue only while ctr (words left)
// is nonzero after the decrement AND cr0_eq says the last word had no zero byte.
// copyinstr5 falls out through the explicit branch to copyinstr7; copyinstr6
// falls through into copyinstr7 directly.
842copyinstr5: // version that counts but does not copy
91447636
A
843 lwz r8,0(r3) // get next word of source
844 addi r3,r3,4 // advance past it
845copyinstr5enter: // entry for the first word, already in r8 with leading bytes forced to 0xFF
55e303ae
A
846 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
847 andc r7,r11,r8 // r7 = ~data & 0x80808080
91447636
A
848 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
849 bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes in r8 are nonzero
55e303ae
A
850
851 b copyinstr7
852
853 .align 5 // align inner loops for speed
854copyinstr6: // version that counts and copies
91447636
A
855 lwz r8,0(r3) // get next word of source
856 addi r3,r3,4 // advance past it
55e303ae
A
857 addi r4,r4,4 // increment dest ptr while we wait for data
858 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
859 andc r7,r11,r8 // r7 = ~data & 0x80808080
91447636 860 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
55e303ae
A
861 stw r8,-4(r4) // pack all 4 bytes into buffer
862 bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
863
864
865// Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
866// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
867// mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte.
91447636
A
868// r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
869// r6 = original buffer length (adjusted to be word origin)
870// r7 = computed vector of 0x00 and 0x80 bytes
871// r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
872// r12 = ptr to 1st source byte (used to determine string length)
873// cr0 = beq set iff 0 not found
55e303ae
A
874
875copyinstr7:
// Two compare results are live in this block: cr0_eq from the word loop's
// "and." is captured into kkZero by the crnot, and then the cmpwi below
// overwrites cr0 with "leftover byte count == 0" for the beq further down.
55e303ae 876 rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
91447636 877 rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word
55e303ae 878 andc r7,r7,r2 // turn off false hits from 0x0100 worst case
91447636
A
879 crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
880 srwi r7,r7,8 // we want to count the 0 as a byte xferred
881 cmpwi r6,0 // any bytes left over in last word?
55e303ae 882 cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
91447636 883 subi r3,r3,4 // back up r3 to point to 1st byte in r8
55e303ae 884 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
91447636 885 add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred
55e303ae
A
886 bt++ kkZero,copyinstr10 // 0 found, so done
887
888 beq copyinstr10 // r6==0, so buffer truly full
889 mtctr r6 // 0 not found, loop over r6 bytes
890 b copyinstr8 // enter byte loop for last 1-3 leftover bytes
891
892
893// Byte loop. This is used for very small buffers and for the odd bytes left over
894// after searching and copying words at a time.
91447636
A
895// r3 = ptr to next byte of source
896// r4 = ptr to next dest byte
897// r12 = ptr to first byte of source
898// ctr = count of bytes to check
55e303ae
A
899
900 .align 5 // align inner loops for speed
901copyinstr8: // loop over bytes of source
902 lbz r0,0(r3) // get next byte of source
903 addi r3,r3,1
904 addi r4,r4,1 // increment dest addr whether we store or not
905 cmpwi r0,0 // the 0?
91447636 906 bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr
55e303ae
A
907 stb r0,-1(r4)
908copyinstr9:
909 bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
910
55e303ae
A
911 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
912
913
914// Buffer filled or 0 found. Unwind and return.
91447636
A
915// r3 = ptr to 1st source byte not transferred
916// r12 = ptr to 1st source byte
917// r31 = mapped ptr returned by MapUserMemoryWindow
918// cr3 = kkZero set iff 0 found
55e303ae
A
919
920copyinstr10:
921 lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
91447636
A
922 sub r2,r3,r12 // compute #bytes copied (including the 0)
923 li r3,0 // assume success return status
55e303ae
A
924 stw r2,0(r9) // store #bytes moved
925 bt++ kkZero,copyinx // we did find the 0 so return 0
926 li r3,ENAMETOOLONG // buffer filled
927 b copyinx // join main exit routine
928
91447636
A
929//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
930/*
931 * int
932 * copypv(source, sink, size, which)
933 * addr64_t src; // r3 and r4
934 * addr64_t dst; // r5 and r6
935 * size_t size; // r7
936 * int which; // r8
937 *
938 * Operand size bytes are copied from operand src into operand dst. The source and
939 * destination operand addresses are given as addr64_t, and may designate starting
940 * locations in physical or virtual memory in any combination except where both are
941 * virtual. Virtual memory locations may be in either the kernel or the current thread's
942 * address space. Operand size may be up to 256MB.
943 *
944 * Operation is controlled by operand which, which offers these options:
945 * cppvPsrc : source operand is (1) physical or (0) virtual
946 * cppvPsnk : destination operand is (1) physical or (0) virtual
947 * cppvKmap : virtual operand is in (1) kernel or (0) current thread
948 * cppvFsnk : (1) flush destination before and after transfer
949 * cppvFsrc : (1) flush source before and after transfer
950 * cppvNoModSnk : (1) don't set destination operand's changed bit(s)
951 * cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
952 *
953 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
954 * This section describes the operation of the new 64-bit path.
955 *
956 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
957 * window in the kernel address space into all of physical RAM plus the I/O hole. Since
958 * the window's mappings specify the proper access policies for the underlying memory,
959 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
960 * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
961 * memory window, and are accessed with data relocation on. Virtual addresses are either
962 * within the kernel, or are mapped into the kernel address space through the user memory
963 * window. Because accesses to a virtual operand are performed with data relocation on,
964 * the new path does not have to translate the address, disable/enable interrupts, lock
965 * the mapping, or update referenced and changed bits.
966 *
967 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
968 * a substantial performance penalty for copypv operating in real mode. Utilizing the
969 * new 64-bit path, transfer performance increases >100% on the G5.
970 *
971 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
972 * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
973 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
974 * required.
975 *
976 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
977 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
978 * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
979 * in our own stack frame across calls to 32-bit functions.
980 *
981 */
55e303ae 982
91447636
A
983// Map operand which bits into non-volatile CR2 and CR3 bits.
// copypv_64 rotates 'which' left by whichAlign bits, masks it with whichMask, and moves the
// result into CR fields 2 and 3 with mtcrf. Each pvXxx below is therefore the CR bit number
// at which the corresponding cppvXxxb flag lands (its original bit number less whichAlign).
// pvNoCache is not one of the caller's flags: it is cr2_lt, which whichMask leaves clear,
// and copypv sets it itself when the copy must be done by bcopy_nc.
984#define whichAlign ((3+1)*4)
985#define whichMask 0x007F0000
986#define pvPsnk (cppvPsnkb - whichAlign)
987#define pvPsrc (cppvPsrcb - whichAlign)
988#define pvFsnk (cppvFsnkb - whichAlign)
989#define pvFsrc (cppvFsrcb - whichAlign)
990#define pvNoModSnk (cppvNoModSnkb - whichAlign)
991#define pvNoRefSrc (cppvNoRefSrcb - whichAlign)
992#define pvKmap (cppvKmapb - whichAlign)
993#define pvNoCache cr2_lt
994
995 .align 5
996 .globl EXT(copypv)
997
// copypv entry point. Only r10 and a volatile CR field are touched here, so the arguments
// (src in r3/r4, dst in r5/r6, size in r7, which in r8) reach either implementation intact.
998LEXT(copypv)
999 mfsprg r10,2 // get feature flags
1000 mtcrf 0x02,r10 // copy feature flag bits into cr6 so we can test pf64Bit
1001 bt++ pf64Bitb,copypv_64 // take the 64-bit path on 64-bit processors (only they take hint)
1002
1003 b EXT(hw_copypv_32) // otherwise branch (no link) to the 32-bit copypv; it returns to our caller
1004
1005// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
// Slots used in the new frame, as offsets from FM_ARG0:
//   0x00-0x14  32-bit images of the caller's r26-r31
//   0x18       scratch passed by address to mapping_find (nextva) and mapping_phys_lookup (pindex)
//   0x20       caller's THREAD_RECOVER value, put back by copypv_return
//   0x28-0x47  doubleword slots that carry our 64-bit non-volatiles across 32-bit calls
// The caller's LR and CR go into the FM_LR_SAVE/FM_CR_SAVE slots of the caller's own frame
// (new r1 plus the frame size is the old r1).
// NOTE(review): the highest slot used is FM_ARG0+0x40..0x47; confirm that it lies within the
// FM_ALIGN((31-26+11)*4)+FM_SIZE bytes allocated below.
1006copypv_64:
1007 mfsprg r9,1 // get current thread
1008 stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
1009 // allocate stack frame and link it
1010 mflr r0 // get return address
1011 mfcr r10 // get cr2 and cr3
1012 lwz r12,THREAD_RECOVER(r9) // get caller's error callback (recovery routine pointer)
1013 stw r26,FM_ARG0+0x00(r1) // save non-volatile r26
1014 stw r27,FM_ARG0+0x04(r1) // save non-volatile r27
1015 stw r28,FM_ARG0+0x08(r1) // save non-volatile r28
1016 stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29
1017 stw r30,FM_ARG0+0x10(r1) // save non-volatile r30
1018 stw r31,FM_ARG0+0x14(r1) // save non-volatile r31
1019 stw r12,FM_ARG0+0x20(r1) // save caller's error callback for copypv_return to restore
1020 stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1021 // save return address
1022 stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1023 // save non-volatile cr2 and cr3
1024
1025// Non-volatile register usage in this routine is:
1026// r26: saved msr image (later: page count in the ref/chg loops)
1027// r27: kernel pmap_t, then source address handed to bcopy (later: page number in the ref/chg loops)
1028// r28: virtual operand's address, then destination address handed to bcopy
1029// r29: source address as passed by the caller (64-bit)
1030// r30: destination address as passed by the caller (64-bit)
1031// r31: byte count to copy
1032// cr2/3: parameter 'which' bits, plus pvNoCache in cr2_lt
1033
1034 rlwinm r8,r8,whichAlign,whichMask // align and mask which bits
1035 mr r31,r7 // copy size to somewhere non-volatile
1036 mtcrf 0x20,r8 // insert which bits into cr2
1037 mtcrf 0x10,r8 // insert which bits into cr3
1038 rlwinm r29,r3,0,1,0 // form source address high-order bits
1039 rlwinm r30,r5,0,1,0 // form destination address high-order bits
1040 rlwimi r29,r4,0,0,31 // form source address low-order bits
1041 rlwimi r30,r6,0,0,31 // form destination address low-order bits
1042 crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical?
1043 cntlzw r0,r31 // count leading zeroes in byte count
1044 cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical
1045 bf-- cr7_eq,copypv_einval // both operands may not be virtual
1046 cmplwi r0,4 // fewer than 4 leading zeroes means byte count >= 256M (2**28)
1047 blt-- copypv_einval // byte count too big, give EINVAL
1048 cmplwi r31,0 // byte count zero?
1049 beq-- copypv_zero // early out
1050 bt cr7_lt,copypv_phys // both operand addresses are physical, no virtual operand to look up
1051 mr r28,r30 // assume destination is virtual
1052 bf pvPsnk,copypv_dv // is destination virtual?
1053 mr r28,r29 // no, so source must be virtual
1054copypv_dv:
1055 lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order
1056 lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
1057 bt pvKmap,copypv_kern // virtual address in kernel map?
1058 lwz r3,ACT_VMMAP(r9) // get user's vm_map * (r9 still holds the current thread)
1059 rldicl r4,r28,32,32 // r4 <- high word of addr64_t virtual address
1060 rldicl r5,r28,0,32 // r5 <- low word of addr64_t virtual address
1061 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call
1062 std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call
1063 bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space
1064 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1065 ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30
1066 rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address (returned in r3/r4)
1067 rlwimi r28,r4,0,0,31 // into a single 64-bit scalar
1068copypv_kern:
1069
1070// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
1071// update the referenced and changed bits manually after the copy. So, force the appropriate
1072// flag bit on for the virtual operand.
1073 crorc pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits
1074 crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit
1075
1076// We'll be finding a mapping and looking at it, so we need to disable 'rupts.
1077 lis r0,hi16(MASK(MSR_VEC)) // get vector mask
1078 ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask
1079 mfmsr r26 // save current msr
1080 andc r26,r26,r0 // turn off VEC and FP in saved copy
1081 ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask
1082 andc r0,r26,r0 // disable EE in our new msr image
1083 mtmsrd r0 // introduce new msr image
1084
1085// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
1086// try to find a mapping corresponding to this address in order to determine whether the address
1087// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
1088// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
1089// examine the mapping's caching-inhibited bit.
1090 mr r3,r27 // r3 <- pmap_t pmap
1091 rldicl r4,r28,32,32 // r4, r5 <- addr64_t va
1092 rldicl r5,r28,0,32
1093 la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva
1094 li r7,1 // r7 <- int full, search nested mappings
1095 std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls
1096 std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls
1097 std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls
1098 std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls
1099 bl EXT(mapping_find) // find mapping for virtual operand
1100 mr. r3,r3 // did we find it?
1101 beq copypv_nomapping // nope, so we'll assume it's cacheable
1102 lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags
1103 rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set?
1104 crnot pvNoCache,cr0_eq // if it is, use bcopy_nc
1105 bl EXT(mapping_drop_busy) // drop busy on the mapping (r3 still points at it)
1106copypv_nomapping:
1107 ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26
1108 ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28
1109 ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29
1110 ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30
1111 mtmsrd r26 // restore msr to its previous state (less VEC and FP, cleared above)
1112
1113// Set both the source and destination virtual addresses to the virtual operand's address --
1114// we'll overlay one of them with the physical operand's address.
1115 mr r27,r28 // make virtual operand BOTH source AND destination
1116
1117// Now we're ready to relocate the physical operand address(es) into the physical memory window.
1118// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
1119// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
1120// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
// On exit r27 holds the source and r28 the destination address that will be handed to the copy
// routine; a virtual operand keeps the address already placed in r27/r28.
1121copypv_phys:
1122 ld r6,lgPMWvaddr(0) // get physical memory window virtual address
1123 bf pvPsnk,copypv_dstvirt // is destination address virtual?
1124 cntlzd r4,r30 // count leading zeros in destination address
1125 cmplwi r4,32 // if it's 32, the top set bit is 2**31, so it's in the I/O hole (2**31 to 2**32-1)
1126 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1127 add r28,r30,r6 // relocate physical destination into physical window
1128copypv_dstvirt:
1129 bf pvPsrc,copypv_srcvirt // is source address virtual?
1130 cntlzd r4,r29 // count leading zeros in source address
1131 cmplwi r4,32 // if it's 32, the top set bit is 2**31, so it's in the I/O hole (2**31 to 2**32-1)
1132 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1133 add r27,r29,r6 // relocate physical source into physical window
1134copypv_srcvirt:
1135
1136// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
1137// funny happens during the copy. So, we set a pointer to our error handler in the per-thread
1138// control block.
// The caller's previous THREAD_RECOVER value was saved in our frame on entry and is put back
// by copypv_return.
1139 mfsprg r8,1 // get current thread
1140 lis r3,hi16(copypv_error) // get our error callback's address, high
1141 ori r3,r3,lo16(copypv_error) // get our error callback's address, low
1142 stw r3,THREAD_RECOVER(r8) // set our error callback
1143
1144// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
1145// 64-bit mode.
1146 li r0,1 // get a handy one bit
1147 mfmsr r3 // get current msr
1148 rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy
1149 mtmsrd r3 // enter 64-bit mode (no isync needed, see the routine's header comment)
1150
1151// If requested, flush data cache
1152// Note that we don't flush, the code is being saved "just in case".
1153#if 0
1154 bf pvFsrc,copypv_nfs // do we flush the source?
1155 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1156 rldicl r4,r27,0,32
1157 mr r5,r31 // r5 <- count (in bytes)
1158 li r6,0 // r6 <- boolean phys (false, not physical)
1159 bl EXT(flush_dcache) // flush the source operand
1160copypv_nfs:
1161 bf pvFsnk,copypv_nfdx // do we flush the destination?
1162 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1163 rldicl r4,r28,0,32
1164 mr r5,r31 // r5 <- count (in bytes)
1165 li r6,0 // r6 <- boolean phys (false, not physical)
1166 bl EXT(flush_dcache) // flush the destination operand
1167copypv_nfdx:
1168#endif
1169
1170// Call bcopy or bcopy_nc to perform the copy.
// pvNoCache was set above if the virtual operand's mapping is caching-inhibited or a physical
// operand lies in the I/O hole. THREAD_RECOVER points at copypv_error while the copy runs.
1171 mr r3,r27 // r3 <- source virtual address
1172 mr r4,r28 // r4 <- destination virtual address
1173 mr r5,r31 // r5 <- bytes to copy
1174 bt pvNoCache,copypv_nc // take non-caching route
1175 bl EXT(bcopy) // call bcopy to do the copying
1176 b copypv_copydone // skip over the non-caching call
1177copypv_nc:
1178 bl EXT(bcopy_nc) // call bcopy_nc to do the copying
1179copypv_copydone:
1180
1181// If requested, flush data cache
1182// Note that we don't flush, the code is being saved "just in case".
1183#if 0
1184 bf pvFsrc,copypv_nfsx // do we flush the source?
1185 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1186 rldicl r4,r27,0,32
1187 mr r5,r31 // r5 <- count (in bytes)
1188 li r6,0 // r6 <- boolean phys (false, not physical)
1189 bl EXT(flush_dcache) // flush the source operand
1190copypv_nfsx:
1191 bf pvFsnk,copypv_nfd // do we flush the destination?
1192 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1193 rldicl r4,r28,0,32
1194 mr r5,r31 // r5 <- count (in bytes)
1195 li r6,0 // r6 <- boolean phys (false, not physical)
1196 bl EXT(flush_dcache) // flush the destination operand
1197copypv_nfd:
1198#endif
1199
1200// Leave 64-bit mode.
// The copy is finished; nothing below this point uses the relocated addresses in r27/r28.
1201 mfmsr r3 // get current msr
1202 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1203 mtmsrd r3 // leave 64-bit mode (no isync needed, see the routine's header comment)
1204
// If requested, set ref/chg on source/dest physical operand(s). It is possible that copy is
// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
// Note that this code is page-size sensitive, so it should probably be a part of our low-level
// code in hw_vm.s.
//
// This part sets the changed bit on every 4K page of the physical destination.
//	r30 = destination physical address (64-bit)
//	r31 = byte count, known to be nonzero and below 2**28 (checked on entry)
// The number of pages touched is ((dest & 0xFFF) + count - 1) / 4096 + 1. This covers both the
// page-crossing and the non-crossing case and never counts a page the copy did not reach.
// r26 (page count) and r27 (page number) are only needed in their low 32 bits, so they survive
// the 32-bit calls without help; r29 is still needed as a 64-bit source address afterwards,
// so it is parked in the frame for the duration.
		bt		pvNoModSnk,copypv_nomod			// skip destination update if not requested
		std		r29,FM_ARG0+0x30(r1)			// preserve 64-bit r29 across 32-bit calls
		rlwinm	r3,r30,0,20,31					// r3 <- destination offset within its 4K page
		add		r3,r3,r31						// r3 <- offset + byte count
		subi	r3,r3,1							// r3 <- offset of last byte written, relative to first page
		srwi	r26,r3,12						// r26 <- index of last page, relative to first page
		addi	r26,r26,1						// r26 <- number of 4K pages touched (at least 1)
		srdi	r27,r30,12						// r27 <- first destination page number
copypv_modloop:
		mr		r3,r27							// r3 <- destination page number
		la		r4,FM_ARG0+0x18(r1)				// r4 <- unsigned int *pindex
		bl		EXT(mapping_phys_lookup)		// see if page is really there
		mr.		r3,r3							// is it?
		beq--	copypv_modend					// nope, break out of modify loop
		mr		r3,r27							// r3 <- destination page number
		bl		EXT(mapping_set_mod)			// set page changed status
		addi	r27,r27,1						// step to the next destination page
		subi	r26,r26,1						// decrement page count
		cmpwi	r26,0							// done yet?
		bgt		copypv_modloop					// nope, iterate
copypv_modend:
		ld		r29,FM_ARG0+0x30(r1)			// restore 64-bit r29
copypv_nomod:
// Set the referenced bit on every 4K page of the physical source, if requested.
//	r29 = source physical address (64-bit)
//	r31 = byte count, known to be nonzero and below 2**28 (checked on entry)
// The number of pages touched is ((src & 0xFFF) + count - 1) / 4096 + 1. Each page is checked
// with mapping_phys_lookup() first, and the loop stops at the first page that is not there.
// r26 (page count) and r27 (page number) are only needed in their low 32 bits across the calls.
		bt		pvNoRefSrc,copypv_done			// skip source update if not requested
copypv_debugref:
		rlwinm	r3,r29,0,20,31					// r3 <- source offset within its 4K page
		add		r3,r3,r31						// r3 <- offset + byte count
		subi	r3,r3,1							// r3 <- offset of last byte read, relative to first page
		srwi	r26,r3,12						// r26 <- index of last page, relative to first page
		addi	r26,r26,1						// r26 <- number of 4K pages touched (at least 1)
		srdi	r27,r29,12						// r27 <- first source page number
copypv_refloop:
		mr		r3,r27							// r3 <- source page number
		la		r4,FM_ARG0+0x18(r1)				// r4 <- unsigned int *pindex
		bl		EXT(mapping_phys_lookup)		// see if page is really there
		mr.		r3,r3							// is it?
		beq--	copypv_done						// nope, break out of reference loop
		mr		r3,r27							// r3 <- source page number
		bl		EXT(mapping_set_ref)			// set page referenced status
		addi	r27,r27,1						// step to the next source page
		subi	r26,r26,1						// decrement page count
		cmpwi	r26,0							// done yet?
		bgt		copypv_refloop					// nope, iterate
1263
1264// Return, indicating success.
1265copypv_done:
1266copypv_zero:
1267 li r3,0 // our efforts were crowned with success
1268
1269// Pop frame, restore caller's non-volatiles, put back the caller's recovery routine pointer.
// Entered with the return status already in r3; r3 is not touched below.
1270copypv_return:
1271 mfsprg r9,1 // get current thread
1272 lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1273 // get return address
1274 lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1275 // get non-volatile cr2 and cr3
1276 lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26
1277 lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27
1278 mtlr r0 // restore return address
1279 lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28
1280 mtcrf 0x20,r4 // restore non-volatile cr2
1281 mtcrf 0x10,r4 // restore non-volatile cr3
1282 lwz r11,FM_ARG0+0x20(r1) // get the error callback that was saved on entry
1283 lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29
1284 lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30
1285 lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31
1286 stw r11,THREAD_RECOVER(r9) // restore the caller's error callback
1287 lwz r1,0(r1) // release stack frame
1288
1289 blr // y'all come back now
1290
1291// Invalid argument handler.
// Reached when both operands are virtual or the byte count is 2**28 or more. The frame has
// already been pushed at that point, so the common epilogue is used.
1292copypv_einval:
1293 li r3,EINVAL // invalid argument
1294 b copypv_return // return
1295
1296// Error encountered during bcopy or bcopy_nc.
// This is the address stored in THREAD_RECOVER before the copy. The copy runs in 64-bit mode,
// so SF is cleared here before joining the common epilogue. The ref/chg updates are skipped.
1297copypv_error:
1298 mfmsr r3 // get current msr
1299 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1300 mtmsrd r3 // leave 64-bit mode
1301 li r3,EFAULT // it was all his fault
1302 b copypv_return // return