/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
#include <debug.h>
#include <ppc/asm.h>
#include <ppc/proc_reg.h>
#include <mach/ppc/vm_param.h>
#include <assym.s>
#include <sys/errno.h>

#define INSTRUMENT      0

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * void pmap_zero_page(vm_offset_t pa)
 *
 * Zero a page of physical memory.  This routine runs in 32 or 64-bit mode,
 * and handles 32 and 128-byte cache lines.
 */
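// A rough C sketch of the zeroing loop below (illustrative only; "line" is
// 32 or 128 bytes, taken from the feature flags in r9):
//
//      for (off = PPC_PGBYTES - line; off >= 0; off -= line)
//          bzero((char *)pa + off, line);      // one dcbz128 per cache line
//
// dcbz128 establishes and zeroes a whole line in the cache, so the page is
// never read from RAM.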

        .align  5
        .globl  EXT(pmap_zero_page)

LEXT(pmap_zero_page)

        mflr    r12                             // save return address
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        mtlr    r12                             // restore return address
        andi.   r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size

        subfic  r4,r9,PPC_PGBYTES               // r4 <- starting offset in page

        bt++    pf64Bitb,page0S4                // Go do the big guys...

        slwi    r3,r3,12                        // get page address from page num
        b       page_zero_1                     // Jump to line aligned loop...

        .align  5

        nop
        nop
        nop
        nop
        nop
        nop
        nop

page0S4:
        sldi    r3,r3,12                        // get page address from page num

page_zero_1:                                    // loop zeroing cache lines
        sub.    r5,r4,r9                        // more to go?
        dcbz128 r3,r4                           // zero either 32 or 128 bytes
        sub     r4,r5,r9                        // generate next offset
        dcbz128 r3,r5
        bne--   page_zero_1

        b       EXT(ml_restore)                 // restore MSR and do the isync


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * phys_copy(src, dst, bytecount)
 *      addr64_t src;
 *      addr64_t dst;
 *      int bytecount
 *
 * This routine will copy bytecount bytes from physical address src to physical
 * address dst.  It runs in 64-bit mode if necessary, but does not handle
 * overlap or make any attempt to be optimal.  Length must be a signed word.
 * Not performance critical.
 */
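// Note on the calling convention (for illustration): the 32-bit ABI passes each
// addr64_t as a pair of GPRs, high half first, so on entry r3:r4 = src,
// r5:r6 = dst, and r7 = bytecount.  E.g. src = 0x00000001_2345F000 arrives as
// r3 = 0x00000001, r4 = 0x2345F000; the rlwinm/rlwimi pairs below merge each
// pair into one full 64-bit register before copying.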


        .align  5
        .globl  EXT(phys_copy)

LEXT(phys_copy)

        rlwinm  r3,r3,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        mflr    r12                             // get return address
        rlwimi  r3,r4,0,0,31                    ; Combine bottom of long long to full 64-bits
        rlwinm  r4,r5,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        rlwimi  r4,r6,0,0,31                    ; Combine bottom of long long to full 64-bits
        mtlr    r12                             // restore return address
        subic.  r5,r7,4                         // a word to copy?
        b       phys_copy_2

        .align  5

phys_copy_1:                                    // loop copying words
        subic.  r5,r5,4                         // more to go?
        lwz     r0,0(r3)
        addi    r3,r3,4
        stw     r0,0(r4)
        addi    r4,r4,4
phys_copy_2:
        bge     phys_copy_1
        addic.  r5,r5,4                         // restore count
        ble     phys_copy_4                     // no more

        // Loop is aligned here

phys_copy_3:                                    // loop copying bytes
        subic.  r5,r5,1                         // more to go?
        lbz     r0,0(r3)
        addi    r3,r3,1
        stb     r0,0(r4)
        addi    r4,r4,1
        bgt     phys_copy_3
phys_copy_4:
        b       EXT(ml_restore)                 // restore MSR and do the isync


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * pmap_copy_page(src, dst)
 *      ppnum_t src;
 *      ppnum_t dst;
 *
 * This routine will copy the physical page src to physical page dst.
 *
 * This routine assumes that src and dst are page numbers and that the
 * destination is cached.  It runs on 32- and 64-bit processors, with and
 * without AltiVec, and with 32- and 128-byte cache lines.
 * We also must assume that no one will be executing within the destination
 * page, and that this will be used for paging.  Because this
 * is a common routine, we have tuned loops for each processor class.
 *
 */
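// In outline, the dispatch below picks one of four tuned copy loops:
//
//      64-bit CPU, AltiVec        ->  pmap_copy_64        (128-byte lines, 8 VRs)
//      64-bit CPU, no AltiVec     ->  pmap_novmx_copy     (128-byte lines, GPRs)
//      32-bit CPU, AltiVec (G4)   ->  pmap_copy_g4        (32-byte lines, 4 VRs)
//      32-bit CPU, no VMX  (G3)   ->  pmap_g3_copy_loop   (32-byte lines, 4 FPRs)
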
#define kSFSize         (FM_SIZE+160)

ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)

        lis     r2,hi16(MASK(MSR_VEC))          ; Get the vector flag
        mflr    r0                              // get return
        ori     r2,r2,lo16(MASK(MSR_FP))        ; Add the FP flag
        stw     r0,8(r1)                        // save
        stwu    r1,-kSFSize(r1)                 // set up a stack frame for VRs or FPRs
        mfmsr   r11                             // save MSR at entry
        mfsprg  r10,2                           // get feature flags
        andc    r11,r11,r2                      // Clear out vec and fp
        ori     r2,r2,lo16(MASK(MSR_EE))        // Get EE on also
        andc    r2,r11,r2                       // Clear out EE as well
        mtcrf   0x02,r10                        // we need to test pf64Bit
        ori     r2,r2,MASK(MSR_FP)              // must enable FP for G3...
        mtcrf   0x80,r10                        // we need to test pfAltivec too
        oris    r2,r2,hi16(MASK(MSR_VEC))       // enable altivec for G4 (ignored if G3)
        mtmsr   r2                              // turn EE off, FP and VEC on
        isync
        bt++    pf64Bitb,pmap_copy_64           // skip if 64-bit processor (only they take hint)
        slwi    r3,r3,12                        // get page address from page num
        slwi    r4,r4,12                        // get page address from page num
        rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1      // get ready to turn off DR
        bt      pfAltivecb,pmap_copy_g4         // altivec but not 64-bit means G4


        // G3 -- copy using FPRs

        stfd    f0,FM_SIZE+0(r1)                // save the 4 FPRs we use to copy
        stfd    f1,FM_SIZE+8(r1)
        li      r5,PPC_PGBYTES/32               // count of cache lines in a page
        stfd    f2,FM_SIZE+16(r1)
        mtctr   r5
        stfd    f3,FM_SIZE+24(r1)
        mtmsr   r12                             // turn off DR after saving FPRs on stack
        isync

pmap_g3_copy_loop:                              // loop over 32-byte cache lines
        dcbz    0,r4                            // avoid read of dest line
        lfd     f0,0(r3)
        lfd     f1,8(r3)
        lfd     f2,16(r3)
        lfd     f3,24(r3)
        addi    r3,r3,32
        stfd    f0,0(r4)
        stfd    f1,8(r4)
        stfd    f2,16(r4)
        stfd    f3,24(r4)
        dcbst   0,r4                            // flush dest line to RAM
        addi    r4,r4,32
        bdnz    pmap_g3_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r6,PPC_PGBYTES-32               // point to last line in page
pmap_g3_icache_flush:
        subic.  r5,r6,32                        // more to go?
        icbi    r4,r6                           // flush another line in icache
        subi    r6,r5,32                        // get offset to next line
        icbi    r4,r5
        bne     pmap_g3_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        lfd     f0,FM_SIZE+0(r1)                // restore the FPRs
        lfd     f1,FM_SIZE+8(r1)
        lfd     f2,FM_SIZE+16(r1)
        lfd     f3,FM_SIZE+24(r1)

        b       pmap_g4_restore                 // restore MSR and done


        // G4 -- copy using VRs

pmap_copy_g4:                                   // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
        la      r9,FM_SIZE+16(r1)               // r9 <- place on stack where we save VRs
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        stvx    v0,0,r9                         // save some VRs so we can use them to copy
        li      r7,48                           // another offset
        stvx    v1,r5,r9
        li      r0,PPC_PGBYTES/64               // we loop over 64-byte chunks
        stvx    v2,r6,r9
        mtctr   r0
        li      r8,96                           // get look-ahead for touch
        stvx    v3,r7,r9
        li      r9,128
        mtmsr   r12                             // now we've saved VRs on stack, turn off DR
        isync                                   // wait for it to happen
        b       pmap_g4_copy_loop

        .align  5                               // align inner loops
pmap_g4_copy_loop:                              // loop over 64-byte chunks
        dcbt    r3,r8                           // touch 3 lines ahead
        nop                                     // avoid a 17-word loop...
        dcbt    r3,r9                           // touch 4 lines ahead
        nop                                     // more padding
        dcba    0,r4                            // avoid pre-fetch of 1st dest line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64
        dcba    r6,r4                           // avoid pre-fetch of 2nd line
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        dcbf    0,r4                            // push line 1
        dcbf    r6,r4                           // and line 2
        addi    r4,r4,64
        bdnz    pmap_g4_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-32               // point to last line in page
pmap_g4_icache_flush:
        subic.  r9,r8,32                        // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,32                        // get offset to next line
        icbi    r4,r9
        bne     pmap_g4_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9

pmap_g4_restore:                                // r11=MSR
        mtmsr   r11                             // turn EE on, VEC and FP off
        isync                                   // wait for it to happen
        addi    r1,r1,kSFSize                   // pop off our stack frame
        lwz     r0,8(r1)                        // restore return address
        mtlr    r0
        blr


        // 64-bit/128-byte processor: copy using VRs

pmap_copy_64:                                   // r10=features, r11=old MSR
        sldi    r3,r3,12                        // get page address from page num
        sldi    r4,r4,12                        // get page address from page num
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        bf      pfAltivecb,pmap_novmx_copy      // altivec suppressed...
        stvx    v0,0,r9                         // save 8 VRs so we can copy w/o bubbles
        stvx    v1,r5,r9
        li      r7,48                           // another offset
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        stvx    v2,r6,r9
        stvx    v3,r7,r9
        addi    r9,r9,64                        // advance base ptr so we can store another 4
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        stvx    v4,0,r9
        stvx    v5,r5,r9
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        stvx    v6,r6,r9
        stvx    v7,r7,r9
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        li      r8,-128                         // offset so we can reach back one line
        mtmsrd  r12                             // now we've saved VRs, turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream
        b       pmap_64_copy_loop

        .align  5                               // align inner loops
pmap_64_copy_loop:                              // loop over 128-byte chunks
        dcbz128 0,r4                            // avoid read of destination line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64                        // don't have enough GPRs so add 64 2x
        lvx     v4,0,r3                         // offset 64
        lvx     v5,r5,r3                        // offset 80
        lvx     v6,r6,r3                        // offset 96
        lvx     v7,r7,r3                        // offset 112
        addi    r3,r3,64
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        addi    r4,r4,64
        stvx    v4,0,r4                         // offset 64
        stvx    v5,r5,r4                        // offset 80
        stvx    v6,r6,r4                        // offset 96
        stvx    v7,r7,r4                        // offset 112
        addi    r4,r4,64
        dcbf    r8,r4                           // flush the line we just wrote
        bdnz    pmap_64_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page
pmap_64_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_64_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync
        la      r9,FM_SIZE+16(r1)               // get base address of VR save area on stack
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9
        addi    r9,r9,64
        lvx     v4,0,r9
        lvx     v5,r5,r9
        lvx     v6,r6,r9
        lvx     v7,r7,r9

        b       pmap_g4_restore                 // restore lower half of MSR and return

        //
        // Copy on 64-bit without VMX
        //

pmap_novmx_copy:
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        mtmsrd  r12                             // turn DR off and SF on (no VRs to save on this path)
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream

pmap_novmx_copy_loop:                           // loop over 128-byte cache lines
        dcbz128 0,r4                            // avoid read of dest line

        ld      r0,0(r3)                        // Load half a line
        ld      r12,8(r3)
        ld      r5,16(r3)
        ld      r6,24(r3)
        ld      r7,32(r3)
        ld      r8,40(r3)
        ld      r9,48(r3)
        ld      r10,56(r3)

        std     r0,0(r4)                        // Store half a line
        std     r12,8(r4)
        std     r5,16(r4)
        std     r6,24(r4)
        std     r7,32(r4)
        std     r8,40(r4)
        std     r9,48(r4)
        std     r10,56(r4)

        ld      r0,64(r3)                       // Load half a line
        ld      r12,72(r3)
        ld      r5,80(r3)
        ld      r6,88(r3)
        ld      r7,96(r3)
        ld      r8,104(r3)
        ld      r9,112(r3)
        ld      r10,120(r3)

        addi    r3,r3,128

        std     r0,64(r4)                       // Store half a line
        std     r12,72(r4)
        std     r5,80(r4)
        std     r6,88(r4)
        std     r7,96(r4)
        std     r8,104(r4)
        std     r9,112(r4)
        std     r10,120(r4)

        dcbf    0,r4                            // flush the line we just wrote
        addi    r4,r4,128
        bdnz    pmap_novmx_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page

pmap_novmx_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_novmx_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync

        b       pmap_g4_restore                 // restore lower half of MSR and return


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>

// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
// These routines all run on both 32 and 64-bit machines, though because they are called
// by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
// by MapUserAddressSpace will be 64 bits however on 64-bit machines.  Beware of using
// compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
// is no need to store or load it, which are mode-dependent operations since it could be
// 32 or 64 bits.
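//
// As a rough C-level sketch (illustrative only, not the actual prototypes),
// the common path below behaves like:
//
//      int copyin(const char *uaddr, char *kaddr, size_t n) {
//          thread->recover = copyinout_error;          // DSI in the copy -> EFAULT
//          mapped = MapUserAddressSpace(map, uaddr, n);  // may be a 64-bit ptr
//          if (mapped == 0)
//              return EFAULT;
//          bcopy(mapped, kaddr, n);                    // or the string loops below
//          thread->recover = 0;
//          return 0;
//      }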

#define kkFrameSize     (FM_SIZE+32)

#define kkBufSize       (FM_SIZE+0)
#define kkCR            (FM_SIZE+4)
#define kkSource        (FM_SIZE+8)
#define kkDest          (FM_SIZE+12)
#define kkCountPtr      (FM_SIZE+16)
#define kkR31Save       (FM_SIZE+20)


// nonvolatile CR bits we use as flags in cr3

#define kk64bit         12
#define kkNull          13
#define kkIn            14
#define kkString        15
#define kkZero          15
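//
// Note that kkString and kkZero share cr3 bit 15: kkString is only consulted on
// the way into the string loops, after which the bit is reused to record whether
// a 0 byte was found.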


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyoutstr(src, dst, maxcount, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      vm_size_t maxcount;
 *      vm_size_t* count;
 *
 * Set *count to the number of bytes copied.
 */

ENTRY(copyoutstr, TAG_NO_FRAME_USED)
        mfcr    r2                              // we use nonvolatile cr3
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r10,r4                          // for copyout, dest ptr (r4) is in user space
        stw     r0,0(r6)                        // initialize #bytes moved
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyinstr(src, dst, maxcount, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      vm_size_t maxcount;
 *      vm_size_t* count;
 *
 * Set *count to the number of bytes copied.
 * If dst == NULL, don't copy, just count bytes.
 * Only currently called from klcopyinstr.
 */

ENTRY(copyinstr, TAG_NO_FRAME_USED)
        mfcr    r2                              // we use nonvolatile cr3
        cmplwi  r4,0                            // dst==NULL?
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r10,r3                          // for copyin, source ptr (r3) is in user space
        crmove  kkNull,cr0_eq                   // remember if (dst==NULL)
        stw     r0,0(r6)                        // initialize #bytes moved
        crset   kkIn                            // flag as copyin (rather than copyout)
        b       copyJoin1                       // skip over the "crclr kkNull"


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyout(src, dst, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      size_t count;
 */

        .align  5
        .globl  EXT(copyout)
        .globl  EXT(copyoutmsg)

LEXT(copyout)
LEXT(copyoutmsg)

#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
        stw     r12,0x6100+(12*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mfcr    r2                              // save caller's CR
        crclr   kkString                        // not a string version
        mr      r10,r4                          // dest (r4) is user-space ptr
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyin(src, dst, count)
 *      vm_offset_t src;
 *      vm_offset_t dst;
 *      size_t count;
 */


        .align  5
        .globl  EXT(copyin)
        .globl  EXT(copyinmsg)

LEXT(copyin)
LEXT(copyinmsg)

        mfcr    r2                              // save caller's CR
        crclr   kkString                        // not a string version
        mr      r10,r3                          // source (r3) is user-space ptr in copyin
        crset   kkIn                            // flag as copyin


// Common code to handle setup for all the copy variants:
//      r2 = caller's CR, since we use cr3
//      r3-r6 = parameters
//      r10 = user-space ptr (r3 if copyin, r4 if copyout)
//      cr3 = kkIn, kkString, kkNull flags

copyJoin:
        crclr   kkNull                          // (dst==NULL) convention not used with this call
copyJoin1:                                      // enter from copyinstr with kkNull already set
        mflr    r0                              // get return address
        cmplwi  r5,0                            // buffer length 0?
        lis     r9,0x1000                       // r9 <- 0x10000000 (256MB)
        stw     r0,FM_LR_SAVE(r1)               // save return
        cmplw   cr1,r5,r9                       // buffer length > 256MB ?
        mfsprg  r8,2                            // get the features
        beq--   copyinout_0                     // 0 length is degenerate case
        stwu    r1,-kkFrameSize(r1)             // set up stack frame
        stw     r2,kkCR(r1)                     // save caller's CR since we use cr3
        mtcrf   0x02,r8                         // move pf64Bit to cr6
        stw     r3,kkSource(r1)                 // save args across MapUserAddressSpace
        stw     r4,kkDest(r1)
        stw     r5,kkBufSize(r1)
        crmove  kk64bit,pf64Bitb                // remember if this is a 64-bit processor
        stw     r6,kkCountPtr(r1)
        stw     r31,kkR31Save(r1)               // we use r31 globally for mapped user ptr
        li      r31,0                           // no mapped ptr yet


// Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
// the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
// is due to MapUserAddressSpace; we don't want to consume more than two segments for
// the mapping.

        ble++   cr1,copyin0                     // skip if buffer length <= 256MB
        bf      kkString,copyinout_too_big      // error if not string op
        mr      r5,r9                           // silently clamp buffer length to 256MB
        stw     r9,kkBufSize(r1)                // update saved copy too


// Set up thread_recover in case we hit an illegal address.

copyin0:
        mfsprg  r8,1                            /* Get the current act */
        lis     r2,hi16(copyinout_error)
        lwz     r7,ACT_THREAD(r8)
        ori     r2,r2,lo16(copyinout_error)
        lwz     r3,ACT_VMMAP(r8)                // r3 <- vm_map virtual address
        stw     r2,THREAD_RECOVER(r7)


// Map user segment into kernel map, turn on 64-bit mode.
//      r3 = vm map
//      r5 = buffer length
//      r10 = user space ptr (r3 if copyin, r4 if copyout)

        mr      r6,r5                           // Set length to map
        li      r4,0                            // Note: we only do this 32-bit for now
        mr      r5,r10                          // arg2 <- user space ptr
#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
        stw     r12,0x6100+(13*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        bl      EXT(MapUserAddressSpace)        // set r3 <- address in kernel map of user operand
#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
        stw     r12,0x6100+(14*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        or.     r0,r3,r4                        // Did we fail the mapping?
        mr      r31,r4                          // r31 <- mapped ptr into user space (may be 64-bit)
        beq--   copyinout_error                 // was 0, so there was an error making the mapping
        bf--    kk64bit,copyin1                 // skip if a 32-bit processor

        rldimi  r31,r3,32,0                     // slam high-order bits into mapped ptr
        mfmsr   r4                              // if 64-bit, turn on SF so we can use returned ptr
        li      r0,1
        rldimi  r4,r0,63,MSR_SF_BIT             // light bit 0
        mtmsrd  r4                              // turn on 64-bit mode
        isync                                   // wait for mode to change


// Load r3-r5, substituting mapped ptr as appropriate.

copyin1:
        lwz     r5,kkBufSize(r1)                // restore length to copy
        bf      kkIn,copyin2                    // skip if copyout
        lwz     r4,kkDest(r1)                   // copyin: source is mapped, dest is r4 at entry
        mr      r3,r31                          // source is mapped ptr
        b       copyin3
copyin2:                                        // handle copyout
        lwz     r3,kkSource(r1)                 // source is kernel buffer (r3 at entry)
        mr      r4,r31                          // dest is mapped ptr into user space


// Finally, all set up to copy:
//      r3 = source ptr (mapped if copyin)
//      r4 = dest ptr (mapped if copyout)
//      r5 = length
//      r31 = mapped ptr returned by MapUserAddressSpace
//      cr3 = kkIn, kkString, kk64bit, and kkNull flags

copyin3:
        bt      kkString,copyString             // handle copyinstr and copyoutstr
        bl      EXT(bcopy)                      // copyin and copyout: let bcopy do the work
        li      r3,0                            // return success


// Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
// from error recovery if we get a DSI accessing user space.  Clear recovery ptr,
// and pop off frame.  Note that we have kept the mapped ptr into user space in r31,
// as a reg64_t type (ie, a 64-bit ptr on 64-bit machines).  We must unpack r31 into
// an addr64_t in (r3,r4) before passing it to ReleaseUserAddressSpace.
//      r3 = 0, EFAULT, or ENAMETOOLONG

copyinx:
        lwz     r2,kkCR(r1)                     // get caller's cr3
        mfsprg  r6,1                            // Get the current act
        lwz     r10,ACT_THREAD(r6)

        bf--    kk64bit,copyinx1                // skip if 32-bit processor
        mfmsr   r12
        rldicl  r12,r12,0,MSR_SF_BIT+1          // if 64-bit processor, turn 64-bit mode off
        mtmsrd  r12                             // turn SF off and EE back on
        isync                                   // wait for the mode to change
copyinx1:
        lwz     r31,kkR31Save(r1)               // restore caller's r31
        addi    r1,r1,kkFrameSize               // pop off our stack frame
        lwz     r0,FM_LR_SAVE(r1)
        li      r4,0
        stw     r4,THREAD_RECOVER(r10)          // Clear recovery
        mtlr    r0
        mtcrf   0x10,r2                         // restore cr3
        blr


/* We get here via the exception handler if an illegal
 * user memory reference was made.  This error handler is used by
 * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
 * they were at point of fault, so for example cr3 flags are valid.
 */

copyinout_error:
        li      r3,EFAULT                       // return error
        b       copyinx

copyinout_0:                                    // degenerate case: 0-length copy
        mtcrf   0x10,r2                         // restore cr3
        li      r3,0                            // return success
        blr

copyinout_too_big:                              // degenerate case
        mtcrf   0x10,r2                         // restore cr3
        lwz     r1,0(r1)                        // pop off stack frame
        li      r3,ENAMETOOLONG
        blr


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
// Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
// if necessary, and:
//      r3 = source ptr, mapped if copyinstr
//      r4 = dest ptr, mapped if copyoutstr
//      r5 = buffer length
//      r31 = mapped ptr returned by MapUserAddressSpace
//      cr3 = kkIn, kkString, kkNull, and kk64bit flags
// We do word copies unless the buffer is very short, then use a byte copy loop
// for the leftovers if necessary.
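//
// In outline: if the source is unaligned, a short byte loop (copyinstr11/12) first
// copies the 1-3 "header" bytes up to a word boundary; the word loops (copyinstr5/6)
// then scan and copy a word at a time until a 0 byte is found or the buffer fills;
// a final byte loop (copyinstr8) handles any 1-3 leftover bytes.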

copyString:
        li      r12,0                           // Set header bytes count to zero
        cmplwi  cr1,r5,20                       // is buffer very short?
        mtctr   r5                              // assuming short, set up loop count for bytes
        blt     cr1,copyinstr8                  // too short for word loop
        andi.   r12,r3,0x3                      // is source ptr word aligned?
        bne     copyinstr11                     // no, use the header byte loop
copyinstr1:
        srwi    r6,r5,2                         // get #words in buffer
        mtctr   r6                              // set up word loop count
        lis     r10,hi16(0xFEFEFEFF)            // load magic constants into r10 and r11
        lis     r11,hi16(0x80808080)
        ori     r10,r10,lo16(0xFEFEFEFF)
        ori     r11,r11,lo16(0x80808080)
        bf      kkNull,copyinstr6               // enter loop that copies
        b       copyinstr5                      // use loop that just counts


// Word loop(s).  They do a word-parallel search for 0s, using the following
// non-obvious but very efficient test:
//      y = data + 0xFEFEFEFF
//      z = ~data & 0x80808080
// If (y & z)==0, then all bytes in dataword are nonzero.  We need two copies of
// this loop, since if we test kkNull in the loop then it becomes 9 words long.
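//
// Worked example: for data = 0x61620063 ('a','b',0,'c'):
//      y = 0x61620063 + 0xFEFEFEFF = 0x6060FF62
//      z = ~0x61620063 & 0x80808080 = 0x80808080
//      y & z = 0x00008000                      // the 0x80 marks the zero byte
// whereas for data = 0x61626364 every byte of y stays below 0x80, so y & z == 0.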

        .align  5                               // align inner loops for speed
copyinstr5:                                     // version that counts but does not copy
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // increment source ptr
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7
        bdnzt   cr0_eq,copyinstr5               // if r7==0, then all bytes are nonzero

        b       copyinstr7

        .align  5                               // align inner loops for speed
copyinstr6:                                     // version that counts and copies
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // increment source ptr
        addi    r4,r4,4                         // increment dest ptr while we wait for data
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7
        stw     r8,-4(r4)                       // pack all 4 bytes into buffer
        bdnzt   cr0_eq,copyinstr6               // if r7==0, then all bytes are nonzero


// Either 0 found or buffer filled.  The above algorithm has mapped nonzero bytes to 0
// and 0 bytes to 0x80, with one exception: 0x01 bytes preceding the first 0 are also
// mapped to 0x80.  We must mask out these false hits before searching for an 0x80 byte.

copyinstr7:
        crnot   kkZero,cr0_eq                   // 0 found iff cr0_eq is off
        mfctr   r6                              // get #words remaining in buffer
        rlwinm  r2,r8,7,0,31                    // move 0x01 bits to 0x80 position
        slwi    r6,r6,2                         // convert to #bytes remaining
        andc    r7,r7,r2                        // turn off false hits from 0x0100 worst case
        rlwimi  r6,r5,0,30,31                   // add in odd bytes leftover in buffer
        srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
        addi    r6,r6,4                         // don't count last word xferred (yet)
        cntlzw  r7,r7                           // now we can find the 0 byte (ie, the 0x80)
        srwi    r7,r7,3                         // convert 8,16,24,32 to 1,2,3,4
        sub.    r6,r6,r7                        // account for nonzero bytes in last word
        bt++    kkZero,copyinstr10              // 0 found, so done

        beq     copyinstr10                     // r6==0, so buffer truly full
        mtctr   r6                              // 0 not found, loop over r6 bytes
        b       copyinstr8                      // enter byte loop for last 1-3 leftover bytes


// Byte loop.  This is used for very small buffers and for the odd bytes left over
// after searching and copying words at a time.

        .align  5                               // align inner loops for speed
copyinstr8:                                     // loop over bytes of source
        lbz     r0,0(r3)                        // get next byte of source
        addi    r3,r3,1
        addi    r4,r4,1                         // increment dest addr whether we store or not
        cmpwi   r0,0                            // the 0?
        bt--    kkNull,copyinstr9               // don't store (was copyinstr with NULL ptr)
        stb     r0,-1(r4)
copyinstr9:
        bdnzf   cr0_eq,copyinstr8               // loop if byte not 0 and more room in buffer

        mfctr   r6                              // get #bytes left in buffer
        crmove  kkZero,cr0_eq                   // remember if 0 found or buffer filled


// Buffer filled or 0 found.  Unwind and return.
//      r5 = kkBufSize, ie buffer length
//      r6 = untransferred bytes remaining in buffer
//      r31 = mapped ptr returned by MapUserAddressSpace
//      cr3 = kkZero set iff 0 found

copyinstr10:
        lwz     r9,kkCountPtr(r1)               // get ptr to place to store count of bytes moved
        sub     r2,r5,r6                        // get #bytes we moved, counting the 0 iff any
        add     r2,r2,r12                       // add the header bytes count
        li      r3,0                            // assume 0 return status
        stw     r2,0(r9)                        // store #bytes moved
        bt++    kkZero,copyinx                  // we did find the 0 so return 0
        li      r3,ENAMETOOLONG                 // buffer filled
        b       copyinx                         // join main exit routine

// Byte loop.  This is used on the header bytes for an unaligned source.

        .align  5                               // align inner loops for speed
copyinstr11:
        li      r10,4                           // load word size
        sub     r12,r10,r12                     // set the header bytes count
        mtctr   r12                             // set up byte loop count
copyinstr12:                                    // loop over bytes of source
        lbz     r0,0(r3)                        // get next byte of source
        addi    r3,r3,1
        addi    r4,r4,1                         // increment dest addr whether we store or not
        cmpwi   r0,0                            // the 0?
        bt--    kkNull,copyinstr13              // don't store (was copyinstr with NULL ptr)
        stb     r0,-1(r4)
copyinstr13:
        bdnzf   cr0_eq,copyinstr12              // loop if byte not 0 and more room in buffer
        sub     r5,r5,r12                       // subtract the bytes copied
        bne     cr0_eq,copyinstr1               // branch to word loop

        mr      r5,r12                          // Get the header bytes count
        li      r12,0                           // Clear the header bytes count
        mfctr   r6                              // get #bytes left in buffer
        crmove  kkZero,cr0_eq                   // remember if 0 found or buffer filled
        b       copyinstr10