1/*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25#include <debug.h>
26#include <ppc/asm.h>
27#include <ppc/proc_reg.h>
28#include <mach/ppc/vm_param.h>
29#include <assym.s>
30#include <sys/errno.h>
31
32#define INSTRUMENT 0
33
34//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
35/*
36 * void pmap_zero_page(vm_offset_t pa)
37 *
38 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
39 * and handles 32 and 128-byte cache lines.
40 */
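/*
 * Illustration only (not assembled): a minimal C sketch of the line-at-a-time
 * zeroing done below.  Per the slwi/sldi by 12 in the code, the argument is
 * treated as a page number; cache_line_size() and zero_cache_line() are
 * hypothetical stand-ins for the feature test and for dcbz/dcbz128.
 *
 *	void pmap_zero_page_sketch(unsigned int pn) {
 *		char *page = (char *)(pn << 12);		// page number -> byte address
 *		unsigned int line = cache_line_size();		// 32 or 128 bytes
 *		for (unsigned int off = 0; off < PPC_PGBYTES; off += line)
 *			zero_cache_line(page + off);		// stands in for dcbz/dcbz128
 *	}
 */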
41
42
43 .align 5
44 .globl EXT(pmap_zero_page)
45
46LEXT(pmap_zero_page)
47
48 mflr r12 // save return address
49 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
50 mtlr r12 // restore return address
51 andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
52
53 subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page
54
55 bt++ pf64Bitb,page0S4 // Go do the big guys...
56
57 slwi r3,r3,12 // get page address from page num
58 b page_zero_1 // Jump to line aligned loop...
59
60 .align 5
61
62 nop
63 nop
64 nop
65 nop
66 nop
67 nop
68 nop
69
70page0S4:
71 sldi r3,r3,12 // get page address from page num
72
73page_zero_1: // loop zeroing cache lines
74 sub. r5,r4,r9 // more to go?
75 dcbz128 r3,r4 // zero either 32 or 128 bytes
76 sub r4,r5,r9 // generate next offset
77 dcbz128 r3,r5
78 bne-- page_zero_1
79
80 b EXT(ml_restore) // restore MSR and do the isync
81
82
83//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
84/* void
85 * phys_copy(src, dst, bytecount)
86 * addr64_t src;
87 * addr64_t dst;
88 * int bytecount;
89 *
90 * This routine will copy bytecount bytes from physical address src to physical
91 * address dst. It runs in 64-bit mode if necessary, but does not handle
92 * overlap or make any attempt to be optimal. Length must be a signed word.
93 * Not performance critical.
94 */
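/*
 * Illustration only (not assembled): phys_copy's strategy in C, a simple word
 * loop followed by a byte loop for the tail, with no overlap handling and no
 * attempt at tuning.
 *
 *	void phys_copy_sketch(const char *src, char *dst, int bytecount) {
 *		while (bytecount >= 4) {			// copy whole words first
 *			*(unsigned int *)dst = *(const unsigned int *)src;
 *			src += 4;  dst += 4;  bytecount -= 4;
 *		}
 *		while (bytecount-- > 0)				// then any leftover bytes
 *			*dst++ = *src++;
 *	}
 */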
95
96
97 .align 5
98 .globl EXT(phys_copy)
99
100LEXT(phys_copy)
101
102 rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
103 mflr r12 // get return address
104 rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
105 rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
106 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
107 rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits
108 mtlr r12 // restore return address
109 subic. r5,r7,4 // a word to copy?
110 b phys_copy_2
111
112 .align 5
113
114phys_copy_1: // loop copying words
115 subic. r5,r5,4 // more to go?
116 lwz r0,0(r3)
117 addi r3,r3,4
118 stw r0,0(r4)
119 addi r4,r4,4
120phys_copy_2:
121 bge phys_copy_1
122 addic. r5,r5,4 // restore count
123 ble phys_copy_4 // no more
124
125 // Loop is aligned here
126
127phys_copy_3: // loop copying bytes
128 subic. r5,r5,1 // more to go?
129 lbz r0,0(r3)
130 addi r3,r3,1
131 stb r0,0(r4)
132 addi r4,r4,1
133 bgt phys_copy_3
134phys_copy_4:
135 b EXT(ml_restore) // restore MSR and do the isync
136
137
138//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
139/* void
140 * pmap_copy_page(src, dst)
141 * ppnum_t src;
142 * ppnum_t dst;
143 *
144 * This routine will copy the physical page src to physical page dst
145 *
146 * This routine assumes that the src and dst are page numbers and that the
147 * destination is cached. It runs on 32 and 64 bit processors, with and
148 * without altivec, and with 32 and 128 byte cache lines.
149 * We also must assume that no-one will be executing within the destination
150 * page, and that this will be used for paging. Because this
151 * is a common routine, we have tuned loops for each processor class.
152 *
153 */
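/*
 * Illustration only (not assembled): the common shape of the copy loops below
 * in C.  CACHE_LINE (32 or 128), zero_dest_line() (dcbz/dcba),
 * flush_dest_line() (dcbst/dcbf), and icbi_line() (icbi) are hypothetical
 * stand-ins for the cache operations.
 *
 *	void pmap_copy_page_sketch(unsigned int src_pn, unsigned int dst_pn) {
 *		char *src = (char *)(src_pn << 12);
 *		char *dst = (char *)(dst_pn << 12);
 *		unsigned int off, i;
 *		for (off = 0; off < PPC_PGBYTES; off += CACHE_LINE) {
 *			zero_dest_line(dst + off);		// avoid reading the dest line
 *			for (i = 0; i < CACHE_LINE; i++)
 *				dst[off + i] = src[off + i];
 *			flush_dest_line(dst + off);		// push the line to RAM
 *		}
 *		for (off = 0; off < PPC_PGBYTES; off += CACHE_LINE)
 *			icbi_line(dst + off);			// invalidate stale icache lines
 *	}
 */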
154#define kSFSize (FM_SIZE+160)
155
156ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
157
158 lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
159 mflr r0 // get return
160 ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
161 stw r0,8(r1) // save
162 stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
163 mfmsr r11 // save MSR at entry
164 mfsprg r10,2 // get feature flags
165 andc r11,r11,r2 // Clear out vec and fp
166 ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
167 andc r2,r11,r2 // Clear out EE as well
168 mtcrf 0x02,r10 // we need to test pf64Bit
169 ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
170 mtcrf 0x80,r10 // we need to test pfAltivec too
171 oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
172 mtmsr r2 // turn EE off, FP and VEC on
173 isync
174 bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
175 slwi r3,r3,12 // get page address from page num
176 slwi r4,r4,12 // get page address from page num
177 rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
178 bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
179
180
181 // G3 -- copy using FPRs
182
183 stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
184 stfd f1,FM_SIZE+8(r1)
185 li r5,PPC_PGBYTES/32 // count of cache lines in a page
186 stfd f2,FM_SIZE+16(r1)
187 mtctr r5
188 stfd f3,FM_SIZE+24(r1)
189 mtmsr r12 // turn off DR after saving FPRs on stack
190 isync
191
192pmap_g3_copy_loop: // loop over 32-byte cache lines
193 dcbz 0,r4 // avoid read of dest line
194 lfd f0,0(r3)
195 lfd f1,8(r3)
196 lfd f2,16(r3)
197 lfd f3,24(r3)
198 addi r3,r3,32
199 stfd f0,0(r4)
200 stfd f1,8(r4)
201 stfd f2,16(r4)
202 stfd f3,24(r4)
203 dcbst 0,r4 // flush dest line to RAM
204 addi r4,r4,32
205 bdnz pmap_g3_copy_loop
206
207 sync // wait for stores to take
208 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
209 li r6,PPC_PGBYTES-32 // point to last line in page
210pmap_g3_icache_flush:
211 subic. r5,r6,32 // more to go?
212 icbi r4,r6 // flush another line in icache
213 subi r6,r5,32 // get offset to next line
214 icbi r4,r5
215 bne pmap_g3_icache_flush
216
217 sync
218 mtmsr r2 // turn DR back on
219 isync
220 lfd f0,FM_SIZE+0(r1) // restore the FPRs
221 lfd f1,FM_SIZE+8(r1)
222 lfd f2,FM_SIZE+16(r1)
223 lfd f3,FM_SIZE+24(r1)
224
225 b pmap_g4_restore // restore MSR and done
226
227
228 // G4 -- copy using VRs
229
230pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
231 la r9,FM_SIZE+16(r1) // r9 <- address of the VR save area
232 li r5,16 // load x-form offsets into r5-r9
233 li r6,32 // another offset
234 stvx v0,0,r9 // save some VRs so we can use to copy
235 li r7,48 // another offset
236 stvx v1,r5,r9
237 li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
238 stvx v2,r6,r9
239 mtctr r0
240 li r8,96 // get look-ahead for touch
241 stvx v3,r7,r9
242 li r9,128
243 mtmsr r12 // now we've saved VRs on stack, turn off DR
244 isync // wait for it to happen
245 b pmap_g4_copy_loop
246
247 .align 5 // align inner loops
248pmap_g4_copy_loop: // loop over 64-byte chunks
249 dcbt r3,r8 // touch 3 lines ahead
250 nop // avoid a 17-word loop...
251 dcbt r3,r9 // touch 4 lines ahead
252 nop // more padding
253 dcba 0,r4 // avoid pre-fetch of 1st dest line
254 lvx v0,0,r3 // offset 0
255 lvx v1,r5,r3 // offset 16
256 lvx v2,r6,r3 // offset 32
257 lvx v3,r7,r3 // offset 48
258 addi r3,r3,64
259 dcba r6,r4 // avoid pre-fetch of 2nd line
260 stvx v0,0,r4 // offset 0
261 stvx v1,r5,r4 // offset 16
262 stvx v2,r6,r4 // offset 32
263 stvx v3,r7,r4 // offset 48
264 dcbf 0,r4 // push line 1
265 dcbf r6,r4 // and line 2
266 addi r4,r4,64
267 bdnz pmap_g4_copy_loop
268
269 sync // wait for stores to take
270 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
271 li r8,PPC_PGBYTES-32 // point to last line in page
272pmap_g4_icache_flush:
273 subic. r9,r8,32 // more to go?
274 icbi r4,r8 // flush from icache
275 subi r8,r9,32 // get offset to next line
276 icbi r4,r9
277 bne pmap_g4_icache_flush
278
279 sync
280 mtmsr r2 // turn DR back on
281 isync
282 la r9,FM_SIZE+16(r1) // get base of VR save area
283 lvx v0,0,r9 // restore the VRs
284 lvx v1,r5,r9
285 lvx v2,r6,r9
286 lvx v3,r7,r9
287
288pmap_g4_restore: // r11=MSR
289 mtmsr r11 // turn EE on, VEC and FR off
290 isync // wait for it to happen
291 addi r1,r1,kSFSize // pop off our stack frame
292 lwz r0,8(r1) // restore return address
293 mtlr r0
294 blr
295
296
297 // 64-bit/128-byte processor: copy using VRs
298
299pmap_copy_64: // r10=features, r11=old MSR
300 sldi r3,r3,12 // get page address from page num
301 sldi r4,r4,12 // get page address from page num
302 la r9,FM_SIZE+16(r1) // get base of VR save area
303 li r5,16 // load x-form offsets into r5-r9
304 li r6,32 // another offset
305 bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
306 stvx v0,0,r9 // save 8 VRs so we can copy w/o bubbles
307 stvx v1,r5,r9
308 li r7,48 // another offset
309 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
310 stvx v2,r6,r9
311 stvx v3,r7,r9
312 addi r9,r9,64 // advance base ptr so we can store another 4
313 mtctr r0
314 li r0,MASK(MSR_DR) // get DR bit
315 stvx v4,0,r9
316 stvx v5,r5,r9
317 andc r12,r2,r0 // turn off DR bit
318 li r0,1 // get a 1 to slam into SF
319 stvx v6,r6,r9
320 stvx v7,r7,r9
321 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
322 li r8,-128 // offset so we can reach back one line
323 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
324 isync // wait for it to happen
325 dcbt128 0,r3,1 // start a forward stream
326 b pmap_64_copy_loop
327
328 .align 5 // align inner loops
329pmap_64_copy_loop: // loop over 128-byte chunks
330 dcbz128 0,r4 // avoid read of destination line
331 lvx v0,0,r3 // offset 0
332 lvx v1,r5,r3 // offset 16
333 lvx v2,r6,r3 // offset 32
334 lvx v3,r7,r3 // offset 48
335 addi r3,r3,64 // don't have enough GPRs so add 64 2x
336 lvx v4,0,r3 // offset 64
337 lvx v5,r5,r3 // offset 80
338 lvx v6,r6,r3 // offset 96
339 lvx v7,r7,r3 // offset 112
340 addi r3,r3,64
341 stvx v0,0,r4 // offset 0
342 stvx v1,r5,r4 // offset 16
343 stvx v2,r6,r4 // offset 32
344 stvx v3,r7,r4 // offset 48
345 addi r4,r4,64
346 stvx v4,0,r4 // offset 64
347 stvx v5,r5,r4 // offset 80
348 stvx v6,r6,r4 // offset 96
349 stvx v7,r7,r4 // offset 112
350 addi r4,r4,64
351 dcbf r8,r4 // flush the line we just wrote
352 bdnz pmap_64_copy_loop
353
354 sync // wait for stores to take
355 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
356 li r8,PPC_PGBYTES-128 // point to last line in page
357pmap_64_icache_flush:
358 subic. r9,r8,128 // more to go?
359 icbi r4,r8 // flush from icache
360 subi r8,r9,128 // get offset to next line
361 icbi r4,r9
362 bne pmap_64_icache_flush
363
364 sync
365 mtmsrd r2 // turn DR back on, SF off
366 isync
367 la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
368 lvx v0,0,r9 // restore the VRs
369 lvx v1,r5,r9
370 lvx v2,r6,r9
371 lvx v3,r7,r9
372 addi r9,r9,64
373 lvx v4,0,r9
374 lvx v5,r5,r9
375 lvx v6,r6,r9
376 lvx v7,r7,r9
377
378 b pmap_g4_restore // restore lower half of MSR and return
379
380 //
381 // Copy on 64-bit without VMX
382 //
383
384pmap_novmx_copy:
385 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
386 mtctr r0
387 li r0,MASK(MSR_DR) // get DR bit
388 andc r12,r2,r0 // turn off DR bit
389 li r0,1 // get a 1 to slam into SF
390 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
391 mtmsrd r12 // turn DR off and SF on (no VRs to save on this path)
392 isync // wait for it to happen
393 dcbt128 0,r3,1 // start a forward stream
394
395pmap_novmx_copy_loop: // loop over 128-byte cache lines
396 dcbz128 0,r4 // avoid read of dest line
397
398 ld r0,0(r3) // Load half a line
399 ld r12,8(r3)
400 ld r5,16(r3)
401 ld r6,24(r3)
402 ld r7,32(r3)
403 ld r8,40(r3)
404 ld r9,48(r3)
405 ld r10,56(r3)
406
407 std r0,0(r4) // Store half a line
408 std r12,8(r4)
409 std r5,16(r4)
410 std r6,24(r4)
411 std r7,32(r4)
412 std r8,40(r4)
413 std r9,48(r4)
414 std r10,56(r4)
415
416 ld r0,64(r3) // Load half a line
417 ld r12,72(r3)
418 ld r5,80(r3)
419 ld r6,88(r3)
420 ld r7,96(r3)
421 ld r8,104(r3)
422 ld r9,112(r3)
423 ld r10,120(r3)
424
425 addi r3,r3,128
426
427 std r0,64(r4) // Store half a line
428 std r12,72(r4)
429 std r5,80(r4)
430 std r6,88(r4)
431 std r7,96(r4)
432 std r8,104(r4)
433 std r9,112(r4)
434 std r10,120(r4)
435
436 dcbf 0,r4 // flush the line we just wrote
437 addi r4,r4,128
438 bdnz pmap_novmx_copy_loop
439
440 sync // wait for stores to take
441 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
442 li r8,PPC_PGBYTES-128 // point to last line in page
443
444pmap_novmx_icache_flush:
445 subic. r9,r8,128 // more to go?
446 icbi r4,r8 // flush from icache
447 subi r8,r9,128 // get offset to next line
448 icbi r4,r9
449 bne pmap_novmx_icache_flush
450
451 sync
452 mtmsrd r2 // turn DR back on, SF off
453 isync
454
455 b pmap_g4_restore // restore lower half of MSR and return
456
457
458
459//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
460
461// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
462// These routines all run on both 32- and 64-bit machines, though because they are called
463// by the BSD kernel they are always entered in 32-bit mode. The mapped ptr returned
464// by MapUserAddressSpace will, however, be 64 bits on 64-bit machines. Beware of using
465// compare instructions on this ptr. The mapped ptr is kept globally in r31, so there
466// is no need to store or load it; those are mode-dependent operations, since the ptr could be
467// 32 or 64 bits.
468
469#define kkFrameSize (FM_SIZE+32)
470
471#define kkBufSize (FM_SIZE+0)
472#define kkCR (FM_SIZE+4)
473#define kkSource (FM_SIZE+8)
474#define kkDest (FM_SIZE+12)
475#define kkCountPtr (FM_SIZE+16)
476#define kkR31Save (FM_SIZE+20)
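/*
 * Illustration only (not assembled): a hypothetical C view of the locals the
 * kk* offsets above carve out of the frame, just past the fixed FM_SIZE area.
 * The code addresses these fields only through the offsets.
 *
 *	struct copyio_locals {				// located at (r1 + FM_SIZE)
 *		unsigned int	buf_size;		// kkBufSize	FM_SIZE+0
 *		unsigned int	saved_cr;		// kkCR		FM_SIZE+4
 *		unsigned int	source;			// kkSource	FM_SIZE+8
 *		unsigned int	dest;			// kkDest	FM_SIZE+12
 *		unsigned int	count_ptr;		// kkCountPtr	FM_SIZE+16
 *		unsigned int	saved_r31;		// kkR31Save	FM_SIZE+20
 *	};
 */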
477
478
479// nonvolatile CR bits we use as flags in cr3
480
481#define kk64bit 12
482#define kkNull 13
483#define kkIn 14
484#define kkString 15
485#define kkZero 15
486
487
488//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
489/*
490 * int
491 * copyoutstr(src, dst, maxcount, count)
492 * vm_offset_t src;
493 * vm_offset_t dst;
494 * vm_size_t maxcount;
495 * vm_size_t* count;
496 *
497 * Set *count to the number of bytes copied.
498 */
499
500ENTRY(copyoutstr, TAG_NO_FRAME_USED)
501 mfcr r2 // we use nonvolatile cr3
502 li r0,0
503 crset kkString // flag as a string op
504 mr r10,r4 // for copyout, dest ptr (r4) is in user space
505 stw r0,0(r6) // initialize #bytes moved
506 crclr kkIn // flag as copyout
507 b copyJoin
508
509
510//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
511/*
512 * int
513 * copyinstr(src, dst, maxcount, count)
514 * vm_offset_t src;
515 * vm_offset_t dst;
516 * vm_size_t maxcount;
517 * vm_size_t* count;
518 *
519 * Set *count to the number of bytes copied
520 * If dst == NULL, don't copy, just count bytes.
521 * Only currently called from klcopyinstr.
522 */
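/*
 * Illustration only: a hedged sketch of how a hypothetical caller might use
 * copyinstr per the interface above (user_ptr is an assumed vm_offset_t).
 *
 *	char		namebuf[128];
 *	vm_size_t	nbytes;
 *	int err = copyinstr(user_ptr, (vm_offset_t)namebuf, sizeof(namebuf), &nbytes);
 *	// err is 0 on success (nbytes counts the bytes moved, including the 0),
 *	// EFAULT on a bad user address, or ENAMETOOLONG if the buffer filled.
 */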
523
524ENTRY(copyinstr, TAG_NO_FRAME_USED)
525 mfcr r2 // we use nonvolatile cr3
526 cmplwi r4,0 // dst==NULL?
527 li r0,0
528 crset kkString // flag as a string op
529 mr r10,r3 // for copyin, source ptr (r3) is in user space
530 crmove kkNull,cr0_eq // remember if (dst==NULL)
531 stw r0,0(r6) // initialize #bytes moved
532 crset kkIn // flag as copyin (rather than copyout)
533 b copyJoin1 // skip over the "crclr kkNull"
534
535
536//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
537/*
538 * int
539 * copyout(src, dst, count)
540 * vm_offset_t src;
541 * vm_offset_t dst;
542 * size_t count;
543 */
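/*
 * Illustration only: a hedged sketch of a hypothetical caller pushing a
 * kernel value out to user space with the interface above (user_addr is an
 * assumed vm_offset_t).
 *
 *	int result = 0;					// computed earlier
 *	if (copyout((vm_offset_t)&result, user_addr, sizeof(result)) != 0)
 *		return (EFAULT);			// user buffer unmapped or bad
 */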
544
545 .align 5
546 .globl EXT(copyout)
547 .globl EXT(copyoutmsg)
548
549LEXT(copyout)
550LEXT(copyoutmsg)
551
552#if INSTRUMENT
553 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
554 stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
555 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
556 stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
557 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
558 stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
559 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
560 stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
561#endif
562 mfcr r2 // save caller's CR
563 crclr kkString // not a string version
564 mr r10,r4 // dest (r4) is user-space ptr
565 crclr kkIn // flag as copyout
566 b copyJoin
567
568
569//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
570/*
571 * int
572 * copyin(src, dst, count)
573 * vm_offset_t src;
574 * vm_offset_t dst;
575 * size_t count;
576 */
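/*
 * Illustration only: a hedged sketch of a hypothetical caller copying a word
 * in from user space with the interface above (user_addr is an assumed
 * vm_offset_t).
 *
 *	int value;
 *	if (copyin(user_addr, (vm_offset_t)&value, sizeof(value)) != 0)
 *		return (EFAULT);			// user address unmapped or bad
 */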
577
578
579 .align 5
580 .globl EXT(copyin)
581 .globl EXT(copyinmsg)
582
583LEXT(copyin)
584LEXT(copyinmsg)
585
586 mfcr r2 // save caller's CR
587 crclr kkString // not a string version
588 mr r10,r3 // source (r3) is user-space ptr in copyin
589 crset kkIn // flag as copyin
590
591
592// Common code to handle setup for all the copy variants:
593// r2 = caller's CR, since we use cr3
594// r3-r6 = parameters
595// r10 = user-space ptr (r3 if copyin, r4 if copyout)
596// cr3 = kkIn, kkString, kkNull flags
597
598copyJoin:
599 crclr kkNull // (dst==NULL) convention not used with this call
600copyJoin1: // enter from copyinstr with kkNull set
601 mflr r0 // get return address
602 cmplwi r5,0 // buffer length 0?
603 lis r9,0x1000 // r9 <- 0x10000000 (256MB)
604 stw r0,FM_LR_SAVE(r1) // save return
605 cmplw cr1,r5,r9 // buffer length > 256MB ?
606 mfsprg r8,2 // get the features
607 beq-- copyinout_0 // 0 length is degenerate case
608 stwu r1,-kkFrameSize(r1) // set up stack frame
609 stw r2,kkCR(r1) // save caller's CR since we use cr3
610 mtcrf 0x02,r8 // move pf64Bit to cr6
611 stw r3,kkSource(r1) // save args across MapUserAddressSpace
612 stw r4,kkDest(r1)
613 stw r5,kkBufSize(r1)
614 crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
615 stw r6,kkCountPtr(r1)
616 stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
617 li r31,0 // no mapped ptr yet
618
619
620// Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
621// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
622// the buffer length to 256MB. This isn't an issue for strings shorter than 256MB
623// (as most are!), but for longer ones we eventually return ENAMETOOLONG. This restriction
624// is due to MapUserAddressSpace; we don't want to consume more than two segments for
625// the mapping.
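/*
 * Illustration only: the length policy described above as a hedged C sketch
 * (0x10000000 is the 256MB limit loaded into r9 earlier).
 *
 *	if (len > 0x10000000) {
 *		if (!string_op)
 *			return (ENAMETOOLONG);		// copyin/copyout: error out
 *		len = 0x10000000;			// string ops: silently clamp
 *	}
 */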
626
627 ble++ cr1,copyin0 // skip if buffer length <= 256MB
628 bf kkString,copyinout_too_big // error if not string op
629 mr r5,r9 // silently clamp buffer length to 256MB
630 stw r9,kkBufSize(r1) // update saved copy too
631
632
633// Set up thread_recover in case we hit an illegal address.
634
635copyin0:
636 mfsprg r8,1 /* Get the current act */
637 lis r2,hi16(copyinout_error)
638 lwz r7,ACT_THREAD(r8)
639 ori r2,r2,lo16(copyinout_error)
640 lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
641 stw r2,THREAD_RECOVER(r7)
642
643
644// Map user segment into kernel map, turn on 64-bit mode.
645// r3 = vm map
646// r5 = buffer length
647// r10 = user space ptr (r3 if copyin, r4 if copyout)
648
649 mr r6,r5 // Set length to map
650 li r4,0 // Note: we only do this 32-bit for now
651 mr r5,r10 // arg2 <- user space ptr
652#if INSTRUMENT
653 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
654 stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
655 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
656 stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
657 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
658 stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
659 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
660 stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
661#endif
662 bl EXT(MapUserAddressSpace) // set r3 <- address in kernel map of user operand
663#if INSTRUMENT
664 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
665 stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
666 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
667 stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
668 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
669 stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
670 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
671 stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
672#endif
673 or. r0,r3,r4 // Did we fail the mapping?
674 mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
675 beq-- copyinout_error // was 0, so there was an error making the mapping
676 bf-- kk64bit,copyin1 // skip if a 32-bit processor
677
678 rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
679 mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
680 li r0,1
681 rldimi r4,r0,63,MSR_SF_BIT // light bit 0
682 mtmsrd r4 // turn on 64-bit mode
683 isync // wait for mode to change
684
685
686// Load r3-r5, substituting mapped ptr as appropriate.
687
688copyin1:
689 lwz r5,kkBufSize(r1) // restore length to copy
690 bf kkIn,copyin2 // skip if copyout
691 lwz r4,kkDest(r1) // copyin: source is mapped, dest is r4 at entry
692 mr r3,r31 // source is mapped ptr
693 b copyin3
694copyin2: // handle copyout
695 lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
696 mr r4,r31 // dest is mapped ptr into user space
697
698
699// Finally, all set up to copy:
700// r3 = source ptr (mapped if copyin)
701// r4 = dest ptr (mapped if copyout)
702// r5 = length
703// r31 = mapped ptr returned by MapUserAddressSpace
704// cr3 = kkIn, kkString, kk64bit, and kkNull flags
705
706copyin3:
707 bt kkString,copyString // handle copyinstr and copyoutstr
708 bl EXT(bcopy) // copyin and copyout: let bcopy do the work
709 li r3,0 // return success
710
711
712// Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
713// from error recovery if we get a DSI accessing user space. Clear recovery ptr,
714// and pop off frame. Note that we have kept
715// the mapped ptr into user space in r31, as a reg64_t type (i.e., a 64-bit ptr on
716// 64-bit machines). We must unpack r31 into an addr64_t in (r3,r4) before passing
717// it to ReleaseUserAddressSpace.
718// r3 = 0, EFAULT, or ENAMETOOLONG
719
720copyinx:
721 lwz r2,kkCR(r1) // get caller's cr3
722 mfsprg r6,1 // Get the current act
723 lwz r10,ACT_THREAD(r6)
724
725 bf-- kk64bit,copyinx1 // skip if 32-bit processor
726 mfmsr r12
727 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
728 mtmsrd r12 // turn SF off and EE back on
729 isync // wait for the mode to change
730copyinx1:
731 lwz r31,kkR31Save(r1) // restore caller's r31
732 addi r1,r1,kkFrameSize // pop off our stack frame
733 lwz r0,FM_LR_SAVE(r1)
734 li r4,0
735 stw r4,THREAD_RECOVER(r10) // Clear recovery
736 mtlr r0
737 mtcrf 0x10,r2 // restore cr3
738 blr
739
740
741/* We get here via the exception handler if an illegal
742 * user memory reference was made. This error handler is used by
743 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
744 * they were at point of fault, so for example cr3 flags are valid.
745 */
746
747copyinout_error:
748 li r3,EFAULT // return error
749 b copyinx
750
751copyinout_0: // degenerate case: 0-length copy
752 mtcrf 0x10,r2 // restore cr3
753 li r3,0 // return success
754 blr
755
756copyinout_too_big: // degenerate case
757 mtcrf 0x10,r2 // restore cr3
758 lwz r1,0(r1) // pop off stack frame
759 li r3,ENAMETOOLONG
760 blr
761
762
763//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
764// Handle copyinstr and copyoutstr. At this point the stack frame is set up,
765// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
766// if necessary, and:
767// r3 = source ptr, mapped if copyinstr
768// r4 = dest ptr, mapped if copyoutstr
769// r5 = buffer length
770// r31 = mapped ptr returned by MapUserAddressSpace
771// cr3 = kkIn, kkString, kkNull, and kk64bit flags
772// We do word copies unless the buffer is very short, then use a byte copy loop
773// for the leftovers if necessary.
774
775copyString:
776 li r12,0 // Set header bytes count to zero
777 cmplwi cr1,r5,20 // is buffer very short?
778 mtctr r5 // assuming short, set up loop count for bytes
779 blt cr1,copyinstr8 // too short for word loop
780 andi. r12,r3,0x3 // is source ptr word aligned?
781 bne copyinstr11 // bytes loop
782copyinstr1:
783 srwi r6,r5,2 // get #words in buffer
784 mtctr r6 // set up word loop count
785 lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
786 lis r11,hi16(0x80808080)
787 ori r10,r10,lo16(0xFEFEFEFF)
788 ori r11,r11,lo16(0x80808080)
789 bf kkNull,copyinstr6 // enter loop that copies
790 b copyinstr5 // use loop that just counts
791
792
793// Word loop(s). They do a word-parallel search for 0s, using the following
794// non-obvious but very efficient test:
795// y = data + 0xFEFEFEFF
796// z = ~data & 0x80808080
797// If (y & z)==0, then all bytes in dataword are nonzero. We need two copies of
798// this loop, since if we test kkNull in the loop then it becomes 9 words long.
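/*
 * Illustration only: the same test in C.  0xFEFEFEFF is just -0x01010101, so
 * y is data minus 0x01010101, with the borrows doing the per-byte work.
 *
 *	static int word_has_zero_byte(unsigned int data) {
 *		unsigned int y = data + 0xFEFEFEFFu;	// i.e. data - 0x01010101
 *		unsigned int z = ~data & 0x80808080u;
 *		return (y & z) != 0;			// nonzero iff some byte is 0
 *	}
 */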
799
800 .align 5 // align inner loops for speed
801copyinstr5: // version that counts but does not copy
802 lwz r8,0(r3) // get next word of source
803 addi r3,r3,4 // increment source ptr
804 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
805 andc r7,r11,r8 // r7 = ~data & 0x80808080
806 and. r7,r9,r7 // r7 = r9 & r7
807 bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes are nonzero
808
809 b copyinstr7
810
811 .align 5 // align inner loops for speed
812copyinstr6: // version that counts and copies
813 lwz r8,0(r3) // get next word of source
814 addi r3,r3,4 // increment source ptr
815 addi r4,r4,4 // increment dest ptr while we wait for data
816 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
817 andc r7,r11,r8 // r7 = ~data & 0x80808080
818 and. r7,r9,r7 // r7 = r9 & r7
819 stw r8,-4(r4) // pack all 4 bytes into buffer
820 bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
821
822
823// Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
824// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
825// mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte.
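/*
 * Illustration only: the cleanup done at copyinstr7 below, in C.  "data" is
 * the last word loaded (r8) and "hits" the (y & z) result (r7); rotating data
 * left by 7 lines each byte's 0x01 bit up with its 0x80 hit bit so the false
 * hits can be cleared, and counting leading zeros (gcc's __builtin_clz here,
 * cntlzw in the code) then locates the 0 byte.
 *
 *	static unsigned int bytes_thru_zero(unsigned int data, unsigned int hits) {
 *		hits &= ~((data << 7) | (data >> 25));	// rotl(data,7): drop 0x01 false hits
 *		hits >>= 8;				// so the 0 byte itself is counted
 *		return hits ? (__builtin_clz(hits) >> 3) : 4;	// 1..4 bytes, like cntlzw/8
 *	}
 */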
826
827copyinstr7:
828 crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
829 mfctr r6 // get #words remaining in buffer
830 rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
831 slwi r6,r6,2 // convert to #bytes remaining
832 andc r7,r7,r2 // turn off false hits from 0x0100 worst case
833 rlwimi r6,r5,0,30,31 // add in odd bytes leftover in buffer
834 srwi r7,r7,8 // we want to count the 0 as a byte xferred
835 addi r6,r6,4 // don't count last word xferred (yet)
836 cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
837 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
838 sub. r6,r6,r7 // account for nonzero bytes in last word
839 bt++ kkZero,copyinstr10 // 0 found, so done
840
841 beq copyinstr10 // r6==0, so buffer truly full
842 mtctr r6 // 0 not found, loop over r6 bytes
843 b copyinstr8 // enter byte loop for last 1-3 leftover bytes
844
845
846// Byte loop. This is used for very small buffers and for the odd bytes left over
847// after searching and copying words at a time.
848
849 .align 5 // align inner loops for speed
850copyinstr8: // loop over bytes of source
851 lbz r0,0(r3) // get next byte of source
852 addi r3,r3,1
853 addi r4,r4,1 // increment dest addr whether we store or not
854 cmpwi r0,0 // the 0?
855 bt-- kkNull,copyinstr9 // don't store (was copyinstr with NULL ptr)
856 stb r0,-1(r4)
857copyinstr9:
858 bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
859
860 mfctr r6 // get #bytes left in buffer
861 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
862
863
864// Buffer filled or 0 found. Unwind and return.
865// r5 = kkBufSize, ie buffer length
866// r6 = untransferred bytes remaining in buffer
867// r31 = mapped ptr returned by MapUserAddressSpace
868// cr3 = kkZero set iff 0 found
869
870copyinstr10:
871 lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
872 sub r2,r5,r6 // get #bytes we moved, counting the 0 iff any
873 add r2,r2,r12 // add the header bytes count
874 li r3,0 // assume 0 return status
875 stw r2,0(r9) // store #bytes moved
876 bt++ kkZero,copyinx // we did find the 0 so return 0
877 li r3,ENAMETOOLONG // buffer filled
878 b copyinx // join main exit routine
879
880// Byte loop. This is used on the header bytes for an unaligned source.
881
882 .align 5 // align inner loops for speed
883copyinstr11:
884 li r10,4 // load word size
885 sub r12,r10,r12 // set the header bytes count
886 mtctr r12 // set up bytes loop count
887copyinstr12: // loop over bytes of source
888 lbz r0,0(r3) // get next byte of source
889 addi r3,r3,1
890 addi r4,r4,1 // increment dest addr whether we store or not
891 cmpwi r0,0 // the 0?
892 bt-- kkNull,copyinstr13 // don't store (was copyinstr with NULL ptr)
893 stb r0,-1(r4)
894copyinstr13:
895 bdnzf cr0_eq,copyinstr12 // loop if byte not 0 and more room in buffer
896 sub r5,r5,r12 // subtract the bytes copied
897 bne cr0_eq,copyinstr1 // branch to word loop
898
899 mr r5,r12 // Get the header bytes count
900 li r12,0 // Clear the header bytes count
901 mfctr r6 // get #bytes left in buffer
902 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
903 b copyinstr10
904