]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
55e303ae | 2 | * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
43866e37 | 6 | * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. |
1c79356b | 7 | * |
43866e37 A |
8 | * This file contains Original Code and/or Modifications of Original Code |
9 | * as defined in and that are subject to the Apple Public Source License | |
10 | * Version 2.0 (the 'License'). You may not use this file except in | |
11 | * compliance with the License. Please obtain a copy of the License at | |
12 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
13 | * file. | |
14 | * | |
15 | * The Original Code and all software distributed under the License are | |
16 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
17 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
18 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
43866e37 A |
19 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
20 | * Please see the License for the specific language governing rights and | |
21 | * limitations under the License. | |
1c79356b A |
22 | * |
23 | * @APPLE_LICENSE_HEADER_END@ | |
24 | */ | |
25 | /* | |
26 | * @OSF_COPYRIGHT@ | |
27 | */ | |
28 | #include <debug.h> | |
29 | #include <ppc/asm.h> | |
30 | #include <ppc/proc_reg.h> | |
31 | #include <mach/ppc/vm_param.h> | |
32 | #include <assym.s> | |
33 | #include <sys/errno.h> | |
34 | ||
55e303ae A |
35 | #define INSTRUMENT 0 |
36 | ||
37 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
1c79356b A |
38 | /* |
39 | * void pmap_zero_page(vm_offset_t pa) | |
40 | * | |
55e303ae A |
41 | * Zero a page of physical memory. This routine runs in 32 or 64-bit mode, |
42 | * and handles 32 and 128-byte cache lines. | |
1c79356b A |
43 | */ |
44 | ||
1c79356b | 45 | |
55e303ae A |
46 | .align 5 |
47 | .globl EXT(pmap_zero_page) | |
48 | ||
49 | LEXT(pmap_zero_page) | |
50 | ||
51 | mflr r12 // save return address | |
52 | bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10 | |
53 | mtlr r12 // restore return address | |
54 | andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size | |
d7e50217 | 55 | |
55e303ae A |
56 | subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page |
57 | ||
58 | bt++ pf64Bitb,page0S4 // Go do the big guys... | |
59 | ||
60 | slwi r3,r3,12 // get page address from page num | |
61 | b page_zero_1 // Jump to line aligned loop... | |
62 | ||
63 | .align 5 | |
64 | ||
65 | nop | |
66 | nop | |
67 | nop | |
68 | nop | |
69 | nop | |
70 | nop | |
71 | nop | |
72 | ||
73 | page0S4: | |
74 | sldi r3,r3,12 // get page address from page num | |
75 | ||
76 | page_zero_1: // loop zeroing cache lines | |
77 | sub. r5,r4,r9 // more to go? | |
78 | dcbz128 r3,r4 // zero either 32 or 128 bytes | |
79 | sub r4,r5,r9 // generate next offset | |
80 | dcbz128 r3,r5 | |
81 | bne-- page_zero_1 | |
82 | ||
83 | b EXT(ml_restore) // restore MSR and do the isync | |
84 | ||
85 | ||
86 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
1c79356b A |
87 | /* void |
88 | * phys_copy(src, dst, bytecount) | |
55e303ae A |
89 | * addr64_t src; |
90 | * addr64_t dst; | |
1c79356b A |
91 | * int bytecount |
92 | * | |
93 | * This routine will copy bytecount bytes from physical address src to physical | |
55e303ae A |
94 | * address dst. It runs in 64-bit mode if necessary, but does not handle |
95 | * overlap or make any attempt to be optimal. Length must be a signed word. | |
96 | * Not performance critical. | |
1c79356b A |
97 | */ |
98 | ||
1c79356b | 99 | |
55e303ae A |
100 | .align 5 |
101 | .globl EXT(phys_copy) | |
102 | ||
103 | LEXT(phys_copy) | |
104 | ||
105 | rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg | |
106 | mflr r12 // get return address | |
107 | rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits | |
108 | rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg | |
109 | bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10 | |
110 | rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits | |
111 | mtlr r12 // restore return address | |
112 | subic. r5,r7,4 // a word to copy? | |
113 | b phys_copy_2 | |
114 | ||
115 | .align 5 | |
116 | ||
117 | phys_copy_1: // loop copying words | |
118 | subic. r5,r5,4 // more to go? | |
119 | lwz r0,0(r3) | |
120 | addi r3,r3,4 | |
121 | stw r0,0(r4) | |
122 | addi r4,r4,4 | |
123 | phys_copy_2: | |
124 | bge phys_copy_1 | |
125 | addic. r5,r5,4 // restore count | |
126 | ble phys_copy_4 // no more | |
127 | ||
128 | // Loop is aligned here | |
129 | ||
130 | phys_copy_3: // loop copying bytes | |
131 | subic. r5,r5,1 // more to go? | |
132 | lbz r0,0(r3) | |
133 | addi r3,r3,1 | |
134 | stb r0,0(r4) | |
135 | addi r4,r4,1 | |
136 | bgt phys_copy_3 | |
137 | phys_copy_4: | |
138 | b EXT(ml_restore) // restore MSR and do the isync | |
139 | ||
140 | ||
141 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
1c79356b A |
142 | /* void |
143 | * pmap_copy_page(src, dst) | |
55e303ae A |
144 | * ppnum_t src; |
145 | * ppnum_t dst; | |
1c79356b A |
146 | * |
147 | * This routine will copy the physical page src to physical page dst | |
148 | * | |
55e303ae A |
149 | * This routine assumes that the src and dst are page numbers and that the |
150 | * destination is cached. It runs on 32 and 64 bit processors, with and | |
151 | * without altivec, and with 32 and 128 byte cache lines. | |
152 | * We also must assume that no-one will be executing within the destination | |
153 | * page, and that this will be used for paging. Because this | |
154 | * is a common routine, we have tuned loops for each processor class. | |
1c79356b A |
155 | * |
156 | */ | |
55e303ae | 157 | #define kSFSize (FM_SIZE+160) |
1c79356b | 158 | |
1c79356b | 159 | ENTRY(pmap_copy_page, TAG_NO_FRAME_USED) |
1c79356b | 160 | |
55e303ae A |
161 | lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag |
162 | mflr r0 // get return | |
163 | ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag | |
164 | stw r0,8(r1) // save | |
165 | stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs | |
166 | mfmsr r11 // save MSR at entry | |
167 | mfsprg r10,2 // get feature flags | |
168 | andc r11,r11,r2 // Clear out vec and fp | |
169 | ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also | |
170 | andc r2,r11,r2 // Clear out EE as well | |
171 | mtcrf 0x02,r10 // we need to test pf64Bit | |
172 | ori r2,r2,MASK(MSR_FP) // must enable FP for G3... | |
173 | mtcrf 0x80,r10 // we need to test pfAltivec too | |
174 | oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3) | |
175 | mtmsr r2 // turn EE off, FP and VEC on | |
176 | isync | |
177 | bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint) | |
178 | slwi r3,r3,12 // get page address from page num | |
179 | slwi r4,r4,12 // get page address from page num | |
180 | rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR | |
181 | bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4 | |
182 | ||
183 | ||
184 | // G3 -- copy using FPRs | |
185 | ||
186 | stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy | |
187 | stfd f1,FM_SIZE+8(r1) | |
188 | li r5,PPC_PGBYTES/32 // count of cache lines in a page | |
189 | stfd f2,FM_SIZE+16(r1) | |
190 | mtctr r5 | |
191 | stfd f3,FM_SIZE+24(r1) | |
192 | mtmsr r12 // turn off DR after saving FPRs on stack | |
193 | isync | |
194 | ||
195 | pmap_g3_copy_loop: // loop over 32-byte cache lines | |
196 | dcbz 0,r4 // avoid read of dest line | |
197 | lfd f0,0(r3) | |
198 | lfd f1,8(r3) | |
199 | lfd f2,16(r3) | |
200 | lfd f3,24(r3) | |
201 | addi r3,r3,32 | |
202 | stfd f0,0(r4) | |
203 | stfd f1,8(r4) | |
204 | stfd f2,16(r4) | |
205 | stfd f3,24(r4) | |
206 | dcbst 0,r4 // flush dest line to RAM | |
207 | addi r4,r4,32 | |
208 | bdnz pmap_g3_copy_loop | |
209 | ||
210 | sync // wait for stores to take | |
211 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
212 | li r6,PPC_PGBYTES-32 // point to last line in page | |
213 | pmap_g3_icache_flush: | |
214 | subic. r5,r6,32 // more to go? | |
215 | icbi r4,r6 // flush another line in icache | |
216 | subi r6,r5,32 // get offset to next line | |
217 | icbi r4,r5 | |
218 | bne pmap_g3_icache_flush | |
219 | ||
220 | sync | |
221 | mtmsr r2 // turn DR back on | |
222 | isync | |
223 | lfd f0,FM_SIZE+0(r1) // restore the FPRs | |
224 | lfd f1,FM_SIZE+8(r1) | |
225 | lfd f2,FM_SIZE+16(r1) | |
226 | lfd f3,FM_SIZE+24(r1) | |
227 | ||
228 | b pmap_g4_restore // restore MSR and done | |
229 | ||
230 | ||
231 | // G4 -- copy using VRs | |
232 | ||
233 | pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR | |
234 | la r9,FM_SIZE+16(r1) // place where we save VRs to r9 | |
235 | li r5,16 // load x-form offsets into r5-r9 | |
236 | li r6,32 // another offset | |
237 | stvx v0,0,r9 // save some VRs so we can use to copy | |
238 | li r7,48 // another offset | |
239 | stvx v1,r5,r9 | |
240 | li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks | |
241 | stvx v2,r6,r9 | |
242 | mtctr r0 | |
243 | li r8,96 // get look-ahead for touch | |
244 | stvx v3,r7,r9 | |
245 | li r9,128 | |
246 | mtmsr r12 // now we've saved VRs on stack, turn off DR | |
247 | isync // wait for it to happen | |
248 | b pmap_g4_copy_loop | |
249 | ||
250 | .align 5 // align inner loops | |
251 | pmap_g4_copy_loop: // loop over 64-byte chunks | |
252 | dcbt r3,r8 // touch 3 lines ahead | |
253 | nop // avoid a 17-word loop... | |
254 | dcbt r3,r9 // touch 4 lines ahead | |
255 | nop // more padding | |
256 | dcba 0,r4 // avoid pre-fetch of 1st dest line | |
257 | lvx v0,0,r3 // offset 0 | |
258 | lvx v1,r5,r3 // offset 16 | |
259 | lvx v2,r6,r3 // offset 32 | |
260 | lvx v3,r7,r3 // offset 48 | |
261 | addi r3,r3,64 | |
262 | dcba r6,r4 // avoid pre-fetch of 2nd line | |
263 | stvx v0,0,r4 // offset 0 | |
264 | stvx v1,r5,r4 // offset 16 | |
265 | stvx v2,r6,r4 // offset 32 | |
266 | stvx v3,r7,r4 // offset 48 | |
267 | dcbf 0,r4 // push line 1 | |
268 | dcbf r6,r4 // and line 2 | |
269 | addi r4,r4,64 | |
270 | bdnz pmap_g4_copy_loop | |
271 | ||
272 | sync // wait for stores to take | |
273 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
274 | li r8,PPC_PGBYTES-32 // point to last line in page | |
275 | pmap_g4_icache_flush: | |
276 | subic. r9,r8,32 // more to go? | |
277 | icbi r4,r8 // flush from icache | |
278 | subi r8,r9,32 // get offset to next line | |
279 | icbi r4,r9 | |
280 | bne pmap_g4_icache_flush | |
281 | ||
282 | sync | |
283 | mtmsr r2 // turn DR back on | |
284 | isync | |
285 | la r9,FM_SIZE+16(r1) // get base of VR save area | |
286 | lvx v0,0,r9 // restore the VRs | |
287 | lvx v1,r5,r9 | |
288 | lvx v2,r6,r9 | |
289 | lvx v3,r7,r9 | |
290 | ||
291 | pmap_g4_restore: // r11=MSR | |
292 | mtmsr r11 // turn EE on, VEC and FR off | |
293 | isync // wait for it to happen | |
294 | addi r1,r1,kSFSize // pop off our stack frame | |
295 | lwz r0,8(r1) // restore return address | |
296 | mtlr r0 | |
297 | blr | |
298 | ||
299 | ||
300 | // 64-bit/128-byte processor: copy using VRs | |
301 | ||
302 | pmap_copy_64: // r10=features, r11=old MSR | |
303 | sldi r3,r3,12 // get page address from page num | |
304 | sldi r4,r4,12 // get page address from page num | |
305 | la r9,FM_SIZE+16(r1) // get base of VR save area | |
306 | li r5,16 // load x-form offsets into r5-r9 | |
307 | li r6,32 // another offset | |
308 | bf pfAltivecb,pmap_novmx_copy // altivec suppressed... | |
309 | stvx v0,0,r9 // save 8 VRs so we can copy wo bubbles | |
310 | stvx v1,r5,r9 | |
311 | li r7,48 // another offset | |
312 | li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks | |
313 | stvx v2,r6,r9 | |
314 | stvx v3,r7,r9 | |
315 | addi r9,r9,64 // advance base ptr so we can store another 4 | |
316 | mtctr r0 | |
317 | li r0,MASK(MSR_DR) // get DR bit | |
318 | stvx v4,0,r9 | |
319 | stvx v5,r5,r9 | |
320 | andc r12,r2,r0 // turn off DR bit | |
321 | li r0,1 // get a 1 to slam into SF | |
322 | stvx v6,r6,r9 | |
323 | stvx v7,r7,r9 | |
324 | rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0) | |
325 | li r8,-128 // offset so we can reach back one line | |
326 | mtmsrd r12 // now we've saved VRs, turn DR off and SF on | |
327 | isync // wait for it to happen | |
328 | dcbt128 0,r3,1 // start a forward stream | |
329 | b pmap_64_copy_loop | |
330 | ||
331 | .align 5 // align inner loops | |
332 | pmap_64_copy_loop: // loop over 128-byte chunks | |
333 | dcbz128 0,r4 // avoid read of destination line | |
334 | lvx v0,0,r3 // offset 0 | |
335 | lvx v1,r5,r3 // offset 16 | |
336 | lvx v2,r6,r3 // offset 32 | |
337 | lvx v3,r7,r3 // offset 48 | |
338 | addi r3,r3,64 // don't have enough GPRs so add 64 2x | |
339 | lvx v4,0,r3 // offset 64 | |
340 | lvx v5,r5,r3 // offset 80 | |
341 | lvx v6,r6,r3 // offset 96 | |
342 | lvx v7,r7,r3 // offset 112 | |
343 | addi r3,r3,64 | |
344 | stvx v0,0,r4 // offset 0 | |
345 | stvx v1,r5,r4 // offset 16 | |
346 | stvx v2,r6,r4 // offset 32 | |
347 | stvx v3,r7,r4 // offset 48 | |
348 | addi r4,r4,64 | |
349 | stvx v4,0,r4 // offset 64 | |
350 | stvx v5,r5,r4 // offset 80 | |
351 | stvx v6,r6,r4 // offset 96 | |
352 | stvx v7,r7,r4 // offset 112 | |
353 | addi r4,r4,64 | |
354 | dcbf r8,r4 // flush the line we just wrote | |
355 | bdnz pmap_64_copy_loop | |
356 | ||
357 | sync // wait for stores to take | |
358 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
359 | li r8,PPC_PGBYTES-128 // point to last line in page | |
360 | pmap_64_icache_flush: | |
361 | subic. r9,r8,128 // more to go? | |
362 | icbi r4,r8 // flush from icache | |
363 | subi r8,r9,128 // get offset to next line | |
364 | icbi r4,r9 | |
365 | bne pmap_64_icache_flush | |
366 | ||
367 | sync | |
368 | mtmsrd r2 // turn DR back on, SF off | |
369 | isync | |
370 | la r9,FM_SIZE+16(r1) // get base address of VR save area on stack | |
371 | lvx v0,0,r9 // restore the VRs | |
372 | lvx v1,r5,r9 | |
373 | lvx v2,r6,r9 | |
374 | lvx v3,r7,r9 | |
375 | addi r9,r9,64 | |
376 | lvx v4,0,r9 | |
377 | lvx v5,r5,r9 | |
378 | lvx v6,r6,r9 | |
379 | lvx v7,r7,r9 | |
380 | ||
381 | b pmap_g4_restore // restore lower half of MSR and return | |
382 | ||
383 | // | |
384 | // Copy on 64-bit without VMX | |
385 | // | |
386 | ||
387 | pmap_novmx_copy: | |
388 | li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks | |
389 | mtctr r0 | |
390 | li r0,MASK(MSR_DR) // get DR bit | |
391 | andc r12,r2,r0 // turn off DR bit | |
392 | li r0,1 // get a 1 to slam into SF | |
393 | rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0) | |
394 | mtmsrd r12 // now we've saved VRs, turn DR off and SF on | |
395 | isync // wait for it to happen | |
396 | dcbt128 0,r3,1 // start a forward stream | |
397 | ||
398 | pmap_novmx_copy_loop: // loop over 128-byte cache lines | |
399 | dcbz128 0,r4 // avoid read of dest line | |
400 | ||
401 | ld r0,0(r3) // Load half a line | |
402 | ld r12,8(r3) | |
403 | ld r5,16(r3) | |
404 | ld r6,24(r3) | |
405 | ld r7,32(r3) | |
406 | ld r8,40(r3) | |
407 | ld r9,48(r3) | |
408 | ld r10,56(r3) | |
409 | ||
410 | std r0,0(r4) // Store half a line | |
411 | std r12,8(r4) | |
412 | std r5,16(r4) | |
413 | std r6,24(r4) | |
414 | std r7,32(r4) | |
415 | std r8,40(r4) | |
416 | std r9,48(r4) | |
417 | std r10,56(r4) | |
418 | ||
419 | ld r0,64(r3) // Load half a line | |
420 | ld r12,72(r3) | |
421 | ld r5,80(r3) | |
422 | ld r6,88(r3) | |
423 | ld r7,96(r3) | |
424 | ld r8,104(r3) | |
425 | ld r9,112(r3) | |
426 | ld r10,120(r3) | |
427 | ||
428 | addi r3,r3,128 | |
429 | ||
430 | std r0,64(r4) // Store half a line | |
431 | std r12,72(r4) | |
432 | std r5,80(r4) | |
433 | std r6,88(r4) | |
434 | std r7,96(r4) | |
435 | std r8,104(r4) | |
436 | std r9,112(r4) | |
437 | std r10,120(r4) | |
438 | ||
439 | dcbf 0,r4 // flush the line we just wrote | |
440 | addi r4,r4,128 | |
441 | bdnz pmap_novmx_copy_loop | |
442 | ||
443 | sync // wait for stores to take | |
444 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
445 | li r8,PPC_PGBYTES-128 // point to last line in page | |
446 | ||
447 | pmap_novmx_icache_flush: | |
448 | subic. r9,r8,128 // more to go? | |
449 | icbi r4,r8 // flush from icache | |
450 | subi r8,r9,128 // get offset to next line | |
451 | icbi r4,r9 | |
452 | bne pmap_novmx_icache_flush | |
453 | ||
454 | sync | |
455 | mtmsrd r2 // turn DR back on, SF off | |
456 | isync | |
457 | ||
458 | b pmap_g4_restore // restore lower half of MSR and return | |
459 | ||
460 | ||
461 | ||
462 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
de355530 | 463 | |
55e303ae A |
464 | // Stack frame format used by copyin, copyout, copyinstr and copyoutstr. |
465 | // These routines all run both on 32 and 64-bit machines, though because they are called | |
466 | // by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned | |
467 | // by MapUserAddressSpace will be 64 bits however on 64-bit machines. Beware to avoid | |
468 | // using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there | |
469 | // is no need to store or load it, which are mode-dependent operations since it could be | |
470 | // 32 or 64 bits. | |
471 | ||
472 | #define kkFrameSize (FM_SIZE+32) | |
473 | ||
474 | #define kkBufSize (FM_SIZE+0) | |
475 | #define kkCR (FM_SIZE+4) | |
476 | #define kkSource (FM_SIZE+8) | |
477 | #define kkDest (FM_SIZE+12) | |
478 | #define kkCountPtr (FM_SIZE+16) | |
479 | #define kkR31Save (FM_SIZE+20) | |
480 | ||
481 | ||
482 | // nonvolatile CR bits we use as flags in cr3 | |
483 | ||
484 | #define kk64bit 12 | |
485 | #define kkNull 13 | |
486 | #define kkIn 14 | |
487 | #define kkString 15 | |
488 | #define kkZero 15 | |
489 | ||
490 | ||
491 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
1c79356b | 492 | /* |
d7e50217 | 493 | * int |
55e303ae | 494 | * copyoutstr(src, dst, maxcount, count) |
de355530 A |
495 | * vm_offset_t src; |
496 | * vm_offset_t dst; | |
55e303ae A |
497 | * vm_size_t maxcount; |
498 | * vm_size_t* count; | |
de355530 | 499 | * |
55e303ae | 500 | * Set *count to the number of bytes copied. |
de355530 A |
501 | */ |
502 | ||
55e303ae A |
503 | ENTRY(copyoutstr, TAG_NO_FRAME_USED) |
504 | mfcr r2 // we use nonvolatile cr3 | |
505 | li r0,0 | |
506 | crset kkString // flag as a string op | |
507 | mr r10,r4 // for copyout, dest ptr (r4) is in user space | |
508 | stw r0,0(r6) // initialize #bytes moved | |
509 | crclr kkIn // flag as copyout | |
510 | b copyJoin | |
de355530 | 511 | |
de355530 | 512 | |
55e303ae | 513 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> |
de355530 | 514 | /* |
55e303ae A |
515 | * int |
516 | * copyinstr(src, dst, maxcount, count) | |
1c79356b A |
517 | * vm_offset_t src; |
518 | * vm_offset_t dst; | |
519 | * vm_size_t maxcount; | |
520 | * vm_size_t* count; | |
521 | * | |
522 | * Set *count to the number of bytes copied | |
1c79356b A |
523 | * If dst == NULL, don't copy, just count bytes. |
524 | * Only currently called from klcopyinstr. | |
525 | */ | |
526 | ||
527 | ENTRY(copyinstr, TAG_NO_FRAME_USED) | |
55e303ae A |
528 | mfcr r2 // we use nonvolatile cr3 |
529 | cmplwi r4,0 // dst==NULL? | |
530 | li r0,0 | |
531 | crset kkString // flag as a string op | |
532 | mr r10,r3 // for copyin, source ptr (r3) is in user space | |
533 | crmove kkNull,cr0_eq // remember if (dst==NULL) | |
534 | stw r0,0(r6) // initialize #bytes moved | |
535 | crset kkIn // flag as copyin (rather than copyout) | |
536 | b copyJoin1 // skip over the "crclr kkNull" | |
537 | ||
538 | ||
539 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
540 | /* | |
541 | * int | |
542 | * copyout(src, dst, count) | |
543 | * vm_offset_t src; | |
544 | * vm_offset_t dst; | |
545 | * size_t count; | |
1c79356b | 546 | */ |
1c79356b | 547 | |
55e303ae A |
548 | .align 5 |
549 | .globl EXT(copyout) | |
550 | .globl EXT(copyoutmsg) | |
551 | ||
552 | LEXT(copyout) | |
553 | LEXT(copyoutmsg) | |
554 | ||
555 | #if INSTRUMENT | |
556 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout | |
557 | stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it | |
558 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
559 | stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it | |
560 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
561 | stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it | |
562 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
563 | stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it | |
564 | #endif | |
565 | mfcr r2 // save caller's CR | |
566 | crclr kkString // not a string version | |
567 | mr r10,r4 // dest (r4) is user-space ptr | |
568 | crclr kkIn // flag as copyout | |
569 | b copyJoin | |
570 | ||
571 | ||
572 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
573 | /* | |
574 | * int | |
575 | * copyin(src, dst, count) | |
576 | * vm_offset_t src; | |
577 | * vm_offset_t dst; | |
578 | * size_t count; | |
de355530 | 579 | */ |
1c79356b | 580 | |
1c79356b | 581 | |
55e303ae A |
582 | .align 5 |
583 | .globl EXT(copyin) | |
584 | .globl EXT(copyinmsg) | |
585 | ||
586 | LEXT(copyin) | |
587 | LEXT(copyinmsg) | |
588 | ||
589 | mfcr r2 // save caller's CR | |
590 | crclr kkString // not a string version | |
591 | mr r10,r3 // source (r3) is user-space ptr in copyin | |
592 | crset kkIn // flag as copyin | |
593 | ||
594 | ||
595 | // Common code to handle setup for all the copy variants: | |
596 | // r2 = caller's CR, since we use cr3 | |
597 | // r3-r6 = parameters | |
598 | // r10 = user-space ptr (r3 if copyin, r4 if copyout) | |
599 | // cr3 = kkIn, kkString, kkNull flags | |
600 | ||
601 | copyJoin: | |
602 | crclr kkNull // (dst==NULL) convention not used with this call | |
603 | copyJoin1: // enter from copyinstr with kkNull set | |
604 | mflr r0 // get return address | |
605 | cmplwi r5,0 // buffer length 0? | |
606 | lis r9,0x1000 // r9 <- 0x10000000 (256MB) | |
607 | stw r0,FM_LR_SAVE(r1) // save return | |
608 | cmplw cr1,r5,r9 // buffer length > 256MB ? | |
609 | mfsprg r8,2 // get the features | |
610 | beq-- copyinout_0 // 0 length is degenerate case | |
611 | stwu r1,-kkFrameSize(r1) // set up stack frame | |
612 | stw r2,kkCR(r1) // save caller's CR since we use cr3 | |
613 | mtcrf 0x02,r8 // move pf64Bit to cr6 | |
614 | stw r3,kkSource(r1) // save args across MapUserAddressSpace | |
615 | stw r4,kkDest(r1) | |
616 | stw r5,kkBufSize(r1) | |
617 | crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor | |
618 | stw r6,kkCountPtr(r1) | |
619 | stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr | |
620 | li r31,0 // no mapped ptr yet | |
621 | ||
622 | ||
623 | // Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout. | |
624 | // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp | |
625 | // the buffer length to 256MB. This isn't an issue if the string is less than 256MB | |
626 | // (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction | |
627 | // is due to MapUserAddressSpace; we don't want to consume more than two segments for | |
628 | // the mapping. | |
629 | ||
630 | ble++ cr1,copyin0 // skip if buffer length <= 256MB | |
631 | bf kkString,copyinout_too_big // error if not string op | |
632 | mr r5,r9 // silently clamp buffer length to 256MB | |
633 | stw r9,kkBufSize(r1) // update saved copy too | |
634 | ||
635 | ||
636 | // Set up thread_recover in case we hit an illegal address. | |
637 | ||
638 | copyin0: | |
639 | mfsprg r8,1 /* Get the current act */ | |
640 | lis r2,hi16(copyinout_error) | |
641 | lwz r7,ACT_THREAD(r8) | |
642 | ori r2,r2,lo16(copyinout_error) | |
643 | lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address | |
644 | stw r2,THREAD_RECOVER(r7) | |
645 | ||
646 | ||
647 | // Map user segment into kernel map, turn on 64-bit mode. | |
648 | // r3 = vm map | |
649 | // r5 = buffer length | |
650 | // r10 = user space ptr (r3 if copyin, r4 if copyout) | |
651 | ||
652 | mr r6,r5 // Set length to map | |
653 | li r4,0 // Note: we only do this 32-bit for now | |
654 | mr r5,r10 // arg2 <- user space ptr | |
655 | #if INSTRUMENT | |
656 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace | |
657 | stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it | |
658 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
659 | stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it | |
660 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
661 | stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it | |
662 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
663 | stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it | |
664 | #endif | |
665 | bl EXT(MapUserAddressSpace) // set r3 <- address in kernel map of user operand | |
666 | #if INSTRUMENT | |
667 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace | |
668 | stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it | |
669 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
670 | stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it | |
671 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
672 | stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it | |
673 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
674 | stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it | |
675 | #endif | |
676 | or. r0,r3,r4 // Did we fail the mapping? | |
677 | mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit) | |
678 | beq-- copyinout_error // was 0, so there was an error making the mapping | |
679 | bf-- kk64bit,copyin1 // skip if a 32-bit processor | |
680 | ||
681 | rldimi r31,r3,32,0 // slam high-order bits into mapped ptr | |
682 | mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr | |
683 | li r0,1 | |
684 | rldimi r4,r0,63,MSR_SF_BIT // light bit 0 | |
685 | mtmsrd r4 // turn on 64-bit mode | |
686 | isync // wait for mode to change | |
687 | ||
688 | ||
689 | // Load r3-r5, substituting mapped ptr as appropriate. | |
690 | ||
691 | copyin1: | |
692 | lwz r5,kkBufSize(r1) // restore length to copy | |
693 | bf kkIn,copyin2 // skip if copyout | |
694 | lwz r4,kkDest(r1) // copyin: source is mapped, dest is r4 at entry | |
695 | mr r3,r31 // source is mapped ptr | |
696 | b copyin3 | |
697 | copyin2: // handle copyout | |
698 | lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry) | |
699 | mr r4,r31 // dest is mapped ptr into user space | |
700 | ||
701 | ||
702 | // Finally, all set up to copy: | |
703 | // r3 = source ptr (mapped if copyin) | |
704 | // r4 = dest ptr (mapped if copyout) | |
705 | // r5 = length | |
706 | // r31 = mapped ptr returned by MapUserAddressSpace | |
707 | // cr3 = kkIn, kkString, kk64bit, and kkNull flags | |
708 | ||
709 | copyin3: | |
710 | bt kkString,copyString // handle copyinstr and copyoutstr | |
711 | bl EXT(bcopy) // copyin and copyout: let bcopy do the work | |
712 | li r3,0 // return success | |
713 | ||
714 | ||
715 | // Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached | |
716 | // from error recovery if we get a DSI accessing user space. Clear recovery ptr, | |
717 | // and pop off frame. Note that we have kept | |
718 | // the mapped ptr into user space in r31, as a reg64_t type (ie, a 64-bit ptr on | |
719 | // 64-bit machines.) We must unpack r31 into an addr64_t in (r3,r4) before passing | |
720 | // it to ReleaseUserAddressSpace. | |
721 | // r3 = 0, EFAULT, or ENAMETOOLONG | |
722 | ||
723 | copyinx: | |
724 | lwz r2,kkCR(r1) // get callers cr3 | |
725 | mfsprg r6,1 // Get the current act | |
726 | lwz r10,ACT_THREAD(r6) | |
727 | ||
728 | bf-- kk64bit,copyinx1 // skip if 32-bit processor | |
729 | mfmsr r12 | |
730 | rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off | |
731 | mtmsrd r12 // turn SF off and EE back on | |
732 | isync // wait for the mode to change | |
733 | copyinx1: | |
734 | lwz r31,kkR31Save(r1) // restore callers r31 | |
735 | addi r1,r1,kkFrameSize // pop off our stack frame | |
736 | lwz r0,FM_LR_SAVE(r1) | |
737 | li r4,0 | |
738 | stw r4,THREAD_RECOVER(r10) // Clear recovery | |
739 | mtlr r0 | |
740 | mtcrf 0x10,r2 // restore cr3 | |
741 | blr | |
de355530 | 742 | |
1c79356b | 743 | |
55e303ae A |
744 | /* We get here via the exception handler if an illegal |
745 | * user memory reference was made. This error handler is used by | |
746 | * copyin, copyout, copyinstr, and copyoutstr. Registers are as | |
747 | * they were at point of fault, so for example cr3 flags are valid. | |
de355530 | 748 | */ |
d7e50217 | 749 | |
55e303ae A |
750 | copyinout_error: |
751 | li r3,EFAULT // return error | |
752 | b copyinx | |
753 | ||
754 | copyinout_0: // degenerate case: 0-length copy | |
755 | mtcrf 0x10,r2 // restore cr3 | |
756 | li r3,0 // return success | |
757 | blr | |
758 | ||
759 | copyinout_too_big: // degenerate case | |
760 | mtcrf 0x10,r2 // restore cr3 | |
761 | lwz r1,0(r1) // pop off stack frame | |
762 | li r3,ENAMETOOLONG | |
763 | blr | |
764 | ||
765 | ||
766 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
767 | // Handle copyinstr and copyoutstr. At this point the stack frame is set up, | |
768 | // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode | |
769 | // if necessary, and: | |
770 | // r3 = source ptr, mapped if copyinstr | |
771 | // r4 = dest ptr, mapped if copyoutstr | |
772 | // r5 = buffer length | |
773 | // r31 = mapped ptr returned by MapUserAddressSpace | |
774 | // cr3 = kkIn, kkString, kkNull, and kk64bit flags | |
775 | // We do word copies unless the buffer is very short, then use a byte copy loop | |
776 | // for the leftovers if necessary. | |
777 | ||
// NOTE(review): shared string-copy core reached from copyinstr/copyoutstr
// setup code that is outside this view.  r3 is the source pointer: it is
// dereferenced (lwz/lbz) and advanced below.  r12 doubles as the count of
// "header" bytes copied by the unaligned prologue (copyinstr11..13) and is
// folded into the byte count stored at copyinstr10.  Both exits branch to
// copyinx, which is defined elsewhere in this file.
778 | copyString: | |
779 | li r12,0 // Set header bytes count to zero | |
780 | cmplwi cr1,r5,20 // is buffer very short? | |
781 | mtctr r5 // assuming short, set up loop count for bytes | |
782 | blt cr1,copyinstr8 // too short for word loop | |
783 | andi. r12,r3,0x3 // is source ptr word aligned? | |
784 | bne copyinstr11 // bytes loop | |
785 | copyinstr1: | |
786 | srwi r6,r5,2 // get #words in buffer | |
787 | mtctr r6 // set up word loop count | |
788 | lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11 | |
789 | lis r11,hi16(0x80808080) | |
790 | ori r10,r10,lo16(0xFEFEFEFF) | |
791 | ori r11,r11,lo16(0x80808080) | |
792 | bf kkNull,copyinstr6 // enter loop that copies | |
793 | b copyinstr5 // use loop that just counts | |
794 | ||
795 | ||
796 | // Word loop(s). They do a word-parallel search for 0s, using the following | |
797 | // inobvious but very efficient test: | |
798 | // y = data + 0xFEFEFEFF | |
799 | // z = ~data & 0x80808080 | |
800 | // If (y & z)==0, then all bytes in dataword are nonzero. We need two copies of | |
801 | // this loop, since if we test kkNull in the loop then it becomes 9 words long. | |
802 | ||
803 | .align 5 // align inner loops for speed | |
804 | copyinstr5: // version that counts but does not copy | |
805 | lwz r8,0(r3) // get next word of source | |
806 | addi r3,r3,4 // increment source ptr | |
807 | add r9,r10,r8 // r9 = data + 0xFEFEFEFF | |
808 | andc r7,r11,r8 // r7 = ~data & 0x80808080 | |
809 | and. r7,r9,r7 // r7 = r9 & r7 | |
810 | bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes are nonzero | |
811 | ||
812 | b copyinstr7 | |
813 | ||
814 | .align 5 // align inner loops for speed | |
815 | copyinstr6: // version that counts and copies | |
816 | lwz r8,0(r3) // get next word of source | |
817 | addi r3,r3,4 // increment source ptr | |
818 | addi r4,r4,4 // increment dest ptr while we wait for data | |
819 | add r9,r10,r8 // r9 = data + 0xFEFEFEFF | |
820 | andc r7,r11,r8 // r7 = ~data & 0x80808080 | |
821 | and. r7,r9,r7 // r7 = r9 & r7 | |
822 | stw r8,-4(r4) // pack all 4 bytes into buffer | |
823 | bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero | |
824 | ||
825 | ||
826 | // Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0 | |
827 | // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also | |
828 | // mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte. | |
829 | ||
// On entry here: r8 = last word loaded, r7 = zero-byte mask from the test
// above, CTR = words not yet processed, cr0_eq set iff no 0 byte was seen.
830 | copyinstr7: | |
831 | crnot kkZero,cr0_eq // 0 found iff cr0_eq is off | |
832 | mfctr r6 // get #words remaining in buffer | |
833 | rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position | |
834 | slwi r6,r6,2 // convert to #bytes remaining | |
835 | andc r7,r7,r2 // turn off false hits from 0x0100 worst case | |
836 | rlwimi r6,r5,0,30,31 // add in odd bytes leftover in buffer | |
837 | srwi r7,r7,8 // we want to count the 0 as a byte xferred | |
838 | addi r6,r6,4 // don't count last word xferred (yet) | |
839 | cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80) | |
840 | srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4 | |
841 | sub. r6,r6,r7 // account for nonzero bytes in last word | |
842 | bt++ kkZero,copyinstr10 // 0 found, so done | |
843 | ||
844 | beq copyinstr10 // r6==0, so buffer truly full | |
845 | mtctr r6 // 0 not found, loop over r6 bytes | |
846 | b copyinstr8 // enter byte loop for last 1-3 leftover bytes | |
847 | ||
848 | ||
849 | // Byte loop. This is used for very small buffers and for the odd bytes left over | |
850 | // after searching and copying words at a time. | |
851 | ||
852 | .align 5 // align inner loops for speed | |
853 | copyinstr8: // loop over bytes of source | |
854 | lbz r0,0(r3) // get next byte of source | |
855 | addi r3,r3,1 | |
856 | addi r4,r4,1 // increment dest addr whether we store or not | |
857 | cmpwi r0,0 // the 0? | |
858 | bt-- kkNull,copyinstr9 // don't store (was copyinstr with NULL ptr) | |
859 | stb r0,-1(r4) | |
860 | copyinstr9: | |
861 | bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer | |
862 | ||
863 | mfctr r6 // get #bytes left in buffer | |
864 | crmove kkZero,cr0_eq // remember if 0 found or buffer filled | |
865 | ||
866 | ||
867 | // Buffer filled or 0 found. Unwind and return. | |
868 | // r5 = kkBufSize, ie buffer length | |
869 | // r6 = untransferred bytes remaining in buffer | |
870 | // r31 = mapped ptr returned by MapUserAddressSpace | |
871 | // cr3 = kkZero set iff 0 found | |
872 | ||
873 | copyinstr10: | |
874 | lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved | |
875 | sub r2,r5,r6 // get #bytes we moved, counting the 0 iff any | |
876 | add r2,r2,r12 // add the header bytes count | |
877 | li r3,0 // assume 0 return status | |
878 | stw r2,0(r9) // store #bytes moved | |
879 | bt++ kkZero,copyinx // we did find the 0 so return 0 | |
880 | li r3,ENAMETOOLONG // buffer filled | |
881 | b copyinx // join main exit routine | |
882 | ||
883 | // Byte loop. This is used on the header bytes for unaligned source | |
884 | ||
// r12 arrives holding (source & 3) from the andi. at copyString; it is
// converted below into the number of bytes needed to reach word alignment.
885 | .align 5 // align inner loops for speed | |
886 | copyinstr11: | |
887 | li r10,4 // load word size | |
888 | sub r12,r10,r12 // set the header bytes count | |
889 | mtctr r12 // set up bytes loop count | |
890 | copyinstr12: // loop over bytes of source | |
891 | lbz r0,0(r3) // get next byte of source | |
892 | addi r3,r3,1 | |
893 | addi r4,r4,1 // increment dest addr whether we store or not | |
894 | cmpwi r0,0 // the 0? | |
895 | bt-- kkNull,copyinstr13 // don't store (was copyinstr with NULL ptr) | |
896 | stb r0,-1(r4) | |
897 | copyinstr13: | |
898 | bdnzf cr0_eq,copyinstr12 // loop if byte not 0 and more room in buffer | |
899 | sub r5,r5,r12 // subtract the bytes copied | |
// NOTE(review): "bne cr0_eq,..." supplies a CR-bit constant (cr0_eq) where
// the bne extended mnemonic normally takes a CR *field* operand; verify the
// assembler resolves this to "branch if cr0 EQ clear" (no 0 byte found), as
// the surrounding logic requires.
900 | bne cr0_eq,copyinstr1 // branch to word loop | |
901 | ||
// A 0 byte was found inside the header: make copyinstr10 compute the moved
// count from the header loop alone (r5 = header size, r12 cleared so it is
// not double-counted, r6 = bytes of the header not consumed).
902 | mr r5,r12 // Get the header bytes count | |
903 | li r12,0 // Clear the header bytes count | |
904 | mfctr r6 // get #bytes left in buffer | |
905 | crmove kkZero,cr0_eq // remember if 0 found or buffer filled | |
906 | b copyinstr10 | |
907 |