]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * The contents of this file constitute Original Code as defined in and | |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
11 | * | |
12 | * This Original Code and all software distributed under the License are | |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the | |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
19 | * | |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* | |
23 | * @OSF_COPYRIGHT@ | |
24 | */ | |
25 | #include <debug.h> | |
26 | #include <ppc/asm.h> | |
27 | #include <ppc/proc_reg.h> | |
28 | #include <mach/ppc/vm_param.h> | |
29 | #include <assym.s> | |
30 | #include <sys/errno.h> | |
31 | ||
32 | #define INSTRUMENT 0 | |
33 | ||
34 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
35 | /* | |
36 | * void pmap_zero_page(vm_offset_t pa) | |
37 | * | |
38 | * Zero a page of physical memory. Despite the prototype shown above, the argument in r3 is used as a physical page number: it is shifted left by 12 to form the page address. This routine runs in 32 or 64-bit mode, | |
39 | * and handles 32 and 128-byte cache lines. The page is zeroed from its last cache line down to offset 0, two lines per loop pass. | |
40 | */ | |
41 | |
42 | |
43 | .align 5 | |
44 | .globl EXT(pmap_zero_page) | |
45 | |
46 | LEXT(pmap_zero_page) | |
47 | |
48 | mflr r12 // save return address across the call below | |
49 | bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10 | |
50 | mtlr r12 // restore return address | |
51 | andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size | |
52 | |
53 | subfic r4,r9,PPC_PGBYTES // r4 <- offset of last cache line in page (page size - line size) | |
54 | |
55 | bt++ pf64Bitb,page0S4 // pf64Bitb set: form the page address with the doubleword shift instead | |
56 | |
57 | slwi r3,r3,12 // get page address from page num | |
58 | b page_zero_1 // Jump to line aligned loop... | |
59 | |
60 | .align 5 | |
61 | |
62 | nop | |
63 | nop | |
64 | nop | |
65 | nop | |
66 | nop | |
67 | nop | |
68 | nop | |
69 | |
70 | page0S4: | |
71 | sldi r3,r3,12 // get page address from page num | |
72 | |
73 | page_zero_1: // loop zeroing cache lines, highest offset first | |
74 | sub. r5,r4,r9 // r5 <- offset of the next lower line; zero means it is the last one | |
75 | dcbz128 r3,r4 // zero either 32 or 128 bytes | |
76 | sub r4,r5,r9 // generate next offset | |
77 | dcbz128 r3,r5 | |
78 | bne-- page_zero_1 | |
79 | |
80 | b EXT(ml_restore) // restore MSR and do the isync | |
81 | ||
82 | ||
83 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
84 | /* void | |
85 | * phys_copy(src, dst, bytecount) | |
86 | * addr64_t src; (arrives as the register pair r3:r4, high word in r3) | |
87 | * addr64_t dst; (arrives as the register pair r5:r6, high word in r5) | |
88 | * int bytecount (arrives in r7) | |
89 | * | |
90 | * This routine will copy bytecount bytes from physical address src to physical | |
91 | * address dst. It runs in 64-bit mode if necessary, but does not handle | |
92 | * overlap or make any attempt to be optimal. Length must be a signed word. | |
93 | * Not performance critical. Words are copied first, then any leftover bytes. | |
94 | */ | |
95 | |
96 | |
97 | .align 5 | |
98 | .globl EXT(phys_copy) | |
99 | |
100 | LEXT(phys_copy) | |
101 | |
102 | rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg | |
103 | mflr r12 // get return address | |
104 | rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits | |
105 | rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg | |
106 | bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10 | |
107 | rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits | |
108 | mtlr r12 // restore return address | |
109 | subic. r5,r7,4 // a word to copy? (r5 <- bytecount - 4) | |
110 | b phys_copy_2 | |
111 | |
112 | .align 5 | |
113 | |
114 | phys_copy_1: // loop copying words | |
115 | subic. r5,r5,4 // more to go? | |
116 | lwz r0,0(r3) | |
117 | addi r3,r3,4 | |
118 | stw r0,0(r4) | |
119 | addi r4,r4,4 | |
120 | phys_copy_2: | |
121 | bge phys_copy_1 | |
122 | addic. r5,r5,4 // undo the last subtract: r5 <- leftover byte count | |
123 | ble phys_copy_4 // no leftover bytes | |
124 | |
125 | // Loop is aligned here | |
126 | |
127 | phys_copy_3: // loop copying bytes | |
128 | subic. r5,r5,1 // more to go? | |
129 | lbz r0,0(r3) | |
130 | addi r3,r3,1 | |
131 | stb r0,0(r4) | |
132 | addi r4,r4,1 | |
133 | bgt phys_copy_3 | |
134 | phys_copy_4: | |
135 | b EXT(ml_restore) // restore MSR and do the isync | |
136 | ||
137 | ||
138 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
139 | /* void | |
140 | * pmap_copy_page(src, dst) | |
141 | * ppnum_t src; | |
142 | * ppnum_t dst; | |
143 | * | |
144 | * This routine will copy the physical page src to physical page dst | |
145 | * | |
146 | * This routine assumes that the src and dst are page numbers and that the | |
147 | * destination is cached. It runs on 32 and 64 bit processors, with and | |
148 | * without altivec, and with 32 and 128 byte cache lines. | |
149 | * We also must assume that no-one will be executing within the destination | |
150 | * page, and that this will be used for paging. Because this | |
151 | * is a common routine, we have tuned loops for each processor class. | |
152 | * Every path flushes the destination lines and then invalidates them in the icache before returning. | |
153 | */ | |
154 | #define kSFSize (FM_SIZE+160) | |
155 | |
156 | ENTRY(pmap_copy_page, TAG_NO_FRAME_USED) | |
157 | |
158 | lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag | |
159 | mflr r0 // get return | |
160 | ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag | |
161 | stw r0,8(r1) // save return address at 8(r1), before our frame is pushed | |
162 | stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs | |
163 | mfmsr r11 // save MSR at entry | |
164 | mfsprg r10,2 // get feature flags | |
165 | andc r11,r11,r2 // Clear out vec and fp | |
166 | ori r2,r2,lo16(MASK(MSR_EE)) // add EE to the mask of bits to clear | |
167 | andc r2,r11,r2 // Clear out EE as well | |
168 | mtcrf 0x02,r10 // we need to test pf64Bit | |
169 | ori r2,r2,MASK(MSR_FP) // must enable FP for G3... | |
170 | mtcrf 0x80,r10 // we need to test pfAltivec too | |
171 | oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3) | |
172 | mtmsr r2 // turn EE off, FP and VEC on | |
173 | isync | |
174 | bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint) | |
175 | slwi r3,r3,12 // get page address from page num | |
176 | slwi r4,r4,12 // get page address from page num | |
177 | rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // r12 <- r2 with DR cleared, to turn off DR later | |
178 | bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4 | |
179 | ||
180 | ||
181 | // G3 -- copy using FPRs (f0-f3 are saved in our frame and restored afterwards) | |
182 | |
183 | stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy | |
184 | stfd f1,FM_SIZE+8(r1) | |
185 | li r5,PPC_PGBYTES/32 // count of cache lines in a page | |
186 | stfd f2,FM_SIZE+16(r1) | |
187 | mtctr r5 | |
188 | stfd f3,FM_SIZE+24(r1) | |
189 | mtmsr r12 // turn off DR after saving FPRs on stack | |
190 | isync | |
191 | |
192 | pmap_g3_copy_loop: // loop over 32-byte cache lines | |
193 | dcbz 0,r4 // avoid read of dest line | |
194 | lfd f0,0(r3) | |
195 | lfd f1,8(r3) | |
196 | lfd f2,16(r3) | |
197 | lfd f3,24(r3) | |
198 | addi r3,r3,32 | |
199 | stfd f0,0(r4) | |
200 | stfd f1,8(r4) | |
201 | stfd f2,16(r4) | |
202 | stfd f3,24(r4) | |
203 | dcbst 0,r4 // flush dest line to RAM | |
204 | addi r4,r4,32 | |
205 | bdnz pmap_g3_copy_loop | |
206 | |
207 | sync // wait for stores to take | |
208 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
209 | li r6,PPC_PGBYTES-32 // point to last line in page | |
210 | pmap_g3_icache_flush: | |
211 | subic. r5,r6,32 // more to go? | |
212 | icbi r4,r6 // flush another line in icache | |
213 | subi r6,r5,32 // get offset to next line | |
214 | icbi r4,r5 | |
215 | bne pmap_g3_icache_flush | |
216 | |
217 | sync | |
218 | mtmsr r2 // turn DR back on | |
219 | isync | |
220 | lfd f0,FM_SIZE+0(r1) // restore the FPRs | |
221 | lfd f1,FM_SIZE+8(r1) | |
222 | lfd f2,FM_SIZE+16(r1) | |
223 | lfd f3,FM_SIZE+24(r1) | |
224 | |
225 | b pmap_g4_restore // join common exit: restore entry MSR, pop frame, return | |
226 | ||
227 | ||
228 | // G4 -- copy using VRs (v0-v3 are saved in our frame and restored afterwards) | |
229 | |
230 | pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR | |
231 | la r9,FM_SIZE+16(r1) // place where we save VRs to r9 | |
232 | li r5,16 // load x-form offsets into r5-r9 | |
233 | li r6,32 // another offset | |
234 | stvx v0,0,r9 // save some VRs so we can use to copy | |
235 | li r7,48 // another offset | |
236 | stvx v1,r5,r9 | |
237 | li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks | |
238 | stvx v2,r6,r9 | |
239 | mtctr r0 | |
240 | li r8,96 // get look-ahead for touch | |
241 | stvx v3,r7,r9 | |
242 | li r9,128 | |
243 | mtmsr r12 // now we've saved VRs on stack, turn off DR | |
244 | isync // wait for it to happen | |
245 | b pmap_g4_copy_loop | |
246 | |
247 | .align 5 // align inner loops | |
248 | pmap_g4_copy_loop: // loop over 64-byte chunks | |
249 | dcbt r3,r8 // touch 3 lines ahead | |
250 | nop // avoid a 17-word loop... | |
251 | dcbt r3,r9 // touch 4 lines ahead | |
252 | nop // more padding | |
253 | dcba 0,r4 // avoid pre-fetch of 1st dest line | |
254 | lvx v0,0,r3 // offset 0 | |
255 | lvx v1,r5,r3 // offset 16 | |
256 | lvx v2,r6,r3 // offset 32 | |
257 | lvx v3,r7,r3 // offset 48 | |
258 | addi r3,r3,64 | |
259 | dcba r6,r4 // avoid pre-fetch of 2nd line | |
260 | stvx v0,0,r4 // offset 0 | |
261 | stvx v1,r5,r4 // offset 16 | |
262 | stvx v2,r6,r4 // offset 32 | |
263 | stvx v3,r7,r4 // offset 48 | |
264 | dcbf 0,r4 // push line 1 | |
265 | dcbf r6,r4 // and line 2 | |
266 | addi r4,r4,64 | |
267 | bdnz pmap_g4_copy_loop | |
268 | |
269 | sync // wait for stores to take | |
270 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
271 | li r8,PPC_PGBYTES-32 // point to last line in page | |
272 | pmap_g4_icache_flush: | |
273 | subic. r9,r8,32 // more to go? | |
274 | icbi r4,r8 // flush from icache | |
275 | subi r8,r9,32 // get offset to next line | |
276 | icbi r4,r9 | |
277 | bne pmap_g4_icache_flush | |
278 | |
279 | sync | |
280 | mtmsr r2 // turn DR back on | |
281 | isync | |
282 | la r9,FM_SIZE+16(r1) // get base of VR save area | |
283 | lvx v0,0,r9 // restore the VRs | |
284 | lvx v1,r5,r9 | |
285 | lvx v2,r6,r9 | |
286 | lvx v3,r7,r9 | |
287 | |
288 | pmap_g4_restore: // common exit for all paths: r11 = MSR at entry with VEC and FP cleared | |
289 | mtmsr r11 // restore the entry MSR from r11 (VEC and FP stay off, as cleared in r11 at entry) | |
290 | isync // wait for it to happen | |
291 | addi r1,r1,kSFSize // pop off our stack frame | |
292 | lwz r0,8(r1) // restore return address | |
293 | mtlr r0 | |
294 | blr | |
295 | ||
296 | ||
297 | // 64-bit/128-byte processor: copy using VRs (v0-v7 are saved in our frame and restored afterwards) | |
298 | |
299 | pmap_copy_64: // r10=features, r11=old MSR | |
300 | sldi r3,r3,12 // get page address from page num | |
301 | sldi r4,r4,12 // get page address from page num | |
302 | la r9,FM_SIZE+16(r1) // get base of VR save area | |
303 | li r5,16 // load x-form offsets into r5-r9 | |
304 | li r6,32 // another offset | |
305 | bf pfAltivecb,pmap_novmx_copy // altivec suppressed... | |
306 | stvx v0,0,r9 // save 8 VRs so we can copy wo bubbles | |
307 | stvx v1,r5,r9 | |
308 | li r7,48 // another offset | |
309 | li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks | |
310 | stvx v2,r6,r9 | |
311 | stvx v3,r7,r9 | |
312 | addi r9,r9,64 // advance base ptr so we can store another 4 | |
313 | mtctr r0 | |
314 | li r0,MASK(MSR_DR) // get DR bit | |
315 | stvx v4,0,r9 | |
316 | stvx v5,r5,r9 | |
317 | andc r12,r2,r0 // turn off DR bit | |
318 | li r0,1 // get a 1 to slam into SF | |
319 | stvx v6,r6,r9 | |
320 | stvx v7,r7,r9 | |
321 | rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0) | |
322 | li r8,-128 // offset so we can reach back one line | |
323 | mtmsrd r12 // now we've saved VRs, turn DR off and SF on | |
324 | isync // wait for it to happen | |
325 | dcbt128 0,r3,1 // start a forward stream | |
326 | b pmap_64_copy_loop | |
327 | |
328 | .align 5 // align inner loops | |
329 | pmap_64_copy_loop: // loop over 128-byte chunks | |
330 | dcbz128 0,r4 // avoid read of destination line | |
331 | lvx v0,0,r3 // offset 0 | |
332 | lvx v1,r5,r3 // offset 16 | |
333 | lvx v2,r6,r3 // offset 32 | |
334 | lvx v3,r7,r3 // offset 48 | |
335 | addi r3,r3,64 // don't have enough GPRs so add 64 2x | |
336 | lvx v4,0,r3 // offset 64 | |
337 | lvx v5,r5,r3 // offset 80 | |
338 | lvx v6,r6,r3 // offset 96 | |
339 | lvx v7,r7,r3 // offset 112 | |
340 | addi r3,r3,64 | |
341 | stvx v0,0,r4 // offset 0 | |
342 | stvx v1,r5,r4 // offset 16 | |
343 | stvx v2,r6,r4 // offset 32 | |
344 | stvx v3,r7,r4 // offset 48 | |
345 | addi r4,r4,64 | |
346 | stvx v4,0,r4 // offset 64 | |
347 | stvx v5,r5,r4 // offset 80 | |
348 | stvx v6,r6,r4 // offset 96 | |
349 | stvx v7,r7,r4 // offset 112 | |
350 | addi r4,r4,64 | |
351 | dcbf r8,r4 // flush the line we just wrote (r8 = -128 reaches back one line) | |
352 | bdnz pmap_64_copy_loop | |
353 | |
354 | sync // wait for stores to take | |
355 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
356 | li r8,PPC_PGBYTES-128 // point to last line in page | |
357 | pmap_64_icache_flush: | |
358 | subic. r9,r8,128 // more to go? | |
359 | icbi r4,r8 // flush from icache | |
360 | subi r8,r9,128 // get offset to next line | |
361 | icbi r4,r9 | |
362 | bne pmap_64_icache_flush | |
363 | |
364 | sync | |
365 | mtmsrd r2 // turn DR back on, SF off | |
366 | isync | |
367 | la r9,FM_SIZE+16(r1) // get base address of VR save area on stack | |
368 | lvx v0,0,r9 // restore the VRs | |
369 | lvx v1,r5,r9 | |
370 | lvx v2,r6,r9 | |
371 | lvx v3,r7,r9 | |
372 | addi r9,r9,64 | |
373 | lvx v4,0,r9 | |
374 | lvx v5,r5,r9 | |
375 | lvx v6,r6,r9 | |
376 | lvx v7,r7,r9 | |
377 | |
378 | b pmap_g4_restore // restore lower half of MSR and return | |
379 | ||
380 | // | |
381 | // Copy on 64-bit without VMX, using GPRs r0, r5-r10 and r12 (no VRs are saved or restored on this path) | |
382 | // | |
383 | |
384 | pmap_novmx_copy: | |
385 | li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks | |
386 | mtctr r0 | |
387 | li r0,MASK(MSR_DR) // get DR bit | |
388 | andc r12,r2,r0 // turn off DR bit | |
389 | li r0,1 // get a 1 to slam into SF | |
390 | rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0) | |
391 | mtmsrd r12 // turn DR off and SF on (nothing was saved on the stack for this path) | |
392 | isync // wait for it to happen | |
393 | dcbt128 0,r3,1 // start a forward stream | |
394 | |
395 | pmap_novmx_copy_loop: // loop over 128-byte cache lines | |
396 | dcbz128 0,r4 // avoid read of dest line | |
397 | |
398 | ld r0,0(r3) // Load half a line | |
399 | ld r12,8(r3) | |
400 | ld r5,16(r3) | |
401 | ld r6,24(r3) | |
402 | ld r7,32(r3) | |
403 | ld r8,40(r3) | |
404 | ld r9,48(r3) | |
405 | ld r10,56(r3) | |
406 | |
407 | std r0,0(r4) // Store half a line | |
408 | std r12,8(r4) | |
409 | std r5,16(r4) | |
410 | std r6,24(r4) | |
411 | std r7,32(r4) | |
412 | std r8,40(r4) | |
413 | std r9,48(r4) | |
414 | std r10,56(r4) | |
415 | |
416 | ld r0,64(r3) // Load the other half of the line | |
417 | ld r12,72(r3) | |
418 | ld r5,80(r3) | |
419 | ld r6,88(r3) | |
420 | ld r7,96(r3) | |
421 | ld r8,104(r3) | |
422 | ld r9,112(r3) | |
423 | ld r10,120(r3) | |
424 | |
425 | addi r3,r3,128 | |
426 | |
427 | std r0,64(r4) // Store the other half of the line | |
428 | std r12,72(r4) | |
429 | std r5,80(r4) | |
430 | std r6,88(r4) | |
431 | std r7,96(r4) | |
432 | std r8,104(r4) | |
433 | std r9,112(r4) | |
434 | std r10,120(r4) | |
435 | |
436 | dcbf 0,r4 // flush the line we just wrote | |
437 | addi r4,r4,128 | |
438 | bdnz pmap_novmx_copy_loop | |
439 | |
440 | sync // wait for stores to take | |
441 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
442 | li r8,PPC_PGBYTES-128 // point to last line in page | |
443 | |
444 | pmap_novmx_icache_flush: | |
445 | subic. r9,r8,128 // more to go? | |
446 | icbi r4,r8 // flush from icache | |
447 | subi r8,r9,128 // get offset to next line | |
448 | icbi r4,r9 | |
449 | bne pmap_novmx_icache_flush | |
450 | |
451 | sync | |
452 | mtmsrd r2 // turn DR back on, SF off | |
453 | isync | |
454 | |
455 | b pmap_g4_restore // restore lower half of MSR and return | |
456 | ||
457 | ||
458 | ||
459 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
460 | ||
461 | // Stack frame format used by copyin, copyout, copyinstr and copyoutstr. | |
462 | // These routines all run both on 32 and 64-bit machines, though because they are called | |
463 | // by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned | |
464 | // by MapUserAddressSpace will be 64 bits however on 64-bit machines. Beware to avoid | |
465 | // using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there | |
466 | // is no need to store or load it, which are mode-dependent operations since it could be | |
467 | // 32 or 64 bits. The caller's r31 is saved in the kkR31Save slot and must be reloaded on every exit that has built the frame. | |
468 | |
469 | #define kkFrameSize (FM_SIZE+32) | |
470 | |
471 | #define kkBufSize (FM_SIZE+0) | |
472 | #define kkCR (FM_SIZE+4) | |
473 | #define kkSource (FM_SIZE+8) | |
474 | #define kkDest (FM_SIZE+12) | |
475 | #define kkCountPtr (FM_SIZE+16) | |
476 | #define kkR31Save (FM_SIZE+20) | |
477 | |
478 | |
479 | // nonvolatile CR bits we use as flags in cr3. kkString and kkZero share bit 15: kkString is tested on the way into copyString, which then reuses the bit as kkZero. | |
480 | |
481 | #define kk64bit 12 | |
482 | #define kkNull 13 | |
483 | #define kkIn 14 | |
484 | #define kkString 15 | |
485 | #define kkZero 15 | |
486 | ||
487 | ||
488 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
489 | /* | |
490 | * int | |
491 | * copyoutstr(src, dst, maxcount, count) | |
492 | * vm_offset_t src; | |
493 | * vm_offset_t dst; | |
494 | * vm_size_t maxcount; | |
495 | * vm_size_t* count; | |
496 | * | |
497 | * Set *count to the number of bytes copied (counting the terminating 0 if one was found). The shared exit code returns 0, EFAULT, or ENAMETOOLONG in r3. | |
498 | */ | |
499 | |
500 | ENTRY(copyoutstr, TAG_NO_FRAME_USED) | |
501 | mfcr r2 // save caller's CR in r2, since we use nonvolatile cr3 for flags | |
502 | li r0,0 | |
503 | crset kkString // flag as a string op | |
504 | mr r10,r4 // for copyout, dest ptr (r4) is in user space | |
505 | stw r0,0(r6) // initialize #bytes moved | |
506 | crclr kkIn // flag as copyout | |
507 | b copyJoin | |
508 | ||
509 | ||
510 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
511 | /* | |
512 | * int | |
513 | * copyinstr(src, dst, maxcount, count) | |
514 | * vm_offset_t src; | |
515 | * vm_offset_t dst; | |
516 | * vm_size_t maxcount; | |
517 | * vm_size_t* count; | |
518 | * | |
519 | * Set *count to the number of bytes copied (counting the terminating 0 if one was found). | |
520 | * If dst == NULL, don't copy, just count bytes. | |
521 | * Only currently called from klcopyinstr. | |
522 | */ | |
523 | |
524 | ENTRY(copyinstr, TAG_NO_FRAME_USED) | |
525 | mfcr r2 // save caller's CR in r2, since we use nonvolatile cr3 for flags | |
526 | cmplwi r4,0 // dst==NULL? | |
527 | li r0,0 | |
528 | crset kkString // flag as a string op | |
529 | mr r10,r3 // for copyin, source ptr (r3) is in user space | |
530 | crmove kkNull,cr0_eq // remember if (dst==NULL) | |
531 | stw r0,0(r6) // initialize #bytes moved | |
532 | crset kkIn // flag as copyin (rather than copyout) | |
533 | b copyJoin1 // skip over the "crclr kkNull" so the flag set above survives | |
534 | ||
535 | ||
536 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
537 | /* Copy count bytes from a kernel buffer (src) to user space (dst). The shared exit code returns 0, EFAULT, or ENAMETOOLONG (count > 256MB) in r3. | |
538 | * int | |
539 | * copyout(src, dst, count) | |
540 | * vm_offset_t src; | |
541 | * vm_offset_t dst; | |
542 | * size_t count; | |
543 | */ | |
544 | |
545 | .align 5 | |
546 | .globl EXT(copyout) | |
547 | .globl EXT(copyoutmsg) | |
548 | |
549 | LEXT(copyout) | |
550 | LEXT(copyoutmsg) | |
551 | |
552 | #if INSTRUMENT | |
553 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout | |
554 | stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it | |
555 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
556 | stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it | |
557 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
558 | stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it | |
559 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
560 | stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it | |
561 | #endif | |
562 | mfcr r2 // save caller's CR in r2, since we use cr3 for flags | |
563 | crclr kkString // not a string version | |
564 | mr r10,r4 // dest (r4) is user-space ptr | |
565 | crclr kkIn // flag as copyout | |
566 | b copyJoin | |
567 | ||
568 | ||
569 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
570 | /* Copy count bytes from user space (src) to a kernel buffer (dst). The shared exit code returns 0, EFAULT, or ENAMETOOLONG (count > 256MB) in r3. | |
571 | * int | |
572 | * copyin(src, dst, count) | |
573 | * vm_offset_t src; | |
574 | * vm_offset_t dst; | |
575 | * size_t count; | |
576 | */ | |
577 | |
578 | |
579 | .align 5 | |
580 | .globl EXT(copyin) | |
581 | .globl EXT(copyinmsg) | |
582 | |
583 | LEXT(copyin) | |
584 | LEXT(copyinmsg) | |
585 | |
586 | mfcr r2 // save caller's CR in r2, since we use cr3 for flags | |
587 | crclr kkString // not a string version | |
588 | mr r10,r3 // source (r3) is user-space ptr in copyin | |
589 | crset kkIn // flag as copyin, then fall through into copyJoin | |
590 | ||
591 | ||
592 | // Common code to handle setup for all the copy variants: | |
593 | // r2 = caller's CR, since we use cr3 | |
594 | // r3-r6 = parameters | |
595 | // r10 = user-space ptr (r3 if copyin, r4 if copyout) | |
596 | // cr3 = kkIn, kkString, kkNull flags | |
597 | |
598 | copyJoin: | |
599 | crclr kkNull // (dst==NULL) convention not used with this call | |
600 | copyJoin1: // enter from copyinstr with kkNull already decided | |
601 | mflr r0 // get return address | |
602 | cmplwi r5,0 // buffer length 0? | |
603 | lis r9,0x1000 // r9 <- 0x10000000 (256MB) | |
604 | stw r0,FM_LR_SAVE(r1) // save return address before our frame is pushed | |
605 | cmplw cr1,r5,r9 // buffer length > 256MB ? | |
606 | mfsprg r8,2 // get the features | |
607 | beq-- copyinout_0 // 0 length is degenerate case (no frame has been built yet) | |
608 | stwu r1,-kkFrameSize(r1) // set up stack frame | |
609 | stw r2,kkCR(r1) // save caller's CR since we use cr3 | |
610 | mtcrf 0x02,r8 // move pf64Bit to cr6 | |
611 | stw r3,kkSource(r1) // save args across MapUserAddressSpace | |
612 | stw r4,kkDest(r1) | |
613 | stw r5,kkBufSize(r1) | |
614 | crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor | |
615 | stw r6,kkCountPtr(r1) | |
616 | stw r31,kkR31Save(r1) // save caller's r31: we use r31 globally for mapped user ptr | |
617 | li r31,0 // no mapped ptr yet (from here on every exit must reload r31) | |
618 | |
619 | |
620 | // Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout. | |
621 | // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp | |
622 | // the buffer length to 256MB. This isn't an issue if the string is less than 256MB | |
623 | // (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction | |
624 | // is due to MapUserAddressSpace; we don't want to consume more than two segments for | |
625 | // the mapping. | |
626 | |
627 | ble++ cr1,copyin0 // skip if buffer length <= 256MB | |
628 | bf kkString,copyinout_too_big // error if not string op | |
629 | mr r5,r9 // silently clamp buffer length to 256MB | |
630 | stw r9,kkBufSize(r1) // update saved copy too | |
631 | ||
632 | ||
633 | // Set up thread_recover in case we hit an illegal address. | |
634 | |
635 | copyin0: | |
636 | mfsprg r8,1 /* Get the current act */ | |
637 | lis r2,hi16(copyinout_error) | |
638 | lwz r7,ACT_THREAD(r8) | |
639 | ori r2,r2,lo16(copyinout_error) | |
640 | lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address | |
641 | stw r2,THREAD_RECOVER(r7) | |
642 | |
643 | |
644 | // Map user segment into kernel map, turn on 64-bit mode. | |
645 | // r3 = vm map | |
646 | // r5 = buffer length | |
647 | // r10 = user space ptr (r3 if copyin, r4 if copyout) | |
648 | |
649 | mr r6,r5 // Set length to map | |
650 | li r4,0 // Note: we only do this 32-bit for now | |
651 | mr r5,r10 // arg2 <- user space ptr | |
652 | #if INSTRUMENT | |
653 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace | |
654 | stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it | |
655 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
656 | stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it | |
657 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
658 | stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it | |
659 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
660 | stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it | |
661 | #endif | |
662 | bl EXT(MapUserAddressSpace) // result is consumed below as the pair (r3,r4): r3 = high half, r4 = low half of the mapped address; all zero means failure | |
663 | #if INSTRUMENT | |
664 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace | |
665 | stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it | |
666 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
667 | stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it | |
668 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
669 | stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it | |
670 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
671 | stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it | |
672 | #endif | |
673 | or. r0,r3,r4 // Did we fail the mapping? | |
674 | mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit) | |
675 | beq-- copyinout_error // was 0, so there was an error making the mapping | |
676 | bf-- kk64bit,copyin1 // skip if a 32-bit processor | |
677 | |
678 | rldimi r31,r3,32,0 // slam high-order bits into mapped ptr | |
679 | mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr | |
680 | li r0,1 | |
681 | rldimi r4,r0,63,MSR_SF_BIT // light bit 0 | |
682 | mtmsrd r4 // turn on 64-bit mode | |
683 | isync // wait for mode to change | |
684 | ||
685 | ||
686 | // Load r3-r5, substituting mapped ptr as appropriate. | |
687 | |
688 | copyin1: | |
689 | lwz r5,kkBufSize(r1) // restore length to copy (already clamped to 256MB for string ops) | |
690 | bf kkIn,copyin2 // skip if copyout | |
691 | lwz r4,kkDest(r1) // copyin: source is mapped, dest is r4 at entry | |
692 | mr r3,r31 // source is mapped ptr | |
693 | b copyin3 | |
694 | copyin2: // handle copyout | |
695 | lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry) | |
696 | mr r4,r31 // dest is mapped ptr into user space | |
697 | |
698 | |
699 | // Finally, all set up to copy: | |
700 | // r3 = source ptr (mapped if copyin) | |
701 | // r4 = dest ptr (mapped if copyout) | |
702 | // r5 = length | |
703 | // r31 = mapped ptr returned by MapUserAddressSpace | |
704 | // cr3 = kkIn, kkString, kk64bit, and kkNull flags | |
705 | |
706 | copyin3: | |
707 | bt kkString,copyString // handle copyinstr and copyoutstr | |
708 | bl EXT(bcopy) // copyin and copyout: let bcopy do the work | |
709 | li r3,0 // bcopy came back here, so return success and fall into copyinx | |
710 | ||
711 | ||
712 | // Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached | |
713 | // from error recovery if we get a DSI accessing user space. Turn 64-bit mode back | |
714 | // off if this is a 64-bit processor, restore the caller's r31 and cr3, clear the recovery ptr, | |
715 | // and pop off the frame. The mapped ptr into user space has been kept in r31 as a | |
716 | // reg64_t type (ie, a 64-bit ptr on 64-bit machines.) | |
717 | // NOTE(review): this exit path does not call ReleaseUserAddressSpace; r31 is simply reloaded with the caller's value. | |
718 | // r3 = 0, EFAULT, or ENAMETOOLONG | |
719 | |
720 | copyinx: | |
721 | lwz r2,kkCR(r1) // get caller's CR, saved at entry since we use cr3 | |
722 | mfsprg r6,1 // Get the current act | |
723 | lwz r10,ACT_THREAD(r6) | |
724 | |
725 | bf-- kk64bit,copyinx1 // skip if 32-bit processor | |
726 | mfmsr r12 | |
727 | rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off | |
728 | mtmsrd r12 // turn SF off (only the SF bit was cleared in r12) | |
729 | isync // wait for the mode to change | |
730 | copyinx1: | |
731 | lwz r31,kkR31Save(r1) // restore caller's r31 | |
732 | addi r1,r1,kkFrameSize // pop off our stack frame | |
733 | lwz r0,FM_LR_SAVE(r1) | |
734 | li r4,0 | |
735 | stw r4,THREAD_RECOVER(r10) // Clear recovery | |
736 | mtlr r0 | |
737 | mtcrf 0x10,r2 // restore cr3 | |
738 | blr | |
739 | ||
740 | ||
741 | /* We get here via the exception handler if an illegal | |
742 | * user memory reference was made. This error handler is used by | |
743 | * copyin, copyout, copyinstr, and copyoutstr. Registers are as | |
744 | * they were at point of fault, so for example cr3 flags are valid. | |
745 | * It is also branched to directly when MapUserAddressSpace returns 0. | |
746 | */ | |
747 | copyinout_error: | |
748 | li r3,EFAULT // return error | |
749 | b copyinx | |
750 | |
751 | copyinout_0: // degenerate case: 0-length copy (reached before any frame is built) | |
752 | mtcrf 0x10,r2 // restore cr3 | |
753 | li r3,0 // return success | |
754 | blr | |
755 | ||
756 | copyinout_too_big: // degenerate case: copyin/copyout longer than 256MB (frame already built) | |
757 | mtcrf 0x10,r2 // restore cr3 (r2 still holds the caller's CR here) | |
758 | lwz r31,kkR31Save(r1) // restore caller's r31: it is nonvolatile and was zeroed after the frame was built | |
759 | lwz r1,0(r1) // pop off stack frame | |
760 | li r3,ENAMETOOLONG // return error; LR was never changed, so no reload is needed | |
761 | blr | |
762 | ||
763 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
764 | // Handle copyinstr and copyoutstr. At this point the stack frame is set up, | |
765 | // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode | |
766 | // if necessary, and: | |
767 | // r3 = source ptr, mapped if copyinstr | |
768 | // r4 = dest ptr, mapped if copyoutstr | |
769 | // r5 = buffer length | |
770 | // r31 = mapped ptr returned by MapUserAddressSpace | |
771 | // cr3 = kkIn, kkString, kkNull, and kk64bit flags | |
772 | // We do word copies unless the buffer is very short, then use a byte copy loop | |
773 | // for the leftovers if necessary. An unaligned source is first advanced to a word boundary by a byte loop. | |
774 | |
775 | copyString: | |
776 | li r12,0 // Set header bytes count to zero | |
777 | cmplwi cr1,r5,20 // is buffer very short? | |
778 | mtctr r5 // assuming short, set up loop count for bytes | |
779 | blt cr1,copyinstr8 // too short for word loop | |
780 | andi. r12,r3,0x3 // is source ptr word aligned? | |
781 | bne copyinstr11 // no: copy the header bytes up to a word boundary with the byte loop first | |
782 | copyinstr1: | |
783 | srwi r6,r5,2 // get #words in buffer | |
784 | mtctr r6 // set up word loop count | |
785 | lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11 | |
786 | lis r11,hi16(0x80808080) | |
787 | ori r10,r10,lo16(0xFEFEFEFF) | |
788 | ori r11,r11,lo16(0x80808080) | |
789 | bf kkNull,copyinstr6 // enter loop that copies | |
790 | b copyinstr5 // use loop that just counts | |
791 | ||
792 | ||
793 | // Word loop(s). They do a word-parallel search for 0s, using the following | |
794 | // non-obvious but very efficient test: | |
795 | // y = data + 0xFEFEFEFF | |
796 | // z = ~data & 0x80808080 | |
797 | // If (y & z)==0, then all bytes in dataword are nonzero. We need two copies of | |
798 | // this loop, since if we test kkNull in the loop then it becomes 9 words long. | |
799 | |
800 | .align 5 // align inner loops for speed | |
801 | copyinstr5: // version that counts but does not copy | |
802 | lwz r8,0(r3) // get next word of source | |
803 | addi r3,r3,4 // increment source ptr | |
804 | add r9,r10,r8 // r9 = data + 0xFEFEFEFF | |
805 | andc r7,r11,r8 // r7 = ~data & 0x80808080 | |
806 | and. r7,r9,r7 // r7 = r9 & r7 | |
807 | bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes are nonzero | |
808 | |
809 | b copyinstr7 | |
810 | |
811 | .align 5 // align inner loops for speed | |
812 | copyinstr6: // version that counts and copies | |
813 | lwz r8,0(r3) // get next word of source | |
814 | addi r3,r3,4 // increment source ptr | |
815 | addi r4,r4,4 // increment dest ptr while we wait for data | |
816 | add r9,r10,r8 // r9 = data + 0xFEFEFEFF | |
817 | andc r7,r11,r8 // r7 = ~data & 0x80808080 | |
818 | and. r7,r9,r7 // r7 = r9 & r7 | |
819 | stw r8,-4(r4) // pack all 4 bytes into buffer | |
820 | bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero | |
821 | ||
822 | ||
823 | // Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0 | |
824 | // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also | |
825 | // mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte. | |
826 | |
827 | copyinstr7: | |
828 | crnot kkZero,cr0_eq // 0 found iff cr0_eq is off | |
829 | mfctr r6 // get #words remaining in buffer | |
830 | rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position | |
831 | slwi r6,r6,2 // convert to #bytes remaining | |
832 | andc r7,r7,r2 // turn off false hits from 0x0100 worst case | |
833 | rlwimi r6,r5,0,30,31 // add in odd bytes leftover in buffer | |
834 | srwi r7,r7,8 // we want to count the 0 as a byte xferred | |
835 | addi r6,r6,4 // don't count last word xferred (yet) | |
836 | cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80) | |
837 | srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4 | |
838 | sub. r6,r6,r7 // account for nonzero bytes in last word | |
839 | bt++ kkZero,copyinstr10 // 0 found, so done | |
840 | |
841 | beq copyinstr10 // r6==0, so buffer truly full | |
842 | mtctr r6 // 0 not found, loop over r6 bytes | |
843 | b copyinstr8 // enter byte loop for last 1-3 leftover bytes | |
844 | ||
845 | ||
846 | // Byte loop. This is used for very small buffers and for the odd bytes left over | |
847 | // after searching and copying words at a time. CTR holds the number of bytes still allowed. | |
848 | |
849 | .align 5 // align inner loops for speed | |
850 | copyinstr8: // loop over bytes of source | |
851 | lbz r0,0(r3) // get next byte of source | |
852 | addi r3,r3,1 | |
853 | addi r4,r4,1 // increment dest addr whether we store or not | |
854 | cmpwi r0,0 // the 0? | |
855 | bt-- kkNull,copyinstr9 // don't store (was copyinstr with NULL ptr) | |
856 | stb r0,-1(r4) | |
857 | copyinstr9: | |
858 | bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer | |
859 | |
860 | mfctr r6 // get #bytes left in buffer | |
861 | crmove kkZero,cr0_eq // kkZero <- set iff the last byte examined was the 0 | |
862 | ||
863 | ||
864 | // Buffer filled or 0 found. Unwind and return. | |
865 | // r5 = length of the region being accounted for; r12 = header bytes already moved outside that region (may be 0) | |
866 | // r6 = untransferred bytes remaining in buffer | |
867 | // r31 = mapped ptr returned by MapUserAddressSpace | |
868 | // cr3 = kkZero set iff 0 found | |
869 | |
870 | copyinstr10: | |
871 | lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved | |
872 | sub r2,r5,r6 // get #bytes we moved, counting the 0 iff any | |
873 | add r2,r2,r12 // add the header bytes count | |
874 | li r3,0 // assume 0 return status | |
875 | stw r2,0(r9) // store #bytes moved | |
876 | bt++ kkZero,copyinx // we did find the 0 so return 0 | |
877 | li r3,ENAMETOOLONG // buffer filled | |
878 | b copyinx // join main exit routine | |
879 | ||
880 | // Byte loop. This is used on the header bytes for unaligned source, i.e. the 1-3 bytes before the next word boundary. | |
881 | |
882 | .align 5 // align inner loops for speed | |
883 | copyinstr11: | |
884 | li r10,4 // load word size | |
885 | sub r12,r10,r12 // set the header bytes count (4 - low two bits of source ptr) | |
886 | mtctr r12 // set up bytes loop count | |
887 | copyinstr12: // loop over bytes of source | |
888 | lbz r0,0(r3) // get next byte of source | |
889 | addi r3,r3,1 | |
890 | addi r4,r4,1 // increment dest addr whether we store or not | |
891 | cmpwi r0,0 // the 0? | |
892 | bt-- kkNull,copyinstr13 // don't store (was copyinstr with NULL ptr) | |
893 | stb r0,-1(r4) | |
894 | copyinstr13: | |
895 | bdnzf cr0_eq,copyinstr12 // loop if byte not 0 and more header bytes remain | |
896 | sub r5,r5,r12 // subtract the header bytes from the buffer length | |
897 | bne cr0_eq,copyinstr1 // no 0 among the header bytes: continue with the word loop | |
898 | |
899 | mr r5,r12 // 0 found in header: the header count becomes the length to account for | |
900 | li r12,0 // Clear the header bytes count so it is not added twice | |
901 | mfctr r6 // get #header bytes not yet examined | |
902 | crmove kkZero,cr0_eq // remember that the 0 was found | |
903 | b copyinstr10 | |
904 |