]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | #include <debug.h> | |
32 | #include <ppc/asm.h> | |
33 | #include <ppc/proc_reg.h> | |
34 | #include <mach/ppc/vm_param.h> | |
35 | #include <assym.s> | |
36 | #include <sys/errno.h> | |
37 | ||
38 | #define INSTRUMENT 0 | |
39 | ||
40 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
41 | /* | |
42 | * void pmap_zero_page(vm_offset_t pa) | |
43 | * NOTE: the code treats the r3 argument as a physical page number (it is shifted left 12 to form the page address). | |
44 | * Zero a page of physical memory. This routine runs in 32 or 64-bit mode, | |
45 | * and handles 32 and 128-byte cache lines. Two lines are zeroed per loop pass, working from the last line of the page down to offset 0. | |
46 | */ | |
47 | ||
48 | ||
49 | .align 5 | |
50 | .globl EXT(pmap_zero_page) | |
51 | ||
52 | LEXT(pmap_zero_page) | |
53 | ||
54 | mflr r12 // save return address (the bl below overwrites LR) | |
55 | bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10 | |
56 | mtlr r12 // restore return address | |
57 | andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size (used below as a byte count, so the flag bits presumably equal 32 and 128 - TODO confirm) | |
58 | ||
59 | subfic r4,r9,PPC_PGBYTES // r4 <- offset of last cache line in page (PPC_PGBYTES - line size) | |
60 | ||
61 | bt++ pf64Bitb,page0S4 // Go do the big guys... (64-bit processors) | |
62 | ||
63 | slwi r3,r3,12 // get page address from page num | |
64 | b page_zero_1 // Jump to line aligned loop... | |
65 | ||
66 | .align 5 | |
67 | ||
68 | nop // 7 nops plus the sldi below fill 8 instruction words, so page_zero_1 starts right after them on an aligned boundary | |
69 | nop | |
70 | nop | |
71 | nop | |
72 | nop | |
73 | nop | |
74 | nop | |
75 | ||
76 | page0S4: | |
77 | sldi r3,r3,12 // get page address from page num | |
78 | ||
79 | page_zero_1: // loop zeroing cache lines | |
80 | sub. r5,r4,r9 // r5 <- next lower offset; cr0_eq when that offset is 0 (more to go?) | |
81 | dcbz128 r3,r4 // zero either 32 or 128 bytes | |
82 | sub r4,r5,r9 // generate next offset | |
83 | dcbz128 r3,r5 // zero the line at the lower offset in r5 | |
84 | bne-- page_zero_1 // loop until the pass that zeroed the line at offset 0 | |
85 | ||
86 | b EXT(ml_restore) // restore MSR and do the isync | |
87 | ||
88 | ||
89 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
90 | /* void | |
91 | * phys_copy(src, dst, bytecount) | |
92 | * addr64_t src; | |
93 | * addr64_t dst; | |
94 | * int bytecount (arrives in r7; src is in r3/r4 and dst in r5/r6) | |
95 | * Copies a word at a time while 4 or more bytes remain, then finishes byte by byte. | |
96 | * This routine will copy bytecount bytes from physical address src to physical | |
97 | * address dst. It runs in 64-bit mode if necessary, but does not handle | |
98 | * overlap or make any attempt to be optimal. Length must be a signed word. | |
99 | * Not performance critical. | |
100 | */ | |
101 | ||
102 | ||
103 | .align 5 | |
104 | .globl EXT(phys_copy) | |
105 | ||
106 | LEXT(phys_copy) | |
107 | ||
108 | rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg (src) | |
109 | mflr r12 // get return address (the bl below overwrites LR) | |
110 | rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits (r3 <- src) | |
111 | rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg (dst) | |
112 | bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10 | |
113 | rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits (r4 <- dst) | |
114 | mtlr r12 // restore return address | |
115 | subic. r5,r7,4 // a word to copy? (r5 <- bytecount - 4, cr0 set for the bge at phys_copy_2) | |
116 | b phys_copy_2 // enter the word loop at its test | |
117 | ||
118 | .align 5 | |
119 | ||
120 | phys_copy_1: // loop copying words | |
121 | subic. r5,r5,4 // more to go? | |
122 | lwz r0,0(r3) // load a word from source | |
123 | addi r3,r3,4 // advance source ptr | |
124 | stw r0,0(r4) // store the word to dest | |
125 | addi r4,r4,4 // advance dest ptr | |
126 | phys_copy_2: | |
127 | bge phys_copy_1 // loop while 4 or more bytes remain | |
128 | addic. r5,r5,4 // restore count (r5 <- bytes still to copy) | |
129 | ble phys_copy_4 // no more | |
130 | ||
131 | // Loop is aligned here | |
132 | ||
133 | phys_copy_3: // loop copying bytes | |
134 | subic. r5,r5,1 // more to go? | |
135 | lbz r0,0(r3) // load a byte from source | |
136 | addi r3,r3,1 // advance source ptr | |
137 | stb r0,0(r4) // store the byte to dest | |
138 | addi r4,r4,1 // advance dest ptr | |
139 | bgt phys_copy_3 // loop while bytes remain | |
140 | phys_copy_4: | |
141 | b EXT(ml_restore) // restore MSR and do the isync | |
142 | ||
143 | ||
144 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
145 | /* void | |
146 | * pmap_copy_page(src, dst) | |
147 | * ppnum_t src; | |
148 | * ppnum_t dst; | |
149 | * | |
150 | * This routine will copy the physical page src to physical page dst | |
151 | * | |
152 | * This routine assumes that the src and dst are page numbers and that the | |
153 | * destination is cached. It runs on 32 and 64 bit processors, with and | |
154 | * without altivec, and with 32 and 128 byte cache lines. | |
155 | * We also must assume that no-one will be executing within the destination | |
156 | * page, and that this will be used for paging. Because this | |
157 | * is a common routine, we have tuned loops for each processor class. | |
158 | * Four copy paths: G3 (FPRs), G4 (VRs), 64-bit with VMX (VRs), 64-bit without VMX (GPRs). Each copies with DR off and then runs icbi over the destination page. | |
159 | */ | |
160 | #define kSFSize (FM_SIZE+160) | |
161 | ||
162 | ENTRY(pmap_copy_page, TAG_NO_FRAME_USED) | |
163 | ||
164 | lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag | |
165 | mflr r0 // get return address | |
166 | ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag | |
167 | stw r0,8(r1) // save return address in caller's frame (reloaded at pmap_g4_restore) | |
168 | stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs | |
169 | mfmsr r11 // save MSR at entry | |
170 | mfsprg r10,2 // get feature flags | |
171 | andc r11,r11,r2 // Clear out vec and fp (r11 is what pmap_g4_restore puts back in the MSR) | |
172 | ori r2,r2,lo16(MASK(MSR_EE)) // add EE to the mask of bits to clear | |
173 | andc r2,r11,r2 // Clear out EE as well (r2 <- entry MSR without VEC, FP, EE) | |
174 | mtcrf 0x02,r10 // we need to test pf64Bit | |
175 | ori r2,r2,MASK(MSR_FP) // must enable FP for G3... | |
176 | mtcrf 0x80,r10 // we need to test pfAltivec too | |
177 | oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3) | |
178 | mtmsr r2 // turn EE off, FP and VEC on | |
179 | isync // wait for the MSR change | |
180 | bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint) | |
181 | slwi r3,r3,12 // get source page address from page num | |
182 | slwi r4,r4,12 // get dest page address from page num | |
183 | rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR (r12 <- r2 with DR cleared) | |
184 | bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4 | |
185 | ||
186 | ||
187 | // G3 -- copy using FPRs | |
188 | ||
189 | stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy | |
190 | stfd f1,FM_SIZE+8(r1) | |
191 | li r5,PPC_PGBYTES/32 // count of cache lines in a page | |
192 | stfd f2,FM_SIZE+16(r1) | |
193 | mtctr r5 // loop count = cache lines per page | |
194 | stfd f3,FM_SIZE+24(r1) | |
195 | mtmsr r12 // turn off DR after saving FPRs on stack | |
196 | isync // wait for it to happen | |
197 | ||
198 | pmap_g3_copy_loop: // loop over 32-byte cache lines | |
199 | dcbz 0,r4 // avoid read of dest line | |
200 | lfd f0,0(r3) // load 32 bytes of source into f0-f3 | |
201 | lfd f1,8(r3) | |
202 | lfd f2,16(r3) | |
203 | lfd f3,24(r3) | |
204 | addi r3,r3,32 // advance source ptr | |
205 | stfd f0,0(r4) // store the 32 bytes to the dest line | |
206 | stfd f1,8(r4) | |
207 | stfd f2,16(r4) | |
208 | stfd f3,24(r4) | |
209 | dcbst 0,r4 // flush dest line to RAM | |
210 | addi r4,r4,32 // advance dest ptr | |
211 | bdnz pmap_g3_copy_loop // loop over all lines in the page | |
212 | ||
213 | sync // wait for stores to take | |
214 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
215 | li r6,PPC_PGBYTES-32 // point to last line in page | |
216 | pmap_g3_icache_flush: // two icbi per pass, from the last line down to offset 0 | |
217 | subic. r5,r6,32 // more to go? | |
218 | icbi r4,r6 // flush another line in icache | |
219 | subi r6,r5,32 // get offset to next line | |
220 | icbi r4,r5 // flush the line at the lower offset | |
221 | bne pmap_g3_icache_flush // loop until the pass that handled offset 0 | |
222 | ||
223 | sync | |
224 | mtmsr r2 // turn DR back on | |
225 | isync | |
226 | lfd f0,FM_SIZE+0(r1) // restore the FPRs | |
227 | lfd f1,FM_SIZE+8(r1) | |
228 | lfd f2,FM_SIZE+16(r1) | |
229 | lfd f3,FM_SIZE+24(r1) | |
230 | ||
231 | b pmap_g4_restore // restore MSR and done | |
232 | ||
233 | ||
234 | // G4 -- copy using VRs | |
235 | ||
236 | pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR | |
237 | la r9,FM_SIZE+16(r1) // place where we save VRs to r9 | |
238 | li r5,16 // load x-form offsets into r5-r9 | |
239 | li r6,32 // another offset | |
240 | stvx v0,0,r9 // save some VRs so we can use to copy | |
241 | li r7,48 // another offset | |
242 | stvx v1,r5,r9 | |
243 | li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks | |
244 | stvx v2,r6,r9 | |
245 | mtctr r0 // loop count = 64-byte chunks per page | |
246 | li r8,96 // get look-ahead for touch | |
247 | stvx v3,r7,r9 | |
248 | li r9,128 // second look-ahead offset (r9 is done serving as the save-area base) | |
249 | mtmsr r12 // now we've saved VRs on stack, turn off DR | |
250 | isync // wait for it to happen | |
251 | b pmap_g4_copy_loop | |
252 | ||
253 | .align 5 // align inner loops | |
254 | pmap_g4_copy_loop: // loop over 64-byte chunks | |
255 | dcbt r3,r8 // touch 3 lines ahead | |
256 | nop // avoid a 17-word loop... | |
257 | dcbt r3,r9 // touch 4 lines ahead | |
258 | nop // more padding | |
259 | dcba 0,r4 // avoid pre-fetch of 1st dest line | |
260 | lvx v0,0,r3 // offset 0 | |
261 | lvx v1,r5,r3 // offset 16 | |
262 | lvx v2,r6,r3 // offset 32 | |
263 | lvx v3,r7,r3 // offset 48 | |
264 | addi r3,r3,64 // advance source ptr | |
265 | dcba r6,r4 // avoid pre-fetch of 2nd line | |
266 | stvx v0,0,r4 // offset 0 | |
267 | stvx v1,r5,r4 // offset 16 | |
268 | stvx v2,r6,r4 // offset 32 | |
269 | stvx v3,r7,r4 // offset 48 | |
270 | dcbf 0,r4 // push line 1 | |
271 | dcbf r6,r4 // and line 2 | |
272 | addi r4,r4,64 // advance dest ptr | |
273 | bdnz pmap_g4_copy_loop // loop over all chunks in the page | |
274 | ||
275 | sync // wait for stores to take | |
276 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
277 | li r8,PPC_PGBYTES-32 // point to last line in page | |
278 | pmap_g4_icache_flush: // two icbi per pass, from the last line down to offset 0 | |
279 | subic. r9,r8,32 // more to go? | |
280 | icbi r4,r8 // flush from icache | |
281 | subi r8,r9,32 // get offset to next line | |
282 | icbi r4,r9 // flush the line at the lower offset | |
283 | bne pmap_g4_icache_flush // loop until the pass that handled offset 0 | |
284 | ||
285 | sync | |
286 | mtmsr r2 // turn DR back on | |
287 | isync | |
288 | la r9,FM_SIZE+16(r1) // get base of VR save area | |
289 | lvx v0,0,r9 // restore the VRs | |
290 | lvx v1,r5,r9 | |
291 | lvx v2,r6,r9 | |
292 | lvx v3,r7,r9 | |
293 | ||
294 | pmap_g4_restore: // r11=MSR | |
295 | mtmsr r11 // put back the entry MSR with VEC and FP off (EE returns to its entry state) | |
296 | isync // wait for it to happen | |
297 | addi r1,r1,kSFSize // pop off our stack frame | |
298 | lwz r0,8(r1) // restore return address | |
299 | mtlr r0 | |
300 | blr | |
301 | ||
302 | ||
303 | // 64-bit/128-byte processor: copy using VRs | |
304 | ||
305 | pmap_copy_64: // r10=features, r11=old MSR | |
306 | sldi r3,r3,12 // get source page address from page num | |
307 | sldi r4,r4,12 // get dest page address from page num | |
308 | la r9,FM_SIZE+16(r1) // get base of VR save area | |
309 | li r5,16 // load x-form offsets into r5-r9 | |
310 | li r6,32 // another offset | |
311 | bf pfAltivecb,pmap_novmx_copy // altivec suppressed... | |
312 | stvx v0,0,r9 // save 8 VRs so we can copy without bubbles | |
313 | stvx v1,r5,r9 | |
314 | li r7,48 // another offset | |
315 | li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks | |
316 | stvx v2,r6,r9 | |
317 | stvx v3,r7,r9 | |
318 | addi r9,r9,64 // advance base ptr so we can store another 4 | |
319 | mtctr r0 // loop count = 128-byte chunks per page | |
320 | li r0,MASK(MSR_DR) // get DR bit | |
321 | stvx v4,0,r9 | |
322 | stvx v5,r5,r9 | |
323 | andc r12,r2,r0 // turn off DR bit | |
324 | li r0,1 // get a 1 to slam into SF | |
325 | stvx v6,r6,r9 | |
326 | stvx v7,r7,r9 | |
327 | rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0) | |
328 | li r8,-128 // offset so we can reach back one line | |
329 | mtmsrd r12 // now we've saved VRs, turn DR off and SF on | |
330 | isync // wait for it to happen | |
331 | dcbt128 0,r3,1 // start a forward stream | |
332 | b pmap_64_copy_loop | |
333 | ||
334 | .align 5 // align inner loops | |
335 | pmap_64_copy_loop: // loop over 128-byte chunks | |
336 | dcbz128 0,r4 // avoid read of destination line | |
337 | lvx v0,0,r3 // offset 0 | |
338 | lvx v1,r5,r3 // offset 16 | |
339 | lvx v2,r6,r3 // offset 32 | |
340 | lvx v3,r7,r3 // offset 48 | |
341 | addi r3,r3,64 // don't have enough GPRs so add 64 2x | |
342 | lvx v4,0,r3 // offset 64 | |
343 | lvx v5,r5,r3 // offset 80 | |
344 | lvx v6,r6,r3 // offset 96 | |
345 | lvx v7,r7,r3 // offset 112 | |
346 | addi r3,r3,64 // source ptr now at next 128-byte chunk | |
347 | stvx v0,0,r4 // offset 0 | |
348 | stvx v1,r5,r4 // offset 16 | |
349 | stvx v2,r6,r4 // offset 32 | |
350 | stvx v3,r7,r4 // offset 48 | |
351 | addi r4,r4,64 // step dest ptr to second half of the line | |
352 | stvx v4,0,r4 // offset 64 | |
353 | stvx v5,r5,r4 // offset 80 | |
354 | stvx v6,r6,r4 // offset 96 | |
355 | stvx v7,r7,r4 // offset 112 | |
356 | addi r4,r4,64 // dest ptr now at next 128-byte chunk | |
357 | dcbf r8,r4 // flush the line we just wrote | |
358 | bdnz pmap_64_copy_loop // loop over all chunks in the page | |
359 | ||
360 | sync // wait for stores to take | |
361 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
362 | li r8,PPC_PGBYTES-128 // point to last line in page | |
363 | pmap_64_icache_flush: // two icbi per pass, from the last line down to offset 0 | |
364 | subic. r9,r8,128 // more to go? | |
365 | icbi r4,r8 // flush from icache | |
366 | subi r8,r9,128 // get offset to next line | |
367 | icbi r4,r9 // flush the line at the lower offset | |
368 | bne pmap_64_icache_flush // loop until the pass that handled offset 0 | |
369 | ||
370 | sync | |
371 | mtmsrd r2 // turn DR back on, SF off | |
372 | isync | |
373 | la r9,FM_SIZE+16(r1) // get base address of VR save area on stack | |
374 | lvx v0,0,r9 // restore the VRs | |
375 | lvx v1,r5,r9 | |
376 | lvx v2,r6,r9 | |
377 | lvx v3,r7,r9 | |
378 | addi r9,r9,64 // advance to the second group of 4 saved VRs | |
379 | lvx v4,0,r9 | |
380 | lvx v5,r5,r9 | |
381 | lvx v6,r6,r9 | |
382 | lvx v7,r7,r9 | |
383 | ||
384 | b pmap_g4_restore // restore lower half of MSR and return | |
385 | ||
386 | // | |
387 | // Copy on 64-bit without VMX | |
388 | // | |
389 | ||
390 | pmap_novmx_copy: | |
391 | li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks | |
392 | mtctr r0 // loop count = 128-byte chunks per page | |
393 | li r0,MASK(MSR_DR) // get DR bit | |
394 | andc r12,r2,r0 // turn off DR bit | |
395 | li r0,1 // get a 1 to slam into SF | |
396 | rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0) | |
397 | mtmsrd r12 // turn DR off and SF on (no VRs are saved on this path) | |
398 | isync // wait for it to happen | |
399 | dcbt128 0,r3,1 // start a forward stream | |
400 | ||
401 | pmap_novmx_copy_loop: // loop over 128-byte cache lines (uses r0, r5-r10 and r12 as data registers) | |
402 | dcbz128 0,r4 // avoid read of dest line | |
403 | ||
404 | ld r0,0(r3) // Load half a line | |
405 | ld r12,8(r3) | |
406 | ld r5,16(r3) | |
407 | ld r6,24(r3) | |
408 | ld r7,32(r3) | |
409 | ld r8,40(r3) | |
410 | ld r9,48(r3) | |
411 | ld r10,56(r3) | |
412 | ||
413 | std r0,0(r4) // Store half a line | |
414 | std r12,8(r4) | |
415 | std r5,16(r4) | |
416 | std r6,24(r4) | |
417 | std r7,32(r4) | |
418 | std r8,40(r4) | |
419 | std r9,48(r4) | |
420 | std r10,56(r4) | |
421 | ||
422 | ld r0,64(r3) // Load half a line | |
423 | ld r12,72(r3) | |
424 | ld r5,80(r3) | |
425 | ld r6,88(r3) | |
426 | ld r7,96(r3) | |
427 | ld r8,104(r3) | |
428 | ld r9,112(r3) | |
429 | ld r10,120(r3) | |
430 | ||
431 | addi r3,r3,128 // advance source ptr | |
432 | ||
433 | std r0,64(r4) // Store half a line | |
434 | std r12,72(r4) | |
435 | std r5,80(r4) | |
436 | std r6,88(r4) | |
437 | std r7,96(r4) | |
438 | std r8,104(r4) | |
439 | std r9,112(r4) | |
440 | std r10,120(r4) | |
441 | ||
442 | dcbf 0,r4 // flush the line we just wrote | |
443 | addi r4,r4,128 // advance dest ptr | |
444 | bdnz pmap_novmx_copy_loop // loop over all lines in the page | |
445 | ||
446 | sync // wait for stores to take | |
447 | subi r4,r4,PPC_PGBYTES // restore ptr to destination page | |
448 | li r8,PPC_PGBYTES-128 // point to last line in page | |
449 | ||
450 | pmap_novmx_icache_flush: // two icbi per pass, from the last line down to offset 0 | |
451 | subic. r9,r8,128 // more to go? | |
452 | icbi r4,r8 // flush from icache | |
453 | subi r8,r9,128 // get offset to next line | |
454 | icbi r4,r9 // flush the line at the lower offset | |
455 | bne pmap_novmx_icache_flush // loop until the pass that handled offset 0 | |
456 | ||
457 | sync | |
458 | mtmsrd r2 // turn DR back on, SF off | |
459 | isync | |
460 | ||
461 | b pmap_g4_restore // restore lower half of MSR and return | |
462 | ||
463 | ||
464 | ||
465 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
466 | ||
467 | // Stack frame format used by copyin, copyout, copyinstr and copyoutstr. | |
468 | // These routines all run both on 32 and 64-bit machines, though because they are called | |
469 | // by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned | |
470 | // by MapUserMemoryWindow will be 64 bits however on 64-bit machines. Beware to avoid | |
471 | // using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there | |
472 | // is no need to store or load it, which are mode-dependent operations since it could be | |
473 | // 32 or 64 bits. | |
474 | ||
475 | #define kkFrameSize (FM_SIZE+32) | |
476 | // Frame slots: kkBufSize = buffer length, kkCR3 = caller's cr3, kkSource / kkDest = r3 / r5 at entry, kkCountPtr = r7 (count output ptr), kkR31Save = caller's r31, kkThrErrJmp = the thread's previous THREAD_RECOVER value. | |
477 | #define kkBufSize (FM_SIZE+0) | |
478 | #define kkCR3 (FM_SIZE+4) | |
479 | #define kkSource (FM_SIZE+8) | |
480 | #define kkDest (FM_SIZE+12) | |
481 | #define kkCountPtr (FM_SIZE+16) | |
482 | #define kkR31Save (FM_SIZE+20) | |
483 | #define kkThrErrJmp (FM_SIZE+24) | |
484 | ||
485 | ||
486 | // nonvolatile CR bits we use as flags in cr3: kk64bit = 64-bit processor, kkNull = copyinstr called with dst==NULL, kkIn = copyin (clear means copyout), kkString = string variant. kkZero shares kkString's bit; its use is not visible in this part of the file. | |
487 | ||
488 | #define kk64bit 12 | |
489 | #define kkNull 13 | |
490 | #define kkIn 14 | |
491 | #define kkString 15 | |
492 | #define kkZero 15 | |
493 | ||
494 | ||
495 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
496 | /* | |
497 | * int | |
498 | * copyoutstr(src, dst, maxcount, count) | |
499 | * vm_offset_t src; // r3 | |
500 | * addr64_t dst; // r4 and r5 | |
501 | * vm_size_t maxcount; // r6 | |
502 | * vm_size_t* count; // r7 | |
503 | * Only sets up flags and registers here, then branches to the shared setup at copyJoin. | |
504 | * Set *count to the number of bytes copied. | |
505 | */ | |
506 | ||
507 | ENTRY(copyoutstr, TAG_NO_FRAME_USED) | |
508 | mfcr r2,0x10 // save caller's cr3, which we use for flags | |
509 | mr r10,r4 // move high word of 64-bit user address to r10 | |
510 | li r0,0 // r0 <- 0, stored to *count below | |
511 | crset kkString // flag as a string op | |
512 | mr r11,r5 // move low word of 64-bit user address to r11 | |
513 | stw r0,0(r7) // initialize #bytes moved | |
514 | crclr kkIn // flag as copyout | |
515 | b copyJoin // join common setup (which clears kkNull) | |
516 | ||
517 | ||
518 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
519 | /* | |
520 | * int | |
521 | * copyinstr(src, dst, maxcount, count) | |
522 | * addr64_t src; // r3 and r4 | |
523 | * vm_offset_t dst; // r5 | |
524 | * vm_size_t maxcount; // r6 | |
525 | * vm_size_t* count; // r7 | |
526 | * | |
527 | * Set *count to the number of bytes copied | |
528 | * If dst == NULL, don't copy, just count bytes. | |
529 | * Only currently called from klcopyinstr. | |
530 | */ | |
531 | ||
532 | ENTRY(copyinstr, TAG_NO_FRAME_USED) | |
533 | mfcr r2,0x10 // save caller's cr3, which we use for flags | |
534 | cmplwi r5,0 // dst==NULL? | |
535 | mr r10,r3 // move high word of 64-bit user address to r10 | |
536 | li r0,0 // r0 <- 0, stored to *count below | |
537 | crset kkString // flag as a string op | |
538 | mr r11,r4 // move low word of 64-bit user address to r11 | |
539 | crmove kkNull,cr0_eq // remember if (dst==NULL) | |
540 | stw r0,0(r7) // initialize #bytes moved | |
541 | crset kkIn // flag as copyin (rather than copyout) | |
542 | b copyJoin1 // skip over the "crclr kkNull" | |
543 | ||
544 | ||
545 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
546 | /* | |
547 | * int (0 on success, EFAULT if a user address faults, ENAMETOOLONG if count > 256MB) | |
548 | * copyout(src, dst, count) | |
549 | * vm_offset_t src; // r3 | |
550 | * addr64_t dst; // r4 and r5 | |
551 | * size_t count; // r6 | |
552 | */ | |
553 | ||
554 | .align 5 | |
555 | .globl EXT(copyout) | |
556 | .globl EXT(copyoutmsg) | |
557 | ||
558 | LEXT(copyout) | |
559 | LEXT(copyoutmsg) | |
560 | ||
561 | #if INSTRUMENT | |
562 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout | |
563 | stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it | |
564 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
565 | stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it | |
566 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
567 | stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it | |
568 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
569 | stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it | |
570 | #endif | |
571 | mfcr r2,0x10 // save caller's cr3, which we use for flags | |
572 | mr r10,r4 // move high word of 64-bit user address to r10 | |
573 | crclr kkString // not a string version | |
574 | mr r11,r5 // move low word of 64-bit user address to r11 | |
575 | crclr kkIn // flag as copyout | |
576 | b copyJoin // join common setup (which clears kkNull) | |
577 | ||
578 | ||
579 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
580 | /* | |
581 | * int (0 on success, EFAULT if a user address faults, ENAMETOOLONG if count > 256MB) | |
582 | * copyin(src, dst, count) | |
583 | * addr64_t src; // r3 and r4 | |
584 | * vm_offset_t dst; // r5 | |
585 | * size_t count; // r6 | |
586 | */ | |
587 | ||
588 | ||
589 | .align 5 | |
590 | .globl EXT(copyin) | |
591 | .globl EXT(copyinmsg) | |
592 | ||
593 | LEXT(copyin) | |
594 | LEXT(copyinmsg) | |
595 | ||
596 | mfcr r2,0x10 // save caller's cr3, which we use for flags | |
597 | mr r10,r3 // move high word of 64-bit user address to r10 | |
598 | crclr kkString // not a string version | |
599 | mr r11,r4 // move low word of 64-bit user address to r11 | |
600 | crset kkIn // flag as copyin (then fall through into copyJoin) | |
601 | ||
602 | ||
603 | // Common code to handle setup for all the copy variants: | |
604 | // r2 = caller's cr3 | |
605 | // r3 = source if copyout | |
606 | // r5 = dest if copyin | |
607 | // r6 = buffer length or count | |
608 | // r7 = count output ptr (if kkString set) | |
609 | // r10 = high word of 64-bit user-space address (source if copyin, dest if copyout) | |
610 | // r11 = low word of 64-bit user-space address | |
611 | // cr3 = kkIn, kkString, kkNull flags | |
612 | ||
613 | copyJoin: | |
614 | crclr kkNull // (dst==NULL) convention not used with this call | |
615 | copyJoin1: // enter here from copyinstr, which has already set kkNull to (dst==NULL) | |
616 | mflr r0 // get return address | |
617 | cmplwi r6,0 // buffer length 0? | |
618 | lis r9,0x1000 // r9 <- 0x10000000 (256MB) | |
619 | stw r0,FM_LR_SAVE(r1) // save return | |
620 | cmplw cr1,r6,r9 // buffer length > 256MB ? | |
621 | mfsprg r8,2 // get the features | |
622 | beq-- copyinout_0 // 0 length is degenerate case (taken before the frame is pushed) | |
623 | stwu r1,-kkFrameSize(r1) // set up stack frame | |
624 | stw r2,kkCR3(r1) // save caller's cr3, which we use for flags | |
625 | mtcrf 0x02,r8 // move pf64Bit to cr6 | |
626 | stw r3,kkSource(r1) // save args across MapUserMemoryWindow | |
627 | stw r5,kkDest(r1) | |
628 | stw r6,kkBufSize(r1) | |
629 | crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor | |
630 | stw r7,kkCountPtr(r1) | |
631 | stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr | |
632 | ||
633 | ||
634 | ||
635 | // Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout. | |
636 | // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp | |
637 | // the buffer length to 256MB. This isn't an issue if the string is less than 256MB | |
638 | // (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction | |
639 | // is due to MapUserMemoryWindow; we don't want to consume more than two segments for | |
640 | // the mapping. | |
641 | ||
642 | ble++ cr1,copyin0 // skip if buffer length <= 256MB | |
643 | bf kkString,copyinout_too_big // error if not string op | |
644 | mr r6,r9 // silently clamp buffer length to 256MB | |
645 | stw r9,kkBufSize(r1) // update saved copy too | |
646 | ||
647 | ||
648 | // Set up thread_recover in case we hit an illegal address. | |
649 | ||
650 | copyin0: | |
651 | li r31,0 // no mapped ptr yet | |
652 | mfsprg r8,1 // Get the current thread | |
653 | lis r2,hi16(copyinout_error) // r2 <- address of copyinout_error (high half) | |
654 | ori r2,r2,lo16(copyinout_error) // (low half) | |
655 | lwz r4,THREAD_RECOVER(r8) // r4 <- the thread's previous recovery address | |
656 | lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address | |
657 | stw r2,THREAD_RECOVER(r8) // install copyinout_error as the recovery address | |
658 | stw r4,kkThrErrJmp(r1) // keep the previous value so copyinx can put it back | |
659 | ||
660 | ||
661 | // Map user segment into kernel map, turn on 64-bit mode. At this point: | |
662 | // r3 = vm map | |
663 | // r6 = buffer length | |
664 | // r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout) | |
665 | // | |
666 | // When we call MapUserMemoryWindow, we pass: | |
667 | // r3 = vm map ptr | |
668 | // r4/r5 = 64-bit user space address as an addr64_t | |
669 | ||
670 | mr r4,r10 // copy user ptr into r4/r5 | |
671 | mr r5,r11 | |
672 | #if INSTRUMENT | |
673 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace | |
674 | stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it | |
675 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
676 | stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it | |
677 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
678 | stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it | |
679 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
680 | stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it | |
681 | #endif | |
682 | bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand | |
683 | #if INSTRUMENT | |
684 | mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace | |
685 | stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it | |
686 | mfspr r12,pmc2 ; INSTRUMENT - Get stamp | |
687 | stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it | |
688 | mfspr r12,pmc3 ; INSTRUMENT - Get stamp | |
689 | stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it | |
690 | mfspr r12,pmc4 ; INSTRUMENT - Get stamp | |
691 | stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it | |
692 | #endif | |
693 | mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit) | |
694 | bf-- kk64bit,copyin1 // skip if a 32-bit processor | |
695 | ||
696 | rldimi r31,r3,32,0 // slam high-order bits into mapped ptr | |
697 | mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr | |
698 | li r0,1 // get a 1 to insert into SF | |
699 | rldimi r4,r0,63,MSR_SF_BIT // light bit 0 | |
700 | mtmsrd r4 // turn on 64-bit mode | |
701 | isync // wait for mode to change | |
702 | ||
703 | ||
704 | // Load r3-r5, substituting mapped ptr as appropriate. | |
705 | ||
706 | copyin1: | |
707 | lwz r5,kkBufSize(r1) // restore length to copy | |
708 | bf kkIn,copyin2 // skip if copyout | |
709 | lwz r4,kkDest(r1) // copyin: dest is kernel ptr | |
710 | mr r3,r31 // source is mapped ptr | |
711 | b copyin3 | |
712 | copyin2: // handle copyout | |
713 | lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry) | |
714 | mr r4,r31 // dest is mapped ptr into user space | |
715 | ||
716 | ||
717 | // Finally, all set up to copy: | |
718 | // r3 = source ptr (mapped if copyin) | |
719 | // r4 = dest ptr (mapped if copyout) | |
720 | // r5 = length | |
721 | // r31 = mapped ptr returned by MapUserMemoryWindow | |
722 | // cr3 = kkIn, kkString, kk64bit, and kkNull flags | |
723 | ||
724 | copyin3: | |
725 | bt kkString,copyString // handle copyinstr and copyoutstr | |
726 | bl EXT(bcopy) // copyin and copyout: let bcopy do the work | |
727 | li r3,0 // return success | |
728 | ||
729 | ||
730 | // Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached | |
731 | // from error recovery if we get a DSI accessing user space. Clear recovery ptr, | |
732 | // and pop off frame. | |
733 | // r3 = 0, EFAULT, or ENAMETOOLONG | |
734 | ||
735 | copyinx: | |
736 | lwz r2,kkCR3(r1) // get caller's cr3 | |
737 | mfsprg r6,1 // Get the current thread | |
738 | bf-- kk64bit,copyinx1 // skip if 32-bit processor | |
739 | mfmsr r12 | |
740 | rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off | |
741 | mtmsrd r12 // turn SF off | |
742 | isync // wait for the mode to change | |
743 | copyinx1: | |
744 | lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address | |
745 | lwz r31,kkR31Save(r1) // restore caller's r31 | |
746 | lwz r4,kkThrErrJmp(r1) // load saved thread recover | |
747 | addi r1,r1,kkFrameSize // pop off our stack frame | |
748 | mtlr r0 | |
749 | stw r4,THREAD_RECOVER(r6) // restore thread recover | |
750 | mtcrf 0x10,r2 // restore cr3 | |
751 | blr | |
752 | ||
753 | ||
754 | /* We get here via the exception handler if an illegal | |
755 | * user memory reference was made. This error handler is used by | |
756 | * copyin, copyout, copyinstr, and copyoutstr. Registers are as | |
757 | * they were at point of fault, so for example cr3 flags are valid. | |
758 | */ | |
759 | ||
760 | copyinout_error: | |
761 | li r3,EFAULT // return error | |
762 | b copyinx // take the common exit (restores SF, r31, recovery ptr, cr3) | |
763 | ||
764 | copyinout_0: // degenerate case: 0-length copy (no frame was pushed, so only cr3 needs restoring) | |
765 | mtcrf 0x10,r2 // restore cr3 | |
766 | li r3,0 // return success | |
767 | blr | |
768 | ||
769 | copyinout_too_big: // degenerate case: copyin/copyout length > 256MB (frame pushed, but r31 and the recovery ptr not yet changed) | |
770 | mtcrf 0x10,r2 // restore cr3 | |
771 | lwz r1,0(r1) // pop off stack frame | |
772 | li r3,ENAMETOOLONG | |
773 | blr | |
774 | ||
775 | ||
776 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
777 | // Handle copyinstr and copyoutstr. At this point the stack frame is set up, | |
778 | // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode | |
779 | // if necessary, and: | |
780 | // r3 = source ptr, mapped if copyinstr | |
781 | // r4 = dest ptr, mapped if copyoutstr | |
782 | // r5 = buffer length | |
783 | // r31 = mapped ptr returned by MapUserMemoryWindow | |
784 | // cr3 = kkIn, kkString, kkNull, and kk64bit flags | |
785 | // We do word copies unless the buffer is very short, then use a byte copy loop | |
786 | // for the leftovers if necessary. The crossover at which the word loop becomes | |
787 | // faster is about seven bytes, counting the zero. | |
788 | // | |
789 | // We first must word-align the source ptr, in order to avoid taking a spurious | |
790 | // page fault. | |
791 | ||
792 | copyString: | |
793 | cmplwi cr1,r5,15 // is buffer very short (fewer than 15 bytes)? | |
794 | mr r12,r3 // remember ptr to 1st source byte | |
795 | mtctr r5 // assuming short, set up loop count for bytes | |
796 | blt-- cr1,copyinstr8 // too short for word loop, use byte loop | |
797 | rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word | |
798 | rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word | |
799 | li r7,-1 | |
800 | sub r3,r3,r2 // word-align source address | |
801 | add r6,r5,r2 // get length starting at byte 0 in word | |
802 | srw r7,r7,r9 // get mask for bytes in first word | |
803 | srwi r0,r6,2 // get #words in buffer | |
804 | lwz r5,0(r3) // get aligned word with first source byte | |
805 | lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11 | |
806 | lis r11,hi16(0x80808080) | |
807 | mtctr r0 // set up word loop count (replaces the byte count set above) | |
808 | addi r3,r3,4 // advance past the source word | |
809 | ori r10,r10,lo16(0xFEFEFEFF) | |
810 | ori r11,r11,lo16(0x80808080) | |
811 | orc r8,r5,r7 // map bytes preceding first source byte into 0xFF | |
812 | bt-- kkNull,copyinstr5enter // enter loop that just counts (nothing is stored) | |
813 | |
814 | // Special case 1st word, which has been 0xFF filled on left. Note that we use | |
815 | // "and.", even though we execute both in 32 and 64-bit mode. This is OK. | |
816 | |
817 | slw r5,r5,r9 // left justify payload bytes | |
818 | add r9,r10,r8 // r9 = data + 0xFEFEFEFF | |
819 | andc r7,r11,r8 // r7 = ~data & 0x80808080 | |
820 | subfic r0,r2,4 // get r0 <- #payload bytes in 1st word | |
821 | and. r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero | |
822 | stw r5,0(r4) // copy payload bytes to dest buffer | |
823 | add r4,r4,r0 // then point to next byte in dest buffer | |
824 | bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found | |
825 | |
826 | b copyinstr7 // 0 found (word count was at least 3, so buffer can't be full) | |
827 | ||
828 | ||
829 | // Word loop(s). They do a word-parallel search for 0s, using the following | |
830 | // non-obvious but very efficient test: | |
831 | // y = data + 0xFEFEFEFF | |
832 | // z = ~data & 0x80808080 | |
833 | // If (y & z)==0, then all bytes in dataword are nonzero. There are two copies | |
834 | // of this loop, one that just counts and another that copies. | |
835 | // r3 = ptr to next word of source (word aligned) | |
836 | // r4 = ptr to next byte in buffer | |
837 | // r6 = original buffer length (adjusted to be word origin) | |
838 | // r10 = 0xFEFEFEFF | |
839 | // r11 = 0x80808080 | |
840 | // r12 = ptr to 1st source byte (used to determine string length) | |
841 | ||
842 | .align 5 // align inner loops for speed | |
843 | copyinstr5: // version that counts but does not copy | |
844 | lwz r8,0(r3) // get next word of source | |
845 | addi r3,r3,4 // advance past it | |
846 | copyinstr5enter: | |
847 | add r9,r10,r8 // r9 = data + 0xFEFEFEFF | |
848 | andc r7,r11,r8 // r7 = ~data & 0x80808080 | |
849 | and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode) | |
850 | bdnzt cr0_eq,copyinstr5 // loop while r7==0 (all bytes in r8 nonzero) and words remain | |
851 | |
852 | b copyinstr7 | |
853 | ||
854 | .align 5 // align inner loops for speed | |
855 | copyinstr6: // version that counts and copies | |
856 | lwz r8,0(r3) // get next word of source | |
857 | addi r3,r3,4 // advance past it | |
858 | addi r4,r4,4 // increment dest ptr while we wait for data | |
859 | add r9,r10,r8 // r9 = data + 0xFEFEFEFF | |
860 | andc r7,r11,r8 // r7 = ~data & 0x80808080 | |
861 | and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode) | |
862 | stw r8,-4(r4) // store all 4 bytes into buffer (r4 already advanced, hence -4) | |
863 | bdnzt cr0_eq,copyinstr6 // loop while r7==0 (all bytes nonzero) and words remain | |
864 | ||
865 | ||
866 | // Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0 | |
867 | // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also | |
868 | // mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte. | |
869 | // r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4)) | |
870 | // r6 = original buffer length (adjusted to be word origin) | |
871 | // r7 = computed vector of 0x00 and 0x80 bytes | |
872 | // r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word | |
873 | // r12 = ptr to 1st source byte (used to determine string length) | |
874 | // cr0 = beq set iff 0 not found | |
875 | ||
876 | copyinstr7: | |
877 | rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position | |
878 | rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word | |
879 | andc r7,r7,r2 // turn off false hits from 0x0100 worst case | |
880 | crnot kkZero,cr0_eq // 0 found iff cr0_eq is off (capture before cr0 is reused) | |
881 | srwi r7,r7,8 // we want to count the 0 as a byte xferred | |
882 | cmpwi r6,0 // any bytes left over in last word? (overwrites cr0) | |
883 | cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80) | |
884 | subi r3,r3,4 // back up r3 to point to 1st byte in r8 | |
885 | srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4 | |
886 | add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred | |
887 | bt++ kkZero,copyinstr10 // 0 found, so done | |
888 | |
889 | beq copyinstr10 // r6==0 (per the cmpwi above), so buffer truly full | |
890 | mtctr r6 // 0 not found, loop over r6 bytes | |
891 | b copyinstr8 // enter byte loop for last 1-3 leftover bytes | |
892 | ||
893 | ||
894 | // Byte loop. This is used for very small buffers and for the odd bytes left over | |
895 | // after searching and copying words at a time. | |
896 | // r3 = ptr to next byte of source | |
897 | // r4 = ptr to next dest byte | |
898 | // r12 = ptr to first byte of source | |
899 | // ctr = count of bytes to check | |
900 | ||
901 | .align 5 // align inner loops for speed | |
902 | copyinstr8: // loop over bytes of source | |
903 | lbz r0,0(r3) // get next byte of source | |
904 | addi r3,r3,1 | |
905 | addi r4,r4,1 // increment dest addr whether we store or not | |
906 | cmpwi r0,0 // is it the 0? (sets cr0_eq for the bdnzf below) | |
907 | bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr | |
908 | stb r0,-1(r4) | |
909 | copyinstr9: | |
910 | bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer | |
911 | |
912 | crmove kkZero,cr0_eq // kkZero <- 1 if last byte was the 0, 0 if buffer filled first | |
913 | ||
914 | ||
915 | // Buffer filled or 0 found. Unwind and return. | |
916 | // r3 = ptr to 1st source byte not transferred | |
917 | // r12 = ptr to 1st source byte | |
918 | // r31 = mapped ptr returned by MapUserMemoryWindow | |
919 | // cr3 = kkZero set iff 0 found | |
920 | ||
921 | copyinstr10: | |
922 | lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved | |
923 | sub r2,r3,r12 // compute #bytes copied (including the 0, if one was found) | |
924 | li r3,0 // assume success return status | |
925 | stw r2,0(r9) // store #bytes moved | |
926 | bt++ kkZero,copyinx // we did find the 0 so return 0 | |
927 | li r3,ENAMETOOLONG // buffer filled before a 0 was found | |
928 | b copyinx // join main exit routine | |
929 | ||
930 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
931 | /* | |
932 | * int | |
933 | * copypv(source, sink, size, which) | |
934 | * addr64_t src; // r3 and r4 | |
935 | * addr64_t dst; // r5 and r6 | |
936 | * size_t size; // r7 | |
937 | * int which; // r8 | |
938 | * | |
939 | * Operand size bytes are copied from operand src into operand dst. The source and | |
940 | * destination operand addresses are given as addr64_t, and may designate starting | |
941 | * locations in physical or virtual memory in any combination except where both are | |
942 | * virtual. Virtual memory locations may be in either the kernel or the current thread's | |
943 | * address space. Operand size must be less than 256MB (2**28 bytes) on the 64-bit path. | |
944 | * | |
945 | * Operation is controlled by operand which, which offers these options: | |
946 | * cppvPsrc : source operand is (1) physical or (0) virtual | |
947 | * cppvPsnk : destination operand is (1) physical or (0) virtual | |
948 | * cppvKmap : virtual operand is in (1) kernel or (0) current thread | |
949 | * cppvFsnk : (1) flush destination before and after transfer | |
950 | * cppvFsrc : (1) flush source before and after transfer | |
951 | * cppvNoModSnk : (1) don't set destination operand's changed bit(s) | |
952 | * cppvNoRefSrc : (1) don't set source operand's referenced bit(s) | |
953 | * | |
954 | * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32(). | |
955 | * This section describes the operation of the new 64-bit path. | |
956 | * | |
957 | * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a | |
958 | * window in the kernel address space into all of physical RAM plus the I/O hole. Since | |
959 | * the window's mappings specify the proper access policies for the underlying memory, | |
960 | * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk | |
961 | * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical | |
962 | * memory window, and are accessed with data relocation on. Virtual addresses are either | |
963 | * within the kernel, or are mapped into the kernel address space through the user memory | |
964 | * window. Because accesses to a virtual operand are performed with data relocation on, | |
965 | * the new path does not have to translate the address, disable/enable interrupts, lock | |
966 | * the mapping, or update referenced and changed bits. | |
967 | * | |
968 | * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is | |
969 | * a substantial performance penalty for copypv operating in real mode. Utilizing the | |
970 | * new 64-bit path, transfer performance increases >100% on the G5. | |
971 | * | |
972 | * The attentive reader may notice that mtmsrd ops are not followed by isync ops as | |
973 | * might be expected. The 970 follows PowerPC architecture version 2.01, which defines | |
974 | * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer | |
975 | * required. | |
976 | * | |
977 | * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need | |
978 | * to call 32-bit functions, which would lead to the high-order 32 bits of our values | |
979 | * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles | |
980 | * in our own stack frame across calls to 32-bit functions. | |
981 | * | |
982 | */ | |
983 | ||
984 | // Map operand which bits into non-volatile CR2 and CR3 bits: copypv rotates 'which' left by whichAlign, masks it with whichMask, and moves it into cr2/cr3 with mtcrf. pvNoCache is an internal flag of the routine, not a 'which' bit. | |
985 | #define whichAlign ((3+1)*4) | |
986 | #define whichMask 0x007F0000 | |
987 | #define pvPsnk (cppvPsnkb - whichAlign) | |
988 | #define pvPsrc (cppvPsrcb - whichAlign) | |
989 | #define pvFsnk (cppvFsnkb - whichAlign) | |
990 | #define pvFsrc (cppvFsrcb - whichAlign) | |
991 | #define pvNoModSnk (cppvNoModSnkb - whichAlign) | |
992 | #define pvNoRefSrc (cppvNoRefSrcb - whichAlign) | |
993 | #define pvKmap (cppvKmapb - whichAlign) | |
994 | #define pvNoCache cr2_lt | |
995 | ||
996 | .align 5 | |
997 | .globl EXT(copypv) | |
998 | |
999 | LEXT(copypv) | |
1000 | mfsprg r10,2 // get feature flags | |
1001 | mtcrf 0x02,r10 // we need to test pf64Bit | |
1002 | bt++ pf64Bitb,copypv_64 // take the 64-bit path if 64-bit processor (only they take hint) | |
1003 | |
1004 | b EXT(hw_copypv_32) // otherwise branch (no link) to the 32-bit copypv | |
1005 | |
1006 | // Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber. | |
1007 | copypv_64: | |
1008 | mfsprg r9,1 // get current thread | |
1009 | stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1) | |
1010 | // allocate stack frame and link it | |
1011 | mflr r0 // get return address | |
1012 | mfcr r10 // get cr2 and cr3 | |
1013 | lwz r12,THREAD_RECOVER(r9) // get caller's error callback | |
1014 | stw r26,FM_ARG0+0x00(r1) // save non-volatile r26 | |
1015 | stw r27,FM_ARG0+0x04(r1) // save non-volatile r27 | |
1016 | stw r28,FM_ARG0+0x08(r1) // save non-volatile r28 | |
1017 | stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29 | |
1018 | stw r30,FM_ARG0+0x10(r1) // save non-volatile r30 | |
1019 | stw r31,FM_ARG0+0x14(r1) // save non-volatile r31 | |
1020 | stw r12,FM_ARG0+0x20(r1) // save caller's error callback (put back at copypv_return) | |
1021 | stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1) | |
1022 | // save return address | |
1023 | stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1) | |
1024 | // save non-volatile cr2 and cr3 | |
1025 | ||
1026 | // Non-volatile register usage in this routine is: | |
1027 | // r26: saved msr image (later reused as a page count) | |
1028 | // r27: kernel's pmap_t for the mapping lookup / source virtual address | |
1029 | // r28: virtual operand's address / destination virtual address | |
1030 | // r29: source address | |
1031 | // r30: destination address | |
1032 | // r31: byte count to copy | |
1033 | // cr2/3: parameter 'which' bits | |
1034 | |
1035 | rlwinm r8,r8,whichAlign,whichMask // align and mask which bits | |
1036 | mr r31,r7 // copy size to somewhere non-volatile | |
1037 | mtcrf 0x20,r8 // insert which bits into cr2 | |
1038 | mtcrf 0x10,r8 // insert which bits into cr3 | |
1039 | rlwinm r29,r3,0,1,0 // form source address high-order bits | |
1040 | rlwinm r30,r5,0,1,0 // form destination address high-order bits | |
1041 | rlwimi r29,r4,0,0,31 // form source address low-order bits | |
1042 | rlwimi r30,r6,0,0,31 // form destination address low-order bits | |
1043 | crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical? | |
1044 | cntlzw r0,r31 // count leading zeroes in byte count | |
1045 | cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical | |
1046 | bf-- cr7_eq,copypv_einval // both operands may not be virtual | |
1047 | cmplwi r0,4 // fewer than 4 leading zeroes means byte count >= 256M (2**28) | |
1048 | blt-- copypv_einval // byte count too big, give EINVAL | |
1049 | cmplwi r31,0 // byte count zero? | |
1050 | beq-- copypv_zero // early out | |
1051 | bt cr7_lt,copypv_phys // both operand addresses are physical | |
1052 | mr r28,r30 // assume destination is virtual | |
1053 | bf pvPsnk,copypv_dv // is destination virtual? | |
1054 | mr r28,r29 // no, so source must be virtual | |
1055 | copypv_dv: | |
1056 | lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order | |
1057 | lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t | |
1058 | bt pvKmap,copypv_kern // virtual address in kernel map? | |
1059 | lwz r3,ACT_VMMAP(r9) // get user's vm_map * | |
1060 | rldicl r4,r28,32,32 // r4, r5 <- addr64_t virtual address | |
1061 | rldicl r5,r28,0,32 | |
1062 | std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call | |
1063 | std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call | |
1064 | bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space | |
1065 | ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29 | |
1066 | ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30 | |
1067 | rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address | |
1068 | rlwimi r28,r4,0,0,31 // into a single 64-bit scalar | |
1069 | copypv_kern: | |
1070 | |
1071 | // Since we'll be accessing the virtual operand with data-relocation on, we won't need to | |
1072 | // update the referenced and changed bits manually after the copy. So, force the appropriate | |
1073 | // flag bit on for the virtual operand. | |
1074 | crorc pvNoModSnk,pvNoModSnk,pvPsnk // pvNoModSnk |= !pvPsnk: for virtual dest, let hardware do ref/chg bits | |
1075 | crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // pvNoRefSrc |= !pvPsrc: for virtual source, let hardware do ref bit | |
1076 | |
1077 | // We'll be finding a mapping and looking at it, so we need to disable interrupts. | |
1078 | lis r0,hi16(MASK(MSR_VEC)) // get vector mask | |
1079 | ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask | |
1080 | mfmsr r26 // save current msr | |
1081 | andc r26,r26,r0 // turn off VEC and FP in saved copy | |
1082 | ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask | |
1083 | andc r0,r26,r0 // disable EE in our new msr image | |
1084 | mtmsrd r0 // introduce new msr image | |
1085 | |
1086 | // We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now | |
1087 | // try to find a mapping corresponding to this address in order to determine whether the address | |
1088 | // is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable | |
1089 | // (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we | |
1090 | // examine the mapping's caching-inhibited bit. | |
1091 | mr r3,r27 // r3 <- pmap_t pmap | |
1092 | rldicl r4,r28,32,32 // r4, r5 <- addr64_t va | |
1093 | rldicl r5,r28,0,32 | |
1094 | la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva | |
1095 | li r7,1 // r7 <- int full, search nested mappings | |
1096 | std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls | |
1097 | std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls | |
1098 | std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls | |
1099 | std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls | |
1100 | bl EXT(mapping_find) // find mapping for virtual operand | |
1101 | mr. r3,r3 // did we find it? | |
1102 | beq copypv_nomapping // nope, so we'll assume it's cacheable | |
1103 | lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags | |
1104 | rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set? | |
1105 | crnot pvNoCache,cr0_eq // if it is, use bcopy_nc | |
1106 | bl EXT(mapping_drop_busy) // drop busy on the mapping (r3 still holds the mapping ptr) | |
1107 | copypv_nomapping: | |
1108 | ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26 | |
1109 | ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28 | |
1110 | ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29 | |
1111 | ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30 | |
1112 | mtmsrd r26 // restore msr to its previous state (less VEC and FP) | |
1113 | |
1114 | // Set both the source and destination virtual addresses to the virtual operand's address -- | |
1115 | // we'll overlay one of them with the physical operand's address. | |
1116 | mr r27,r28 // make virtual operand BOTH source AND destination | |
1117 | ||
1118 | // Now we're ready to relocate the physical operand address(es) into the physical memory window. | |
1119 | // Recall that we've mapped physical memory (including the I/O hole) into the kernel's address | |
1120 | // space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole, | |
1121 | // we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy. | |
1122 | copypv_phys: | |
1123 | ld r6,lgPMWvaddr(0) // get physical memory window virtual address | |
1124 | bf pvPsnk,copypv_dstvirt // is destination address virtual? | |
1125 | cntlzd r4,r30 // count leading zeros in destination address | |
1126 | cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1) | |
1127 | cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations | |
1128 | add r28,r30,r6 // relocate physical destination into physical window | |
1129 | copypv_dstvirt: | |
1130 | bf pvPsrc,copypv_srcvirt // is source address virtual? | |
1131 | cntlzd r4,r29 // count leading zeros in source address | |
1132 | cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1) | |
1133 | cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations | |
1134 | add r27,r29,r6 // relocate physical source into physical window | |
1135 | copypv_srcvirt: | |
1136 | |
1137 | // Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything | |
1138 | // funny happens during the copy. So, we set a pointer to our error handler in the per-thread | |
1139 | // control block. | |
1140 | mfsprg r8,1 // get current thread | |
1141 | lis r3,hi16(copypv_error) // get our error callback's address, high | |
1142 | ori r3,r3,lo16(copypv_error) // get our error callback's address, low | |
1143 | stw r3,THREAD_RECOVER(r8) // set our error callback (caller's is put back at copypv_return) | |
1144 | |
1145 | // Since our physical operand(s) are relocated at or above the 2**32 line, we must enter | |
1146 | // 64-bit mode. | |
1147 | li r0,1 // get a handy one bit | |
1148 | mfmsr r3 // get current msr | |
1149 | rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy | |
1150 | mtmsrd r3 // enter 64-bit mode | |
1151 | ||
1152 | // If requested, flush data cache | |
1153 | // Note that we don't flush (the code below is compiled out by #if 0); it is being saved "just in case". | |
1154 | #if 0 | |
1155 | bf pvFsrc,copypv_nfs // do we flush the source? | |
1156 | rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address | |
1157 | rldicl r4,r27,0,32 | |
1158 | mr r5,r31 // r5 <- count (in bytes) | |
1159 | li r6,0 // r6 <- boolean phys (false, not physical) | |
1160 | bl EXT(flush_dcache) // flush the source operand | |
1161 | copypv_nfs: | |
1162 | bf pvFsnk,copypv_nfdx // do we flush the destination? | |
1163 | rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address | |
1164 | rldicl r4,r28,0,32 | |
1165 | mr r5,r31 // r5 <- count (in bytes) | |
1166 | li r6,0 // r6 <- boolean phys (false, not physical) | |
1167 | bl EXT(flush_dcache) // flush the destination operand | |
1168 | copypv_nfdx: | |
1169 | #endif | |
1170 | |
1171 | // Call bcopy or bcopy_nc to perform the copy. If anything goes wrong during the copy, control is meant to come back at copypv_error via the recovery pointer set earlier. | |
1172 | mr r3,r27 // r3 <- source virtual address | |
1173 | mr r4,r28 // r4 <- destination virtual address | |
1174 | mr r5,r31 // r5 <- bytes to copy | |
1175 | bt pvNoCache,copypv_nc // take non-caching route | |
1176 | bl EXT(bcopy) // call bcopy to do the copying | |
1177 | b copypv_copydone | |
1178 | copypv_nc: | |
1179 | bl EXT(bcopy_nc) // call bcopy_nc to do the copying | |
1180 | copypv_copydone: | |
1181 | |
1182 | // If requested, flush data cache | |
1183 | // Note that we don't flush (the code below is compiled out by #if 0); it is being saved "just in case". | |
1184 | #if 0 | |
1185 | bf pvFsrc,copypv_nfsx // do we flush the source? | |
1186 | rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address | |
1187 | rldicl r4,r27,0,32 | |
1188 | mr r5,r31 // r5 <- count (in bytes) | |
1189 | li r6,0 // r6 <- boolean phys (false, not physical) | |
1190 | bl EXT(flush_dcache) // flush the source operand | |
1191 | copypv_nfsx: | |
1192 | bf pvFsnk,copypv_nfd // do we flush the destination? | |
1193 | rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address | |
1194 | rldicl r4,r28,0,32 | |
1195 | mr r5,r31 // r5 <- count (in bytes) | |
1196 | li r6,0 // r6 <- boolean phys (false, not physical) | |
1197 | bl EXT(flush_dcache) // flush the destination operand | |
1198 | copypv_nfd: | |
1199 | #endif | |
1200 | |
1201 | // Leave 64-bit mode (the ref/chg updates that follow call 32-bit functions). | |
1202 | mfmsr r3 // get current msr | |
1203 | rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy | |
1204 | mtmsrd r3 // leave 64-bit mode | |
1205 | ||
1206 | // If requested, set ref/chg on source/dest physical operand(s). It is possible that copy is | |
1207 | // from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling | |
1208 | // mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic. | |
1209 | // Note that this code is page-size sensitive, so it should probably be a part of our low-level | |
1210 | // code in hw_vm.s. | |
1211 | bt pvNoModSnk,copypv_nomod // skip destination update if not requested | |
1212 | std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit calls | |
1213 | // Count the 4K pages spanned by the destination: the offset of the first byte within | |
1214 | // its page, plus the byte count, rounded up to a page multiple. The byte count is | |
1215 | // nonzero and less than 2**28 (both checked on entry), so the page count is at least | |
1216 | // one and the 32-bit sum cannot overflow. Only the low-order 32 bits of r26 and r27 | |
1217 | // are relied upon in the loop, since they must survive calls to 32-bit functions. | |
1218 | // The page number is advanced on every pass so that each page touched gets marked. | |
1219 | rlwinm r3,r30,0,20,31 // r3 <- offset of 1st destination byte within its 4K page | |
1220 | add r3,r3,r31 // r3 <- offset + byte count | |
1221 | addi r3,r3,4095 // round up to next 4K multiple | |
1222 | srwi r26,r3,12 // r26 <- 4K-page count (taken from low-order 32 bits) | |
1223 | srdi r27,r30,12 // r27 <- first destination page number | |
1224 | copypv_modloop: | |
1225 | mr r3,r27 // r3 <- destination page number | |
1226 | la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex | |
1227 | bl EXT(mapping_phys_lookup) // see if page is really there | |
1228 | mr. r3,r3 // is it? | |
1229 | beq-- copypv_modend // nope, break out of modify loop | |
1230 | mr r3,r27 // r3 <- destination page number | |
1231 | bl EXT(mapping_set_mod) // set page changed status | |
1232 | addi r27,r27,1 // advance to next destination page | |
1233 | subi r26,r26,1 // decrement page count | |
1234 | cmpwi r26,0 // done yet? | |
1235 | bgt copypv_modloop // nope, iterate | |
1236 | copypv_modend: | |
1237 | ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29 | |
1238 | copypv_nomod: | |
1239 | bt pvNoRefSrc,copypv_done // skip source update if not requested | |
1240 | copypv_debugref: | |
1241 | // Count the 4K pages spanned by the source: the offset of the first byte within its | |
1242 | // page, plus the byte count, rounded up to a page multiple. The byte count is nonzero | |
1243 | // and less than 2**28 (both checked on entry), so the page count is at least one and | |
1244 | // the 32-bit sum cannot overflow. r29 is only read before the first call below, and | |
1245 | // only the low-order 32 bits of r26 and r27 are relied upon inside the loop. | |
1246 | // The page number is advanced on every pass so that each page touched gets marked. | |
1247 | rlwinm r3,r29,0,20,31 // r3 <- offset of 1st source byte within its 4K page | |
1248 | add r3,r3,r31 // r3 <- offset + byte count | |
1249 | addi r3,r3,4095 // round up to next 4K multiple | |
1250 | srwi r26,r3,12 // r26 <- 4K-page count (taken from low-order 32 bits) | |
1251 | srdi r27,r29,12 // r27 <- first source page number | |
1252 | copypv_refloop: | |
1253 | mr r3,r27 // r3 <- source page number | |
1254 | la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex | |
1255 | bl EXT(mapping_phys_lookup) // see if page is really there | |
1256 | mr. r3,r3 // is it? | |
1257 | beq-- copypv_done // nope, break out of reference loop | |
1258 | mr r3,r27 // r3 <- source page number | |
1259 | bl EXT(mapping_set_ref) // set page referenced status | |
1260 | addi r27,r27,1 // advance to next source page | |
1261 | subi r26,r26,1 // decrement page count | |
1262 | cmpwi r26,0 // done yet? | |
1263 | bgt copypv_refloop // nope, iterate | |
1264 | ||
1265 | // Return, indicating success. | |
1266 | copypv_done: | |
1267 | copypv_zero: | |
1268 | li r3,0 // our efforts were crowned with success | |
1269 | |
1270 | // Pop frame, restore caller's non-volatiles, restore caller's recovery routine pointer. r3 already holds the return status. | |
1271 | copypv_return: | |
1272 | mfsprg r9,1 // get current thread | |
1273 | lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1) | |
1274 | // get return address | |
1275 | lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1) | |
1276 | // get non-volatile cr2 and cr3 | |
1277 | lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26 | |
1278 | lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27 | |
1279 | mtlr r0 // restore return address | |
1280 | lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28 | |
1281 | mtcrf 0x20,r4 // restore non-volatile cr2 | |
1282 | mtcrf 0x10,r4 // restore non-volatile cr3 | |
1283 | lwz r11,FM_ARG0+0x20(r1) // get caller's error callback, saved at entry | |
1284 | lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29 | |
1285 | lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30 | |
1286 | lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31 | |
1287 | stw r11,THREAD_RECOVER(r9) // restore caller's error callback | |
1288 | lwz r1,0(r1) // release stack frame | |
1289 | |
1290 | blr // y'all come back now | |
1291 | ||
1292 | // Invalid argument handler: reached when both operands are virtual, or the byte count is 2**28 or more. | |
1293 | copypv_einval: | |
1294 | li r3,EINVAL // invalid argument | |
1295 | b copypv_return // pop frame and return EINVAL | |
1296 | ||
1297 | // Error encountered during bcopy or bcopy_nc. This is the routine whose address was stored in THREAD_RECOVER before the copy; presumably we arrive still in 64-bit mode, hence the SF clear. | |
1298 | copypv_error: | |
1299 | mfmsr r3 // get current msr | |
1300 | rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy | |
1301 | mtmsrd r3 // leave 64-bit mode | |
1302 | li r3,EFAULT // report the fault to our caller | |
1303 | b copypv_return // pop frame and return EFAULT | |