/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
#include <debug.h>
#include <ppc/asm.h>
#include <ppc/proc_reg.h>
#include <mach/ppc/vm_param.h>
#include <assym.s>
#include <sys/errno.h>

// Set to 1 to enable the PMC (performance monitor counter) stamp-saving
// code wrapped in "#if INSTRUMENT" blocks below.
#define INSTRUMENT 0
33 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * void pmap_zero_page(vm_offset_t pa)
 *
 * Zero a page of physical memory.  This routine runs in 32 or 64-bit mode,
 * and handles 32 and 128-byte cache lines.
 *
 * On entry r3 is a physical PAGE NUMBER (shifted to an address below),
 * and ml_set_physical_disabled returns the processor feature flags in r10.
 * The page is zeroed with dcbz/dcbz128, two cache lines per iteration.
 */


        .align  5
        .globl  EXT(pmap_zero_page)

LEXT(pmap_zero_page)

        mflr    r12                             // save return address
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        mtlr    r12                             // restore return address
        andi.   r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size (32 or 128 bytes)

        subfic  r4,r9,PPC_PGBYTES               // r4 <- starting offset in page (last line pair)

        bt++    pf64Bitb,page0S4                // Go do the big guys...

        slwi    r3,r3,12                        // get page address from page num
        b       page_zero_1                     // Jump to line aligned loop...

        .align  5

        // padding so page_zero_1 lands on a 32-byte boundary
        nop
        nop
        nop
        nop
        nop
        nop
        nop

page0S4:
        sldi    r3,r3,12                        // get page address from page num (64-bit shift)

page_zero_1:                                    // loop zeroing cache lines, two per iteration
        sub.    r5,r4,r9                        // more to go?
        dcbz128 r3,r4                           // zero either 32 or 128 bytes
        sub     r4,r5,r9                        // generate next offset
        dcbz128 r3,r5
        bne--   page_zero_1

        b       EXT(ml_restore)                 // restore MSR and do the isync
81 | ||
82 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * phys_copy(src, dst, bytecount)
 *      addr64_t        src;            // r3 (high) / r4 (low)
 *      addr64_t        dst;            // r5 (high) / r6 (low)
 *      int             bytecount       // r7
 *
 * This routine will copy bytecount bytes from physical address src to physical
 * address dst.  It runs in 64-bit mode if necessary, but does not handle
 * overlap or make any attempt to be optimal.  Length must be a signed word.
 * Not performance critical.
 *
 * Copies whole words first, then any remaining bytes one at a time.
 */


        .align  5
        .globl  EXT(phys_copy)

LEXT(phys_copy)

        // Assemble the two 32-bit halves of each addr64_t into a single register.
        rlwinm  r3,r3,0,1,0             ; Duplicate high half of long long paddr into top of reg
        mflr    r12                     // get return address
        rlwimi  r3,r4,0,0,31            ; Combine bottom of long long to full 64-bits
        rlwinm  r4,r5,0,1,0             ; Duplicate high half of long long paddr into top of reg
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        rlwimi  r4,r6,0,0,31            ; Combine bottom of long long to full 64-bits
        mtlr    r12                     // restore return address
        subic.  r5,r7,4                 // a word to copy?
        b       phys_copy_2

        .align  5

phys_copy_1:                            // loop copying words
        subic.  r5,r5,4                 // more to go?
        lwz     r0,0(r3)
        addi    r3,r3,4
        stw     r0,0(r4)
        addi    r4,r4,4
phys_copy_2:
        bge     phys_copy_1
        addic.  r5,r5,4                 // restore count (undo the last subtract)
        ble     phys_copy_4             // no more

// Loop is aligned here

phys_copy_3:                            // loop copying the 1-3 trailing bytes
        subic.  r5,r5,1                 // more to go?
        lbz     r0,0(r3)
        addi    r3,r3,1
        stb     r0,0(r4)
        addi    r4,r4,1
        bgt     phys_copy_3
phys_copy_4:
        b       EXT(ml_restore)         // restore MSR and do the isync
136 | ||
137 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * pmap_copy_page(src, dst)
 *      ppnum_t src;
 *      ppnum_t dst;
 *
 * This routine will copy the physical page src to physical page dst
 *
 * This routine assumes that the src and dst are page numbers and that the
 * destination is cached.  It runs on 32 and 64 bit processors, with and
 * without altivec, and with 32 and 128 byte cache lines.
 * We also must assume that no-one will be executing within the destination
 * page, and that this will be used for paging.  Because this
 * is a common routine, we have tuned loops for each processor class:
 *      G3 (no altivec)  -- copy via 4 FPRs, 32 bytes per iteration
 *      G4 (altivec)     -- copy via 4 VRs, 64 bytes per iteration
 *      64-bit w/ VMX    -- copy via 8 VRs, 128 bytes per iteration
 *      64-bit w/o VMX   -- copy via 8 GPR doublewords, 128 bytes per iteration
 * Each path finishes by icbi-flushing the destination from the icache,
 * since the copied page may later be executed.
 */
#define kSFSize (FM_SIZE+160)

ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)

        lis     r2,hi16(MASK(MSR_VEC))  ; Get the vector flag
        mflr    r0                      // get return
        ori     r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
        stw     r0,8(r1)                // save
        stwu    r1,-kSFSize(r1)         // set up a stack frame for VRs or FPRs
        mfmsr   r11                     // save MSR at entry
        mfsprg  r10,2                   // get feature flags
        andc    r11,r11,r2              // Clear out vec and fp
        ori     r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
        andc    r2,r11,r2               // Clear out EE as well
        mtcrf   0x02,r10                // we need to test pf64Bit
        ori     r2,r2,MASK(MSR_FP)      // must enable FP for G3...
        mtcrf   0x80,r10                // we need to test pfAltivec too
        oris    r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
        mtmsr   r2                      // turn EE off, FP and VEC on
        isync
        bt++    pf64Bitb,pmap_copy_64   // skip if 64-bit processor (only they take hint)
        slwi    r3,r3,12                // get page address from page num
        slwi    r4,r4,12                // get page address from page num
        rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1      // get ready to turn off DR
        bt      pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4


// G3 -- copy using FPRs

        stfd    f0,FM_SIZE+0(r1)        // save the 4 FPRs we use to copy
        stfd    f1,FM_SIZE+8(r1)
        li      r5,PPC_PGBYTES/32       // count of cache lines in a page
        stfd    f2,FM_SIZE+16(r1)
        mtctr   r5
        stfd    f3,FM_SIZE+24(r1)
        mtmsr   r12                     // turn off DR after saving FPRs on stack
        isync

pmap_g3_copy_loop:                      // loop over 32-byte cache lines
        dcbz    0,r4                    // avoid read of dest line
        lfd     f0,0(r3)
        lfd     f1,8(r3)
        lfd     f2,16(r3)
        lfd     f3,24(r3)
        addi    r3,r3,32
        stfd    f0,0(r4)
        stfd    f1,8(r4)
        stfd    f2,16(r4)
        stfd    f3,24(r4)
        dcbst   0,r4                    // flush dest line to RAM
        addi    r4,r4,32
        bdnz    pmap_g3_copy_loop

        sync                            // wait for stores to take
        subi    r4,r4,PPC_PGBYTES       // restore ptr to destination page
        li      r6,PPC_PGBYTES-32       // point to last line in page
pmap_g3_icache_flush:                   // unrolled 2x: two icbi per iteration
        subic.  r5,r6,32                // more to go?
        icbi    r4,r6                   // flush another line in icache
        subi    r6,r5,32                // get offset to next line
        icbi    r4,r5
        bne     pmap_g3_icache_flush

        sync
        mtmsr   r2                      // turn DR back on
        isync
        lfd     f0,FM_SIZE+0(r1)        // restore the FPRs
        lfd     f1,FM_SIZE+8(r1)
        lfd     f2,FM_SIZE+16(r1)
        lfd     f3,FM_SIZE+24(r1)

        b       pmap_g4_restore         // restore MSR and done


// G4 -- copy using VRs

pmap_copy_g4:                           // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
        la      r9,FM_SIZE+16(r1)       // place where we save VRs to r9
        li      r5,16                   // load x-form offsets into r5-r9
        li      r6,32                   // another offset
        stvx    v0,0,r9                 // save some VRs so we can use to copy
        li      r7,48                   // another offset
        stvx    v1,r5,r9
        li      r0,PPC_PGBYTES/64       // we loop over 64-byte chunks
        stvx    v2,r6,r9
        mtctr   r0
        li      r8,96                   // get look-ahead for touch
        stvx    v3,r7,r9
        li      r9,128
        mtmsr   r12                     // now we've saved VRs on stack, turn off DR
        isync                           // wait for it to happen
        b       pmap_g4_copy_loop

        .align  5                       // align inner loops
pmap_g4_copy_loop:                      // loop over 64-byte chunks
        dcbt    r3,r8                   // touch 3 lines ahead
        nop                             // avoid a 17-word loop...
        dcbt    r3,r9                   // touch 4 lines ahead
        nop                             // more padding
        dcba    0,r4                    // avoid pre-fetch of 1st dest line
        lvx     v0,0,r3                 // offset 0
        lvx     v1,r5,r3                // offset 16
        lvx     v2,r6,r3                // offset 32
        lvx     v3,r7,r3                // offset 48
        addi    r3,r3,64
        dcba    r6,r4                   // avoid pre-fetch of 2nd line
        stvx    v0,0,r4                 // offset 0
        stvx    v1,r5,r4                // offset 16
        stvx    v2,r6,r4                // offset 32
        stvx    v3,r7,r4                // offset 48
        dcbf    0,r4                    // push line 1
        dcbf    r6,r4                   // and line 2
        addi    r4,r4,64
        bdnz    pmap_g4_copy_loop

        sync                            // wait for stores to take
        subi    r4,r4,PPC_PGBYTES       // restore ptr to destination page
        li      r8,PPC_PGBYTES-32       // point to last line in page
pmap_g4_icache_flush:                   // unrolled 2x: two icbi per iteration
        subic.  r9,r8,32                // more to go?
        icbi    r4,r8                   // flush from icache
        subi    r8,r9,32                // get offset to next line
        icbi    r4,r9
        bne     pmap_g4_icache_flush

        sync
        mtmsr   r2                      // turn DR back on
        isync
        la      r9,FM_SIZE+16(r1)       // get base of VR save area
        lvx     v0,0,r9                 // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9

pmap_g4_restore:                        // common exit; r11=MSR at entry
        mtmsr   r11                     // turn EE on, VEC and FR off
        isync                           // wait for it to happen
        addi    r1,r1,kSFSize           // pop off our stack frame
        lwz     r0,8(r1)                // restore return address
        mtlr    r0
        blr


// 64-bit/128-byte processor: copy using VRs

pmap_copy_64:                           // r10=features, r11=old MSR
        sldi    r3,r3,12                // get page address from page num
        sldi    r4,r4,12                // get page address from page num
        la      r9,FM_SIZE+16(r1)       // get base of VR save area
        li      r5,16                   // load x-form offsets into r5-r9
        li      r6,32                   // another offset
        bf      pfAltivecb,pmap_novmx_copy // altivec suppressed...
        stvx    v0,0,r9                 // save 8 VRs so we can copy wo bubbles
        stvx    v1,r5,r9
        li      r7,48                   // another offset
        li      r0,PPC_PGBYTES/128      // we loop over 128-byte chunks
        stvx    v2,r6,r9
        stvx    v3,r7,r9
        addi    r9,r9,64                // advance base ptr so we can store another 4
        mtctr   r0
        li      r0,MASK(MSR_DR)         // get DR bit
        stvx    v4,0,r9
        stvx    v5,r5,r9
        andc    r12,r2,r0               // turn off DR bit
        li      r0,1                    // get a 1 to slam into SF
        stvx    v6,r6,r9
        stvx    v7,r7,r9
        rldimi  r12,r0,63,MSR_SF_BIT    // set SF bit (bit 0)
        li      r8,-128                 // offset so we can reach back one line
        mtmsrd  r12                     // now we've saved VRs, turn DR off and SF on
        isync                           // wait for it to happen
        dcbt128 0,r3,1                  // start a forward stream
        b       pmap_64_copy_loop

        .align  5                       // align inner loops
pmap_64_copy_loop:                      // loop over 128-byte chunks
        dcbz128 0,r4                    // avoid read of destination line
        lvx     v0,0,r3                 // offset 0
        lvx     v1,r5,r3                // offset 16
        lvx     v2,r6,r3                // offset 32
        lvx     v3,r7,r3                // offset 48
        addi    r3,r3,64                // don't have enough GPRs so add 64 2x
        lvx     v4,0,r3                 // offset 64
        lvx     v5,r5,r3                // offset 80
        lvx     v6,r6,r3                // offset 96
        lvx     v7,r7,r3                // offset 112
        addi    r3,r3,64
        stvx    v0,0,r4                 // offset 0
        stvx    v1,r5,r4                // offset 16
        stvx    v2,r6,r4                // offset 32
        stvx    v3,r7,r4                // offset 48
        addi    r4,r4,64
        stvx    v4,0,r4                 // offset 64
        stvx    v5,r5,r4                // offset 80
        stvx    v6,r6,r4                // offset 96
        stvx    v7,r7,r4                // offset 112
        addi    r4,r4,64
        dcbf    r8,r4                   // flush the line we just wrote
        bdnz    pmap_64_copy_loop

        sync                            // wait for stores to take
        subi    r4,r4,PPC_PGBYTES       // restore ptr to destination page
        li      r8,PPC_PGBYTES-128      // point to last line in page
pmap_64_icache_flush:                   // unrolled 2x: two icbi per iteration
        subic.  r9,r8,128               // more to go?
        icbi    r4,r8                   // flush from icache
        subi    r8,r9,128               // get offset to next line
        icbi    r4,r9
        bne     pmap_64_icache_flush

        sync
        mtmsrd  r2                      // turn DR back on, SF off
        isync
        la      r9,FM_SIZE+16(r1)       // get base address of VR save area on stack
        lvx     v0,0,r9                 // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9
        addi    r9,r9,64
        lvx     v4,0,r9
        lvx     v5,r5,r9
        lvx     v6,r6,r9
        lvx     v7,r7,r9

        b       pmap_g4_restore         // restore lower half of MSR and return

//
// Copy on 64-bit without VMX
//

pmap_novmx_copy:
        li      r0,PPC_PGBYTES/128      // we loop over 128-byte chunks
        mtctr   r0
        li      r0,MASK(MSR_DR)         // get DR bit
        andc    r12,r2,r0               // turn off DR bit
        li      r0,1                    // get a 1 to slam into SF
        rldimi  r12,r0,63,MSR_SF_BIT    // set SF bit (bit 0)
        mtmsrd  r12                     // turn DR off and SF on (no VRs to save on this path)
        isync                           // wait for it to happen
        dcbt128 0,r3,1                  // start a forward stream

pmap_novmx_copy_loop:                   // loop over 128-byte cache lines
        dcbz128 0,r4                    // avoid read of dest line

        ld      r0,0(r3)                // Load half a line
        ld      r12,8(r3)
        ld      r5,16(r3)
        ld      r6,24(r3)
        ld      r7,32(r3)
        ld      r8,40(r3)
        ld      r9,48(r3)
        ld      r10,56(r3)

        std     r0,0(r4)                // Store half a line
        std     r12,8(r4)
        std     r5,16(r4)
        std     r6,24(r4)
        std     r7,32(r4)
        std     r8,40(r4)
        std     r9,48(r4)
        std     r10,56(r4)

        ld      r0,64(r3)               // Load half a line
        ld      r12,72(r3)
        ld      r5,80(r3)
        ld      r6,88(r3)
        ld      r7,96(r3)
        ld      r8,104(r3)
        ld      r9,112(r3)
        ld      r10,120(r3)

        addi    r3,r3,128

        std     r0,64(r4)               // Store half a line
        std     r12,72(r4)
        std     r5,80(r4)
        std     r6,88(r4)
        std     r7,96(r4)
        std     r8,104(r4)
        std     r9,112(r4)
        std     r10,120(r4)

        dcbf    0,r4                    // flush the line we just wrote
        addi    r4,r4,128
        bdnz    pmap_novmx_copy_loop

        sync                            // wait for stores to take
        subi    r4,r4,PPC_PGBYTES       // restore ptr to destination page
        li      r8,PPC_PGBYTES-128      // point to last line in page

pmap_novmx_icache_flush:                // unrolled 2x: two icbi per iteration
        subic.  r9,r8,128               // more to go?
        icbi    r4,r8                   // flush from icache
        subi    r8,r9,128               // get offset to next line
        icbi    r4,r9
        bne     pmap_novmx_icache_flush

        sync
        mtmsrd  r2                      // turn DR back on, SF off
        isync

        b       pmap_g4_restore         // restore lower half of MSR and return
456 | ||
457 | ||
458 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>

// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
// These routines all run both on 32 and 64-bit machines, though because they are called
// by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
// by MapUserMemoryWindow will be 64 bits however on 64-bit machines.  Beware to avoid
// using compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
// is no need to store or load it, which are mode-dependent operations since it could be
// 32 or 64 bits.

#define kkFrameSize     (FM_SIZE+32)

#define kkBufSize       (FM_SIZE+0)     // copy length in bytes
#define kkCR3           (FM_SIZE+4)     // caller's cr3 (we take it over for flags)
#define kkSource        (FM_SIZE+8)     // source ptr as passed at entry
#define kkDest          (FM_SIZE+12)    // dest ptr as passed at entry
#define kkCountPtr      (FM_SIZE+16)    // ptr to #bytes-moved out-param (string ops only)
#define kkR31Save       (FM_SIZE+20)    // caller's r31 (r31 holds the mapped user ptr)
#define kkThrErrJmp     (FM_SIZE+24)    // saved thread_recover ptr, restored on exit


// nonvolatile CR bits we use as flags in cr3

#define kk64bit         12              // set if pf64Bit processor
#define kkNull          13              // set if (dst==NULL) on copyinstr (count only)
#define kkIn            14              // set for copyin/copyinstr, clear for copyout
#define kkString        15              // set for the string (copyinstr/copyoutstr) ops
#define kkZero          15              // NOTE: deliberately shares bit 15 with kkString
487 | ||
488 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyoutstr(src, dst, maxcount, count)
 *      vm_offset_t     src;            // r3
 *      addr64_t        dst;            // r4 and r5
 *      vm_size_t       maxcount;       // r6
 *      vm_size_t*      count;          // r7
 *
 * Set *count to the number of bytes copied.
 * Just sets the cr3 flags for a string copyout and joins the common path.
 */

ENTRY(copyoutstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                 // save caller's cr3, which we use for flags
        mr      r10,r4                  // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                // flag as a string op
        mr      r11,r5                  // move low word of 64-bit user address to r11
        stw     r0,0(r7)                // initialize #bytes moved
        crclr   kkIn                    // flag as copyout
        b       copyJoin
510 | ||
511 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyinstr(src, dst, maxcount, count)
 *      addr64_t        src;            // r3 and r4
 *      vm_offset_t     dst;            // r5
 *      vm_size_t       maxcount;       // r6
 *      vm_size_t*      count;          // r7
 *
 * Set *count to the number of bytes copied
 * If dst == NULL, don't copy, just count bytes.
 * Only currently called from klcopyinstr.
 */

ENTRY(copyinstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                 // save caller's cr3, which we use for flags
        cmplwi  r5,0                    // dst==NULL?
        mr      r10,r3                  // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                // flag as a string op
        mr      r11,r4                  // move low word of 64-bit user address to r11
        crmove  kkNull,cr0_eq           // remember if (dst==NULL)
        stw     r0,0(r7)                // initialize #bytes moved
        crset   kkIn                    // flag as copyin (rather than copyout)
        b       copyJoin1               // skip over the "crclr kkNull"
537 | ||
538 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyout(src, dst, count)
 *      vm_offset_t     src;            // r3
 *      addr64_t        dst;            // r4 and r5
 *      size_t          count;          // r6
 *
 * copyoutmsg is an alias: both entry points share this code.
 */

        .align  5
        .globl  EXT(copyout)
        .globl  EXT(copyoutmsg)

LEXT(copyout)
LEXT(copyoutmsg)

#if INSTRUMENT
        mfspr   r12,pmc1                ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
        stw     r12,0x6100+(12*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mfcr    r2,0x10                 // save caller's cr3, which we use for flags
        mr      r10,r4                  // move high word of 64-bit user address to r10
        crclr   kkString                // not a string version
        mr      r11,r5                  // move low word of 64-bit user address to r11
        crclr   kkIn                    // flag as copyout
        b       copyJoin
571 | ||
572 | ||
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyin(src, dst, count)
 *      addr64_t        src;            // r3 and r4
 *      vm_offset_t     dst;            // r5
 *      size_t          count;          // r6
 *
 * copyinmsg is an alias.  Sets flags then falls through into copyJoin.
 */


        .align  5
        .globl  EXT(copyin)
        .globl  EXT(copyinmsg)

LEXT(copyin)
LEXT(copyinmsg)

        mfcr    r2,0x10                 // save caller's cr3, which we use for flags
        mr      r10,r3                  // move high word of 64-bit user address to r10
        crclr   kkString                // not a string version
        mr      r11,r4                  // move low word of 64-bit user address to r11
        crset   kkIn                    // flag as copyin
        // falls through into copyJoin
595 | ||
596 | ||
// Common code to handle setup for all the copy variants:
//      r2 = caller's cr3
//      r3 = source if copyout
//      r5 = dest if copyin
//      r6 = buffer length or count
//      r7 = count output ptr (if kkString set)
//      r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
//      r11 = low word of 64-bit user-space address
//      cr3 = kkIn, kkString, kkNull flags

copyJoin:
        crclr   kkNull                  // (dst==NULL) convention not used with this call
copyJoin1:                              // enter from copyinstr with kkNull set
        mflr    r0                      // get return address
        cmplwi  r6,0                    // buffer length 0?
        lis     r9,0x1000               // r9 <- 0x10000000 (256MB)
        stw     r0,FM_LR_SAVE(r1)       // save return
        cmplw   cr1,r6,r9               // buffer length > 256MB ?
        mfsprg  r8,2                    // get the features
        beq--   copyinout_0             // 0 length is degenerate case
        stwu    r1,-kkFrameSize(r1)     // set up stack frame
        stw     r2,kkCR3(r1)            // save caller's cr3, which we use for flags
        mtcrf   0x02,r8                 // move pf64Bit to cr6
        stw     r3,kkSource(r1)         // save args across MapUserMemoryWindow
        stw     r5,kkDest(r1)
        stw     r6,kkBufSize(r1)
        crmove  kk64bit,pf64Bitb        // remember if this is a 64-bit processor
        stw     r7,kkCountPtr(r1)
        stw     r31,kkR31Save(r1)       // we use r31 globally for mapped user ptr
        li      r31,0                   // no mapped ptr yet


// Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
// the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
// the mapping.

        ble++   cr1,copyin0             // skip if buffer length <= 256MB
        bf      kkString,copyinout_too_big // error if not string op
        mr      r6,r9                   // silently clamp buffer length to 256MB
        stw     r9,kkBufSize(r1)        // update saved copy too


// Set up thread_recover in case we hit an illegal address.

copyin0:
        mfsprg  r8,1                    // Get the current thread
        lis     r2,hi16(copyinout_error)
        ori     r2,r2,lo16(copyinout_error)
        lwz     r4,THREAD_RECOVER(r8)   // save previous recovery point...
        lwz     r3,ACT_VMMAP(r8)        // r3 <- vm_map virtual address
        stw     r2,THREAD_RECOVER(r8)   // ...and install ours
        stw     r4,kkThrErrJmp(r1)


// Map user segment into kernel map, turn on 64-bit mode.  At this point:
//      r3 = vm map
//      r6 = buffer length
//      r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
//
// When we call MapUserMemoryWindow, we pass:
//      r3 = vm map ptr
//      r4/r5 = 64-bit user space address as an addr64_t

        mr      r4,r10                  // copy user ptr into r4/r5
        mr      r5,r11
#if INSTRUMENT
        mfspr   r12,pmc1                ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
        stw     r12,0x6100+(13*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        bl      EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand
#if INSTRUMENT
        mfspr   r12,pmc1                ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
        stw     r12,0x6100+(14*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mr      r31,r4                  // r31 <- mapped ptr into user space (may be 64-bit)
        bf--    kk64bit,copyin1         // skip if a 32-bit processor

        rldimi  r31,r3,32,0             // slam high-order bits into mapped ptr
        mfmsr   r4                      // if 64-bit, turn on SF so we can use returned ptr
        li      r0,1
        rldimi  r4,r0,63,MSR_SF_BIT     // light bit 0
        mtmsrd  r4                      // turn on 64-bit mode
        isync                           // wait for mode to change


// Load r3-r5, substituting mapped ptr as appropriate.

copyin1:
        lwz     r5,kkBufSize(r1)        // restore length to copy
        bf      kkIn,copyin2            // skip if copyout
        lwz     r4,kkDest(r1)           // copyin: dest is kernel ptr
        mr      r3,r31                  // source is mapped ptr
        b       copyin3
copyin2:                                // handle copyout
        lwz     r3,kkSource(r1)         // source is kernel buffer (r3 at entry)
        mr      r4,r31                  // dest is mapped ptr into user space


// Finally, all set up to copy:
//      r3 = source ptr (mapped if copyin)
//      r4 = dest ptr (mapped if copyout)
//      r5 = length
//      r31 = mapped ptr returned by MapUserMemoryWindow
//      cr3 = kkIn, kkString, kk64bit, and kkNull flags

copyin3:
        bt      kkString,copyString     // handle copyinstr and copyoutstr
        bl      EXT(bcopy)              // copyin and copyout: let bcopy do the work
        li      r3,0                    // return success


// Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
// from error recovery if we get a DSI accessing user space.  Clear recovery ptr,
// and pop off frame.
//      r3 = 0, EFAULT, or ENAMETOOLONG

copyinx:
        lwz     r2,kkCR3(r1)            // get callers cr3
        mfsprg  r6,1                    // Get the current thread
        bf--    kk64bit,copyinx1        // skip if 32-bit processor
        mfmsr   r12
        rldicl  r12,r12,0,MSR_SF_BIT+1  // if 64-bit processor, turn 64-bit mode off
        mtmsrd  r12                     // turn SF off
        isync                           // wait for the mode to change
copyinx1:
        lwz     r0,FM_LR_SAVE+kkFrameSize(r1) // get return address
        lwz     r31,kkR31Save(r1)       // restore callers r31
        lwz     r4,kkThrErrJmp(r1)      // load saved thread recover
        addi    r1,r1,kkFrameSize       // pop off our stack frame
        mtlr    r0
        stw     r4,THREAD_RECOVER(r6)   // restore thread recover
        mtcrf   0x10,r2                 // restore cr3
        blr


/* We get here via the exception handler if an illegal
 * user memory reference was made.  This error handler is used by
 * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
 * they were at point of fault, so for example cr3 flags are valid.
 */

copyinout_error:
        li      r3,EFAULT               // return error
        b       copyinx

copyinout_0:                            // degenerate case: 0-length copy
        mtcrf   0x10,r2                 // restore cr3
        li      r3,0                    // return success
        blr

copyinout_too_big:                      // degenerate case: non-string copy > 256MB
        mtcrf   0x10,r2                 // restore cr3
        lwz     r1,0(r1)                // pop off stack frame
        li      r3,ENAMETOOLONG
        blr
767 | ||
768 | ||
769 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
770 | // Handle copyinstr and copyoutstr. At this point the stack frame is set up, | |
771 | // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode | |
772 | // if necessary, and: | |
773 | // r3 = source ptr, mapped if copyinstr | |
774 | // r4 = dest ptr, mapped if copyoutstr | |
775 | // r5 = buffer length | |
776 | // r31 = mapped ptr returned by MapUserMemoryWindow | |
777 | // cr3 = kkIn, kkString, kkNull, and kk64bit flags | |
778 | // We do word copies unless the buffer is very short, then use a byte copy loop | |
779 | // for the leftovers if necessary. The crossover at which the word loop becomes | |
780 | // faster is about seven bytes, counting the zero. | |
781 | // | |
782 | // We first must word-align the source ptr, in order to avoid taking a spurious | |
783 | // page fault. | |
784 | ||
// Common copy/scan core used by copyinstr-style routines.
// On entry:
//	r3  = ptr to 1st source byte
//	r4  = ptr to destination buffer (not stored through if kkNull is set)
//	r5  = buffer length in bytes (max to examine, including the terminating 0)
//	cr3 = kkNull set iff we only count bytes without copying
// Exits through copyinstr10 -> copyinx with r3 = 0 (found the 0 byte) or
// ENAMETOOLONG (buffer filled first), after storing the byte count through
// the pointer saved at kkCountPtr(r1).
copyString:
	cmplwi	cr1,r5,15		// is buffer very short?
	mr	r12,r3			// remember ptr to 1st source byte
	mtctr	r5			// assuming short, set up loop count for bytes
	blt--	cr1,copyinstr8		// too short for word loop
	rlwinm	r2,r3,0,0x3		// get byte offset of 1st byte within word
	rlwinm	r9,r3,3,0x18		// get bit offset of 1st byte within word
	li	r7,-1
	sub	r3,r3,r2		// word-align source address
	add	r6,r5,r2		// get length starting at byte 0 in word
	srw	r7,r7,r9		// get mask for bytes in first word
	srwi	r0,r6,2			// get #words in buffer
	lwz	r5,0(r3)		// get aligned word with first source byte
	lis	r10,hi16(0xFEFEFEFF)	// load magic constants into r10 and r11
	lis	r11,hi16(0x80808080)
	mtctr	r0			// set up word loop count
	addi	r3,r3,4			// advance past the source word
	ori	r10,r10,lo16(0xFEFEFEFF)
	ori	r11,r11,lo16(0x80808080)
	orc	r8,r5,r7		// map bytes preceding first source byte into 0xFF
	bt--	kkNull,copyinstr5enter	// enter loop that just counts

// Special case 1st word, which has been 0xFF filled on left. Note that we use
// "and.", even though we execute both in 32 and 64-bit mode. This is OK.

	slw	r5,r5,r9		// left justify payload bytes
	add	r9,r10,r8		// r9 = data + 0xFEFEFEFF
	andc	r7,r11,r8		// r7 = ~data & 0x80808080
	subfic	r0,r2,4			// get r0 <- #payload bytes in 1st word
	and.	r7,r9,r7		// if r7==0, then all bytes in r8 are nonzero
	stw	r5,0(r4)		// copy payload bytes to dest buffer
	add	r4,r4,r0		// then point to next byte in dest buffer
	bdnzt	cr0_eq,copyinstr6	// use loop that copies if 0 not found

	b	copyinstr7		// 0 found (buffer can't be full)


// Word loop(s). They do a word-parallel search for 0s, using the following
// inobvious but very efficient test:
//	y = data + 0xFEFEFEFF
//	z = ~data & 0x80808080
// If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
// of this loop, one that just counts and another that copies.
//	r3 = ptr to next word of source (word aligned)
//	r4 = ptr to next byte in buffer
//	r6 = original buffer length (adjusted to be word origin)
//	r10 = 0xFEFEFEFF
//	r11 = 0x80808080
//	r12 = ptr to 1st source byte (used to determine string length)

	.align	5			// align inner loops for speed
copyinstr5:				// version that counts but does not copy
	lwz	r8,0(r3)		// get next word of source
	addi	r3,r3,4			// advance past it
copyinstr5enter:
	add	r9,r10,r8		// r9 = data + 0xFEFEFEFF
	andc	r7,r11,r8		// r7 = ~data & 0x80808080
	and.	r7,r9,r7		// r7 = r9 & r7 ("." ok even in 64-bit mode)
	bdnzt	cr0_eq,copyinstr5	// if r7==0, then all bytes in r8 are nonzero

	b	copyinstr7

	.align	5			// align inner loops for speed
copyinstr6:				// version that counts and copies
	lwz	r8,0(r3)		// get next word of source
	addi	r3,r3,4			// advance past it
	addi	r4,r4,4			// increment dest ptr while we wait for data
	add	r9,r10,r8		// r9 = data + 0xFEFEFEFF
	andc	r7,r11,r8		// r7 = ~data & 0x80808080
	and.	r7,r9,r7		// r7 = r9 & r7 ("." ok even in 64-bit mode)
	stw	r8,-4(r4)		// pack all 4 bytes into buffer
	bdnzt	cr0_eq,copyinstr6	// if r7==0, then all bytes are nonzero


// Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
// mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte.
//	r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
//	r6 = original buffer length (adjusted to be word origin)
//	r7 = computed vector of 0x00 and 0x80 bytes
//	r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
//	r12 = ptr to 1st source byte (used to determine string length)
//	cr0 = beq set iff 0 not found

copyinstr7:
	rlwinm	r2,r8,7,0,31		// move 0x01 bits to 0x80 position
	rlwinm	r6,r6,0,0x3		// mask down to partial byte count in last word
	andc	r7,r7,r2		// turn off false hits from 0x0100 worst case
	crnot	kkZero,cr0_eq		// 0 found iff cr0_eq is off
	srwi	r7,r7,8			// we want to count the 0 as a byte xferred
	cmpwi	r6,0			// any bytes left over in last word?
	cntlzw	r7,r7			// now we can find the 0 byte (ie, the 0x80)
	subi	r3,r3,4			// back up r3 to point to 1st byte in r8
	srwi	r7,r7,3			// convert 8,16,24,32 to 1,2,3,4
	add	r3,r3,r7		// now r3 points one past 0 byte, or at 1st byte not xferred
	bt++	kkZero,copyinstr10	// 0 found, so done

	beq	copyinstr10		// r6==0, so buffer truly full
	mtctr	r6			// 0 not found, loop over r6 bytes
	b	copyinstr8		// enter byte loop for last 1-3 leftover bytes


// Byte loop. This is used for very small buffers and for the odd bytes left over
// after searching and copying words at a time.
//	r3 = ptr to next byte of source
//	r4 = ptr to next dest byte
//	r12 = ptr to first byte of source
//	ctr = count of bytes to check

	.align	5			// align inner loops for speed
copyinstr8:				// loop over bytes of source
	lbz	r0,0(r3)		// get next byte of source
	addi	r3,r3,1
	addi	r4,r4,1			// increment dest addr whether we store or not
	cmpwi	r0,0			// the 0?
	bt--	kkNull,copyinstr9	// don't store if copyinstr with NULL ptr
	stb	r0,-1(r4)
copyinstr9:
	bdnzf	cr0_eq,copyinstr8	// loop if byte not 0 and more room in buffer

	crmove	kkZero,cr0_eq		// remember if 0 found or buffer filled


// Buffer filled or 0 found. Unwind and return.
//	r3 = ptr to 1st source byte not transferred
//	r12 = ptr to 1st source byte
//	r31 = mapped ptr returned by MapUserMemoryWindow
//	cr3 = kkZero set iff 0 found

copyinstr10:
	lwz	r9,kkCountPtr(r1)	// get ptr to place to store count of bytes moved
	sub	r2,r3,r12		// compute #bytes copied (including the 0)
	li	r3,0			// assume success return status
	stw	r2,0(r9)		// store #bytes moved
	bt++	kkZero,copyinx		// we did find the 0 so return 0
	li	r3,ENAMETOOLONG		// buffer filled
	b	copyinx			// join main exit routine
922 | ||
923 | //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> | |
924 | /* | |
925 | * int | |
926 | * copypv(source, sink, size, which) | |
927 | * addr64_t src; // r3 and r4 | |
928 | * addr64_t dst; // r5 and r6 | |
929 | * size_t size; // r7 | |
930 | * int which; // r8 | |
931 | * | |
932 | * Operand size bytes are copied from operand src into operand dst. The source and | |
933 | * destination operand addresses are given as addr64_t, and may designate starting | |
934 | * locations in physical or virtual memory in any combination except where both are | |
935 | * virtual. Virtual memory locations may be in either the kernel or the current thread's | |
936 | * address space. Operand size may be up to 256MB. | |
937 | * | |
938 | * Operation is controlled by operand which, which offers these options: | |
939 | * cppvPsrc : source operand is (1) physical or (0) virtual | |
940 | * cppvPsnk : destination operand is (1) physical or (0) virtual | |
941 | * cppvKmap : virtual operand is in (1) kernel or (0) current thread | |
942 | * cppvFsnk : (1) flush destination before and after transfer | |
943 | * cppvFsrc : (1) flush source before and after transfer | |
 *	  cppvNoModSnk : (1) don't set destination operand's changed bit(s)
 *	  cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
946 | * | |
947 | * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32(). | |
948 | * This section describes the operation of the new 64-bit path. | |
949 | * | |
950 | * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a | |
951 | * window in the kernel address space into all of physical RAM plus the I/O hole. Since | |
952 | * the window's mappings specify the proper access policies for the underlying memory, | |
953 | * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk | |
 *	and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
955 | * memory window, and are accessed with data relocation on. Virtual addresses are either | |
956 | * within the kernel, or are mapped into the kernel address space through the user memory | |
957 | * window. Because accesses to a virtual operand are performed with data relocation on, | |
958 | * the new path does not have to translate the address, disable/enable interrupts, lock | |
959 | * the mapping, or update referenced and changed bits. | |
960 | * | |
961 | * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is | |
962 | * a substantial performance penalty for copypv operating in real mode. Utilizing the | |
963 | * new 64-bit path, transfer performance increases >100% on the G5. | |
964 | * | |
965 | * The attentive reader may notice that mtmsrd ops are not followed by isync ops as | |
966 | * might be expected. The 970 follows PowerPC architecture version 2.01, which defines | |
967 | * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer | |
968 | * required. | |
969 | * | |
970 | * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need | |
971 | * to call 32-bit functions, which would lead to the high-order 32 bits of our values | |
972 | * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles | |
973 | * in our own stack frame across calls to 32-bit functions. | |
974 | * | |
975 | */ | |
976 | ||
// Map operand "which" bits into non-volatile CR2 and CR3 bits.
// whichAlign is the left-rotate count that shifts the cppv*b bit positions
// (declared elsewhere) into the CR2/CR3 field of the condition register, and
// whichMask isolates just those seven control bits. The pv* names are the
// resulting CR bit numbers, usable directly with crand/cror/bt/bf.
#define	whichAlign	((3+1)*4)		// rotate "which" into the cr2/cr3 field
#define	whichMask	0x007F0000		// the seven control bits, post-rotate
#define	pvPsnk		(cppvPsnkb - whichAlign)	// dest is physical
#define	pvPsrc		(cppvPsrcb - whichAlign)	// source is physical
#define	pvFsnk		(cppvFsnkb - whichAlign)	// flush dest (currently unused, see #if 0 code)
#define	pvFsrc		(cppvFsrcb - whichAlign)	// flush source (currently unused, see #if 0 code)
#define	pvNoModSnk	(cppvNoModSnkb - whichAlign)	// don't set dest changed bits
#define	pvNoRefSrc	(cppvNoRefSrcb - whichAlign)	// don't set source referenced bits
#define	pvKmap		(cppvKmapb - whichAlign)	// virtual operand is in kernel map
#define	pvNoCache	cr2_lt		// scratch CR bit: use bcopy_nc for the copy
988 | ||
	.align	5
	.globl	EXT(copypv)

// int copypv(addr64_t src, addr64_t dst, size_t size, int which)
// 64-bit-capable entry point; dispatches to hw_copypv_32() on 32-bit CPUs.
// See the block comment above for the full contract.
LEXT(copypv)
	mfsprg	r10,2			// get feature flags
	mtcrf	0x02,r10		// we need to test pf64Bit
	bt++	pf64Bitb,copypv_64	// skip if 64-bit processor (only they take hint)

	b	EXT(hw_copypv_32)	// carry on with 32-bit copypv

// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
copypv_64:
	mfsprg	r9,1			// get current thread
	stwu	r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
					// allocate stack frame and link it
	mflr	r0			// get return address
	mfcr	r10			// get cr2 and cr3
	lwz	r12,THREAD_RECOVER(r9)	// get error callback
	stw	r26,FM_ARG0+0x00(r1)	// save non-volatile r26
	stw	r27,FM_ARG0+0x04(r1)	// save non-volatile r27
	stw	r28,FM_ARG0+0x08(r1)	// save non-volatile r28
	stw	r29,FM_ARG0+0x0C(r1)	// save non-volatile r29
	stw	r30,FM_ARG0+0x10(r1)	// save non-volatile r30
	stw	r31,FM_ARG0+0x14(r1)	// save non-volatile r31
	stw	r12,FM_ARG0+0x20(r1)	// save caller's error callback (restored on exit)
	stw	r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
					// save return address
	stw	r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
					// save non-volatile cr2 and cr3

// Non-volatile register usage in this routine is:
//	r26: saved msr image
//	r27: current pmap_t / virtual source address
//	r28: destination virtual address
//	r29: source address
//	r30: destination address
//	r31: byte count to copy
//	cr2/3: parameter 'which' bits

	rlwinm	r8,r8,whichAlign,whichMask	// align and mask which bits
	mr	r31,r7			// copy size to somewhere non-volatile
	mtcrf	0x20,r8			// insert which bits into cr2 and cr3
	mtcrf	0x10,r8			// insert which bits into cr2 and cr3
	rlwinm	r29,r3,0,1,0		// form source address high-order bits
	rlwinm	r30,r5,0,1,0		// form destination address high-order bits
	rlwimi	r29,r4,0,0,31		// form source address low-order bits
	rlwimi	r30,r6,0,0,31		// form destination address low-order bits
	crand	cr7_lt,pvPsnk,pvPsrc	// are both operand addresses physical?
	cntlzw	r0,r31			// count leading zeroes in byte count
	cror	cr7_eq,pvPsnk,pvPsrc	// cr7_eq <- source or destination is physical
	bf--	cr7_eq,copypv_einval	// both operands may not be virtual
	cmplwi	r0,4			// byte count greater than or equal 256M (2**28)?
	blt--	copypv_einval		// byte count too big, give EINVAL
	cmplwi	r31,0			// byte count zero?
	beq--	copypv_zero		// early out
	bt	cr7_lt,copypv_phys	// both operand addresses are physical
	mr	r28,r30			// assume destination is virtual
	bf	pvPsnk,copypv_dv	// is destination virtual?
	mr	r28,r29			// no, so source must be virtual
copypv_dv:
	lis	r27,ha16(EXT(kernel_pmap))	// get kernel's pmap_t *, high-order
	lwz	r27,lo16(EXT(kernel_pmap))(r27)	// get kernel's pmap_t
	bt	pvKmap,copypv_kern	// virtual address in kernel map?
	lwz	r3,ACT_VMMAP(r9)	// get user's vm_map *
	rldicl	r4,r28,32,32		// r4, r5 <- addr64_t virtual address
	rldicl	r5,r28,0,32
	std	r29,FM_ARG0+0x30(r1)	// preserve 64-bit r29 across 32-bit call
	std	r30,FM_ARG0+0x38(r1)	// preserve 64-bit r30 across 32-bit call
	bl	EXT(MapUserMemoryWindow)	// map slice of user space into kernel space
	ld	r29,FM_ARG0+0x30(r1)	// restore 64-bit r29
	ld	r30,FM_ARG0+0x38(r1)	// restore 64-bit r30
	rlwinm	r28,r3,0,1,0		// convert relocated addr64_t virtual address
	rlwimi	r28,r4,0,0,31		// into a single 64-bit scalar
copypv_kern:

// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
// update the referenced and changed bits manually after the copy. So, force the appropriate
// flag bit on for the virtual operand.
	crorc	pvNoModSnk,pvNoModSnk,pvPsnk	// for virtual dest, let hardware do ref/chg bits
	crorc	pvNoRefSrc,pvNoRefSrc,pvPsrc	// for virtual source, let hardware do ref bit

// We'll be finding a mapping and looking at it, so we need to disable 'rupts.
	lis	r0,hi16(MASK(MSR_VEC))	// get vector mask
	ori	r0,r0,lo16(MASK(MSR_FP))	// insert fp mask
	mfmsr	r26			// save current msr
	andc	r26,r26,r0		// turn off VEC and FP in saved copy
	ori	r0,r0,lo16(MASK(MSR_EE))	// add EE to our mask
	andc	r0,r26,r0		// disable EE in our new msr image
	mtmsrd	r0			// introduce new msr image

// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
// try to find a mapping corresponding to this address in order to determine whether the address
// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
// examine the mapping's caching-inhibited bit.
	mr	r3,r27			// r3 <- pmap_t pmap
	rldicl	r4,r28,32,32		// r4, r5 <- addr64_t va
	rldicl	r5,r28,0,32
	la	r6,FM_ARG0+0x18(r1)	// r6 <- addr64_t *nextva
	li	r7,1			// r7 <- int full, search nested mappings
	std	r26,FM_ARG0+0x28(r1)	// preserve 64-bit r26 across 32-bit calls
	std	r28,FM_ARG0+0x30(r1)	// preserve 64-bit r28 across 32-bit calls
	std	r29,FM_ARG0+0x38(r1)	// preserve 64-bit r29 across 32-bit calls
	std	r30,FM_ARG0+0x40(r1)	// preserve 64-bit r30 across 32-bit calls
	bl	EXT(mapping_find)	// find mapping for virtual operand
	mr.	r3,r3			// did we find it?
	beq	copypv_nomapping	// nope, so we'll assume it's cacheable
	lwz	r4,mpVAddr+4(r3)	// get low half of virtual addr for hw flags
	rlwinm.	r4,r4,0,mpIb-32,mpIb-32	// caching-inhibited bit set?
	crnot	pvNoCache,cr0_eq	// if it is, use bcopy_nc
	bl	EXT(mapping_drop_busy)	// drop busy on the mapping
copypv_nomapping:
	ld	r26,FM_ARG0+0x28(r1)	// restore 64-bit r26
	ld	r28,FM_ARG0+0x30(r1)	// restore 64-bit r28
	ld	r29,FM_ARG0+0x38(r1)	// restore 64-bit r29
	ld	r30,FM_ARG0+0x40(r1)	// restore 64-bit r30
	mtmsrd	r26			// restore msr to its previous state

// Set both the source and destination virtual addresses to the virtual operand's address --
// we'll overlay one of them with the physical operand's address.
	mr	r27,r28			// make virtual operand BOTH source AND destination

// Now we're ready to relocate the physical operand address(es) into the physical memory window.
// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
copypv_phys:
	ld	r6,lgPMWvaddr(0)	// get physical memory window virtual address
	bf	pvPsnk,copypv_dstvirt	// is destination address virtual?
	cntlzd	r4,r30			// count leading zeros in destination address
	cmplwi	r4,32			// exactly 32 leading zeros means addr is in [2**31, 2**32), the I/O hole
	cror	pvNoCache,cr0_eq,pvNoCache	// use bcopy_nc for I/O hole locations
	add	r28,r30,r6		// relocate physical destination into physical window
copypv_dstvirt:
	bf	pvPsrc,copypv_srcvirt	// is source address virtual?
	cntlzd	r4,r29			// count leading zeros in source address
	cmplwi	r4,32			// exactly 32 leading zeros means addr is in [2**31, 2**32), the I/O hole
	cror	pvNoCache,cr0_eq,pvNoCache	// use bcopy_nc for I/O hole locations
	add	r27,r29,r6		// relocate physical source into physical window
copypv_srcvirt:

// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
// funny happens during the copy. So, we set a pointer to our error handler in the per-thread
// control block.
	mfsprg	r8,1			// get current thread's stuff
	lis	r3,hi16(copypv_error)	// get our error callback's address, high
	ori	r3,r3,lo16(copypv_error)	// get our error callback's address, low
	stw	r3,THREAD_RECOVER(r8)	// set our error callback

// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
// 64-bit mode.
	li	r0,1			// get a handy one bit
	mfmsr	r3			// get current msr
	rldimi	r3,r0,63,MSR_SF_BIT	// set SF bit on in our msr copy
	mtmsrd	r3			// enter 64-bit mode

// If requested, flush data cache
// Note that we don't flush, the code is being saved "just in case".
#if 0
	bf	pvFsrc,copypv_nfs	// do we flush the source?
	rldicl	r3,r27,32,32		// r3, r4 <- addr64_t source virtual address
	rldicl	r4,r27,0,32
	mr	r5,r31			// r5 <- count (in bytes)
	li	r6,0			// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)	// flush the source operand
copypv_nfs:
	bf	pvFsnk,copypv_nfdx	// do we flush the destination?
	rldicl	r3,r28,32,32		// r3, r4 <- addr64_t destination virtual address
	rldicl	r4,r28,0,32
	mr	r5,r31			// r5 <- count (in bytes)
	li	r6,0			// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)	// flush the destination operand
copypv_nfdx:
#endif

// Call bcopy or bcopy_nc to perform the copy.
	mr	r3,r27			// r3 <- source virtual address
	mr	r4,r28			// r4 <- destination virtual address
	mr	r5,r31			// r5 <- bytes to copy
	bt	pvNoCache,copypv_nc	// take non-caching route
	bl	EXT(bcopy)		// call bcopy to do the copying
	b	copypv_copydone
copypv_nc:
	bl	EXT(bcopy_nc)		// call bcopy_nc to do the copying
copypv_copydone:

// If requested, flush data cache
// Note that we don't flush, the code is being saved "just in case".
#if 0
	bf	pvFsrc,copypv_nfsx	// do we flush the source?
	rldicl	r3,r27,32,32		// r3, r4 <- addr64_t source virtual address
	rldicl	r4,r27,0,32
	mr	r5,r31			// r5 <- count (in bytes)
	li	r6,0			// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)	// flush the source operand
copypv_nfsx:
	bf	pvFsnk,copypv_nfd	// do we flush the destination?
	rldicl	r3,r28,32,32		// r3, r4 <- addr64_t destination virtual address
	rldicl	r4,r28,0,32
	mr	r5,r31			// r5 <- count (in bytes)
	li	r6,0			// r6 <- boolean phys (false, not physical)
	bl	EXT(flush_dcache)	// flush the destination operand
copypv_nfd:
#endif

// Leave 64-bit mode.
	mfmsr	r3			// get current msr
	rldicl	r3,r3,0,MSR_SF_BIT+1	// clear SF bit in our copy
	mtmsrd	r3			// leave 64-bit mode

// If requested, set ref/chg on source/dest physical operand(s). It is possible that copy is
// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
// Note that this code is page-size sensitive, so it should probably be a part of our low-level
// code in hw_vm.s.
	bt	pvNoModSnk,copypv_nomod	// skip destination update if not requested
	std	r29,FM_ARG0+0x30(r1)	// preserve 64-bit r29 across 32-bit calls
	li	r26,1			// r26 <- 4K-page count
	mr	r27,r31			// r27 <- byte count
	rlwinm	r3,r30,0,20,31		// does destination cross a page boundary?
	subfic	r3,r3,4096		// bytes from dest to end of its first page
	cmplw	r3,r27			// does the copy extend past that page?
	blt	copypv_modnox		// skip if not crossing case
	subf	r27,r3,r27		// r27 <- byte count less initial fragment
	addi	r26,r26,1		// increment page count
copypv_modnox:
	srdi	r3,r27,12		// pages to update (not including crosser)
	add	r26,r26,r3		// add in crosser
	srdi	r27,r30,12		// r27 <- destination page number
copypv_modloop:
	mr	r3,r27			// r3 <- destination page number
	la	r4,FM_ARG0+0x18(r1)	// r4 <- unsigned int *pindex
	bl	EXT(mapping_phys_lookup)	// see if page is really there
	mr.	r3,r3			// is it?
	beq--	copypv_modend		// nope, break out of modify loop
	mr	r3,r27			// r3 <- destination page number
	bl	EXT(mapping_set_mod)	// set page changed status
	subi	r26,r26,1		// decrement page count
	cmpwi	r26,0			// done yet?
	bgt	copypv_modloop		// nope, iterate
copypv_modend:
	ld	r29,FM_ARG0+0x30(r1)	// restore 64-bit r29
copypv_nomod:
	bt	pvNoRefSrc,copypv_done	// skip source update if not requested
copypv_debugref:
	li	r26,1			// r26 <- 4K-page count
	mr	r27,r31			// r27 <- byte count
	rlwinm	r3,r29,0,20,31		// does source cross a page boundary?
	subfic	r3,r3,4096		// bytes from source to end of its first page
	cmplw	r3,r27			// does the copy extend past that page?
	blt	copypv_refnox		// skip if not crossing case
	subf	r27,r3,r27		// r27 <- byte count less initial fragment
	addi	r26,r26,1		// increment page count
copypv_refnox:
	srdi	r3,r27,12		// pages to update (not including crosser)
	add	r26,r26,r3		// add in crosser
	srdi	r27,r29,12		// r27 <- source page number
copypv_refloop:
	mr	r3,r27			// r3 <- source page number
	la	r4,FM_ARG0+0x18(r1)	// r4 <- unsigned int *pindex
	bl	EXT(mapping_phys_lookup)	// see if page is really there
	mr.	r3,r3			// is it?
	beq--	copypv_done		// nope, break out of reference loop
	mr	r3,r27			// r3 <- source page number
	bl	EXT(mapping_set_ref)	// set page referenced status
	subi	r26,r26,1		// decrement page count
	cmpwi	r26,0			// done yet?
	bgt	copypv_refloop		// nope, iterate

// Return, indicating success.
copypv_done:
copypv_zero:
	li	r3,0			// our efforts were crowned with success

// Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
copypv_return:
	mfsprg	r9,1			// get current thread's stuff
	lwz	r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
					// get return address
	lwz	r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
					// get non-volatile cr2 and cr3
	lwz	r26,FM_ARG0+0x00(r1)	// restore non-volatile r26
	lwz	r27,FM_ARG0+0x04(r1)	// restore non-volatile r27
	mtlr	r0			// restore return address
	lwz	r28,FM_ARG0+0x08(r1)	// restore non-volatile r28
	mtcrf	0x20,r4			// restore non-volatile cr2
	mtcrf	0x10,r4			// restore non-volatile cr3
	lwz	r11,FM_ARG0+0x20(r1)	// get saved error callback
	lwz	r29,FM_ARG0+0x0C(r1)	// restore non-volatile r29
	lwz	r30,FM_ARG0+0x10(r1)	// restore non-volatile r30
	lwz	r31,FM_ARG0+0x14(r1)	// restore non-volatile r31
	stw	r11,THREAD_RECOVER(r9)	// restore caller's error callback
	lwz	r1,0(r1)		// release stack frame

	blr				// y'all come back now

// Invalid argument handler.
copypv_einval:
	li	r3,EINVAL		// invalid argument
	b	copypv_return		// return

// Error encountered during bcopy or bcopy_nc.
copypv_error:
	mfmsr	r3			// get current msr
	rldicl	r3,r3,0,MSR_SF_BIT+1	// clear SF bit in our copy
	mtmsrd	r3			// leave 64-bit mode
	li	r3,EFAULT		// it was all his fault
	b	copypv_return		// return