/*
 * Copyright (c) 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>
#include <sys/errno.h>
#include <i386/param.h>
#include <i386/misc_protos.h>
#include <i386/cpu_data.h>
#include <i386/machine_routines.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_fault.h>

#include <sys/kdebug.h>

/*
 * the copy engine has the following characteristics
 *   - copyio handles copies to/from user or kernel space
 *   - copypv deals with physical or virtual addresses
 *
 * implementation details as follows
 *   - a cache of up to NCOPY_WINDOWS is maintained per thread for
 *     access of user virtual space
 *   - the window size is determined by the amount of virtual space
 *     that can be mapped by a single page table
 *   - the mapping is done by copying the page table pointer from
 *     the user's directory entry corresponding to the window's
 *     address in user space to the directory entry corresponding
 *     to the window slot in the kernel's address space
 *   - the set of mappings is preserved across context switches,
 *     so the copy can run with pre-emption enabled
 *   - there is a gdt entry set up to anchor the kernel window on
 *     each processor
 *   - the copies are done using the selector corresponding to the
 *     gdt entry
 *   - the addresses corresponding to the user virtual address are
 *     relative to the beginning of the window being used to map
 *     that region... thus the thread can be pre-empted and switched
 *     to a different processor while in the midst of a copy
 *   - the window caches must be invalidated if the pmap changes out
 *     from under the thread... this can happen during vfork/exec...
 *     inval_copy_windows is the invalidation routine to be used
 *   - the copyio engine has 4 different states associated with it
 *     that allow for lazy tlb flushes and the ability to avoid
 *     a flush altogether if we've just come from user space...
 *     the 4 states are as follows
 *
 *      WINDOWS_OPENED - set by copyio to indicate to the context
 *        switch code that it is necessary to do a tlbflush after
 *        switching the windows since we're in the middle of a copy
 *
 *      WINDOWS_CLOSED - set by copyio to indicate that it's done
 *        using the windows, so that the context switch code need
 *        not do the tlbflush... instead it will set the state to...
 *
 *      WINDOWS_DIRTY - set by the context switch code to indicate
 *        to the copy engine that it is responsible for doing a
 *        tlbflush before using the windows again... it's also
 *        set by the inval_copy_windows routine to indicate the
 *        same responsibility.
 *
 *      WINDOWS_CLEAN - set by the return to user path to indicate
 *        that a tlbflush has happened and that there is no need
 *        for copyio to do another when it is entered next...
 *
 *   - a window for mapping single physical pages is provided for copypv
 *   - this window is maintained across context switches and has the
 *     same characteristics as the user space windows w/r to pre-emption
 */
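
/*
 * In summary, the copyio_state transitions described above are:
 *
 *      copyio start                                  -> WINDOWS_OPENED
 *      copyio done                                   -> WINDOWS_CLOSED
 *      context switch (windows closed)
 *        or inval_copy_windows                       -> WINDOWS_DIRTY
 *      return-to-user tlbflush                       -> WINDOWS_CLEAN
 */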

extern int copyout_user(const char *, vm_offset_t, vm_size_t);
extern int copyout_kern(const char *, vm_offset_t, vm_size_t);
extern int copyin_user(const vm_offset_t, char *, vm_size_t);
extern int copyin_kern(const vm_offset_t, char *, vm_size_t);
extern int copyoutphys_user(const char *, vm_offset_t, vm_size_t);
extern int copyoutphys_kern(const char *, vm_offset_t, vm_size_t);
extern int copyinphys_user(const vm_offset_t, char *, vm_size_t);
extern int copyinphys_kern(const vm_offset_t, char *, vm_size_t);
extern int copyinstr_user(const vm_offset_t, char *, vm_size_t, vm_size_t *);
extern int copyinstr_kern(const vm_offset_t, char *, vm_size_t, vm_size_t *);

static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);

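/* operation selectors passed to copyio() as its copy_type argument */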
#define COPYIN      0
#define COPYOUT     1
#define COPYINSTR   2
#define COPYINPHYS  3
#define COPYOUTPHYS 4

void inval_copy_windows(thread_t thread)
{
    int i;

    for (i = 0; i < NCOPY_WINDOWS; i++) {
        thread->machine.copy_window[i].user_base = -1;
    }
    thread->machine.nxt_window = 0;
    thread->machine.copyio_state = WINDOWS_DIRTY;

    KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (int)thread->map, 0, 0, 0);
}


static int
copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
       vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
{
    thread_t    thread;
    pmap_t      pmap;
    pt_entry_t  *updp;
    pt_entry_t  *kpdp;
    user_addr_t user_base;
    vm_offset_t user_offset;
    vm_offset_t kern_vaddr;
    vm_size_t   cnt;
    vm_size_t   bytes_copied;
    int         error = 0;
    int         window_index;
    int         copyio_state;
    boolean_t   istate;
#if KDEBUG
    int debug_type = 0xeff70010;
    debug_type += (copy_type << 2);
#endif

    thread = current_thread();

    KERNEL_DEBUG(debug_type | DBG_FUNC_START, (int)(user_addr >> 32), (int)user_addr,
                 (int)nbytes, thread->machine.copyio_state, 0);

    if (nbytes == 0) {
        KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
                     (unsigned)kernel_addr, (unsigned)nbytes, 0, 0);
        return (0);
    }
    pmap = thread->map->pmap;

    if (pmap == kernel_pmap || use_kernel_map) {

        kern_vaddr = (vm_offset_t)user_addr;

        switch (copy_type) {

        case COPYIN:
            error = copyin_kern(kern_vaddr, kernel_addr, nbytes);
            break;

        case COPYOUT:
            error = copyout_kern(kernel_addr, kern_vaddr, nbytes);
            break;

        case COPYINSTR:
            error = copyinstr_kern(kern_vaddr, kernel_addr, nbytes, lencopied);
            break;

        case COPYINPHYS:
            error = copyinphys_kern(kern_vaddr, kernel_addr, nbytes);
            break;

        case COPYOUTPHYS:
            error = copyoutphys_kern(kernel_addr, kern_vaddr, nbytes);
            break;
        }
        KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)kern_vaddr,
                     (unsigned)kernel_addr, (unsigned)nbytes,
                     error | 0x80000000, 0);
        return (error);
    }

#if CONFIG_DTRACE
    thread->machine.specFlags |= CopyIOActive;
#endif /* CONFIG_DTRACE */

    if ((nbytes && (user_addr + nbytes <= user_addr)) ||
        (user_addr < vm_map_min(thread->map)) ||
        (user_addr + nbytes > vm_map_max(thread->map))) {
        error = EFAULT;
        goto done;
    }

    user_base = user_addr & ~((user_addr_t)(NBPDE - 1));
    user_offset = (vm_offset_t)(user_addr & (NBPDE - 1));

    KERNEL_DEBUG(debug_type | DBG_FUNC_NONE, (int)(user_base >> 32), (int)user_base,
                 (int)user_offset, 0, 0);

    cnt = NBPDE - user_offset;

    if (cnt > nbytes)
        cnt = nbytes;
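
    /*
     * Worked example (assuming NBPDE is 2 MB, i.e. one page table maps
     * 2 MB of user space): for user_addr 0x3ff000, user_base is 0x200000,
     * user_offset is 0x1ff000, and the first chunk cnt is 0x1000 (assuming
     * nbytes is at least that); the remainder is handled window by window
     * in the loop below.
     */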

    istate = ml_set_interrupts_enabled(FALSE);

    copyio_state = thread->machine.copyio_state;
    thread->machine.copyio_state = WINDOWS_OPENED;

    (void) ml_set_interrupts_enabled(istate);


    for (;;) {

        for (window_index = 0; window_index < NCOPY_WINDOWS; window_index++) {
            if (thread->machine.copy_window[window_index].user_base == user_base)
                break;
        }
        if (window_index >= NCOPY_WINDOWS) {

            window_index = thread->machine.nxt_window;
            thread->machine.nxt_window++;

            if (thread->machine.nxt_window >= NCOPY_WINDOWS)
                thread->machine.nxt_window = 0;

            /*
             * it's necessary to disable pre-emption
             * since I have to compute the kernel descriptor pointer
             * for the new window
             */
            istate = ml_set_interrupts_enabled(FALSE);

            thread->machine.copy_window[window_index].user_base = user_base;

            updp = pmap_pde(pmap, user_base);

            kpdp = current_cpu_datap()->cpu_copywindow_pdp;
            kpdp += window_index;

            pmap_store_pte(kpdp, updp ? *updp : 0);

            (void) ml_set_interrupts_enabled(istate);

            copyio_state = WINDOWS_DIRTY;

            KERNEL_DEBUG(0xeff70040 | DBG_FUNC_NONE, window_index,
                         (unsigned)user_base, (unsigned)updp,
                         (unsigned)kpdp, 0);

        }
#if JOE_DEBUG
        else {
            istate = ml_set_interrupts_enabled(FALSE);

            updp = pmap_pde(pmap, user_base);

            kpdp = current_cpu_datap()->cpu_copywindow_pdp;

            kpdp += window_index;

            if ((*kpdp & PG_FRAME) != (*updp & PG_FRAME)) {
                panic("copyio: user pdp mismatch - kpdp = 0x%qx, updp = 0x%qx\n", *kpdp, *updp);
            }
            (void) ml_set_interrupts_enabled(istate);
        }
#endif
        if (copyio_state == WINDOWS_DIRTY) {
            flush_tlb();

            copyio_state = WINDOWS_CLEAN;

            KERNEL_DEBUG(0xeff70054 | DBG_FUNC_NONE, window_index, 0, 0, 0, 0);
        }
        user_offset += (window_index * NBPDE);

        KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_offset,
                     (unsigned)kernel_addr, cnt, 0, 0);

        switch (copy_type) {

        case COPYIN:
            error = copyin_user(user_offset, kernel_addr, cnt);
            break;

        case COPYOUT:
            error = copyout_user(kernel_addr, user_offset, cnt);
            break;

        case COPYINPHYS:
            error = copyinphys_user(user_offset, kernel_addr, cnt);
            break;

        case COPYOUTPHYS:
            error = copyoutphys_user(kernel_addr, user_offset, cnt);
            break;

        case COPYINSTR:
            error = copyinstr_user(user_offset, kernel_addr, cnt, &bytes_copied);

            /*
             * lencopied should be updated on success
             * or ENAMETOOLONG... but not EFAULT
             */
            if (error != EFAULT)
                *lencopied += bytes_copied;

            /*
             * if we still have room, then the ENAMETOOLONG
             * is just an artifact of the buffer straddling
             * a window boundary and we should continue
             */
            if (error == ENAMETOOLONG && nbytes > cnt)
                error = 0;

            if (error) {
#if KDEBUG
                nbytes = *lencopied;
#endif
                break;
            }
            if (*(kernel_addr + bytes_copied - 1) == 0) {
                /*
                 * we found a NULL terminator... we're done
                 */
#if KDEBUG
                nbytes = *lencopied;
#endif
                goto done;
            }
            if (cnt == nbytes) {
                /*
                 * no more room in the buffer and we haven't
                 * yet come across a NULL terminator
                 */
#if KDEBUG
                nbytes = *lencopied;
#endif
                error = ENAMETOOLONG;
                break;
            }
            assert(cnt == bytes_copied);

            break;
        }
        if (error)
            break;
        if ((nbytes -= cnt) == 0)
            break;

        kernel_addr += cnt;
        user_base += NBPDE;
        user_offset = 0;

        if (nbytes > NBPDE)
            cnt = NBPDE;
        else
            cnt = nbytes;
    }
done:
    thread->machine.copyio_state = WINDOWS_CLOSED;

    KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
                 (unsigned)kernel_addr, (unsigned)nbytes, error, 0);

#if CONFIG_DTRACE
    thread->machine.specFlags &= ~CopyIOActive;
#endif /* CONFIG_DTRACE */

    return (error);
}

static int
copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
{
    pmap_paddr_t paddr;
    user_addr_t  vaddr;
    char         *window_offset;
    pt_entry_t   pentry;
    int          ctype;
    int          retval;
    boolean_t    istate;

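    /*
     * Build the PTE that will map the target physical page into this
     * cpu's single-page physwindow; the mapping is made writable only
     * when the physical page is the sink of the copy.
     */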
    if (which & cppvPsnk) {
        paddr = (pmap_paddr_t)sink;
        vaddr = (user_addr_t)source;
        ctype = COPYINPHYS;
        pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_RW);
    } else {
        paddr = (pmap_paddr_t)source;
        vaddr = (user_addr_t)sink;
        ctype = COPYOUTPHYS;
        pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME));
    }
    /* Fold in cache attributes for this physical page */
    pentry |= pmap_get_cache_attributes(i386_btop(paddr));
    window_offset = (char *)(uintptr_t)((uint32_t)paddr & (PAGE_SIZE - 1));

    assert(!((current_thread()->machine.specFlags & CopyIOActive) && ((which & cppvKmap) == 0)));

    if (current_thread()->machine.physwindow_busy) {
        pt_entry_t old_pentry;

        KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, -1, 0);
        /*
         * we had better be targeting wired memory at this point
         * we will not be able to handle a fault with interrupts
         * disabled... we disable them because we can't tolerate
         * being preempted during this nested use of the window
         */
        istate = ml_set_interrupts_enabled(FALSE);

        old_pentry = *(current_cpu_datap()->cpu_physwindow_ptep);
        pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);

        invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);

        retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);

        pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), old_pentry);

        invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);

        (void) ml_set_interrupts_enabled(istate);
    } else {
        /*
         * mark the window as in use... if an interrupt hits while we're
         * busy, or we trigger another copypv from the fault path into
         * the driver on a user address space page fault due to a copyin/out,
         * then we need to save and restore the current window state instead
         * of caching the window, preserving it across context switches
         */
        current_thread()->machine.physwindow_busy = 1;

        if (current_thread()->machine.physwindow_pte != pentry) {
            KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, 0, 0);

            current_thread()->machine.physwindow_pte = pentry;

            /*
             * preemption at this point would be bad since we
             * could end up on the other processor after we grabbed the
             * pointer to the current cpu data area, but before we finished
             * using it to stuff the page table entry, since we would
             * be modifying a window that no longer belonged to us...
             * the invlpg can be done unprotected since it only flushes
             * this page address from the tlb... if it flushes the wrong
             * one, no harm is done, and the context switch that moved us
             * to the other processor will have already taken care of
             * flushing the tlb after it reloaded the page table from machine.physwindow_pte
             */
            istate = ml_set_interrupts_enabled(FALSE);

            pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);
            (void) ml_set_interrupts_enabled(istate);

            invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
        }
#if JOE_DEBUG
        else {
            if (pentry !=
                (*(current_cpu_datap()->cpu_physwindow_ptep) & (INTEL_PTE_VALID | PG_FRAME | INTEL_PTE_RW)))
                panic("copyio_phys: pentry != *physwindow_ptep");
        }
#endif
        retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);

        current_thread()->machine.physwindow_busy = 0;
    }
    return (retval);
}

int
copyinmsg(const user_addr_t user_addr, char *kernel_addr, mach_msg_size_t nbytes)
{
    return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
}

int
copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
{
    return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
}

int
copyinstr(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied)
{
    *lencopied = 0;

    return (copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0));
}

int
copyoutmsg(const char *kernel_addr, user_addr_t user_addr, mach_msg_size_t nbytes)
{
    return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
}

int
copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
{
    return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
}
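
/*
 * Usage sketch (illustrative): a typical caller moves a user buffer into
 * the kernel and a result back out, returning the copy error (EFAULT) on
 * failure; 'uargp' and 'uresp' are hypothetical user_addr_t arguments.
 *
 *      struct myargs a;
 *      int my_result;
 *
 *      if (copyin(uargp, (char *)&a, sizeof (a)))
 *              return (EFAULT);
 *      ...operate on 'a'...
 *      if (copyout(&my_result, uresp, sizeof (my_result)))
 *              return (EFAULT);
 */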

kern_return_t
copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
{
    unsigned int lop, csize;
    int bothphys = 0;

    KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
                 (unsigned)snk64, size, which, 0);

    if ((which & (cppvPsrc | cppvPsnk)) == 0)           /* Make sure that only one is virtual */
        panic("copypv: no more than 1 parameter may be virtual\n");  /* Not allowed */

    if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk))
        bothphys = 1;                                   /* both are physical */

    while (size) {

        if (bothphys) {
            lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));   /* Assume sink smallest */

            if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))))
                lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));  /* No, source is smaller */
        } else {
            /*
             * only need to compute the resid for the physical page
             * address... we don't care about where we start/finish in
             * the virtual since we just call the normal copyin/copyout
             */
            if (which & cppvPsrc)
                lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
            else
                lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
        }
        csize = size;           /* Assume we can copy it all */
        if (lop < size)
            csize = lop;        /* Nope, we can't do it all */
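
        /*
         * Example (assuming the usual 4 KB PAGE_SIZE): for a physical sink
         * at 0x1fff0 with a page-aligned source and size 0x100, lop is 0x10
         * (the residual of the sink's page), so only 0x10 bytes are moved
         * on this pass and the loop continues from the next page boundary.
         */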
#if 0
        /*
         * flush_dcache64 is currently a nop on the i386...
         * it's used when copying to non-system memory such
         * as video capture cards... on PPC there was a need
         * to flush due to how we mapped this memory... not
         * sure if it's needed on i386.
         */
        if (which & cppvFsrc)
            flush_dcache64(src64, csize, 1);    /* If requested, flush source before move */
        if (which & cppvFsnk)
            flush_dcache64(snk64, csize, 1);    /* If requested, flush sink before move */
#endif
        if (bothphys) {
            bcopy_phys(src64, snk64, csize);    /* Do a physical copy, virtually */
        }
        else {
            if (copyio_phys(src64, snk64, csize, which)) {
                return (KERN_FAILURE);
            }
        }
#if 0
        if (which & cppvFsrc)
            flush_dcache64(src64, csize, 1);    /* If requested, flush source after move */
        if (which & cppvFsnk)
            flush_dcache64(snk64, csize, 1);    /* If requested, flush sink after move */
#endif
        size -= csize;          /* Calculate what is left */
        snk64 += csize;         /* Bump sink to next physical address */
        src64 += csize;         /* Bump source to next physical address */
    }
    KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
                 (unsigned)snk64, size, which, 0);

    return KERN_SUCCESS;
}
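
/*
 * Usage sketch (illustrative): a page-to-page copy where both addresses
 * are physical; 'src_pa' and 'dst_pa' are hypothetical page-aligned
 * physical addresses supplied by the caller.
 *
 *      if (copypv(src_pa, dst_pa, PAGE_SIZE,
 *                 cppvPsrc | cppvPsnk) != KERN_SUCCESS)
 *              ...handle failure...
 */
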
void
copy_window_fault(thread_t thread, vm_map_t map, int window)
{
    pt_entry_t *updp;
    pt_entry_t *kpdp;

    /*
     * in case there was no page table assigned
     * for the user base address and the pmap
     * got 'expanded' due to this fault, we'll
     * copy in the descriptor
     *
     * we're either setting the page table descriptor
     * to the same value or it was 0... no need
     * for a TLB flush in either case
     */

    updp = pmap_pde(map->pmap, thread->machine.copy_window[window].user_base);
    assert(updp);
    if (0 == updp) panic("trap: updp 0");       /* XXX DEBUG */
    kpdp = current_cpu_datap()->cpu_copywindow_pdp;
    kpdp += window;

#if JOE_DEBUG
    if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME))
        panic("kernel_fault: user pdp doesn't match - updp = 0x%qx, kpdp = 0x%qx\n", *updp, *kpdp);
#endif
    pmap_store_pte(kpdp, *updp);
}
621 }