]>
git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/copyio.c
82516b196f994ae4878eb0f85102f1a29614adc0
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 #include <mach_assert.h>
29 #include <sys/errno.h>
30 #include <i386/param.h>
31 #include <i386/misc_protos.h>
32 #include <i386/cpu_data.h>
33 #include <i386/machine_routines.h>
35 #include <vm/vm_map.h>
36 #include <vm/vm_kern.h>
37 #include <vm/vm_fault.h>
39 #include <sys/kdebug.h>
42 * the copy engine has the following characteristics
43 * - copyio handles copies to/from user or kernel space
44 * - copypv deals with physical or virtual addresses
46 * implementation details as follows
47 * - a cache of up to NCOPY_WINDOWS is maintained per thread for
48 * access of user virtual space
49 * - the window size is determined by the amount of virtual space
50 * that can be mapped by a single page table
51 * - the mapping is done by copying the page table pointer from
52 * the user's directory entry corresponding to the window's
53 * address in user space to the directory entry corresponding
54 * to the window slot in the kernel's address space
55 * - the set of mappings is preserved across context switches,
56 * so the copy can run with pre-emption enabled
57 * - there is a gdt entry set up to anchor the kernel window on
59 * - the copies are done using the selector corresponding to the
61 * - the addresses corresponding to the user virtual address are
62 * relative to the beginning of the window being used to map
63 * that region... thus the thread can be pre-empted and switched
64 * to a different processor while in the midst of a copy
65 * - the window caches must be invalidated if the pmap changes out
66 * from under the thread... this can happen during vfork/exec...
67 * inval_copy_windows is the invalidation routine to be used
68 * - the copyio engine has 4 different states associated with it
69 * that allows for lazy tlb flushes and the ability to avoid
70 * a flush altogether if we've just come from user space
71 * the 4 states are as follows...
73 * WINDOWS_OPENED - set by copyio to indicate to the context
74 * switch code that it is necessary to do a tlbflush after
75 * switching the windows since we're in the middle of a copy
77 * WINDOWS_CLOSED - set by copyio to indicate that it's done
78 * using the windows, so that the context switch code need
79 * not do the tlbflush... instead it will set the state to...
81 * WINDOWS_DIRTY - set by the context switch code to indicate
82 * to the copy engine that it is responsible for doing a
83 * tlbflush before using the windows again... it's also
84 * set by the inval_copy_windows routine to indicate the
85 * same responsibility.
87 * WINDOWS_CLEAN - set by the return to user path to indicate
88 * that a tlbflush has happened and that there is no need
89 * for copyio to do another when it is entered next...
91 * - a window for mapping single physical pages is provided for copypv
92 * - this window is maintained across context switches and has the
93 * same characteristics as the user space windows with respect to pre-emption
96 extern int copyout_user(const char *, vm_offset_t
, vm_size_t
);
97 extern int copyout_kern(const char *, vm_offset_t
, vm_size_t
);
98 extern int copyin_user(const vm_offset_t
, char *, vm_size_t
);
99 extern int copyin_kern(const vm_offset_t
, char *, vm_size_t
);
100 extern int copyoutphys_user(const char *, vm_offset_t
, vm_size_t
);
101 extern int copyoutphys_kern(const char *, vm_offset_t
, vm_size_t
);
102 extern int copyinphys_user(const vm_offset_t
, char *, vm_size_t
);
103 extern int copyinphys_kern(const vm_offset_t
, char *, vm_size_t
);
104 extern int copyinstr_user(const vm_offset_t
, char *, vm_size_t
, vm_size_t
*);
105 extern int copyinstr_kern(const vm_offset_t
, char *, vm_size_t
, vm_size_t
*);
107 static int copyio(int, user_addr_t
, char *, vm_size_t
, vm_size_t
*, int);
108 static int copyio_phys(addr64_t
, addr64_t
, vm_size_t
, int);
115 #define COPYOUTPHYS 4
117 void inval_copy_windows(thread_t thread
)
121 for (i
= 0; i
< NCOPY_WINDOWS
; i
++) {
122 thread
->machine
.copy_window
[i
].user_base
= -1;
124 thread
->machine
.nxt_window
= 0;
125 thread
->machine
.copyio_state
= WINDOWS_DIRTY
;
127 KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE
, (uintptr_t)thread_tid(thread
), (int)thread
->map
, 0, 0, 0);
132 copyio(int copy_type
, user_addr_t user_addr
, char *kernel_addr
,
133 vm_size_t nbytes
, vm_size_t
*lencopied
, int use_kernel_map
)
139 user_addr_t user_base
;
140 vm_offset_t user_offset
;
141 vm_offset_t kern_vaddr
;
143 vm_size_t bytes_copied
;
149 int debug_type
= 0xeff70010;
150 debug_type
+= (copy_type
<< 2);
153 thread
= current_thread();
155 KERNEL_DEBUG(debug_type
| DBG_FUNC_START
, (int)(user_addr
>> 32), (int)user_addr
,
156 (int)nbytes
, thread
->machine
.copyio_state
, 0);
159 KERNEL_DEBUG(debug_type
| DBG_FUNC_END
, (unsigned)user_addr
,
160 (unsigned)kernel_addr
, (unsigned)nbytes
, 0, 0);
163 pmap
= thread
->map
->pmap
;
165 if (pmap
== kernel_pmap
|| use_kernel_map
) {
167 kern_vaddr
= (vm_offset_t
)user_addr
;
172 error
= copyin_kern(kern_vaddr
, kernel_addr
, nbytes
);
176 error
= copyout_kern(kernel_addr
, kern_vaddr
, nbytes
);
180 error
= copyinstr_kern(kern_vaddr
, kernel_addr
, nbytes
, lencopied
);
184 error
= copyinphys_kern(kern_vaddr
, kernel_addr
, nbytes
);
188 error
= copyoutphys_kern(kernel_addr
, kern_vaddr
, nbytes
);
191 KERNEL_DEBUG(debug_type
| DBG_FUNC_END
, (unsigned)kern_vaddr
,
192 (unsigned)kernel_addr
, (unsigned)nbytes
,
193 error
| 0x80000000, 0);
198 thread
->machine
.specFlags
|= CopyIOActive
;
199 #endif /* CONFIG_DTRACE */
201 if ((nbytes
&& (user_addr
+ nbytes
<= user_addr
)) ||
202 (user_addr
< vm_map_min(thread
->map
)) ||
203 (user_addr
+ nbytes
> vm_map_max(thread
->map
))) {
208 user_base
= user_addr
& ~((user_addr_t
)(NBPDE
- 1));
209 user_offset
= (vm_offset_t
)(user_addr
& (NBPDE
- 1));
211 KERNEL_DEBUG(debug_type
| DBG_FUNC_NONE
, (int)(user_base
>> 32), (int)user_base
,
212 (int)user_offset
, 0, 0);
214 cnt
= NBPDE
- user_offset
;
219 istate
= ml_set_interrupts_enabled(FALSE
);
221 copyio_state
= thread
->machine
.copyio_state
;
222 thread
->machine
.copyio_state
= WINDOWS_OPENED
;
224 (void) ml_set_interrupts_enabled(istate
);
229 for (window_index
= 0; window_index
< NCOPY_WINDOWS
; window_index
++) {
230 if (thread
->machine
.copy_window
[window_index
].user_base
== user_base
)
233 if (window_index
>= NCOPY_WINDOWS
) {
235 window_index
= thread
->machine
.nxt_window
;
236 thread
->machine
.nxt_window
++;
238 if (thread
->machine
.nxt_window
>= NCOPY_WINDOWS
)
239 thread
->machine
.nxt_window
= 0;
242 * it's necessary to disable pre-emption
243 * since I have to compute the kernel descriptor pointer
246 istate
= ml_set_interrupts_enabled(FALSE
);
248 thread
->machine
.copy_window
[window_index
].user_base
= user_base
;
250 updp
= pmap_pde(pmap
, user_base
);
252 kpdp
= current_cpu_datap()->cpu_copywindow_pdp
;
253 kpdp
+= window_index
;
255 pmap_store_pte(kpdp
, updp
? *updp
: 0);
257 (void) ml_set_interrupts_enabled(istate
);
259 copyio_state
= WINDOWS_DIRTY
;
261 KERNEL_DEBUG(0xeff70040 | DBG_FUNC_NONE
, window_index
,
262 (unsigned)user_base
, (unsigned)updp
,
268 istate
= ml_set_interrupts_enabled(FALSE
);
270 updp
= pmap_pde(pmap
, user_base
);
272 kpdp
= current_cpu_datap()->cpu_copywindow_pdp
;
274 kpdp
+= window_index
;
276 if ((*kpdp
& PG_FRAME
) != (*updp
& PG_FRAME
)) {
277 panic("copyio: user pdp mismatch - kpdp = 0x%qx, updp = 0x%qx\n", *kpdp
, *updp
);
279 (void) ml_set_interrupts_enabled(istate
);
282 if (copyio_state
== WINDOWS_DIRTY
) {
285 copyio_state
= WINDOWS_CLEAN
;
287 KERNEL_DEBUG(0xeff70054 | DBG_FUNC_NONE
, window_index
, 0, 0, 0, 0);
289 user_offset
+= (window_index
* NBPDE
);
291 KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE
, (unsigned)user_offset
,
292 (unsigned)kernel_addr
, cnt
, 0, 0);
297 error
= copyin_user(user_offset
, kernel_addr
, cnt
);
301 error
= copyout_user(kernel_addr
, user_offset
, cnt
);
305 error
= copyinphys_user(user_offset
, kernel_addr
, cnt
);
309 error
= copyoutphys_user(kernel_addr
, user_offset
, cnt
);
313 error
= copyinstr_user(user_offset
, kernel_addr
, cnt
, &bytes_copied
);
316 * lencopied should be updated on success
317 * or ENAMETOOLONG... but not EFAULT
320 *lencopied
+= bytes_copied
;
323 * if we still have room, then the ENAMETOOLONG
324 * is just an artifact of the buffer straddling
325 * a window boundary and we should continue
327 if (error
== ENAMETOOLONG
&& nbytes
> cnt
)
336 if (*(kernel_addr
+ bytes_copied
- 1) == 0) {
338 * we found a NULL terminator... we're done
347 * no more room in the buffer and we haven't
348 * yet come across a NULL terminator
353 error
= ENAMETOOLONG
;
356 assert(cnt
== bytes_copied
);
362 if ((nbytes
-= cnt
) == 0)
375 thread
->machine
.copyio_state
= WINDOWS_CLOSED
;
377 KERNEL_DEBUG(debug_type
| DBG_FUNC_END
, (unsigned)user_addr
,
378 (unsigned)kernel_addr
, (unsigned)nbytes
, error
, 0);
381 thread
->machine
.specFlags
&= ~CopyIOActive
;
382 #endif /* CONFIG_DTRACE */
388 copyio_phys(addr64_t source
, addr64_t sink
, vm_size_t csize
, int which
)
399 if (which
& cppvPsnk
) {
400 paddr
= (pmap_paddr_t
)sink
;
401 vaddr
= (user_addr_t
)source
;
403 pentry
= (pt_entry_t
)(INTEL_PTE_VALID
| (paddr
& PG_FRAME
) | INTEL_PTE_RW
);
405 paddr
= (pmap_paddr_t
)source
;
406 vaddr
= (user_addr_t
)sink
;
408 pentry
= (pt_entry_t
)(INTEL_PTE_VALID
| (paddr
& PG_FRAME
));
410 /* Fold in cache attributes for this physical page */
411 pentry
|= pmap_get_cache_attributes(i386_btop(paddr
));
412 window_offset
= (char *)(uintptr_t)((uint32_t)paddr
& (PAGE_SIZE
- 1));
414 assert(!((current_thread()->machine
.specFlags
& CopyIOActive
) && ((which
& cppvKmap
) == 0)));
416 if (current_thread()->machine
.physwindow_busy
) {
417 pt_entry_t old_pentry
;
419 KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE
, paddr
, csize
, 0, -1, 0);
421 * we had better be targeting wired memory at this point
422 * we will not be able to handle a fault with interrupts
423 * disabled... we disable them because we can't tolerate
424 * being preempted during this nested use of the window
426 istate
= ml_set_interrupts_enabled(FALSE
);
428 old_pentry
= *(current_cpu_datap()->cpu_physwindow_ptep
);
429 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep
), pentry
);
431 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base
);
433 retval
= copyio(ctype
, vaddr
, window_offset
, csize
, NULL
, which
& cppvKmap
);
435 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep
), old_pentry
);
437 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base
);
439 (void) ml_set_interrupts_enabled(istate
);
442 * mark the window as in use... if an interrupt hits while we're
443 * busy, or we trigger another copypv from the fault path into
444 * the driver on a user address space page fault due to a copyin/out
445 * then we need to save and restore the current window state instead
446 * of caching the window and preserving it across context switches
448 current_thread()->machine
.physwindow_busy
= 1;
450 if (current_thread()->machine
.physwindow_pte
!= pentry
) {
451 KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE
, paddr
, csize
, 0, 0, 0);
453 current_thread()->machine
.physwindow_pte
= pentry
;
456 * preemption at this point would be bad since we
457 * could end up on the other processor after we grabbed the
458 * pointer to the current cpu data area, but before we finished
459 * using it to stuff the page table entry since we would
460 * be modifying a window that no longer belonged to us
461 * the invlpg can be done unprotected since it only flushes
462 * this page address from the tlb... if it flushes the wrong
463 * one, no harm is done, and the context switch that moved us
464 * to the other processor will have already taken care of
465 * flushing the tlb after it reloaded the page table from machine.physwindow_pte
467 istate
= ml_set_interrupts_enabled(FALSE
);
469 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep
), pentry
);
470 (void) ml_set_interrupts_enabled(istate
);
472 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base
);
477 (*(current_cpu_datap()->cpu_physwindow_ptep
) & (INTEL_PTE_VALID
| PG_FRAME
| INTEL_PTE_RW
)))
478 panic("copyio_phys: pentry != *physwindow_ptep");
481 retval
= copyio(ctype
, vaddr
, window_offset
, csize
, NULL
, which
& cppvKmap
);
483 current_thread()->machine
.physwindow_busy
= 0;
489 copyinmsg(const user_addr_t user_addr
, char *kernel_addr
, mach_msg_size_t nbytes
)
491 return (copyio(COPYIN
, user_addr
, kernel_addr
, nbytes
, NULL
, 0));
495 copyin(const user_addr_t user_addr
, char *kernel_addr
, vm_size_t nbytes
)
497 return (copyio(COPYIN
, user_addr
, kernel_addr
, nbytes
, NULL
, 0));
501 copyinstr(const user_addr_t user_addr
, char *kernel_addr
, vm_size_t nbytes
, vm_size_t
*lencopied
)
505 return (copyio(COPYINSTR
, user_addr
, kernel_addr
, nbytes
, lencopied
, 0));
509 copyoutmsg(const char *kernel_addr
, user_addr_t user_addr
, mach_msg_size_t nbytes
)
511 return (copyio(COPYOUT
, user_addr
, (char *)(uintptr_t)kernel_addr
, nbytes
, NULL
, 0));
515 copyout(const void *kernel_addr
, user_addr_t user_addr
, vm_size_t nbytes
)
517 return (copyio(COPYOUT
, user_addr
, (char *)(uintptr_t)kernel_addr
, nbytes
, NULL
, 0));
522 copypv(addr64_t src64
, addr64_t snk64
, unsigned int size
, int which
)
524 unsigned int lop
, csize
;
527 KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START
, (unsigned)src64
,
528 (unsigned)snk64
, size
, which
, 0);
530 if ((which
& (cppvPsrc
| cppvPsnk
)) == 0 ) /* Make sure that only one is virtual */
531 panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */
533 if ((which
& (cppvPsrc
| cppvPsnk
)) == (cppvPsrc
| cppvPsnk
))
534 bothphys
= 1; /* both are physical */
539 lop
= (unsigned int)(PAGE_SIZE
- (snk64
& (PAGE_SIZE
- 1))); /* Assume sink smallest */
541 if (lop
> (unsigned int)(PAGE_SIZE
- (src64
& (PAGE_SIZE
- 1))))
542 lop
= (unsigned int)(PAGE_SIZE
- (src64
& (PAGE_SIZE
- 1))); /* No, source is smaller */
545 * only need to compute the resid for the physical page
546 * address... we don't care about where we start/finish in
547 * the virtual since we just call the normal copyin/copyout
549 if (which
& cppvPsrc
)
550 lop
= (unsigned int)(PAGE_SIZE
- (src64
& (PAGE_SIZE
- 1)));
552 lop
= (unsigned int)(PAGE_SIZE
- (snk64
& (PAGE_SIZE
- 1)));
554 csize
= size
; /* Assume we can copy it all */
556 csize
= lop
; /* Nope, we can't do it all */
559 * flush_dcache64 is currently a nop on the i386...
560 * it's used when copying to non-system memory such
561 * as video capture cards... on PPC there was a need
562 * to flush due to how we mapped this memory... not
563 * sure if it's needed on i386.
565 if (which
& cppvFsrc
)
566 flush_dcache64(src64
, csize
, 1); /* If requested, flush source before move */
567 if (which
& cppvFsnk
)
568 flush_dcache64(snk64
, csize
, 1); /* If requested, flush sink before move */
571 bcopy_phys(src64
, snk64
, csize
); /* Do a physical copy, virtually */
574 if (copyio_phys(src64
, snk64
, csize
, which
)) {
575 return (KERN_FAILURE
);
579 if (which
& cppvFsrc
)
580 flush_dcache64(src64
, csize
, 1); /* If requested, flush source after move */
581 if (which
& cppvFsnk
)
582 flush_dcache64(snk64
, csize
, 1); /* If requested, flush sink after move */
584 size
-= csize
; /* Calculate what is left */
585 snk64
+= csize
; /* Bump sink to next physical address */
586 src64
+= csize
; /* Bump source to next physical address */
588 KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END
, (unsigned)src64
,
589 (unsigned)snk64
, size
, which
, 0);
594 copy_window_fault(thread_t thread
, vm_map_t map
, int window
)
600 * in case there was no page table assigned
601 * for the user base address and the pmap
602 * got 'expanded' due to this fault, we'll
603 * copy in the descriptor
605 * we're either setting the page table descriptor
606 * to the same value or it was 0... no need
607 * for a TLB flush in either case
610 updp
= pmap_pde(map
->pmap
, thread
->machine
.copy_window
[window
].user_base
);
612 if (0 == updp
) panic("trap: updp 0"); /* XXX DEBUG */
613 kpdp
= current_cpu_datap()->cpu_copywindow_pdp
;
617 if (*kpdp
&& (*kpdp
& PG_FRAME
) != (*updp
& PG_FRAME
))
618 panic("kernel_fault: user pdp doesn't match - updp = 0x%qx, kpdp = 0x%qx\n", *updp
, *kpdp
);
620 pmap_store_pte(kpdp
, *updp
);