]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/loose_ends.c
xnu-1504.7.4.tar.gz
[apple/xnu.git] / osfmk / i386 / loose_ends.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 #include <mach_assert.h>
59
60 #include <string.h>
61 #include <mach/boolean.h>
62 #include <mach/i386/vm_types.h>
63 #include <mach/i386/vm_param.h>
64 #include <kern/kern_types.h>
65 #include <kern/misc_protos.h>
66 #include <sys/errno.h>
67 #include <i386/param.h>
68 #include <i386/misc_protos.h>
69 #include <i386/cpu_data.h>
70 #include <i386/machine_routines.h>
71 #include <i386/cpuid.h>
72 #include <i386/vmx.h>
73 #include <vm/pmap.h>
74 #include <vm/vm_map.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_fault.h>
77
78 #include <libkern/OSAtomic.h>
79 #include <sys/kdebug.h>
80
81
82 #if 0
83
84 #undef KERNEL_DEBUG
85 #define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
86 #define KDEBUG 1
87
88 #endif
89
90 /* XXX - should be gone from here */
91 extern void invalidate_icache64(addr64_t addr, unsigned cnt, int phys);
92 extern void flush_dcache64(addr64_t addr, unsigned count, int phys);
93 extern boolean_t phys_page_exists(ppnum_t);
94 extern void bcopy_no_overwrite(const char *from, char *to,vm_size_t bytes);
95 extern void pmap_set_reference(ppnum_t pn);
96 extern void mapping_set_mod(ppnum_t pa);
97 extern void mapping_set_ref(ppnum_t pn);
98
99 extern void ovbcopy(const char *from,
100 char *to,
101 vm_size_t nbytes);
102 void machine_callstack(natural_t *buf, vm_size_t callstack_max);
103
104
/* Isolate the upper 32 bits of a 64-bit quantity (non-zero iff any high bit is set). */
#define value_64bit(v)	((v) & ~0xFFFFFFFFULL)
/* Truncate a 64-bit quantity to its lower 32 bits. */
#define low32(v)	((unsigned int)((v) & 0xFFFFFFFFLL))
107
108
109
110
111 void
112 bzero_phys_nc(
113 addr64_t src64,
114 uint32_t bytes)
115 {
116 bzero_phys(src64,bytes);
117 }
118
119 void
120 bzero_phys(
121 addr64_t src64,
122 uint32_t bytes)
123 {
124 mapwindow_t *map;
125
126 mp_disable_preemption();
127
128 map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)src64 & PG_FRAME) | INTEL_PTE_REF | INTEL_PTE_MOD));
129
130 bzero((void *)((uintptr_t)map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK)), bytes);
131
132 pmap_put_mapwindow(map);
133
134 mp_enable_preemption();
135 }
136
137
138 /*
139 * bcopy_phys - like bcopy but copies from/to physical addresses.
140 */
141
142 void
143 bcopy_phys(
144 addr64_t src64,
145 addr64_t dst64,
146 vm_size_t bytes)
147 {
148 mapwindow_t *src_map, *dst_map;
149
150 /* ensure we stay within a page */
151 if ( ((((uint32_t)src64 & (NBPG-1)) + bytes) > NBPG) || ((((uint32_t)dst64 & (NBPG-1)) + bytes) > NBPG) ) {
152 panic("bcopy_phys alignment");
153 }
154 mp_disable_preemption();
155
156 src_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | ((pmap_paddr_t)src64 & PG_FRAME) | INTEL_PTE_REF));
157 dst_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)dst64 & PG_FRAME) |
158 INTEL_PTE_REF | INTEL_PTE_MOD));
159
160 bcopy((void *) ((uintptr_t)src_map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK)),
161 (void *) ((uintptr_t)dst_map->prv_CADDR | ((uint32_t)dst64 & INTEL_OFFMASK)), bytes);
162
163 pmap_put_mapwindow(src_map);
164 pmap_put_mapwindow(dst_map);
165
166 mp_enable_preemption();
167 }
168
169 /*
170 * ovbcopy - like bcopy, but recognizes overlapping ranges and handles
171 * them correctly.
172 */
173
174 void
175 ovbcopy(
176 const char *from,
177 char *to,
178 vm_size_t bytes) /* num bytes to copy */
179 {
180 /* Assume that bcopy copies left-to-right (low addr first). */
181 if (from + bytes <= to || to + bytes <= from || to == from)
182 bcopy_no_overwrite(from, to, bytes); /* non-overlapping or no-op*/
183 else if (from > to)
184 bcopy_no_overwrite(from, to, bytes); /* overlapping but OK */
185 else {
186 /* to > from: overlapping, and must copy right-to-left. */
187 from += bytes - 1;
188 to += bytes - 1;
189 while (bytes-- > 0)
190 *to-- = *from--;
191 }
192 }
193
194
195 /*
196 * Read data from a physical address.
197 */
198
199
200 static unsigned int
201 ml_phys_read_data(pmap_paddr_t paddr, int size )
202 {
203 mapwindow_t *map;
204 unsigned int result;
205
206 mp_disable_preemption();
207
208 map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF));
209
210 switch (size) {
211 unsigned char s1;
212 unsigned short s2;
213 case 1:
214 s1 = *(unsigned char *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK));
215 result = s1;
216 break;
217 case 2:
218 s2 = *(unsigned short *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK));
219 result = s2;
220 break;
221 case 4:
222 default:
223 result = *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK));
224 break;
225 }
226 pmap_put_mapwindow(map);
227
228 mp_enable_preemption();
229
230 return result;
231 }
232
233 static unsigned long long
234 ml_phys_read_long_long(pmap_paddr_t paddr )
235 {
236 mapwindow_t *map;
237 unsigned long long result;
238
239 mp_disable_preemption();
240
241 map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF));
242
243 result = *(unsigned long long *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK));
244
245 pmap_put_mapwindow(map);
246
247 mp_enable_preemption();
248
249 return result;
250 }
251
252 unsigned int ml_phys_read( vm_offset_t paddr)
253 {
254 return ml_phys_read_data((pmap_paddr_t)paddr, 4);
255 }
256
257 unsigned int ml_phys_read_word(vm_offset_t paddr) {
258
259 return ml_phys_read_data((pmap_paddr_t)paddr, 4);
260 }
261
262 unsigned int ml_phys_read_64(addr64_t paddr64)
263 {
264 return ml_phys_read_data((pmap_paddr_t)paddr64, 4);
265 }
266
267 unsigned int ml_phys_read_word_64(addr64_t paddr64)
268 {
269 return ml_phys_read_data((pmap_paddr_t)paddr64, 4);
270 }
271
272 unsigned int ml_phys_read_half(vm_offset_t paddr)
273 {
274 return ml_phys_read_data((pmap_paddr_t)paddr, 2);
275 }
276
277 unsigned int ml_phys_read_half_64(addr64_t paddr64)
278 {
279 return ml_phys_read_data((pmap_paddr_t)paddr64, 2);
280 }
281
282 unsigned int ml_phys_read_byte(vm_offset_t paddr)
283 {
284 return ml_phys_read_data((pmap_paddr_t)paddr, 1);
285 }
286
287 unsigned int ml_phys_read_byte_64(addr64_t paddr64)
288 {
289 return ml_phys_read_data((pmap_paddr_t)paddr64, 1);
290 }
291
292 unsigned long long ml_phys_read_double(vm_offset_t paddr)
293 {
294 return ml_phys_read_long_long((pmap_paddr_t)paddr);
295 }
296
297 unsigned long long ml_phys_read_double_64(addr64_t paddr64)
298 {
299 return ml_phys_read_long_long((pmap_paddr_t)paddr64);
300 }
301
302
303
304 /*
305 * Write data to a physical address.
306 */
307
308 static void
309 ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size)
310 {
311 mapwindow_t *map;
312
313 mp_disable_preemption();
314
315 map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) |
316 INTEL_PTE_REF | INTEL_PTE_MOD));
317
318 switch (size) {
319 case 1:
320 *(unsigned char *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (unsigned char)data;
321 break;
322 case 2:
323 *(unsigned short *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (unsigned short)data;
324 break;
325 case 4:
326 default:
327 *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = data;
328 break;
329 }
330 pmap_put_mapwindow(map);
331
332 mp_enable_preemption();
333 }
334
335 static void
336 ml_phys_write_long_long(pmap_paddr_t paddr, unsigned long long data)
337 {
338 mapwindow_t *map;
339
340 mp_disable_preemption();
341
342 map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) |
343 INTEL_PTE_REF | INTEL_PTE_MOD));
344
345 *(unsigned long long *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = data;
346
347 pmap_put_mapwindow(map);
348
349 mp_enable_preemption();
350 }
351
352
353
354 void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
355 {
356 ml_phys_write_data((pmap_paddr_t)paddr, data, 1);
357 }
358
359 void ml_phys_write_byte_64(addr64_t paddr64, unsigned int data)
360 {
361 ml_phys_write_data((pmap_paddr_t)paddr64, data, 1);
362 }
363
364 void ml_phys_write_half(vm_offset_t paddr, unsigned int data)
365 {
366 ml_phys_write_data((pmap_paddr_t)paddr, data, 2);
367 }
368
369 void ml_phys_write_half_64(addr64_t paddr64, unsigned int data)
370 {
371 ml_phys_write_data((pmap_paddr_t)paddr64, data, 2);
372 }
373
374 void ml_phys_write(vm_offset_t paddr, unsigned int data)
375 {
376 ml_phys_write_data((pmap_paddr_t)paddr, data, 4);
377 }
378
379 void ml_phys_write_64(addr64_t paddr64, unsigned int data)
380 {
381 ml_phys_write_data((pmap_paddr_t)paddr64, data, 4);
382 }
383
384 void ml_phys_write_word(vm_offset_t paddr, unsigned int data)
385 {
386 ml_phys_write_data((pmap_paddr_t)paddr, data, 4);
387 }
388
389 void ml_phys_write_word_64(addr64_t paddr64, unsigned int data)
390 {
391 ml_phys_write_data((pmap_paddr_t)paddr64, data, 4);
392 }
393
394 void ml_phys_write_double(vm_offset_t paddr, unsigned long long data)
395 {
396 ml_phys_write_long_long((pmap_paddr_t)paddr, data);
397 }
398
399 void ml_phys_write_double_64(addr64_t paddr64, unsigned long long data)
400 {
401 ml_phys_write_long_long((pmap_paddr_t)paddr64, data);
402 }
403
404
405 /* PCI config cycle probing
406 *
407 *
408 * Read the memory location at physical address paddr.
409 * This is a part of a device probe, so there is a good chance we will
410 * have a machine check here. So we have to be able to handle that.
411 * We assume that machine checks are enabled both in MSR and HIDs
412 */
413
414 boolean_t
415 ml_probe_read(vm_offset_t paddr, unsigned int *val)
416 {
417 if ((PAGE_SIZE - (paddr & PAGE_MASK)) < 4)
418 return FALSE;
419
420 *val = ml_phys_read(paddr);
421
422 return TRUE;
423 }
424
425 /*
426 * Read the memory location at physical address paddr.
427 * This is a part of a device probe, so there is a good chance we will
428 * have a machine check here. So we have to be able to handle that.
429 * We assume that machine checks are enabled both in MSR and HIDs
430 */
431 boolean_t
432 ml_probe_read_64(addr64_t paddr64, unsigned int *val)
433 {
434 if ((PAGE_SIZE - (paddr64 & PAGE_MASK)) < 4)
435 return FALSE;
436
437 *val = ml_phys_read_64((pmap_paddr_t)paddr64);
438 return TRUE;
439 }
440
441
/*
 * bcmp - compare the first len bytes of pa and pb.
 *
 * Returns 0 when the two ranges are identical (always the case for
 * len == 0) and 1 as soon as a differing byte is found.
 *
 * The previous implementation returned the number of bytes still to be
 * compared, converted from size_t to int; where size_t is wider than int
 * a non-zero count could truncate to 0 and be read as "equal".  Callers
 * may only rely on the zero / non-zero distinction.
 */
int
bcmp(
	const void	*pa,
	const void	*pb,
	size_t		len)
{
	const char *a = (const char *)pa;
	const char *b = (const char *)pb;

	while (len != 0) {
		if (*a++ != *b++)
			return 1;
		len--;
	}

	return 0;
}
460
/*
 * memcmp - compare the first n bytes of s1 and s2 as unsigned chars.
 *
 * Returns 0 if all n bytes match (including n == 0); otherwise the
 * difference between the first pair of differing bytes (s1 byte minus
 * s2 byte).
 */
int
memcmp(const void *s1, const void *s2, size_t n)
{
	const unsigned char *lhs = s1;
	const unsigned char *rhs = s2;
	size_t i;

	for (i = 0; i < n; i++) {
		if (lhs[i] != rhs[i])
			return (lhs[i] - rhs[i]);
	}
	return (0);
}
474
/*
 * Abstract:
 *      strlen returns the number of characters in "string" preceding
 *      the terminating null character.
 */
size_t
strlen(const char *string)
{
	size_t count = 0;

	while (string[count] != '\0')
		count++;

	return count;
}
491
492 uint32_t
493 hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
494 {
495 return OSCompareAndSwap((UInt32)oldval,
496 (UInt32)newval,
497 (volatile UInt32 *)dest);
498 }
499
#if	MACH_ASSERT

/*
 * Machine-dependent routine meant to fill in an array with up to
 * callstack_max levels of return pc information.
 *
 * This implementation is an empty stub: both arguments are ignored and
 * buf is left untouched.
 */
void
machine_callstack(__unused natural_t *buf, __unused vm_size_t callstack_max)
{
	/* intentionally empty */
}

#endif	/* MACH_ASSERT */
513
514 void fillPage(ppnum_t pa, unsigned int fill)
515 {
516 mapwindow_t *map;
517 pmap_paddr_t src;
518 int i;
519 int cnt = PAGE_SIZE/sizeof(unsigned int);
520 unsigned int *addr;
521
522 mp_disable_preemption();
523
524 src = i386_ptob(pa);
525 map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) |
526 INTEL_PTE_REF | INTEL_PTE_MOD));
527
528 for (i = 0, addr = (unsigned int *)map->prv_CADDR; i < cnt ; i++ )
529 *addr++ = fill;
530
531 pmap_put_mapwindow(map);
532
533 mp_enable_preemption();
534 }
535
/*
 * __sfence - issue the x86 SFENCE instruction.
 * The "memory" clobber additionally stops the compiler from moving
 * memory accesses across the fence.
 */
static inline void __sfence(void)
{
	__asm__ volatile("sfence" : : : "memory");
}
/*
 * __mfence - issue the x86 MFENCE instruction.
 * The "memory" clobber additionally stops the compiler from moving
 * memory accesses across the fence.
 */
static inline void __mfence(void)
{
	__asm__ volatile("mfence" : : : "memory");
}
/*
 * __wbinvd - issue the x86 WBINVD instruction.
 * The "memory" clobber stops the compiler from moving memory accesses
 * across the instruction.
 */
static inline void __wbinvd(void)
{
	__asm__ volatile("wbinvd" : : : "memory");
}
/*
 * __clflush - issue the x86 CLFLUSH instruction for the cache line that
 * contains ptr.
 * The "memory" clobber stops the compiler from moving stores to that
 * line past the flush.
 */
static inline void __clflush(void *ptr)
{
	__asm__ volatile("clflush (%0)" : : "r" (ptr) : "memory");
}
552
553 void dcache_incoherent_io_store64(addr64_t pa, unsigned int count)
554 {
555 mapwindow_t *map;
556 uint32_t linesize = cpuid_info()->cache_linesize;
557 addr64_t addr;
558 uint32_t offset, chunk;
559 boolean_t istate;
560
561 __mfence();
562
563 istate = ml_set_interrupts_enabled(FALSE);
564
565 offset = (uint32_t)(pa & (linesize - 1));
566 addr = pa - offset;
567
568 map = pmap_get_mapwindow((pt_entry_t)(i386_ptob(atop_64(addr)) | INTEL_PTE_VALID));
569
570 count += offset;
571 offset = (uint32_t)(addr & ((addr64_t) (page_size - 1)));
572 chunk = page_size - offset;
573
574 do
575 {
576 if (chunk > count)
577 chunk = count;
578
579 for (; offset < chunk; offset += linesize)
580 __clflush((void *)(((uintptr_t)map->prv_CADDR) + offset));
581
582 count -= chunk;
583 addr += chunk;
584 chunk = page_size;
585 offset = 0;
586
587 if (count) {
588 pmap_store_pte(map->prv_CMAP, (pt_entry_t)(i386_ptob(atop_64(addr)) | INTEL_PTE_VALID));
589 invlpg((uintptr_t)map->prv_CADDR);
590 }
591 }
592 while (count);
593
594 pmap_put_mapwindow(map);
595
596 (void) ml_set_interrupts_enabled(istate);
597
598 __mfence();
599 }
600
601 void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count)
602 {
603 return(dcache_incoherent_io_store64(pa,count));
604 }
605
606 void
607 flush_dcache64(__unused addr64_t addr,
608 __unused unsigned count,
609 __unused int phys)
610 {
611 }
612
613 void
614 invalidate_icache64(__unused addr64_t addr,
615 __unused unsigned count,
616 __unused int phys)
617 {
618 }
619
620
/*
 * Defined here but not referenced anywhere else in the visible part of
 * this file.
 * NOTE(review): presumably consumed by other translation units --
 * confirm before removing.
 */
621 addr64_t vm_last_addr;
622
623 void
624 mapping_set_mod(ppnum_t pn)
625 {
626 pmap_set_modify(pn);
627 }
628
629 void
630 mapping_set_ref(ppnum_t pn)
631 {
632 pmap_set_reference(pn);
633 }
634
635 void
636 cache_flush_page_phys(ppnum_t pa)
637 {
638 mapwindow_t *map;
639 boolean_t istate;
640 int i;
641 unsigned char *cacheline_addr;
642 int cacheline_size = cpuid_info()->cache_linesize;
643 int cachelines_in_page = PAGE_SIZE/cacheline_size;
644
645 __mfence();
646
647 istate = ml_set_interrupts_enabled(FALSE);
648
649 map = pmap_get_mapwindow((pt_entry_t)(i386_ptob(pa) | INTEL_PTE_VALID));
650
651 for (i = 0, cacheline_addr = (unsigned char *)map->prv_CADDR;
652 i < cachelines_in_page;
653 i++, cacheline_addr += cacheline_size) {
654 __clflush((void *) cacheline_addr);
655 }
656 pmap_put_mapwindow(map);
657
658 (void) ml_set_interrupts_enabled(istate);
659
660 __mfence();
661 }
662
663
664 /*
665 * the copy engine has the following characteristics
666 * - copyio handles copies to/from user or kernel space
667 * - copypv deals with physical or virtual addresses
668 *
669 * implementation details as follows
670 * - a cache of up to NCOPY_WINDOWS is maintained per thread for
671 * access of user virtual space
672 * - the window size is determined by the amount of virtual space
673 * that can be mapped by a single page table
674 * - the mapping is done by copying the page table pointer from
675 * the user's directory entry corresponding to the window's
676 * address in user space to the directory entry corresponding
677 * to the window slot in the kernel's address space
678 * - the set of mappings is preserved across context switches,
679 * so the copy can run with pre-emption enabled
680 * - there is a gdt entry set up to anchor the kernel window on
681 * each processor
682 * - the copies are done using the selector corresponding to the
683 * gdt entry
684 * - the addresses corresponding to the user virtual address are
685 * relative to the beginning of the window being used to map
686 * that region... thus the thread can be pre-empted and switched
687 * to a different processor while in the midst of a copy
688 * - the window caches must be invalidated if the pmap changes out
689 * from under the thread... this can happen during vfork/exec...
690 * inval_copy_windows is the invalidation routine to be used
691 * - the copyio engine has 4 different states associated with it
692 * that allows for lazy tlb flushes and the ability to avoid
693 * a flush altogether if we've just come from user space
694 * the 4 states are as follows...
695 *
696 * WINDOWS_OPENED - set by copyio to indicate to the context
697 * switch code that it is necessary to do a tlbflush after
698 * switching the windows since we're in the middle of a copy
699 *
700 * WINDOWS_CLOSED - set by copyio to indicate that it's done
701 * using the windows, so that the context switch code need
702 * not do the tlbflush... instead it will set the state to...
703 *
704 * WINDOWS_DIRTY - set by the context switch code to indicate
705 * to the copy engine that it is responsible for doing a
706 * tlbflush before using the windows again... it's also
707 * set by the inval_copy_windows routine to indicate the
708 * same responsibility.
709 *
710 * WINDOWS_CLEAN - set by the return to user path to indicate
711 * that a tlbflush has happened and that there is no need
712 * for copyio to do another when it is entered next...
713 *
714 * - a window for mapping single physical pages is provided for copypv
715 * - this window is maintained across context switches and has the
716 * same characteristics as the user space windows w/r to pre-emption
717 */
718
719 extern int copyout_user(const char *, vm_offset_t, vm_size_t);
720 extern int copyout_kern(const char *, vm_offset_t, vm_size_t);
721 extern int copyin_user(const vm_offset_t, char *, vm_size_t);
722 extern int copyin_kern(const vm_offset_t, char *, vm_size_t);
723 extern int copyoutphys_user(const char *, vm_offset_t, vm_size_t);
724 extern int copyoutphys_kern(const char *, vm_offset_t, vm_size_t);
725 extern int copyinphys_user(const vm_offset_t, char *, vm_size_t);
726 extern int copyinphys_kern(const vm_offset_t, char *, vm_size_t);
727 extern int copyinstr_user(const vm_offset_t, char *, vm_size_t, vm_size_t *);
728 extern int copyinstr_kern(const vm_offset_t, char *, vm_size_t, vm_size_t *);
729
730 static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
731 static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);
732
733
/*
 * Transfer kinds accepted by copyio() as its copy_type argument.
 * copyio() also folds the value into its kdebug trace code
 * (0xeff70010 + (copy_type << 2)).
 */
734 #define COPYIN 0
735 #define COPYOUT 1
736 #define COPYINSTR 2
737 #define COPYINPHYS 3
738 #define COPYOUTPHYS 4
739
740
741 void inval_copy_windows(thread_t thread)
742 {
743 int i;
744
745 for (i = 0; i < NCOPY_WINDOWS; i++) {
746 thread->machine.copy_window[i].user_base = -1;
747 }
748 thread->machine.nxt_window = 0;
749 thread->machine.copyio_state = WINDOWS_DIRTY;
750
751 KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (int)thread->map, 0, 0, 0);
752 }
753
754
/*
 * copyio - common engine behind copyin/copyout/copyinstr and the
 * physical-window variants used by copyio_phys().
 *
 * copy_type       one of COPYIN, COPYOUT, COPYINSTR, COPYINPHYS, COPYOUTPHYS
 * user_addr       address on the "user" side of the transfer; used directly
 *                 as a kernel virtual address when the current thread's
 *                 pmap is kernel_pmap or use_kernel_map is non-zero
 * kernel_addr     address on the kernel side of the transfer
 * nbytes          number of bytes to move (buffer size for COPYINSTR)
 * lencopied       COPYINSTR only: running total of bytes copied; not
 *                 dereferenced on the user path for other copy types
 * use_kernel_map  non-zero forces the *_kern path
 *
 * Returns 0 on success (including nbytes == 0), EFAULT when the user range
 * wraps or extends beyond vm_map_max() of the thread's map, ENAMETOOLONG
 * for COPYINSTR when the buffer is exhausted before a NUL is seen, or
 * otherwise whatever error the low-level copy routine reported.
 *
 * On the user path the range is handled in pieces that never cross an
 * NBPDE-aligned boundary.  For each piece a per-thread copy window is
 * located (or set up by copying the user's page directory entry into the
 * per-cpu window slot), the TLB is flushed if the windows are dirty, and
 * the *_user routine is called with an offset relative to the window area.
 */
755 static int
756 copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
757 vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
758 {
759 thread_t thread;
760 pmap_t pmap;
761 pt_entry_t *updp;
762 pt_entry_t *kpdp;
763 user_addr_t user_base;
764 vm_offset_t user_offset;
765 vm_offset_t kern_vaddr;
766 vm_size_t cnt;
767 vm_size_t bytes_copied;
768 int error = 0;
769 int window_index;
770 int copyio_state;
771 boolean_t istate;
772 #if KDEBUG
773 int debug_type = 0xeff70010;
774 debug_type += (copy_type << 2);
775 #endif
776
777 thread = current_thread();
778
779 KERNEL_DEBUG(debug_type | DBG_FUNC_START, (int)(user_addr >> 32), (int)user_addr,
780 (int)nbytes, thread->machine.copyio_state, 0);
781
/* a zero-length transfer succeeds without touching any window state */
782 if (nbytes == 0) {
783 KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
784 (unsigned)kernel_addr, (unsigned)nbytes, 0, 0);
785 return (0);
786 }
787 pmap = thread->map->pmap;
788
/* kernel-map path: no windows involved, dispatch straight to the *_kern routines */
789 if (pmap == kernel_pmap || use_kernel_map) {
790
791 kern_vaddr = (vm_offset_t)user_addr;
792
793 switch (copy_type) {
794
795 case COPYIN:
796 error = copyin_kern(kern_vaddr, kernel_addr, nbytes);
797 break;
798
799 case COPYOUT:
800 error = copyout_kern(kernel_addr, kern_vaddr, nbytes);
801 break;
802
803 case COPYINSTR:
804 error = copyinstr_kern(kern_vaddr, kernel_addr, nbytes, lencopied);
805 break;
806
807 case COPYINPHYS:
808 error = copyinphys_kern(kern_vaddr, kernel_addr, nbytes);
809 break;
810
811 case COPYOUTPHYS:
812 error = copyoutphys_kern(kernel_addr, kern_vaddr, nbytes);
813 break;
814 }
815 KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)kern_vaddr,
816 (unsigned)kernel_addr, (unsigned)nbytes,
817 error | 0x80000000, 0);
818 return (error);
819 }
820
821 #if CONFIG_DTRACE
822 thread->machine.specFlags |= CopyIOActive;
823 #endif /* CONFIG_DTRACE */
824
/* reject ranges that wrap around or run past the end of the thread's map */
825 if ((nbytes && (user_addr + nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map))) {
826 error = EFAULT;
827 goto done;
828 }
829
/* split the address into its NBPDE-aligned window base and the offset within that window */
830 user_base = user_addr & ~((user_addr_t)(NBPDE - 1));
831 user_offset = (vm_offset_t)(user_addr & (NBPDE - 1));
832
833 KERNEL_DEBUG(debug_type | DBG_FUNC_NONE, (int)(user_base >> 32), (int)user_base,
834 (int)user_offset, 0, 0);
835
/* first piece: at most up to the end of the current window */
836 cnt = NBPDE - user_offset;
837
838 if (cnt > nbytes)
839 cnt = nbytes;
840
/* sample the previous window state and mark the windows opened, with interrupts off */
841 istate = ml_set_interrupts_enabled(FALSE);
842
843 copyio_state = thread->machine.copyio_state;
844 thread->machine.copyio_state = WINDOWS_OPENED;
845
846 (void) ml_set_interrupts_enabled(istate);
847
848
/* one iteration per window-sized piece of the transfer */
849 for (;;) {
850
/* look for an existing window that already maps user_base */
851 for (window_index = 0; window_index < NCOPY_WINDOWS; window_index++) {
852 if (thread->machine.copy_window[window_index].user_base == user_base)
853 break;
854 }
/* no hit: take the next slot in round-robin order and install the mapping */
855 if (window_index >= NCOPY_WINDOWS) {
856
857 window_index = thread->machine.nxt_window;
858 thread->machine.nxt_window++;
859
860 if (thread->machine.nxt_window >= NCOPY_WINDOWS)
861 thread->machine.nxt_window = 0;
862 thread->machine.copy_window[window_index].user_base = user_base;
863
864 /*
865 * it's necessary to disable pre-emption
866 * since I have to compute the kernel descriptor pointer
867 * for the new window
868 */
869 istate = ml_set_interrupts_enabled(FALSE);
870
871 updp = pmap_pde(pmap, user_base);
872
873 kpdp = current_cpu_datap()->cpu_copywindow_pdp;
874 kpdp += window_index;
875
/* copy the user's directory entry into the window slot (0 if the user has none) */
876 pmap_store_pte(kpdp, updp ? *updp : 0);
877
878 (void) ml_set_interrupts_enabled(istate);
879
880 copyio_state = WINDOWS_DIRTY;
881
882 KERNEL_DEBUG(0xeff70040 | DBG_FUNC_NONE, window_index,
883 (unsigned)user_base, (unsigned)updp,
884 (unsigned)kpdp, 0);
885
886 }
887 #if JOE_DEBUG
888 else {
889 istate = ml_set_interrupts_enabled(FALSE);
890
891 updp = pmap_pde(pmap, user_base);
892
893 kpdp = current_cpu_datap()->cpu_copywindow_pdp;
894
895 kpdp += window_index;
896
897 if ((*kpdp & PG_FRAME) != (*updp & PG_FRAME)) {
898 panic("copyio: user pdp mismatch - kpdp = 0x%qx, updp = 0x%qx\n", *kpdp, *updp);
899 }
900 (void) ml_set_interrupts_enabled(istate);
901 }
902 #endif
/* a dirty state (inherited or just caused by installing a window) requires a TLB flush before use */
903 if (copyio_state == WINDOWS_DIRTY) {
904 flush_tlb();
905
906 copyio_state = WINDOWS_CLEAN;
907
908 KERNEL_DEBUG(0xeff70054 | DBG_FUNC_NONE, window_index, 0, 0, 0, 0);
909 }
/* the *_user routines take an offset into the window area: slot index * NBPDE + offset in window */
910 user_offset += (window_index * NBPDE);
911
912 KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_offset,
913 (unsigned)kernel_addr, cnt, 0, 0);
914
915 switch (copy_type) {
916
917 case COPYIN:
918 error = copyin_user(user_offset, kernel_addr, cnt);
919 break;
920
921 case COPYOUT:
922 error = copyout_user(kernel_addr, user_offset, cnt);
923 break;
924
925 case COPYINPHYS:
926 error = copyinphys_user(user_offset, kernel_addr, cnt);
927 break;
928
929 case COPYOUTPHYS:
930 error = copyoutphys_user(kernel_addr, user_offset, cnt);
931 break;
932
933 case COPYINSTR:
934 error = copyinstr_user(user_offset, kernel_addr, cnt, &bytes_copied);
935
936 /*
937 * lencopied should be updated on success
938 * or ENAMETOOLONG... but not EFAULT
939 */
940 if (error != EFAULT)
941 *lencopied += bytes_copied;
942
943 /*
944 * if we still have room, then the ENAMETOOLONG
945 * is just an artifact of the buffer straddling
946 * a window boundary and we should continue
947 */
948 if (error == ENAMETOOLONG && nbytes > cnt)
949 error = 0;
950
951 if (error) {
952 #if KDEBUG
953 nbytes = *lencopied;
954 #endif
955 break;
956 }
957 if (*(kernel_addr + bytes_copied - 1) == 0) {
958 /*
959 * we found a NULL terminator... we're done
960 */
961 #if KDEBUG
962 nbytes = *lencopied;
963 #endif
964 goto done;
965 }
966 if (cnt == nbytes) {
967 /*
968 * no more room in the buffer and we haven't
969 * yet come across a NULL terminator
970 */
971 #if KDEBUG
972 nbytes = *lencopied;
973 #endif
974 error = ENAMETOOLONG;
975 break;
976 }
977 assert(cnt == bytes_copied);
978
979 break;
980 }
981 if (error)
982 break;
983 if ((nbytes -= cnt) == 0)
984 break;
985
/* advance to the start of the next window for the remaining bytes */
986 kernel_addr += cnt;
987 user_base += NBPDE;
988 user_offset = 0;
989
990 if (nbytes > NBPDE)
991 cnt = NBPDE;
992 else
993 cnt = nbytes;
994 }
995 done:
/* common exit for the user path: mark the windows closed (also reached from the EFAULT range check) */
996 thread->machine.copyio_state = WINDOWS_CLOSED;
997
998 KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
999 (unsigned)kernel_addr, (unsigned)nbytes, error, 0);
1000
1001 #if CONFIG_DTRACE
1002 thread->machine.specFlags &= ~CopyIOActive;
1003 #endif /* CONFIG_DTRACE */
1004
1005 return (error);
1006 }
1007
1008
/*
 * copyio_phys - move csize bytes between one physical page and a virtual
 * address by mapping the physical page at the per-cpu physical window
 * and handing the transfer to copyio().
 *
 * source, sink   the two addresses; `which` says which one is physical
 * csize          number of bytes to move
 * which          cppvPsnk set:   sink is physical, source is virtual;
 *                                uses COPYINPHYS with the window mapped
 *                                read/write
 *                cppvPsnk clear: source is physical, sink is virtual;
 *                                uses COPYOUTPHYS with the window mapped
 *                                without INTEL_PTE_RW
 *                cppvKmap:       passed to copyio() as use_kernel_map
 *
 * The kernel-side address passed to copyio() is only the offset of the
 * physical address within its page.
 *
 * Returns whatever copyio() returns.
 */
1009 static int
1010 copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
1011 {
1012 pmap_paddr_t paddr;
1013 user_addr_t vaddr;
1014 char *window_offset;
1015 pt_entry_t pentry;
1016 int ctype;
1017 int retval;
1018 boolean_t istate;
1019
1020 if (which & cppvPsnk) {
1021 paddr = (pmap_paddr_t)sink;
1022 vaddr = (user_addr_t)source;
1023 ctype = COPYINPHYS;
1024 pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_RW);
1025 } else {
1026 paddr = (pmap_paddr_t)source;
1027 vaddr = (user_addr_t)sink;
1028 ctype = COPYOUTPHYS;
1029 pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME));
1030 }
/* offset of the physical address within its page, carried in a pointer-typed variable */
1031 window_offset = (char *)((uint32_t)paddr & (PAGE_SIZE - 1));
1032
1033 assert(!((current_thread()->machine.specFlags & CopyIOActive) && ((which & cppvKmap) == 0)));
1034
/* nested use: borrow the window with interrupts off and restore the previous PTE afterwards */
1035 if (current_thread()->machine.physwindow_busy) {
1036 pt_entry_t old_pentry;
1037
1038 KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, -1, 0);
1039 /*
1040 * we had better be targeting wired memory at this point
1041 * we will not be able to handle a fault with interrupts
1042 * disabled... we disable them because we can't tolerate
1043 * being preempted during this nested use of the window
1044 */
1045 istate = ml_set_interrupts_enabled(FALSE);
1046
1047 old_pentry = *(current_cpu_datap()->cpu_physwindow_ptep);
1048 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);
1049
1050 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
1051
1052 retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);
1053
1054 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), old_pentry);
1055
1056 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
1057
1058 (void) ml_set_interrupts_enabled(istate);
1059 } else {
1060 /*
1061 * mark the window as in use... if an interrupt hits while we're
1062 * busy, or we trigger another copypv from the fault path into
1063 * the driver on a user address space page fault due to a copyin/out
1064 * then we need to save and restore the current window state instead
1065 * of caching the window preserving it across context switches
1066 */
1067 current_thread()->machine.physwindow_busy = 1;
1068
/* only rewrite the window PTE when it differs from the one cached in the thread */
1069 if (current_thread()->machine.physwindow_pte != pentry) {
1070 KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, 0, 0);
1071
1072 current_thread()->machine.physwindow_pte = pentry;
1073
1074 /*
1075 * preemption at this point would be bad since we
1076 * could end up on the other processor after we grabbed the
1077 * pointer to the current cpu data area, but before we finished
1078 * using it to stuff the page table entry since we would
1079 * be modifying a window that no longer belonged to us
1080 * the invlpg can be done unprotected since it only flushes
1081 * this page address from the tlb... if it flushes the wrong
1082 * one, no harm is done, and the context switch that moved us
1083 * to the other processor will have already taken care of
1084 * flushing the tlb after it reloaded the page table from machine.physwindow_pte
1085 */
1086 istate = ml_set_interrupts_enabled(FALSE);
1087
1088 pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);
1089 (void) ml_set_interrupts_enabled(istate);
1090
1091 invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
1092 }
1093 #if JOE_DEBUG
1094 else {
1095 if (pentry !=
1096 (*(current_cpu_datap()->cpu_physwindow_ptep) & (INTEL_PTE_VALID | PG_FRAME | INTEL_PTE_RW)))
1097 panic("copyio_phys: pentry != *physwindow_ptep");
1098 }
1099 #endif
1100 retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);
1101
1102 current_thread()->machine.physwindow_busy = 0;
1103 }
1104 return (retval);
1105 }
1106
1107 int
1108 copyinmsg(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
1109 {
1110 return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
1111 }
1112
1113 int
1114 copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
1115 {
1116 return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
1117 }
1118
1119 int
1120 copyinstr(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied)
1121 {
1122 *lencopied = 0;
1123
1124 return (copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0));
1125 }
1126
1127 int
1128 copyoutmsg(const char *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
1129 {
1130 return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
1131 }
1132
1133 int
1134 copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
1135 {
1136 return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
1137 }
1138
1139
1140 kern_return_t
1141 copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
1142 {
1143 unsigned int lop, csize;
1144 int bothphys = 0;
1145
1146 KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
1147 (unsigned)snk64, size, which, 0);
1148
1149 if ((which & (cppvPsrc | cppvPsnk)) == 0 ) /* Make sure that only one is virtual */
1150 panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */
1151
1152 if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk))
1153 bothphys = 1; /* both are physical */
1154
1155 while (size) {
1156
1157 if (bothphys) {
1158 lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); /* Assume sink smallest */
1159
1160 if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))))
1161 lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); /* No, source is smaller */
1162 } else {
1163 /*
1164 * only need to compute the resid for the physical page
1165 * address... we don't care about where we start/finish in
1166 * the virtual since we just call the normal copyin/copyout
1167 */
1168 if (which & cppvPsrc)
1169 lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
1170 else
1171 lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
1172 }
1173 csize = size; /* Assume we can copy it all */
1174 if (lop < size)
1175 csize = lop; /* Nope, we can't do it all */
1176 #if 0
1177 /*
1178 * flush_dcache64 is currently a nop on the i386...
1179 * it's used when copying to non-system memory such
1180 * as video capture cards... on PPC there was a need
1181 * to flush due to how we mapped this memory... not
1182 * sure if it's needed on i386.
1183 */
1184 if (which & cppvFsrc)
1185 flush_dcache64(src64, csize, 1); /* If requested, flush source before move */
1186 if (which & cppvFsnk)
1187 flush_dcache64(snk64, csize, 1); /* If requested, flush sink before move */
1188 #endif
1189 if (bothphys)
1190 bcopy_phys(src64, snk64, csize); /* Do a physical copy, virtually */
1191 else {
1192 if (copyio_phys(src64, snk64, csize, which))
1193 return (KERN_FAILURE);
1194 }
1195 #if 0
1196 if (which & cppvFsrc)
1197 flush_dcache64(src64, csize, 1); /* If requested, flush source after move */
1198 if (which & cppvFsnk)
1199 flush_dcache64(snk64, csize, 1); /* If requested, flush sink after move */
1200 #endif
1201 size -= csize; /* Calculate what is left */
1202 snk64 += csize; /* Bump sink to next physical address */
1203 src64 += csize; /* Bump source to next physical address */
1204 }
1205 KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
1206 (unsigned)snk64, size, which, 0);
1207
1208 return KERN_SUCCESS;
1209 }
1210
#if !MACH_KDP
/*
 * kdp_register_callout - empty stand-in compiled only when MACH_KDP is
 * not configured.
 */
void
kdp_register_callout(void)
{
	/* intentionally empty */
}
#endif
1217
1218 #if !CONFIG_VMX
1219 int host_vmxon(boolean_t exclusive __unused)
1220 {
1221 return VMX_UNSUPPORTED;
1222 }
1223
1224 void host_vmxoff(void)
1225 {
1226 return;
1227 }
1228 #endif