[apple/xnu.git] / bsd / kern / kern_mman.c
1 /*
2 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988 University of Utah.
30 * Copyright (c) 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * the Systems Programming Group of the University of Utah Computer
35 * Science Department.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
66 *
67 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 /*
77 * Mapped file (mmap) interface to VM
78 */
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/filedesc.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/resourcevar.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/acct.h>
88 #include <sys/wait.h>
89 #include <sys/file_internal.h>
90 #include <sys/vadvise.h>
91 #include <sys/trace.h>
92 #include <sys/mman.h>
93 #include <sys/conf.h>
94 #include <sys/stat.h>
95 #include <sys/ubc.h>
96 #include <sys/ubc_internal.h>
97 #include <sys/sysproto.h>
98 #if CONFIG_PROTECT
99 #include <sys/cprotect.h>
100 #endif
101
102 #include <sys/syscall.h>
103 #include <sys/kdebug.h>
104 #include <sys/bsdtask_info.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <mach/mach_traps.h>
111 #include <mach/vm_sync.h>
112 #include <mach/vm_behavior.h>
113 #include <mach/vm_inherit.h>
114 #include <mach/vm_statistics.h>
115 #include <mach/mach_vm.h>
116 #include <mach/vm_map.h>
117 #include <mach/host_priv.h>
118 #include <mach/sdt.h>
119
120 #include <machine/machine_routines.h>
121
122 #include <kern/cpu_number.h>
123 #include <kern/host.h>
124 #include <kern/task.h>
125 #include <kern/page_decrypt.h>
126
127 #include <IOKit/IOReturn.h>
128
129 #include <vm/vm_map.h>
130 #include <vm/vm_kern.h>
131 #include <vm/vm_pager.h>
132 #include <vm/vm_protos.h>
133
134 /*
135 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
136 * XXX usage is PROT_* from an interface perspective. Thus the values of
137 * XXX VM_PROT_* and PROT_* need to correspond.
138 */
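/*
 * Illustrative userspace sketch (not part of this file's build): callers pass
 * PROT_* and MAP_* values through this interface and the kernel translates
 * them into the corresponding VM_PROT_*/VM_FLAGS_* values.  The path
 * "/tmp/example" below is only a placeholder.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <err.h>
 *
 *	int fd = open("/tmp/example", O_RDONLY);
 *	if (fd < 0)
 *		err(1, "open");
 *	void *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap");
 */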
139 int
140 mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
141 {
142 /*
143 * Map in special device (must be SHARED) or file
144 */
145 struct fileproc *fp;
146 register struct vnode *vp;
147 int flags;
148 int prot;
149 int err=0;
150 vm_map_t user_map;
151 kern_return_t result;
152 vm_map_offset_t user_addr;
153 vm_map_size_t user_size;
154 vm_object_offset_t pageoff;
155 vm_object_offset_t file_pos;
156 int alloc_flags=0;
157 boolean_t docow;
158 vm_prot_t maxprot;
159 void *handle;
160 memory_object_t pager = MEMORY_OBJECT_NULL;
161 memory_object_control_t control;
162 int mapanon=0;
163 int fpref=0;
164 int error =0;
165 int fd = uap->fd;
166 int num_retries = 0;
167
168 /*
169 * Note that for UNIX03 conformance, additional parameter checking for the
170 * mmap() system call is done in libsyscall prior to entering the kernel.
171 * The sanity checks and argument validation done in this function are
172 * therefore not the only places where errnos can be returned.
173 */
174
175 user_map = current_map();
176 user_addr = (vm_map_offset_t)uap->addr;
177 user_size = (vm_map_size_t) uap->len;
178
179 AUDIT_ARG(addr, user_addr);
180 AUDIT_ARG(len, user_size);
181 AUDIT_ARG(fd, uap->fd);
182
183 prot = (uap->prot & VM_PROT_ALL);
184 #if 3777787
185 /*
186 * Since the hardware currently does not support writing without
187 * read-before-write, or execution-without-read, if the request is
188 * for write or execute access, we must imply read access as well;
189 * otherwise programs expecting this to work will fail to operate.
190 */
191 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
192 prot |= VM_PROT_READ;
193 #endif /* radar 3777787 */
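/*
 * Observable consequence of the above (illustrative userspace sketch): a
 * mapping requested with PROT_WRITE alone is also readable, because
 * VM_PROT_READ is implied by the kernel.
 *
 *	char *p = mmap(NULL, 4096, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
 *	p[0] = 1;		// write access, as requested
 *	char c = p[0];		// read also succeeds: PROT_READ was implied
 */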
194
195 flags = uap->flags;
196 vp = NULLVP;
197
198 /*
199 * The VM code does not have prototypes and the compiler does not do the
200 * right thing when you cast a 64-bit value and pass it in a function
201 * call.  So here it is.
202 */
203 file_pos = (vm_object_offset_t)uap->pos;
204
205
206 /* make sure mapping fits into numeric range etc */
207 if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
208 return (EINVAL);
209
210 /*
211 * Align the file position to a page boundary,
212 * and save its page offset component.
213 */
214 pageoff = (file_pos & vm_map_page_mask(user_map));
215 file_pos -= (vm_object_offset_t)pageoff;
216
217
218 /* Adjust size for rounding (on both ends). */
219 user_size += pageoff; /* low end... */
220 user_size = vm_map_round_page(user_size,
221 vm_map_page_mask(user_map)); /* hi end */
222
223 if (flags & MAP_JIT) {
224 if ((flags & MAP_FIXED) ||
225 (flags & MAP_SHARED) ||
226 !(flags & MAP_ANON) ||
227 (flags & MAP_RESILIENT_CODESIGN)) {
228 return EINVAL;
229 }
230 }
231
232 if ((flags & MAP_RESILIENT_CODESIGN) ||
233 (flags & MAP_RESILIENT_MEDIA)) {
234 assert(!(flags & MAP_JIT));
235 if (flags & MAP_ANON) {
236 return EINVAL;
237 }
238 if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
239 return EPERM;
240 }
241 }
242
243 /*
244 * Check for illegal addresses. Watch out for address wrap... Note
245 * that VM_*_ADDRESS are not constants due to casts (argh).
246 */
247 if (flags & MAP_FIXED) {
248 /*
249 * The specified address must have the same remainder
250 * as the file offset taken modulo PAGE_SIZE, so it
251 * should be aligned after adjustment by pageoff.
252 */
253 user_addr -= pageoff;
254 if (user_addr & vm_map_page_mask(user_map))
255 return (EINVAL);
256 }
257 #ifdef notyet
258 /* Do not have APIs to get this info; need to wait until then. */
259 /*
260 * XXX for non-fixed mappings where no hint is provided or
261 * the hint would fall in the potential heap space,
262 * place it after the end of the largest possible heap.
263 *
264 * There should really be a pmap call to determine a reasonable
265 * location.
266 */
267 else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
268 vm_map_page_mask(user_map)))
269 addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
270 vm_map_page_mask(user_map));
271
272 #endif
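/*
 * Illustrative example of the MAP_FIXED alignment check above, assuming a
 * 4 KB page size: a fixed address of 0x10001000 with file offset 0x2000 is
 * accepted (both have remainder 0 modulo the page size), while a fixed
 * address of 0x10000800 with the same offset is rejected with EINVAL.
 *
 *	// fails with EINVAL: addr and offset disagree modulo the page size
 *	void *p = mmap((void *)0x10000800, 4096, PROT_READ,
 *	    MAP_PRIVATE | MAP_FIXED, fd, 0x2000);
 */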
273
274 alloc_flags = 0;
275
276 if (flags & MAP_ANON) {
277
278 maxprot = VM_PROT_ALL;
279 #if CONFIG_MACF
280 /*
281 * Entitlement check.
282 */
283 error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
284 if (error) {
285 return EINVAL;
286 }
287 #endif /* MAC */
288
289 /*
290 * Mapping blank space is trivial. Use positive fds as the alias
291 * value for memory tracking.
292 */
293 if (fd != -1) {
294 /*
295 * Use "fd" to pass (some) Mach VM allocation flags,
296 * (see the VM_FLAGS_* definitions).
297 */
298 alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK |
299 VM_FLAGS_PURGABLE);
300 if (alloc_flags != fd) {
301 /* reject if there are any extra flags */
302 return EINVAL;
303 }
304 }
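/*
 * Illustrative userspace sketch of the fd-as-flags convention above: for an
 * anonymous mapping, the "fd" argument can carry a Mach VM alias/tag (via
 * VM_MAKE_TAG()), a superpage request, or VM_FLAGS_PURGABLE, all from
 * <mach/vm_statistics.h>.  VM_MEMORY_APPLICATION_SPECIFIC_1 is just one
 * example tag value.
 *
 *	#include <sys/mman.h>
 *	#include <mach/vm_statistics.h>
 *
 *	void *p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE,
 *	    VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
 */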
305
306 handle = NULL;
307 file_pos = 0;
308 mapanon = 1;
309 } else {
310 struct vnode_attr va;
311 vfs_context_t ctx = vfs_context_current();
312
313 if (flags & MAP_JIT)
314 return EINVAL;
315
316 /*
317 * Mapping file, get fp for validation. Obtain vnode and make
318 * sure it is of appropriate type.
319 */
320 err = fp_lookup(p, fd, &fp, 0);
321 if (err)
322 return(err);
323 fpref = 1;
324 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
325 case DTYPE_PSXSHM:
326 uap->addr = (user_addr_t)user_addr;
327 uap->len = (user_size_t)user_size;
328 uap->prot = prot;
329 uap->flags = flags;
330 uap->pos = file_pos;
331 error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
332 goto bad;
333 case DTYPE_VNODE:
334 break;
335 default:
336 error = EINVAL;
337 goto bad;
338 }
339 vp = (struct vnode *)fp->f_fglob->fg_data;
340 error = vnode_getwithref(vp);
341 if(error != 0)
342 goto bad;
343
344 if (vp->v_type != VREG && vp->v_type != VCHR) {
345 (void)vnode_put(vp);
346 error = EINVAL;
347 goto bad;
348 }
349
350 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
351
352 /*
353 * POSIX: mmap needs to update access time for mapped files
354 */
355 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
356 VATTR_INIT(&va);
357 nanotime(&va.va_access_time);
358 VATTR_SET_ACTIVE(&va, va_access_time);
359 vnode_setattr(vp, &va, ctx);
360 }
361
362 /*
363 * XXX hack to handle use of /dev/zero to map anon memory (ala
364 * SunOS).
365 */
366 if (vp->v_type == VCHR || vp->v_type == VSTR) {
367 (void)vnode_put(vp);
368 error = ENODEV;
369 goto bad;
370 } else {
371 /*
372 * Ensure that file and memory protections are
373 * compatible. Note that we only worry about
374 * writability if mapping is shared; in this case,
375 * current and max prot are dictated by the open file.
376 * XXX use the vnode instead? Problem is: what
377 * credentials do we use for determination? What if
378 * proc does a setuid?
379 */
380 maxprot = VM_PROT_EXECUTE; /* ??? */
381 if (fp->f_fglob->fg_flag & FREAD)
382 maxprot |= VM_PROT_READ;
383 else if (prot & PROT_READ) {
384 (void)vnode_put(vp);
385 error = EACCES;
386 goto bad;
387 }
388 /*
389 * If we are sharing potential changes (either via
390 * MAP_SHARED or via the implicit sharing of character
391 * device mappings), and we are trying to get write
392 * permission although we opened it without asking
393 * for it, bail out.
394 */
395
396 if ((flags & MAP_SHARED) != 0) {
397 if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
398 /*
399 * Do not allow writable mappings of
400 * swap files (see vm_swapfile_pager.c).
401 */
402 !vnode_isswap(vp)) {
403 /*
404 * check for write access
405 *
406 * Note that we already made this check when granting FWRITE
407 * against the file, so it seems redundant here.
408 */
409 error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
410
411 /* if not granted for any reason, but we wanted it, bad */
412 if ((prot & PROT_WRITE) && (error != 0)) {
413 vnode_put(vp);
414 goto bad;
415 }
416
417 /* if writable, remember */
418 if (error == 0)
419 maxprot |= VM_PROT_WRITE;
420
421 } else if ((prot & PROT_WRITE) != 0) {
422 (void)vnode_put(vp);
423 error = EACCES;
424 goto bad;
425 }
426 } else
427 maxprot |= VM_PROT_WRITE;
428
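/*
 * Illustrative userspace consequence of the checks above (assuming the
 * placeholder file exists and is at least one page long): requesting a
 * writable MAP_SHARED mapping of a descriptor opened read-only fails with
 * EACCES, while the same request with MAP_PRIVATE succeeds because the
 * mapping is copy-on-write.
 *
 *	int fd = open("/tmp/example", O_RDONLY);
 *	void *a = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);		// MAP_FAILED, errno == EACCES
 *	void *b = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE, fd, 0);	// succeeds, copy-on-write
 */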
429 handle = (void *)vp;
430 #if CONFIG_MACF
431 error = mac_file_check_mmap(vfs_context_ucred(ctx),
432 fp->f_fglob, prot, flags, file_pos, &maxprot);
433 if (error) {
434 (void)vnode_put(vp);
435 goto bad;
436 }
437 #endif /* MAC */
438
439 #if CONFIG_PROTECT
440 {
441 error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
442 if (error) {
443 (void) vnode_put(vp);
444 goto bad;
445 }
446 }
447 #endif /* CONFIG_PROTECT */
448 }
449 }
450
451 if (user_size == 0) {
452 if (!mapanon)
453 (void)vnode_put(vp);
454 error = 0;
455 goto bad;
456 }
457
458 /*
459 * We bend a little - round the start and end addresses
460 * to the nearest page boundary.
461 */
462 user_size = vm_map_round_page(user_size,
463 vm_map_page_mask(user_map));
464
465 if (file_pos & vm_map_page_mask(user_map)) {
466 if (!mapanon)
467 (void)vnode_put(vp);
468 error = EINVAL;
469 goto bad;
470 }
471
472 if ((flags & MAP_FIXED) == 0) {
473 alloc_flags |= VM_FLAGS_ANYWHERE;
474 user_addr = vm_map_round_page(user_addr,
475 vm_map_page_mask(user_map));
476 } else {
477 if (user_addr != vm_map_trunc_page(user_addr,
478 vm_map_page_mask(user_map))) {
479 if (!mapanon)
480 (void)vnode_put(vp);
481 error = EINVAL;
482 goto bad;
483 }
484 /*
485 * mmap(MAP_FIXED) will replace any existing mappings in the
486 * specified range, if the new mapping is successful.
487 * If we just deallocate the specified address range here,
488 * another thread might jump in and allocate memory in that
489 * range before we get a chance to establish the new mapping,
490 * and we won't have a chance to restore the old mappings.
491 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
492 * has to deallocate the existing mappings and establish the
493 * new ones atomically.
494 */
495 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
496 }
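/*
 * Illustrative userspace sketch of the MAP_FIXED semantics described above:
 * the second mmap() atomically replaces the first mapping at the same
 * address instead of failing or leaving a window where the range is
 * unmapped.
 *
 *	char *base = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	char *again = mmap(base, 4096, PROT_READ,
 *	    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
 *	// again == base on success; the old contents are gone
 */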
497
498 if (flags & MAP_NOCACHE)
499 alloc_flags |= VM_FLAGS_NO_CACHE;
500
501 if (flags & MAP_JIT) {
502 alloc_flags |= VM_FLAGS_MAP_JIT;
503 }
504
505 if (flags & MAP_RESILIENT_CODESIGN) {
506 alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
507 }
508
509 /*
510 * Lookup/allocate object.
511 */
512 if (handle == NULL) {
513 control = NULL;
514 #ifdef notyet
515 /* Hmm .. */
516 #if defined(VM_PROT_READ_IS_EXEC)
517 if (prot & VM_PROT_READ)
518 prot |= VM_PROT_EXECUTE;
519 if (maxprot & VM_PROT_READ)
520 maxprot |= VM_PROT_EXECUTE;
521 #endif
522 #endif
523
524 #if 3777787
525 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
526 prot |= VM_PROT_READ;
527 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
528 maxprot |= VM_PROT_READ;
529 #endif /* radar 3777787 */
530 map_anon_retry:
531 result = vm_map_enter_mem_object(user_map,
532 &user_addr, user_size,
533 0, alloc_flags,
534 IPC_PORT_NULL, 0, FALSE,
535 prot, maxprot,
536 (flags & MAP_SHARED) ?
537 VM_INHERIT_SHARE :
538 VM_INHERIT_DEFAULT);
539
540 /* If a non-binding address was specified for this anonymous
541 * mapping, retry the mapping with a zero base
542 * in the event the mapping operation failed due to
543 * lack of space between the address and the map's maximum.
544 */
545 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
546 user_addr = vm_map_page_size(user_map);
547 goto map_anon_retry;
548 }
549 } else {
550 if (vnode_isswap(vp)) {
551 /*
552 * Map swap files with a special pager
553 * that returns obfuscated contents.
554 */
555 control = NULL;
556 pager = swapfile_pager_setup(vp);
557 if (pager != MEMORY_OBJECT_NULL) {
558 control = swapfile_pager_control(pager);
559 }
560 } else {
561 control = ubc_getobject(vp, UBC_FLAGS_NONE);
562 }
563
564 if (control == NULL) {
565 (void)vnode_put(vp);
566 error = ENOMEM;
567 goto bad;
568 }
569
570 /*
571 * Set credentials:
572 * FIXME: if we're writing the file we need a way to
573 * ensure that someone doesn't replace our R/W creds
574 * with ones that only work for read.
575 */
576
577 ubc_setthreadcred(vp, p, current_thread());
578 docow = FALSE;
579 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
580 docow = TRUE;
581 }
582
583 #ifdef notyet
584 /* Hmm .. */
585 #if defined(VM_PROT_READ_IS_EXEC)
586 if (prot & VM_PROT_READ)
587 prot |= VM_PROT_EXECUTE;
588 if (maxprot & VM_PROT_READ)
589 maxprot |= VM_PROT_EXECUTE;
590 #endif
591 #endif /* notyet */
592
593 #if 3777787
594 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
595 prot |= VM_PROT_READ;
596 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
597 maxprot |= VM_PROT_READ;
598 #endif /* radar 3777787 */
599
600 map_file_retry:
601 if ((flags & MAP_RESILIENT_CODESIGN) ||
602 (flags & MAP_RESILIENT_MEDIA)) {
603 if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
604 assert(!mapanon);
605 vnode_put(vp);
606 error = EPERM;
607 goto bad;
608 }
609 /* strictly limit access to "prot" */
610 maxprot &= prot;
611 }
612 result = vm_map_enter_mem_object_control(user_map,
613 &user_addr, user_size,
614 0, alloc_flags,
615 control, file_pos,
616 docow, prot, maxprot,
617 (flags & MAP_SHARED) ?
618 VM_INHERIT_SHARE :
619 VM_INHERIT_DEFAULT);
620
621 /* If a non-binding address was specified for this file backed
622 * mapping, retry the mapping with a zero base
623 * in the event the mapping operation failed due to
624 * lack of space between the address and the map's maximum.
625 */
626 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
627 user_addr = vm_map_page_size(user_map);
628 goto map_file_retry;
629 }
630 }
631
632 if (!mapanon) {
633 (void)vnode_put(vp);
634 }
635
636 switch (result) {
637 case KERN_SUCCESS:
638 *retval = user_addr + pageoff;
639 error = 0;
640 break;
641 case KERN_INVALID_ADDRESS:
642 case KERN_NO_SPACE:
643 error = ENOMEM;
644 break;
645 case KERN_PROTECTION_FAILURE:
646 error = EACCES;
647 break;
648 default:
649 error = EINVAL;
650 break;
651 }
652 bad:
653 if (pager != MEMORY_OBJECT_NULL) {
654 /*
655 * Release the reference on the pager.
656 * If the mapping was successful, it now holds
657 * an extra reference.
658 */
659 memory_object_deallocate(pager);
660 }
661 if (fpref)
662 fp_drop(p, fd, fp, 0);
663
664 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
665 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
666 (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
667 return(error);
668 }
669
670 int
671 msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
672 {
673 __pthread_testcancel(1);
674 return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
675 }
676
677 int
678 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
679 {
680 mach_vm_offset_t addr;
681 mach_vm_size_t size;
682 int flags;
683 vm_map_t user_map;
684 int rv;
685 vm_sync_t sync_flags=0;
686
687 user_map = current_map();
688 addr = (mach_vm_offset_t) uap->addr;
689 size = (mach_vm_size_t)uap->len;
690 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
691 if (addr & vm_map_page_mask(user_map)) {
692 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
693 return EINVAL;
694 }
695 if (size == 0) {
696 /*
697 * We cannot support this properly without maintaining a
698 * list of all mmaps done.  We cannot use vm_map_entry, as entries could be
699 * split or coalesced by independent actions.  So instead of returning
700 * inaccurate results, let's just return an error for an invalid size
701 * specification.
702 */
703 return (EINVAL); /* XXX breaks posix apps */
704 }
705
706 flags = uap->flags;
707 /* disallow contradictory flags */
708 if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
709 return (EINVAL);
710
711 if (flags & MS_KILLPAGES)
712 sync_flags |= VM_SYNC_KILLPAGES;
713 if (flags & MS_DEACTIVATE)
714 sync_flags |= VM_SYNC_DEACTIVATE;
715 if (flags & MS_INVALIDATE)
716 sync_flags |= VM_SYNC_INVALIDATE;
717
718 if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
719 if (flags & MS_ASYNC)
720 sync_flags |= VM_SYNC_ASYNCHRONOUS;
721 else
722 sync_flags |= VM_SYNC_SYNCHRONOUS;
723 }
724
725 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */
726
727 rv = mach_vm_msync(user_map, addr, size, sync_flags);
728
729 switch (rv) {
730 case KERN_SUCCESS:
731 break;
732 case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
733 return (ENOMEM);
734 case KERN_FAILURE:
735 return (EIO);
736 default:
737 return (EINVAL);
738 }
739 return (0);
740 }
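/*
 * Illustrative userspace sketch of the flag handling above, assuming "fd"
 * refers to a file opened read/write: MS_SYNC and MS_ASYNC are mutually
 * exclusive, a zero length is rejected, and because VM_SYNC_CONTIGUOUS is
 * always set, a range containing unmapped holes reports ENOMEM.
 *
 *	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	p[0] = 'x';
 *	if (msync(p, 4096, MS_SYNC) != 0)
 *		err(1, "msync");
 *	// msync(p, 4096, MS_SYNC | MS_ASYNC) would fail with EINVAL
 */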
741
742
743 int
744 munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
745 {
746 mach_vm_offset_t user_addr;
747 mach_vm_size_t user_size;
748 kern_return_t result;
749 vm_map_t user_map;
750
751 user_map = current_map();
752 user_addr = (mach_vm_offset_t) uap->addr;
753 user_size = (mach_vm_size_t) uap->len;
754
755 AUDIT_ARG(addr, user_addr);
756 AUDIT_ARG(len, user_size);
757
758 if (user_addr & vm_map_page_mask(user_map)) {
759 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
760 return EINVAL;
761 }
762
763 if (user_addr + user_size < user_addr)
764 return(EINVAL);
765
766 if (user_size == 0) {
767 /* UNIX SPEC: size is 0, return EINVAL */
768 return EINVAL;
769 }
770
771 result = mach_vm_deallocate(user_map, user_addr, user_size);
772 if (result != KERN_SUCCESS) {
773 return(EINVAL);
774 }
775 return(0);
776 }
777
778 int
779 mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
780 {
781 register vm_prot_t prot;
782 mach_vm_offset_t user_addr;
783 mach_vm_size_t user_size;
784 kern_return_t result;
785 vm_map_t user_map;
786 #if CONFIG_MACF
787 int error;
788 #endif
789
790 AUDIT_ARG(addr, uap->addr);
791 AUDIT_ARG(len, uap->len);
792 AUDIT_ARG(value32, uap->prot);
793
794 user_map = current_map();
795 user_addr = (mach_vm_offset_t) uap->addr;
796 user_size = (mach_vm_size_t) uap->len;
797 prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));
798
799 if (user_addr & vm_map_page_mask(user_map)) {
800 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
801 return EINVAL;
802 }
803
804 #ifdef notyet
805 /* Hmm .. */
806 #if defined(VM_PROT_READ_IS_EXEC)
807 if (prot & VM_PROT_READ)
808 prot |= VM_PROT_EXECUTE;
809 #endif
810 #endif /* notyet */
811
812 #if 3936456
813 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
814 prot |= VM_PROT_READ;
815 #endif /* 3936456 */
816
817 #if CONFIG_MACF
818 /*
819 * The MAC check for mprotect is of limited use for two reasons:
820 * Without mmap revocation, the caller could have asked for the max
821 * protections initially instead of a reduced set, so an mprotect
822 * check would offer no new security.
823 * It is not possible to extract the vnode from the pager object(s)
824 * of the target memory range.
825 * However, the MAC check may be used to prevent a process from,
826 * e.g., making the stack executable.
827 */
828 error = mac_proc_check_mprotect(p, user_addr,
829 user_size, prot);
830 if (error)
831 return (error);
832 #endif
833
834 if(prot & VM_PROT_TRUSTED) {
835 #if CONFIG_DYNAMIC_CODE_SIGNING
836 /* CODE SIGNING ENFORCEMENT - JIT support */
837 /* The special protection value VM_PROT_TRUSTED requests that we treat
838 * this page as if it had a valid code signature.
839 * If this is enabled, there MUST be a MAC policy implementing the
840 * mac_proc_check_mprotect() hook above.  Otherwise, code signing will be
841 * compromised because the check would always succeed and thus any
842 * process could sign dynamically. */
843 result = vm_map_sign(
844 user_map,
845 vm_map_trunc_page(user_addr,
846 vm_map_page_mask(user_map)),
847 vm_map_round_page(user_addr+user_size,
848 vm_map_page_mask(user_map)));
849 switch (result) {
850 case KERN_SUCCESS:
851 break;
852 case KERN_INVALID_ADDRESS:
853 /* UNIX SPEC: for an invalid address range, return ENOMEM */
854 return ENOMEM;
855 default:
856 return EINVAL;
857 }
858 #else
859 return ENOTSUP;
860 #endif
861 }
862 prot &= ~VM_PROT_TRUSTED;
863
864 result = mach_vm_protect(user_map, user_addr, user_size,
865 FALSE, prot);
866 switch (result) {
867 case KERN_SUCCESS:
868 return (0);
869 case KERN_PROTECTION_FAILURE:
870 return (EACCES);
871 case KERN_INVALID_ADDRESS:
872 /* UNIX SPEC: for an invalid address range, return ENOMEM */
873 return ENOMEM;
874 }
875 return (EINVAL);
876 }
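/*
 * Illustrative userspace sketch of the behavior above: a request for
 * PROT_WRITE alone still leaves the page readable (VM_PROT_READ is added),
 * and mprotect() on a range that is no longer mapped returns ENOMEM per the
 * UNIX spec.
 *
 *	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	mprotect(p, 4096, PROT_WRITE);		// page remains readable too
 *	munmap(p, 4096);
 *	int rc = mprotect(p, 4096, PROT_READ);
 *	// rc == -1 here with errno == ENOMEM: the range is no longer mapped
 */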
877
878
879 int
880 minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
881 {
882 mach_vm_offset_t addr;
883 mach_vm_size_t size;
884 register vm_inherit_t inherit;
885 vm_map_t user_map;
886 kern_return_t result;
887
888 AUDIT_ARG(addr, uap->addr);
889 AUDIT_ARG(len, uap->len);
890 AUDIT_ARG(value32, uap->inherit);
891
892 addr = (mach_vm_offset_t)uap->addr;
893 size = (mach_vm_size_t)uap->len;
894 inherit = uap->inherit;
895
896 user_map = current_map();
897 result = mach_vm_inherit(user_map, addr, size,
898 inherit);
899 switch (result) {
900 case KERN_SUCCESS:
901 return (0);
902 case KERN_PROTECTION_FAILURE:
903 return (EACCES);
904 }
905 return (EINVAL);
906 }
907
908 int
909 madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
910 {
911 vm_map_t user_map;
912 mach_vm_offset_t start;
913 mach_vm_size_t size;
914 vm_behavior_t new_behavior;
915 kern_return_t result;
916
917 /*
918 * Since this routine is only advisory, we default to conservative
919 * behavior.
920 */
921 switch (uap->behav) {
922 case MADV_RANDOM:
923 new_behavior = VM_BEHAVIOR_RANDOM;
924 break;
925 case MADV_SEQUENTIAL:
926 new_behavior = VM_BEHAVIOR_SEQUENTIAL;
927 break;
928 case MADV_NORMAL:
929 new_behavior = VM_BEHAVIOR_DEFAULT;
930 break;
931 case MADV_WILLNEED:
932 new_behavior = VM_BEHAVIOR_WILLNEED;
933 break;
934 case MADV_DONTNEED:
935 new_behavior = VM_BEHAVIOR_DONTNEED;
936 break;
937 case MADV_FREE:
938 new_behavior = VM_BEHAVIOR_FREE;
939 break;
940 case MADV_ZERO_WIRED_PAGES:
941 new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
942 break;
943 case MADV_FREE_REUSABLE:
944 new_behavior = VM_BEHAVIOR_REUSABLE;
945 break;
946 case MADV_FREE_REUSE:
947 new_behavior = VM_BEHAVIOR_REUSE;
948 break;
949 case MADV_CAN_REUSE:
950 new_behavior = VM_BEHAVIOR_CAN_REUSE;
951 break;
952 case MADV_PAGEOUT:
953 #if MACH_ASSERT
954 new_behavior = VM_BEHAVIOR_PAGEOUT;
955 break;
956 #else /* MACH_ASSERT */
957 return ENOTSUP;
958 #endif /* MACH_ASSERT */
959 default:
960 return(EINVAL);
961 }
962
963 start = (mach_vm_offset_t) uap->addr;
964 size = (mach_vm_size_t) uap->len;
965
966
967 user_map = current_map();
968
969 result = mach_vm_behavior_set(user_map, start, size, new_behavior);
970 switch (result) {
971 case KERN_SUCCESS:
972 return 0;
973 case KERN_INVALID_ADDRESS:
974 return EINVAL;
975 case KERN_NO_SPACE:
976 return ENOMEM;
977 }
978
979 return EINVAL;
980 }
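/*
 * Illustrative userspace sketch of the MADV_* to VM_BEHAVIOR_* mapping
 * above: MADV_FREE_REUSABLE tells the VM system it may reclaim the pages
 * while the address range stays valid, and MADV_FREE_REUSE reverses that
 * before the buffer is used again.
 *
 *	char *p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	// ... use p ...
 *	madvise(p, 1 << 20, MADV_FREE_REUSABLE);  // contents may be discarded
 *	// ... later, before reusing the buffer ...
 *	madvise(p, 1 << 20, MADV_FREE_REUSE);
 */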
981
982 int
983 mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
984 {
985 mach_vm_offset_t addr, first_addr, end;
986 vm_map_t map;
987 user_addr_t vec;
988 int error;
989 int vecindex, lastvecindex;
990 int mincoreinfo=0;
991 int pqueryinfo;
992 kern_return_t ret;
993 int numref;
994
995 char c;
996
997 map = current_map();
998
999 /*
1000 * Make sure that the addresses presented are valid for user
1001 * mode.
1002 */
1003 first_addr = addr = vm_map_trunc_page(uap->addr,
1004 vm_map_page_mask(map));
1005 end = addr + vm_map_round_page(uap->len,
1006 vm_map_page_mask(map));
1007
1008 if (end < addr)
1009 return (EINVAL);
1010
1011 /*
1012 * Address of byte vector
1013 */
1014 vec = uap->vec;
1015
1016 map = current_map();
1017
1018 /*
1019 * Do this on a map entry basis so that if the pages are not
1020 * in the current processes address space, we can easily look
1021 * up the pages elsewhere.
1022 */
1023 lastvecindex = -1;
1024 for( ; addr < end; addr += PAGE_SIZE ) {
1025 pqueryinfo = 0;
1026 ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
1027 if (ret != KERN_SUCCESS)
1028 pqueryinfo = 0;
1029 mincoreinfo = 0;
1030 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
1031 mincoreinfo |= MINCORE_INCORE;
1032 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
1033 mincoreinfo |= MINCORE_REFERENCED;
1034 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
1035 mincoreinfo |= MINCORE_MODIFIED;
1036
1037
1038 /*
1039 * calculate index into user supplied byte vector
1040 */
1041 vecindex = (addr - first_addr)>> PAGE_SHIFT;
1042
1043 /*
1044 * If we have skipped map entries, we need to make sure that
1045 * the byte vector is zeroed for those skipped entries.
1046 */
1047 while((lastvecindex + 1) < vecindex) {
1048 c = 0;
1049 error = copyout(&c, vec + lastvecindex, 1);
1050 if (error) {
1051 return (EFAULT);
1052 }
1053 ++lastvecindex;
1054 }
1055
1056 /*
1057 * Pass the page information to the user
1058 */
1059 c = (char)mincoreinfo;
1060 error = copyout(&c, vec + vecindex, 1);
1061 if (error) {
1062 return (EFAULT);
1063 }
1064 lastvecindex = vecindex;
1065 }
1066
1067
1068 /*
1069 * Zero the last entries in the byte vector.
1070 */
1071 vecindex = (end - first_addr) >> PAGE_SHIFT;
1072 while((lastvecindex + 1) < vecindex) {
1073 c = 0;
1074 error = copyout(&c, vec + lastvecindex, 1);
1075 if (error) {
1076 return (EFAULT);
1077 }
1078 ++lastvecindex;
1079 }
1080
1081 return (0);
1082 }
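/*
 * Illustrative userspace sketch of the per-page query loop above: one byte
 * of the result vector is filled in for every page of the range, with
 * MINCORE_INCORE set when that page is resident.
 *
 *	size_t len = 4 * 4096;
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	char vec[4];
 *	p[0] = 1;				// touch the first page only
 *	if (mincore(p, len, vec) == 0 && (vec[0] & MINCORE_INCORE))
 *		printf("first page is resident\n");
 */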
1083
1084 int
1085 mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
1086 {
1087 vm_map_t user_map;
1088 vm_map_offset_t addr;
1089 vm_map_size_t size, pageoff;
1090 kern_return_t result;
1091
1092 AUDIT_ARG(addr, uap->addr);
1093 AUDIT_ARG(len, uap->len);
1094
1095 addr = (vm_map_offset_t) uap->addr;
1096 size = (vm_map_size_t)uap->len;
1097
1098 /* disable wrap around */
1099 if (addr + size < addr)
1100 return (EINVAL);
1101
1102 if (size == 0)
1103 return (0);
1104
1105 user_map = current_map();
1106 pageoff = (addr & vm_map_page_mask(user_map));
1107 addr -= pageoff;
1108 size = vm_map_round_page(size+pageoff, vm_map_page_mask(user_map));
1109
1110 /* have to call vm_map_wire directly to pass "I don't know" protections */
1111 result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK), TRUE);
1112
1113 if (result == KERN_RESOURCE_SHORTAGE)
1114 return EAGAIN;
1115 else if (result != KERN_SUCCESS)
1116 return ENOMEM;
1117
1118 return 0; /* KERN_SUCCESS */
1119 }
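/*
 * Illustrative userspace sketch: mlock() wires every page covering
 * [addr, addr + len), rounding to page boundaries as above, and reports
 * EAGAIN when the wiring limit is exhausted.
 *
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	if (mlock(p, 4096) != 0)
 *		err(1, "mlock");
 *	// ... the page stays resident until munlock()/munmap() ...
 *	munlock(p, 4096);
 */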
1120
1121 int
1122 munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
1123 {
1124 mach_vm_offset_t addr;
1125 mach_vm_size_t size;
1126 vm_map_t user_map;
1127 kern_return_t result;
1128
1129 AUDIT_ARG(addr, uap->addr);
1130 AUDIT_ARG(len, uap->len);
1131
1132 addr = (mach_vm_offset_t) uap->addr;
1133 size = (mach_vm_size_t)uap->len;
1134 user_map = current_map();
1135
1136 /* JMM - need to remove all wirings by spec - this just removes one */
1137 result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
1138 return (result == KERN_SUCCESS ? 0 : ENOMEM);
1139 }
1140
1141
1142 int
1143 mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
1144 {
1145 return (ENOSYS);
1146 }
1147
1148 int
1149 munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
1150 {
1151 return(ENOSYS);
1152 }
1153
1154 #if CONFIG_CODE_DECRYPTION
1155 int
1156 mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
1157 {
1158 mach_vm_offset_t user_addr;
1159 mach_vm_size_t user_size;
1160 kern_return_t result;
1161 vm_map_t user_map;
1162 uint32_t cryptid;
1163 cpu_type_t cputype;
1164 cpu_subtype_t cpusubtype;
1165 pager_crypt_info_t crypt_info;
1166 const char * cryptname = 0;
1167 char *vpath;
1168 int len, ret;
1169 struct proc_regioninfo_internal pinfo;
1170 vnode_t vp;
1171 uintptr_t vnodeaddr;
1172 uint32_t vid;
1173
1174 AUDIT_ARG(addr, uap->addr);
1175 AUDIT_ARG(len, uap->len);
1176
1177 user_map = current_map();
1178 user_addr = (mach_vm_offset_t) uap->addr;
1179 user_size = (mach_vm_size_t) uap->len;
1180
1181 cryptid = uap->cryptid;
1182 cputype = uap->cputype;
1183 cpusubtype = uap->cpusubtype;
1184
1185 if (user_addr & vm_map_page_mask(user_map)) {
1186 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
1187 return EINVAL;
1188 }
1189
1190 switch(cryptid) {
1191 case 0:
1192 /* not encrypted, just an empty load command */
1193 return 0;
1194 case 1:
1195 cryptname="com.apple.unfree";
1196 break;
1197 case 0x10:
1198 /* some random cryptid that you could manually put into
1199 * your binary if you want NULL */
1200 cryptname="com.apple.null";
1201 break;
1202 default:
1203 return EINVAL;
1204 }
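/*
 * For reference (illustrative; an assumption about the caller, not something
 * enforced here): the cryptid above is normally taken from the binary's
 * LC_ENCRYPTION_INFO/LC_ENCRYPTION_INFO_64 load command, defined in
 * <mach-o/loader.h>.
 *
 *	struct encryption_info_command_64 ec;
 *	// ec.cryptid == 0    -> not encrypted (empty load command)
 *	// ec.cryptid == 1    -> "com.apple.unfree"
 *	// ec.cryptid == 0x10 -> "com.apple.null"
 */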
1205
1206 if (NULL == text_crypter_create) return ENOTSUP;
1207
1208 ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
1209 if (ret == 0 || !vnodeaddr) {
1210 /* No really, this returns 0 if the memory address is not backed by a file */
1211 return (EINVAL);
1212 }
1213
1214 vp = (vnode_t)vnodeaddr;
1215 if ((vnode_getwithvid(vp, vid)) == 0) {
1216 MALLOC_ZONE(vpath, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1217 if(vpath == NULL) {
1218 vnode_put(vp);
1219 return (ENOMEM);
1220 }
1221
1222 len = MAXPATHLEN;
1223 ret = vn_getpath(vp, vpath, &len);
1224 if(ret) {
1225 FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
1226 vnode_put(vp);
1227 return (ret);
1228 }
1229
1230 vnode_put(vp);
1231 } else {
1232 return (EINVAL);
1233 }
1234
1235 #if 0
1236 kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
1237 __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
1238 #endif
1239
1240 /* set up decrypter first */
1241 crypt_file_data_t crypt_data = {
1242 .filename = vpath,
1243 .cputype = cputype,
1244 .cpusubtype = cpusubtype };
1245 result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
1246 #if DEVELOPMENT || DEBUG
1247 printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
1248 p->p_pid, p->p_comm,
1249 user_map, (uint64_t) user_addr, (uint64_t) (user_addr + user_size),
1250 __FUNCTION__, vpath, result);
1251 #endif /* DEVELOPMENT || DEBUG */
1252 FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
1253
1254 if(result) {
1255 printf("%s: unable to create decrypter %s, kr=%d\n",
1256 __FUNCTION__, cryptname, result);
1257 if (result == kIOReturnNotPrivileged) {
1258 /* text encryption returned decryption failure */
1259 return (EPERM);
1260 } else {
1261 return (ENOMEM);
1262 }
1263 }
1264
1265 /* now remap using the decrypter */
1266 vm_object_offset_t crypto_backing_offset;
1267 crypto_backing_offset = -1; /* i.e. use map entry's offset */
1268 result = vm_map_apple_protected(user_map,
1269 user_addr,
1270 user_addr+user_size,
1271 crypto_backing_offset,
1272 &crypt_info);
1273 if (result) {
1274 printf("%s: mapping failed with %d\n", __FUNCTION__, result);
1275 }
1276
1277 if (result) {
1278 return (EPERM);
1279 }
1280 return 0;
1281 }
1282 #endif /* CONFIG_CODE_DECRYPTION */