1 /*
2 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988 University of Utah.
30 * Copyright (c) 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * the Systems Programming Group of the University of Utah Computer
35 * Science Department.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
66 *
67 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 /*
77 * Mapped file (mmap) interface to VM
78 */
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/filedesc.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/resourcevar.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/acct.h>
88 #include <sys/wait.h>
89 #include <sys/file_internal.h>
90 #include <sys/vadvise.h>
91 #include <sys/trace.h>
92 #include <sys/mman.h>
93 #include <sys/conf.h>
94 #include <sys/stat.h>
95 #include <sys/ubc.h>
96 #include <sys/ubc_internal.h>
97 #include <sys/sysproto.h>
98
99 #include <sys/syscall.h>
100 #include <sys/kdebug.h>
101 #include <sys/bsdtask_info.h>
102
103 #include <security/audit/audit.h>
104 #include <bsm/audit_kevents.h>
105
106 #include <mach/mach_types.h>
107 #include <mach/mach_traps.h>
108 #include <mach/vm_sync.h>
109 #include <mach/vm_behavior.h>
110 #include <mach/vm_inherit.h>
111 #include <mach/vm_statistics.h>
112 #include <mach/mach_vm.h>
113 #include <mach/vm_map.h>
114 #include <mach/host_priv.h>
115 #include <mach/sdt.h>
116
117 #include <machine/machine_routines.h>
118
119 #include <kern/cpu_number.h>
120 #include <kern/host.h>
121 #include <kern/task.h>
122 #include <kern/page_decrypt.h>
123
124 #include <IOKit/IOReturn.h>
125
126 #include <vm/vm_map.h>
127 #include <vm/vm_kern.h>
128 #include <vm/vm_pager.h>
129 #include <vm/vm_protos.h>
130
131 /*
132 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
133 * XXX usage is PROT_* from an interface perspective. Thus the values of
134 * XXX VM_PROT_* and PROT_* need to correspond.
135 */
136 int
137 mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
138 {
139 /*
140 * Map in special device (must be SHARED) or file
141 */
142 struct fileproc *fp;
143 struct vnode *vp;
144 int flags;
145 int prot;
146 int err=0;
147 vm_map_t user_map;
148 kern_return_t result;
149 vm_map_offset_t user_addr;
150 vm_map_size_t user_size;
151 vm_object_offset_t pageoff;
152 vm_object_offset_t file_pos;
153 int alloc_flags=0;
154 boolean_t docow;
155 vm_prot_t maxprot;
156 void *handle;
157 memory_object_t pager = MEMORY_OBJECT_NULL;
158 memory_object_control_t control;
159 int mapanon=0;
160 int fpref=0;
161 int error =0;
162 int fd = uap->fd;
163 int num_retries = 0;
164
165 /*
166 * Note that for UNIX03 conformance, there is additional parameter checking for
167 * the mmap() system call in libsyscall prior to entering the kernel. The sanity
168 * checks and argument validation done in this function are therefore not the
169 * only places from which errnos can be returned.
170 */
171
172 user_map = current_map();
173 user_addr = (vm_map_offset_t)uap->addr;
174 user_size = (vm_map_size_t) uap->len;
175
176 AUDIT_ARG(addr, user_addr);
177 AUDIT_ARG(len, user_size);
178 AUDIT_ARG(fd, uap->fd);
179
180 prot = (uap->prot & VM_PROT_ALL);
181 #if 3777787
182 /*
183 * Since the hardware currently does not support writing without
184 * read-before-write, or execution-without-read, if the request is
185 * for write or execute access, we must imply read access as well;
186 * otherwise programs expecting this to work will fail to operate.
187 */
188 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
189 prot |= VM_PROT_READ;
190 #endif /* radar 3777787 */
191
192 flags = uap->flags;
193 vp = NULLVP;
194
195 /*
196 * The vm code does not have prototypes & the compiler doesn't do the
197 * right thing when you cast a 64-bit value and pass it in a function
198 * call. So here it is.
199 */
200 file_pos = (vm_object_offset_t)uap->pos;
201
202
203 /* make sure mapping fits into numeric range etc */
204 if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
205 return (EINVAL);
206
207 /*
208 * Align the file position to a page boundary,
209 * and save its page offset component.
210 */
211 pageoff = (file_pos & vm_map_page_mask(user_map));
212 file_pos -= (vm_object_offset_t)pageoff;
213
214
215 /* Adjust size for rounding (on both ends). */
216 user_size += pageoff; /* low end... */
217 user_size = vm_map_round_page(user_size,
218 vm_map_page_mask(user_map)); /* hi end */
219
220 if (flags & MAP_JIT) {
221 if ((flags & MAP_FIXED) ||
222 (flags & MAP_SHARED) ||
223 !(flags & MAP_ANON) ||
224 (flags & MAP_RESILIENT_CODESIGN) ||
225 (flags & MAP_RESILIENT_MEDIA)) {
226 return EINVAL;
227 }
228 }
229
230 if ((flags & MAP_RESILIENT_CODESIGN) ||
231 (flags & MAP_RESILIENT_MEDIA)) {
232 if ((flags & MAP_ANON) ||
233 (flags & MAP_JIT)) {
234 return EINVAL;
235 }
236 if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
237 return EPERM;
238 }
239 }
240
241 /*
242 * Check for illegal addresses. Watch out for address wrap... Note
243 * that VM_*_ADDRESS are not constants due to casts (argh).
244 */
245 if (flags & MAP_FIXED) {
246 /*
247 * The specified address must have the same remainder
248 * as the file offset taken modulo PAGE_SIZE, so it
249 * should be aligned after adjustment by pageoff.
250 */
251 user_addr -= pageoff;
252 if (user_addr & vm_map_page_mask(user_map))
253 return (EINVAL);
254 }
255 #ifdef notyet
256 /* Do not have APIs to get this info; need to wait till then. */
257 /*
258 * XXX for non-fixed mappings where no hint is provided or
259 * the hint would fall in the potential heap space,
260 * place it after the end of the largest possible heap.
261 *
262 * There should really be a pmap call to determine a reasonable
263 * location.
264 */
265 else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
266 vm_map_page_mask(user_map)))
267 addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
268 vm_map_page_mask(user_map));
269
270 #endif
271
272 alloc_flags = 0;
273
274 if (flags & MAP_ANON) {
275
276 maxprot = VM_PROT_ALL;
277 #if CONFIG_MACF
278 /*
279 * Entitlement check.
280 */
281 error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
282 if (error) {
283 return EINVAL;
284 }
285 #endif /* MAC */
286
287 /*
288 * Mapping blank space is trivial. Use positive fds as the alias
289 * value for memory tracking.
290 */
291 if (fd != -1) {
292 /*
293 * Use "fd" to pass (some) Mach VM allocation flags,
294 * (see the VM_FLAGS_* definitions).
295 */
296 alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK |
297 VM_FLAGS_PURGABLE);
298 if (alloc_flags != fd) {
299 /* reject if there are any extra flags */
300 return EINVAL;
301 }
302 }
303
304 handle = NULL;
305 file_pos = 0;
306 mapanon = 1;
307 } else {
308 struct vnode_attr va;
309 vfs_context_t ctx = vfs_context_current();
310
311 if (flags & MAP_JIT)
312 return EINVAL;
313
314 /*
315 * Mapping file, get fp for validation. Obtain vnode and make
316 * sure it is of appropriate type.
317 */
318 err = fp_lookup(p, fd, &fp, 0);
319 if (err)
320 return(err);
321 fpref = 1;
322 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
323 case DTYPE_PSXSHM:
324 uap->addr = (user_addr_t)user_addr;
325 uap->len = (user_size_t)user_size;
326 uap->prot = prot;
327 uap->flags = flags;
328 uap->pos = file_pos;
329 error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
330 goto bad;
331 case DTYPE_VNODE:
332 break;
333 default:
334 error = EINVAL;
335 goto bad;
336 }
337 vp = (struct vnode *)fp->f_fglob->fg_data;
338 error = vnode_getwithref(vp);
339 if(error != 0)
340 goto bad;
341
342 if (vp->v_type != VREG && vp->v_type != VCHR) {
343 (void)vnode_put(vp);
344 error = EINVAL;
345 goto bad;
346 }
347
348 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
349
350 /*
351 * POSIX: mmap needs to update access time for mapped files
352 */
353 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
354 VATTR_INIT(&va);
355 nanotime(&va.va_access_time);
356 VATTR_SET_ACTIVE(&va, va_access_time);
357 vnode_setattr(vp, &va, ctx);
358 }
359
360 /*
361 * XXX hack to handle use of /dev/zero to map anon memory (ala
362 * SunOS).
363 */
364 if (vp->v_type == VCHR || vp->v_type == VSTR) {
365 (void)vnode_put(vp);
366 error = ENODEV;
367 goto bad;
368 } else {
369 /*
370 * Ensure that file and memory protections are
371 * compatible. Note that we only worry about
372 * writability if mapping is shared; in this case,
373 * current and max prot are dictated by the open file.
374 * XXX use the vnode instead? Problem is: what
375 * credentials do we use for determination? What if
376 * proc does a setuid?
377 */
378 maxprot = VM_PROT_EXECUTE; /* ??? */
379 if (fp->f_fglob->fg_flag & FREAD)
380 maxprot |= VM_PROT_READ;
381 else if (prot & PROT_READ) {
382 (void)vnode_put(vp);
383 error = EACCES;
384 goto bad;
385 }
386 /*
387 * If we are sharing potential changes (either via
388 * MAP_SHARED or via the implicit sharing of character
389 * device mappings), and we are trying to get write
390 * permission although we opened it without asking
391 * for it, bail out.
392 */
393
394 if ((flags & MAP_SHARED) != 0) {
395 if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
396 /*
397 * Do not allow writable mappings of
398 * swap files (see vm_swapfile_pager.c).
399 */
400 !vnode_isswap(vp)) {
401 /*
402 * check for write access
403 *
404 * Note that we already made this check when granting FWRITE
405 * against the file, so it seems redundant here.
406 */
407 error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
408
409 /* if not granted for any reason, but we wanted it, bad */
410 if ((prot & PROT_WRITE) && (error != 0)) {
411 vnode_put(vp);
412 goto bad;
413 }
414
415 /* if writable, remember */
416 if (error == 0)
417 maxprot |= VM_PROT_WRITE;
418
419 } else if ((prot & PROT_WRITE) != 0) {
420 (void)vnode_put(vp);
421 error = EACCES;
422 goto bad;
423 }
424 } else
425 maxprot |= VM_PROT_WRITE;
426
427 handle = (void *)vp;
428 #if CONFIG_MACF
429 error = mac_file_check_mmap(vfs_context_ucred(ctx),
430 fp->f_fglob, prot, flags, file_pos, &maxprot);
431 if (error) {
432 (void)vnode_put(vp);
433 goto bad;
434 }
435 #endif /* MAC */
436 }
437 }
438
439 if (user_size == 0) {
440 if (!mapanon)
441 (void)vnode_put(vp);
442 error = 0;
443 goto bad;
444 }
445
446 /*
447 * We bend a little - round the start and end addresses
448 * to the nearest page boundary.
449 */
450 user_size = vm_map_round_page(user_size,
451 vm_map_page_mask(user_map));
452
453 if (file_pos & vm_map_page_mask(user_map)) {
454 if (!mapanon)
455 (void)vnode_put(vp);
456 error = EINVAL;
457 goto bad;
458 }
459
460 if ((flags & MAP_FIXED) == 0) {
461 alloc_flags |= VM_FLAGS_ANYWHERE;
462 user_addr = vm_map_round_page(user_addr,
463 vm_map_page_mask(user_map));
464 } else {
465 if (user_addr != vm_map_trunc_page(user_addr,
466 vm_map_page_mask(user_map))) {
467 if (!mapanon)
468 (void)vnode_put(vp);
469 error = EINVAL;
470 goto bad;
471 }
472 /*
473 * mmap(MAP_FIXED) will replace any existing mappings in the
474 * specified range, if the new mapping is successful.
475 * If we just deallocate the specified address range here,
476 * another thread might jump in and allocate memory in that
477 * range before we get a chance to establish the new mapping,
478 * and we won't have a chance to restore the old mappings.
479 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
480 * has to deallocate the existing mappings and establish the
481 * new ones atomically.
482 */
483 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
484 }
485
486 if (flags & MAP_NOCACHE)
487 alloc_flags |= VM_FLAGS_NO_CACHE;
488
489 if (flags & MAP_JIT) {
490 alloc_flags |= VM_FLAGS_MAP_JIT;
491 }
492
493 if (flags & MAP_RESILIENT_CODESIGN) {
494 alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
495 }
496
497 /*
498 * Lookup/allocate object.
499 */
500 if (handle == NULL) {
501 control = NULL;
502 #ifdef notyet
503 /* Hmm .. */
504 #if defined(VM_PROT_READ_IS_EXEC)
505 if (prot & VM_PROT_READ)
506 prot |= VM_PROT_EXECUTE;
507 if (maxprot & VM_PROT_READ)
508 maxprot |= VM_PROT_EXECUTE;
509 #endif
510 #endif
511
512 #if 3777787
513 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
514 prot |= VM_PROT_READ;
515 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
516 maxprot |= VM_PROT_READ;
517 #endif /* radar 3777787 */
518 map_anon_retry:
519 result = vm_map_enter_mem_object(user_map,
520 &user_addr, user_size,
521 0, alloc_flags,
522 IPC_PORT_NULL, 0, FALSE,
523 prot, maxprot,
524 (flags & MAP_SHARED) ?
525 VM_INHERIT_SHARE :
526 VM_INHERIT_DEFAULT);
527
528 /* If a non-binding address was specified for this anonymous
529 * mapping, retry the mapping with a zero base
530 * in the event the mapping operation failed due to
531 * lack of space between the address and the map's maximum.
532 */
533 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
534 user_addr = vm_map_page_size(user_map);
535 goto map_anon_retry;
536 }
537 } else {
538 if (vnode_isswap(vp)) {
539 /*
540 * Map swap files with a special pager
541 * that returns obfuscated contents.
542 */
543 control = NULL;
544 pager = swapfile_pager_setup(vp);
545 if (pager != MEMORY_OBJECT_NULL) {
546 control = swapfile_pager_control(pager);
547 }
548 } else {
549 control = ubc_getobject(vp, UBC_FLAGS_NONE);
550 }
551
552 if (control == NULL) {
553 (void)vnode_put(vp);
554 error = ENOMEM;
555 goto bad;
556 }
557
558 /*
559 * Set credentials:
560 * FIXME: if we're writing the file we need a way to
561 * ensure that someone doesn't replace our R/W creds
562 * with ones that only work for read.
563 */
564
565 ubc_setthreadcred(vp, p, current_thread());
566 docow = FALSE;
567 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
568 docow = TRUE;
569 }
570
571 #ifdef notyet
572 /* Hmm .. */
573 #if defined(VM_PROT_READ_IS_EXEC)
574 if (prot & VM_PROT_READ)
575 prot |= VM_PROT_EXECUTE;
576 if (maxprot & VM_PROT_READ)
577 maxprot |= VM_PROT_EXECUTE;
578 #endif
579 #endif /* notyet */
580
581 #if 3777787
582 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
583 prot |= VM_PROT_READ;
584 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
585 maxprot |= VM_PROT_READ;
586 #endif /* radar 3777787 */
587
588 map_file_retry:
589 if ((flags & MAP_RESILIENT_CODESIGN) ||
590 (flags & MAP_RESILIENT_MEDIA)) {
591 if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
592 assert(!mapanon);
593 vnode_put(vp);
594 error = EPERM;
595 goto bad;
596 }
597 /* strictly limit access to "prot" */
598 maxprot &= prot;
599 }
600 result = vm_map_enter_mem_object_control(user_map,
601 &user_addr, user_size,
602 0, alloc_flags,
603 control, file_pos,
604 docow, prot, maxprot,
605 (flags & MAP_SHARED) ?
606 VM_INHERIT_SHARE :
607 VM_INHERIT_DEFAULT);
608
609 /* If a non-binding address was specified for this file backed
610 * mapping, retry the mapping with a zero base
611 * in the event the mapping operation failed due to
612 * lack of space between the address and the map's maximum.
613 */
614 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
615 user_addr = vm_map_page_size(user_map);
616 goto map_file_retry;
617 }
618 }
619
620 if (!mapanon) {
621 (void)vnode_put(vp);
622 }
623
624 switch (result) {
625 case KERN_SUCCESS:
626 *retval = user_addr + pageoff;
627 error = 0;
628 break;
629 case KERN_INVALID_ADDRESS:
630 case KERN_NO_SPACE:
631 error = ENOMEM;
632 break;
633 case KERN_PROTECTION_FAILURE:
634 error = EACCES;
635 break;
636 default:
637 error = EINVAL;
638 break;
639 }
640 bad:
641 if (pager != MEMORY_OBJECT_NULL) {
642 /*
643 * Release the reference on the pager.
644 * If the mapping was successful, it now holds
645 * an extra reference.
646 */
647 memory_object_deallocate(pager);
648 }
649 if (fpref)
650 fp_drop(p, fd, fp, 0);
651
652 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
653 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
654 (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
655 return(error);
656 }
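/*
 * Illustrative user-space sketch (not part of this file; assumes the
 * VM_MAKE_TAG and VM_MEMORY_* definitions from <mach/vm_statistics.h>):
 * with MAP_ANON, the "fd" argument can carry Mach VM allocation flags
 * such as an alias tag, which the code above accepts via
 * VM_FLAGS_ALIAS_MASK; any other bits in "fd" cause EINVAL.
 *
 *	#include <err.h>
 *	#include <sys/mman.h>
 *	#include <mach/vm_statistics.h>
 *
 *	void *p = mmap(NULL, 16384, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE,
 *	    VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap");
 */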
657
658 int
659 msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
660 {
661 __pthread_testcancel(1);
662 return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
663 }
664
665 int
666 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
667 {
668 mach_vm_offset_t addr;
669 mach_vm_size_t size;
670 int flags;
671 vm_map_t user_map;
672 int rv;
673 vm_sync_t sync_flags=0;
674
675 user_map = current_map();
676 addr = (mach_vm_offset_t) uap->addr;
677 size = (mach_vm_size_t)uap->len;
678 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
679 if (addr & vm_map_page_mask(user_map)) {
680 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
681 return EINVAL;
682 }
683 if (size == 0) {
684 /*
685 * We cannot support this properly without maintaining
686 * a list of all mmaps done. Cannot use vm_map_entry as they could be
687 * split or coalesced by independent actions. So instead of
688 * returning inaccurate results, let's just return an error for an
689 * invalid size specified.
690 */
691 return (EINVAL); /* XXX breaks posix apps */
692 }
693
694 flags = uap->flags;
695 /* disallow contradictory flags */
696 if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
697 return (EINVAL);
698
699 if (flags & MS_KILLPAGES)
700 sync_flags |= VM_SYNC_KILLPAGES;
701 if (flags & MS_DEACTIVATE)
702 sync_flags |= VM_SYNC_DEACTIVATE;
703 if (flags & MS_INVALIDATE)
704 sync_flags |= VM_SYNC_INVALIDATE;
705
706 if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
707 if (flags & MS_ASYNC)
708 sync_flags |= VM_SYNC_ASYNCHRONOUS;
709 else
710 sync_flags |= VM_SYNC_SYNCHRONOUS;
711 }
712
713 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */
714
715 rv = mach_vm_msync(user_map, addr, size, sync_flags);
716
717 switch (rv) {
718 case KERN_SUCCESS:
719 break;
720 case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
721 return (ENOMEM);
722 case KERN_FAILURE:
723 return (EIO);
724 default:
725 return (EINVAL);
726 }
727 return (0);
728 }
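/*
 * Illustrative user-space sketch (not part of this file): per the checks
 * above, the address must be page-aligned, the length must be non-zero,
 * and MS_SYNC and MS_ASYNC are mutually exclusive; ENOMEM indicates a
 * hole in the region being synced.
 *
 *	#include <err.h>
 *	#include <sys/mman.h>
 *
 *	if (msync(p, 16384, MS_SYNC) == -1)
 *		err(1, "msync");
 */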
729
730
731 int
732 munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
733 {
734 mach_vm_offset_t user_addr;
735 mach_vm_size_t user_size;
736 kern_return_t result;
737 vm_map_t user_map;
738
739 user_map = current_map();
740 user_addr = (mach_vm_offset_t) uap->addr;
741 user_size = (mach_vm_size_t) uap->len;
742
743 AUDIT_ARG(addr, user_addr);
744 AUDIT_ARG(len, user_size);
745
746 if (user_addr & vm_map_page_mask(user_map)) {
747 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
748 return EINVAL;
749 }
750
751 if (user_addr + user_size < user_addr)
752 return(EINVAL);
753
754 if (user_size == 0) {
755 /* UNIX SPEC: size is 0, return EINVAL */
756 return EINVAL;
757 }
758
759 result = mach_vm_deallocate(user_map, user_addr, user_size);
760 if (result != KERN_SUCCESS) {
761 return(EINVAL);
762 }
763 return(0);
764 }
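/*
 * Illustrative user-space sketch (not part of this file): per the checks
 * above, the address must be page-aligned, the length non-zero, and the
 * range must not wrap around; otherwise EINVAL is returned.
 *
 *	#include <err.h>
 *	#include <sys/mman.h>
 *
 *	if (munmap(p, 16384) == -1)
 *		err(1, "munmap");
 */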
765
766 int
767 mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
768 {
769 vm_prot_t prot;
770 mach_vm_offset_t user_addr;
771 mach_vm_size_t user_size;
772 kern_return_t result;
773 vm_map_t user_map;
774 #if CONFIG_MACF
775 int error;
776 #endif
777
778 AUDIT_ARG(addr, uap->addr);
779 AUDIT_ARG(len, uap->len);
780 AUDIT_ARG(value32, uap->prot);
781
782 user_map = current_map();
783 user_addr = (mach_vm_offset_t) uap->addr;
784 user_size = (mach_vm_size_t) uap->len;
785 prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));
786
787 if (user_addr & vm_map_page_mask(user_map)) {
788 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
789 return EINVAL;
790 }
791
792 #ifdef notyet
793 /* Hmm .. */
794 #if defined(VM_PROT_READ_IS_EXEC)
795 if (prot & VM_PROT_READ)
796 prot |= VM_PROT_EXECUTE;
797 #endif
798 #endif /* notyet */
799
800 #if 3936456
801 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
802 prot |= VM_PROT_READ;
803 #endif /* 3936456 */
804
805
806 #if CONFIG_MACF
807 /*
808 * The MAC check for mprotect is of limited use for two reasons:
809 * (1) without mmap revocation, the caller could have asked for the max
810 * protections initially instead of a reduced set, so an mprotect
811 * check would offer no new security;
812 * (2) it is not possible to extract the vnode from the pager object(s)
813 * of the target memory range.
814 * However, the MAC check may be used to prevent a process from,
815 * e.g., making the stack executable.
816 */
817 error = mac_proc_check_mprotect(p, user_addr,
818 user_size, prot);
819 if (error)
820 return (error);
821 #endif
822
823 if(prot & VM_PROT_TRUSTED) {
824 #if CONFIG_DYNAMIC_CODE_SIGNING
825 /* CODE SIGNING ENFORCEMENT - JIT support */
826 /* The special protection value VM_PROT_TRUSTED requests that we treat
827 * this page as if it had a valid code signature.
828 * If this is enabled, there MUST be a MAC policy implementing the
829 * mac_proc_check_mprotect() hook above. Otherwise, code signing will be
830 * compromised because the check would always succeed and thus any
831 * process could sign dynamically. */
832 result = vm_map_sign(
833 user_map,
834 vm_map_trunc_page(user_addr,
835 vm_map_page_mask(user_map)),
836 vm_map_round_page(user_addr+user_size,
837 vm_map_page_mask(user_map)));
838 switch (result) {
839 case KERN_SUCCESS:
840 break;
841 case KERN_INVALID_ADDRESS:
842 /* UNIX SPEC: for an invalid address range, return ENOMEM */
843 return ENOMEM;
844 default:
845 return EINVAL;
846 }
847 #else
848 return ENOTSUP;
849 #endif
850 }
851 prot &= ~VM_PROT_TRUSTED;
852
853 result = mach_vm_protect(user_map, user_addr, user_size,
854 FALSE, prot);
855 switch (result) {
856 case KERN_SUCCESS:
857 return (0);
858 case KERN_PROTECTION_FAILURE:
859 return (EACCES);
860 case KERN_INVALID_ADDRESS:
861 /* UNIX SPEC: for an invalid address range, return ENOMEM */
862 return ENOMEM;
863 }
864 return (EINVAL);
865 }
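/*
 * Illustrative user-space sketch (not part of this file): because of the
 * radar 3936456 workaround above, requesting PROT_WRITE or PROT_EXEC also
 * grants PROT_READ, and an invalid address range reports ENOMEM.
 *
 *	#include <err.h>
 *	#include <sys/mman.h>
 *
 *	if (mprotect(p, 16384, PROT_READ | PROT_WRITE) == -1)
 *		err(1, "mprotect");
 */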
866
867
868 int
869 minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
870 {
871 mach_vm_offset_t addr;
872 mach_vm_size_t size;
873 vm_inherit_t inherit;
874 vm_map_t user_map;
875 kern_return_t result;
876
877 AUDIT_ARG(addr, uap->addr);
878 AUDIT_ARG(len, uap->len);
879 AUDIT_ARG(value32, uap->inherit);
880
881 addr = (mach_vm_offset_t)uap->addr;
882 size = (mach_vm_size_t)uap->len;
883 inherit = uap->inherit;
884
885 user_map = current_map();
886 result = mach_vm_inherit(user_map, addr, size,
887 inherit);
888 switch (result) {
889 case KERN_SUCCESS:
890 return (0);
891 case KERN_PROTECTION_FAILURE:
892 return (EACCES);
893 }
894 return (EINVAL);
895 }
896
897 int
898 madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
899 {
900 vm_map_t user_map;
901 mach_vm_offset_t start;
902 mach_vm_size_t size;
903 vm_behavior_t new_behavior;
904 kern_return_t result;
905
906 /*
907 * Since this routine is only advisory, we default to conservative
908 * behavior.
909 */
910 switch (uap->behav) {
911 case MADV_RANDOM:
912 new_behavior = VM_BEHAVIOR_RANDOM;
913 break;
914 case MADV_SEQUENTIAL:
915 new_behavior = VM_BEHAVIOR_SEQUENTIAL;
916 break;
917 case MADV_NORMAL:
918 new_behavior = VM_BEHAVIOR_DEFAULT;
919 break;
920 case MADV_WILLNEED:
921 new_behavior = VM_BEHAVIOR_WILLNEED;
922 break;
923 case MADV_DONTNEED:
924 new_behavior = VM_BEHAVIOR_DONTNEED;
925 break;
926 case MADV_FREE:
927 new_behavior = VM_BEHAVIOR_FREE;
928 break;
929 case MADV_ZERO_WIRED_PAGES:
930 new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
931 break;
932 case MADV_FREE_REUSABLE:
933 new_behavior = VM_BEHAVIOR_REUSABLE;
934 break;
935 case MADV_FREE_REUSE:
936 new_behavior = VM_BEHAVIOR_REUSE;
937 break;
938 case MADV_CAN_REUSE:
939 new_behavior = VM_BEHAVIOR_CAN_REUSE;
940 break;
941 case MADV_PAGEOUT:
942 #if MACH_ASSERT
943 new_behavior = VM_BEHAVIOR_PAGEOUT;
944 break;
945 #else /* MACH_ASSERT */
946 return ENOTSUP;
947 #endif /* MACH_ASSERT */
948 default:
949 return(EINVAL);
950 }
951
952 start = (mach_vm_offset_t) uap->addr;
953 size = (mach_vm_size_t) uap->len;
954
955
956 user_map = current_map();
957
958 result = mach_vm_behavior_set(user_map, start, size, new_behavior);
959 switch (result) {
960 case KERN_SUCCESS:
961 return 0;
962 case KERN_INVALID_ADDRESS:
963 return EINVAL;
964 case KERN_NO_SPACE:
965 return ENOMEM;
966 }
967
968 return EINVAL;
969 }
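/*
 * Illustrative user-space sketch (not part of this file): the Darwin-specific
 * MADV_FREE_REUSABLE / MADV_FREE_REUSE pair handled above lets an allocator
 * mark idle anonymous pages as reclaimable and then reclaim the range before
 * touching it again.
 *
 *	#include <sys/mman.h>
 *
 *	madvise(buf, 16384, MADV_FREE_REUSABLE);	// pages may be discarded
 *	...
 *	madvise(buf, 16384, MADV_FREE_REUSE);		// about to reuse the range
 */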
970
971 int
972 mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
973 {
974 mach_vm_offset_t addr, first_addr, end;
975 vm_map_t map;
976 user_addr_t vec;
977 int error;
978 int vecindex, lastvecindex;
979 int mincoreinfo=0;
980 int pqueryinfo;
981 kern_return_t ret;
982 int numref;
983
984 char c;
985
986 map = current_map();
987
988 /*
989 * Make sure that the addresses presented are valid for user
990 * mode.
991 */
992 first_addr = addr = vm_map_trunc_page(uap->addr,
993 vm_map_page_mask(map));
994 end = addr + vm_map_round_page(uap->len,
995 vm_map_page_mask(map));
996
997 if (end < addr)
998 return (EINVAL);
999
1000 /*
1001 * Address of byte vector
1002 */
1003 vec = uap->vec;
1004
1005 map = current_map();
1006
1007 /*
1008 * Do this on a map entry basis so that if the pages are not
1009 * in the current process's address space, we can easily look
1010 * up the pages elsewhere.
1011 */
1012 lastvecindex = -1;
1013 for( ; addr < end; addr += PAGE_SIZE ) {
1014 pqueryinfo = 0;
1015 ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
1016 if (ret != KERN_SUCCESS)
1017 pqueryinfo = 0;
1018 mincoreinfo = 0;
1019 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
1020 mincoreinfo |= MINCORE_INCORE;
1021 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
1022 mincoreinfo |= MINCORE_REFERENCED;
1023 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
1024 mincoreinfo |= MINCORE_MODIFIED;
1025
1026
1027 /*
1028 * calculate index into user supplied byte vector
1029 */
1030 vecindex = (addr - first_addr)>> PAGE_SHIFT;
1031
1032 /*
1033 * If we have skipped map entries, we need to make sure that
1034 * the byte vector is zeroed for those skipped entries.
1035 */
1036 while((lastvecindex + 1) < vecindex) {
1037 c = 0;
1038 error = copyout(&c, vec + lastvecindex, 1);
1039 if (error) {
1040 return (EFAULT);
1041 }
1042 ++lastvecindex;
1043 }
1044
1045 /*
1046 * Pass the page information to the user
1047 */
1048 c = (char)mincoreinfo;
1049 error = copyout(&c, vec + vecindex, 1);
1050 if (error) {
1051 return (EFAULT);
1052 }
1053 lastvecindex = vecindex;
1054 }
1055
1056
1057 /*
1058 * Zero the last entries in the byte vector.
1059 */
1060 vecindex = (end - first_addr) >> PAGE_SHIFT;
1061 while((lastvecindex + 1) < vecindex) {
1062 c = 0;
1063 error = copyout(&c, vec + lastvecindex, 1);
1064 if (error) {
1065 return (EFAULT);
1066 }
1067 ++lastvecindex;
1068 }
1069
1070 return (0);
1071 }
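/*
 * Illustrative user-space sketch (not part of this file): mincore() expects
 * one byte per page in the (page-rounded) range, and each byte is a mask of
 * the MINCORE_* bits filled in above.
 *
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *	#include <sys/mman.h>
 *
 *	size_t ps = (size_t)getpagesize();
 *	size_t npages = (len + ps - 1) / ps;
 *	char *vec = malloc(npages);
 *	if (vec != NULL && mincore(p, len, vec) == 0 &&
 *	    (vec[0] & MINCORE_INCORE))
 *		;	// first page of the range is resident
 */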
1072
1073 int
1074 mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
1075 {
1076 vm_map_t user_map;
1077 vm_map_offset_t addr;
1078 vm_map_size_t size, pageoff;
1079 kern_return_t result;
1080
1081 AUDIT_ARG(addr, uap->addr);
1082 AUDIT_ARG(len, uap->len);
1083
1084 addr = (vm_map_offset_t) uap->addr;
1085 size = (vm_map_size_t)uap->len;
1086
1087 /* disable wrap around */
1088 if (addr + size < addr)
1089 return (EINVAL);
1090
1091 if (size == 0)
1092 return (0);
1093
1094 user_map = current_map();
1095 pageoff = (addr & vm_map_page_mask(user_map));
1096 addr -= pageoff;
1097 size = vm_map_round_page(size+pageoff, vm_map_page_mask(user_map));
1098
1099 /* have to call vm_map_wire directly to pass "I don't know" protections */
1100 result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK), TRUE);
1101
1102 if (result == KERN_RESOURCE_SHORTAGE)
1103 return EAGAIN;
1104 else if (result == KERN_PROTECTION_FAILURE)
1105 return EACCES;
1106 else if (result != KERN_SUCCESS)
1107 return ENOMEM;
1108
1109 return 0; /* KERN_SUCCESS */
1110 }
1111
1112 int
1113 munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
1114 {
1115 mach_vm_offset_t addr;
1116 mach_vm_size_t size;
1117 vm_map_t user_map;
1118 kern_return_t result;
1119
1120 AUDIT_ARG(addr, uap->addr);
1121 AUDIT_ARG(len, uap->len);
1122
1123 addr = (mach_vm_offset_t) uap->addr;
1124 size = (mach_vm_size_t)uap->len;
1125 user_map = current_map();
1126
1127 /* JMM - need to remove all wirings by spec - this just removes one */
1128 result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
1129 return (result == KERN_SUCCESS ? 0 : ENOMEM);
1130 }
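/*
 * Illustrative user-space sketch (not part of this file): mlock() above
 * rounds the start down and the length up to page boundaries, so wiring a
 * single byte wires the whole page containing it; EAGAIN signals a resource
 * shortage rather than a bad range.
 *
 *	#include <sys/mman.h>
 *
 *	if (mlock(p, 1) == 0) {
 *		// the whole page containing p is now wired
 *		munlock(p, 1);
 *	}
 */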
1131
1132
1133 int
1134 mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
1135 {
1136 return (ENOSYS);
1137 }
1138
1139 int
1140 munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
1141 {
1142 return(ENOSYS);
1143 }
1144
1145 #if CONFIG_CODE_DECRYPTION
1146 int
1147 mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
1148 {
1149 mach_vm_offset_t user_addr;
1150 mach_vm_size_t user_size;
1151 kern_return_t result;
1152 vm_map_t user_map;
1153 uint32_t cryptid;
1154 cpu_type_t cputype;
1155 cpu_subtype_t cpusubtype;
1156 pager_crypt_info_t crypt_info;
1157 const char * cryptname = 0;
1158 char *vpath;
1159 int len, ret;
1160 struct proc_regioninfo_internal pinfo;
1161 vnode_t vp;
1162 uintptr_t vnodeaddr;
1163 uint32_t vid;
1164
1165 AUDIT_ARG(addr, uap->addr);
1166 AUDIT_ARG(len, uap->len);
1167
1168 user_map = current_map();
1169 user_addr = (mach_vm_offset_t) uap->addr;
1170 user_size = (mach_vm_size_t) uap->len;
1171
1172 cryptid = uap->cryptid;
1173 cputype = uap->cputype;
1174 cpusubtype = uap->cpusubtype;
1175
1176 if (user_addr & vm_map_page_mask(user_map)) {
1177 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
1178 return EINVAL;
1179 }
1180
1181 switch(cryptid) {
1182 case 0:
1183 /* not encrypted, just an empty load command */
1184 return 0;
1185 case 1:
1186 cryptname="com.apple.unfree";
1187 break;
1188 case 0x10:
1189 /* some random cryptid that you could manually put into
1190 * your binary if you want NULL */
1191 cryptname="com.apple.null";
1192 break;
1193 default:
1194 return EINVAL;
1195 }
1196
1197 if (NULL == text_crypter_create) return ENOTSUP;
1198
1199 ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
1200 if (ret == 0 || !vnodeaddr) {
1201 /* No really, this returns 0 if the memory address is not backed by a file */
1202 return (EINVAL);
1203 }
1204
1205 vp = (vnode_t)vnodeaddr;
1206 if ((vnode_getwithvid(vp, vid)) == 0) {
1207 MALLOC_ZONE(vpath, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1208 if(vpath == NULL) {
1209 vnode_put(vp);
1210 return (ENOMEM);
1211 }
1212
1213 len = MAXPATHLEN;
1214 ret = vn_getpath(vp, vpath, &len);
1215 if(ret) {
1216 FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
1217 vnode_put(vp);
1218 return (ret);
1219 }
1220
1221 vnode_put(vp);
1222 } else {
1223 return (EINVAL);
1224 }
1225
1226 #if 0
1227 kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
1228 __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
1229 #endif
1230
1231 /* set up decrypter first */
1232 crypt_file_data_t crypt_data = {
1233 .filename = vpath,
1234 .cputype = cputype,
1235 .cpusubtype = cpusubtype };
1236 result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
1237 #if VM_MAP_DEBUG_APPLE_PROTECT
1238 if (vm_map_debug_apple_protect) {
1239 printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
1240 p->p_pid, p->p_comm,
1241 user_map,
1242 (uint64_t) user_addr,
1243 (uint64_t) (user_addr + user_size),
1244 __FUNCTION__, vpath, result);
1245 }
1246 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1247 FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
1248
1249 if(result) {
1250 printf("%s: unable to create decrypter %s, kr=%d\n",
1251 __FUNCTION__, cryptname, result);
1252 if (result == kIOReturnNotPrivileged) {
1253 /* text encryption returned decryption failure */
1254 return (EPERM);
1255 } else {
1256 return (ENOMEM);
1257 }
1258 }
1259
1260 /* now remap using the decrypter */
1261 vm_object_offset_t crypto_backing_offset;
1262 crypto_backing_offset = -1; /* i.e. use map entry's offset */
1263 result = vm_map_apple_protected(user_map,
1264 user_addr,
1265 user_addr+user_size,
1266 crypto_backing_offset,
1267 &crypt_info);
1268 if (result) {
1269 printf("%s: mapping failed with %d\n", __FUNCTION__, result);
1270 }
1271
1272 if (result) {
1273 return (EPERM);
1274 }
1275 return 0;
1276 }
1277 #endif /* CONFIG_CODE_DECRYPTION */