]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_mman.c
xnu-2782.40.9.tar.gz
[apple/xnu.git] / bsd / kern / kern_mman.c
1 /*
2 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988 University of Utah.
30 * Copyright (c) 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * the Systems Programming Group of the University of Utah Computer
35 * Science Department.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
66 *
67 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 /*
77 * Mapped file (mmap) interface to VM
78 */
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/filedesc.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/resourcevar.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/acct.h>
88 #include <sys/wait.h>
89 #include <sys/file_internal.h>
90 #include <sys/vadvise.h>
91 #include <sys/trace.h>
92 #include <sys/mman.h>
93 #include <sys/conf.h>
94 #include <sys/stat.h>
95 #include <sys/ubc.h>
96 #include <sys/ubc_internal.h>
97 #include <sys/sysproto.h>
98 #if CONFIG_PROTECT
99 #include <sys/cprotect.h>
100 #endif
101
102 #include <sys/syscall.h>
103 #include <sys/kdebug.h>
104 #include <sys/bsdtask_info.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <mach/mach_traps.h>
111 #include <mach/vm_sync.h>
112 #include <mach/vm_behavior.h>
113 #include <mach/vm_inherit.h>
114 #include <mach/vm_statistics.h>
115 #include <mach/mach_vm.h>
116 #include <mach/vm_map.h>
117 #include <mach/host_priv.h>
118
119 #include <machine/machine_routines.h>
120
121 #include <kern/cpu_number.h>
122 #include <kern/host.h>
123 #include <kern/task.h>
124 #include <kern/page_decrypt.h>
125
126 #include <IOKit/IOReturn.h>
127
128 #include <vm/vm_map.h>
129 #include <vm/vm_kern.h>
130 #include <vm/vm_pager.h>
131 #include <vm/vm_protos.h>
132
133 /*
134 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
135 * XXX usage is PROT_* from an interface perspective. Thus the values of
136 * XXX VM_PROT_* and PROT_* need to correspond.
137 */
138 int
139 mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
140 {
141 /*
142 * Map in special device (must be SHARED) or file
143 */
144 struct fileproc *fp;
145 register struct vnode *vp;
146 int flags;
147 int prot;
148 int err=0;
149 vm_map_t user_map;
150 kern_return_t result;
151 vm_map_offset_t user_addr;
152 vm_map_size_t user_size;
153 vm_object_offset_t pageoff;
154 vm_object_offset_t file_pos;
155 int alloc_flags=0;
156 boolean_t docow;
157 vm_prot_t maxprot;
158 void *handle;
159 memory_object_t pager = MEMORY_OBJECT_NULL;
160 memory_object_control_t control;
161 int mapanon=0;
162 int fpref=0;
163 int error =0;
164 int fd = uap->fd;
165 int num_retries = 0;
166
167 user_map = current_map();
168 user_addr = (vm_map_offset_t)uap->addr;
169 user_size = (vm_map_size_t) uap->len;
170
171 AUDIT_ARG(addr, user_addr);
172 AUDIT_ARG(len, user_size);
173 AUDIT_ARG(fd, uap->fd);
174
175 prot = (uap->prot & VM_PROT_ALL);
176 #if 3777787
177 /*
178 * Since the hardware currently does not support writing without
179 * read-before-write, or execution-without-read, if the request is
180 * for write or execute access, we must imply read access as well;
181 * otherwise programs expecting this to work will fail to operate.
182 */
183 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
184 prot |= VM_PROT_READ;
185 #endif /* radar 3777787 */
186
187 flags = uap->flags;
188 vp = NULLVP;
189
190 /*
191 * The vm code does not have prototypes & compiler doesn't do the'
192 * the right thing when you cast 64bit value and pass it in function
193 * call. So here it is.
194 */
195 file_pos = (vm_object_offset_t)uap->pos;
196
197
198 /* make sure mapping fits into numeric range etc */
199 if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
200 return (EINVAL);
201
202 /*
203 * Align the file position to a page boundary,
204 * and save its page offset component.
205 */
206 pageoff = (file_pos & vm_map_page_mask(user_map));
207 file_pos -= (vm_object_offset_t)pageoff;
208
209
210 /* Adjust size for rounding (on both ends). */
211 user_size += pageoff; /* low end... */
212 user_size = vm_map_round_page(user_size,
213 vm_map_page_mask(user_map)); /* hi end */
214
215 if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){
216 return EINVAL;
217 }
218 /*
219 * Check for illegal addresses. Watch out for address wrap... Note
220 * that VM_*_ADDRESS are not constants due to casts (argh).
221 */
222 if (flags & MAP_FIXED) {
223 /*
224 * The specified address must have the same remainder
225 * as the file offset taken modulo PAGE_SIZE, so it
226 * should be aligned after adjustment by pageoff.
227 */
228 user_addr -= pageoff;
229 if (user_addr & vm_map_page_mask(user_map))
230 return (EINVAL);
231 }
232 #ifdef notyet
233 /* DO not have apis to get this info, need to wait till then*/
234 /*
235 * XXX for non-fixed mappings where no hint is provided or
236 * the hint would fall in the potential heap space,
237 * place it after the end of the largest possible heap.
238 *
239 * There should really be a pmap call to determine a reasonable
240 * location.
241 */
242 else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
243 vm_map_page_mask(user_map)))
244 addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
245 vm_map_page_mask(user_map));
246
247 #endif
248
249 alloc_flags = 0;
250
251 if (flags & MAP_ANON) {
252
253 maxprot = VM_PROT_ALL;
254 #if CONFIG_MACF
255 /*
256 * Entitlement check.
257 */
258 error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
259 if (error) {
260 return EINVAL;
261 }
262 #endif /* MAC */
263
264 /*
265 * Mapping blank space is trivial. Use positive fds as the alias
266 * value for memory tracking.
267 */
268 if (fd != -1) {
269 /*
270 * Use "fd" to pass (some) Mach VM allocation flags,
271 * (see the VM_FLAGS_* definitions).
272 */
273 alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK |
274 VM_FLAGS_PURGABLE);
275 if (alloc_flags != fd) {
276 /* reject if there are any extra flags */
277 return EINVAL;
278 }
279 }
280
281 handle = NULL;
282 file_pos = 0;
283 mapanon = 1;
284 } else {
285 struct vnode_attr va;
286 vfs_context_t ctx = vfs_context_current();
287
288 if (flags & MAP_JIT)
289 return EINVAL;
290
291 /*
292 * Mapping file, get fp for validation. Obtain vnode and make
293 * sure it is of appropriate type.
294 */
295 err = fp_lookup(p, fd, &fp, 0);
296 if (err)
297 return(err);
298 fpref = 1;
299 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
300 case DTYPE_PSXSHM:
301 uap->addr = (user_addr_t)user_addr;
302 uap->len = (user_size_t)user_size;
303 uap->prot = prot;
304 uap->flags = flags;
305 uap->pos = file_pos;
306 error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
307 goto bad;
308 case DTYPE_VNODE:
309 break;
310 default:
311 error = EINVAL;
312 goto bad;
313 }
314 vp = (struct vnode *)fp->f_fglob->fg_data;
315 error = vnode_getwithref(vp);
316 if(error != 0)
317 goto bad;
318
319 if (vp->v_type != VREG && vp->v_type != VCHR) {
320 (void)vnode_put(vp);
321 error = EINVAL;
322 goto bad;
323 }
324
325 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
326
327 /*
328 * POSIX: mmap needs to update access time for mapped files
329 */
330 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
331 VATTR_INIT(&va);
332 nanotime(&va.va_access_time);
333 VATTR_SET_ACTIVE(&va, va_access_time);
334 vnode_setattr(vp, &va, ctx);
335 }
336
337 /*
338 * XXX hack to handle use of /dev/zero to map anon memory (ala
339 * SunOS).
340 */
341 if (vp->v_type == VCHR || vp->v_type == VSTR) {
342 (void)vnode_put(vp);
343 error = ENODEV;
344 goto bad;
345 } else {
346 /*
347 * Ensure that file and memory protections are
348 * compatible. Note that we only worry about
349 * writability if mapping is shared; in this case,
350 * current and max prot are dictated by the open file.
351 * XXX use the vnode instead? Problem is: what
352 * credentials do we use for determination? What if
353 * proc does a setuid?
354 */
355 maxprot = VM_PROT_EXECUTE; /* ??? */
356 if (fp->f_fglob->fg_flag & FREAD)
357 maxprot |= VM_PROT_READ;
358 else if (prot & PROT_READ) {
359 (void)vnode_put(vp);
360 error = EACCES;
361 goto bad;
362 }
363 /*
364 * If we are sharing potential changes (either via
365 * MAP_SHARED or via the implicit sharing of character
366 * device mappings), and we are trying to get write
367 * permission although we opened it without asking
368 * for it, bail out.
369 */
370
371 if ((flags & MAP_SHARED) != 0) {
372 if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
373 /*
374 * Do not allow writable mappings of
375 * swap files (see vm_swapfile_pager.c).
376 */
377 !vnode_isswap(vp)) {
378 /*
379 * check for write access
380 *
381 * Note that we already made this check when granting FWRITE
382 * against the file, so it seems redundant here.
383 */
384 error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
385
386 /* if not granted for any reason, but we wanted it, bad */
387 if ((prot & PROT_WRITE) && (error != 0)) {
388 vnode_put(vp);
389 goto bad;
390 }
391
392 /* if writable, remember */
393 if (error == 0)
394 maxprot |= VM_PROT_WRITE;
395
396 } else if ((prot & PROT_WRITE) != 0) {
397 (void)vnode_put(vp);
398 error = EACCES;
399 goto bad;
400 }
401 } else
402 maxprot |= VM_PROT_WRITE;
403
404 handle = (void *)vp;
405 #if CONFIG_MACF
406 error = mac_file_check_mmap(vfs_context_ucred(ctx),
407 fp->f_fglob, prot, flags, &maxprot);
408 if (error) {
409 (void)vnode_put(vp);
410 goto bad;
411 }
412 #endif /* MAC */
413
414 #if CONFIG_PROTECT
415 {
416 error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
417 if (error) {
418 (void) vnode_put(vp);
419 goto bad;
420 }
421 }
422 #endif /* CONFIG_PROTECT */
423
424
425 }
426 }
427
428 if (user_size == 0) {
429 if (!mapanon)
430 (void)vnode_put(vp);
431 error = 0;
432 goto bad;
433 }
434
435 /*
436 * We bend a little - round the start and end addresses
437 * to the nearest page boundary.
438 */
439 user_size = vm_map_round_page(user_size,
440 vm_map_page_mask(user_map));
441
442 if (file_pos & vm_map_page_mask(user_map)) {
443 if (!mapanon)
444 (void)vnode_put(vp);
445 error = EINVAL;
446 goto bad;
447 }
448
449 if ((flags & MAP_FIXED) == 0) {
450 alloc_flags |= VM_FLAGS_ANYWHERE;
451 user_addr = vm_map_round_page(user_addr,
452 vm_map_page_mask(user_map));
453 } else {
454 if (user_addr != vm_map_trunc_page(user_addr,
455 vm_map_page_mask(user_map))) {
456 if (!mapanon)
457 (void)vnode_put(vp);
458 error = EINVAL;
459 goto bad;
460 }
461 /*
462 * mmap(MAP_FIXED) will replace any existing mappings in the
463 * specified range, if the new mapping is successful.
464 * If we just deallocate the specified address range here,
465 * another thread might jump in and allocate memory in that
466 * range before we get a chance to establish the new mapping,
467 * and we won't have a chance to restore the old mappings.
468 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
469 * has to deallocate the existing mappings and establish the
470 * new ones atomically.
471 */
472 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
473 }
474
475 if (flags & MAP_NOCACHE)
476 alloc_flags |= VM_FLAGS_NO_CACHE;
477
478 if (flags & MAP_JIT){
479 alloc_flags |= VM_FLAGS_MAP_JIT;
480 }
481 /*
482 * Lookup/allocate object.
483 */
484 if (handle == NULL) {
485 control = NULL;
486 #ifdef notyet
487 /* Hmm .. */
488 #if defined(VM_PROT_READ_IS_EXEC)
489 if (prot & VM_PROT_READ)
490 prot |= VM_PROT_EXECUTE;
491 if (maxprot & VM_PROT_READ)
492 maxprot |= VM_PROT_EXECUTE;
493 #endif
494 #endif
495
496 #if 3777787
497 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
498 prot |= VM_PROT_READ;
499 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
500 maxprot |= VM_PROT_READ;
501 #endif /* radar 3777787 */
502 map_anon_retry:
503 result = vm_map_enter_mem_object(user_map,
504 &user_addr, user_size,
505 0, alloc_flags,
506 IPC_PORT_NULL, 0, FALSE,
507 prot, maxprot,
508 (flags & MAP_SHARED) ?
509 VM_INHERIT_SHARE :
510 VM_INHERIT_DEFAULT);
511
512 /* If a non-binding address was specified for this anonymous
513 * mapping, retry the mapping with a zero base
514 * in the event the mapping operation failed due to
515 * lack of space between the address and the map's maximum.
516 */
517 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
518 user_addr = vm_map_page_size(user_map);
519 goto map_anon_retry;
520 }
521 } else {
522 if (vnode_isswap(vp)) {
523 /*
524 * Map swap files with a special pager
525 * that returns obfuscated contents.
526 */
527 control = NULL;
528 pager = swapfile_pager_setup(vp);
529 if (pager != MEMORY_OBJECT_NULL) {
530 control = swapfile_pager_control(pager);
531 }
532 } else {
533 control = ubc_getobject(vp, UBC_FLAGS_NONE);
534 }
535
536 if (control == NULL) {
537 (void)vnode_put(vp);
538 error = ENOMEM;
539 goto bad;
540 }
541
542 /*
543 * Set credentials:
544 * FIXME: if we're writing the file we need a way to
545 * ensure that someone doesn't replace our R/W creds
546 * with ones that only work for read.
547 */
548
549 ubc_setthreadcred(vp, p, current_thread());
550 docow = FALSE;
551 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
552 docow = TRUE;
553 }
554
555 #ifdef notyet
556 /* Hmm .. */
557 #if defined(VM_PROT_READ_IS_EXEC)
558 if (prot & VM_PROT_READ)
559 prot |= VM_PROT_EXECUTE;
560 if (maxprot & VM_PROT_READ)
561 maxprot |= VM_PROT_EXECUTE;
562 #endif
563 #endif /* notyet */
564
565 #if 3777787
566 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
567 prot |= VM_PROT_READ;
568 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
569 maxprot |= VM_PROT_READ;
570 #endif /* radar 3777787 */
571 map_file_retry:
572 result = vm_map_enter_mem_object_control(user_map,
573 &user_addr, user_size,
574 0, alloc_flags,
575 control, file_pos,
576 docow, prot, maxprot,
577 (flags & MAP_SHARED) ?
578 VM_INHERIT_SHARE :
579 VM_INHERIT_DEFAULT);
580
581 /* If a non-binding address was specified for this file backed
582 * mapping, retry the mapping with a zero base
583 * in the event the mapping operation failed due to
584 * lack of space between the address and the map's maximum.
585 */
586 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
587 user_addr = vm_map_page_size(user_map);
588 goto map_file_retry;
589 }
590 }
591
592 if (!mapanon) {
593 (void)vnode_put(vp);
594 }
595
596 switch (result) {
597 case KERN_SUCCESS:
598 *retval = user_addr + pageoff;
599 error = 0;
600 break;
601 case KERN_INVALID_ADDRESS:
602 case KERN_NO_SPACE:
603 error = ENOMEM;
604 break;
605 case KERN_PROTECTION_FAILURE:
606 error = EACCES;
607 break;
608 default:
609 error = EINVAL;
610 break;
611 }
612 bad:
613 if (pager != MEMORY_OBJECT_NULL) {
614 /*
615 * Release the reference on the pager.
616 * If the mapping was successful, it now holds
617 * an extra reference.
618 */
619 memory_object_deallocate(pager);
620 }
621 if (fpref)
622 fp_drop(p, fd, fp, 0);
623
624 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
625 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
626 (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
627 return(error);
628 }
629
630 int
631 msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
632 {
633 __pthread_testcancel(1);
634 return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
635 }
636
637 int
638 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
639 {
640 mach_vm_offset_t addr;
641 mach_vm_size_t size;
642 int flags;
643 vm_map_t user_map;
644 int rv;
645 vm_sync_t sync_flags=0;
646
647 user_map = current_map();
648 addr = (mach_vm_offset_t) uap->addr;
649 size = (mach_vm_size_t)uap->len;
650 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
651 if (addr & vm_map_page_mask(user_map)) {
652 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
653 return EINVAL;
654 }
655 if (size == 0) {
656 /*
657 * We cannot support this properly without maintaining
658 * list all mmaps done. Cannot use vm_map_entry as they could be
659 * split or coalesced by indepenedant actions. So instead of
660 * inaccurate results, lets just return error as invalid size
661 * specified
662 */
663 return (EINVAL); /* XXX breaks posix apps */
664 }
665
666 flags = uap->flags;
667 /* disallow contradictory flags */
668 if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
669 return (EINVAL);
670
671 if (flags & MS_KILLPAGES)
672 sync_flags |= VM_SYNC_KILLPAGES;
673 if (flags & MS_DEACTIVATE)
674 sync_flags |= VM_SYNC_DEACTIVATE;
675 if (flags & MS_INVALIDATE)
676 sync_flags |= VM_SYNC_INVALIDATE;
677
678 if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
679 if (flags & MS_ASYNC)
680 sync_flags |= VM_SYNC_ASYNCHRONOUS;
681 else
682 sync_flags |= VM_SYNC_SYNCHRONOUS;
683 }
684
685 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */
686
687 rv = mach_vm_msync(user_map, addr, size, sync_flags);
688
689 switch (rv) {
690 case KERN_SUCCESS:
691 break;
692 case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
693 return (ENOMEM);
694 case KERN_FAILURE:
695 return (EIO);
696 default:
697 return (EINVAL);
698 }
699 return (0);
700 }
701
702
703 int
704 munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
705 {
706 mach_vm_offset_t user_addr;
707 mach_vm_size_t user_size;
708 kern_return_t result;
709 vm_map_t user_map;
710
711 user_map = current_map();
712 user_addr = (mach_vm_offset_t) uap->addr;
713 user_size = (mach_vm_size_t) uap->len;
714
715 AUDIT_ARG(addr, user_addr);
716 AUDIT_ARG(len, user_size);
717
718 if (user_addr & vm_map_page_mask(user_map)) {
719 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
720 return EINVAL;
721 }
722
723 if (user_addr + user_size < user_addr)
724 return(EINVAL);
725
726 if (user_size == 0) {
727 /* UNIX SPEC: size is 0, return EINVAL */
728 return EINVAL;
729 }
730
731 result = mach_vm_deallocate(user_map, user_addr, user_size);
732 if (result != KERN_SUCCESS) {
733 return(EINVAL);
734 }
735 return(0);
736 }
737
738 int
739 mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
740 {
741 register vm_prot_t prot;
742 mach_vm_offset_t user_addr;
743 mach_vm_size_t user_size;
744 kern_return_t result;
745 vm_map_t user_map;
746 #if CONFIG_MACF
747 int error;
748 #endif
749
750 AUDIT_ARG(addr, uap->addr);
751 AUDIT_ARG(len, uap->len);
752 AUDIT_ARG(value32, uap->prot);
753
754 user_map = current_map();
755 user_addr = (mach_vm_offset_t) uap->addr;
756 user_size = (mach_vm_size_t) uap->len;
757 prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));
758
759 if (user_addr & vm_map_page_mask(user_map)) {
760 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
761 return EINVAL;
762 }
763
764 #ifdef notyet
765 /* Hmm .. */
766 #if defined(VM_PROT_READ_IS_EXEC)
767 if (prot & VM_PROT_READ)
768 prot |= VM_PROT_EXECUTE;
769 #endif
770 #endif /* notyet */
771
772 #if 3936456
773 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
774 prot |= VM_PROT_READ;
775 #endif /* 3936456 */
776
777 #if CONFIG_MACF
778 /*
779 * The MAC check for mprotect is of limited use for 2 reasons:
780 * Without mmap revocation, the caller could have asked for the max
781 * protections initially instead of a reduced set, so a mprotect
782 * check would offer no new security.
783 * It is not possible to extract the vnode from the pager object(s)
784 * of the target memory range.
785 * However, the MAC check may be used to prevent a process from,
786 * e.g., making the stack executable.
787 */
788 error = mac_proc_check_mprotect(p, user_addr,
789 user_size, prot);
790 if (error)
791 return (error);
792 #endif
793
794 if(prot & VM_PROT_TRUSTED) {
795 #if CONFIG_DYNAMIC_CODE_SIGNING
796 /* CODE SIGNING ENFORCEMENT - JIT support */
797 /* The special protection value VM_PROT_TRUSTED requests that we treat
798 * this page as if it had a valid code signature.
799 * If this is enabled, there MUST be a MAC policy implementing the
800 * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be
801 * compromised because the check would always succeed and thusly any
802 * process could sign dynamically. */
803 result = vm_map_sign(
804 user_map,
805 vm_map_trunc_page(user_addr,
806 vm_map_page_mask(user_map)),
807 vm_map_round_page(user_addr+user_size,
808 vm_map_page_mask(user_map)));
809 switch (result) {
810 case KERN_SUCCESS:
811 break;
812 case KERN_INVALID_ADDRESS:
813 /* UNIX SPEC: for an invalid address range, return ENOMEM */
814 return ENOMEM;
815 default:
816 return EINVAL;
817 }
818 #else
819 return ENOTSUP;
820 #endif
821 }
822 prot &= ~VM_PROT_TRUSTED;
823
824 result = mach_vm_protect(user_map, user_addr, user_size,
825 FALSE, prot);
826 switch (result) {
827 case KERN_SUCCESS:
828 return (0);
829 case KERN_PROTECTION_FAILURE:
830 return (EACCES);
831 case KERN_INVALID_ADDRESS:
832 /* UNIX SPEC: for an invalid address range, return ENOMEM */
833 return ENOMEM;
834 }
835 return (EINVAL);
836 }
837
838
839 int
840 minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
841 {
842 mach_vm_offset_t addr;
843 mach_vm_size_t size;
844 register vm_inherit_t inherit;
845 vm_map_t user_map;
846 kern_return_t result;
847
848 AUDIT_ARG(addr, uap->addr);
849 AUDIT_ARG(len, uap->len);
850 AUDIT_ARG(value32, uap->inherit);
851
852 addr = (mach_vm_offset_t)uap->addr;
853 size = (mach_vm_size_t)uap->len;
854 inherit = uap->inherit;
855
856 user_map = current_map();
857 result = mach_vm_inherit(user_map, addr, size,
858 inherit);
859 switch (result) {
860 case KERN_SUCCESS:
861 return (0);
862 case KERN_PROTECTION_FAILURE:
863 return (EACCES);
864 }
865 return (EINVAL);
866 }
867
868 int
869 madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
870 {
871 vm_map_t user_map;
872 mach_vm_offset_t start;
873 mach_vm_size_t size;
874 vm_behavior_t new_behavior;
875 kern_return_t result;
876
877 /*
878 * Since this routine is only advisory, we default to conservative
879 * behavior.
880 */
881 switch (uap->behav) {
882 case MADV_RANDOM:
883 new_behavior = VM_BEHAVIOR_RANDOM;
884 break;
885 case MADV_SEQUENTIAL:
886 new_behavior = VM_BEHAVIOR_SEQUENTIAL;
887 break;
888 case MADV_NORMAL:
889 new_behavior = VM_BEHAVIOR_DEFAULT;
890 break;
891 case MADV_WILLNEED:
892 new_behavior = VM_BEHAVIOR_WILLNEED;
893 break;
894 case MADV_DONTNEED:
895 new_behavior = VM_BEHAVIOR_DONTNEED;
896 break;
897 case MADV_FREE:
898 new_behavior = VM_BEHAVIOR_FREE;
899 break;
900 case MADV_ZERO_WIRED_PAGES:
901 new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
902 break;
903 case MADV_FREE_REUSABLE:
904 new_behavior = VM_BEHAVIOR_REUSABLE;
905 break;
906 case MADV_FREE_REUSE:
907 new_behavior = VM_BEHAVIOR_REUSE;
908 break;
909 case MADV_CAN_REUSE:
910 new_behavior = VM_BEHAVIOR_CAN_REUSE;
911 break;
912 default:
913 return(EINVAL);
914 }
915
916 start = (mach_vm_offset_t) uap->addr;
917 size = (mach_vm_size_t) uap->len;
918
919 user_map = current_map();
920
921 result = mach_vm_behavior_set(user_map, start, size, new_behavior);
922 switch (result) {
923 case KERN_SUCCESS:
924 return 0;
925 case KERN_INVALID_ADDRESS:
926 return EINVAL;
927 case KERN_NO_SPACE:
928 return ENOMEM;
929 }
930
931 return EINVAL;
932 }
933
934 int
935 mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
936 {
937 mach_vm_offset_t addr, first_addr, end;
938 vm_map_t map;
939 user_addr_t vec;
940 int error;
941 int vecindex, lastvecindex;
942 int mincoreinfo=0;
943 int pqueryinfo;
944 kern_return_t ret;
945 int numref;
946
947 char c;
948
949 map = current_map();
950
951 /*
952 * Make sure that the addresses presented are valid for user
953 * mode.
954 */
955 first_addr = addr = vm_map_trunc_page(uap->addr,
956 vm_map_page_mask(map));
957 end = addr + vm_map_round_page(uap->len,
958 vm_map_page_mask(map));
959
960 if (end < addr)
961 return (EINVAL);
962
963 /*
964 * Address of byte vector
965 */
966 vec = uap->vec;
967
968 map = current_map();
969
970 /*
971 * Do this on a map entry basis so that if the pages are not
972 * in the current processes address space, we can easily look
973 * up the pages elsewhere.
974 */
975 lastvecindex = -1;
976 for( ; addr < end; addr += PAGE_SIZE ) {
977 pqueryinfo = 0;
978 ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
979 if (ret != KERN_SUCCESS)
980 pqueryinfo = 0;
981 mincoreinfo = 0;
982 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
983 mincoreinfo |= MINCORE_INCORE;
984 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
985 mincoreinfo |= MINCORE_REFERENCED;
986 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
987 mincoreinfo |= MINCORE_MODIFIED;
988
989
990 /*
991 * calculate index into user supplied byte vector
992 */
993 vecindex = (addr - first_addr)>> PAGE_SHIFT;
994
995 /*
996 * If we have skipped map entries, we need to make sure that
997 * the byte vector is zeroed for those skipped entries.
998 */
999 while((lastvecindex + 1) < vecindex) {
1000 c = 0;
1001 error = copyout(&c, vec + lastvecindex, 1);
1002 if (error) {
1003 return (EFAULT);
1004 }
1005 ++lastvecindex;
1006 }
1007
1008 /*
1009 * Pass the page information to the user
1010 */
1011 c = (char)mincoreinfo;
1012 error = copyout(&c, vec + vecindex, 1);
1013 if (error) {
1014 return (EFAULT);
1015 }
1016 lastvecindex = vecindex;
1017 }
1018
1019
1020 /*
1021 * Zero the last entries in the byte vector.
1022 */
1023 vecindex = (end - first_addr) >> PAGE_SHIFT;
1024 while((lastvecindex + 1) < vecindex) {
1025 c = 0;
1026 error = copyout(&c, vec + lastvecindex, 1);
1027 if (error) {
1028 return (EFAULT);
1029 }
1030 ++lastvecindex;
1031 }
1032
1033 return (0);
1034 }
1035
1036 int
1037 mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
1038 {
1039 vm_map_t user_map;
1040 vm_map_offset_t addr;
1041 vm_map_size_t size, pageoff;
1042 kern_return_t result;
1043
1044 AUDIT_ARG(addr, uap->addr);
1045 AUDIT_ARG(len, uap->len);
1046
1047 addr = (vm_map_offset_t) uap->addr;
1048 size = (vm_map_size_t)uap->len;
1049
1050 /* disable wrap around */
1051 if (addr + size < addr)
1052 return (EINVAL);
1053
1054 if (size == 0)
1055 return (0);
1056
1057 user_map = current_map();
1058 pageoff = (addr & vm_map_page_mask(user_map));
1059 addr -= pageoff;
1060 size = vm_map_round_page(size+pageoff, vm_map_page_mask(user_map));
1061
1062 /* have to call vm_map_wire directly to pass "I don't know" protections */
1063 result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);
1064
1065 if (result == KERN_RESOURCE_SHORTAGE)
1066 return EAGAIN;
1067 else if (result != KERN_SUCCESS)
1068 return ENOMEM;
1069
1070 return 0; /* KERN_SUCCESS */
1071 }
1072
1073 int
1074 munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
1075 {
1076 mach_vm_offset_t addr;
1077 mach_vm_size_t size;
1078 vm_map_t user_map;
1079 kern_return_t result;
1080
1081 AUDIT_ARG(addr, uap->addr);
1082 AUDIT_ARG(addr, uap->len);
1083
1084 addr = (mach_vm_offset_t) uap->addr;
1085 size = (mach_vm_size_t)uap->len;
1086 user_map = current_map();
1087
1088 /* JMM - need to remove all wirings by spec - this just removes one */
1089 result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
1090 return (result == KERN_SUCCESS ? 0 : ENOMEM);
1091 }
1092
1093
1094 int
1095 mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
1096 {
1097 return (ENOSYS);
1098 }
1099
1100 int
1101 munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
1102 {
1103 return(ENOSYS);
1104 }
1105
1106 #if CONFIG_CODE_DECRYPTION
1107 int
1108 mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
1109 {
1110 mach_vm_offset_t user_addr;
1111 mach_vm_size_t user_size;
1112 kern_return_t result;
1113 vm_map_t user_map;
1114 uint32_t cryptid;
1115 cpu_type_t cputype;
1116 cpu_subtype_t cpusubtype;
1117 pager_crypt_info_t crypt_info;
1118 const char * cryptname = 0;
1119 char *vpath;
1120 int len, ret;
1121 struct proc_regioninfo_internal pinfo;
1122 vnode_t vp;
1123 uintptr_t vnodeaddr;
1124 uint32_t vid;
1125
1126 AUDIT_ARG(addr, uap->addr);
1127 AUDIT_ARG(len, uap->len);
1128
1129 user_map = current_map();
1130 user_addr = (mach_vm_offset_t) uap->addr;
1131 user_size = (mach_vm_size_t) uap->len;
1132
1133 cryptid = uap->cryptid;
1134 cputype = uap->cputype;
1135 cpusubtype = uap->cpusubtype;
1136
1137 if (user_addr & vm_map_page_mask(user_map)) {
1138 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
1139 return EINVAL;
1140 }
1141
1142 switch(cryptid) {
1143 case 0:
1144 /* not encrypted, just an empty load command */
1145 return 0;
1146 case 1:
1147 cryptname="com.apple.unfree";
1148 break;
1149 case 0x10:
1150 /* some random cryptid that you could manually put into
1151 * your binary if you want NULL */
1152 cryptname="com.apple.null";
1153 break;
1154 default:
1155 return EINVAL;
1156 }
1157
1158 if (NULL == text_crypter_create) return ENOTSUP;
1159
1160 ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
1161 if (ret == 0 || !vnodeaddr) {
1162 /* No really, this returns 0 if the memory address is not backed by a file */
1163 return (EINVAL);
1164 }
1165
1166 vp = (vnode_t)vnodeaddr;
1167 if ((vnode_getwithvid(vp, vid)) == 0) {
1168 MALLOC_ZONE(vpath, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1169 if(vpath == NULL) {
1170 vnode_put(vp);
1171 return (ENOMEM);
1172 }
1173
1174 len = MAXPATHLEN;
1175 ret = vn_getpath(vp, vpath, &len);
1176 if(ret) {
1177 FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
1178 vnode_put(vp);
1179 return (ret);
1180 }
1181
1182 vnode_put(vp);
1183 } else {
1184 return (EINVAL);
1185 }
1186
1187 #if 0
1188 kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
1189 __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
1190 #endif
1191
1192 /* set up decrypter first */
1193 crypt_file_data_t crypt_data = {
1194 .filename = vpath,
1195 .cputype = cputype,
1196 .cpusubtype = cpusubtype };
1197 result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
1198 FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
1199
1200 if(result) {
1201 printf("%s: unable to create decrypter %s, kr=%d\n",
1202 __FUNCTION__, cryptname, result);
1203 if (result == kIOReturnNotPrivileged) {
1204 /* text encryption returned decryption failure */
1205 return (EPERM);
1206 } else {
1207 return (ENOMEM);
1208 }
1209 }
1210
1211 /* now remap using the decrypter */
1212 result = vm_map_apple_protected(user_map, user_addr, user_addr+user_size, &crypt_info);
1213 if (result) {
1214 printf("%s: mapping failed with %d\n", __FUNCTION__, result);
1215 crypt_info.crypt_end(crypt_info.crypt_ops);
1216 return (EPERM);
1217 }
1218
1219 return 0;
1220 }
1221 #endif /* CONFIG_CODE_DECRYPTION */