1 /*
2 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988 University of Utah.
30 * Copyright (c) 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * the Systems Programming Group of the University of Utah Computer
35 * Science Department.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
66 *
67 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 /*
77 * Mapped file (mmap) interface to VM
78 */
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/filedesc.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/resourcevar.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/acct.h>
88 #include <sys/wait.h>
89 #include <sys/file_internal.h>
90 #include <sys/vadvise.h>
91 #include <sys/trace.h>
92 #include <sys/mman.h>
93 #include <sys/conf.h>
94 #include <sys/stat.h>
95 #include <sys/ubc.h>
96 #include <sys/ubc_internal.h>
97 #include <sys/sysproto.h>
98
99 #include <sys/syscall.h>
100 #include <sys/kdebug.h>
101
102 #include <security/audit/audit.h>
103 #include <bsm/audit_kevents.h>
104
105 #include <mach/mach_types.h>
106 #include <mach/mach_traps.h>
107 #include <mach/vm_sync.h>
108 #include <mach/vm_behavior.h>
109 #include <mach/vm_inherit.h>
110 #include <mach/vm_statistics.h>
111 #include <mach/mach_vm.h>
112 #include <mach/vm_map.h>
113 #include <mach/host_priv.h>
114
115 #include <kern/cpu_number.h>
116 #include <kern/host.h>
117
118 #include <vm/vm_map.h>
119 #include <vm/vm_kern.h>
120 #include <vm/vm_pager.h>
121 #include <vm/vm_protos.h>
122
123 /* XXX the following function should probably be static */
124 kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *,
125 boolean_t, vm_size_t);
126
127 /*
128 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
129 * XXX usage is PROT_* from an interface perspective. Thus the values of
130 * XXX VM_PROT_* and PROT_* need to correspond.
131 */
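/*
 * The correspondence could be made explicit with compile-time checks along
 * these lines (a sketch; _Static_assert is C11 and may need a different
 * spelling for older toolchains):
 *
 *    _Static_assert(PROT_READ  == VM_PROT_READ,    "PROT_READ mismatch");
 *    _Static_assert(PROT_WRITE == VM_PROT_WRITE,   "PROT_WRITE mismatch");
 *    _Static_assert(PROT_EXEC  == VM_PROT_EXECUTE, "PROT_EXEC mismatch");
 */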
132 int
133 mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
134 {
135 /*
136 * Map in special device (must be SHARED) or file
137 */
138 struct fileproc *fp;
139 register struct vnode *vp;
140 int flags;
141 int prot;
142 int err=0;
143 vm_map_t user_map;
144 kern_return_t result;
145 mach_vm_offset_t user_addr;
146 mach_vm_size_t user_size;
147 vm_object_offset_t pageoff;
148 vm_object_offset_t file_pos;
149 int alloc_flags=0;
150 boolean_t docow;
151 vm_prot_t maxprot;
152 void *handle;
153 memory_object_t pager = MEMORY_OBJECT_NULL;
154 memory_object_control_t control;
155 int mapanon=0;
156 int fpref=0;
157 int error =0;
158 int fd = uap->fd;
159
160 user_addr = (mach_vm_offset_t)uap->addr;
161 user_size = (mach_vm_size_t) uap->len;
162
163 AUDIT_ARG(addr, user_addr);
164 AUDIT_ARG(len, user_size);
165 AUDIT_ARG(fd, uap->fd);
166
167 prot = (uap->prot & VM_PROT_ALL);
168 #if 3777787
169 /*
170 * Since the hardware currently does not support writing without
171 * read-before-write, or execution-without-read, if the request is
172 * for write or execute access, we must imply read access as well;
173 * otherwise programs expecting this to work will fail to operate.
174 */
175 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
176 prot |= VM_PROT_READ;
177 #endif /* radar 3777787 */
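/*
 * In practice this means a request for a write-only or execute-only mapping
 * is silently widened to include read. Illustrative userspace call (a
 * sketch, not compiled here):
 *
 *    #include <sys/mman.h>
 *
 *    // PROT_WRITE was requested alone, but the pages end up readable too.
 *    char *p = mmap(NULL, 4096, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
 */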
178
179 flags = uap->flags;
180 vp = NULLVP;
181
182 /*
183 * The VM code does not have prototypes, and the compiler doesn't do
184 * the right thing when you cast a 64-bit value and pass it in a
185 * function call. So here it is.
186 */
187 file_pos = (vm_object_offset_t)uap->pos;
188
189
190 /* make sure mapping fits into numeric range etc */
191 if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
192 return (EINVAL);
193
194 /*
195 * Align the file position to a page boundary,
196 * and save its page offset component.
197 */
198 pageoff = (file_pos & PAGE_MASK);
199 file_pos -= (vm_object_offset_t)pageoff;
200
201
202 /* Adjust size for rounding (on both ends). */
203 user_size += pageoff; /* low end... */
204 user_size = mach_vm_round_page(user_size); /* hi end */
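/*
 * Worked example of the adjustment above, assuming a 4 KB page size:
 * pos = 0x12345 and len = 0x100 give pageoff = 0x345 and
 * file_pos = 0x12000; user_size is first bumped to 0x445 and then
 * rounded up to 0x1000, so the mapping covers the whole page that
 * contains the requested range.
 */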
205
206
207 /*
208 * Check for illegal addresses. Watch out for address wrap... Note
209 * that VM_*_ADDRESS are not constants due to casts (argh).
210 */
211 if (flags & MAP_FIXED) {
212 /*
213 * The specified address must have the same remainder
214 * as the file offset taken modulo PAGE_SIZE, so it
215 * should be aligned after adjustment by pageoff.
216 */
217 user_addr -= pageoff;
218 if (user_addr & PAGE_MASK)
219 return (EINVAL);
220 }
221 #ifdef notyet
222 /* We do not have APIs to get this info yet; this must wait until they exist. */
223 /*
224 * XXX for non-fixed mappings where no hint is provided or
225 * the hint would fall in the potential heap space,
226 * place it after the end of the largest possible heap.
227 *
228 * There should really be a pmap call to determine a reasonable
229 * location.
230 */
231 else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
232 addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
233
234 #endif
235
236 alloc_flags = 0;
237
238 if (flags & MAP_ANON) {
239 /*
240 * Mapping blank space is trivial. Use positive fds as the alias
241 * value for memory tracking.
242 */
243 if (fd != -1) {
244 /*
245 * Use "fd" to pass (some) Mach VM allocation flags,
246 * (see the VM_FLAGS_* definitions).
247 */
248 alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
249 VM_FLAGS_PURGABLE);
250 if (alloc_flags != fd) {
251 /* reject if there are any extra flags */
252 return EINVAL;
253 }
254 }
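/*
 * Illustrative userspace use of this fd convention (a sketch, not compiled
 * here; VM_FLAGS_PURGABLE and VM_MAKE_TAG come from <mach/vm_statistics.h>,
 * and the tag chosen below is an arbitrary example):
 *
 *    #include <sys/mman.h>
 *    #include <mach/vm_statistics.h>
 *
 *    // Purgable anonymous region, tagged so memory tools can attribute it.
 *    void *p = mmap(NULL, 16 * 4096, PROT_READ | PROT_WRITE,
 *        MAP_ANON | MAP_PRIVATE,
 *        VM_FLAGS_PURGABLE | VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1),
 *        0);
 */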
255
256 handle = NULL;
257 maxprot = VM_PROT_ALL;
258 file_pos = 0;
259 mapanon = 1;
260 } else {
261 struct vnode_attr va;
262 vfs_context_t ctx = vfs_context_current();
263
264 /*
265 * Mapping file, get fp for validation. Obtain vnode and make
266 * sure it is of appropriate type.
267 */
268 err = fp_lookup(p, fd, &fp, 0);
269 if (err)
270 return(err);
271 fpref = 1;
272 if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
273 uap->addr = (user_addr_t)user_addr;
274 uap->len = (user_size_t)user_size;
275 uap->prot = prot;
276 uap->flags = flags;
277 uap->pos = file_pos;
278 error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
279 goto bad;
280 }
281
282 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
283 error = EINVAL;
284 goto bad;
285 }
286 vp = (struct vnode *)fp->f_fglob->fg_data;
287 error = vnode_getwithref(vp);
288 if(error != 0)
289 goto bad;
290
291 if (vp->v_type != VREG && vp->v_type != VCHR) {
292 (void)vnode_put(vp);
293 error = EINVAL;
294 goto bad;
295 }
296
297 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
298
299 /*
300 * POSIX: mmap needs to update access time for mapped files
301 */
302 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
303 VATTR_INIT(&va);
304 nanotime(&va.va_access_time);
305 VATTR_SET_ACTIVE(&va, va_access_time);
306 vnode_setattr(vp, &va, ctx);
307 }
308
309 /*
310 * XXX hack to handle use of /dev/zero to map anonymous memory (a la
311 * SunOS).
312 */
313 if (vp->v_type == VCHR || vp->v_type == VSTR) {
314 (void)vnode_put(vp);
315 error = ENODEV;
316 goto bad;
317 } else {
318 /*
319 * Ensure that file and memory protections are
320 * compatible. Note that we only worry about
321 * writability if mapping is shared; in this case,
322 * current and max prot are dictated by the open file.
323 * XXX use the vnode instead? Problem is: what
324 * credentials do we use for determination? What if
325 * proc does a setuid?
326 */
327 maxprot = VM_PROT_EXECUTE; /* ??? */
328 if (fp->f_fglob->fg_flag & FREAD)
329 maxprot |= VM_PROT_READ;
330 else if (prot & PROT_READ) {
331 (void)vnode_put(vp);
332 error = EACCES;
333 goto bad;
334 }
335 /*
336 * If we are sharing potential changes (either via
337 * MAP_SHARED or via the implicit sharing of character
338 * device mappings), and we are trying to get write
339 * permission although we opened it without asking
340 * for it, bail out.
341 */
342
343 if ((flags & MAP_SHARED) != 0) {
344 if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
345 /*
346 * Do not allow writable mappings of
347 * swap files (see vm_swapfile_pager.c).
348 */
349 !vnode_isswap(vp)) {
350 /*
351 * check for write access
352 *
353 * Note that we already made this check when granting FWRITE
354 * against the file, so it seems redundant here.
355 */
356 error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
357
358 /* if not granted for any reason, but we wanted it, bad */
359 if ((prot & PROT_WRITE) && (error != 0)) {
360 vnode_put(vp);
361 goto bad;
362 }
363
364 /* if writable, remember */
365 if (error == 0)
366 maxprot |= VM_PROT_WRITE;
367
368 } else if ((prot & PROT_WRITE) != 0) {
369 (void)vnode_put(vp);
370 error = EACCES;
371 goto bad;
372 }
373 } else
374 maxprot |= VM_PROT_WRITE;
375
376 handle = (void *)vp;
377 #if CONFIG_MACF
378 error = mac_file_check_mmap(vfs_context_ucred(ctx),
379 fp->f_fglob, prot, flags, &maxprot);
380 if (error) {
381 (void)vnode_put(vp);
382 goto bad;
383 }
384 #endif /* MAC */
385 }
386 }
387
388 if (user_size == 0) {
389 if (!mapanon)
390 (void)vnode_put(vp);
391 error = 0;
392 goto bad;
393 }
394
395 /*
396 * We bend a little - round the start and end addresses
397 * to the nearest page boundary.
398 */
399 user_size = mach_vm_round_page(user_size);
400
401 if (file_pos & PAGE_MASK_64) {
402 if (!mapanon)
403 (void)vnode_put(vp);
404 error = EINVAL;
405 goto bad;
406 }
407
408 user_map = current_map();
409
410 if ((flags & MAP_FIXED) == 0) {
411 alloc_flags |= VM_FLAGS_ANYWHERE;
412 user_addr = mach_vm_round_page(user_addr);
413 } else {
414 if (user_addr != mach_vm_trunc_page(user_addr)) {
415 if (!mapanon)
416 (void)vnode_put(vp);
417 error = EINVAL;
418 goto bad;
419 }
420 /*
421 * mmap(MAP_FIXED) will replace any existing mappings in the
422 * specified range, if the new mapping is successful.
423 * If we just deallocate the specified address range here,
424 * another thread might jump in and allocate memory in that
425 * range before we get a chance to establish the new mapping,
426 * and we won't have a chance to restore the old mappings.
427 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
428 * has to deallocate the existing mappings and establish the
429 * new ones atomically.
430 */
431 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
432 }
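/*
 * This is the atomicity a MAP_FIXED caller relies on: remapping over an
 * existing region either fully replaces it or fails, never leaving a hole.
 * Userspace sketch (illustrative, not compiled here):
 *
 *    #include <sys/mman.h>
 *
 *    char *base = mmap(NULL, 2 * 4096, PROT_READ | PROT_WRITE,
 *        MAP_ANON | MAP_PRIVATE, -1, 0);
 *    // Replace the second page in place; the old mapping is torn down and
 *    // the new one established as a single Mach VM operation.
 *    void *q = mmap(base + 4096, 4096, PROT_READ,
 *        MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
 */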
433
434 if (flags & MAP_NOCACHE)
435 alloc_flags |= VM_FLAGS_NO_CACHE;
436
437 /*
438 * Lookup/allocate object.
439 */
440 if (handle == NULL) {
441 control = NULL;
442 #ifdef notyet
443 /* Hmm .. */
444 #if defined(VM_PROT_READ_IS_EXEC)
445 if (prot & VM_PROT_READ)
446 prot |= VM_PROT_EXECUTE;
447 if (maxprot & VM_PROT_READ)
448 maxprot |= VM_PROT_EXECUTE;
449 #endif
450 #endif
451
452 #if 3777787
453 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
454 prot |= VM_PROT_READ;
455 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
456 maxprot |= VM_PROT_READ;
457 #endif /* radar 3777787 */
458
459 result = vm_map_enter_mem_object(user_map,
460 &user_addr, user_size,
461 0, alloc_flags,
462 IPC_PORT_NULL, 0, FALSE,
463 prot, maxprot,
464 (flags & MAP_SHARED) ?
465 VM_INHERIT_SHARE :
466 VM_INHERIT_DEFAULT);
467 } else {
468 if (vnode_isswap(vp)) {
469 /*
470 * Map swap files with a special pager
471 * that returns obfuscated contents.
472 */
473 control = NULL;
474 pager = swapfile_pager_setup(vp);
475 if (pager != MEMORY_OBJECT_NULL) {
476 control = swapfile_pager_control(pager);
477 }
478 } else {
479 control = ubc_getobject(vp, UBC_FLAGS_NONE);
480 }
481
482 if (control == NULL) {
483 (void)vnode_put(vp);
484 error = ENOMEM;
485 goto bad;
486 }
487
488 /*
489 * Set credentials:
490 * FIXME: if we're writing the file we need a way to
491 * ensure that someone doesn't replace our R/W creds
492 * with ones that only work for read.
493 */
494
495 ubc_setthreadcred(vp, p, current_thread());
496 docow = FALSE;
497 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
498 docow = TRUE;
499 }
500
501 #ifdef notyet
502 /* Hmm .. */
503 #if defined(VM_PROT_READ_IS_EXEC)
504 if (prot & VM_PROT_READ)
505 prot |= VM_PROT_EXECUTE;
506 if (maxprot & VM_PROT_READ)
507 maxprot |= VM_PROT_EXECUTE;
508 #endif
509 #endif /* notyet */
510
511 #if 3777787
512 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
513 prot |= VM_PROT_READ;
514 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
515 maxprot |= VM_PROT_READ;
516 #endif /* radar 3777787 */
517
518 result = vm_map_enter_mem_object_control(user_map,
519 &user_addr, user_size,
520 0, alloc_flags,
521 control, file_pos,
522 docow, prot, maxprot,
523 (flags & MAP_SHARED) ?
524 VM_INHERIT_SHARE :
525 VM_INHERIT_DEFAULT);
526 }
527
528 if (!mapanon) {
529 (void)vnode_put(vp);
530 }
531
532 switch (result) {
533 case KERN_SUCCESS:
534 *retval = user_addr + pageoff;
535 error = 0;
536 break;
537 case KERN_INVALID_ADDRESS:
538 case KERN_NO_SPACE:
539 error = ENOMEM;
540 break;
541 case KERN_PROTECTION_FAILURE:
542 error = EACCES;
543 break;
544 default:
545 error = EINVAL;
546 break;
547 }
548 bad:
549 if (pager != MEMORY_OBJECT_NULL) {
550 /*
551 * Release the reference on the pager.
552 * If the mapping was successful, it now holds
553 * an extra reference.
554 */
555 memory_object_deallocate(pager);
556 }
557 if (fpref)
558 fp_drop(p, fd, fp, 0);
559
560 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
561 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
562 (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
563
564 return(error);
565 }
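/*
 * Userspace illustration of the protection rules enforced above (a sketch,
 * not compiled here; the path is arbitrary): a file opened read-only can be
 * mapped MAP_SHARED for reading, but PROT_WRITE on that shared mapping
 * fails with EACCES, while a MAP_PRIVATE mapping may still be writable
 * because changes are copy-on-write and never reach the file.
 *
 *    #include <fcntl.h>
 *    #include <sys/mman.h>
 *
 *    int fd = open("/tmp/example", O_RDONLY);
 *    void *ro = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);   // OK
 *    void *rw = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *        MAP_SHARED, fd, 0);                          // fails with EACCES
 *    void *cow = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *        MAP_PRIVATE, fd, 0);                         // copy-on-write, OK
 */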
566
567 int
568 msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
569 {
570 __pthread_testcancel(1);
571 return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
572 }
573
574 int
575 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
576 {
577 mach_vm_offset_t addr;
578 mach_vm_size_t size;
579 int flags;
580 vm_map_t user_map;
581 int rv;
582 vm_sync_t sync_flags=0;
583
584 addr = (mach_vm_offset_t) uap->addr;
585 size = (mach_vm_size_t)uap->len;
586
587 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
588
589 if (addr & PAGE_MASK_64) {
590 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
591 return EINVAL;
592 }
593 if (size == 0) {
594 /*
595 * We cannot support this properly without maintaining a
596 * list of all mmaps done. We cannot use vm_map_entry, as entries may be
597 * split or coalesced by independent actions. So instead of returning
598 * inaccurate results, let's just return an error for an invalid size
599 * specification.
600 */
601 return (EINVAL); /* XXX breaks posix apps */
602 }
603
604 flags = uap->flags;
605 /* disallow contradictory flags */
606 if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
607 return (EINVAL);
608
609 if (flags & MS_KILLPAGES)
610 sync_flags |= VM_SYNC_KILLPAGES;
611 if (flags & MS_DEACTIVATE)
612 sync_flags |= VM_SYNC_DEACTIVATE;
613 if (flags & MS_INVALIDATE)
614 sync_flags |= VM_SYNC_INVALIDATE;
615
616 if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
617 if (flags & MS_ASYNC)
618 sync_flags |= VM_SYNC_ASYNCHRONOUS;
619 else
620 sync_flags |= VM_SYNC_SYNCHRONOUS;
621 }
622
623 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */
624
625 user_map = current_map();
626 rv = mach_vm_msync(user_map, addr, size, sync_flags);
627
628 switch (rv) {
629 case KERN_SUCCESS:
630 break;
631 case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
632 return (ENOMEM);
633 case KERN_FAILURE:
634 return (EIO);
635 default:
636 return (EINVAL);
637 }
638 return (0);
639 }
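/*
 * Userspace sketch of the flag handling above (illustrative, not compiled
 * here; addr and len stand for an existing page-aligned mapping): MS_SYNC
 * and MS_ASYNC are mutually exclusive, a zero length is rejected, and
 * because VM_SYNC_CONTIGUOUS is always set a range containing an unmapped
 * hole reports ENOMEM.
 *
 *    #include <sys/mman.h>
 *
 *    msync(addr, len, MS_SYNC);              // flush and wait
 *    msync(addr, len, MS_ASYNC);             // start the flush, return early
 *    msync(addr, len, MS_SYNC | MS_ASYNC);   // EINVAL: contradictory flags
 *    msync(addr, 0, MS_SYNC);                // EINVAL: zero length
 */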
640
641
642 int
643 munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
644 {
645 mach_vm_offset_t user_addr;
646 mach_vm_size_t user_size;
647 kern_return_t result;
648
649 user_addr = (mach_vm_offset_t) uap->addr;
650 user_size = (mach_vm_size_t) uap->len;
651
652 AUDIT_ARG(addr, user_addr);
653 AUDIT_ARG(len, user_size);
654
655 if (user_addr & PAGE_MASK_64) {
656 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
657 return EINVAL;
658 }
659
660 if (user_addr + user_size < user_addr)
661 return(EINVAL);
662
663 if (user_size == 0) {
664 /* UNIX SPEC: size is 0, return EINVAL */
665 return EINVAL;
666 }
667
668 result = mach_vm_deallocate(current_map(), user_addr, user_size);
669 if (result != KERN_SUCCESS) {
670 return(EINVAL);
671 }
672 return(0);
673 }
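/*
 * Because the work is delegated to mach_vm_deallocate(), unmapping a
 * page-aligned sub-range of a larger mapping is allowed and simply punches
 * a hole. Userspace sketch (illustrative, not compiled here):
 *
 *    #include <sys/mman.h>
 *
 *    char *p = mmap(NULL, 3 * 4096, PROT_READ | PROT_WRITE,
 *        MAP_ANON | MAP_PRIVATE, -1, 0);
 *    munmap(p + 4096, 4096);        // the middle page is now unmapped
 */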
674
675 int
676 mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
677 {
678 register vm_prot_t prot;
679 mach_vm_offset_t user_addr;
680 mach_vm_size_t user_size;
681 kern_return_t result;
682 vm_map_t user_map;
683 #if CONFIG_MACF
684 int error;
685 #endif
686
687 AUDIT_ARG(addr, uap->addr);
688 AUDIT_ARG(len, uap->len);
689 AUDIT_ARG(value32, uap->prot);
690
691 user_addr = (mach_vm_offset_t) uap->addr;
692 user_size = (mach_vm_size_t) uap->len;
693 prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));
694
695 if (user_addr & PAGE_MASK_64) {
696 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
697 return EINVAL;
698 }
699
700 #ifdef notyet
701 /* Hmm .. */
702 #if defined(VM_PROT_READ_IS_EXEC)
703 if (prot & VM_PROT_READ)
704 prot |= VM_PROT_EXECUTE;
705 #endif
706 #endif /* notyet */
707
708 #if 3936456
709 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
710 prot |= VM_PROT_READ;
711 #endif /* 3936456 */
712
713 user_map = current_map();
714
715 #if CONFIG_MACF
716 /*
717 * The MAC check for mprotect is of limited use for two reasons:
718 * Without mmap revocation, the caller could have asked for the max
719 * protections initially instead of a reduced set, so an mprotect
720 * check would offer no new security.
721 * It is not possible to extract the vnode from the pager object(s)
722 * of the target memory range.
723 * However, the MAC check may be used to prevent a process from,
724 * e.g., making the stack executable.
725 */
726 error = mac_proc_check_mprotect(p, user_addr,
727 user_size, prot);
728 if (error)
729 return (error);
730 #endif
731
732 if(prot & VM_PROT_TRUSTED) {
733 #if CONFIG_DYNAMIC_CODE_SIGNING
734 /* CODE SIGNING ENFORCEMENT - JIT support */
735 /* The special protection value VM_PROT_TRUSTED requests that we treat
736 * this page as if it had a valid code signature.
737 * If this is enabled, there MUST be a MAC policy implementing the
738 * mac_proc_check_mprotect() hook above. Otherwise, code signing will be
739 * compromised because the check would always succeed, and thus any
740 * process could sign dynamically. */
741 result = vm_map_sign(user_map,
742 vm_map_trunc_page(user_addr),
743 vm_map_round_page(user_addr+user_size));
744 switch (result) {
745 case KERN_SUCCESS:
746 break;
747 case KERN_INVALID_ADDRESS:
748 /* UNIX SPEC: for an invalid address range, return ENOMEM */
749 return ENOMEM;
750 default:
751 return EINVAL;
752 }
753 #else
754 return ENOTSUP;
755 #endif
756 }
757 prot &= ~VM_PROT_TRUSTED;
758
759 result = mach_vm_protect(user_map, user_addr, user_size,
760 FALSE, prot);
761 switch (result) {
762 case KERN_SUCCESS:
763 return (0);
764 case KERN_PROTECTION_FAILURE:
765 return (EACCES);
766 case KERN_INVALID_ADDRESS:
767 /* UNIX SPEC: for an invalid address range, return ENOMEM */
768 return ENOMEM;
769 }
770 return (EINVAL);
771 }
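/*
 * Typical userspace use of the path above (a sketch, not compiled here;
 * p stands for an existing page-aligned mapping): drop write access after
 * initialization, restore it later. Because of the read-before-write
 * handling, requesting PROT_WRITE alone still leaves the pages readable.
 *
 *    #include <sys/mman.h>
 *
 *    mprotect(p, 4096, PROT_READ);                // make the page read-only
 *    mprotect(p, 4096, PROT_READ | PROT_WRITE);   // writable again
 */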
772
773
774 int
775 minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
776 {
777 mach_vm_offset_t addr;
778 mach_vm_size_t size;
779 register vm_inherit_t inherit;
780 vm_map_t user_map;
781 kern_return_t result;
782
783 AUDIT_ARG(addr, uap->addr);
784 AUDIT_ARG(len, uap->len);
785 AUDIT_ARG(value32, uap->inherit);
786
787 addr = (mach_vm_offset_t)uap->addr;
788 size = (mach_vm_size_t)uap->len;
789 inherit = uap->inherit;
790
791 user_map = current_map();
792 result = mach_vm_inherit(user_map, addr, size,
793 inherit);
794 switch (result) {
795 case KERN_SUCCESS:
796 return (0);
797 case KERN_PROTECTION_FAILURE:
798 return (EACCES);
799 }
800 return (EINVAL);
801 }
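/*
 * Userspace sketch of the inheritance control above (illustrative, not
 * compiled here; VM_INHERIT_SHARE comes from <mach/vm_inherit.h>): a region
 * marked VM_INHERIT_SHARE is shared with a fork()ed child instead of being
 * copied copy-on-write.
 *
 *    #include <sys/mman.h>
 *    #include <unistd.h>
 *
 *    char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *        MAP_ANON | MAP_PRIVATE, -1, 0);
 *    minherit(p, 4096, VM_INHERIT_SHARE);
 *    if (fork() == 0) {
 *        p[0] = 1;        // the store is visible to the parent as well
 *    }
 */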
802
803 int
804 madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
805 {
806 vm_map_t user_map;
807 mach_vm_offset_t start;
808 mach_vm_size_t size;
809 vm_behavior_t new_behavior;
810 kern_return_t result;
811
812 /*
813 * Since this routine is only advisory, we default to conservative
814 * behavior.
815 */
816 switch (uap->behav) {
817 case MADV_RANDOM:
818 new_behavior = VM_BEHAVIOR_RANDOM;
819 break;
820 case MADV_SEQUENTIAL:
821 new_behavior = VM_BEHAVIOR_SEQUENTIAL;
822 break;
823 case MADV_NORMAL:
824 new_behavior = VM_BEHAVIOR_DEFAULT;
825 break;
826 case MADV_WILLNEED:
827 new_behavior = VM_BEHAVIOR_WILLNEED;
828 break;
829 case MADV_DONTNEED:
830 new_behavior = VM_BEHAVIOR_DONTNEED;
831 break;
832 case MADV_FREE:
833 new_behavior = VM_BEHAVIOR_FREE;
834 break;
835 case MADV_ZERO_WIRED_PAGES:
836 new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
837 break;
838 case MADV_FREE_REUSABLE:
839 new_behavior = VM_BEHAVIOR_REUSABLE;
840 break;
841 case MADV_FREE_REUSE:
842 new_behavior = VM_BEHAVIOR_REUSE;
843 break;
844 case MADV_CAN_REUSE:
845 new_behavior = VM_BEHAVIOR_CAN_REUSE;
846 break;
847 default:
848 return(EINVAL);
849 }
850
851 start = (mach_vm_offset_t) uap->addr;
852 size = (mach_vm_size_t) uap->len;
853
854 user_map = current_map();
855
856 result = mach_vm_behavior_set(user_map, start, size, new_behavior);
857 switch (result) {
858 case KERN_SUCCESS:
859 return (0);
860 case KERN_INVALID_ADDRESS:
861 return (ENOMEM);
862 }
863
864 return (EINVAL);
865 }
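/*
 * Userspace sketch of the reuse hints handled above (illustrative, not
 * compiled here; buf and buf_size are placeholders): MADV_FREE_REUSABLE
 * tells the VM the contents of the pages can be discarded and their cost
 * uncharged, and MADV_FREE_REUSE is issued before the buffer is touched
 * again.
 *
 *    #include <sys/mman.h>
 *
 *    madvise(buf, buf_size, MADV_FREE_REUSABLE);   // done with the contents
 *    // ... later, before refilling the buffer ...
 *    madvise(buf, buf_size, MADV_FREE_REUSE);
 */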
866
867 int
868 mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
869 {
870 mach_vm_offset_t addr, first_addr, end;
871 vm_map_t map;
872 user_addr_t vec;
873 int error;
874 int vecindex, lastvecindex;
875 int mincoreinfo=0;
876 int pqueryinfo;
877 kern_return_t ret;
878 int numref;
879
880 char c;
881
882 map = current_map();
883
884 /*
885 * Make sure that the addresses presented are valid for user
886 * mode.
887 */
888 first_addr = addr = mach_vm_trunc_page(uap->addr);
889 end = addr + mach_vm_round_page(uap->len);
890
891 if (end < addr)
892 return (EINVAL);
893
894 /*
895 * Address of byte vector
896 */
897 vec = uap->vec;
898
899 map = current_map();
900
901 /*
902 * Do this on a map entry basis so that if the pages are not
903 * in the current process's address space, we can easily look
904 * up the pages elsewhere.
905 */
906 lastvecindex = -1;
907 for( ; addr < end; addr += PAGE_SIZE ) {
908 pqueryinfo = 0;
909 ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
910 if (ret != KERN_SUCCESS)
911 pqueryinfo = 0;
912 mincoreinfo = 0;
913 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
914 mincoreinfo |= MINCORE_INCORE;
915 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
916 mincoreinfo |= MINCORE_REFERENCED;
917 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
918 mincoreinfo |= MINCORE_MODIFIED;
919
920
921 /*
922 * calculate index into user supplied byte vector
923 */
924 vecindex = (addr - first_addr)>> PAGE_SHIFT;
925
926 /*
927 * If we have skipped map entries, we need to make sure that
928 * the byte vector is zeroed for those skipped entries.
929 */
930 while((lastvecindex + 1) < vecindex) {
931 c = 0;
932 error = copyout(&c, vec + lastvecindex, 1);
933 if (error) {
934 return (EFAULT);
935 }
936 ++lastvecindex;
937 }
938
939 /*
940 * Pass the page information to the user
941 */
942 c = (char)mincoreinfo;
943 error = copyout(&c, vec + vecindex, 1);
944 if (error) {
945 return (EFAULT);
946 }
947 lastvecindex = vecindex;
948 }
949
950
951 /*
952 * Zero the last entries in the byte vector.
953 */
954 vecindex = (end - first_addr) >> PAGE_SHIFT;
955 while((lastvecindex + 1) < vecindex) {
956 c = 0;
957 error = copyout(&c, vec + lastvecindex, 1);
958 if (error) {
959 return (EFAULT);
960 }
961 ++lastvecindex;
962 }
963
964 return (0);
965 }
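/*
 * Userspace sketch of consuming the byte vector built above (illustrative,
 * not compiled here; addr and len are placeholders): one vector byte per
 * page, with MINCORE_INCORE set for resident pages.
 *
 *    #include <stdlib.h>
 *    #include <sys/mman.h>
 *
 *    size_t pages = (len + 4096 - 1) / 4096;
 *    char *vec = malloc(pages);
 *    if (mincore(addr, len, vec) == 0) {
 *        for (size_t i = 0; i < pages; i++) {
 *            if (vec[i] & MINCORE_INCORE) {
 *                // page i is resident
 *            }
 *        }
 *    }
 */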
966
967 int
968 mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
969 {
970 vm_map_t user_map;
971 vm_map_offset_t addr;
972 vm_map_size_t size, pageoff;
973 kern_return_t result;
974
975 AUDIT_ARG(addr, uap->addr);
976 AUDIT_ARG(len, uap->len);
977
978 addr = (vm_map_offset_t) uap->addr;
979 size = (vm_map_size_t)uap->len;
980
981 /* disable wrap around */
982 if (addr + size < addr)
983 return (EINVAL);
984
985 if (size == 0)
986 return (0);
987
988 pageoff = (addr & PAGE_MASK);
989 addr -= pageoff;
990 size = vm_map_round_page(size+pageoff);
991 user_map = current_map();
992
993 /* have to call vm_map_wire directly to pass "I don't know" protections */
994 result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);
995
996 if (result == KERN_RESOURCE_SHORTAGE)
997 return EAGAIN;
998 else if (result != KERN_SUCCESS)
999 return ENOMEM;
1000
1001 return 0; /* KERN_SUCCESS */
1002 }
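/*
 * Userspace sketch of the wiring path above (illustrative, not compiled
 * here; p stands for an existing page-aligned mapping): mlock() wires the
 * pages so they cannot be paged out, and EAGAIN signals a wired-memory
 * shortage rather than a bad address.
 *
 *    #include <sys/mman.h>
 *
 *    if (mlock(p, 4096) == 0) {
 *        // ... work on memory that must not be paged out ...
 *        munlock(p, 4096);
 *    }
 */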
1003
1004 int
1005 munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
1006 {
1007 mach_vm_offset_t addr;
1008 mach_vm_size_t size;
1009 vm_map_t user_map;
1010 kern_return_t result;
1011
1012 AUDIT_ARG(addr, uap->addr);
1013 AUDIT_ARG(len, uap->len);
1014
1015 addr = (mach_vm_offset_t) uap->addr;
1016 size = (mach_vm_size_t)uap->len;
1017 user_map = current_map();
1018
1019 /* JMM - need to remove all wirings by spec - this just removes one */
1020 result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
1021 return (result == KERN_SUCCESS ? 0 : ENOMEM);
1022 }
1023
1024
1025 int
1026 mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
1027 {
1028 return (ENOSYS);
1029 }
1030
1031 int
1032 munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
1033 {
1034 return(ENOSYS);
1035 }
1036
1037 /* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
1038 kern_return_t
1039 map_fd(struct map_fd_args *args)
1040 {
1041 int fd = args->fd;
1042 vm_offset_t offset = args->offset;
1043 vm_offset_t *va = args->va;
1044 boolean_t findspace = args->findspace;
1045 vm_size_t size = args->size;
1046 kern_return_t ret;
1047
1048 AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD);
1049 AUDIT_ARG(addr, CAST_DOWN(user_addr_t, args->va));
1050 AUDIT_ARG(fd, fd);
1051
1052 ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size);
1053
1054 AUDIT_MACH_SYSCALL_EXIT(ret);
1055 return ret;
1056 }
1057
1058 kern_return_t
1059 map_fd_funneled(
1060 int fd,
1061 vm_object_offset_t offset,
1062 vm_offset_t *va,
1063 boolean_t findspace,
1064 vm_size_t size)
1065 {
1066 kern_return_t result;
1067 struct fileproc *fp;
1068 struct vnode *vp;
1069 void * pager;
1070 vm_offset_t map_addr=0;
1071 vm_size_t map_size;
1072 int err=0;
1073 vm_map_t my_map;
1074 proc_t p = current_proc();
1075 struct vnode_attr vattr;
1076
1077 /*
1078 * Find the inode; verify that it's a regular file.
1079 */
1080
1081 err = fp_lookup(p, fd, &fp, 0);
1082 if (err)
1083 return(err);
1084
1085 if (fp->f_fglob->fg_type != DTYPE_VNODE){
1086 err = KERN_INVALID_ARGUMENT;
1087 goto bad;
1088 }
1089
1090 if (!(fp->f_fglob->fg_flag & FREAD)) {
1091 err = KERN_PROTECTION_FAILURE;
1092 goto bad;
1093 }
1094
1095 vp = (struct vnode *)fp->f_fglob->fg_data;
1096 err = vnode_getwithref(vp);
1097 if(err != 0)
1098 goto bad;
1099
1100 if (vp->v_type != VREG) {
1101 (void)vnode_put(vp);
1102 err = KERN_INVALID_ARGUMENT;
1103 goto bad;
1104 }
1105
1106 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1107
1108 /*
1109 * POSIX: mmap needs to update access time for mapped files
1110 */
1111 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
1112 VATTR_INIT(&vattr);
1113 nanotime(&vattr.va_access_time);
1114 VATTR_SET_ACTIVE(&vattr, va_access_time);
1115 vnode_setattr(vp, &vattr, vfs_context_current());
1116 }
1117
1118 if (offset & PAGE_MASK_64) {
1119 printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm);
1120 (void)vnode_put(vp);
1121 err = KERN_INVALID_ARGUMENT;
1122 goto bad;
1123 }
1124 map_size = round_page(size);
1125
1126 /*
1127 * Allow user to map in a zero length file.
1128 */
1129 if (size == 0) {
1130 (void)vnode_put(vp);
1131 err = KERN_SUCCESS;
1132 goto bad;
1133 }
1134 /*
1135 * Map in the file.
1136 */
1137 pager = (void *)ubc_getpager(vp);
1138 if (pager == NULL) {
1139 (void)vnode_put(vp);
1140 err = KERN_FAILURE;
1141 goto bad;
1142 }
1143
1144
1145 my_map = current_map();
1146
1147 result = vm_map_64(
1148 my_map,
1149 &map_addr, map_size, (vm_offset_t)0,
1150 VM_FLAGS_ANYWHERE, pager, offset, TRUE,
1151 VM_PROT_DEFAULT, VM_PROT_ALL,
1152 VM_INHERIT_DEFAULT);
1153 if (result != KERN_SUCCESS) {
1154 (void)vnode_put(vp);
1155 err = result;
1156 goto bad;
1157 }
1158
1159
1160 if (!findspace) {
1161 //K64todo fix for 64bit user?
1162 uint32_t dst_addr;
1163 vm_map_copy_t tmp;
1164
1165 if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) ||
1166 trunc_page(dst_addr) != dst_addr) {
1167 (void) vm_map_remove(
1168 my_map,
1169 map_addr, map_addr + map_size,
1170 VM_MAP_NO_FLAGS);
1171 (void)vnode_put(vp);
1172 err = KERN_INVALID_ADDRESS;
1173 goto bad;
1174 }
1175
1176 result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
1177 (vm_map_size_t)map_size, TRUE, &tmp);
1178 if (result != KERN_SUCCESS) {
1179
1180 (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
1181 vm_map_round_page(map_addr + map_size),
1182 VM_MAP_NO_FLAGS);
1183 (void)vnode_put(vp);
1184 err = result;
1185 goto bad;
1186 }
1187
1188 result = vm_map_copy_overwrite(my_map,
1189 (vm_map_address_t)dst_addr, tmp, FALSE);
1190 if (result != KERN_SUCCESS) {
1191 vm_map_copy_discard(tmp);
1192 (void)vnode_put(vp);
1193 err = result;
1194 goto bad;
1195 }
1196 } else {
1197 // K64todo bug compatible now, should fix for 64bit user
1198 uint32_t user_map_addr = CAST_DOWN_EXPLICIT(uint32_t, map_addr);
1199 if (copyout(&user_map_addr, CAST_USER_ADDR_T(va), sizeof (user_map_addr))) {
1200 (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
1201 vm_map_round_page(map_addr + map_size),
1202 VM_MAP_NO_FLAGS);
1203 (void)vnode_put(vp);
1204 err = KERN_INVALID_ADDRESS;
1205 goto bad;
1206 }
1207 }
1208
1209 ubc_setthreadcred(vp, current_proc(), current_thread());
1210 (void)vnode_put(vp);
1211 err = 0;
1212 bad:
1213 fp_drop(p, fd, fp, 0);
1214 return (err);
1215 }
1216