1 /*
2 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988 University of Utah.
30 * Copyright (c) 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * the Systems Programming Group of the University of Utah Computer
35 * Science Department.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
66 *
67 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 /*
77 * Mapped file (mmap) interface to VM
78 */
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/filedesc.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/resourcevar.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/acct.h>
88 #include <sys/wait.h>
89 #include <sys/file_internal.h>
90 #include <sys/vadvise.h>
91 #include <sys/trace.h>
92 #include <sys/mman.h>
93 #include <sys/conf.h>
94 #include <sys/stat.h>
95 #include <sys/ubc.h>
96 #include <sys/ubc_internal.h>
97 #include <sys/sysproto.h>
98 #if CONFIG_PROTECT
99 #include <sys/cprotect.h>
100 #endif
101
102 #include <sys/syscall.h>
103 #include <sys/kdebug.h>
104
105 #include <security/audit/audit.h>
106 #include <bsm/audit_kevents.h>
107
108 #include <mach/mach_types.h>
109 #include <mach/mach_traps.h>
110 #include <mach/vm_sync.h>
111 #include <mach/vm_behavior.h>
112 #include <mach/vm_inherit.h>
113 #include <mach/vm_statistics.h>
114 #include <mach/mach_vm.h>
115 #include <mach/vm_map.h>
116 #include <mach/host_priv.h>
117
118 #include <kern/cpu_number.h>
119 #include <kern/host.h>
120
121 #include <vm/vm_map.h>
122 #include <vm/vm_kern.h>
123 #include <vm/vm_pager.h>
124 #include <vm/vm_protos.h>
125
126 /* XXX the following function should probably be static */
127 kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *,
128 boolean_t, vm_size_t);
129
130 /*
131 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
132 * XXX usage is PROT_* from an interface perspective. Thus the values of
133 * XXX VM_PROT_* and PROT_* need to correspond.
134 */
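/*
 * For illustration only (values as defined in <sys/mman.h> and
 * <mach/vm_prot.h>):
 *
 *	PROT_READ  (0x01) == VM_PROT_READ    (0x01)
 *	PROT_WRITE (0x02) == VM_PROT_WRITE   (0x02)
 *	PROT_EXEC  (0x04) == VM_PROT_EXECUTE (0x04)
 *
 * This is what lets mmap() below mask the user-supplied PROT_* bits with
 * VM_PROT_ALL and use the result as a vm_prot_t directly.
 */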
135 int
136 mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
137 {
138 /*
139 * Map in special device (must be SHARED) or file
140 */
141 struct fileproc *fp;
142 register struct vnode *vp;
143 int flags;
144 int prot;
145 int err=0;
146 vm_map_t user_map;
147 kern_return_t result;
148 mach_vm_offset_t user_addr;
149 mach_vm_size_t user_size;
150 vm_object_offset_t pageoff;
151 vm_object_offset_t file_pos;
152 int alloc_flags=0;
153 boolean_t docow;
154 vm_prot_t maxprot;
155 void *handle;
156 memory_object_t pager = MEMORY_OBJECT_NULL;
157 memory_object_control_t control;
158 int mapanon=0;
159 int fpref=0;
160 int error =0;
161 int fd = uap->fd;
162 int num_retries = 0;
163
164 user_addr = (mach_vm_offset_t)uap->addr;
165 user_size = (mach_vm_size_t) uap->len;
166
167 AUDIT_ARG(addr, user_addr);
168 AUDIT_ARG(len, user_size);
169 AUDIT_ARG(fd, uap->fd);
170
171 prot = (uap->prot & VM_PROT_ALL);
172 #if 3777787
173 /*
174 * Since the hardware currently does not support writing without
175 * read-before-write, or execution-without-read, if the request is
176 * for write or execute access, we must imply read access as well;
177 * otherwise programs expecting this to work will fail to operate.
178 */
179 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
180 prot |= VM_PROT_READ;
181 #endif /* radar 3777787 */
182
183 flags = uap->flags;
184 vp = NULLVP;
185
186 /*
187 	 * The vm code does not have prototypes and the compiler doesn't do
188 	 * the right thing when you cast a 64-bit value and pass it in a
189 	 * function call. So here it is.
190 */
191 file_pos = (vm_object_offset_t)uap->pos;
192
193
194 /* make sure mapping fits into numeric range etc */
195 if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
196 return (EINVAL);
197
198 /*
199 * Align the file position to a page boundary,
200 * and save its page offset component.
201 */
202 pageoff = (file_pos & PAGE_MASK);
203 file_pos -= (vm_object_offset_t)pageoff;
204
205
206 /* Adjust size for rounding (on both ends). */
207 user_size += pageoff; /* low end... */
208 user_size = mach_vm_round_page(user_size); /* hi end */
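	/*
	 * Worked example (assuming a 4K page size): uap->pos == 0x11234 and
	 * uap->len == 0x100 give pageoff == 0x234, file_pos == 0x11000 and
	 * user_size == round_page(0x100 + 0x234) == 0x1000, so the mapping
	 * covers the whole page containing the requested range.
	 */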
209
210 if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || (flags & MAP_FILE))){
211 return EINVAL;
212 }
213 /*
214 * Check for illegal addresses. Watch out for address wrap... Note
215 * that VM_*_ADDRESS are not constants due to casts (argh).
216 */
217 if (flags & MAP_FIXED) {
218 /*
219 * The specified address must have the same remainder
220 * as the file offset taken modulo PAGE_SIZE, so it
221 * should be aligned after adjustment by pageoff.
222 */
223 user_addr -= pageoff;
224 if (user_addr & PAGE_MASK)
225 return (EINVAL);
226 }
227 #ifdef notyet
228 	/* Do not have APIs to get this info; need to wait until then. */
229 /*
230 * XXX for non-fixed mappings where no hint is provided or
231 * the hint would fall in the potential heap space,
232 * place it after the end of the largest possible heap.
233 *
234 * There should really be a pmap call to determine a reasonable
235 * location.
236 */
237 else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
238 addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
239
240 #endif
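	/*
	 * Example of the MAP_FIXED alignment check above (4K pages): with
	 * pageoff == 0x234, a fixed address of 0x20234 is accepted
	 * (0x20234 - 0x234 == 0x20000, page-aligned), while 0x20000 is
	 * rejected with EINVAL because its offset within the page does not
	 * match the file offset's.
	 */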
241
242 alloc_flags = 0;
243
244 if (flags & MAP_ANON) {
245
246 maxprot = VM_PROT_ALL;
247 #if CONFIG_MACF
248 /*
249 * Entitlement check.
250 * Re-enable once mac* is implemented.
251 */
252 /*error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
253 if (error) {
254 return EINVAL;
255 }*/
256 #endif /* MAC */
257
258 /*
259 * Mapping blank space is trivial. Use positive fds as the alias
260 * value for memory tracking.
261 */
262 if (fd != -1) {
263 /*
264 * Use "fd" to pass (some) Mach VM allocation flags,
265 * (see the VM_FLAGS_* definitions).
266 */
267 alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK |
268 VM_FLAGS_PURGABLE);
269 if (alloc_flags != fd) {
270 /* reject if there are any extra flags */
271 return EINVAL;
272 }
273 }
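		/*
		 * Illustrative userspace idiom for this alias/tag use of "fd",
		 * assuming the VM_MAKE_TAG() convention from
		 * <mach/vm_statistics.h>:
		 *
		 *	p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		 *	    MAP_ANON | MAP_PRIVATE,
		 *	    VM_MAKE_TAG(VM_MEMORY_MALLOC), 0);
		 *
		 * The tag lands in the VM_FLAGS_ALIAS_MASK bits of alloc_flags
		 * and shows up in tools such as vmmap(1).
		 */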
274
275 handle = NULL;
276 file_pos = 0;
277 mapanon = 1;
278 } else {
279 struct vnode_attr va;
280 vfs_context_t ctx = vfs_context_current();
281
282 /*
283 * Mapping file, get fp for validation. Obtain vnode and make
284 * sure it is of appropriate type.
285 */
286 err = fp_lookup(p, fd, &fp, 0);
287 if (err)
288 return(err);
289 fpref = 1;
290 if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
291 uap->addr = (user_addr_t)user_addr;
292 uap->len = (user_size_t)user_size;
293 uap->prot = prot;
294 uap->flags = flags;
295 uap->pos = file_pos;
296 error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
297 goto bad;
298 }
299
300 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
301 error = EINVAL;
302 goto bad;
303 }
304 vp = (struct vnode *)fp->f_fglob->fg_data;
305 error = vnode_getwithref(vp);
306 if(error != 0)
307 goto bad;
308
309 if (vp->v_type != VREG && vp->v_type != VCHR) {
310 (void)vnode_put(vp);
311 error = EINVAL;
312 goto bad;
313 }
314
315 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
316
317 /*
318 * POSIX: mmap needs to update access time for mapped files
319 */
320 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
321 VATTR_INIT(&va);
322 nanotime(&va.va_access_time);
323 VATTR_SET_ACTIVE(&va, va_access_time);
324 vnode_setattr(vp, &va, ctx);
325 }
326
327 /*
328 * XXX hack to handle use of /dev/zero to map anon memory (ala
329 * SunOS).
330 */
331 if (vp->v_type == VCHR || vp->v_type == VSTR) {
332 (void)vnode_put(vp);
333 error = ENODEV;
334 goto bad;
335 } else {
336 /*
337 * Ensure that file and memory protections are
338 * compatible. Note that we only worry about
339 * writability if mapping is shared; in this case,
340 * current and max prot are dictated by the open file.
341 * XXX use the vnode instead? Problem is: what
342 * credentials do we use for determination? What if
343 * proc does a setuid?
344 */
345 maxprot = VM_PROT_EXECUTE; /* ??? */
346 if (fp->f_fglob->fg_flag & FREAD)
347 maxprot |= VM_PROT_READ;
348 else if (prot & PROT_READ) {
349 (void)vnode_put(vp);
350 error = EACCES;
351 goto bad;
352 }
353 /*
354 * If we are sharing potential changes (either via
355 * MAP_SHARED or via the implicit sharing of character
356 * device mappings), and we are trying to get write
357 * permission although we opened it without asking
358 * for it, bail out.
359 */
360
361 if ((flags & MAP_SHARED) != 0) {
362 if ((fp->f_fglob->fg_flag & FWRITE) != 0 &&
363 /*
364 * Do not allow writable mappings of
365 * swap files (see vm_swapfile_pager.c).
366 */
367 !vnode_isswap(vp)) {
368 /*
369 * check for write access
370 *
371 * Note that we already made this check when granting FWRITE
372 * against the file, so it seems redundant here.
373 */
374 error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
375
376 /* if not granted for any reason, but we wanted it, bad */
377 if ((prot & PROT_WRITE) && (error != 0)) {
378 vnode_put(vp);
379 goto bad;
380 }
381
382 /* if writable, remember */
383 if (error == 0)
384 maxprot |= VM_PROT_WRITE;
385
386 } else if ((prot & PROT_WRITE) != 0) {
387 (void)vnode_put(vp);
388 error = EACCES;
389 goto bad;
390 }
391 } else
392 maxprot |= VM_PROT_WRITE;
393
394 handle = (void *)vp;
395 #if CONFIG_MACF
396 error = mac_file_check_mmap(vfs_context_ucred(ctx),
397 fp->f_fglob, prot, flags, &maxprot);
398 if (error) {
399 (void)vnode_put(vp);
400 goto bad;
401 }
402 #endif /* MAC */
403
404 #if CONFIG_PROTECT
405 {
406 void *cnode;
407 if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
408 error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
409 if (error) {
410 (void) vnode_put(vp);
411 goto bad;
412 }
413 }
414 }
415 #endif /* CONFIG_PROTECT */
416
417
418 }
419 }
420
421 if (user_size == 0) {
422 if (!mapanon)
423 (void)vnode_put(vp);
424 error = 0;
425 goto bad;
426 }
427
428 /*
429 * We bend a little - round the start and end addresses
430 * to the nearest page boundary.
431 */
432 user_size = mach_vm_round_page(user_size);
433
434 if (file_pos & PAGE_MASK_64) {
435 if (!mapanon)
436 (void)vnode_put(vp);
437 error = EINVAL;
438 goto bad;
439 }
440
441 user_map = current_map();
442
443 if ((flags & MAP_FIXED) == 0) {
444 alloc_flags |= VM_FLAGS_ANYWHERE;
445 user_addr = mach_vm_round_page(user_addr);
446 } else {
447 if (user_addr != mach_vm_trunc_page(user_addr)) {
448 if (!mapanon)
449 (void)vnode_put(vp);
450 error = EINVAL;
451 goto bad;
452 }
453 /*
454 * mmap(MAP_FIXED) will replace any existing mappings in the
455 * specified range, if the new mapping is successful.
456 * If we just deallocate the specified address range here,
457 * another thread might jump in and allocate memory in that
458 * range before we get a chance to establish the new mapping,
459 * and we won't have a chance to restore the old mappings.
460 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
461 * has to deallocate the existing mappings and establish the
462 * new ones atomically.
463 */
464 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
465 }
466
467 if (flags & MAP_NOCACHE)
468 alloc_flags |= VM_FLAGS_NO_CACHE;
469
470 if (flags & MAP_JIT){
471 alloc_flags |= VM_FLAGS_MAP_JIT;
472 }
473 /*
474 * Lookup/allocate object.
475 */
476 if (handle == NULL) {
477 control = NULL;
478 #ifdef notyet
479 /* Hmm .. */
480 #if defined(VM_PROT_READ_IS_EXEC)
481 if (prot & VM_PROT_READ)
482 prot |= VM_PROT_EXECUTE;
483 if (maxprot & VM_PROT_READ)
484 maxprot |= VM_PROT_EXECUTE;
485 #endif
486 #endif
487
488 #if 3777787
489 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
490 prot |= VM_PROT_READ;
491 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
492 maxprot |= VM_PROT_READ;
493 #endif /* radar 3777787 */
494 map_anon_retry:
495 result = vm_map_enter_mem_object(user_map,
496 &user_addr, user_size,
497 0, alloc_flags,
498 IPC_PORT_NULL, 0, FALSE,
499 prot, maxprot,
500 (flags & MAP_SHARED) ?
501 VM_INHERIT_SHARE :
502 VM_INHERIT_DEFAULT);
503
504 /* If a non-binding address was specified for this anonymous
505 	 * mapping, retry the mapping near the bottom of the map (PAGE_SIZE)
506 	 * in the event the mapping operation failed due to
507 	 * lack of space between the requested address and the map's maximum.
508 */
509 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
510 user_addr = PAGE_SIZE;
511 goto map_anon_retry;
512 }
513 } else {
514 if (vnode_isswap(vp)) {
515 /*
516 * Map swap files with a special pager
517 * that returns obfuscated contents.
518 */
519 control = NULL;
520 pager = swapfile_pager_setup(vp);
521 if (pager != MEMORY_OBJECT_NULL) {
522 control = swapfile_pager_control(pager);
523 }
524 } else {
525 control = ubc_getobject(vp, UBC_FLAGS_NONE);
526 }
527
528 if (control == NULL) {
529 (void)vnode_put(vp);
530 error = ENOMEM;
531 goto bad;
532 }
533
534 /*
535 * Set credentials:
536 * FIXME: if we're writing the file we need a way to
537 * ensure that someone doesn't replace our R/W creds
538 * with ones that only work for read.
539 */
540
541 ubc_setthreadcred(vp, p, current_thread());
542 docow = FALSE;
543 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
544 docow = TRUE;
545 }
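		/*
		 * i.e. a MAP_PRIVATE file mapping gets copy-on-write semantics:
		 * stores go to private copies of the pages and never reach the
		 * underlying vnode.
		 */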
546
547 #ifdef notyet
548 /* Hmm .. */
549 #if defined(VM_PROT_READ_IS_EXEC)
550 if (prot & VM_PROT_READ)
551 prot |= VM_PROT_EXECUTE;
552 if (maxprot & VM_PROT_READ)
553 maxprot |= VM_PROT_EXECUTE;
554 #endif
555 #endif /* notyet */
556
557 #if 3777787
558 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
559 prot |= VM_PROT_READ;
560 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
561 maxprot |= VM_PROT_READ;
562 #endif /* radar 3777787 */
563 map_file_retry:
564 result = vm_map_enter_mem_object_control(user_map,
565 &user_addr, user_size,
566 0, alloc_flags,
567 control, file_pos,
568 docow, prot, maxprot,
569 (flags & MAP_SHARED) ?
570 VM_INHERIT_SHARE :
571 VM_INHERIT_DEFAULT);
572
573 		/* If a non-binding address was specified for this file-backed
574 		 * mapping, retry the mapping near the bottom of the map (PAGE_SIZE)
575 		 * in the event the mapping operation failed due to
576 		 * lack of space between the requested address and the map's maximum.
577 */
578 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
579 user_addr = PAGE_SIZE;
580 goto map_file_retry;
581 }
582 }
583
584 if (!mapanon) {
585 (void)vnode_put(vp);
586 }
587
588 switch (result) {
589 case KERN_SUCCESS:
590 *retval = user_addr + pageoff;
591 error = 0;
592 break;
593 case KERN_INVALID_ADDRESS:
594 case KERN_NO_SPACE:
595 error = ENOMEM;
596 break;
597 case KERN_PROTECTION_FAILURE:
598 error = EACCES;
599 break;
600 default:
601 error = EINVAL;
602 break;
603 }
604 bad:
605 if (pager != MEMORY_OBJECT_NULL) {
606 /*
607 * Release the reference on the pager.
608 * If the mapping was successful, it now holds
609 * an extra reference.
610 */
611 memory_object_deallocate(pager);
612 }
613 if (fpref)
614 fp_drop(p, fd, fp, 0);
615
616 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
617 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
618 (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
619
620 return(error);
621 }
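/*
 * Typical userspace callers of the path above (illustrative only):
 *
 *	fd = open("/some/file", O_RDONLY);
 *	p = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_SHARED, fd, 0);
 *
 * or, for anonymous memory (takes the MAP_ANON branch, no vnode involved):
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 */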
622
623 int
624 msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
625 {
626 __pthread_testcancel(1);
627 return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
628 }
629
630 int
631 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
632 {
633 mach_vm_offset_t addr;
634 mach_vm_size_t size;
635 int flags;
636 vm_map_t user_map;
637 int rv;
638 vm_sync_t sync_flags=0;
639
640 addr = (mach_vm_offset_t) uap->addr;
641 size = (mach_vm_size_t)uap->len;
642
643 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
644
645 if (addr & PAGE_MASK_64) {
646 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
647 return EINVAL;
648 }
649 if (size == 0) {
650 /*
651 		 * We cannot support this properly without maintaining a
652 		 * list of all mmaps done. Cannot use vm_map_entry as they could be
653 		 * split or coalesced by independent actions. So instead of returning
654 		 * inaccurate results, let's just return an error for an invalid
655 		 * size.
656 */
657 return (EINVAL); /* XXX breaks posix apps */
658 }
659
660 flags = uap->flags;
661 /* disallow contradictory flags */
662 if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
663 return (EINVAL);
664
665 if (flags & MS_KILLPAGES)
666 sync_flags |= VM_SYNC_KILLPAGES;
667 if (flags & MS_DEACTIVATE)
668 sync_flags |= VM_SYNC_DEACTIVATE;
669 if (flags & MS_INVALIDATE)
670 sync_flags |= VM_SYNC_INVALIDATE;
671
672 if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
673 if (flags & MS_ASYNC)
674 sync_flags |= VM_SYNC_ASYNCHRONOUS;
675 else
676 sync_flags |= VM_SYNC_SYNCHRONOUS;
677 }
678
679 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */
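	/*
	 * Example of the translation above: msync(addr, len,
	 * MS_SYNC | MS_INVALIDATE) becomes VM_SYNC_SYNCHRONOUS |
	 * VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS.
	 */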
680
681 user_map = current_map();
682 rv = mach_vm_msync(user_map, addr, size, sync_flags);
683
684 switch (rv) {
685 case KERN_SUCCESS:
686 break;
687 case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
688 return (ENOMEM);
689 case KERN_FAILURE:
690 return (EIO);
691 default:
692 return (EINVAL);
693 }
694 return (0);
695 }
696
697
698 int
699 munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
700 {
701 mach_vm_offset_t user_addr;
702 mach_vm_size_t user_size;
703 kern_return_t result;
704
705 user_addr = (mach_vm_offset_t) uap->addr;
706 user_size = (mach_vm_size_t) uap->len;
707
708 AUDIT_ARG(addr, user_addr);
709 AUDIT_ARG(len, user_size);
710
711 if (user_addr & PAGE_MASK_64) {
712 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
713 return EINVAL;
714 }
715
716 if (user_addr + user_size < user_addr)
717 return(EINVAL);
718
719 if (user_size == 0) {
720 /* UNIX SPEC: size is 0, return EINVAL */
721 return EINVAL;
722 }
723
724 result = mach_vm_deallocate(current_map(), user_addr, user_size);
725 if (result != KERN_SUCCESS) {
726 return(EINVAL);
727 }
728 return(0);
729 }
730
731 int
732 mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
733 {
734 register vm_prot_t prot;
735 mach_vm_offset_t user_addr;
736 mach_vm_size_t user_size;
737 kern_return_t result;
738 vm_map_t user_map;
739 #if CONFIG_MACF
740 int error;
741 #endif
742
743 AUDIT_ARG(addr, uap->addr);
744 AUDIT_ARG(len, uap->len);
745 AUDIT_ARG(value32, uap->prot);
746
747 user_addr = (mach_vm_offset_t) uap->addr;
748 user_size = (mach_vm_size_t) uap->len;
749 prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED));
750
751 if (user_addr & PAGE_MASK_64) {
752 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
753 return EINVAL;
754 }
755
756 #ifdef notyet
757 /* Hmm .. */
758 #if defined(VM_PROT_READ_IS_EXEC)
759 if (prot & VM_PROT_READ)
760 prot |= VM_PROT_EXECUTE;
761 #endif
762 #endif /* notyet */
763
764 #if 3936456
765 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
766 prot |= VM_PROT_READ;
767 #endif /* 3936456 */
768
769 user_map = current_map();
770
771 #if CONFIG_MACF
772 /*
773 * The MAC check for mprotect is of limited use for 2 reasons:
774 * Without mmap revocation, the caller could have asked for the max
775 * protections initially instead of a reduced set, so a mprotect
776 * check would offer no new security.
777 * It is not possible to extract the vnode from the pager object(s)
778 * of the target memory range.
779 * However, the MAC check may be used to prevent a process from,
780 * e.g., making the stack executable.
781 */
782 error = mac_proc_check_mprotect(p, user_addr,
783 user_size, prot);
784 if (error)
785 return (error);
786 #endif
787
788 if(prot & VM_PROT_TRUSTED) {
789 #if CONFIG_DYNAMIC_CODE_SIGNING
790 /* CODE SIGNING ENFORCEMENT - JIT support */
791 /* The special protection value VM_PROT_TRUSTED requests that we treat
792 * this page as if it had a valid code signature.
793 * If this is enabled, there MUST be a MAC policy implementing the
794 * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be
795 * compromised because the check would always succeed and thusly any
796 * process could sign dynamically. */
797 result = vm_map_sign(user_map,
798 vm_map_trunc_page(user_addr),
799 vm_map_round_page(user_addr+user_size));
800 switch (result) {
801 case KERN_SUCCESS:
802 break;
803 case KERN_INVALID_ADDRESS:
804 /* UNIX SPEC: for an invalid address range, return ENOMEM */
805 return ENOMEM;
806 default:
807 return EINVAL;
808 }
809 #else
810 return ENOTSUP;
811 #endif
812 }
813 prot &= ~VM_PROT_TRUSTED;
814
815 result = mach_vm_protect(user_map, user_addr, user_size,
816 FALSE, prot);
817 switch (result) {
818 case KERN_SUCCESS:
819 return (0);
820 case KERN_PROTECTION_FAILURE:
821 return (EACCES);
822 case KERN_INVALID_ADDRESS:
823 /* UNIX SPEC: for an invalid address range, return ENOMEM */
824 return ENOMEM;
825 }
826 return (EINVAL);
827 }
828
829
830 int
831 minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
832 {
833 mach_vm_offset_t addr;
834 mach_vm_size_t size;
835 register vm_inherit_t inherit;
836 vm_map_t user_map;
837 kern_return_t result;
838
839 AUDIT_ARG(addr, uap->addr);
840 AUDIT_ARG(len, uap->len);
841 AUDIT_ARG(value32, uap->inherit);
842
843 addr = (mach_vm_offset_t)uap->addr;
844 size = (mach_vm_size_t)uap->len;
845 inherit = uap->inherit;
846
847 user_map = current_map();
848 result = mach_vm_inherit(user_map, addr, size,
849 inherit);
850 switch (result) {
851 case KERN_SUCCESS:
852 return (0);
853 case KERN_PROTECTION_FAILURE:
854 return (EACCES);
855 }
856 return (EINVAL);
857 }
858
859 int
860 madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
861 {
862 vm_map_t user_map;
863 mach_vm_offset_t start;
864 mach_vm_size_t size;
865 vm_behavior_t new_behavior;
866 kern_return_t result;
867
868 /*
869 * Since this routine is only advisory, we default to conservative
870 * behavior.
871 */
872 switch (uap->behav) {
873 case MADV_RANDOM:
874 new_behavior = VM_BEHAVIOR_RANDOM;
875 break;
876 case MADV_SEQUENTIAL:
877 new_behavior = VM_BEHAVIOR_SEQUENTIAL;
878 break;
879 case MADV_NORMAL:
880 new_behavior = VM_BEHAVIOR_DEFAULT;
881 break;
882 case MADV_WILLNEED:
883 new_behavior = VM_BEHAVIOR_WILLNEED;
884 break;
885 case MADV_DONTNEED:
886 new_behavior = VM_BEHAVIOR_DONTNEED;
887 break;
888 case MADV_FREE:
889 new_behavior = VM_BEHAVIOR_FREE;
890 break;
891 case MADV_ZERO_WIRED_PAGES:
892 new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
893 break;
894 case MADV_FREE_REUSABLE:
895 new_behavior = VM_BEHAVIOR_REUSABLE;
896 break;
897 case MADV_FREE_REUSE:
898 new_behavior = VM_BEHAVIOR_REUSE;
899 break;
900 case MADV_CAN_REUSE:
901 new_behavior = VM_BEHAVIOR_CAN_REUSE;
902 break;
903 default:
904 return(EINVAL);
905 }
906
907 start = (mach_vm_offset_t) uap->addr;
908 size = (mach_vm_size_t) uap->len;
909
910 user_map = current_map();
911
912 result = mach_vm_behavior_set(user_map, start, size, new_behavior);
913 switch (result) {
914 case KERN_SUCCESS:
915 return 0;
916 case KERN_INVALID_ADDRESS:
917 return EINVAL;
918 case KERN_NO_SPACE:
919 return ENOMEM;
920 }
921
922 return EINVAL;
923 }
924
925 int
926 mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
927 {
928 mach_vm_offset_t addr, first_addr, end;
929 vm_map_t map;
930 user_addr_t vec;
931 int error;
932 int vecindex, lastvecindex;
933 int mincoreinfo=0;
934 int pqueryinfo;
935 kern_return_t ret;
936 int numref;
937
938 char c;
939
940 map = current_map();
941
942 /*
943 * Make sure that the addresses presented are valid for user
944 * mode.
945 */
946 first_addr = addr = mach_vm_trunc_page(uap->addr);
947 end = addr + mach_vm_round_page(uap->len);
948
949 if (end < addr)
950 return (EINVAL);
951
952 /*
953 * Address of byte vector
954 */
955 vec = uap->vec;
956
957 map = current_map();
958
959 /*
960 * Do this on a map entry basis so that if the pages are not
961 	 * in the current process's address space, we can easily look
962 * up the pages elsewhere.
963 */
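	/*
	 * Resulting layout (illustrative): for a request covering three
	 * pages, vec[0..2] each receive one byte, the bitwise OR of
	 * MINCORE_INCORE, MINCORE_REFERENCED and MINCORE_MODIFIED as
	 * appropriate for that page.
	 */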
964 lastvecindex = -1;
965 for( ; addr < end; addr += PAGE_SIZE ) {
966 pqueryinfo = 0;
967 ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
968 if (ret != KERN_SUCCESS)
969 pqueryinfo = 0;
970 mincoreinfo = 0;
971 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
972 mincoreinfo |= MINCORE_INCORE;
973 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
974 mincoreinfo |= MINCORE_REFERENCED;
975 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
976 mincoreinfo |= MINCORE_MODIFIED;
977
978
979 /*
980 * calculate index into user supplied byte vector
981 */
982 vecindex = (addr - first_addr)>> PAGE_SHIFT;
983
984 /*
985 * If we have skipped map entries, we need to make sure that
986 * the byte vector is zeroed for those skipped entries.
987 */
988 while((lastvecindex + 1) < vecindex) {
989 c = 0;
990 error = copyout(&c, vec + lastvecindex, 1);
991 if (error) {
992 return (EFAULT);
993 }
994 ++lastvecindex;
995 }
996
997 /*
998 * Pass the page information to the user
999 */
1000 c = (char)mincoreinfo;
1001 error = copyout(&c, vec + vecindex, 1);
1002 if (error) {
1003 return (EFAULT);
1004 }
1005 lastvecindex = vecindex;
1006 }
1007
1008
1009 /*
1010 * Zero the last entries in the byte vector.
1011 */
1012 vecindex = (end - first_addr) >> PAGE_SHIFT;
1013 while((lastvecindex + 1) < vecindex) {
1014 c = 0;
1015 error = copyout(&c, vec + lastvecindex, 1);
1016 if (error) {
1017 return (EFAULT);
1018 }
1019 ++lastvecindex;
1020 }
1021
1022 return (0);
1023 }
1024
1025 int
1026 mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retval)
1027 {
1028 vm_map_t user_map;
1029 vm_map_offset_t addr;
1030 vm_map_size_t size, pageoff;
1031 kern_return_t result;
1032
1033 AUDIT_ARG(addr, uap->addr);
1034 AUDIT_ARG(len, uap->len);
1035
1036 addr = (vm_map_offset_t) uap->addr;
1037 size = (vm_map_size_t)uap->len;
1038
1039 /* disable wrap around */
1040 if (addr + size < addr)
1041 return (EINVAL);
1042
1043 if (size == 0)
1044 return (0);
1045
1046 pageoff = (addr & PAGE_MASK);
1047 addr -= pageoff;
1048 size = vm_map_round_page(size+pageoff);
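	/*
	 * e.g. (4K pages) mlock(0x20100, 0x100) wires the whole page:
	 * addr becomes 0x20000 and size becomes round_page(0x200) == 0x1000.
	 */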
1049 user_map = current_map();
1050
1051 /* have to call vm_map_wire directly to pass "I don't know" protections */
1052 result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);
1053
1054 if (result == KERN_RESOURCE_SHORTAGE)
1055 return EAGAIN;
1056 else if (result != KERN_SUCCESS)
1057 return ENOMEM;
1058
1059 return 0; /* KERN_SUCCESS */
1060 }
1061
1062 int
1063 munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
1064 {
1065 mach_vm_offset_t addr;
1066 mach_vm_size_t size;
1067 vm_map_t user_map;
1068 kern_return_t result;
1069
1070 AUDIT_ARG(addr, uap->addr);
1071 	AUDIT_ARG(len, uap->len);
1072
1073 addr = (mach_vm_offset_t) uap->addr;
1074 size = (mach_vm_size_t)uap->len;
1075 user_map = current_map();
1076
1077 /* JMM - need to remove all wirings by spec - this just removes one */
1078 result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
1079 return (result == KERN_SUCCESS ? 0 : ENOMEM);
1080 }
1081
1082
1083 int
1084 mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
1085 {
1086 return (ENOSYS);
1087 }
1088
1089 int
1090 munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
1091 {
1092 return(ENOSYS);
1093 }
1094
1095 #if !defined(CONFIG_EMBEDDED)
1096 /* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
1097 kern_return_t
1098 map_fd(struct map_fd_args *args)
1099 {
1100 int fd = args->fd;
1101 vm_offset_t offset = args->offset;
1102 vm_offset_t *va = args->va;
1103 boolean_t findspace = args->findspace;
1104 vm_size_t size = args->size;
1105 kern_return_t ret;
1106
1107 AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD);
1108 AUDIT_ARG(addr, CAST_DOWN(user_addr_t, args->va));
1109 AUDIT_ARG(fd, fd);
1110
1111 ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size);
1112
1113 AUDIT_MACH_SYSCALL_EXIT(ret);
1114 return ret;
1115 }
1116
1117 kern_return_t
1118 map_fd_funneled(
1119 int fd,
1120 vm_object_offset_t offset,
1121 vm_offset_t *va,
1122 boolean_t findspace,
1123 vm_size_t size)
1124 {
1125 kern_return_t result;
1126 struct fileproc *fp;
1127 struct vnode *vp;
1128 void * pager;
1129 vm_offset_t map_addr=0;
1130 vm_size_t map_size;
1131 int err=0;
1132 vm_prot_t maxprot = VM_PROT_ALL;
1133 vm_map_t my_map;
1134 proc_t p = current_proc();
1135 struct vnode_attr vattr;
1136
1137 /*
1138 * Find the inode; verify that it's a regular file.
1139 */
1140
1141 err = fp_lookup(p, fd, &fp, 0);
1142 if (err)
1143 return(err);
1144
1145 if (fp->f_fglob->fg_type != DTYPE_VNODE){
1146 err = KERN_INVALID_ARGUMENT;
1147 goto bad;
1148 }
1149
1150 if (!(fp->f_fglob->fg_flag & FREAD)) {
1151 err = KERN_PROTECTION_FAILURE;
1152 goto bad;
1153 }
1154
1155 vp = (struct vnode *)fp->f_fglob->fg_data;
1156 err = vnode_getwithref(vp);
1157 if(err != 0)
1158 goto bad;
1159
1160 if (vp->v_type != VREG) {
1161 (void)vnode_put(vp);
1162 err = KERN_INVALID_ARGUMENT;
1163 goto bad;
1164 }
1165
1166 #if CONFIG_MACF
1167 err = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
1168 fp->f_fglob, VM_PROT_DEFAULT, MAP_FILE, &maxprot);
1169 if (err) {
1170 (void)vnode_put(vp);
1171 goto bad;
1172 }
1173 #endif /* MAC */
1174
1175 #if CONFIG_PROTECT
1176 /* check for content protection access */
1177 {
1178 void *cnode;
1179 if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
1180 err = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
1181 if (err != 0) {
1182 (void)vnode_put(vp);
1183 goto bad;
1184 }
1185 }
1186 }
1187 #endif /* CONFIG_PROTECT */
1188
1189 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1190
1191 /*
1192 * POSIX: mmap needs to update access time for mapped files
1193 */
1194 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
1195 VATTR_INIT(&vattr);
1196 nanotime(&vattr.va_access_time);
1197 VATTR_SET_ACTIVE(&vattr, va_access_time);
1198 vnode_setattr(vp, &vattr, vfs_context_current());
1199 }
1200
1201 if (offset & PAGE_MASK_64) {
1202 printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm);
1203 (void)vnode_put(vp);
1204 err = KERN_INVALID_ARGUMENT;
1205 goto bad;
1206 }
1207 map_size = round_page(size);
1208
1209 /*
1210 * Allow user to map in a zero length file.
1211 */
1212 if (size == 0) {
1213 (void)vnode_put(vp);
1214 err = KERN_SUCCESS;
1215 goto bad;
1216 }
1217 /*
1218 * Map in the file.
1219 */
1220 pager = (void *)ubc_getpager(vp);
1221 if (pager == NULL) {
1222 (void)vnode_put(vp);
1223 err = KERN_FAILURE;
1224 goto bad;
1225 }
1226
1227
1228 my_map = current_map();
1229
1230 result = vm_map_64(
1231 my_map,
1232 &map_addr, map_size, (vm_offset_t)0,
1233 VM_FLAGS_ANYWHERE, pager, offset, TRUE,
1234 VM_PROT_DEFAULT, maxprot,
1235 VM_INHERIT_DEFAULT);
1236 if (result != KERN_SUCCESS) {
1237 (void)vnode_put(vp);
1238 err = result;
1239 goto bad;
1240 }
1241
1242
1243 if (!findspace) {
1244 //K64todo fix for 64bit user?
1245 uint32_t dst_addr;
1246 vm_map_copy_t tmp;
1247
1248 if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) ||
1249 trunc_page(dst_addr) != dst_addr) {
1250 (void) vm_map_remove(
1251 my_map,
1252 map_addr, map_addr + map_size,
1253 VM_MAP_NO_FLAGS);
1254 (void)vnode_put(vp);
1255 err = KERN_INVALID_ADDRESS;
1256 goto bad;
1257 }
1258
1259 result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
1260 (vm_map_size_t)map_size, TRUE, &tmp);
1261 if (result != KERN_SUCCESS) {
1262
1263 (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
1264 vm_map_round_page(map_addr + map_size),
1265 VM_MAP_NO_FLAGS);
1266 (void)vnode_put(vp);
1267 err = result;
1268 goto bad;
1269 }
1270
1271 result = vm_map_copy_overwrite(my_map,
1272 (vm_map_address_t)dst_addr, tmp, FALSE);
1273 if (result != KERN_SUCCESS) {
1274 vm_map_copy_discard(tmp);
1275 (void)vnode_put(vp);
1276 err = result;
1277 goto bad;
1278 }
1279 } else {
1280 // K64todo bug compatible now, should fix for 64bit user
1281 uint32_t user_map_addr = CAST_DOWN_EXPLICIT(uint32_t, map_addr);
1282 if (copyout(&user_map_addr, CAST_USER_ADDR_T(va), sizeof (user_map_addr))) {
1283 (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
1284 vm_map_round_page(map_addr + map_size),
1285 VM_MAP_NO_FLAGS);
1286 (void)vnode_put(vp);
1287 err = KERN_INVALID_ADDRESS;
1288 goto bad;
1289 }
1290 }
1291
1292 ubc_setthreadcred(vp, current_proc(), current_thread());
1293 (void)vnode_put(vp);
1294 err = 0;
1295 bad:
1296 fp_drop(p, fd, fp, 0);
1297 return (err);
1298 }
1299 #endif /* !defined(CONFIG_EMBEDDED) */
1300