/*
 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>

#include <bsm/audit_kernel.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>

#include <kern/cpu_number.h>
#include <kern/host.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>

struct osmmap_args {
	caddr_t addr;
	int len;
	int prot;
	int share;
	int fd;
	long pos;
};

/* XXX the following function should probably be static */
kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *,
    boolean_t, vm_size_t);

/* XXX the following two functions aren't used anywhere */
int osmmap(proc_t, struct osmmap_args *, register_t *);
int mremap(void);

int
sbrk(__unused proc_t p, __unused struct sbrk_args *uap, __unused register_t *retval)
{
	/* Not yet implemented */
	return (ENOTSUP);
}

int
sstk(__unused proc_t p, __unused struct sstk_args *uap, __unused register_t *retval)
{
	/* Not yet implemented */
	return (ENOTSUP);
}


int
osmmap(
	proc_t curp,
	struct osmmap_args *uap,
	register_t *retval)
{
	struct mmap_args newargs;
	user_addr_t addr;
	int ret;

	if ((uap->share == MAP_SHARED) || (uap->share == MAP_PRIVATE)) {
		newargs.addr = CAST_USER_ADDR_T(uap->addr);
		newargs.len = CAST_USER_ADDR_T(uap->len);
		newargs.prot = uap->prot;
		newargs.flags = uap->share;
		newargs.fd = uap->fd;
		newargs.pos = (off_t)uap->pos;
		ret = mmap(curp, &newargs, &addr);
		if (ret == 0)
			*retval = CAST_DOWN(register_t, addr);
	} else
		ret = EINVAL;
	return ret;
}


/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct vnode *vp;
	int flags;
	int prot;
	int err = 0;
	vm_map_t user_map;
	kern_return_t result;
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	vm_object_offset_t pageoff;
	vm_object_offset_t file_pos;
	int alloc_flags = 0;
	boolean_t docow;
	vm_prot_t maxprot;
	void *handle;
	vm_pager_t pager;
	int mapanon = 0;
	int fpref = 0;
	int error = 0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t)uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */
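	/*
	 * For example, a caller requesting PROT_WRITE alone effectively
	 * gets PROT_READ | PROT_WRITE here.
	 */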

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;				/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */


	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
	/* Do not have APIs to get this info; need to wait till then */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the
		 * alias value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
			    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
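		/*
		 * Illustrative userland call (a sketch, not compiled here):
		 * with MAP_ANON, the "fd" argument can carry Mach VM flags,
		 * e.g. a purgable allocation with an alias tag, assuming the
		 * VM_MAKE_TAG()/VM_MEMORY_* definitions from
		 * <mach/vm_statistics.h>:
		 *
		 *	mmap(NULL, len, PROT_READ | PROT_WRITE,
		 *	    MAP_ANON | MAP_PRIVATE,
		 *	    VM_FLAGS_PURGABLE | VM_MAKE_TAG(VM_MEMORY_MALLOC),
		 *	    0);
		 */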

		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return (err);
		fpref = 1;
		if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if (error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check
					 * when granting FWRITE against the
					 * file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0)
						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 * We bend a little - round the start and end addresses
	 * to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
			if (!mapanon)
				(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}
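	/*
	 * Sketch of the flag selection above (illustrative only):
	 *
	 *	mmap(hint, len, prot, MAP_ANON, -1, 0)
	 *		-> alloc_flags gets VM_FLAGS_ANYWHERE
	 *	mmap(addr, len, prot, MAP_ANON | MAP_FIXED, -1, 0)
	 *		-> alloc_flags gets VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	 *		   so any old mappings in [addr, addr + len) are
	 *		   replaced atomically by Mach VM.
	 */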

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
		    &user_addr, user_size,
		    0, alloc_flags,
		    IPC_PORT_NULL, 0, FALSE,
		    prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS)
			goto out;
	} else {
		pager = (vm_pager_t)ubc_getpager(vp);

		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 * Set credentials:
		 * FIXME: if we're writing the file we need a way to
		 * ensure that someone doesn't replace our R/W creds
		 * with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
		    &user_addr, user_size,
		    0, alloc_flags,
		    (ipc_port_t)pager, file_pos,
		    docow, prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS) {
			(void)vnode_put(vp);
			goto out;
		}
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
	    (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return (error);
}

int
msync(__unused proc_t p, struct msync_args *uap, register_t *retval)
{
	__pthread_testcancel(1);
	return (msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);

	if (addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a
		 * list of all mmaps done.  vm_map_entry cannot be used, as
		 * entries could be split or coalesced by independent
		 * actions.  So instead of returning inaccurate results,
		 * just return an error for an invalid size.
		 */
		return (EINVAL);	/* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
		return (EINVAL);

	if (flags & MS_KILLPAGES)
		sync_flags |= VM_SYNC_KILLPAGES;
	if (flags & MS_DEACTIVATE)
		sync_flags |= VM_SYNC_DEACTIVATE;
	if (flags & MS_INVALIDATE)
		sync_flags |= VM_SYNC_INVALIDATE;

	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC)
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		else
			sync_flags |= VM_SYNC_SYNCHRONOUS;
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;	/* complain if holes */
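	/*
	 * Illustrative userland usage (a sketch, not compiled here):
	 * write back a mapped file region, blocking until it is on disk,
	 * and invalidate the cached pages:
	 *
	 *	if (msync(addr, len, MS_SYNC | MS_INVALIDATE) == -1)
	 *		perror("msync");
	 */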

	user_map = current_map();
	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:	/* hole in region being sync'ed */
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}


int
mremap(void)
{
	/* Not yet implemented */
	return (ENOTSUP);
}

int
munmap(__unused proc_t p, struct munmap_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t)uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (user_addr + user_size < user_addr)
		return (EINVAL);

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(current_map(), user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return (EINVAL);
	}
	return (0);
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused register_t *retval)
{
	register vm_prot_t prot;
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value, uap->prot);

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t)uap->len;
	prot = (vm_prot_t)(uap->prot & VM_PROT_ALL);

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif /* 3936456 */

	user_map = current_map();

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for two reasons:
	 * without mmap revocation, the caller could have asked for the
	 * maximum protections initially instead of a reduced set, so an
	 * mprotect check would offer no new security; and it is not
	 * possible to extract the vnode from the pager object(s) of the
	 * target memory range.
	 * However, the MAC check may still be used to prevent a process
	 * from, e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error)
		return (error);
#endif
	result = mach_vm_protect(user_map, user_addr, user_size,
	    FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return (EINVAL);
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	register vm_inherit_t inherit;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value, uap->inherit);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;

	user_map = current_map();
	result = mach_vm_inherit(user_map, addr, size,
	    inherit);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused register_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	default:
		return (EINVAL);
	}

	start = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;

	user_map = current_map();

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	}

	return (EINVAL);
}

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr, first_addr, end;
	vm_map_t map;
	user_addr_t vec;
	int error;
	int vecindex, lastvecindex;
	int mincoreinfo = 0;
	int pqueryinfo;
	kern_return_t ret;
	int numref;

	char c;

	map = current_map();

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = mach_vm_trunc_page(uap->addr);
	end = addr + mach_vm_round_page(uap->len);

	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = current_map();
	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for ( ; addr < end; addr += PAGE_SIZE) {
		pqueryinfo = 0;
		ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
		if (ret != KERN_SUCCESS)
			pqueryinfo = 0;
		mincoreinfo = 0;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
			mincoreinfo |= MINCORE_INCORE;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
			mincoreinfo |= MINCORE_REFERENCED;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
			mincoreinfo |= MINCORE_MODIFIED;


		/*
		 * calculate index into user supplied byte vector
		 */
		vecindex = (addr - first_addr) >> PAGE_SHIFT;

		/*
		 * If we have skipped map entries, we need to make sure that
		 * the byte vector is zeroed for those skipped entries.
		 */
		while ((lastvecindex + 1) < vecindex) {
			c = 0;
			error = copyout(&c, vec + lastvecindex, 1);
			if (error) {
				return (EFAULT);
			}
			++lastvecindex;
		}

		/*
		 * Pass the page information to the user
		 */
		c = (char)mincoreinfo;
		error = copyout(&c, vec + vecindex, 1);
		if (error) {
			return (EFAULT);
		}
		lastvecindex = vecindex;
	}


	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = (end - first_addr) >> PAGE_SHIFT;
	while ((lastvecindex + 1) < vecindex) {
		c = 0;
		error = copyout(&c, vec + lastvecindex, 1);
		if (error) {
			return (EFAULT);
		}
		++lastvecindex;
	}

	return (0);
}
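
/*
 * Illustrative userland usage of mincore() (a sketch, not compiled
 * here): one status byte is reported per page of the region.
 *
 *	int resident = 0;
 *	size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
 *	char *vec = malloc(npages);
 *	if (vec != NULL && mincore(addr, len, vec) == 0)
 *		resident = (vec[0] & MINCORE_INCORE) != 0;
 */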

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused register_t *retval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t)uap->addr;
	size = (vm_map_size_t)uap->len;

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size = vm_map_round_page(size + pageoff);
	user_map = current_map();

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire(user_map, addr, addr + size, VM_PROT_NONE, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE)
		return EAGAIN;
	else if (result != KERN_SUCCESS)
		return ENOMEM;

	return 0;	/* KERN_SUCCESS */
}
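
/*
 * Rounding example for the mlock() path above: with 4K pages,
 * mlock(0x1004, 0x2000) covers [0x1004, 0x3004), which is rounded out
 * to the whole pages [0x1000, 0x4000), i.e. three pages are wired.
 */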

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();

	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
	return (result == KERN_SUCCESS ? 0 : ENOMEM);
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused register_t *retval)
{
	return (ENOSYS);
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused register_t *retval)
{
	return (ENOSYS);
}


/* BEGIN DEFUNCT */
int
obreak(__unused proc_t p, __unused struct obreak_args *uap, __unused register_t *retval)
{
	/* Not implemented, obsolete */
	return (ENOMEM);
}

int both;

int
ovadvise(__unused proc_t p, __unused struct ovadvise_args *uap, __unused register_t *retval)
{

#ifdef lint
	both = 0;
#endif
	return (0);
}
/* END DEFUNCT */

/* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
kern_return_t
map_fd(struct map_fd_args *args)
{
	int fd = args->fd;
	vm_offset_t offset = args->offset;
	vm_offset_t *va = args->va;
	boolean_t findspace = args->findspace;
	vm_size_t size = args->size;
	kern_return_t ret;

	AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD);
	AUDIT_ARG(addr, CAST_DOWN(user_addr_t, va));
	AUDIT_ARG(fd, fd);

	ret = map_fd_funneled(fd, (vm_object_offset_t)offset, va, findspace, size);

	AUDIT_MACH_SYSCALL_EXIT(ret);
	return ret;
}
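
/*
 * For reference (a sketch, not compiled here): the common
 * map_fd(fd, off, &va, TRUE, size) "find space" case corresponds
 * roughly to the supported mmap() interface:
 *
 *	void *va = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *	    MAP_FILE | MAP_PRIVATE, fd, off);
 *
 * which is why the comment above calls for obsoleting map_fd().
 */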

kern_return_t
map_fd_funneled(
	int fd,
	vm_object_offset_t offset,
	vm_offset_t *va,
	boolean_t findspace,
	vm_size_t size)
{
	kern_return_t result;
	struct fileproc *fp;
	struct vnode *vp;
	void *pager;
	vm_offset_t map_addr = 0;
	vm_size_t map_size;
	int err = 0;
	vm_map_t my_map;
	proc_t p = current_proc();
	struct vnode_attr vattr;

	/*
	 * Find the inode; verify that it's a regular file.
	 */

	err = fp_lookup(p, fd, &fp, 0);
	if (err)
		return (err);

	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	if (!(fp->f_fglob->fg_flag & FREAD)) {
		err = KERN_PROTECTION_FAILURE;
		goto bad;
	}

	vp = (struct vnode *)fp->f_fglob->fg_data;
	err = vnode_getwithref(vp);
	if (err != 0)
		goto bad;

	if (vp->v_type != VREG) {
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/*
	 * POSIX: mmap needs to update access time for mapped files
	 */
	if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
		VATTR_INIT(&vattr);
		nanotime(&vattr.va_access_time);
		VATTR_SET_ACTIVE(&vattr, va_access_time);
		vnode_setattr(vp, &vattr, vfs_context_current());
	}

	if (offset & PAGE_MASK_64) {
		printf("map_fd: file offset not page aligned(%d : %s)\n", p->p_pid, p->p_comm);
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	map_size = round_page(size);

	/*
	 * Allow user to map in a zero length file.
	 */
	if (size == 0) {
		(void)vnode_put(vp);
		err = KERN_SUCCESS;
		goto bad;
	}
	/*
	 * Map in the file.
	 */
	pager = (void *)ubc_getpager(vp);
	if (pager == NULL) {
		(void)vnode_put(vp);
		err = KERN_FAILURE;
		goto bad;
	}


	my_map = current_map();

	result = vm_map_64(
	    my_map,
	    &map_addr, map_size, (vm_offset_t)0,
	    VM_FLAGS_ANYWHERE, pager, offset, TRUE,
	    VM_PROT_DEFAULT, VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
	if (result != KERN_SUCCESS) {
		(void)vnode_put(vp);
		err = result;
		goto bad;
	}


	if (!findspace) {
		vm_offset_t dst_addr;
		vm_map_copy_t tmp;

		if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) ||
		    trunc_page_32(dst_addr) != dst_addr) {
			(void) vm_map_remove(
			    my_map,
			    map_addr, map_addr + map_size,
			    VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}

		result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
		    (vm_map_size_t)map_size, TRUE, &tmp);
		if (result != KERN_SUCCESS) {

			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
			    vm_map_round_page(map_addr + map_size),
			    VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}

		result = vm_map_copy_overwrite(my_map,
		    (vm_map_address_t)dst_addr, tmp, FALSE);
		if (result != KERN_SUCCESS) {
			vm_map_copy_discard(tmp);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}
	} else {
		if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
			    vm_map_round_page(map_addr + map_size),
			    VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}
	}

	ubc_setthreadcred(vp, current_proc(), current_thread());
	(void)vnode_put(vp);
	err = 0;
bad:
	fp_drop(p, fd, fp, 0);
	return (err);
}