/*
 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>

#include <bsm/audit_kernel.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>

#include <kern/cpu_number.h>
#include <kern/host.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>

struct osmmap_args {
	caddr_t addr;
	int len;
	int prot;
	int share;
	int fd;
	long pos;
};
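/*
 * Argument layout for the legacy osmmap() entry point below; osmmap()
 * simply repackages these fields into a struct mmap_args and calls mmap().
 */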

/* XXX the following function should probably be static */
kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *,
			      boolean_t, vm_size_t);

/* XXX the following two functions aren't used anywhere */
int osmmap(proc_t, struct osmmap_args *, register_t *);
int mremap(void);

int
sbrk(__unused proc_t p, __unused struct sbrk_args *uap, __unused register_t *retval)
{
	/* Not yet implemented */
	return (ENOTSUP);
}

int
sstk(__unused proc_t p, __unused struct sstk_args *uap, __unused register_t *retval)
{
	/* Not yet implemented */
	return (ENOTSUP);
}


int
osmmap(
	proc_t curp,
	struct osmmap_args *uap,
	register_t *retval)
{
	struct mmap_args newargs;
	user_addr_t addr;
	int ret;

	if ((uap->share == MAP_SHARED) || (uap->share == MAP_PRIVATE)) {
		newargs.addr = CAST_USER_ADDR_T(uap->addr);
		newargs.len = CAST_USER_ADDR_T(uap->len);
		newargs.prot = uap->prot;
		newargs.flags = uap->share;
		newargs.fd = uap->fd;
		newargs.pos = (off_t)uap->pos;
		ret = mmap(curp, &newargs, &addr);
		if (ret == 0)
			*retval = CAST_DOWN(register_t, addr);
	} else
		ret = EINVAL;
	return ret;
}


/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
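/*
 * For reference, the interface and VM constants line up bit-for-bit:
 *	PROT_READ  == VM_PROT_READ    == 0x01
 *	PROT_WRITE == VM_PROT_WRITE   == 0x02
 *	PROT_EXEC  == VM_PROT_EXECUTE == 0x04
 * which is what makes masking uap->prot with VM_PROT_ALL below legitimate.
 */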
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct vnode *vp;
	int flags;
	int prot, file_prot;
	int err = 0;
	vm_map_t user_map;
	kern_return_t result;
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	vm_object_offset_t pageoff;
	vm_object_offset_t file_pos;
	int alloc_flags = 0;
	boolean_t docow;
	vm_prot_t maxprot;
	void *handle;
	vm_pager_t pager;
	int mapanon = 0;
	int fpref = 0;
	int error = 0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t)uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */
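	/*
	 * In effect, a request such as mmap(NULL, len, PROT_WRITE, ...) is
	 * granted PROT_READ | PROT_WRITE here; write-only (and exec-only)
	 * mappings are not available on this hardware.
	 */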

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;				/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */


	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
	/* Do not have APIs to get this info; need to wait until then. */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the
		 * alias value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
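		/*
		 * Example (hypothetical caller): an anonymous mapping tagged
		 * for memory accounting can be requested as
		 *	mmap(NULL, len, PROT_READ | PROT_WRITE,
		 *	     MAP_ANON | MAP_PRIVATE,
		 *	     VM_MAKE_TAG(VM_MEMORY_MALLOC), 0);
		 * where the "fd" argument carries VM_FLAGS_ALIAS_MASK bits
		 * instead of a file descriptor.
		 */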

		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return (err);
		fpref = 1;
		if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if (error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0)
						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
			if (!mapanon)
				(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS)
			goto out;
	} else {
		pager = (vm_pager_t)ubc_getpager(vp);

		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 * Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *	ensure that someone doesn't replace our R/W creds
		 *	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}
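		/*
		 * A private, file-backed mapping (neither MAP_ANON nor
		 * MAP_SHARED) is entered copy-on-write: stores go to a
		 * shadow copy of the pages and are never written back to
		 * the underlying file.
		 */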

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 (ipc_port_t)pager, file_pos,
						 docow, prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS) {
			(void)vnode_put(vp);
			goto out;
		}

		file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
		if (docow) {
			/* private mapping: won't write to the file */
			file_prot &= ~PROT_WRITE;
		}
		(void) ubc_map(vp, file_prot);
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE),
			      fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE),
			      (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return (error);
}

int
msync(__unused proc_t p, struct msync_args *uap, register_t *retval)
{
	__pthread_testcancel(1);
	return (msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE),
			      (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);

	if (addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a
		 * list of all mmaps done.  We cannot use vm_map_entry, as
		 * entries could be split or coalesced by independent
		 * actions.  So instead of returning inaccurate results,
		 * just return EINVAL for a zero size.
		 */
		return (EINVAL);	/* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
		return (EINVAL);

	if (flags & MS_KILLPAGES)
		sync_flags |= VM_SYNC_KILLPAGES;
	if (flags & MS_DEACTIVATE)
		sync_flags |= VM_SYNC_DEACTIVATE;
	if (flags & MS_INVALIDATE)
		sync_flags |= VM_SYNC_INVALIDATE;

	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC)
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		else
			sync_flags |= VM_SYNC_SYNCHRONOUS;
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;	/* complain if holes */

	user_map = current_map();
	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:	/* hole in region being sync'ed */
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}


int
mremap(void)
{
	/* Not yet implemented */
	return (ENOTSUP);
}

int
munmap(__unused proc_t p, struct munmap_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t)uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (user_addr + user_size < user_addr)
		return (EINVAL);

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(current_map(), user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return (EINVAL);
	}
	return (0);
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused register_t *retval)
{
	register vm_prot_t prot;
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value, uap->prot);

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t)uap->len;
	prot = (vm_prot_t)(uap->prot & VM_PROT_ALL);

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif /* 3936456 */

	user_map = current_map();

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error)
		return (error);
#endif
	result = mach_vm_protect(user_map, user_addr, user_size,
				 FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return (EINVAL);
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	register vm_inherit_t inherit;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value, uap->inherit);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;

	user_map = current_map();
	result = mach_vm_inherit(user_map, addr, size,
				 inherit);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused register_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	default:
		return (EINVAL);
	}

	start = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;

	user_map = current_map();

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	}

	return (EINVAL);
}

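/*
 * mincore: report core-residency information for the given address range.
 * One status byte per page is copied out to uap->vec; for example, a query
 * spanning three pages fills vec[0..2] with combinations of MINCORE_INCORE,
 * MINCORE_REFERENCED and MINCORE_MODIFIED bits.
 */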
int
mincore(__unused proc_t p, struct mincore_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr, first_addr, end;
	vm_map_t map;
	user_addr_t vec;
	int error;
	int vecindex, lastvecindex;
	int mincoreinfo = 0;
	int pqueryinfo;
	kern_return_t ret;
	int numref;

	char c;

	map = current_map();

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = mach_vm_trunc_page(uap->addr);
	end = addr + mach_vm_round_page(uap->len);

	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = current_map();

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (; addr < end; addr += PAGE_SIZE) {
		pqueryinfo = 0;
		ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
		if (ret != KERN_SUCCESS)
			pqueryinfo = 0;
		mincoreinfo = 0;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
			mincoreinfo |= MINCORE_INCORE;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
			mincoreinfo |= MINCORE_REFERENCED;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
			mincoreinfo |= MINCORE_MODIFIED;


		/*
		 * calculate index into user supplied byte vector
		 */
		vecindex = (addr - first_addr) >> PAGE_SHIFT;

		/*
		 * If we have skipped map entries, we need to make sure that
		 * the byte vector is zeroed for those skipped entries.
		 */
		while ((lastvecindex + 1) < vecindex) {
			c = 0;
			error = copyout(&c, vec + lastvecindex, 1);
			if (error) {
				return (EFAULT);
			}
			++lastvecindex;
		}

		/*
		 * Pass the page information to the user
		 */
		c = (char)mincoreinfo;
		error = copyout(&c, vec + vecindex, 1);
		if (error) {
			return (EFAULT);
		}
		lastvecindex = vecindex;
	}


	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = (end - first_addr) >> PAGE_SHIFT;
	while ((lastvecindex + 1) < vecindex) {
		c = 0;
		error = copyout(&c, vec + lastvecindex, 1);
		if (error) {
			return (EFAULT);
		}
		++lastvecindex;
	}

	return (0);
}

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused register_t *retval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t)uap->addr;
	size = (vm_map_size_t)uap->len;

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size = vm_map_round_page(size + pageoff);
	user_map = current_map();

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire(user_map, addr, addr + size, VM_PROT_NONE, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE)
		return EAGAIN;
	else if (result != KERN_SUCCESS)
		return ENOMEM;

	return 0;	/* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();

	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
	return (result == KERN_SUCCESS ? 0 : ENOMEM);
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused register_t *retval)
{
	return (ENOSYS);
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused register_t *retval)
{
	return (ENOSYS);
}


/* BEGIN DEFUNCT */
int
obreak(__unused proc_t p, __unused struct obreak_args *uap, __unused register_t *retval)
{
	/* Not implemented, obsolete */
	return (ENOMEM);
}

int both;

int
ovadvise(__unused proc_t p, __unused struct ovadvise_args *uap, __unused register_t *retval)
{

#ifdef lint
	both = 0;
#endif
	return (0);
}
/* END DEFUNCT */

/* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
kern_return_t
map_fd(struct map_fd_args *args)
{
	int fd = args->fd;
	vm_offset_t offset = args->offset;
	vm_offset_t *va = args->va;
	boolean_t findspace = args->findspace;
	vm_size_t size = args->size;
	kern_return_t ret;

	AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD);
	AUDIT_ARG(addr, CAST_DOWN(user_addr_t, va));
	AUDIT_ARG(fd, fd);

	ret = map_fd_funneled(fd, (vm_object_offset_t)offset, va, findspace, size);

	AUDIT_MACH_SYSCALL_EXIT(ret);
	return ret;
}

kern_return_t
map_fd_funneled(
	int			fd,
	vm_object_offset_t	offset,
	vm_offset_t		*va,
	boolean_t		findspace,
	vm_size_t		size)
{
	kern_return_t result;
	struct fileproc *fp;
	struct vnode *vp;
	void *pager;
	vm_offset_t map_addr = 0;
	vm_size_t map_size;
	int err = 0;
	vm_map_t my_map;
	proc_t p = current_proc();
	struct vnode_attr vattr;

	/*
	 * Find the inode; verify that it's a regular file.
	 */

	err = fp_lookup(p, fd, &fp, 0);
	if (err)
		return (err);

	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	if (!(fp->f_fglob->fg_flag & FREAD)) {
		err = KERN_PROTECTION_FAILURE;
		goto bad;
	}

	vp = (struct vnode *)fp->f_fglob->fg_data;
	err = vnode_getwithref(vp);
	if (err != 0)
		goto bad;

	if (vp->v_type != VREG) {
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/*
	 * POSIX: mmap needs to update access time for mapped files
	 */
	if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
		VATTR_INIT(&vattr);
		nanotime(&vattr.va_access_time);
		VATTR_SET_ACTIVE(&vattr, va_access_time);
		vnode_setattr(vp, &vattr, vfs_context_current());
	}

	if (offset & PAGE_MASK_64) {
		printf("map_fd: file offset not page aligned(%d : %s)\n", p->p_pid, p->p_comm);
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	map_size = round_page(size);

	/*
	 * Allow user to map in a zero length file.
	 */
	if (size == 0) {
		(void)vnode_put(vp);
		err = KERN_SUCCESS;
		goto bad;
	}
	/*
	 * Map in the file.
	 */
	pager = (void *)ubc_getpager(vp);
	if (pager == NULL) {
		(void)vnode_put(vp);
		err = KERN_FAILURE;
		goto bad;
	}


	my_map = current_map();

	result = vm_map_64(
			my_map,
			&map_addr, map_size, (vm_offset_t)0,
			VM_FLAGS_ANYWHERE, pager, offset, TRUE,
			VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
	if (result != KERN_SUCCESS) {
		(void)vnode_put(vp);
		err = result;
		goto bad;
	}

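	/*
	 * "findspace" selects who chooses the destination address: if FALSE,
	 * the caller's address is copied in from *va and the freshly mapped
	 * pages are copy-overwritten there; if TRUE, the kernel-chosen
	 * address is copied out to *va instead.
	 */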
	if (!findspace) {
		vm_offset_t dst_addr;
		vm_map_copy_t tmp;

		if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) ||
		    trunc_page_32(dst_addr) != dst_addr) {
			(void) vm_map_remove(
					my_map,
					map_addr, map_addr + map_size,
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}

		result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
				       (vm_map_size_t)map_size, TRUE, &tmp);
		if (result != KERN_SUCCESS) {

			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}

		result = vm_map_copy_overwrite(my_map,
					       (vm_map_address_t)dst_addr, tmp, FALSE);
		if (result != KERN_SUCCESS) {
			vm_map_copy_discard(tmp);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}
	} else {
		if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}
	}

	ubc_setthreadcred(vp, current_proc(), current_thread());
	(void)ubc_map(vp, (PROT_READ | PROT_EXEC));
	(void)vnode_put(vp);
	err = 0;
bad:
	fp_drop(p, fd, fp, 0);
	return (err);
}