/*
 * Copyright (c) 2007 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>

#include <bsm/audit_kernel.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>

#include <kern/cpu_number.h>
#include <kern/host.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>

struct osmmap_args {
	caddr_t addr;
	int len;
	int prot;
	int share;
	int fd;
	long pos;
};

/* XXX the following function should probably be static */
kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *,
				boolean_t, vm_size_t);

/* XXX the following two functions aren't used anywhere */
int osmmap(proc_t, struct osmmap_args *, register_t *);
int mremap(void);

int
sbrk(__unused proc_t p, __unused struct sbrk_args *uap, __unused register_t *retval)
{
	/* Not yet implemented */
	return (ENOTSUP);
}

int
sstk(__unused proc_t p, __unused struct sstk_args *uap, __unused register_t *retval)
{
	/* Not yet implemented */
	return (ENOTSUP);
}


int
osmmap(
	proc_t curp,
	struct osmmap_args *uap,
	register_t *retval)
{
	struct mmap_args newargs;
	user_addr_t addr;
	int ret;

	if ((uap->share == MAP_SHARED) || (uap->share == MAP_PRIVATE)) {
		newargs.addr = CAST_USER_ADDR_T(uap->addr);
		newargs.len = CAST_USER_ADDR_T(uap->len);
		newargs.prot = uap->prot;
		newargs.flags = uap->share;
		newargs.fd = uap->fd;
		newargs.pos = (off_t)uap->pos;
		ret = mmap(curp, &newargs, &addr);
		if (ret == 0)
			*retval = CAST_DOWN(register_t, addr);
	} else
		ret = EINVAL;
	return ret;
}


/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
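
/*
 * Editor's illustrative sketch, not part of the original source: the
 * PROT_*/VM_PROT_* correspondence the comment above relies on could be
 * spot-checked at build time along these lines.  Kept under #if 0 since
 * this code predates C11 _Static_assert.
 */
#if 0
_Static_assert(PROT_READ == VM_PROT_READ &&
    PROT_WRITE == VM_PROT_WRITE &&
    PROT_EXEC == VM_PROT_EXECUTE,
    "PROT_* and VM_PROT_* flag values must line up");
#endif
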
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct vnode *vp;
	int flags;
	int prot;
	int err = 0;
	vm_map_t user_map;
	kern_return_t result;
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	vm_object_offset_t pageoff;
	vm_object_offset_t file_pos;
	int alloc_flags = 0;
	boolean_t docow;
	vm_prot_t maxprot;
	void *handle;
	vm_pager_t pager;
	int mapanon = 0;
	int fpref = 0;
	int error = 0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t)uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */
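
	/*
	 * Editor's illustrative note, not from the original source: as a
	 * consequence of the implied-read rule above, a call such as
	 *
	 *	mmap(NULL, len, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	 *
	 * is handled as if PROT_READ | PROT_WRITE had been requested.
	 */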

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The VM code does not have prototypes, and the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range, etc. */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;				/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */


	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
	/* We do not have APIs to get this info; need to wait until then. */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the
		 * alias value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}

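		/*
		 * Editor's illustrative user-space usage, not part of the
		 * original source: with the VM_MAKE_TAG() macro from
		 * <mach/vm_statistics.h>, an anonymous mapping can be tagged
		 * for memory tracking, e.g.
		 *
		 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 *	    MAP_ANON | MAP_PRIVATE,
		 *	    VM_MAKE_TAG(VM_MEMORY_MALLOC), 0);
		 */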
		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if (error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0)
						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 * We bend a little - round the start and end addresses
	 * to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
			if (!mapanon)
				(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

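	/*
	 * Editor's illustrative note, not from the original source: the
	 * naive alternative for MAP_FIXED,
	 *
	 *	(void) mach_vm_deallocate(user_map, user_addr, user_size);
	 *	... then map with VM_FLAGS_FIXED alone ...
	 *
	 * would leave a window where another thread could allocate within
	 * [user_addr, user_addr + user_size); VM_FLAGS_OVERWRITE closes
	 * that window by performing the replacement atomically inside the
	 * Mach VM layer, as the comment above describes.
	 */
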
	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS)
			goto out;
	} else {
		pager = (vm_pager_t)ubc_getpager(vp);

		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 * Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *	ensure that someone doesn't replace our R/W creds
		 *	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 (ipc_port_t)pager, file_pos,
						 docow, prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS) {
			(void)vnode_put(vp);
			goto out;
		}
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return(error);
}

int
msync(__unused proc_t p, struct msync_args *uap, register_t *retval)
{
	__pthread_testcancel(1);
	return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);

	if (addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a list
		 * of all the mmaps done.  We cannot use vm_map_entry, as
		 * entries could be split or coalesced by independent actions.
		 * So instead of returning inaccurate results, just return an
		 * error for an invalid size.
		 */
		return (EINVAL);	/* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
		return (EINVAL);

	if (flags & MS_KILLPAGES)
		sync_flags |= VM_SYNC_KILLPAGES;
	if (flags & MS_DEACTIVATE)
		sync_flags |= VM_SYNC_DEACTIVATE;
	if (flags & MS_INVALIDATE)
		sync_flags |= VM_SYNC_INVALIDATE;

	if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC)
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		else
			sync_flags |= VM_SYNC_SYNCHRONOUS;
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;	/* complain if holes */

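	/*
	 * Editor's illustrative user-space usage, not part of the original
	 * source:
	 *
	 *	msync(p, len, MS_SYNC | MS_INVALIDATE);
	 *
	 * maps here to VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE (plus
	 * VM_SYNC_CONTIGUOUS), i.e. a blocking flush that also invalidates
	 * cached pages and fails with ENOMEM if the range contains holes.
	 */
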
	user_map = current_map();
	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:	/* hole in region being sync'ed */
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}


int
mremap(void)
{
	/* Not yet implemented */
	return (ENOTSUP);
}

int
munmap(__unused proc_t p, struct munmap_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	kern_return_t		result;

	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (user_addr + user_size < user_addr)
		return(EINVAL);

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(current_map(), user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return(EINVAL);
	}
	return(0);
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused register_t *retval)
{
	register vm_prot_t	prot;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	kern_return_t		result;
	vm_map_t		user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value, uap->prot);

	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & VM_PROT_ALL);

	if (user_addr & PAGE_MASK_64) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif /* 3936456 */

	user_map = current_map();

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
			user_size, prot);
	if (error)
		return (error);
#endif
	result = mach_vm_protect(user_map, user_addr, user_size,
				 FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return (EINVAL);
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	register vm_inherit_t inherit;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value, uap->inherit);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;

	user_map = current_map();
	result = mach_vm_inherit(user_map, addr, size,
				inherit);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused register_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	default:
		return(EINVAL);
	}

	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;

	user_map = current_map();

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	}

	return (EINVAL);
}

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr, first_addr, end;
	vm_map_t map;
	user_addr_t vec;
	int error;
	int vecindex, lastvecindex;
	int mincoreinfo = 0;
	int pqueryinfo;
	kern_return_t ret;
	int numref;

	char c;

	map = current_map();

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = mach_vm_trunc_page(uap->addr);
	end = addr + mach_vm_round_page(uap->len);

	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = current_map();

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
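	/*
	 * Editor's illustrative user-space usage, not part of the original
	 * source: the caller supplies one byte per page, so for a
	 * three-page range
	 *
	 *	char vec[3];
	 *	mincore(addr, 3 * PAGE_SIZE, vec);
	 *
	 * leaves MINCORE_INCORE / MINCORE_REFERENCED / MINCORE_MODIFIED
	 * bits in each vec[i] for the corresponding page.
	 */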
	lastvecindex = -1;
	for ( ; addr < end; addr += PAGE_SIZE ) {
		pqueryinfo = 0;
		ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
		if (ret != KERN_SUCCESS)
			pqueryinfo = 0;
		mincoreinfo = 0;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
			mincoreinfo |= MINCORE_INCORE;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
			mincoreinfo |= MINCORE_REFERENCED;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
			mincoreinfo |= MINCORE_MODIFIED;


		/*
		 * calculate index into user supplied byte vector
		 */
		vecindex = (addr - first_addr) >> PAGE_SHIFT;

		/*
		 * If we have skipped map entries, we need to make sure that
		 * the byte vector is zeroed for those skipped entries.
		 */
		while ((lastvecindex + 1) < vecindex) {
			c = 0;
			error = copyout(&c, vec + lastvecindex, 1);
			if (error) {
				return (EFAULT);
			}
			++lastvecindex;
		}

		/*
		 * Pass the page information to the user
		 */
		c = (char)mincoreinfo;
		error = copyout(&c, vec + vecindex, 1);
		if (error) {
			return (EFAULT);
		}
		lastvecindex = vecindex;
	}


	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = (end - first_addr) >> PAGE_SHIFT;
	while ((lastvecindex + 1) < vecindex) {
		c = 0;
		error = copyout(&c, vec + lastvecindex, 1);
		if (error) {
			return (EFAULT);
		}
		++lastvecindex;
	}

	return (0);
}

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused register_t *retval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size = vm_map_round_page(size+pageoff);
	user_map = current_map();

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE)
		return EAGAIN;
	else if (result != KERN_SUCCESS)
		return ENOMEM;

	return 0;	/* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused register_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();

	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
	return (result == KERN_SUCCESS ? 0 : ENOMEM);
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused register_t *retval)
{
	return (ENOSYS);
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused register_t *retval)
{
	return(ENOSYS);
}


/* BEGIN DEFUNCT */
int
obreak(__unused proc_t p, __unused struct obreak_args *uap, __unused register_t *retval)
{
	/* Not implemented, obsolete */
	return (ENOMEM);
}

int both;

int
ovadvise(__unused proc_t p, __unused struct ovadvise_args *uap, __unused register_t *retval)
{

#ifdef lint
	both = 0;
#endif
	return( 0 );
}
/* END DEFUNCT */

/* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
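/*
 * Editor's illustrative note, not part of the original source: the modern
 * equivalent of map_fd(fd, offset, &va, FALSE, size) is roughly
 *
 *	va = (vm_offset_t)mmap((void *)va, size, PROT_READ | PROT_WRITE,
 *	    MAP_FILE | MAP_FIXED | MAP_PRIVATE, fd, offset);
 *
 * (or without MAP_FIXED when findspace is TRUE, letting the kernel choose
 * the address), which is why the comment above calls for map_fd() to be
 * obsoleted.
 */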
kern_return_t
map_fd(struct map_fd_args *args)
{
	int		fd = args->fd;
	vm_offset_t	offset = args->offset;
	vm_offset_t	*va = args->va;
	boolean_t	findspace = args->findspace;
	vm_size_t	size = args->size;
	kern_return_t	ret;

	AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD);
	AUDIT_ARG(addr, CAST_DOWN(user_addr_t, va));
	AUDIT_ARG(fd, fd);

	ret = map_fd_funneled(fd, (vm_object_offset_t)offset, va, findspace, size);

	AUDIT_MACH_SYSCALL_EXIT(ret);
	return ret;
}

kern_return_t
map_fd_funneled(
	int			fd,
	vm_object_offset_t	offset,
	vm_offset_t		*va,
	boolean_t		findspace,
	vm_size_t		size)
{
	kern_return_t	result;
	struct fileproc	*fp;
	struct vnode	*vp;
	void		*pager;
	vm_offset_t	map_addr = 0;
	vm_size_t	map_size;
	int		err = 0;
	vm_map_t	my_map;
	proc_t		p = current_proc();
	struct vnode_attr vattr;

	/*
	 * Find the inode; verify that it's a regular file.
	 */

	err = fp_lookup(p, fd, &fp, 0);
	if (err)
		return(err);

	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	if (!(fp->f_fglob->fg_flag & FREAD)) {
		err = KERN_PROTECTION_FAILURE;
		goto bad;
	}

	vp = (struct vnode *)fp->f_fglob->fg_data;
	err = vnode_getwithref(vp);
	if (err != 0)
		goto bad;

	if (vp->v_type != VREG) {
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/*
	 * POSIX: mmap needs to update access time for mapped files
	 */
	if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
		VATTR_INIT(&vattr);
		nanotime(&vattr.va_access_time);
		VATTR_SET_ACTIVE(&vattr, va_access_time);
		vnode_setattr(vp, &vattr, vfs_context_current());
	}

	if (offset & PAGE_MASK_64) {
		printf("map_fd: file offset not page aligned(%d : %s)\n", p->p_pid, p->p_comm);
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	map_size = round_page(size);

	/*
	 * Allow user to map in a zero length file.
	 */
	if (size == 0) {
		(void)vnode_put(vp);
		err = KERN_SUCCESS;
		goto bad;
	}
	/*
	 * Map in the file.
	 */
	pager = (void *)ubc_getpager(vp);
	if (pager == NULL) {
		(void)vnode_put(vp);
		err = KERN_FAILURE;
		goto bad;
	}


	my_map = current_map();

	result = vm_map_64(
			my_map,
			&map_addr, map_size, (vm_offset_t)0,
			VM_FLAGS_ANYWHERE, pager, offset, TRUE,
			VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
	if (result != KERN_SUCCESS) {
		(void)vnode_put(vp);
		err = result;
		goto bad;
	}


	if (!findspace) {
		vm_offset_t	dst_addr;
		vm_map_copy_t	tmp;

		if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) ||
		    trunc_page_32(dst_addr) != dst_addr) {
			(void) vm_map_remove(
					my_map,
					map_addr, map_addr + map_size,
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}

		result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
				       (vm_map_size_t)map_size, TRUE, &tmp);
		if (result != KERN_SUCCESS) {

			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}

		result = vm_map_copy_overwrite(my_map,
					(vm_map_address_t)dst_addr, tmp, FALSE);
		if (result != KERN_SUCCESS) {
			vm_map_copy_discard(tmp);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}
	} else {
		if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}
	}

	ubc_setthreadcred(vp, current_proc(), current_thread());
	(void)vnode_put(vp);
	err = 0;
bad:
	fp_drop(p, fd, fp, 0);
	return (err);
}