X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e5568f75972dfc723778653c11cb6b4dc825716a..c6bf4f310a33a9262d455ea4d3f0630b1255e3fe:/bsd/kern/kern_mman.c diff --git a/bsd/kern/kern_mman.c b/bsd/kern/kern_mman.c index e234d8955..0fd0cc336 100644 --- a/bsd/kern/kern_mman.c +++ b/bsd/kern/kern_mman.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2007-2019 Apple Inc. All Rights Reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright (c) 1988 University of Utah. @@ -60,6 +66,12 @@ * * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95 */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. 
+ */ /* * Mapped file (mmap) interface to VM @@ -68,191 +80,203 @@ #include #include #include -#include +#include +#include #include -#include -#include +#include #include #include -#include +#include #include #include #include #include #include #include +#include +#include -#include +#include +#include +#include + +#include #include #include - -#include - -#include -#include -#include - +#include #include #include #include #include +#include +#include +#include +#include -struct sbrk_args { - int incr; -}; - -/* ARGSUSED */ -int -sbrk(p, uap, retval) - struct proc *p; - struct sbrk_args *uap; - register_t *retval; -{ - /* Not yet implemented */ - return (EOPNOTSUPP); -} - -struct sstk_args { - int incr; -} *uap; - -/* ARGSUSED */ -int -sstk(p, uap, retval) - struct proc *p; - struct sstk_args *uap; - register_t *retval; -{ - /* Not yet implemented */ - return (EOPNOTSUPP); -} +#include -#if COMPAT_43 -/* ARGSUSED */ -int -ogetpagesize(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; -{ +#include +#include +#include +#include - *retval = PAGE_SIZE; - return (0); -} -#endif /* COMPAT_43 */ +#include -struct osmmap_args { - caddr_t addr; - int len; - int prot; - int share; - int fd; - long pos; -}; +#include +#include +#include +#include -int -osmmap(curp, uap, retval) - struct proc *curp; - register struct osmmap_args *uap; - register_t *retval; -{ -struct mmap_args { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - long pad; +#if CONFIG_MACF +#include #endif - off_t pos; -} newargs; - - if ((uap->share == MAP_SHARED )|| (uap->share == MAP_PRIVATE )) { - newargs.addr = uap->addr; - newargs.len = (size_t)uap->len; - newargs.prot = uap->prot; - newargs.flags = uap->share; - newargs.fd = uap->fd; - newargs.pos = (off_t)uap->pos; - return(mmap(curp,&newargs, retval)); - } else - return(EINVAL); -} +#include -struct mmap_args { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - long pad; +#ifndef CONFIG_EMBEDDED +#include /* for IOTaskHasEntitlement */ +#include /* for csr_check */ +#define MAP_32BIT_ENTITLEMENT "com.apple.security.mmap-map-32bit" #endif - off_t pos; -}; + +/* + * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct + * XXX usage is PROT_* from an interface perspective. Thus the values of + * XXX VM_PROT_* and PROT_* need to correspond. 
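
For illustration, the correspondence this XXX note relies on can be verified at compile time from userspace; a minimal sketch, assuming the macOS <sys/mman.h> and <mach/vm_prot.h> headers:

    #include <sys/mman.h>      /* PROT_* */
    #include <mach/vm_prot.h>  /* VM_PROT_* */

    /* If any of these ever diverged, the prot handling in mmap() would break. */
    _Static_assert(PROT_READ  == VM_PROT_READ,    "PROT_READ != VM_PROT_READ");
    _Static_assert(PROT_WRITE == VM_PROT_WRITE,   "PROT_WRITE != VM_PROT_WRITE");
    _Static_assert(PROT_EXEC  == VM_PROT_EXECUTE, "PROT_EXEC != VM_PROT_EXECUTE");
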
+ */ int -mmap(p, uap, retval) - struct proc *p; - struct mmap_args *uap; - register_t *retval; +mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) { /* * Map in special device (must be SHARED) or file */ - struct file *fp; - register struct vnode *vp; - int flags; - int prot; - int err=0; - vm_map_t user_map; - kern_return_t result; - vm_offset_t user_addr; - vm_size_t user_size; - vm_offset_t pageoff; - vm_object_offset_t file_pos; - boolean_t find_space, docow; - vm_prot_t maxprot; - void *handle; - vm_pager_t pager; - int mapanon=0; - - user_addr = (vm_offset_t)uap->addr; - user_size = (vm_size_t) uap->len; - AUDIT_ARG(addr, (void *)user_addr); - AUDIT_ARG(len, (int) user_size); + struct fileproc *fp; + struct vnode *vp; + int flags; + int prot; + int err = 0; + vm_map_t user_map; + kern_return_t result; + vm_map_offset_t user_addr; + vm_map_offset_t sum; + vm_map_size_t user_size; + vm_object_offset_t pageoff; + vm_object_offset_t file_pos; + int alloc_flags = 0; + vm_tag_t tag = VM_KERN_MEMORY_NONE; + vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; + boolean_t docow; + vm_prot_t maxprot; + void *handle; + memory_object_t pager = MEMORY_OBJECT_NULL; + memory_object_control_t control; + int mapanon = 0; + int fpref = 0; + int error = 0; + int fd = uap->fd; + int num_retries = 0; + + /* + * Note that for UNIX03 conformance, there is additional parameter checking for + * mmap() system call in libsyscall prior to entering the kernel. The sanity + * checks and argument validation done in this function are not the only places + * one can get returned errnos. + */ + + user_map = current_map(); + user_addr = (vm_map_offset_t)uap->addr; + user_size = (vm_map_size_t) uap->len; + + AUDIT_ARG(addr, user_addr); + AUDIT_ARG(len, user_size); AUDIT_ARG(fd, uap->fd); + if (vm_map_range_overflows(user_addr, user_size)) { + return EINVAL; + } prot = (uap->prot & VM_PROT_ALL); +#if 3777787 + /* + * Since the hardware currently does not support writing without + * read-before-write, or execution-without-read, if the request is + * for write or execute access, we must imply read access as well; + * otherwise programs expecting this to work will fail to operate. + */ + if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) { + prot |= VM_PROT_READ; + } +#endif /* radar 3777787 */ + flags = uap->flags; + vp = NULLVP; /* - * The vm code does not have prototypes & compiler doesn't do the' - * the right thing when you cast 64bit value and pass it in function + * The vm code does not have prototypes & compiler doesn't do + * the right thing when you cast 64bit value and pass it in function * call. So here it is. */ file_pos = (vm_object_offset_t)uap->pos; /* make sure mapping fits into numeric range etc */ - if ((file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64) || - ((ssize_t) uap->len < 0 )|| - ((flags & MAP_ANON) && uap->fd != -1)) - return (EINVAL); + if (os_add3_overflow(file_pos, user_size, PAGE_SIZE_64 - 1, &sum)) { + return EINVAL; + } /* * Align the file position to a page boundary, * and save its page offset component. */ - pageoff = ((vm_offset_t)file_pos & PAGE_MASK); + pageoff = (file_pos & vm_map_page_mask(user_map)); file_pos -= (vm_object_offset_t)pageoff; /* Adjust size for rounding (on both ends). */ - user_size += pageoff; /* low end... */ - user_size = (vm_size_t) round_page_32(user_size); /* hi end */ + user_size += pageoff; /* low end... 
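
The offset/size alignment arithmetic above, reduced to a self-contained sketch (illustrative only; PAGE_SIZE_MODEL and the sample values are hypothetical stand-ins for the map's real page size):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE_MODEL 4096ULL
    #define PAGE_MASK_MODEL (PAGE_SIZE_MODEL - 1)

    int main(void) {
        uint64_t file_pos  = 0x12345;  /* caller-supplied file offset */
        uint64_t user_size = 0x1000;   /* caller-supplied length */

        uint64_t pageoff = file_pos & PAGE_MASK_MODEL;  /* 0x345 */
        file_pos  -= pageoff;                           /* 0x12000, page-aligned */
        user_size += pageoff;                           /* cover the low fragment */
        user_size  = (user_size + PAGE_MASK_MODEL) & ~PAGE_MASK_MODEL; /* 0x2000 */

        printf("pos=0x%llx size=0x%llx pageoff=0x%llx\n",
            (unsigned long long)file_pos, (unsigned long long)user_size,
            (unsigned long long)pageoff);
        return 0;
    }
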
*/ + user_size = vm_map_round_page(user_size, + vm_map_page_mask(user_map)); /* hi end */ + + if (flags & MAP_JIT) { + if ((flags & MAP_FIXED) || + (flags & MAP_SHARED) || + !(flags & MAP_ANON) || + (flags & MAP_RESILIENT_CODESIGN) || + (flags & MAP_RESILIENT_MEDIA)) { + return EINVAL; + } + } + if ((flags & MAP_RESILIENT_CODESIGN) || + (flags & MAP_RESILIENT_MEDIA)) { + if ((flags & MAP_ANON) || + (flags & MAP_JIT)) { + return EINVAL; + } + } + if (flags & MAP_RESILIENT_CODESIGN) { + if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) { + return EPERM; + } + } + if (flags & MAP_SHARED) { + /* + * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because + * there is no place to inject zero-filled pages without + * actually adding them to the file. + * Since we didn't reject that combination before, there might + * already be callers using it and getting a valid MAP_SHARED + * mapping but without the resilience. + * For backwards compatibility's sake, let's keep ignoring + * MAP_RESILIENT_MEDIA in that case. + */ + flags &= ~MAP_RESILIENT_MEDIA; + } + if (flags & MAP_RESILIENT_MEDIA) { + if ((flags & MAP_ANON) || + (flags & MAP_SHARED)) { + return EINVAL; + } + } /* * Check for illegal addresses. Watch out for address wrap... Note @@ -265,15 +289,9 @@ mmap(p, uap, retval) * should be aligned after adjustment by pageoff. */ user_addr -= pageoff; - if (user_addr & PAGE_MASK) - return (EINVAL); - /* Address range must be all in user VM space. */ - if (VM_MAX_ADDRESS > 0 && (user_addr + user_size > VM_MAX_ADDRESS)) - return (EINVAL); - if (VM_MIN_ADDRESS > 0 && user_addr < VM_MIN_ADDRESS) - return (EINVAL); - if (user_addr + user_size < user_addr) - return (EINVAL); + if (user_addr & vm_map_page_mask(user_map)) { + return EINVAL; + } } #ifdef notyet /* DO not have apis to get this info, need to wait till then*/ @@ -285,52 +303,116 @@ mmap(p, uap, retval) * There should really be a pmap call to determine a reasonable * location. */ - else if (addr < round_page_32(p->p_vmspace->vm_daddr + MAXDSIZ)) - addr = round_page_32(p->p_vmspace->vm_daddr + MAXDSIZ); + else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ, + vm_map_page_mask(user_map))) { + addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ, + vm_map_page_mask(user_map)); + } #endif + alloc_flags = 0; if (flags & MAP_ANON) { + maxprot = VM_PROT_ALL; +#if CONFIG_MACF + /* + * Entitlement check. + */ + error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot); + if (error) { + return EINVAL; + } +#endif /* MAC */ + /* - * Mapping blank space is trivial. + * Mapping blank space is trivial. Use positive fds as the alias + * value for memory tracking. */ + if (fd != -1) { + /* + * Use "fd" to pass (some) Mach VM allocation flags, + * (see the VM_FLAGS_* definitions). + */ + alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | + VM_FLAGS_SUPERPAGE_MASK | + VM_FLAGS_PURGABLE | + VM_FLAGS_4GB_CHUNK); + if (alloc_flags != fd) { + /* reject if there are any extra flags */ + return EINVAL; + } + VM_GET_FLAGS_ALIAS(alloc_flags, tag); + alloc_flags &= ~VM_FLAGS_ALIAS_MASK; + } + handle = NULL; - maxprot = VM_PROT_ALL; file_pos = 0; mapanon = 1; } else { + struct vnode_attr va; + vfs_context_t ctx = vfs_context_current(); + + if (flags & MAP_JIT) { + return EINVAL; + } + /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. 
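
For the MAP_ANON branch above, where a positive "fd" is reused to carry Mach VM allocation flags and a memory tag: a minimal userspace sketch, assuming the VM_MAKE_TAG and VM_MEMORY_* definitions from <mach/vm_statistics.h>:

    #include <sys/mman.h>
    #include <mach/vm_statistics.h>  /* VM_MAKE_TAG, VM_MEMORY_* */
    #include <stdio.h>

    int main(void) {
        /* Tag the region so vmmap/footprint can attribute it to this subsystem. */
        void *p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE,
            VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1),  /* fd carries the tag */
            0);
        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        munmap(p, 1 << 20);
        return 0;
    }
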
*/ - err = fdgetf(p, uap->fd, &fp); - if (err) - return(err); - if(fp->f_type == DTYPE_PSXSHM) { - uap->addr = (caddr_t)user_addr; - uap->len = user_size; + err = fp_lookup(p, fd, &fp, 0); + if (err) { + return err; + } + fpref = 1; + switch (FILEGLOB_DTYPE(fp->f_fglob)) { + case DTYPE_PSXSHM: + uap->addr = (user_addr_t)user_addr; + uap->len = (user_size_t)user_size; uap->prot = prot; uap->flags = flags; uap->pos = file_pos; - return(pshm_mmap(p, uap, retval, fp , pageoff)); + error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff); + goto bad; + case DTYPE_VNODE: + break; + default: + error = EINVAL; + goto bad; + } + vp = (struct vnode *)fp->f_fglob->fg_data; + error = vnode_getwithref(vp); + if (error != 0) { + goto bad; } - if (fp->f_type != DTYPE_VNODE) - return(EINVAL); - vp = (struct vnode *)fp->f_data; - - if (vp->v_type != VREG && vp->v_type != VCHR) - return (EINVAL); + if (vp->v_type != VREG && vp->v_type != VCHR) { + (void)vnode_put(vp); + error = EINVAL; + goto bad; + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); + /* + * POSIX: mmap needs to update access time for mapped files + */ + if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { + VATTR_INIT(&va); + nanotime(&va.va_access_time); + VATTR_SET_ACTIVE(&va, va_access_time); + vnode_setattr(vp, &va, ctx); + } + /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR || vp->v_type == VSTR) { - return(ENODEV); + (void)vnode_put(vp); + error = ENODEV; + goto bad; } else { /* * Ensure that file and memory protections are @@ -341,498 +423,713 @@ mmap(p, uap, retval) * credentials do we use for determination? What if * proc does a setuid? */ - maxprot = VM_PROT_EXECUTE; /* ??? */ - if (fp->f_flag & FREAD) + maxprot = VM_PROT_EXECUTE; /* ??? */ + if (fp->f_fglob->fg_flag & FREAD) { maxprot |= VM_PROT_READ; - else if (prot & PROT_READ) - return (EACCES); + } else if (prot & PROT_READ) { + (void)vnode_put(vp); + error = EACCES; + goto bad; + } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character * device mappings), and we are trying to get write * permission although we opened it without asking - * for it, bail out. + * for it, bail out. */ if ((flags & MAP_SHARED) != 0) { - if ((fp->f_flag & FWRITE) != 0) { - struct vattr va; - if ((err = - VOP_GETATTR(vp, &va, - p->p_ucred, p))) - return (err); - if ((va.va_flags & - (IMMUTABLE|APPEND)) == 0) + if ((fp->f_fglob->fg_flag & FWRITE) != 0 && + /* + * Do not allow writable mappings of + * swap files (see vm_swapfile_pager.c). + */ + !vnode_isswap(vp)) { + /* + * check for write access + * + * Note that we already made this check when granting FWRITE + * against the file, so it seems redundant here. 
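
Seen from userspace, the maxprot logic being set up here behaves as follows; a sketch, where "data.bin" is a hypothetical existing file:

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void) {
        int fd = open("data.bin", O_RDONLY);  /* FREAD only: maxprot lacks VM_PROT_WRITE */
        if (fd < 0) { perror("open"); return 1; }

        /* Shared writable mapping of a read-only descriptor: EACCES. */
        void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
            perror("mmap MAP_SHARED|PROT_WRITE");  /* expected: Permission denied */
        }

        /* A private copy-on-write mapping is fine: writes never reach the file. */
        p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (p != MAP_FAILED) {
            munmap(p, 4096);
        }
        close(fd);
        return 0;
    }
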
+ */ + error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx); + + /* if not granted for any reason, but we wanted it, bad */ + if ((prot & PROT_WRITE) && (error != 0)) { + vnode_put(vp); + goto bad; + } + + /* if writable, remember */ + if (error == 0) { maxprot |= VM_PROT_WRITE; - else if (prot & PROT_WRITE) - return (EPERM); - } else if ((prot & PROT_WRITE) != 0) - return (EACCES); - } else + } + } else if ((prot & PROT_WRITE) != 0) { + (void)vnode_put(vp); + error = EACCES; + goto bad; + } + } else { maxprot |= VM_PROT_WRITE; + } handle = (void *)vp; +#if CONFIG_MACF + error = mac_file_check_mmap(vfs_context_ucred(ctx), + fp->f_fglob, prot, flags, file_pos, &maxprot); + if (error) { + (void)vnode_put(vp); + goto bad; + } +#endif /* MAC */ + /* + * Consult the file system to determine if this + * particular file object can be mapped. + */ + error = VNOP_MMAP_CHECK(vp, prot, ctx); + if (error) { + (void)vnode_put(vp); + goto bad; + } } + + /* + * No copy-on-read for mmap() mappings themselves. + */ + vmk_flags.vmkf_no_copy_on_read = 1; } - if (user_size == 0) - return(0); + if (user_size == 0) { + if (!mapanon) { + (void)vnode_put(vp); + } + error = 0; + goto bad; + } /* * We bend a little - round the start and end addresses * to the nearest page boundary. */ - user_size = round_page_32(user_size); - - if (file_pos & PAGE_MASK_64) - return (EINVAL); + user_size = vm_map_round_page(user_size, + vm_map_page_mask(user_map)); - user_map = current_map(); + if (file_pos & vm_map_page_mask(user_map)) { + if (!mapanon) { + (void)vnode_put(vp); + } + error = EINVAL; + goto bad; + } if ((flags & MAP_FIXED) == 0) { - find_space = TRUE; - user_addr = round_page_32(user_addr); + alloc_flags |= VM_FLAGS_ANYWHERE; + user_addr = vm_map_round_page(user_addr, + vm_map_page_mask(user_map)); } else { - if (user_addr != trunc_page_32(user_addr)) - return (EINVAL); - find_space = FALSE; - (void) vm_deallocate(user_map, user_addr, user_size); + if (user_addr != vm_map_trunc_page(user_addr, + vm_map_page_mask(user_map))) { + if (!mapanon) { + (void)vnode_put(vp); + } + error = EINVAL; + goto bad; + } + /* + * mmap(MAP_FIXED) will replace any existing mappings in the + * specified range, if the new mapping is successful. + * If we just deallocate the specified address range here, + * another thread might jump in and allocate memory in that + * range before we get a chance to establish the new mapping, + * and we won't have a chance to restore the old mappings. + * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it + * has to deallocate the existing mappings and establish the + * new ones atomically. + */ + alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; } + if (flags & MAP_NOCACHE) { + alloc_flags |= VM_FLAGS_NO_CACHE; + } + + if (flags & MAP_JIT) { + vmk_flags.vmkf_map_jit = TRUE; + } + + if (flags & MAP_RESILIENT_CODESIGN) { + alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN; + } + if (flags & MAP_RESILIENT_MEDIA) { + alloc_flags |= VM_FLAGS_RESILIENT_MEDIA; + } + +#ifndef CONFIG_EMBEDDED + if (flags & MAP_32BIT) { + if (csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) == 0 || + IOTaskHasEntitlement(current_task(), MAP_32BIT_ENTITLEMENT)) { + vmk_flags.vmkf_32bit_map_va = TRUE; + } else { + error = EPERM; + goto bad; + } + } +#endif /* * Lookup/allocate object. */ - if (flags & MAP_ANON) { - /* - * Unnamed anonymous regions always start at 0. - */ - if (handle == 0) - file_pos = 0; - } - if (handle == NULL) { - pager = NULL; + control = NULL; #ifdef notyet /* Hmm .. 
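
The VM_FLAGS_OVERWRITE behavior described above is visible from userspace: MAP_FIXED atomically replaces whatever occupied the range, with no window where it is unmapped. A minimal sketch:

    #include <sys/mman.h>
    #include <assert.h>

    int main(void) {
        size_t len = 4096;
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        assert(p != MAP_FAILED);
        p[0] = 'A';

        /* Replace the existing mapping in place. */
        char *q = mmap(p, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
        assert(q == p);        /* MAP_FIXED: exact address honored */
        assert(q[0] == '\0');  /* fresh zero-fill; old contents are gone */

        munmap(q, len);
        return 0;
    }
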
*/ #if defined(VM_PROT_READ_IS_EXEC) - if (prot & VM_PROT_READ) + if (prot & VM_PROT_READ) { prot |= VM_PROT_EXECUTE; - - if (maxprot & VM_PROT_READ) + } + if (maxprot & VM_PROT_READ) { maxprot |= VM_PROT_EXECUTE; + } #endif #endif - result = vm_allocate(user_map, &user_addr, user_size, find_space); - if (result != KERN_SUCCESS) - goto out; - - result = vm_protect(user_map, user_addr, user_size, TRUE, maxprot); - if (result != KERN_SUCCESS) - goto out; - result = vm_protect(user_map, user_addr, user_size, FALSE, prot); - if (result != KERN_SUCCESS) - goto out; +#if 3777787 + if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) { + prot |= VM_PROT_READ; + } + if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) { + maxprot |= VM_PROT_READ; + } +#endif /* radar 3777787 */ +map_anon_retry: + result = vm_map_enter_mem_object(user_map, + &user_addr, user_size, + 0, alloc_flags, vmk_flags, + tag, + IPC_PORT_NULL, 0, FALSE, + prot, maxprot, + (flags & MAP_SHARED) ? + VM_INHERIT_SHARE : + VM_INHERIT_DEFAULT); + + /* If a non-binding address was specified for this anonymous + * mapping, retry the mapping with a zero base + * in the event the mapping operation failed due to + * lack of space between the address and the map's maximum. + */ + if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { + user_addr = vm_map_page_size(user_map); + goto map_anon_retry; + } } else { - UBCINFOCHECK("mmap", vp); - pager = (vm_pager_t)ubc_getpager(vp); - - if (pager == NULL) - return (ENOMEM); + if (vnode_isswap(vp)) { + /* + * Map swap files with a special pager + * that returns obfuscated contents. + */ + control = NULL; + pager = swapfile_pager_setup(vp); + if (pager != MEMORY_OBJECT_NULL) { + control = swapfile_pager_control(pager); + } + } else { + control = ubc_getobject(vp, UBC_FLAGS_NONE); + } + + if (control == NULL) { + (void)vnode_put(vp); + error = ENOMEM; + goto bad; + } /* * Set credentials: * FIXME: if we're writing the file we need a way to * ensure that someone doesn't replace our R/W creds - * with ones that only work for read. + * with ones that only work for read. */ - ubc_setcred(vp, p); + ubc_setthreadcred(vp, p, current_thread()); docow = FALSE; - if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { + if ((flags & (MAP_ANON | MAP_SHARED)) == 0) { docow = TRUE; } #ifdef notyet /* Hmm .. 
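
The KERN_NO_SPACE retry above is one reason a non-MAP_FIXED address is only a hint: if nothing fits between the hint and the top of the map, the kernel retries from the bottom rather than failing. An illustrative probe (the hint value is arbitrary, and placement may vary by platform):

    #include <sys/mman.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        void *hint = (void *)(uintptr_t)0x7fffffff0000ULL;  /* likely too high */
        void *p = mmap(hint, 1 << 20, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p != MAP_FAILED) {
            printf("hint=%p got=%p\n", hint, p);  /* may differ: hint not honored */
            munmap(p, 1 << 20);
        }
        return 0;
    }
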
*/ #if defined(VM_PROT_READ_IS_EXEC) - if (prot & VM_PROT_READ) + if (prot & VM_PROT_READ) { prot |= VM_PROT_EXECUTE; - - if (maxprot & VM_PROT_READ) + } + if (maxprot & VM_PROT_READ) { maxprot |= VM_PROT_EXECUTE; + } #endif #endif /* notyet */ - result = vm_map_64(user_map, &user_addr, user_size, - 0, find_space, pager, file_pos, docow, - prot, maxprot, - VM_INHERIT_DEFAULT); +#if 3777787 + if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) { + prot |= VM_PROT_READ; + } + if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) { + maxprot |= VM_PROT_READ; + } +#endif /* radar 3777787 */ + +map_file_retry: + if (flags & MAP_RESILIENT_CODESIGN) { + if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) { + assert(!mapanon); + vnode_put(vp); + error = EPERM; + goto bad; + } + /* strictly limit access to "prot" */ + maxprot &= prot; + } - if (result != KERN_SUCCESS) - goto out; + vm_object_offset_t end_pos = 0; + if (os_add_overflow(user_size, file_pos, &end_pos)) { + vnode_put(vp); + error = EINVAL; + goto bad; + } - ubc_map(vp); + result = vm_map_enter_mem_object_control(user_map, + &user_addr, user_size, + 0, alloc_flags, vmk_flags, + tag, + control, file_pos, + docow, prot, maxprot, + (flags & MAP_SHARED) ? + VM_INHERIT_SHARE : + VM_INHERIT_DEFAULT); + + /* If a non-binding address was specified for this file backed + * mapping, retry the mapping with a zero base + * in the event the mapping operation failed due to + * lack of space between the address and the map's maximum. + */ + if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { + user_addr = vm_map_page_size(user_map); + goto map_file_retry; + } } - if (flags & MAP_SHARED) { - result = vm_inherit(user_map, user_addr, user_size, - VM_INHERIT_SHARE); - if (result != KERN_SUCCESS) { - (void) vm_deallocate(user_map, user_addr, user_size); - goto out; - } + if (!mapanon) { + (void)vnode_put(vp); } -out: switch (result) { case KERN_SUCCESS: - if (!mapanon) - *fdflags(p, uap->fd) |= UF_MAPPED; - *retval = (register_t)(user_addr + pageoff); - return (0); + *retval = user_addr + pageoff; + error = 0; + break; case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: - return (ENOMEM); + error = ENOMEM; + break; case KERN_PROTECTION_FAILURE: - return (EACCES); + error = EACCES; + break; default: - return (EINVAL); + error = EINVAL; + break; + } +bad: + if (pager != MEMORY_OBJECT_NULL) { + /* + * Release the reference on the pager. + * If the mapping was successful, it now holds + * an extra reference. 
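
The os_add_overflow()/os_add3_overflow() guards used in this function wrap the clang/gcc overflow builtins; the pattern is easy to model. A sketch, where check_range is a hypothetical helper:

    #include <stdint.h>
    #include <stdio.h>

    static int check_range(uint64_t file_pos, uint64_t user_size) {
        uint64_t end_pos;
        if (__builtin_add_overflow(user_size, file_pos, &end_pos)) {
            return -1;  /* mmap() returns EINVAL on wrap-around */
        }
        return 0;
    }

    int main(void) {
        printf("%d\n", check_range(UINT64_MAX - 10, 100));  /* -1: wraps */
        printf("%d\n", check_range(4096, 4096));            /*  0: fine */
        return 0;
    }
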
+ */ + memory_object_deallocate(pager); + } + if (fpref) { + fp_drop(p, fd, fp, 0); } - /*NOTREACHED*/ + + KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0); +#ifndef CONFIG_EMBEDDED + KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32), + (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0); +#endif + return error; +} + +int +msync(__unused proc_t p, struct msync_args *uap, int32_t *retval) +{ + __pthread_testcancel(1); + return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval); } -struct msync_args { - caddr_t addr; - int len; - int flags; -}; int -msync(p, uap, retval) - struct proc *p; - struct msync_args *uap; - register_t *retval; +msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval) { - vm_offset_t addr; - vm_size_t size, pageoff; + mach_vm_offset_t addr; + mach_vm_size_t size; int flags; vm_map_t user_map; int rv; - vm_sync_t sync_flags=0; - - addr = (vm_offset_t) uap->addr; - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size = uap->len; - size = (vm_size_t) round_page_32(size); - flags = uap->flags; - - if (addr + size < addr) - return(EINVAL); + vm_sync_t sync_flags = 0; user_map = current_map(); - - if ((flags & (MS_ASYNC|MS_SYNC)) == (MS_ASYNC|MS_SYNC)) - return (EINVAL); - - if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) - return (EINVAL); - + addr = (mach_vm_offset_t) uap->addr; + size = (mach_vm_size_t) uap->len; +#ifndef CONFIG_EMBEDDED + KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0); +#endif + if (mach_vm_range_overflows(addr, size)) { + return EINVAL; + } + if (addr & vm_map_page_mask(user_map)) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } if (size == 0) { /* * We cannot support this properly without maintaining * list all mmaps done. Cannot use vm_map_entry as they could be - * split or coalesced by indepenedant actions. So instead of + * split or coalesced by indepenedant actions. 
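
From userspace, the msync() argument checks above look like this (a sketch; "f.dat" is a hypothetical file):

    #include <fcntl.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void) {
        int fd = open("f.dat", O_RDWR | O_CREAT, 0600);
        if (fd < 0 || ftruncate(fd, 4096) != 0) { return 1; }

        char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) { return 1; }
        memcpy(p, "hello", 5);

        if (msync(p + 1, 100, MS_SYNC) != 0) {
            perror("msync unaligned");    /* EINVAL: address not page-aligned */
        }
        if (msync(p, 0, MS_SYNC) != 0) {
            perror("msync zero length");  /* EINVAL, per the comment above */
        }
        if (msync(p, 4096, MS_SYNC) != 0) {
            perror("msync");
        }
        munmap(p, 4096);
        close(fd);
        return 0;
    }
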
So instead of * inaccurate results, lets just return error as invalid size * specified */ - return (EINVAL); /* XXX breaks posix apps */ + return EINVAL; /* XXX breaks posix apps */ } - if (flags & MS_KILLPAGES) - sync_flags |= VM_SYNC_KILLPAGES; - if (flags & MS_DEACTIVATE) - sync_flags |= VM_SYNC_DEACTIVATE; - if (flags & MS_INVALIDATE) - sync_flags |= VM_SYNC_INVALIDATE; + flags = uap->flags; + /* disallow contradictory flags */ + if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) { + return EINVAL; + } + + if (flags & MS_KILLPAGES) { + sync_flags |= VM_SYNC_KILLPAGES; + } + if (flags & MS_DEACTIVATE) { + sync_flags |= VM_SYNC_DEACTIVATE; + } + if (flags & MS_INVALIDATE) { + sync_flags |= VM_SYNC_INVALIDATE; + } - if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) { - if (flags & MS_ASYNC) - sync_flags |= VM_SYNC_ASYNCHRONOUS; - else - sync_flags |= VM_SYNC_SYNCHRONOUS; + if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) { + if (flags & MS_ASYNC) { + sync_flags |= VM_SYNC_ASYNCHRONOUS; + } else { + sync_flags |= VM_SYNC_SYNCHRONOUS; + } } - rv = vm_msync(user_map, addr, size, sync_flags); + + sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */ + + rv = mach_vm_msync(user_map, addr, size, sync_flags); switch (rv) { case KERN_SUCCESS: break; - case KERN_INVALID_ADDRESS: - return (EINVAL); /* Sun returns ENOMEM? */ + case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */ + return ENOMEM; case KERN_FAILURE: - return (EIO); + return EIO; default: - return (EINVAL); + return EINVAL; } - - return (0); + return 0; } int -mremap() +munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval) { - /* Not yet implemented */ - return (EOPNOTSUPP); -} - -struct munmap_args { - caddr_t addr; - int len; -}; -int -munmap(p, uap, retval) - struct proc *p; - struct munmap_args *uap; - register_t *retval; - -{ - vm_offset_t user_addr; - vm_size_t user_size, pageoff; - kern_return_t result; - - user_addr = (vm_offset_t) uap->addr; - user_size = (vm_size_t) uap->len; + mach_vm_offset_t user_addr; + mach_vm_size_t user_size; + kern_return_t result; + vm_map_t user_map; - AUDIT_ARG(addr, (void *)user_addr); - AUDIT_ARG(len, (int) user_size); - - pageoff = (user_addr & PAGE_MASK); + user_map = current_map(); + user_addr = (mach_vm_offset_t) uap->addr; + user_size = (mach_vm_size_t) uap->len; - user_addr -= pageoff; - user_size += pageoff; - user_size = round_page_32(user_size); - if (user_addr + user_size < user_addr) - return(EINVAL); + AUDIT_ARG(addr, user_addr); + AUDIT_ARG(len, user_size); - if (user_size == 0) - return (0); + if (user_addr & vm_map_page_mask(user_map)) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } - /* Address range must be all in user VM space. 
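
The munmap() argument checks around this point (page alignment above, overflow and zero-size just below) have the same userspace-visible shape; a minimal sketch:

    #include <sys/mman.h>
    #include <stdio.h>

    int main(void) {
        size_t len = 4096;
        char *p = mmap(NULL, len, PROT_READ, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED) { return 1; }

        if (munmap(p + 1, len) != 0) {
            perror("munmap unaligned");    /* EINVAL: address not page-aligned */
        }
        if (munmap(p, 0) != 0) {
            perror("munmap zero length");  /* EINVAL, per the UNIX SPEC notes */
        }
        if (munmap(p, len) != 0) {
            perror("munmap");
        }
        return 0;
    }
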
*/ - if (VM_MAX_ADDRESS > 0 && (user_addr + user_size > VM_MAX_ADDRESS)) - return (EINVAL); - if (VM_MIN_ADDRESS > 0 && user_addr < VM_MIN_ADDRESS) - return (EINVAL); + if (mach_vm_range_overflows(user_addr, user_size)) { + return EINVAL; + } + if (user_size == 0) { + /* UNIX SPEC: size is 0, return EINVAL */ + return EINVAL; + } - result = vm_deallocate(current_map(), user_addr, user_size); + result = mach_vm_deallocate(user_map, user_addr, user_size); if (result != KERN_SUCCESS) { - return(EINVAL); + return EINVAL; } - return(0); -} - -void -munmapfd(p, fd) - struct proc *p; - int fd; -{ - /* - * XXX should vm_deallocate any regions mapped to this file - */ - *fdflags(p, fd) &= ~UF_MAPPED; + return 0; } -struct mprotect_args { - caddr_t addr; - int len; - int prot; -}; int -mprotect(p, uap, retval) - struct proc *p; - struct mprotect_args *uap; - register_t *retval; +mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval) { - register vm_prot_t prot; - vm_offset_t user_addr; - vm_size_t user_size, pageoff; - kern_return_t result; - vm_map_t user_map; + vm_prot_t prot; + mach_vm_offset_t user_addr; + mach_vm_size_t user_size; + kern_return_t result; + vm_map_t user_map; +#if CONFIG_MACF + int error; +#endif AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); - AUDIT_ARG(value, uap->prot); - user_addr = (vm_offset_t) uap->addr; - user_size = (vm_size_t) uap->len; - prot = (vm_prot_t)(uap->prot & VM_PROT_ALL); + AUDIT_ARG(value32, uap->prot); + + user_map = current_map(); + user_addr = (mach_vm_offset_t) uap->addr; + user_size = (mach_vm_size_t) uap->len; + prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ)); + + if (mach_vm_range_overflows(user_addr, user_size)) { + return EINVAL; + } + if (user_addr & vm_map_page_mask(user_map)) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } #ifdef notyet /* Hmm .. */ #if defined(VM_PROT_READ_IS_EXEC) - if (prot & VM_PROT_READ) + if (prot & VM_PROT_READ) { prot |= VM_PROT_EXECUTE; + } #endif #endif /* notyet */ - pageoff = (user_addr & PAGE_MASK); - user_addr -= pageoff; - user_size += pageoff; - user_size = round_page_32(user_size); - if (user_addr + user_size < user_addr) - return(EINVAL); +#if 3936456 + if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) { + prot |= VM_PROT_READ; + } +#endif /* 3936456 */ - user_map = current_map(); +#if defined(__arm64__) + if (prot & VM_PROT_STRIP_READ) { + prot &= ~(VM_PROT_READ | VM_PROT_STRIP_READ); + } +#endif + +#if CONFIG_MACF + /* + * The MAC check for mprotect is of limited use for 2 reasons: + * Without mmap revocation, the caller could have asked for the max + * protections initially instead of a reduced set, so a mprotect + * check would offer no new security. + * It is not possible to extract the vnode from the pager object(s) + * of the target memory range. + * However, the MAC check may be used to prevent a process from, + * e.g., making the stack executable. + */ + error = mac_proc_check_mprotect(p, user_addr, + user_size, prot); + if (error) { + return error; + } +#endif + + if (prot & VM_PROT_TRUSTED) { +#if CONFIG_DYNAMIC_CODE_SIGNING + /* CODE SIGNING ENFORCEMENT - JIT support */ + /* The special protection value VM_PROT_TRUSTED requests that we treat + * this page as if it had a valid code signature. + * If this is enabled, there MUST be a MAC policy implementing the + * mac_proc_check_mprotect() hook above. 
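
A userspace sketch of mprotect()'s error contract as implemented below (EACCES for protection failures, ENOMEM for unmapped ranges):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void) {
        size_t len = 4096;
        char *p = mmap(NULL, len, PROT_READ, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED) { return 1; }

        if (mprotect(p, len, PROT_READ | PROT_WRITE) != 0) {
            perror("mprotect RW");  /* upgrading an anonymous mapping: allowed */
        }

        munmap(p, len);
        /* Range no longer mapped: UNIX SPEC says ENOMEM, per the switch below. */
        if (mprotect(p, len, PROT_READ) != 0 && errno == ENOMEM) {
            printf("unmapped range -> ENOMEM\n");
        }
        return 0;
    }
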
Otherwise, Codesigning will be + * compromised because the check would always succeed and thusly any + * process could sign dynamically. */ + result = vm_map_sign( + user_map, + vm_map_trunc_page(user_addr, + vm_map_page_mask(user_map)), + vm_map_round_page(user_addr + user_size, + vm_map_page_mask(user_map))); + switch (result) { + case KERN_SUCCESS: + break; + case KERN_INVALID_ADDRESS: + /* UNIX SPEC: for an invalid address range, return ENOMEM */ + return ENOMEM; + default: + return EINVAL; + } +#else + return ENOTSUP; +#endif + } + prot &= ~VM_PROT_TRUSTED; - result = vm_map_protect(user_map, user_addr, user_addr+user_size, prot, - FALSE); + result = mach_vm_protect(user_map, user_addr, user_size, + FALSE, prot); switch (result) { case KERN_SUCCESS: - return (0); + return 0; case KERN_PROTECTION_FAILURE: - return (EACCES); + return EACCES; + case KERN_INVALID_ADDRESS: + /* UNIX SPEC: for an invalid address range, return ENOMEM */ + return ENOMEM; } - return (EINVAL); + return EINVAL; } -struct minherit_args { - void *addr; - size_t len; - int inherit; -}; - int -minherit(p, uap, retval) - struct proc *p; - struct minherit_args *uap; - register_t *retval; +minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval) { - vm_offset_t addr; - vm_size_t size, pageoff; - register vm_inherit_t inherit; - vm_map_t user_map; - kern_return_t result; + mach_vm_offset_t addr; + mach_vm_size_t size; + vm_inherit_t inherit; + vm_map_t user_map; + kern_return_t result; AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); - AUDIT_ARG(value, uap->inherit); - addr = (vm_offset_t)uap->addr; - size = uap->len; - inherit = uap->inherit; - - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page_32(size); - if (addr + size < addr) - return(EINVAL); + AUDIT_ARG(value32, uap->inherit); + addr = (mach_vm_offset_t)uap->addr; + size = (mach_vm_size_t)uap->len; + inherit = uap->inherit; + if (mach_vm_range_overflows(addr, size)) { + return EINVAL; + } user_map = current_map(); - result = vm_inherit(user_map, addr, size, - inherit); + result = mach_vm_inherit(user_map, addr, size, + inherit); switch (result) { case KERN_SUCCESS: - return (0); + return 0; case KERN_PROTECTION_FAILURE: - return (EACCES); + return EACCES; } - return (EINVAL); + return EINVAL; } -struct madvise_args { - caddr_t addr; - int len; - int behav; -}; -/* ARGSUSED */ int -madvise(p, uap, retval) - struct proc *p; - struct madvise_args *uap; - register_t *retval; +madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval) { vm_map_t user_map; - vm_offset_t start, end; + mach_vm_offset_t start; + mach_vm_size_t size; vm_behavior_t new_behavior; - kern_return_t result; - - /* - * Check for illegal addresses. Watch out for address wrap... Note - * that VM_*_ADDRESS are not constants due to casts (argh). - */ - if (VM_MAX_ADDRESS > 0 && - ((vm_offset_t) uap->addr + uap->len) > VM_MAX_ADDRESS) - return (ENOMEM); - if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS) - return (ENOMEM); - - if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) - return (ENOMEM); + kern_return_t result; /* * Since this routine is only advisory, we default to conservative * behavior. 
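
One Darwin-specific pair handled by the madvise() switch below: MADV_FREE_REUSABLE and MADV_FREE_REUSE bracket a buffer's idle period, letting the VM reclaim the pages while the address range stays allocated. A minimal sketch:

    #include <assert.h>
    #include <sys/mman.h>

    int main(void) {
        size_t len = 1 << 20;
        void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        assert(buf != MAP_FAILED);

        /* ... use buf as scratch space ... */

        /* Idle: let the VM reclaim the pages while keeping the range. */
        madvise(buf, len, MADV_FREE_REUSABLE);

        /* Reusing it: take the pages back (zero-filled if they were reclaimed). */
        madvise(buf, len, MADV_FREE_REUSE);

        munmap(buf, len);
        return 0;
    }
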
*/ - start = trunc_page_32((vm_offset_t) uap->addr); - end = round_page_32((vm_offset_t) uap->addr + uap->len); - - user_map = current_map(); - switch (uap->behav) { - case MADV_RANDOM: - new_behavior = VM_BEHAVIOR_RANDOM; - break; - case MADV_SEQUENTIAL: - new_behavior = VM_BEHAVIOR_SEQUENTIAL; - break; - case MADV_NORMAL: - new_behavior = VM_BEHAVIOR_DEFAULT; - break; - case MADV_WILLNEED: - new_behavior = VM_BEHAVIOR_WILLNEED; - break; - case MADV_DONTNEED: - new_behavior = VM_BEHAVIOR_DONTNEED; - break; - default: - return(EINVAL); + case MADV_RANDOM: + new_behavior = VM_BEHAVIOR_RANDOM; + break; + case MADV_SEQUENTIAL: + new_behavior = VM_BEHAVIOR_SEQUENTIAL; + break; + case MADV_NORMAL: + new_behavior = VM_BEHAVIOR_DEFAULT; + break; + case MADV_WILLNEED: + new_behavior = VM_BEHAVIOR_WILLNEED; + break; + case MADV_DONTNEED: + new_behavior = VM_BEHAVIOR_DONTNEED; + break; + case MADV_FREE: + new_behavior = VM_BEHAVIOR_FREE; + break; + case MADV_ZERO_WIRED_PAGES: + new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES; + break; + case MADV_FREE_REUSABLE: + new_behavior = VM_BEHAVIOR_REUSABLE; + break; + case MADV_FREE_REUSE: + new_behavior = VM_BEHAVIOR_REUSE; + break; + case MADV_CAN_REUSE: + new_behavior = VM_BEHAVIOR_CAN_REUSE; + break; + case MADV_PAGEOUT: +#if MACH_ASSERT + new_behavior = VM_BEHAVIOR_PAGEOUT; + break; +#else /* MACH_ASSERT */ + return ENOTSUP; +#endif /* MACH_ASSERT */ + default: + return EINVAL; + } + + start = (mach_vm_offset_t) uap->addr; + size = (mach_vm_size_t) uap->len; + if (mach_vm_range_overflows(start, size)) { + return EINVAL; + } +#if __arm64__ + if (start == 0 && + size != 0 && + (uap->behav == MADV_FREE || + uap->behav == MADV_FREE_REUSABLE)) { + printf("** FOURK_COMPAT: %d[%s] " + "failing madvise(0x%llx,0x%llx,%s)\n", + p->p_pid, p->p_comm, start, size, + ((uap->behav == MADV_FREE_REUSABLE) + ? "MADV_FREE_REUSABLE" + : "MADV_FREE")); + DTRACE_VM3(fourk_compat_madvise, + uint64_t, start, + uint64_t, size, + int, uap->behav); + return EINVAL; } +#endif /* __arm64__ */ - result = vm_behavior_set(user_map, start, end, new_behavior); + user_map = current_map(); + + result = mach_vm_behavior_set(user_map, start, size, new_behavior); switch (result) { - case KERN_SUCCESS: - return (0); - case KERN_INVALID_ADDRESS: - return (EINVAL); + case KERN_SUCCESS: + return 0; + case KERN_INVALID_ADDRESS: + return EINVAL; + case KERN_NO_SPACE: + return ENOMEM; } - return (EINVAL); + return EINVAL; } -struct mincore_args { - const void *addr; - size_t len; - char *vec; -}; -/* ARGSUSED */ int -mincore(p, uap, retval) - struct proc *p; - struct mincore_args *uap; - register_t *retval; +mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval) { - vm_offset_t addr, first_addr; - vm_offset_t end; - vm_map_t map; - char *vec; - int error; - int vecindex, lastvecindex; - int mincoreinfo=0; - int pqueryinfo; - kern_return_t ret; - int numref; + mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0; + vm_map_t map = VM_MAP_NULL; + user_addr_t vec = 0; + int error = 0; + int lastvecindex = 0; + int mincoreinfo = 0; + int pqueryinfo = 0; + unsigned int pqueryinfo_vec_size = 0; + vm_page_info_basic_t info = NULL; + mach_msg_type_number_t count = 0; + char *kernel_vec = NULL; + uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0; + kern_return_t kr = KERN_SUCCESS; map = current_map(); @@ -840,379 +1137,340 @@ mincore(p, uap, retval) * Make sure that the addresses presented are valid for user * mode. 
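
Typical use of the mincore() implementation that follows: one status byte per page, with bits defined by the MINCORE_* masks. A minimal sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void) {
        size_t page = (size_t)getpagesize();
        size_t len = 8 * page;
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED) { return 1; }
        p[0] = 1;  /* touch only the first page */

        char *vec = malloc(len / page);  /* one status byte per page */
        if (vec == NULL || mincore(p, len, vec) != 0) { return 1; }
        for (size_t i = 0; i < len / page; i++) {
            printf("page %zu: %s\n", i,
                (vec[i] & MINCORE_INCORE) ? "resident" : "not resident");
        }
        free(vec);
        munmap(p, len);
        return 0;
    }
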
*/ - first_addr = addr = trunc_page_32((vm_offset_t) uap->addr); - end = addr + (vm_size_t)round_page_32(uap->len); + first_addr = addr = vm_map_trunc_page(uap->addr, + vm_map_page_mask(map)); + end = vm_map_round_page(uap->addr + uap->len, + vm_map_page_mask(map)); + + if (end < addr) { + return EINVAL; + } - if (VM_MAX_ADDRESS > 0 && end > VM_MAX_ADDRESS) - return (EINVAL); - if (end < addr) - return (EINVAL); + if (end == addr) { + return 0; + } /* - * Address of byte vector + * We are going to loop through the whole 'req_vec_size' pages + * range in chunks of 'cur_vec_size'. */ - vec = uap->vec; - map = current_map(); + req_vec_size_pages = (end - addr) >> PAGE_SHIFT; + cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> PAGE_SHIFT)); + + kernel_vec = (void*) _MALLOC(cur_vec_size_pages * sizeof(char), M_TEMP, M_WAITOK | M_ZERO); + + if (kernel_vec == NULL) { + return ENOMEM; + } /* - * Do this on a map entry basis so that if the pages are not - * in the current processes address space, we can easily look - * up the pages elsewhere. + * Address of byte vector */ - lastvecindex = -1; - for(addr; addr < end; addr += PAGE_SIZE) { - pqueryinfo = 0; - ret = vm_map_page_query(map, addr, &pqueryinfo, &numref); - if (ret != KERN_SUCCESS) - pqueryinfo = 0; - mincoreinfo = 0; - if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) - mincoreinfo |= MINCORE_INCORE; - if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) - mincoreinfo |= MINCORE_REFERENCED; - if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) - mincoreinfo |= MINCORE_MODIFIED; - - - /* - * calculate index into user supplied byte vector - */ - vecindex = (addr - first_addr)>> PAGE_SHIFT; + vec = uap->vec; + + pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic); + info = (void*) _MALLOC(pqueryinfo_vec_size, M_TEMP, M_WAITOK); + + if (info == NULL) { + FREE(kernel_vec, M_TEMP); + return ENOMEM; + } + + while (addr < end) { + cur_end = addr + (cur_vec_size_pages * PAGE_SIZE_64); + + count = VM_PAGE_INFO_BASIC_COUNT; + kr = vm_map_page_range_info_internal(map, + addr, + cur_end, + VM_PAGE_INFO_BASIC, + (vm_page_info_t) info, + &count); + + assert(kr == KERN_SUCCESS); /* - * If we have skipped map entries, we need to make sure that - * the byte vector is zeroed for those skipped entries. + * Do this on a map entry basis so that if the pages are not + * in the current processes address space, we can easily look + * up the pages elsewhere. 
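
The chunked traversal above, reduced to its skeleton (illustrative; MAX_CHUNK_PAGES and the hard-coded 4 KiB page shift are stand-ins for MAX_PAGE_RANGE_QUERY >> PAGE_SHIFT and PAGE_SHIFT):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_CHUNK_PAGES 256  /* stand-in for MAX_PAGE_RANGE_QUERY >> PAGE_SHIFT */

    static void process_range(uint64_t addr, uint64_t end) {
        while (addr < end) {
            uint64_t req_pages = (end - addr) >> 12;
            uint64_t cur_pages = req_pages < MAX_CHUNK_PAGES
                ? req_pages : MAX_CHUNK_PAGES;
            uint64_t cur_end = addr + (cur_pages << 12);
            /* ... query dispositions for [addr, cur_end), copy out one chunk ... */
            printf("chunk [0x%llx, 0x%llx)\n",
                (unsigned long long)addr, (unsigned long long)cur_end);
            addr = cur_end;  /* drain the request in bounded passes */
        }
    }

    int main(void) {
        process_range(0, (uint64_t)1000 << 12);  /* 1000 pages, <= 256 per pass */
        return 0;
    }
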
*/ - while((lastvecindex + 1) < vecindex) { - error = subyte( vec + lastvecindex, 0); - if (error) { - return (EFAULT); + lastvecindex = -1; + for (; addr < cur_end; addr += PAGE_SIZE) { + pqueryinfo = info[lastvecindex + 1].disposition; + + mincoreinfo = 0; + + if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) { + mincoreinfo |= MINCORE_INCORE; } - ++lastvecindex; + if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) { + mincoreinfo |= MINCORE_REFERENCED; + } + if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) { + mincoreinfo |= MINCORE_MODIFIED; + } + if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) { + mincoreinfo |= MINCORE_PAGED_OUT; + } + if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) { + mincoreinfo |= MINCORE_COPIED; + } + if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) { + mincoreinfo |= MINCORE_ANONYMOUS; + } + /* + * calculate index into user supplied byte vector + */ + vecindex = (addr - first_addr) >> PAGE_SHIFT; + kernel_vec[vecindex] = (char)mincoreinfo; + lastvecindex = vecindex; } - /* - * Pass the page information to the user - */ - error = subyte( vec + vecindex, mincoreinfo); + + assert(vecindex == (cur_vec_size_pages - 1)); + + error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */); + if (error) { - return (EFAULT); + break; } - lastvecindex = vecindex; + + /* + * For the next chunk, we'll need: + * - bump the location in the user buffer for our next disposition. + * - new length + * - starting address + */ + vec += cur_vec_size_pages * sizeof(char); + req_vec_size_pages = (end - addr) >> PAGE_SHIFT; + cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> PAGE_SHIFT)); + + first_addr = addr; } + FREE(kernel_vec, M_TEMP); + FREE(info, M_TEMP); - /* - * Zero the last entries in the byte vector. - */ - vecindex = (end - first_addr) >> PAGE_SHIFT; - while((lastvecindex + 1) < vecindex) { - error = subyte( vec + lastvecindex, 0); - if (error) { - return (EFAULT); - } - ++lastvecindex; + if (error) { + return EFAULT; } - - return (0); -} -struct mlock_args { - caddr_t addr; - size_t len; -}; + return 0; +} int -mlock(p, uap, retval) - struct proc *p; - struct mlock_args *uap; - register_t *retval; +mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval) { vm_map_t user_map; - vm_offset_t addr; - vm_size_t size, pageoff; - int error; - kern_return_t result; + vm_map_offset_t addr; + vm_map_size_t size, pageoff; + kern_return_t result; AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); - addr = (vm_offset_t) uap->addr; - size = uap->len; - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page_32(size); - - /* disable wrap around */ - if (addr + size < addr) - return (EINVAL); -#ifdef notyet -/* Hmm.. What am I going to do with this? 
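
How the wiring result mapping just below (EAGAIN, EACCES, ENOMEM) looks from userspace; a sketch:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void) {
        size_t len = 1 << 20;
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED) { return 1; }

        if (mlock(p, len) != 0) {
            /* EAGAIN: wiring shortage; EACCES: protection; ENOMEM: bad range */
            perror("mlock");
        } else {
            munlock(p, len);  /* removes a single wiring (see the JMM comment) */
        }
        munmap(p, len);
        return 0;
    }
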
*/ - if (atop(size) + cnt.v_wire_count > vm_page_max_wired) - return (EAGAIN); -#ifdef pmap_wired_count - if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > - p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) - return (ENOMEM); -#else - error = suser(p->p_ucred, &p->p_acflag); - if (error) - return (error); -#endif -#endif /* notyet */ + addr = (vm_map_offset_t) uap->addr; + size = (vm_map_size_t)uap->len; + + if (vm_map_range_overflows(addr, size)) { + return EINVAL; + } + + if (size == 0) { + return 0; + } user_map = current_map(); + pageoff = (addr & vm_map_page_mask(user_map)); + addr -= pageoff; + size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map)); + + /* have to call vm_map_wire directly to pass "I don't know" protections */ + result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE); + + if (result == KERN_RESOURCE_SHORTAGE) { + return EAGAIN; + } else if (result == KERN_PROTECTION_FAILURE) { + return EACCES; + } else if (result != KERN_SUCCESS) { + return ENOMEM; + } - /* vm_wire */ - result = vm_map_wire(user_map, addr, (vm_offset_t)(addr+size), VM_PROT_NONE, TRUE); - return (result == KERN_SUCCESS ? 0 : ENOMEM); + return 0; /* KERN_SUCCESS */ } -struct munlock_args { - caddr_t addr; - size_t len; -}; int -munlock(p, uap, retval) - struct proc *p; - struct munlock_args *uap; - register_t *retval; +munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval) { - vm_offset_t addr; - vm_size_t size, pageoff; - int error; + mach_vm_offset_t addr; + mach_vm_size_t size; vm_map_t user_map; - kern_return_t result; + kern_return_t result; AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); - addr = (vm_offset_t) uap->addr; - size = uap->len; - - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page_32(size); - - /* disable wrap around */ - if (addr + size < addr) - return (EINVAL); - -#ifdef notyet -/* Hmm.. What am I going to do with this? */ -#ifndef pmap_wired_count - error = suser(p->p_ucred, &p->p_acflag); - if (error) - return (error); -#endif -#endif /* notyet */ + addr = (mach_vm_offset_t) uap->addr; + size = (mach_vm_size_t)uap->len; user_map = current_map(); - - /* vm_wire */ - result = vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE); - return (result == KERN_SUCCESS ? 0 : ENOMEM); -} - - -struct mlockall_args { - int how; -}; - -int -mlockall(p, uap) - struct proc *p; - struct mlockall_args *uap; -{ - return (ENOSYS); + if (mach_vm_range_overflows(addr, size)) { + return EINVAL; + } + /* JMM - need to remove all wirings by spec - this just removes one */ + result = mach_vm_wire_kernel(host_priv_self(), user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK); + return result == KERN_SUCCESS ? 
0 : ENOMEM; } -struct munlockall_args { - int how; -}; int -munlockall(p, uap) - struct proc *p; - struct munlockall_args *uap; +mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval) { - return(ENOSYS); + return ENOSYS; } - -/* BEGIN DEFUNCT */ -struct obreak_args { - char *nsiz; -}; int -obreak(p, uap, retval) - struct proc *p; - struct obreak_args *uap; - register_t *retval; +munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval) { - /* Not implemented, obsolete */ - return (ENOMEM); + return ENOSYS; } -int both; - +#if CONFIG_CODE_DECRYPTION int -ovadvise() -{ - -#ifdef lint - both = 0; -#endif -} -/* END DEFUNCT */ - -/* CDY need to fix interface to allow user to map above 32 bits */ -/* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */ -kern_return_t -map_fd( - int fd, - vm_offset_t offset, - vm_offset_t *va, - boolean_t findspace, - vm_size_t size) -{ - kern_return_t ret; - boolean_t funnel_state; - - AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD); - AUDIT_ARG(addr, va); - AUDIT_ARG(fd, fd); - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - ret = map_fd_funneled( fd, (vm_object_offset_t)offset, - va, findspace, size); - - (void) thread_funnel_set(kernel_flock, FALSE); - - AUDIT_MACH_SYSCALL_EXIT(ret); - return ret; -} - -kern_return_t -map_fd_funneled( - int fd, - vm_object_offset_t offset, - vm_offset_t *va, - boolean_t findspace, - vm_size_t size) +mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval) { - kern_return_t result; - struct file *fp; - struct vnode *vp; - void * pager; - vm_offset_t map_addr=0; - vm_size_t map_size; - vm_map_copy_t tmp; - int err=0; - vm_map_t my_map; - struct proc *p =(struct proc *)current_proc(); + mach_vm_offset_t user_addr; + mach_vm_size_t user_size; + kern_return_t result; + vm_map_t user_map; + uint32_t cryptid; + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + pager_crypt_info_t crypt_info; + const char * cryptname = 0; + char *vpath; + int len, ret; + struct proc_regioninfo_internal pinfo; + vnode_t vp; + uintptr_t vnodeaddr; + uint32_t vid; - /* - * Find the inode; verify that it's a regular file. 
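
Since mlockall() and munlockall() above simply return ENOSYS, a caller can only probe for them; a sketch:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void) {
        if (mlockall(MCL_CURRENT) != 0 && errno == ENOSYS) {
            printf("mlockall: unimplemented on this kernel (ENOSYS)\n");
        }
        if (munlockall() != 0 && errno == ENOSYS) {
            printf("munlockall: unimplemented (ENOSYS)\n");
        }
        return 0;
    }
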
- */ + AUDIT_ARG(addr, uap->addr); + AUDIT_ARG(len, uap->len); - err = fdgetf(p, fd, &fp); - if (err) - return(err); - - if (fp->f_type != DTYPE_VNODE) - return(KERN_INVALID_ARGUMENT); + user_map = current_map(); + user_addr = (mach_vm_offset_t) uap->addr; + user_size = (mach_vm_size_t) uap->len; - if (!(fp->f_flag & FREAD)) - return (KERN_PROTECTION_FAILURE); + cryptid = uap->cryptid; + cputype = uap->cputype; + cpusubtype = uap->cpusubtype; - vp = (struct vnode *)fp->f_data; + if (mach_vm_range_overflows(user_addr, user_size)) { + return EINVAL; + } + if (user_addr & vm_map_page_mask(user_map)) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } - if (vp->v_type != VREG) - return (KERN_INVALID_ARGUMENT); + switch (cryptid) { + case 0: + /* not encrypted, just an empty load command */ + return 0; + case 1: + cryptname = "com.apple.unfree"; + break; + case 0x10: + /* some random cryptid that you could manually put into + * your binary if you want NULL */ + cryptname = "com.apple.null"; + break; + default: + return EINVAL; + } - AUDIT_ARG(vnpath, vp, ARG_VNODE1); + if (NULL == text_crypter_create) { + return ENOTSUP; + } - if (offset & PAGE_MASK_64) { - printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm); - return (KERN_INVALID_ARGUMENT); + ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid); + if (ret == 0 || !vnodeaddr) { + /* No really, this returns 0 if the memory address is not backed by a file */ + return EINVAL; } - map_size = round_page_32(size); - /* - * Allow user to map in a zero length file. - */ - if (size == 0) - return (KERN_SUCCESS); - /* - * Map in the file. - */ - UBCINFOCHECK("map_fd_funneled", vp); - pager = (void *) ubc_getpager(vp); - if (pager == NULL) - return (KERN_FAILURE); - - - my_map = current_map(); - - result = vm_map_64( - my_map, - &map_addr, map_size, (vm_offset_t)0, TRUE, - pager, offset, TRUE, - VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - if (result != KERN_SUCCESS) - return (result); - - - if (!findspace) { - vm_offset_t dst_addr; - vm_map_copy_t tmp; - - if (copyin(va, &dst_addr, sizeof (dst_addr)) || - trunc_page_32(dst_addr) != dst_addr) { - (void) vm_map_remove( - my_map, - map_addr, map_addr + map_size, - VM_MAP_NO_FLAGS); - return (KERN_INVALID_ADDRESS); + vp = (vnode_t)vnodeaddr; + if ((vnode_getwithvid(vp, vid)) == 0) { + MALLOC_ZONE(vpath, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (vpath == NULL) { + vnode_put(vp); + return ENOMEM; } - result = vm_map_copyin( - my_map, - map_addr, map_size, TRUE, - &tmp); - if (result != KERN_SUCCESS) { - - (void) vm_map_remove( - my_map, - map_addr, map_addr + map_size, - VM_MAP_NO_FLAGS); - return (result); + len = MAXPATHLEN; + ret = vn_getpath(vp, vpath, &len); + if (ret) { + FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI); + vnode_put(vp); + return ret; } - result = vm_map_copy_overwrite( - my_map, - dst_addr, tmp, FALSE); - if (result != KERN_SUCCESS) { - vm_map_copy_discard(tmp); - return (result); - } + vnode_put(vp); } else { - if (copyout(&map_addr, va, sizeof (map_addr))) { - (void) vm_map_remove( - my_map, - map_addr, map_addr + map_size, - VM_MAP_NO_FLAGS); - return (KERN_INVALID_ADDRESS); + return EINVAL; + } + +#if 0 + kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n", + __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size); +#endif + + /* set up decrypter first */ + crypt_file_data_t crypt_data = 
{ + .filename = vpath, + .cputype = cputype, + .cpusubtype = cpusubtype + }; + result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data); +#if VM_MAP_DEBUG_APPLE_PROTECT + if (vm_map_debug_apple_protect) { + printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n", + p->p_pid, p->p_comm, + user_map, + (uint64_t) user_addr, + (uint64_t) (user_addr + user_size), + __FUNCTION__, vpath, result); + } +#endif /* VM_MAP_DEBUG_APPLE_PROTECT */ + FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI); + + if (result) { + printf("%s: unable to create decrypter %s, kr=%d\n", + __FUNCTION__, cryptname, result); + if (result == kIOReturnNotPrivileged) { + /* text encryption returned decryption failure */ + return EPERM; + } else { + return ENOMEM; } } - ubc_setcred(vp, current_proc()); - ubc_map(vp); + /* now remap using the decrypter */ + vm_object_offset_t crypto_backing_offset; + crypto_backing_offset = -1; /* i.e. use map entry's offset */ + result = vm_map_apple_protected(user_map, + user_addr, + user_addr + user_size, + crypto_backing_offset, + &crypt_info); + if (result) { + printf("%s: mapping failed with %d\n", __FUNCTION__, result); + } - return (KERN_SUCCESS); + if (result) { + return EPERM; + } + return 0; } +#endif /* CONFIG_CODE_DECRYPTION */
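
A condensed model of the cryptid dispatch at the top of mremap_encrypted() (illustrative only; cryptname_for_id is a hypothetical helper, not part of the source):

    #include <errno.h>
    #include <stddef.h>

    /* Map an LC_ENCRYPTION_INFO cryptid to a text-crypter name, per the switch above. */
    static int cryptname_for_id(unsigned int cryptid, const char **name) {
        switch (cryptid) {
        case 0:    *name = NULL;               return 0;  /* not encrypted: no-op */
        case 1:    *name = "com.apple.unfree"; return 0;
        case 0x10: *name = "com.apple.null";   return 0;  /* manually-tagged NULL */
        default:   return EINVAL;
        }
    }
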