X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/de355530ae67247cbd0da700edb3a2a1dae884c2..4d15aeb193b2c68f1d38666c317f8d3734f5f083:/bsd/vm/vm_unix.c?ds=sidebyside diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index 77909659c..69aeca4ab 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Mach Operating System @@ -25,69 +31,307 @@ * All rights reserved. The CMU software License Agreement specifies * the terms and conditions for use and redistribution. */ - /* + * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. */ - #include +#include + #include #include #include -#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include -#include +#include #include #include #include #include -#include +#include +#include #include #include -#include -#include +#include #include #include #include -#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include #include #include #include +#include #include -#include -#include +#include +#include + +#include + +#include + + +int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); +int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); + +#if VM_MAP_DEBUG_APPLE_PROTECT +SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, ""); +#endif /* VM_MAP_DEBUG_APPLE_PROTECT */ + +#if VM_MAP_DEBUG_FOURK +SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, ""); +#endif /* VM_MAP_DEBUG_FOURK */ + +#if DEVELOPMENT || DEBUG + +static int +sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + vm_offset_t kaddr; + kern_return_t kr; + int error = 0; + int size = 0; + + error = sysctl_handle_int(oidp, &size, 0, req); + if (error || !req->newptr) + return (error); + + kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size, 0, 0, 0, 0, VM_KERN_MEMORY_IOKIT); + + if (kr == KERN_SUCCESS) + kmem_free(kernel_map, kaddr, size); + + return error; +} + +SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, + 0, 0, &sysctl_kmem_alloc_contig, "I", ""); + +extern int vm_region_footprint; +SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_region_footprint, 0, ""); + +#endif /* DEVELOPMENT || DEBUG */ + + + +#if DEVELOPMENT || DEBUG +extern int radar_20146450; +SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, ""); + +extern int macho_printf; +SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, ""); + +extern int apple_protect_pager_data_request_debug; +SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, ""); + + +#endif /* DEVELOPMENT || DEBUG */ + +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, ""); +#if VM_SCAN_FOR_SHADOW_CHAIN +static int vm_shadow_max_enabled = 0; /* Disabled by default */ +extern int proc_shadow_max(void); +static int +vm_shadow_max SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2, oidp) + int value = 0; + + if (vm_shadow_max_enabled) + value = proc_shadow_max(); + + return SYSCTL_OUT(req, &value, sizeof(value)); +} +SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, + 0, 0, &vm_shadow_max, "I", ""); + +SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, ""); + +#endif /* VM_SCAN_FOR_SHADOW_CHAIN */ + +SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); + +__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( + mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid); +/* + * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c + */ + +#if DEVELOPMENT || DEBUG +extern int allow_stack_exec, allow_data_exec; + +SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, ""); + +#endif /* DEVELOPMENT || DEBUG */ + +static const char *prot_values[] = { + "none", + "read-only", + "write-only", + "read-write", + "execute-only", + "read-execute", + "write-execute", + "read-write-execute" +}; + +void +log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot) +{ + printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", + current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]); +} + +/* + * shared_region_unnest_logging: level of logging of unnesting events + * 0 - no logging + * 1 - throttled logging of unexpected unnesting events (default) + * 2 - unthrottled logging of unexpected unnesting events + * 3+ - unthrottled logging of all unnesting events + */ +int shared_region_unnest_logging = 1; + +SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED, + &shared_region_unnest_logging, 0, ""); + +int vm_shared_region_unnest_log_interval = 10; +int shared_region_unnest_log_count_threshold = 5; + +/* + * Shared cache path enforcement. + */ + +static int scdir_enforce = 1; +static char scdir_path[] = "/var/db/dyld/"; + +#ifndef SECURE_KERNEL +SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, ""); +#endif + +/* These log rate throttling state variables aren't thread safe, but + * are sufficient unto the task. + */ +static int64_t last_unnest_log_time = 0; +static int shared_region_unnest_log_count = 0; + +void +log_unnest_badness( + vm_map_t m, + vm_map_offset_t s, + vm_map_offset_t e, + boolean_t is_nested_map, + vm_map_offset_t lowest_unnestable_addr) +{ + struct timeval tv; + + if (shared_region_unnest_logging == 0) + return; + + if (shared_region_unnest_logging <= 2 && + is_nested_map && + s >= lowest_unnestable_addr) { + /* + * Unnesting of writable map entries is fine. + */ + return; + } + if (shared_region_unnest_logging <= 1) { + microtime(&tv); + if ((tv.tv_sec - last_unnest_log_time) < + vm_shared_region_unnest_log_interval) { + if (shared_region_unnest_log_count++ > + shared_region_unnest_log_count_threshold) + return; + } else { + last_unnest_log_time = tv.tv_sec; + shared_region_unnest_log_count = 0; + } + } -extern shared_region_mapping_t system_shared_region; -extern zone_t lsf_zone; + DTRACE_VM4(log_unnest_badness, + vm_map_t, m, + vm_map_offset_t, s, + vm_map_offset_t, e, + vm_map_offset_t, lowest_unnestable_addr); + printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m)); +} -useracc(addr, len, prot) - caddr_t addr; - u_int len; - int prot; +int +useracc( + user_addr_t addr, + user_size_t len, + int prot) { + vm_map_t map; + + map = current_map(); return (vm_map_check_protection( - current_map(), - trunc_page(addr), round_page(addr+len), + map, + vm_map_trunc_page(addr, + vm_map_page_mask(map)), + vm_map_round_page(addr+len, + vm_map_page_mask(map)), prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE)); } -vslock(addr, len) - caddr_t addr; - int len; +int +vslock( + user_addr_t addr, + user_size_t len) { -kern_return_t kret; - kret = vm_map_wire(current_map(), trunc_page(addr), - round_page(addr+len), - VM_PROT_READ | VM_PROT_WRITE ,FALSE); + kern_return_t kret; + vm_map_t map; + + map = current_map(); + kret = vm_map_wire(map, + vm_map_trunc_page(addr, + vm_map_page_mask(map)), + vm_map_round_page(addr+len, + vm_map_page_mask(map)), + VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD), + FALSE); switch (kret) { case KERN_SUCCESS: @@ -102,23 +346,29 @@ kern_return_t kret; } } -vsunlock(addr, len, dirtied) - caddr_t addr; - int len; - int dirtied; +int +vsunlock( + user_addr_t addr, + user_size_t len, + __unused int dirtied) { - pmap_t pmap; #if FIXME /* [ */ + pmap_t pmap; vm_page_t pg; + vm_map_offset_t vaddr; + ppnum_t paddr; #endif /* FIXME ] */ - vm_offset_t vaddr, paddr; - kern_return_t kret; + kern_return_t kret; + vm_map_t map; + + map = current_map(); #if FIXME /* [ */ if (dirtied) { pmap = get_task_pmap(current_task()); - for (vaddr = trunc_page(addr); vaddr < round_page(addr+len); - vaddr += PAGE_SIZE) { + for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); + vaddr < vm_map_round_page(addr+len, PAGE_MASK); + vaddr += PAGE_SIZE) { paddr = pmap_extract(pmap, vaddr); pg = PHYS_TO_VM_PAGE(paddr); vm_page_set_modified(pg); @@ -128,8 +378,12 @@ vsunlock(addr, len, dirtied) #ifdef lint dirtied++; #endif /* lint */ - kret = vm_map_unwire(current_map(), trunc_page(addr), - round_page(addr+len), FALSE); + kret = vm_map_unwire(map, + vm_map_trunc_page(addr, + vm_map_page_mask(map)), + vm_map_round_page(addr+len, + vm_map_page_mask(map)), + FALSE); switch (kret) { case KERN_SUCCESS: return (0); @@ -143,11 +397,10 @@ vsunlock(addr, len, dirtied) } } -#if defined(sun) || BALANCE || defined(m88k) -#else /*defined(sun) || BALANCE || defined(m88k)*/ -subyte(addr, byte) - void * addr; - int byte; +int +subyte( + user_addr_t addr, + int byte) { char character; @@ -155,18 +408,18 @@ subyte(addr, byte) return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); } -suibyte(addr, byte) - void * addr; - int byte; +int +suibyte( + user_addr_t addr, + int byte) { char character; character = (char)byte; - return (copyout((void *) &(character), addr, sizeof(char)) == 0 ? 0 : -1); + return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); } -int fubyte(addr) - void * addr; +int fubyte(user_addr_t addr) { unsigned char byte; @@ -175,8 +428,7 @@ int fubyte(addr) return(byte); } -int fuibyte(addr) - void * addr; +int fuibyte(user_addr_t addr) { unsigned char byte; @@ -185,17 +437,17 @@ int fuibyte(addr) return(byte); } -suword(addr, word) - void * addr; - long word; +int +suword( + user_addr_t addr, + long word) { return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); } -long fuword(addr) - void * addr; +long fuword(user_addr_t addr) { - long word; + long word = 0; if (copyin(addr, (void *) &word, sizeof(int))) return(-1); @@ -204,45 +456,115 @@ long fuword(addr) /* suiword and fuiword are the same as suword and fuword, respectively */ -suiword(addr, word) - void * addr; - long word; +int +suiword( + user_addr_t addr, + long word) { return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); } -long fuiword(addr) - void * addr; +long fuiword(user_addr_t addr) { - long word; + long word = 0; if (copyin(addr, (void *) &word, sizeof(int))) return(-1); return(word); } -#endif /* defined(sun) || BALANCE || defined(m88k) || defined(i386) */ +/* + * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the + * fetching and setting of process-sized size_t and pointer values. + */ int -swapon() +sulong(user_addr_t addr, int64_t word) { - return(EOPNOTSUPP); + + if (IS_64BIT_PROCESS(current_proc())) { + return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1); + } else { + return(suiword(addr, (long)word)); + } } +int64_t +fulong(user_addr_t addr) +{ + int64_t longword; + if (IS_64BIT_PROCESS(current_proc())) { + if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) + return(-1); + return(longword); + } else { + return((int64_t)fuiword(addr)); + } +} + +int +suulong(user_addr_t addr, uint64_t uword) +{ + + if (IS_64BIT_PROCESS(current_proc())) { + return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1); + } else { + return(suiword(addr, (uint32_t)uword)); + } +} + +uint64_t +fuulong(user_addr_t addr) +{ + uint64_t ulongword; + + if (IS_64BIT_PROCESS(current_proc())) { + if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) + return(-1ULL); + return(ulongword); + } else { + return((uint64_t)fuiword(addr)); + } +} + +int +swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval) +{ + return(ENOTSUP); +} + +/* + * pid_for_task + * + * Find the BSD process ID for the Mach task associated with the given Mach port + * name + * + * Parameters: args User argument descriptor (see below) + * + * Indirect parameters: args->t Mach port name + * args->pid Process ID (returned value; see below) + * + * Returns: KERL_SUCCESS Success + * KERN_FAILURE Not success + * + * Implicit returns: args->pid Process ID + * + */ kern_return_t -pid_for_task(t, x) - mach_port_t t; - int *x; +pid_for_task( + struct pid_for_task_args *args) { - struct proc * p; + mach_port_name_t t = args->t; + user_addr_t pid_addr = args->pid; + proc_t p; task_t t1; - extern task_t port_name_to_task(mach_port_t t); int pid = -1; kern_return_t err = KERN_SUCCESS; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); - t1 = port_name_to_task(t); + AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK); + AUDIT_ARG(mach_port1, t); + + t1 = port_name_to_task_inspect(t); if (t1 == TASK_NULL) { err = KERN_FAILURE; @@ -250,19 +572,121 @@ pid_for_task(t, x) } else { p = get_bsdtask_info(t1); if (p) { - pid = p->p_pid; + pid = proc_pid(p); err = KERN_SUCCESS; - } else { + } else if (is_corpsetask(t1)) { + pid = task_pid(t1); + err = KERN_SUCCESS; + }else { err = KERN_FAILURE; } } task_deallocate(t1); pftout: - (void) copyout((char *) &pid, (char *) x, sizeof(*x)); - thread_funnel_set(kernel_flock, funnel_state); + AUDIT_ARG(pid, pid); + (void) copyout((char *) &pid, pid_addr, sizeof(int)); + AUDIT_MACH_SYSCALL_EXIT(err); return(err); } +/* + * + * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self + * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication + * + */ +static int tfp_policy = KERN_TFP_POLICY_DEFAULT; + +/* + * Routine: task_for_pid_posix_check + * Purpose: + * Verify that the current process should be allowed to + * get the target process's task port. This is only + * permitted if: + * - The current process is root + * OR all of the following are true: + * - The target process's real, effective, and saved uids + * are the same as the current proc's euid, + * - The target process's group set is a subset of the + * calling process's group set, and + * - The target process hasn't switched credentials. + * + * Returns: TRUE: permitted + * FALSE: denied + */ +static int +task_for_pid_posix_check(proc_t target) +{ + kauth_cred_t targetcred, mycred; + uid_t myuid; + int allowed; + + /* No task_for_pid on bad targets */ + if (target->p_stat == SZOMB) { + return FALSE; + } + + mycred = kauth_cred_get(); + myuid = kauth_cred_getuid(mycred); + + /* If we're running as root, the check passes */ + if (kauth_cred_issuser(mycred)) + return TRUE; + + /* We're allowed to get our own task port */ + if (target == current_proc()) + return TRUE; + + /* + * Under DENY, only root can get another proc's task port, + * so no more checks are needed. + */ + if (tfp_policy == KERN_TFP_POLICY_DENY) { + return FALSE; + } + + targetcred = kauth_cred_proc_ref(target); + allowed = TRUE; + + /* Do target's ruid, euid, and saved uid match my euid? */ + if ((kauth_cred_getuid(targetcred) != myuid) || + (kauth_cred_getruid(targetcred) != myuid) || + (kauth_cred_getsvuid(targetcred) != myuid)) { + allowed = FALSE; + goto out; + } + + /* Are target's groups a subset of my groups? */ + if (kauth_cred_gid_subset(targetcred, mycred, &allowed) || + allowed == 0) { + allowed = FALSE; + goto out; + } + + /* Has target switched credentials? */ + if (target->p_flag & P_SUGID) { + allowed = FALSE; + goto out; + } + +out: + kauth_cred_unref(&targetcred); + return allowed; +} + +/* + * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__ + * + * Description: Waits for the user space daemon to respond to the request + * we made. Function declared non inline to be visible in + * stackshots and spindumps as well as debugging. + */ +__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( + mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid) +{ + return check_task_access(task_access_port, calling_pid, calling_gid, target_pid); +} + /* * Routine: task_for_pid * Purpose: @@ -271,1492 +695,1231 @@ pftout: * * Only permitted to privileged processes, or processes * with the same user ID. + * + * Note: if pid == 0, an error is return no matter who is calling. + * + * XXX This should be a BSD system call, not a Mach trap!!! */ kern_return_t -task_for_pid(target_tport, pid, t) - mach_port_t target_tport; - int pid; - mach_port_t *t; +task_for_pid( + struct task_for_pid_args *args) { - struct proc *p; - struct proc *p1; - task_t t1; - mach_port_t tret; - extern task_t port_name_to_task(mach_port_t tp); + mach_port_name_t target_tport = args->target_tport; + int pid = args->pid; + user_addr_t task_addr = args->t; + proc_t p = PROC_NULL; + task_t t1 = TASK_NULL; + mach_port_name_t tret = MACH_PORT_NULL; + ipc_port_t tfpport; void * sright; int error = 0; - boolean_t funnel_state; + + AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID); + AUDIT_ARG(pid, pid); + AUDIT_ARG(mach_port1, target_tport); + + /* Always check if pid == 0 */ + if (pid == 0) { + (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); + AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); + return(KERN_FAILURE); + } t1 = port_name_to_task(target_tport); if (t1 == TASK_NULL) { - (void ) copyout((char *)&t1, (char *)t, sizeof(mach_port_t)); + (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); + AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); return(KERN_FAILURE); } - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - restart: - p1 = get_bsdtask_info(t1); - if ( - ((p = pfind(pid)) != (struct proc *) 0) - && (p1 != (struct proc *) 0) - && (((p->p_ucred->cr_uid == p1->p_ucred->cr_uid) && - ((p->p_cred->p_ruid == p1->p_cred->p_ruid))) - || !(suser(p1->p_ucred, &p1->p_acflag))) - && (p->p_stat != SZOMB) - ) { - if (p->task != TASK_NULL) { - if (!task_reference_try(p->task)) { - mutex_pause(); /* temp loss of funnel */ - goto restart; - } - sright = (void *)convert_task_to_port(p->task); - tret = (void *) - ipc_port_copyout_send(sright, - get_task_ipcspace(current_task())); - } else - tret = MACH_PORT_NULL; - (void ) copyout((char *)&tret, (char *) t, sizeof(mach_port_t)); - task_deallocate(t1); - error = KERN_SUCCESS; - goto tfpout; - } - task_deallocate(t1); - tret = MACH_PORT_NULL; - (void) copyout((char *) &tret, (char *) t, sizeof(mach_port_t)); - error = KERN_FAILURE; -tfpout: - thread_funnel_set(kernel_flock, funnel_state); - return(error); -} + p = proc_find(pid); + if (p == PROC_NULL) { + error = KERN_FAILURE; + goto tfpout; + } -struct load_shared_file_args { - char *filename; - caddr_t mfa; - u_long mfs; - caddr_t *ba; - int map_cnt; - sf_mapping_t *mappings; - int *flags; -}; - -int ws_disabled = 1; - -int -load_shared_file( - struct proc *p, - struct load_shared_file_args *uap, - register *retval) -{ - caddr_t mapped_file_addr=uap->mfa; - u_long mapped_file_size=uap->mfs; - caddr_t *base_address=uap->ba; - int map_cnt=uap->map_cnt; - sf_mapping_t *mappings=uap->mappings; - char *filename=uap->filename; - int *flags=uap->flags; - struct vnode *vp = 0; - struct nameidata nd, *ndp; - char *filename_str; - register int error; - kern_return_t kr; - - struct vattr vattr; - memory_object_control_t file_control; - sf_mapping_t *map_list; - caddr_t local_base; - int local_flags; - int caller_flags; - int i; - int default_regions = 0; - vm_size_t dummy; - kern_return_t kret; +#if CONFIG_AUDIT + AUDIT_ARG(process, p); +#endif - shared_region_mapping_t shared_region; - struct shared_region_task_mappings task_mapping_info; - shared_region_mapping_t next; + if (!(task_for_pid_posix_check(p))) { + error = KERN_FAILURE; + goto tfpout; + } - ndp = &nd; + if (p->task != TASK_NULL) { + /* If we aren't root and target's task access port is set... */ + if (!kauth_cred_issuser(kauth_cred_get()) && + p != current_proc() && + (task_get_task_access_port(p->task, &tfpport) == 0) && + (tfpport != IPC_PORT_NULL)) { + if (tfpport == IPC_PORT_DEAD) { + error = KERN_PROTECTION_FAILURE; + goto tfpout; + } - /* Retrieve the base address */ - if (error = copyin(base_address, &local_base, sizeof (caddr_t))) { - goto lsf_bailout; - } - if (error = copyin(flags, &local_flags, sizeof (int))) { - goto lsf_bailout; - } + /* Call up to the task access server */ + error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); - if(local_flags & QUERY_IS_SYSTEM_REGION) { - vm_get_shared_region(current_task(), &shared_region); - if (shared_region == system_shared_region) { - local_flags = SYSTEM_REGION_BACKED; - } else { - local_flags = 0; + if (error != MACH_MSG_SUCCESS) { + if (error == MACH_RCV_INTERRUPTED) + error = KERN_ABORTED; + else + error = KERN_FAILURE; + goto tfpout; } - error = 0; - error = copyout(&local_flags, flags, sizeof (int)); - goto lsf_bailout; - } - caller_flags = local_flags; - kret = kmem_alloc(kernel_map, (vm_offset_t *)&filename_str, - (vm_size_t)(MAXPATHLEN)); - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto lsf_bailout; } - kret = kmem_alloc(kernel_map, (vm_offset_t *)&map_list, - (vm_size_t)(map_cnt*sizeof(sf_mapping_t))); - if (kret != KERN_SUCCESS) { - kmem_free(kernel_map, (vm_offset_t)filename_str, - (vm_size_t)(MAXPATHLEN)); - error = ENOMEM; - goto lsf_bailout; +#if CONFIG_MACF + error = mac_proc_check_get_task(kauth_cred_get(), p); + if (error) { + error = KERN_FAILURE; + goto tfpout; } +#endif - if (error = - copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) { - goto lsf_bailout_free; - } + /* Grant task port access */ + task_reference(p->task); + extmod_statistics_incr_task_for_pid(p->task); - if (error = copyinstr(filename, - filename_str, MAXPATHLEN, (size_t *)&dummy)) { - goto lsf_bailout_free; - } + sright = (void *) convert_task_to_port(p->task); + tret = ipc_port_copyout_send( + sright, + get_task_ipcspace(current_task())); + } + error = KERN_SUCCESS; - /* - * Get a vnode for the target file - */ - NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, - filename_str, p); +tfpout: + task_deallocate(t1); + AUDIT_ARG(mach_port2, tret); + (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); + if (p != PROC_NULL) + proc_rele(p); + AUDIT_MACH_SYSCALL_EXIT(error); + return(error); +} - if ((error = namei(ndp))) { - goto lsf_bailout_free; - } +/* + * Routine: task_name_for_pid + * Purpose: + * Get the task name port for another "process", named by its + * process ID on the same host as "target_task". + * + * Only permitted to privileged processes, or processes + * with the same user ID. + * + * XXX This should be a BSD system call, not a Mach trap!!! + */ - vp = ndp->ni_vp; +kern_return_t +task_name_for_pid( + struct task_name_for_pid_args *args) +{ + mach_port_name_t target_tport = args->target_tport; + int pid = args->pid; + user_addr_t task_addr = args->t; + proc_t p = PROC_NULL; + task_t t1; + mach_port_name_t tret; + void * sright; + int error = 0, refheld = 0; + kauth_cred_t target_cred; - if (vp->v_type != VREG) { - error = EINVAL; - goto lsf_bailout_free_vput; - } + AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID); + AUDIT_ARG(pid, pid); + AUDIT_ARG(mach_port1, target_tport); - UBCINFOCHECK("load_shared_file", vp); + t1 = port_name_to_task(target_tport); + if (t1 == TASK_NULL) { + (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); + AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); + return(KERN_FAILURE); + } - if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) { - goto lsf_bailout_free_vput; - } + p = proc_find(pid); + if (p != PROC_NULL) { + AUDIT_ARG(process, p); + target_cred = kauth_cred_proc_ref(p); + refheld = 1; + if ((p->p_stat != SZOMB) + && ((current_proc() == p) + || kauth_cred_issuser(kauth_cred_get()) + || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && + ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) { - file_control = ubc_getobject(vp, UBC_HOLDOBJECT); - if (file_control == MEMORY_OBJECT_CONTROL_NULL) { - error = EINVAL; - goto lsf_bailout_free_vput; - } - - vm_get_shared_region(current_task(), &shared_region); - if(shared_region == system_shared_region) { - default_regions = 1; - } - if(((vp->v_mount != rootvnode->v_mount) - && (shared_region == system_shared_region)) - && (lsf_mapping_pool_gauge() < 75)) { - /* We don't want to run out of shared memory */ - /* map entries by starting too many private versions */ - /* of the shared library structures */ - int error; - if(p->p_flag & P_NOSHLIB) { - error = clone_system_shared_regions(FALSE); - } else { - error = clone_system_shared_regions(TRUE); - } - if (error) { - goto lsf_bailout_free_vput; + if (p->task != TASK_NULL) { + task_reference(p->task); +#if CONFIG_MACF + error = mac_proc_check_get_task_name(kauth_cred_get(), p); + if (error) { + task_deallocate(p->task); + goto noperm; + } +#endif + sright = (void *)convert_task_name_to_port(p->task); + tret = ipc_port_copyout_send(sright, + get_task_ipcspace(current_task())); + } else + tret = MACH_PORT_NULL; + + AUDIT_ARG(mach_port2, tret); + (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); + task_deallocate(t1); + error = KERN_SUCCESS; + goto tnfpout; } - local_flags = local_flags & ~NEW_LOCAL_SHARED_REGIONS; - vm_get_shared_region(current_task(), &shared_region); } -#ifdef notdef - if(vattr.va_size != mapped_file_size) { - error = EINVAL; - goto lsf_bailout_free_vput; + +#if CONFIG_MACF +noperm: +#endif + task_deallocate(t1); + tret = MACH_PORT_NULL; + (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); + error = KERN_FAILURE; +tnfpout: + if (refheld != 0) + kauth_cred_unref(&target_cred); + if (p != PROC_NULL) + proc_rele(p); + AUDIT_MACH_SYSCALL_EXIT(error); + return(error); +} + +kern_return_t +pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) +{ + task_t target = NULL; + proc_t targetproc = PROC_NULL; + int pid = args->pid; + int error = 0; + +#if CONFIG_MACF + error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND); + if (error) { + error = EPERM; + goto out; } #endif - if(p->p_flag & P_NOSHLIB) { - p->p_flag = p->p_flag & ~P_NOSHLIB; + + if (pid == 0) { + error = EPERM; + goto out; } - /* load alternate regions if the caller has requested. */ - /* Note: the new regions are "clean slates" */ - if (local_flags & NEW_LOCAL_SHARED_REGIONS) { - error = clone_system_shared_regions(FALSE); - if (error) { - goto lsf_bailout_free_vput; - } - vm_get_shared_region(current_task(), &shared_region); - } - - task_mapping_info.self = (vm_offset_t)shared_region; - - shared_region_mapping_info(shared_region, - &(task_mapping_info.text_region), - &(task_mapping_info.text_size), - &(task_mapping_info.data_region), - &(task_mapping_info.data_size), - &(task_mapping_info.region_mappings), - &(task_mapping_info.client_base), - &(task_mapping_info.alternate_base), - &(task_mapping_info.alternate_next), - &(task_mapping_info.flags), &next); - - /* This is a work-around to allow executables which have been */ - /* built without knowledge of the proper shared segment to */ - /* load. This code has been architected as a shared region */ - /* handler, the knowledge of where the regions are loaded is */ - /* problematic for the extension of shared regions as it will */ - /* not be easy to know what region an item should go into. */ - /* The code below however will get around a short term problem */ - /* with executables which believe they are loading at zero. */ - - { - if (((unsigned int)local_base & - (~(task_mapping_info.text_size - 1))) != - task_mapping_info.client_base) { - if(local_flags & ALTERNATE_LOAD_SITE) { - local_base = (caddr_t)( - (unsigned int)local_base & - (task_mapping_info.text_size - 1)); - local_base = (caddr_t)((unsigned int)local_base - | task_mapping_info.client_base); - } else { - error = EINVAL; - goto lsf_bailout_free_vput; - } - } + targetproc = proc_find(pid); + if (targetproc == PROC_NULL) { + error = ESRCH; + goto out; + } + + if (!task_for_pid_posix_check(targetproc)) { + error = EPERM; + goto out; } + target = targetproc->task; + if (target != TASK_NULL) { + mach_port_t tfpport; - if((kr = copyin_shared_file((vm_offset_t)mapped_file_addr, - mapped_file_size, - (vm_offset_t *)&local_base, - map_cnt, map_list, file_control, - &task_mapping_info, &local_flags))) { - switch (kr) { - case KERN_FAILURE: - error = EINVAL; - break; - case KERN_INVALID_ARGUMENT: - error = EINVAL; - break; - case KERN_INVALID_ADDRESS: + /* If we aren't root and target's task access port is set... */ + if (!kauth_cred_issuser(kauth_cred_get()) && + targetproc != current_proc() && + (task_get_task_access_port(target, &tfpport) == 0) && + (tfpport != IPC_PORT_NULL)) { + + if (tfpport == IPC_PORT_DEAD) { error = EACCES; - break; - case KERN_PROTECTION_FAILURE: - /* save EAUTH for authentication in this */ - /* routine */ - error = EPERM; - break; - case KERN_NO_SPACE: - error = ENOMEM; - break; - default: - error = EINVAL; - }; - if((caller_flags & ALTERNATE_LOAD_SITE) && systemLogDiags) { - printf("load_shared_file: Failed to load shared file! error: 0x%x, Base_address: 0x%x, number of mappings: %d, file_control 0x%x\n", error, local_base, map_cnt, file_control); - for(i=0; iba; - int map_cnt=uap->map_cnt; - sf_mapping_t *mappings=uap->mappings; - register int error; - kern_return_t kr; - - sf_mapping_t *map_list; - caddr_t local_base; - vm_offset_t map_address; - int i; - kern_return_t kret; - - /* Retrieve the base address */ - if (error = copyin(base_address, &local_base, sizeof (caddr_t))) { - goto rsf_bailout; - } + task_t target = NULL; + proc_t targetproc = PROC_NULL; + int pid = args->pid; + int error = 0; + +#if CONFIG_MACF + error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME); + if (error) { + error = EPERM; + goto out; + } +#endif - if (((unsigned int)local_base & GLOBAL_SHARED_SEGMENT_MASK) - != GLOBAL_SHARED_TEXT_SEGMENT) { - error = EINVAL; - goto rsf_bailout; + if (pid == 0) { + error = EPERM; + goto out; } - kret = kmem_alloc(kernel_map, (vm_offset_t *)&map_list, - (vm_size_t)(map_cnt*sizeof(sf_mapping_t))); - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto rsf_bailout; - } + targetproc = proc_find(pid); + if (targetproc == PROC_NULL) { + error = ESRCH; + goto out; + } - if (error = - copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) { - - kmem_free(kernel_map, (vm_offset_t)map_list, - (vm_size_t)(map_cnt*sizeof(sf_mapping_t))); - goto rsf_bailout; - } - for (i = 0; itask; + if (target != TASK_NULL) { + mach_port_t tfpport; -rsf_bailout: - return error; -} + /* If we aren't root and target's task access port is set... */ + if (!kauth_cred_issuser(kauth_cred_get()) && + targetproc != current_proc() && + (task_get_task_access_port(target, &tfpport) == 0) && + (tfpport != IPC_PORT_NULL)) { -struct new_system_shared_regions_args { - int dummy; -}; + if (tfpport == IPC_PORT_DEAD) { + error = EACCES; + goto out; + } -int -new_system_shared_regions( - struct proc *p, - struct new_system_shared_regions_args *uap, - register *retval) -{ - shared_region_mapping_t regions; - shared_region_mapping_t new_regions; + /* Call up to the task access server */ + error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); - if(!(is_suser())) { - *retval = EINVAL; - return EINVAL; + if (error != MACH_MSG_SUCCESS) { + if (error == MACH_RCV_INTERRUPTED) + error = EINTR; + else + error = EPERM; + goto out; + } + } } - /* get current shared region info for */ - /* restoration after new system shared */ - /* regions are in place */ - vm_get_shared_region(current_task(), ®ions); - /* usually only called at boot time */ - /* shared_file_boot_time_init creates */ - /* a new set of system shared regions */ - /* and places them as the system */ - /* shared regions. */ - shared_file_boot_time_init(); + task_reference(target); - /* set current task back to its */ - /* original regions. */ - vm_get_shared_region(current_task(), &new_regions); - shared_region_mapping_dealloc(new_regions); +#if CONFIG_MEMORYSTATUS + memorystatus_on_resume(targetproc); +#endif - vm_set_shared_region(current_task(), regions); + error = task_pidresume(target); + if (error) { + if (error == KERN_INVALID_ARGUMENT) { + error = EINVAL; + } else { + if (error == KERN_MEMORY_ERROR) { + psignal(targetproc, SIGKILL); + error = EIO; + } else + error = EPERM; + } + } + + task_deallocate(target); - *retval = 0; - return 0; +out: + if (targetproc != PROC_NULL) + proc_rele(targetproc); + + *ret = error; + return error; } - -int -clone_system_shared_regions(shared_regions_active) +static int +sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, + __unused int arg2, struct sysctl_req *req) { - shared_region_mapping_t new_shared_region; - shared_region_mapping_t next; - shared_region_mapping_t old_shared_region; - struct shared_region_task_mappings old_info; - struct shared_region_task_mappings new_info; + int error = 0; + int new_value; - struct proc *p; + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || req->newptr == USER_ADDR_NULL) + return(error); - if (shared_file_create_system_region(&new_shared_region)) - return (ENOMEM); - vm_get_shared_region(current_task(), &old_shared_region); - old_info.self = (vm_offset_t)old_shared_region; - shared_region_mapping_info(old_shared_region, - &(old_info.text_region), - &(old_info.text_size), - &(old_info.data_region), - &(old_info.data_size), - &(old_info.region_mappings), - &(old_info.client_base), - &(old_info.alternate_base), - &(old_info.alternate_next), - &(old_info.flags), &next); - new_info.self = (vm_offset_t)new_shared_region; - shared_region_mapping_info(new_shared_region, - &(new_info.text_region), - &(new_info.text_size), - &(new_info.data_region), - &(new_info.data_size), - &(new_info.region_mappings), - &(new_info.client_base), - &(new_info.alternate_base), - &(new_info.alternate_next), - &(new_info.flags), &next); - if(shared_regions_active) { - if(vm_region_clone(old_info.text_region, new_info.text_region)) { - panic("clone_system_shared_regions: shared region mis-alignment 1"); - shared_region_mapping_dealloc(new_shared_region); - return(EINVAL); - } - if (vm_region_clone(old_info.data_region, new_info.data_region)) { - panic("clone_system_shared_regions: shared region mis-alignment 2"); - shared_region_mapping_dealloc(new_shared_region); - return(EINVAL); - } - shared_region_object_chain_attach( - new_shared_region, old_shared_region); - } - if (vm_map_region_replace(current_map(), old_info.text_region, - new_info.text_region, old_info.client_base, - old_info.client_base+old_info.text_size)) { - panic("clone_system_shared_regions: shared region mis-alignment 3"); - shared_region_mapping_dealloc(new_shared_region); - return(EINVAL); - } - if(vm_map_region_replace(current_map(), old_info.data_region, - new_info.data_region, - old_info.client_base + old_info.text_size, - old_info.client_base - + old_info.text_size + old_info.data_size)) { - panic("clone_system_shared_regions: shared region mis-alignment 4"); - shared_region_mapping_dealloc(new_shared_region); - return(EINVAL); - } - vm_set_shared_region(current_task(), new_shared_region); - - /* consume the reference which wasn't accounted for in object */ - /* chain attach */ - if(!shared_regions_active) - shared_region_mapping_dealloc(old_shared_region); - - return(0); + if (!kauth_cred_issuser(kauth_cred_get())) + return(EPERM); + + if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { + goto out; + } + if ((new_value == KERN_TFP_POLICY_DENY) + || (new_value == KERN_TFP_POLICY_DEFAULT)) + tfp_policy = new_value; + else + error = EINVAL; +out: + return(error); } -extern vm_map_t bsd_pageable_map; +#if defined(SECURE_KERNEL) +static int kern_secure_kernel = 1; +#else +static int kern_secure_kernel = 0; +#endif -/* header for the profile name file. The profiled app info is held */ -/* in the data file and pointed to by elements in the name file */ +SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, ""); -struct profile_names_header { - unsigned int number_of_profiles; - unsigned int user_id; - unsigned int version; - off_t element_array; - unsigned int spare1; - unsigned int spare2; - unsigned int spare3; -}; +SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp"); +SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy"); -struct profile_element { - off_t addr; - vm_size_t size; - unsigned int mod_date; - unsigned int inode; - char name[12]; -}; +SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED, + &shared_region_trace_level, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED, + &shared_region_version, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED, + &shared_region_persistence, 0, ""); -struct global_profile { - struct vnode *names_vp; - struct vnode *data_vp; - vm_offset_t buf_ptr; - unsigned int user; - unsigned int age; - unsigned int busy; -}; +/* + * shared_region_check_np: + * + * This system call is intended for dyld. + * + * dyld calls this when any process starts to see if the process's shared + * region is already set up and ready to use. + * This call returns the base address of the first mapping in the + * process's shared region's first mapping. + * dyld will then check what's mapped at that address. + * + * If the shared region is empty, dyld will then attempt to map the shared + * cache file in the shared region via the shared_region_map_np() system call. + * + * If something's already mapped in the shared region, dyld will check if it + * matches the shared cache it would like to use for that process. + * If it matches, evrything's ready and the process can proceed and use the + * shared region. + * If it doesn't match, dyld will unmap the shared region and map the shared + * cache into the process's address space via mmap(). + * + * ERROR VALUES + * EINVAL no shared region + * ENOMEM shared region is empty + * EFAULT bad address for "start_address" + */ +int +shared_region_check_np( + __unused struct proc *p, + struct shared_region_check_np_args *uap, + __unused int *retvalp) +{ + vm_shared_region_t shared_region; + mach_vm_offset_t start_address = 0; + int error; + kern_return_t kr; -struct global_profile_cache { - int max_ele; - unsigned int age; - struct global_profile profiles[3]; -}; + SHARED_REGION_TRACE_DEBUG( + ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (uint64_t)uap->start_address)); + + /* retrieve the current tasks's shared region */ + shared_region = vm_shared_region_get(current_task()); + if (shared_region != NULL) { + /* retrieve address of its first mapping... */ + kr = vm_shared_region_start_address(shared_region, + &start_address); + if (kr != KERN_SUCCESS) { + error = ENOMEM; + } else { + /* ... and give it to the caller */ + error = copyout(&start_address, + (user_addr_t) uap->start_address, + sizeof (start_address)); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] " + "check_np(0x%llx) " + "copyout(0x%llx) error %d\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (uint64_t)uap->start_address, (uint64_t)start_address, + error)); + } + } + vm_shared_region_deallocate(shared_region); + } else { + /* no shared region ! */ + error = EINVAL; + } -struct global_profile_cache global_user_profile_cache = - {3, 0, NULL, NULL, NULL, 0, 0, 0, - NULL, NULL, NULL, 0, 0, 0, - NULL, NULL, NULL, 0, 0, 0 }; + SHARED_REGION_TRACE_DEBUG( + ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (uint64_t)uap->start_address, (uint64_t)start_address, error)); + + return error; +} -/* BSD_OPEN_PAGE_CACHE_FILES: */ -/* Caller provides a user id. This id was used in */ -/* prepare_profile_database to create two unique absolute */ -/* file paths to the associated profile files. These files */ -/* are either opened or bsd_open_page_cache_files returns an */ -/* error. The header of the names file is then consulted. */ -/* The header and the vnodes for the names and data files are */ -/* returned. */ int -bsd_open_page_cache_files( - unsigned int user, - struct global_profile **profile) +shared_region_copyin_mappings( + struct proc *p, + user_addr_t user_mappings, + unsigned int mappings_count, + struct shared_file_mapping_np *mappings) { - char *cache_path = "/var/vm/app_profile/"; - struct proc *p; - int error; - int resid; - off_t resid_off; - unsigned int lru; - vm_size_t size; - - struct vnode *names_vp; - struct vnode *data_vp; - vm_offset_t names_buf; - vm_offset_t buf_ptr; - - int profile_names_length; - int profile_data_length; - char *profile_data_string; - char *profile_names_string; - char *substring; - - struct vattr vattr; - - struct profile_names_header *profile_header; - kern_return_t ret; - - struct nameidata nd_names; - struct nameidata nd_data; - - int i; - - - p = current_proc(); - -restart: - for(i = 0; ibusy) { - /* - * drop funnel and wait - */ - (void)tsleep((void *) - *profile, - PRIBIO, "app_profile", 0); - goto restart; - } - (*profile)->busy = 1; - (*profile)->age = global_user_profile_cache.age; - global_user_profile_cache.age+=1; - return 0; - } + int error = 0; + vm_size_t mappings_size = 0; + + /* get the list of mappings the caller wants us to establish */ + mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0])); + error = copyin(user_mappings, + mappings, + mappings_size); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(): " + "copyin(0x%llx, %d) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (uint64_t)user_mappings, mappings_count, error)); } - - lru = global_user_profile_cache.age; - for(i = 0; iage = global_user_profile_cache.age; - global_user_profile_cache.age+=1; - break; - } - if(global_user_profile_cache.profiles[i].age < lru) { - lru = global_user_profile_cache.profiles[i].age; - *profile = &global_user_profile_cache.profiles[i]; - } + return error; +} +/* + * shared_region_map_np() + * + * This system call is intended for dyld. + * + * dyld uses this to map a shared cache file into a shared region. + * This is usually done only the first time a shared cache is needed. + * Subsequent processes will just use the populated shared region without + * requiring any further setup. + */ +int +_shared_region_map_and_slide( + struct proc *p, + int fd, + uint32_t mappings_count, + struct shared_file_mapping_np *mappings, + uint32_t slide, + user_addr_t slide_start, + user_addr_t slide_size) +{ + int error; + kern_return_t kr; + struct fileproc *fp; + struct vnode *vp, *root_vp, *scdir_vp; + struct vnode_attr va; + off_t fs; + memory_object_size_t file_size; +#if CONFIG_MACF + vm_prot_t maxprot = VM_PROT_ALL; +#endif + memory_object_control_t file_control; + struct vm_shared_region *shared_region; + uint32_t i; + + SHARED_REGION_TRACE_DEBUG( + ("shared_region: %p [%d(%s)] -> map\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); + + shared_region = NULL; + fp = NULL; + vp = NULL; + scdir_vp = NULL; + + /* get file structure from file descriptor */ + error = fp_lookup(p, fd, &fp, 0); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map: " + "fd=%d lookup failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, fd, error)); + goto done; } - if ((*profile)->busy) { - /* - * drop funnel and wait - */ - (void)tsleep((void *) - &(global_user_profile_cache), - PRIBIO, "app_profile", 0); - goto restart; - } - (*profile)->busy = 1; - (*profile)->user = user; - - if((*profile)->data_vp != NULL) { - kmem_free(kernel_map, - (*profile)->buf_ptr, 4 * PAGE_SIZE); - if ((*profile)->names_vp) { - vrele((*profile)->names_vp); - (*profile)->names_vp = NULL; - } - if ((*profile)->data_vp) { - vrele((*profile)->data_vp); - (*profile)->data_vp = NULL; - } + /* make sure we're attempting to map a vnode */ + if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map: " + "fd=%d not a vnode (type=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + fd, FILEGLOB_DTYPE(fp->f_fglob))); + error = EINVAL; + goto done; } - /* put dummy value in for now to get */ - /* competing request to wait above */ - /* until we are finished */ - (*profile)->data_vp = (struct vnode *)0xFFFFFFFF; - - /* Try to open the appropriate users profile files */ - /* If neither file is present, try to create them */ - /* If one file is present and the other not, fail. */ - /* If the files do exist, check them for the app_file */ - /* requested and read it in if present */ - - - ret = kmem_alloc(kernel_map, - (vm_offset_t *)&profile_data_string, PATH_MAX); - - if(ret) { - (*profile)->data_vp = NULL; - (*profile)->busy = 0; - wakeup(*profile); - return ENOMEM; - } - - /* Split the buffer in half since we know the size of */ - /* our file path and our allocation is adequate for */ - /* both file path names */ - profile_names_string = profile_data_string + (PATH_MAX/2); - - - strcpy(profile_data_string, cache_path); - strcpy(profile_names_string, cache_path); - profile_names_length = profile_data_length - = strlen(profile_data_string); - substring = profile_data_string + profile_data_length; - sprintf(substring, "%x_data", user); - substring = profile_names_string + profile_names_length; - sprintf(substring, "%x_names", user); - - /* We now have the absolute file names */ - - ret = kmem_alloc(kernel_map, - (vm_offset_t *)&names_buf, 4 * PAGE_SIZE); - if(ret) { - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - (*profile)->data_vp = NULL; - (*profile)->busy = 0; - wakeup(*profile); - return ENOMEM; - } - - NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF, - UIO_SYSSPACE, profile_names_string, p); - NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF, - UIO_SYSSPACE, profile_data_string, p); - if (error = vn_open(&nd_data, FREAD | FWRITE, 0)) { -#ifdef notdef - printf("bsd_open_page_cache_files: CacheData file not found %s\n", - profile_data_string); -#endif - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - (*profile)->data_vp = NULL; - (*profile)->busy = 0; - wakeup(*profile); - return error; + /* we need at least read permission on the file */ + if (! (fp->f_fglob->fg_flag & FREAD)) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map: " + "fd=%d not readable\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, fd)); + error = EPERM; + goto done; } - data_vp = nd_data.ni_vp; - VOP_UNLOCK(data_vp, 0, p); - - if (error = vn_open(&nd_names, FREAD | FWRITE, 0)) { - printf("bsd_open_page_cache_files: NamesData file not found %s\n", - profile_data_string); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - vrele(data_vp); - (*profile)->data_vp = NULL; - (*profile)->busy = 0; - wakeup(*profile); - return error; + /* get vnode from file structure */ + error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map: " + "fd=%d getwithref failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, fd, error)); + goto done; } - names_vp = nd_names.ni_vp; - - if(error = VOP_GETATTR(names_vp, &vattr, p->p_ucred, p)) { - printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - vput(names_vp); - vrele(data_vp); - (*profile)->data_vp = NULL; - (*profile)->busy = 0; - wakeup(*profile); - return error; + vp = (struct vnode *) fp->f_fglob->fg_data; + + /* make sure the vnode is a regular file */ + if (vp->v_type != VREG) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "not a file (type=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), + vp->v_name, vp->v_type)); + error = EINVAL; + goto done; } - size = vattr.va_size; - if(size > 4 * PAGE_SIZE) - size = 4 * PAGE_SIZE; - buf_ptr = names_buf; - resid_off = 0; +#if CONFIG_MACF + /* pass in 0 for the offset argument because AMFI does not need the offset + of the shared cache */ + error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), + fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot); + if (error) { + goto done; + } +#endif /* MAC */ - while(size) { - error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr, - size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p); - if((error) || (size == resid)) { - if(!error) { - error = EINVAL; - } - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - vput(names_vp); - vrele(data_vp); - (*profile)->data_vp = NULL; - (*profile)->busy = 0; - wakeup(*profile); - return error; - } - buf_ptr += size-resid; - resid_off += size-resid; - size = resid; + /* make sure vnode is on the process's root volume */ + root_vp = p->p_fd->fd_rdir; + if (root_vp == NULL) { + root_vp = rootvnode; + } else { + /* + * Chroot-ed processes can't use the shared_region. + */ + error = EINVAL; + goto done; } - - VOP_UNLOCK(names_vp, 0, p); - kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); - (*profile)->names_vp = names_vp; - (*profile)->data_vp = data_vp; - (*profile)->buf_ptr = names_buf; - return 0; -} + if (vp->v_mount != root_vp->v_mount) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "not on process's root volume\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); + error = EPERM; + goto done; + } -void -bsd_close_page_cache_files( - struct global_profile *profile) -{ - profile->busy = 0; - wakeup(profile); -} + /* make sure vnode is owned by "root" */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + error = vnode_getattr(vp, &va, vfs_context_current()); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "vnode_getattr(%p) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, + (void *)VM_KERNEL_ADDRPERM(vp), error)); + goto done; + } + if (va.va_uid != 0) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "owned by uid=%d instead of 0\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), + vp->v_name, va.va_uid)); + error = EPERM; + goto done; + } -int -bsd_read_page_cache_file( - unsigned int user, - int *fid, - int *mod, - char *app_name, - struct vnode *app_vp, - vm_offset_t *buffer, - vm_offset_t *buf_size) -{ + if (scdir_enforce) { + /* get vnode for scdir_path */ + error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current()); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "vnode_lookup(%s) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, + scdir_path, error)); + goto done; + } - boolean_t funnel_state; + /* ensure parent is scdir_vp */ + if (vnode_parent(vp) != scdir_vp) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "shared cache file not in %s\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), + vp->v_name, scdir_path)); + error = EPERM; + goto done; + } + } - struct proc *p; - int error; - int resid; - vm_size_t size; + /* get vnode size */ + error = vnode_size(vp, &fs, vfs_context_current()); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "vnode_size(%p) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, + (void *)VM_KERNEL_ADDRPERM(vp), error)); + goto done; + } + file_size = fs; - off_t profile; - unsigned int profile_size; + /* get the file's memory object handle */ + file_control = ubc_getobject(vp, UBC_HOLDOBJECT); + if (file_control == MEMORY_OBJECT_CONTROL_NULL) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "no memory object\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); + error = EINVAL; + goto done; + } - vm_offset_t names_buf; - struct vattr vattr; + /* check that the mappings are properly covered by code signatures */ + if (!cs_enforcement(NULL)) { + /* code signing is not enforced: no need to check */ + } else for (i = 0; i < mappings_count; i++) { + if (mappings[i].sfm_init_prot & VM_PROT_ZF) { + /* zero-filled mapping: not backed by the file */ + continue; + } + if (ubc_cs_is_range_codesigned(vp, + mappings[i].sfm_file_offset, + mappings[i].sfm_size)) { + /* this mapping is fully covered by code signatures */ + continue; + } + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] " + "is not code-signed\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, + i, mappings_count, + mappings[i].sfm_address, + mappings[i].sfm_size, + mappings[i].sfm_file_offset, + mappings[i].sfm_max_prot, + mappings[i].sfm_init_prot)); + error = EINVAL; + goto done; + } - kern_return_t ret; + /* get the process's shared region (setup in vm_map_exec()) */ + shared_region = vm_shared_region_get(current_task()); + if (shared_region == NULL) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "no shared region\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); + goto done; + } - struct vnode *names_vp; - struct vnode *data_vp; - struct vnode *vp1; - struct vnode *vp2; + /* map the file into that shared region's submap */ + kr = vm_shared_region_map_file(shared_region, + mappings_count, + mappings, + file_control, + file_size, + (void *) p->p_fd->fd_rdir, + slide, + slide_start, + slide_size); + if (kr != KERN_SUCCESS) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "vm_shared_region_map_file() failed kr=0x%x\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr)); + switch (kr) { + case KERN_INVALID_ADDRESS: + error = EFAULT; + break; + case KERN_PROTECTION_FAILURE: + error = EPERM; + break; + case KERN_NO_SPACE: + error = ENOMEM; + break; + case KERN_FAILURE: + case KERN_INVALID_ARGUMENT: + default: + error = EINVAL; + break; + } + goto done; + } - struct global_profile *uid_files; + error = 0; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + vnode_lock_spin(vp); - /* Try to open the appropriate users profile files */ - /* If neither file is present, try to create them */ - /* If one file is present and the other not, fail. */ - /* If the files do exist, check them for the app_file */ - /* requested and read it in if present */ + vp->v_flag |= VSHARED_DYLD; + vnode_unlock(vp); - error = bsd_open_page_cache_files(user, &uid_files); - if(error) { - thread_funnel_set(kernel_flock, funnel_state); - return EINVAL; + /* update the vnode's access time */ + if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) { + VATTR_INIT(&va); + nanotime(&va.va_access_time); + VATTR_SET_ACTIVE(&va, va_access_time); + vnode_setattr(vp, &va, vfs_context_current()); } - p = current_proc(); - - names_vp = uid_files->names_vp; - data_vp = uid_files->data_vp; - names_buf = uid_files->buf_ptr; - - - /* - * Get locks on both files, get the vnode with the lowest address first - */ + if (p->p_flag & P_NOSHLIB) { + /* signal that this process is now using split libraries */ + OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag); + } - if((unsigned int)names_vp < (unsigned int)data_vp) { - vp1 = names_vp; - vp2 = data_vp; - } else { - vp1 = data_vp; - vp2 = names_vp; - } - error = vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_read_page_cache_file: Can't lock profile names %x\n", user); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; +done: + if (vp != NULL) { + /* + * release the vnode... + * ubc_map() still holds it for us in the non-error case + */ + (void) vnode_put(vp); + vp = NULL; } - error = vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_read_page_cache_file: Can't lock profile data %x\n", user); - VOP_UNLOCK(vp1, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; + if (fp != NULL) { + /* release the file descriptor */ + fp_drop(p, fd, fp, 0); + fp = NULL; + } + if (scdir_vp != NULL) { + (void)vnode_put(scdir_vp); + scdir_vp = NULL; } - if(error = VOP_GETATTR(app_vp, &vattr, p->p_ucred, p)) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - printf("bsd_read_cache_file: Can't stat app file %s\n", app_name); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; + if (shared_region != NULL) { + vm_shared_region_deallocate(shared_region); } - *fid = vattr.va_fileid; - *mod = vattr.va_mtime.tv_sec; - + SHARED_REGION_TRACE_DEBUG( + ("shared_region: %p [%d(%s)] <- map\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); - if (bsd_search_page_cache_data_base(names_vp, names_buf, app_name, - (unsigned int) vattr.va_mtime.tv_sec, - vattr.va_fileid, &profile, &profile_size) == 0) { - /* profile is an offset in the profile data base */ - /* It is zero if no profile data was found */ - - if(profile_size == 0) { - *buffer = NULL; - *buf_size = 0; - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return 0; - } - ret = (vm_offset_t)(kmem_alloc(kernel_map, buffer, profile_size)); - if(ret) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return ENOMEM; - } - *buf_size = profile_size; - while(profile_size) { - error = vn_rdwr(UIO_READ, data_vp, - (caddr_t) *buffer, profile_size, - profile, UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); - if((error) || (profile_size == resid)) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size); - thread_funnel_set(kernel_flock, funnel_state); - return EINVAL; - } - profile += profile_size - resid; - profile_size = resid; - } - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return 0; - } else { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return EINVAL; - } - + return error; } int -bsd_search_page_cache_data_base( - struct vnode *vp, - struct profile_names_header *database, - char *app_name, - unsigned int mod_date, - unsigned int inode, - off_t *profile, - unsigned int *profile_size) +shared_region_map_and_slide_np( + struct proc *p, + struct shared_region_map_and_slide_np_args *uap, + __unused int *retvalp) { + struct shared_file_mapping_np *mappings; + unsigned int mappings_count = uap->count; + kern_return_t kr = KERN_SUCCESS; + uint32_t slide = uap->slide; + +#define SFM_MAX_STACK 8 + struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; - struct proc *p; + /* Is the process chrooted?? */ + if (p->p_fd->fd_rdir != NULL) { + kr = EINVAL; + goto done; + } + + if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) { + if (kr == KERN_INVALID_ARGUMENT) { + /* + * This will happen if we request sliding again + * with the same slide value that was used earlier + * for the very first sliding. + */ + kr = KERN_SUCCESS; + } + goto done; + } - unsigned int i; - struct profile_element *element; - unsigned int ele_total; - unsigned int extended_list = 0; - off_t file_off = 0; - unsigned int size; - off_t resid_off; - int resid; - vm_offset_t local_buf = NULL; + if (mappings_count == 0) { + SHARED_REGION_TRACE_INFO( + ("shared_region: %p [%d(%s)] map(): " + "no mappings\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); + kr = 0; /* no mappings: we're done ! */ + goto done; + } else if (mappings_count <= SFM_MAX_STACK) { + mappings = &stack_mappings[0]; + } else { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(): " + "too many mappings (%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + mappings_count)); + kr = KERN_FAILURE; + goto done; + } - int error; - kern_return_t ret; + if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) { + goto done; + } - p = current_proc(); - if(((vm_offset_t)database->element_array) != - sizeof(struct profile_names_header)) { - return EINVAL; + kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings, + slide, + uap->slide_start, uap->slide_size); + if (kr != KERN_SUCCESS) { + return kr; } - element = (struct profile_element *)( - (vm_offset_t)database->element_array + - (vm_offset_t)database); - ele_total = database->number_of_profiles; - - *profile = 0; - *profile_size = 0; - while(ele_total) { - /* note: code assumes header + n*ele comes out on a page boundary */ - if(((local_buf == 0) && (sizeof(struct profile_names_header) + - (ele_total * sizeof(struct profile_element))) - > (PAGE_SIZE * 4)) || - ((local_buf != 0) && - (ele_total * sizeof(struct profile_element)) - > (PAGE_SIZE * 4))) { - extended_list = ele_total; - if(element == (struct profile_element *) - ((vm_offset_t)database->element_array + - (vm_offset_t)database)) { - ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1; - } else { - ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element); - } - extended_list -= ele_total; - } - for (i=0; i - (PAGE_SIZE * 4)) { - size = PAGE_SIZE * 4; - } else { - size = ele_total * sizeof(struct profile_element); - } - resid_off = 0; - while(size) { - error = vn_rdwr(UIO_READ, vp, - (caddr_t)(local_buf + resid_off), - size, file_off + resid_off, UIO_SYSSPACE, - IO_NODELOCKED, p->p_ucred, &resid, p); - if((error) || (size == resid)) { - if(local_buf != NULL) { - kmem_free(kernel_map, - (vm_offset_t)local_buf, - 4 * PAGE_SIZE); - } - return EINVAL; - } - resid_off += size-resid; - size = resid; - } - } - if(local_buf != NULL) { - kmem_free(kernel_map, - (vm_offset_t)local_buf, 4 * PAGE_SIZE); - } - return 0; +done: + return kr; } -int -bsd_write_page_cache_file( - unsigned int user, - char *file_name, - caddr_t buffer, - vm_size_t size, - int mod, - int fid) -{ - struct proc *p; - struct nameidata nd; - struct vnode *vp = 0; - int resid; - off_t resid_off; - int error; - boolean_t funnel_state; - struct vattr vattr; - struct vattr data_vattr; +/* sysctl overflow room */ - off_t profile; - unsigned int profile_size; +SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, + (int *) &page_size, 0, "vm page size"); - vm_offset_t names_buf; - struct vnode *names_vp; - struct vnode *data_vp; - struct vnode *vp1; - struct vnode *vp2; +/* vm_page_free_target is provided as a makeshift solution for applications that want to + allocate buffer space, possibly purgeable memory, but not cause inactive pages to be + reclaimed. It allows the app to calculate how much memory is free outside the free target. */ +extern unsigned int vm_page_free_target; +SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_free_target, 0, "Pageout daemon free target"); - struct profile_names_header *profile_header; - off_t name_offset; +extern unsigned int vm_memory_pressure; +SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_memory_pressure, 0, "Memory pressure indicator"); - struct global_profile *uid_files; +static int +vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + unsigned int page_free_wanted; + page_free_wanted = mach_vm_ctl_page_free_wanted(); + return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted)); +} +SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, vm_ctl_page_free_wanted, "I", ""); + +extern unsigned int vm_page_purgeable_count; +SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_purgeable_count, 0, "Purgeable page count"); + +extern unsigned int vm_page_purgeable_wired_count; +SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_purgeable_wired_count, 0, "Wired purgeable page count"); + +extern unsigned int vm_pageout_purged_objects; +SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_pageout_purged_objects, 0, "System purged object count"); + +extern int madvise_free_debug; +SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED, + &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)"); + +SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_count, 0, "Reusable page count"); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_pages_success, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_pages_failure, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_pages_shared, ""); +SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.all_reusable_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.partial_reusable_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reuse_pages_success, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reuse_pages_failure, ""); +SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.all_reuse_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.partial_reuse_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.can_reuse_success, ""); +SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.can_reuse_failure, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_reclaimed, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_nonwritable, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_shared, ""); +SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.free_shared, ""); + + +extern unsigned int vm_page_free_count, vm_page_speculative_count; +SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, ""); + +extern unsigned int vm_page_cleaned_count; +SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size"); + +extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count; +SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, ""); + +/* pageout counts */ +extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used; +extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative; +SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, ""); + +extern unsigned int vm_pageout_freed_from_cleaned; +SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, ""); + +/* counts of pages entering the cleaned queue */ +extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty; +SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */ +SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, ""); + +/* counts of pages leaving the cleaned queue */ +extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock; +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed"); +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */ +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated"); +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated"); +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated"); +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated"); +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)"); +SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)"); + +/* counts of pages prefaulted when entering a memory object */ +extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout; +SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, ""); +SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, ""); + +#if CONFIG_SECLUDED_MEMORY + +SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, ""); +extern unsigned int vm_page_secluded_target; +extern unsigned int vm_page_secluded_count; +extern unsigned int vm_page_secluded_count_free; +extern unsigned int vm_page_secluded_count_inuse; +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, ""); + +extern struct vm_page_secluded_data vm_page_secluded; +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, ""); +SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, ""); + +extern uint64_t vm_pageout_freed_from_secluded; +extern uint64_t vm_pageout_secluded_reactivated; +extern uint64_t vm_pageout_secluded_burst_count; +SYSCTL_QUAD(_vm, OID_AUTO, pageout_freed_from_secluded, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_secluded, ""); +SYSCTL_QUAD(_vm, OID_AUTO, pageout_secluded_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_secluded_reactivated, "Secluded pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */ +SYSCTL_QUAD(_vm, OID_AUTO, pageout_secluded_burst_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_secluded_burst_count, ""); + +#endif /* CONFIG_SECLUDED_MEMORY */ - funnel_state = thread_funnel_set(kernel_flock, TRUE); +#include +#include + +void vm_pageout_io_throttle(void); + +void vm_pageout_io_throttle(void) { + struct uthread *uthread = get_bsdthread_info(current_thread()); + + /* + * thread is marked as a low priority I/O type + * and the I/O we issued while in this cleaning operation + * collided with normal I/O operations... we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + + if (uthread->uu_lowpri_window) { + throttle_lowpri_io(1); + } +} +int +vm_pressure_monitor( + __unused struct proc *p, + struct vm_pressure_monitor_args *uap, + int *retval) +{ + kern_return_t kr; + uint32_t pages_reclaimed; + uint32_t pages_wanted; + + kr = mach_vm_pressure_monitor( + (boolean_t) uap->wait_for_pressure, + uap->nsecs_monitored, + (uap->pages_reclaimed) ? &pages_reclaimed : NULL, + &pages_wanted); - error = bsd_open_page_cache_files(user, &uid_files); - if(error) { - thread_funnel_set(kernel_flock, funnel_state); + switch (kr) { + case KERN_SUCCESS: + break; + case KERN_ABORTED: + return EINTR; + default: return EINVAL; } - p = current_proc(); + if (uap->pages_reclaimed) { + if (copyout((void *)&pages_reclaimed, + uap->pages_reclaimed, + sizeof (pages_reclaimed)) != 0) { + return EFAULT; + } + } - names_vp = uid_files->names_vp; - data_vp = uid_files->data_vp; - names_buf = uid_files->buf_ptr; + *retval = (int) pages_wanted; + return 0; +} - /* - * Get locks on both files, get the vnode with the lowest address first - */ +int +kas_info(struct proc *p, + struct kas_info_args *uap, + int *retval __unused) +{ +#ifdef SECURE_KERNEL + (void)p; + (void)uap; + return ENOTSUP; +#else /* !SECURE_KERNEL */ + int selector = uap->selector; + user_addr_t valuep = uap->value; + user_addr_t sizep = uap->size; + user_size_t size; + int error; - if((unsigned int)names_vp < (unsigned int)data_vp) { - vp1 = names_vp; - vp2 = data_vp; - } else { - vp1 = data_vp; - vp2 = names_vp; + if (!kauth_cred_issuser(kauth_cred_get())) { + return EPERM; } - error = vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_write_page_cache_file: Can't lock profile names %x\n", user); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); +#if CONFIG_MACF + error = mac_system_check_kas_info(kauth_cred_get(), selector); + if (error) { return error; } - error = vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_write_page_cache_file: Can't lock profile data %x\n", user); - VOP_UNLOCK(vp1, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); +#endif + + if (IS_64BIT_PROCESS(p)) { + user64_size_t size64; + error = copyin(sizep, &size64, sizeof(size64)); + size = (user_size_t)size64; + } else { + user32_size_t size32; + error = copyin(sizep, &size32, sizeof(size32)); + size = (user_size_t)size32; + } + if (error) { return error; } - /* Stat data file for size */ + switch (selector) { + case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR: + { + uint64_t slide = vm_kernel_slide; - if(error = VOP_GETATTR(data_vp, &data_vattr, p->p_ucred, p)) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; - } - - if (bsd_search_page_cache_data_base(names_vp, - (struct profile_names_header *)names_buf, - file_name, (unsigned int) mod, - fid, &profile, &profile_size) == 0) { - /* profile is an offset in the profile data base */ - /* It is zero if no profile data was found */ - - if(profile_size == 0) { - unsigned int header_size; - vm_offset_t buf_ptr; - - /* Our Write case */ - - /* read header for last entry */ - profile_header = - (struct profile_names_header *)names_buf; - name_offset = sizeof(struct profile_names_header) + - (sizeof(struct profile_element) - * profile_header->number_of_profiles); - profile_header->number_of_profiles += 1; - - if(name_offset < PAGE_SIZE * 4) { - struct profile_element *name; - /* write new entry */ - name = (struct profile_element *) - (names_buf + (vm_offset_t)name_offset); - name->addr = data_vattr.va_size; - name->size = size; - name->mod_date = mod; - name->inode = fid; - strncpy (name->name, file_name, 12); - } else { - unsigned int ele_size; - struct profile_element name; - /* write new entry */ - name.addr = data_vattr.va_size; - name.size = size; - name.mod_date = mod; - name.inode = fid; - strncpy (name.name, file_name, 12); - /* write element out separately */ - ele_size = sizeof(struct profile_element); - buf_ptr = (vm_offset_t)&name; - resid_off = name_offset; - - while(ele_size) { - error = vn_rdwr(UIO_WRITE, names_vp, - (caddr_t)buf_ptr, - ele_size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); - if(error) { - printf("bsd_write_page_cache_file: Can't write name_element %x\n", user); - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files( - uid_files); - thread_funnel_set( - kernel_flock, - funnel_state); - return error; - } - buf_ptr += (vm_offset_t) - ele_size-resid; - resid_off += ele_size-resid; - ele_size = resid; + if (sizeof(slide) != size) { + return EINVAL; } - } - - if(name_offset < PAGE_SIZE * 4) { - header_size = name_offset + - sizeof(struct profile_element); - } else { - header_size = - sizeof(struct profile_names_header); - } - buf_ptr = (vm_offset_t)profile_header; - resid_off = 0; - - /* write names file header */ - while(header_size) { - error = vn_rdwr(UIO_WRITE, names_vp, - (caddr_t)buf_ptr, - header_size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); - if(error) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - printf("bsd_write_page_cache_file: Can't write header %x\n", user); - bsd_close_page_cache_files( - uid_files); - thread_funnel_set( - kernel_flock, funnel_state); + if (IS_64BIT_PROCESS(p)) { + user64_size_t size64 = (user64_size_t)size; + error = copyout(&size64, sizep, sizeof(size64)); + } else { + user32_size_t size32 = (user32_size_t)size; + error = copyout(&size32, sizep, sizeof(size32)); + } + if (error) { return error; } - buf_ptr += (vm_offset_t)header_size-resid; - resid_off += header_size-resid; - header_size = resid; - } - /* write profile to data file */ - resid_off = data_vattr.va_size; - while(size) { - error = vn_rdwr(UIO_WRITE, data_vp, - (caddr_t)buffer, size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); - if(error) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - printf("bsd_write_page_cache_file: Can't write header %x\n", user); - bsd_close_page_cache_files( - uid_files); - thread_funnel_set( - kernel_flock, funnel_state); + + error = copyout(&slide, valuep, sizeof(slide)); + if (error) { return error; } - buffer += size-resid; - resid_off += size-resid; - size = resid; } - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return 0; - } - /* Someone else wrote a twin profile before us */ - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return 0; - } else { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return EINVAL; - } - -} - -int -prepare_profile_database(int user) -{ - char *cache_path = "/var/vm/app_profile/"; - struct proc *p; - int error; - int resid; - off_t resid_off; - unsigned int lru; - vm_size_t size; - - struct vnode *names_vp; - struct vnode *data_vp; - vm_offset_t names_buf; - vm_offset_t buf_ptr; - - int profile_names_length; - int profile_data_length; - char *profile_data_string; - char *profile_names_string; - char *substring; - - struct vattr vattr; - - struct profile_names_header *profile_header; - kern_return_t ret; - - struct nameidata nd_names; - struct nameidata nd_data; - - int i; - - p = current_proc(); - - ret = kmem_alloc(kernel_map, - (vm_offset_t *)&profile_data_string, PATH_MAX); - - if(ret) { - return ENOMEM; - } - - /* Split the buffer in half since we know the size of */ - /* our file path and our allocation is adequate for */ - /* both file path names */ - profile_names_string = profile_data_string + (PATH_MAX/2); - - - strcpy(profile_data_string, cache_path); - strcpy(profile_names_string, cache_path); - profile_names_length = profile_data_length - = strlen(profile_data_string); - substring = profile_data_string + profile_data_length; - sprintf(substring, "%x_data", user); - substring = profile_names_string + profile_names_length; - sprintf(substring, "%x_names", user); - - /* We now have the absolute file names */ - - ret = kmem_alloc(kernel_map, - (vm_offset_t *)&names_buf, 4 * PAGE_SIZE); - if(ret) { - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - return ENOMEM; - } - - NDINIT(&nd_names, LOOKUP, FOLLOW, - UIO_SYSSPACE, profile_names_string, p); - NDINIT(&nd_data, LOOKUP, FOLLOW, - UIO_SYSSPACE, profile_data_string, p); - - if (error = vn_open(&nd_data, - O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) { - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - return 0; - } - - data_vp = nd_data.ni_vp; - VOP_UNLOCK(data_vp, 0, p); - - if (error = vn_open(&nd_names, - O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) { - printf("prepare_profile_database: Can't create CacheNames %s\n", - profile_data_string); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - vrele(data_vp); - return error; - } - - names_vp = nd_names.ni_vp; - - - /* Write Header for new names file */ - - profile_header = (struct profile_names_header *)names_buf; - - profile_header->number_of_profiles = 0; - profile_header->user_id = user; - profile_header->version = 1; - profile_header->element_array = - sizeof(struct profile_names_header); - profile_header->spare1 = 0; - profile_header->spare2 = 0; - profile_header->spare3 = 0; - - size = sizeof(struct profile_names_header); - buf_ptr = (vm_offset_t)profile_header; - resid_off = 0; - - while(size) { - error = vn_rdwr(UIO_WRITE, names_vp, - (caddr_t)buf_ptr, size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); - if(error) { - printf("prepare_profile_database: Can't write header %s\n", profile_names_string); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, - PATH_MAX); - vput(names_vp); - vrele(data_vp); - return error; - } - buf_ptr += size-resid; - resid_off += size-resid; - size = resid; + break; + default: + return EINVAL; } - VATTR_NULL(&vattr); - vattr.va_uid = user; - error = VOP_SETATTR(names_vp, &vattr, p->p_cred->pc_ucred, p); - if(error) { - printf("prepare_profile_database: " - "Can't set user %s\n", profile_names_string); - } - vput(names_vp); - - error = vn_lock(data_vp, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - vrele(data_vp); - printf("prepare_profile_database: cannot lock data file %s\n", - profile_data_string); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - } - VATTR_NULL(&vattr); - vattr.va_uid = user; - error = VOP_SETATTR(data_vp, &vattr, p->p_cred->pc_ucred, p); - if(error) { - printf("prepare_profile_database: " - "Can't set user %s\n", profile_data_string); - } - - vput(data_vp); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); return 0; - +#endif /* !SECURE_KERNEL */ }