/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
- *
+ *
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
-/*
+/*
* Mach Operating System
* Copyright (c) 1987 Carnegie-Mellon University
* All rights reserved. The CMU software License Agreement specifies
* the terms and conditions for use and redistribution.
*/
-
/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
+ * support for mandatory and extensible security protections. This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
*/
-
-
-#include <meta_features.h>
+#include <vm/vm_options.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
-#include <kern/lock.h>
+#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
+#include <mach/port.h>
+#include <mach/sdt.h>
+#include <mach/task.h>
+#include <mach/task_access.h>
+#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
-#include <mach/port.h>
#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
-
-#include <bsm/audit_kernel.h>
+#include <sys/cprotect.h>
+#include <sys/kpi_socket.h>
+#include <sys/kas_info.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#if NECP
+#include <net/necp.h>
+#endif /* NECP */
+
+#include <security/audit/audit.h>
+#include <security/mac.h>
#include <bsm/audit_kevents.h>
#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
+#include <vm/vm_pageout.h>
-#include <machine/spl.h>
-
-#include <mach/shared_memory_server.h>
-#include <vm/vm_shared_memory_server.h>
+#include <mach/shared_region.h>
+#include <vm/vm_shared_region.h>
#include <vm/vm_protos.h>
+#include <sys/kern_memorystatus.h>
+
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
+
+#if CONFIG_CSR
+#include <sys/csr.h>
+#endif /* CONFIG_CSR */
+
+int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
+int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
+
+#if VM_MAP_DEBUG_APPLE_PROTECT
+SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
+#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
+
+#if VM_MAP_DEBUG_FOURK
+SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
+#endif /* VM_MAP_DEBUG_FOURK */
+
+#if DEVELOPMENT || DEBUG
+
+/*
+ * DEVELOPMENT/DEBUG sysctl handler: reads an int size from the request,
+ * tries kmem_alloc_contig() for that many bytes in kernel_map and, when the
+ * allocation succeeds, frees it again right away.  The allocation status is
+ * not reported; only the result of sysctl_handle_int() is returned.
+ */
+static int
+sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int requested = 0;
+	vm_offset_t addr;
+	int rc;
+
+	rc = sysctl_handle_int(oidp, &requested, 0, req);
+	if (rc != 0) {
+		return rc;
+	}
+	if (!req->newptr) {
+		/* no new value supplied, so there is nothing to allocate */
+		return rc;
+	}
+
+	if (kmem_alloc_contig(kernel_map, &addr, (vm_size_t)requested, 0, 0, 0, 0, VM_KERN_MEMORY_IOKIT) == KERN_SUCCESS) {
+		kmem_free(kernel_map, addr, requested);
+	}
+
+	return rc;
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
+ 0, 0, &sysctl_kmem_alloc_contig, "I", "");
+
+extern int vm_region_footprint;
+SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
+/*
+ * Handler for vm.self_region_footprint: always copies out the value of
+ * task_self_region_footprint(); when the request also carries new data, an
+ * int is read from it and handed to task_self_region_footprint_set().
+ */
+static int
+sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2, oidp)
+	int footprint = task_self_region_footprint();
+	int rc;
+
+	rc = SYSCTL_OUT(req, &footprint, sizeof(int));
+	if (rc == 0 && req->newptr) {
+		rc = SYSCTL_IN(req, &footprint, sizeof(int));
+		if (rc == 0) {
+			task_self_region_footprint_set(footprint);
+		}
+	}
+	return rc;
+}
+SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
+
+#endif /* DEVELOPMENT || DEBUG */
+
+
+#if CONFIG_EMBEDDED
+
+#if DEVELOPMENT || DEBUG
+extern int panic_on_unsigned_execute;
+SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
+#endif /* DEVELOPMENT || DEBUG */
+
+extern int log_executable_mem_entry;
+extern int cs_executable_create_upl;
+extern int cs_executable_mem_entry;
+extern int cs_executable_wire;
+SYSCTL_INT(_vm, OID_AUTO, log_executable_mem_entry, CTLFLAG_RD | CTLFLAG_LOCKED, &log_executable_mem_entry, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, cs_executable_mem_entry, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_mem_entry, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
+#endif /* CONFIG_EMBEDDED */
+
+#if DEVELOPMENT || DEBUG
+extern int radar_20146450;
+SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
+
+extern int macho_printf;
+SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
+
+extern int apple_protect_pager_data_request_debug;
+SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
+
+#if __arm__ || __arm64__
+/* These are meant to support the page table accounting unit test. */
+extern unsigned int arm_hardware_page_size;
+extern unsigned int arm_pt_desc_size;
+extern unsigned int arm_pt_root_size;
+extern unsigned int free_page_size_tt_count;
+extern unsigned int free_two_page_size_tt_count;
+extern unsigned int free_tt_count;
+extern unsigned int inuse_user_tteroot_count;
+extern unsigned int inuse_kernel_tteroot_count;
+extern unsigned int inuse_user_ttepages_count;
+extern unsigned int inuse_kernel_ttepages_count;
+extern unsigned int inuse_user_ptepages_count;
+extern unsigned int inuse_kernel_ptepages_count;
+SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
+#endif /* __arm__ || __arm64__ */
+
+#if __arm64__
+extern int fourk_pager_data_request_debug;
+SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
+#endif /* __arm64__ */
+#endif /* DEVELOPMENT || DEBUG */
+
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
+#if VM_SCAN_FOR_SHADOW_CHAIN
+static int vm_shadow_max_enabled = 0; /* Disabled by default */
+extern int proc_shadow_max(void);
+/*
+ * Read handler for vm.vm_shadow_max: reports proc_shadow_max() while
+ * vm_shadow_max_enabled is non-zero, and 0 otherwise.
+ */
+static int
+vm_shadow_max SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2, oidp)
+	int shadow_max = vm_shadow_max_enabled ? proc_shadow_max() : 0;
+
+	return SYSCTL_OUT(req, &shadow_max, sizeof(shadow_max));
+}
+SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, &vm_shadow_max, "I", "");
+
+SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
+
+#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
+
+SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
+
+__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
+ mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
+/*
+ * Sysctls related to data/stack execution. See osfmk/vm/vm_map.c
+ */
+
+#if DEVELOPMENT || DEBUG
+extern int allow_stack_exec, allow_data_exec;
+
+SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
+
+#endif /* DEVELOPMENT || DEBUG */
+
+/*
+ * Printable names for the eight read/write/execute protection combinations.
+ * log_stack_execution_failure() indexes this table with (prot & VM_PROT_ALL).
+ */
+static const char *prot_values[] = {
+ "none",
+ "read-only",
+ "write-only",
+ "read-write",
+ "execute-only",
+ "read-execute",
+ "write-execute",
+ "read-write-execute"
+};
+
+/*
+ * Print a "Data/Stack execution not permitted" message naming the current
+ * process (p_comm and p_pid), the given virtual address, and the protections
+ * rendered through prot_values[].
+ */
void
-log_nx_failure(addr64_t vaddr, vm_prot_t prot)
+log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
+{
+ /* prot is masked with VM_PROT_ALL before it is used as a table index */
+ printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
+ current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
+}
+
+/*
+ * shared_region_unnest_logging: level of logging of unnesting events
+ * 0 - no logging
+ * 1 - throttled logging of unexpected unnesting events (default)
+ * 2 - unthrottled logging of unexpected unnesting events
+ * 3+ - unthrottled logging of all unnesting events
+ */
+int shared_region_unnest_logging = 1;
+
+SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &shared_region_unnest_logging, 0, "");
+
+int vm_shared_region_unnest_log_interval = 10;
+int shared_region_unnest_log_count_threshold = 5;
+
+/*
+ * Shared cache path enforcement.
+ */
+
+#ifndef CONFIG_EMBEDDED
+static int scdir_enforce = 1;
+static char scdir_path[] = "/var/db/dyld/";
+#else
+static int scdir_enforce = 0;
+static char scdir_path[] = "/System/Library/Caches/com.apple.dyld/";
+#endif
+
+#ifndef SECURE_KERNEL
+/*
+ * Handler for vm.enforce_shared_cache_dir.  With CONFIG_CSR, every request
+ * (the check is not limited to writes) is logged and refused with EPERM
+ * unless csr_check(CSR_ALLOW_UNRESTRICTED_FS) returns 0; otherwise the
+ * request is passed through to sysctl_handle_int().
+ */
+static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
{
- printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot);
+#if CONFIG_CSR
+ if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
+ printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
+ return EPERM;
+ }
+#endif /* CONFIG_CSR */
+ return sysctl_handle_int(oidp, arg1, arg2, req);
}
+SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
+#endif
+
+/* These log rate throttling state variables aren't thread safe, but
+ * are sufficient unto the task.
+ */
+static int64_t last_unnest_log_time = 0;
+static int shared_region_unnest_log_count = 0;
+
+/*
+ * Report an unnest of part of the DYLD shared region, subject to the
+ * shared_region_unnest_logging level:
+ *   0    - nothing is logged
+ *   <= 2 - unnests at or above lowest_unnestable_addr in a nested map are
+ *          not reported
+ *   <= 1 - additionally rate-limited through last_unnest_log_time and
+ *          shared_region_unnest_log_count
+ * Anything that gets through fires the DTrace probe and prints one line.
+ */
+void
+log_unnest_badness(
+	vm_map_t m,
+	vm_map_offset_t s,
+	vm_map_offset_t e,
+	boolean_t is_nested_map,
+	vm_map_offset_t lowest_unnestable_addr)
+{
+	if (!shared_region_unnest_logging) {
+		return;
+	}
+
+	/* Unnesting of writable map entries is fine at levels up to 2. */
+	if (is_nested_map &&
+	    s >= lowest_unnestable_addr &&
+	    shared_region_unnest_logging <= 2) {
+		return;
+	}
+
+	if (shared_region_unnest_logging <= 1) {
+		struct timeval now;
+		boolean_t within_interval;
+
+		microtime(&now);
+		within_interval = (now.tv_sec - last_unnest_log_time) <
+		    vm_shared_region_unnest_log_interval;
+		if (!within_interval) {
+			/* interval elapsed: open a new throttling window */
+			last_unnest_log_time = now.tv_sec;
+			shared_region_unnest_log_count = 0;
+		} else if (shared_region_unnest_log_count++ >
+		    shared_region_unnest_log_count_threshold) {
+			/* too many messages inside the current window */
+			return;
+		}
+	}
+
+	DTRACE_VM4(log_unnest_badness,
+	    vm_map_t, m,
+	    vm_map_offset_t, s,
+	    vm_map_offset_t, e,
+	    vm_map_offset_t, lowest_unnestable_addr);
+	printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
+}
+/*
+ * Ask vm_map_check_protection() whether the current map grants the requested
+ * access to the pages spanning addr .. addr + len.  prot == B_READ checks
+ * VM_PROT_READ; any other value checks VM_PROT_WRITE.  The result of
+ * vm_map_check_protection() is returned unchanged.
+ */
int
useracc(
- user_addr_t addr,
- user_size_t len,
- int prot)
+ user_addr_t addr,
+ user_size_t len,
+ int prot)
{
- return (vm_map_check_protection(
- current_map(),
- vm_map_trunc_page(addr), vm_map_round_page(addr+len),
- prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
+ vm_map_t map;
+
+ map = current_map();
+ /* the range is widened to page boundaries using this map's own page mask */
+ return vm_map_check_protection(
+ map,
+ vm_map_trunc_page(addr,
+ vm_map_page_mask(map)),
+ vm_map_round_page(addr + len,
+ vm_map_page_mask(map)),
+ prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
}
+/*
+ * Wire the pages spanning addr .. addr + len in the current map for read and
+ * write access, tagged VM_KERN_MEMORY_BSD.
+ *
+ * Returns: 0 on KERN_SUCCESS
+ * ENOMEM on KERN_INVALID_ADDRESS or KERN_NO_SPACE
+ * EACCES on KERN_PROTECTION_FAILURE
+ * EINVAL on any other result
+ */
int
vslock(
- user_addr_t addr,
- user_size_t len)
+ user_addr_t addr,
+ user_size_t len)
{
- kern_return_t kret;
- kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
- vm_map_round_page(addr+len),
- VM_PROT_READ | VM_PROT_WRITE ,FALSE);
+ kern_return_t kret;
+ vm_map_t map;
+
+ map = current_map();
+ /* the range is widened to page boundaries using this map's own page mask */
+ kret = vm_map_wire_kernel(map,
+ vm_map_trunc_page(addr,
+ vm_map_page_mask(map)),
+ vm_map_round_page(addr + len,
+ vm_map_page_mask(map)),
+ VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
+ FALSE);
+ /* translate the Mach return code into a BSD errno */
switch (kret) {
case KERN_SUCCESS:
- return (0);
+ return 0;
case KERN_INVALID_ADDRESS:
case KERN_NO_SPACE:
- return (ENOMEM);
+ return ENOMEM;
case KERN_PROTECTION_FAILURE:
- return (EACCES);
+ return EACCES;
default:
- return (EINVAL);
+ return EINVAL;
}
}
__unused int dirtied)
{
#if FIXME /* [ */
- pmap_t pmap;
- vm_page_t pg;
- vm_map_offset_t vaddr;
- ppnum_t paddr;
+ pmap_t pmap;
+ vm_page_t pg;
+ vm_map_offset_t vaddr;
+ ppnum_t paddr;
#endif /* FIXME ] */
- kern_return_t kret;
+ kern_return_t kret;
+ vm_map_t map;
+
+ map = current_map();
#if FIXME /* [ */
if (dirtied) {
pmap = get_task_pmap(current_task());
- for (vaddr = vm_map_trunc_page(addr);
- vaddr < vm_map_round_page(addr+len);
- vaddr += PAGE_SIZE) {
+ for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
+ vaddr < vm_map_round_page(addr + len, PAGE_MASK);
+ vaddr += PAGE_SIZE) {
paddr = pmap_extract(pmap, vaddr);
pg = PHYS_TO_VM_PAGE(paddr);
vm_page_set_modified(pg);
}
}
#endif /* FIXME ] */
-#ifdef lint
+#ifdef lint
dirtied++;
-#endif /* lint */
- kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
- vm_map_round_page(addr+len), FALSE);
+#endif /* lint */
+ kret = vm_map_unwire(map,
+ vm_map_trunc_page(addr,
+ vm_map_page_mask(map)),
+ vm_map_round_page(addr + len,
+ vm_map_page_mask(map)),
+ FALSE);
switch (kret) {
case KERN_SUCCESS:
- return (0);
+ return 0;
case KERN_INVALID_ADDRESS:
case KERN_NO_SPACE:
- return (ENOMEM);
+ return ENOMEM;
case KERN_PROTECTION_FAILURE:
- return (EACCES);
+ return EACCES;
default:
- return (EINVAL);
+ return EINVAL;
}
}
int byte)
{
char character;
-
+
character = (char)byte;
- return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
+ return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}
int
int byte)
{
char character;
-
+
character = (char)byte;
- return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
+ return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
}
+/*
+ * Fetch one byte from user address addr.  Returns the byte as an unsigned
+ * value (0..255), or -1 if copyin() fails.
+ */
-int fubyte(user_addr_t addr)
+int
+fubyte(user_addr_t addr)
{
unsigned char byte;
- if (copyin(addr, (void *) &byte, sizeof(char)))
- return(-1);
- return(byte);
+ if (copyin(addr, (void *) &byte, sizeof(char))) {
+ return -1;
+ }
+ return byte;
}
+/*
+ * Same body as fubyte(): fetch one byte from user address addr and return it
+ * as an unsigned value, or -1 if copyin() fails.
+ */
-int fuibyte(user_addr_t addr)
+int
+fuibyte(user_addr_t addr)
{
unsigned char byte;
- if (copyin(addr, (void *) &(byte), sizeof(char)))
- return(-1);
- return(byte);
+ if (copyin(addr, (void *) &(byte), sizeof(char))) {
+ return -1;
+ }
+ return byte;
}
int
user_addr_t addr,
long word)
{
- return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
+ return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}
+/*
+ * Fetch sizeof(int) bytes from user address addr into a long and return it,
+ * or -1 if copyin() fails.  A stored value of -1 cannot be told apart from
+ * the failure return.
+ */
-long fuword(user_addr_t addr)
+long
+fuword(user_addr_t addr)
{
- long word;
+ /* zeroed first: copyin() writes only sizeof(int) bytes of the long */
+ long word = 0;
- if (copyin(addr, (void *) &word, sizeof(int)))
- return(-1);
- return(word);
+ if (copyin(addr, (void *) &word, sizeof(int))) {
+ return -1;
+ }
+ return word;
}
/* suiword and fuiword are the same as suword and fuword, respectively */
user_addr_t addr,
long word)
{
- return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
+ return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
}
+/*
+ * Same body as fuword(): fetch sizeof(int) bytes from user address addr into
+ * a long and return it, or -1 if copyin() fails.
+ */
-long fuiword(user_addr_t addr)
+long
+fuiword(user_addr_t addr)
{
- long word;
+ /* zeroed first: copyin() writes only sizeof(int) bytes of the long */
+ long word = 0;
- if (copyin(addr, (void *) &word, sizeof(int)))
- return(-1);
- return(word);
+ if (copyin(addr, (void *) &word, sizeof(int))) {
+ return -1;
+ }
+ return word;
}
/*
int
sulong(user_addr_t addr, int64_t word)
{
-
+ /*
+ * Store word at user address addr: all sizeof(word) bytes for a 64-bit
+ * process, otherwise via suiword() after a cast to long.
+ * Returns 0 on success and -1 if the 64-bit copyout() fails.
+ */
if (IS_64BIT_PROCESS(current_proc())) {
- return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
+ return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
} else {
- return(suiword(addr, (long)word));
+ return suiword(addr, (long)word);
}
}
int64_t longword;
if (IS_64BIT_PROCESS(current_proc())) {
- if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
- return(-1);
- return(longword);
+ if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
+ return -1;
+ }
+ return longword;
} else {
- return((int64_t)fuiword(addr));
+ return (int64_t)fuiword(addr);
}
}
int
suulong(user_addr_t addr, uint64_t uword)
{
-
+ /*
+ * Store uword at user address addr: all sizeof(uword) bytes for a 64-bit
+ * process, otherwise via suiword() with the value truncated to 32 bits.
+ * Returns 0 on success and -1 if the 64-bit copyout() fails.
+ */
if (IS_64BIT_PROCESS(current_proc())) {
- return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
+ return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
} else {
- return(suiword(addr, (u_long)uword));
+ return suiword(addr, (uint32_t)uword);
}
}
uint64_t ulongword;
if (IS_64BIT_PROCESS(current_proc())) {
- if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
- return(-1ULL);
- return(ulongword);
+ if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
+ return -1ULL;
+ }
+ return ulongword;
} else {
- return((uint64_t)fuiword(addr));
+ return (uint64_t)fuiword(addr);
}
}
+/*
+ * swapon: not supported here.  All arguments are ignored and ENOTSUP is
+ * always returned.
+ */
int
-swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
+swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
- return(ENOTSUP);
+ return ENOTSUP;
}
-
+/*
+ * pid_for_task
+ *
+ * Find the BSD process ID for the Mach task associated with the given Mach port
+ * name
+ *
+ * Parameters: args User argument descriptor (see below)
+ *
+ * Indirect parameters: args->t Mach port name
+ * args->pid Process ID (returned value; see below)
+ *
+ * Returns: KERN_SUCCESS Success
+ * KERN_FAILURE Not success
+ *
+ * Implicit returns: args->pid Process ID
+ *
+ */
kern_return_t
pid_for_task(
struct pid_for_task_args *args)
{
- mach_port_name_t t = args->t;
- user_addr_t pid_addr = args->pid;
- struct proc * p;
- task_t t1;
- int pid = -1;
- kern_return_t err = KERN_SUCCESS;
- boolean_t funnel_state;
+ mach_port_name_t t = args->t;
+ user_addr_t pid_addr = args->pid;
+ proc_t p;
+ task_t t1;
+ int pid = -1;
+ kern_return_t err = KERN_SUCCESS;
AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
AUDIT_ARG(mach_port1, t);
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
- t1 = port_name_to_task(t);
+ t1 = port_name_to_task_inspect(t);
if (t1 == TASK_NULL) {
err = KERN_FAILURE;
if (p) {
pid = proc_pid(p);
err = KERN_SUCCESS;
+ } else if (is_corpsetask(t1)) {
+ pid = task_pid(t1);
+ err = KERN_SUCCESS;
} else {
err = KERN_FAILURE;
}
pftout:
AUDIT_ARG(pid, pid);
(void) copyout((char *) &pid, pid_addr, sizeof(int));
- thread_funnel_set(kernel_flock, funnel_state);
AUDIT_MACH_SYSCALL_EXIT(err);
- return(err);
+ return err;
+}
+
+/*
+ *
+ * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
+ * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
+ *
+ */
+static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
+
+/*
+ * Routine: task_for_pid_posix_check
+ * Purpose:
+ * Verify that the current process should be allowed to
+ * get the target process's task port. This is only
+ * permitted if:
+ * - The current process is root
+ * OR all of the following are true:
+ * - The target process's real, effective, and saved uids
+ * are the same as the current proc's euid,
+ * - The target process's group set is a subset of the
+ * calling process's group set, and
+ * - The target process hasn't switched credentials.
+ *
+ * Returns: TRUE: permitted
+ * FALSE: denied
+ */
+static int
+task_for_pid_posix_check(proc_t target)
+{
+	kauth_cred_t caller_cred, target_cred;
+	uid_t caller_uid;
+	int permitted = TRUE;
+
+	/* Targets in the SZOMB state are always refused. */
+	if (target->p_stat == SZOMB) {
+		return FALSE;
+	}
+
+	caller_cred = kauth_cred_get();
+	caller_uid = kauth_cred_getuid(caller_cred);
+
+	/* Root, or a process asking for its own task port, passes outright. */
+	if (kauth_cred_issuser(caller_cred) || target == current_proc()) {
+		return TRUE;
+	}
+
+	/* Under DENY nobody else may obtain another process's task port. */
+	if (tfp_policy == KERN_TFP_POLICY_DENY) {
+		return FALSE;
+	}
+
+	/* Holds a credential reference until the unref at the bottom. */
+	target_cred = kauth_cred_proc_ref(target);
+
+	if (kauth_cred_getuid(target_cred) != caller_uid ||
+	    kauth_cred_getruid(target_cred) != caller_uid ||
+	    kauth_cred_getsvuid(target_cred) != caller_uid) {
+		/* target's euid, ruid and saved uid must all equal the caller's euid */
+		permitted = FALSE;
+	} else if (kauth_cred_gid_subset(target_cred, caller_cred, &permitted) ||
+	    permitted == 0) {
+		/* the subset query failed, or target's groups are not a subset of the caller's */
+		permitted = FALSE;
+	} else if (target->p_flag & P_SUGID) {
+		/* target has switched credentials */
+		permitted = FALSE;
+	}
+
+	kauth_cred_unref(&target_cred);
+	return permitted;
+}
+
+/*
+ * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
+ *
+ * Description: Waits for the user space daemon to respond to the request
+ * we made. Function declared non inline to be visible in
+ * stackshots and spindumps as well as debugging.
+ */
+__attribute__((noinline)) int
+__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
+	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
+{
+	int upcall_result;
+
+	/* Forward the request unchanged to check_task_access() and hand back its status. */
+	upcall_result = check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
+	return upcall_result;
+}
/*
* Only permitted to privileged processes, or processes
* with the same user ID.
*
- * XXX This should be a BSD system call, not a Mach trap!!!
- */
-/*
- *
- * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
- * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: all permissive; related ones allowed or privileged
- * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self access allowed; setgid (to tfp_group) are allowed for other tasks
+ * Note: if pid == 0, an error is returned no matter who is calling.
*
+ * XXX This should be a BSD system call, not a Mach trap!!!
*/
-static int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
-/* the groutp is inited to kmem group and is modifiable by sysctl */
-static int tfp_group_inited = 0; /* policy groups are loaded ... */
-static gid_t tfp_group_ronly = 0; /* procview group */
-static gid_t tfp_group_rw = 0; /* procmod group */
-
kern_return_t
task_for_pid(
struct task_for_pid_args *args)
{
- mach_port_name_t target_tport = args->target_tport;
- int pid = args->pid;
- user_addr_t task_addr = args->t;
- struct uthread *uthread;
- struct proc *p;
- struct proc *p1;
- task_t t1;
- mach_port_name_t tret;
+ mach_port_name_t target_tport = args->target_tport;
+ int pid = args->pid;
+ user_addr_t task_addr = args->t;
+ proc_t p = PROC_NULL;
+ task_t t1 = TASK_NULL;
+ task_t task = TASK_NULL;
+ mach_port_name_t tret = MACH_PORT_NULL;
+ ipc_port_t tfpport = MACH_PORT_NULL;
void * sright;
int error = 0;
- int is_member = 0;
- boolean_t funnel_state;
- boolean_t ispermitted = FALSE;
-#if DIAGNOSTIC
- char procname[MAXCOMLEN+1];
-#endif /* DIAGNOSTIC */
AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
AUDIT_ARG(pid, pid);
AUDIT_ARG(mach_port1, target_tport);
+ /* Always check if pid == 0 */
+ if (pid == 0) {
+ (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
+ return KERN_FAILURE;
+ }
+
+ /* t1 is only checked against TASK_NULL here; its reference is dropped at tfpout */
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
- return(KERN_FAILURE);
- }
+ return KERN_FAILURE;
+ }
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
- p1 = current_proc();
+ p = proc_find(pid);
+ if (p == PROC_NULL) {
+ error = KERN_FAILURE;
+ goto tfpout;
+ }
- /*
- * Delayed binding of thread credential to process credential, if we
- * are not running with an explicitly set thread credential.
- */
- uthread = get_bsdthread_info(current_thread());
- if (uthread->uu_ucred != p1->p_ucred &&
- (uthread->uu_flag & UT_SETUID) == 0) {
- kauth_cred_t old = uthread->uu_ucred;
- proc_lock(p1);
- kauth_cred_ref(p1->p_ucred);
- uthread->uu_ucred = p1->p_ucred;
- proc_unlock(p1);
- if (IS_VALID_CRED(old))
- kauth_cred_unref(&old);
- }
-
- p = pfind(pid);
+#if CONFIG_AUDIT
AUDIT_ARG(process, p);
+#endif
- /*
- * XXX p_ucred check can be bogus in multithreaded processes,
- * XXX unless the funnel is held.
- */
- switch (tfp_policy) {
-
- case KERN_TFP_POLICY_PERMISSIVE:
- /* self or suser or related ones */
- if ((p != (struct proc *) 0)
- && (p->p_stat != SZOMB)
- && (p1 != (struct proc *) 0)
- && (
- (p1 == p)
- || !(suser(kauth_cred_get(), 0))
- || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
- ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
- && ((p->p_flag & P_SUGID) == 0))
- )
- )
- ispermitted = TRUE;
- break;
+ if (!(task_for_pid_posix_check(p))) {
+ error = KERN_FAILURE;
+ goto tfpout;
+ }
- case KERN_TFP_POLICY_RESTRICTED:
- /* self or suser or setgid and related ones only */
- if ((p != (struct proc *) 0)
- && (p1 != (struct proc *) 0)
- && (p->p_stat != SZOMB)
- && (
- (p1 == p)
- || !(suser(kauth_cred_get(), 0))
- || (((tfp_group_inited != 0) &&
- (
- ((kauth_cred_ismember_gid(kauth_cred_get(),
- tfp_group_ronly, &is_member) == 0) && is_member)
- ||((kauth_cred_ismember_gid(kauth_cred_get(),
- tfp_group_rw, &is_member) == 0) && is_member)
- )
- )
- && ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
- ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
- && ((p->p_flag & P_SUGID) == 0))
- )
- )
- )
- ispermitted = TRUE;
+ /* No task attached: success is reported, but the name copied out stays MACH_PORT_NULL */
+ if (p->task == TASK_NULL) {
+ error = KERN_SUCCESS;
+ goto tfpout;
+ }
- break;
+#if CONFIG_MACF
+ error = mac_proc_check_get_task(kauth_cred_get(), p);
+ if (error) {
+ error = KERN_FAILURE;
+ goto tfpout;
+ }
+#endif
- case KERN_TFP_POLICY_DENY:
- /* self or suser only */
- default:
- /* do not return task port of other task at all */
- if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
- && ((p1 == p) || !(suser(kauth_cred_get(), 0))))
- ispermitted = TRUE;
- else
- ispermitted = FALSE;
- break;
- };
+ /* Grab a task reference since the proc ref might be dropped if an upcall to task access server is made */
+ task = p->task;
+ task_reference(task);
+
+ /* If we aren't root and target's task access port is set... */
+ if (!kauth_cred_issuser(kauth_cred_get()) &&
+ p != current_proc() &&
+ (task_get_task_access_port(task, &tfpport) == 0) &&
+ (tfpport != IPC_PORT_NULL)) {
+ if (tfpport == IPC_PORT_DEAD) {
+ error = KERN_PROTECTION_FAILURE;
+ goto tfpout;
+ }
+
+ /*
+ * Drop the proc_find proc ref before making an upcall
+ * to taskgated, since holding a proc_find
+ * ref while making an upcall can cause deadlock.
+ */
+ proc_rele(p);
+ p = PROC_NULL;
+ /* Call up to the task access server */
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
- if (ispermitted == TRUE) {
- if (p->task != TASK_NULL) {
- task_reference(p->task);
- sright = (void *)convert_task_to_port(p->task);
- tret = ipc_port_copyout_send(
- sright,
- get_task_ipcspace(current_task()));
- } else
- tret = MACH_PORT_NULL;
- AUDIT_ARG(mach_port2, tret);
- (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
- task_deallocate(t1);
- error = KERN_SUCCESS;
+ /* an interrupted upcall maps to KERN_ABORTED; every other failure maps to KERN_FAILURE */
+ if (error != MACH_MSG_SUCCESS) {
+ if (error == MACH_RCV_INTERRUPTED) {
+ error = KERN_ABORTED;
+ } else {
+ error = KERN_FAILURE;
+ }
goto tfpout;
+ }
}
-#if DIAGNOSTIC
- else {
- /*
- * There is no guarantee that p_comm is null terminated and
- * kernel implementation of string functions are complete. So
- * ensure stale info is not leaked out, bzero the buffer
- */
- bzero(&procname[0], MAXCOMLEN+1);
- strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
- if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
- log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
- ((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
- ((p != PROC_NULL)?(p->p_pid):0));
+
+ /* Grant task port access */
+ extmod_statistics_incr_task_for_pid(task);
+ sright = (void *) convert_task_to_port(task);
+
+ /* Check if the task has been corpsified */
+ if (is_corpsetask(task)) {
+ /* task ref consumed by convert_task_to_port */
+ task = TASK_NULL;
+ ipc_port_release_send(sright);
+ error = KERN_FAILURE;
+ goto tfpout;
}
-#endif /* DIAGNOSTIC */
- task_deallocate(t1);
- tret = MACH_PORT_NULL;
- (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
- error = KERN_FAILURE;
+ /* task ref consumed by convert_task_to_port */
+ task = TASK_NULL;
+ tret = ipc_port_copyout_send(
+ sright,
+ get_task_ipcspace(current_task()));
+
+ error = KERN_SUCCESS;
+
+ /*
+ * Common exit: copy out tret (still MACH_PORT_NULL unless a send right
+ * was copied out above) and drop whatever references are still held.
+ */
tfpout:
- thread_funnel_set(kernel_flock, funnel_state);
+ task_deallocate(t1);
+ AUDIT_ARG(mach_port2, tret);
+ (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
+
+ if (tfpport != IPC_PORT_NULL) {
+ ipc_port_release_send(tfpport);
+ }
+ if (task != TASK_NULL) {
+ task_deallocate(task);
+ }
+ if (p != PROC_NULL) {
+ proc_rele(p);
+ }
AUDIT_MACH_SYSCALL_EXIT(error);
- return(error);
+ return error;
}
/*
task_name_for_pid(
struct task_name_for_pid_args *args)
{
- mach_port_name_t target_tport = args->target_tport;
- int pid = args->pid;
- user_addr_t task_addr = args->t;
- struct uthread *uthread;
- struct proc *p;
- struct proc *p1;
- task_t t1;
- mach_port_name_t tret;
+ mach_port_name_t target_tport = args->target_tport;
+ int pid = args->pid;
+ user_addr_t task_addr = args->t;
+ proc_t p = PROC_NULL;
+ task_t t1;
+ mach_port_name_t tret;
void * sright;
- int error = 0;
- boolean_t funnel_state;
+ int error = 0, refheld = 0;
+ kauth_cred_t target_cred;
AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
AUDIT_ARG(pid, pid);
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
- return(KERN_FAILURE);
- }
-
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
- p1 = current_proc();
-
- /*
- * Delayed binding of thread credential to process credential, if we
- * are not running with an explicitly set thread credential.
- */
- /*
- * XXX p_ucred check can be bogus in multithreaded processes,
- * XXX unless the funnel is held.
- */
- uthread = get_bsdthread_info(current_thread());
- if (uthread->uu_ucred != p1->p_ucred &&
- (uthread->uu_flag & UT_SETUID) == 0) {
- kauth_cred_t old = uthread->uu_ucred;
- proc_lock(p1);
- kauth_cred_ref(p1->p_ucred);
- uthread->uu_ucred = p1->p_ucred;
- proc_unlock(p1);
- if (IS_VALID_CRED(old))
- kauth_cred_unref(&old);
- }
-
- p = pfind(pid);
- AUDIT_ARG(process, p);
+ return KERN_FAILURE;
+ }
+
+ p = proc_find(pid);
+ if (p != PROC_NULL) {
+ AUDIT_ARG(process, p);
+ target_cred = kauth_cred_proc_ref(p);
+ refheld = 1;
+
+ if ((p->p_stat != SZOMB)
+ && ((current_proc() == p)
+ || kauth_cred_issuser(kauth_cred_get())
+ || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
+ ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
+ if (p->task != TASK_NULL) {
+ task_reference(p->task);
+#if CONFIG_MACF
+ error = mac_proc_check_get_task_name(kauth_cred_get(), p);
+ if (error) {
+ task_deallocate(p->task);
+ goto noperm;
+ }
+#endif
+ sright = (void *)convert_task_name_to_port(p->task);
+ tret = ipc_port_copyout_send(sright,
+ get_task_ipcspace(current_task()));
+ } else {
+ tret = MACH_PORT_NULL;
+ }
- if ((p != (struct proc *) 0)
- && (p->p_stat != SZOMB)
- && (p1 != (struct proc *) 0)
- && ((p1 == p)
- || !(suser(kauth_cred_get(), 0))
- || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
- ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))
- {
- if (p->task != TASK_NULL)
- {
- task_reference(p->task);
- sright = (void *)convert_task_name_to_port(p->task);
- tret = ipc_port_copyout_send(
- sright,
- get_task_ipcspace(current_task()));
- } else
- tret = MACH_PORT_NULL;
- AUDIT_ARG(mach_port2, tret);
- (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
- task_deallocate(t1);
- error = KERN_SUCCESS;
- goto tnfpout;
+ AUDIT_ARG(mach_port2, tret);
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
+ task_deallocate(t1);
+ error = KERN_SUCCESS;
+ goto tnfpout;
+ }
}
+#if CONFIG_MACF
+noperm:
+#endif
task_deallocate(t1);
tret = MACH_PORT_NULL;
(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
error = KERN_FAILURE;
tnfpout:
- thread_funnel_set(kernel_flock, funnel_state);
+ if (refheld != 0) {
+ kauth_cred_unref(&target_cred);
+ }
+ if (p != PROC_NULL) {
+ proc_rele(p);
+ }
AUDIT_MACH_SYSCALL_EXIT(error);
- return(error);
+ return error;
}
-static int
-sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
- __unused int arg2, struct sysctl_req *req)
+kern_return_t
+pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
- int error = 0;
- int new_value;
+ task_t target = NULL;
+ proc_t targetproc = PROC_NULL;
+ int pid = args->pid;
+ int error = 0;
- error = SYSCTL_OUT(req, arg1, sizeof(int));
- if (error || req->newptr == USER_ADDR_NULL)
- return(error);
-
- if (!is_suser())
- return(EPERM);
-
- if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
- goto out;
+#if CONFIG_MACF
+ error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
+ if (error) {
+ error = EPERM;
+ goto out;
}
- if ((new_value == KERN_TFP_POLICY_DENY)
- || (new_value == KERN_TFP_POLICY_PERMISSIVE)
- || (new_value == KERN_TFP_POLICY_RESTRICTED))
- tfp_policy = new_value;
- else
- error = EINVAL;
-out:
- return(error);
+#endif
+
+ if (pid == 0) {
+ error = EPERM;
+ goto out;
+ }
+
+ targetproc = proc_find(pid);
+ if (targetproc == PROC_NULL) {
+ error = ESRCH;
+ goto out;
+ }
+
+ if (!task_for_pid_posix_check(targetproc)) {
+ error = EPERM;
+ goto out;
+ }
+
+ target = targetproc->task;
+#ifndef CONFIG_EMBEDDED
+ if (target != TASK_NULL) {
+ mach_port_t tfpport;
+
+ /* If we aren't root and target's task access port is set... */
+ if (!kauth_cred_issuser(kauth_cred_get()) &&
+ targetproc != current_proc() &&
+ (task_get_task_access_port(target, &tfpport) == 0) &&
+ (tfpport != IPC_PORT_NULL)) {
+ if (tfpport == IPC_PORT_DEAD) {
+ error = EACCES;
+ goto out;
+ }
+
+ /* Call up to the task access server */
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+ if (error != MACH_MSG_SUCCESS) {
+ if (error == MACH_RCV_INTERRUPTED) {
+ error = EINTR;
+ } else {
+ error = EPERM;
+ }
+ goto out;
+ }
+ }
+ }
+#endif
+
+ task_reference(target);
+ error = task_pidsuspend(target);
+ if (error) {
+ if (error == KERN_INVALID_ARGUMENT) {
+ error = EINVAL;
+ } else {
+ error = EPERM;
+ }
+ }
+#if CONFIG_MEMORYSTATUS
+ else {
+ memorystatus_on_suspend(targetproc);
+ }
+#endif
+
+ task_deallocate(target);
+
+out:
+ if (targetproc != PROC_NULL) {
+ proc_rele(targetproc);
+ }
+ *ret = error;
+ return error;
}
-static int
-sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
- __unused int arg2, struct sysctl_req *req)
+kern_return_t
+pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
- int error = 0;
- int new_value;
+ task_t target = NULL;
+ proc_t targetproc = PROC_NULL;
+ int pid = args->pid;
+ int error = 0;
+
+#if CONFIG_MACF
+ error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
+ if (error) {
+ error = EPERM;
+ goto out;
+ }
+#endif
- error = SYSCTL_OUT(req, arg1, sizeof(int));
- if (error || req->newptr == USER_ADDR_NULL)
- return(error);
+ if (pid == 0) {
+ error = EPERM;
+ goto out;
+ }
- if (!is_suser())
- return(EPERM);
+ targetproc = proc_find(pid);
+ if (targetproc == PROC_NULL) {
+ error = ESRCH;
+ goto out;
+ }
- /*
- * Once set; cannot be reset till next boot. Launchd will set this
- * in its pid 1 init and no one can set after that.
- */
- if (tfp_group_inited != 0)
- return(EPERM);
-
- if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
+ if (!task_for_pid_posix_check(targetproc)) {
+ error = EPERM;
goto out;
}
- if (new_value >= 100)
- error = EINVAL;
- else {
- if (arg1 == &tfp_group_ronly)
- tfp_group_ronly = new_value;
- else if (arg1 == &tfp_group_rw)
- tfp_group_rw = new_value;
- else
+ target = targetproc->task;
+#ifndef CONFIG_EMBEDDED
+ if (target != TASK_NULL) {
+ mach_port_t tfpport;
+
+ /* If we aren't root and target's task access port is set... */
+ if (!kauth_cred_issuser(kauth_cred_get()) &&
+ targetproc != current_proc() &&
+ (task_get_task_access_port(target, &tfpport) == 0) &&
+ (tfpport != IPC_PORT_NULL)) {
+ if (tfpport == IPC_PORT_DEAD) {
+ error = EACCES;
+ goto out;
+ }
+
+ /* Call up to the task access server */
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+
+ if (error != MACH_MSG_SUCCESS) {
+ if (error == MACH_RCV_INTERRUPTED) {
+ error = EINTR;
+ } else {
+ error = EPERM;
+ }
+ goto out;
+ }
+ }
+ }
+#endif
+
+#if CONFIG_EMBEDDED
+#if SOCKETS
+ resume_proc_sockets(targetproc);
+#endif /* SOCKETS */
+#endif /* CONFIG_EMBEDDED */
+
+ task_reference(target);
+
+#if CONFIG_MEMORYSTATUS
+ memorystatus_on_resume(targetproc);
+#endif
+
+ error = task_pidresume(target);
+ if (error) {
+ if (error == KERN_INVALID_ARGUMENT) {
error = EINVAL;
- if ((tfp_group_ronly != 0 ) && (tfp_group_rw != 0 ))
- tfp_group_inited = 1;
+ } else {
+ if (error == KERN_MEMORY_ERROR) {
+ psignal(targetproc, SIGKILL);
+ error = EIO;
+ } else {
+ error = EPERM;
+ }
+ }
}
+ task_deallocate(target);
+
out:
- return(error);
+ if (targetproc != PROC_NULL) {
+ proc_rele(targetproc);
+ }
+
+ *ret = error;
+ return error;
}
-SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
- &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
- &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups ,"I","read_group");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
- &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups ,"I","rw_group");
+#if CONFIG_EMBEDDED
+/*
+ * Freeze the specified process (provided in args->pid), or find and freeze a PID.
+ * When a process is specified, this call is blocking, otherwise we wake up the
+ * freezer thread and do not block on a process being frozen.
+ */
+kern_return_t
+pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
+{
+ int error = 0;
+ proc_t targetproc = PROC_NULL;
+ int pid = args->pid;
+
+#ifndef CONFIG_FREEZE
+ #pragma unused(pid)
+#else
+#if CONFIG_MACF
+ error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
+ if (error) {
+ error = EPERM;
+ goto out;
+ }
+#endif
-SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW, &shared_region_trace_level, 0, "");
+ /*
+ * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
+ */
-/*
- * Try and cap the number of mappings the user might be trying to deal with,
- * so that we don't end up allocating insane amounts of wired memory in the
- * kernel based on bogus user arguments.
- * There are 2 shared regions (TEXT and DATA). The size of each submap
- * is SHARED_TEXT_REGION_SIZE and we can have at most 1 VM map entry per page,
- * so the maximum number of mappings we could ever have to deal with is...
- */
-#define SHARED_REGION_MAX_MAPPINGS ((2 *SHARED_TEXT_REGION_SIZE) >> PAGE_SHIFT)
+ if (pid >= 0) {
+ targetproc = proc_find(pid);
-/*
- * shared_region_make_private_np:
- *
- * This system call is for "dyld" only.
- *
- * It creates a private copy of the current process's "shared region" for
- * split libraries. "dyld" uses this when the shared region is full or
- * it needs to load a split library that conflicts with an already loaded one
- * that this process doesn't need. "dyld" specifies a set of address ranges
- * that it wants to keep in the now-private "shared region". These cover
- * the set of split libraries that the process needs so far. The kernel needs
- * to deallocate the rest of the shared region, so that it's available for
- * more libraries for this process.
- */
+ if (targetproc == PROC_NULL) {
+ error = ESRCH;
+ goto out;
+ }
+
+ if (!task_for_pid_posix_check(targetproc)) {
+ error = EPERM;
+ goto out;
+ }
+ }
+
+ if (pid == -2) {
+ vm_pageout_anonymous_pages();
+ } else if (pid == -1) {
+ memorystatus_on_inactivity(targetproc);
+ } else {
+ error = memorystatus_freeze_process_sync(targetproc);
+ }
+
+out:
+
+#endif /* CONFIG_FREEZE */
+
+ if (targetproc != PROC_NULL) {
+ proc_rele(targetproc);
+ }
+ *ret = error;
+ return error;
+}
+#endif /* CONFIG_EMBEDDED */
+
+#if SOCKETS
int
-shared_region_make_private_np(
- struct proc *p,
- struct shared_region_make_private_np_args *uap,
- __unused int *retvalp)
+networking_memstatus_callout(proc_t p, uint32_t status)
{
- int error;
- kern_return_t kr;
- boolean_t using_shared_regions;
- user_addr_t user_ranges;
- unsigned int range_count;
- vm_size_t ranges_size;
- struct shared_region_range_np *ranges;
- shared_region_mapping_t shared_region;
- struct shared_region_task_mappings task_mapping_info;
- shared_region_mapping_t next;
-
- ranges = NULL;
-
- range_count = uap->rangeCount;
- user_ranges = uap->ranges;
- ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));
-
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_INFO,
- ("shared_region: %p [%d(%s)] "
- "make_private(rangecount=%d)\n",
- current_thread(), p->p_pid, p->p_comm, range_count));
-
- /* allocate kernel space for the "ranges" */
- if (range_count != 0) {
- if (range_count > SHARED_REGION_MAX_MAPPINGS) {
- error = EINVAL;
- goto done;
+ struct filedesc *fdp;
+ int i;
+
+ /*
+ * proc list lock NOT held
+ * proc lock NOT held
+ * a reference on the proc has been held / shall be dropped by the caller.
+ */
+ LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
+ LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
+
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ for (i = 0; i < fdp->fd_nfiles; i++) {
+ struct fileproc *fp;
+
+ fp = fdp->fd_ofiles[i];
+ if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0) {
+ continue;
}
- if ((mach_vm_size_t) ranges_size !=
- (mach_vm_size_t) range_count * sizeof (ranges[0])) {
- /* 32-bit integer overflow */
- error = EINVAL;
- goto done;
+ switch (FILEGLOB_DTYPE(fp->f_fglob)) {
+#if NECP
+ case DTYPE_NETPOLICY:
+ necp_fd_memstatus(p, status,
+ (struct necp_fd_data *)fp->f_fglob->fg_data);
+ break;
+#endif /* NECP */
+ default:
+ break;
}
- kr = kmem_alloc(kernel_map,
- (vm_offset_t *) &ranges,
- ranges_size);
- if (kr != KERN_SUCCESS) {
- error = ENOMEM;
- goto done;
+ }
+ proc_fdunlock(p);
+
+ return 1;
+}
+
+
+static int
+networking_defunct_callout(proc_t p, void *arg)
+{
+ struct pid_shutdown_sockets_args *args = arg;
+ int pid = args->pid;
+ int level = args->level;
+ struct filedesc *fdp;
+ int i;
+
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ for (i = 0; i < fdp->fd_nfiles; i++) {
+ struct fileproc *fp = fdp->fd_ofiles[i];
+ struct fileglob *fg;
+
+ if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0) {
+ continue;
}
- /* copy "ranges" from user-space */
- error = copyin(user_ranges,
- ranges,
- ranges_size);
- if (error) {
- goto done;
+ fg = fp->f_fglob;
+ switch (FILEGLOB_DTYPE(fg)) {
+ case DTYPE_SOCKET: {
+ struct socket *so = (struct socket *)fg->fg_data;
+ if (p->p_pid == pid || so->last_pid == pid ||
+ ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
+ /* Call networking stack with socket and level */
+ (void) socket_defunct(p, so, level);
+ }
+ break;
+ }
+#if NECP
+ case DTYPE_NETPOLICY:
+ /* first pass: defunct necp and get stats for ntstat */
+ if (p->p_pid == pid) {
+ necp_fd_defunct(p,
+ (struct necp_fd_data *)fg->fg_data);
+ }
+ break;
+#endif /* NECP */
+ default:
+ break;
}
}
- if (p->p_flag & P_NOSHLIB) {
- /* no split library has been mapped for this process so far */
- using_shared_regions = FALSE;
- } else {
- /* this process has already mapped some split libraries */
- using_shared_regions = TRUE;
+ proc_fdunlock(p);
+
+ return PROC_RETURNED;
+}
+
+int
+pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
+{
+ int error = 0;
+ proc_t targetproc = PROC_NULL;
+ int pid = args->pid;
+ int level = args->level;
+
+ if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
+ level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
+ error = EINVAL;
+ goto out;
}
- /*
- * Get a private copy of the current shared region.
- * Do not chain it to the system-wide shared region, as we'll want
- * to map other split libraries in place of the old ones. We want
- * to completely detach from the system-wide shared region and go our
- * own way after this point, not sharing anything with other processes.
- */
- error = clone_system_shared_regions(using_shared_regions,
- FALSE, /* chain_regions */
- ENV_DEFAULT_ROOT);
+#if CONFIG_MACF
+ error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
if (error) {
- goto done;
+ error = EPERM;
+ goto out;
}
+#endif
- /* get info on the newly allocated shared region */
- vm_get_shared_region(current_task(), &shared_region);
- task_mapping_info.self = (vm_offset_t) shared_region;
- shared_region_mapping_info(shared_region,
- &(task_mapping_info.text_region),
- &(task_mapping_info.text_size),
- &(task_mapping_info.data_region),
- &(task_mapping_info.data_size),
- &(task_mapping_info.region_mappings),
- &(task_mapping_info.client_base),
- &(task_mapping_info.alternate_base),
- &(task_mapping_info.alternate_next),
- &(task_mapping_info.fs_base),
- &(task_mapping_info.system),
- &(task_mapping_info.flags),
- &next);
+ targetproc = proc_find(pid);
+ if (targetproc == PROC_NULL) {
+ error = ESRCH;
+ goto out;
+ }
- /*
- * We now have our private copy of the shared region, as it was before
- * the call to clone_system_shared_regions(). We now need to clean it
- * up and keep only the memory areas described by the "ranges" array.
- */
- kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);
- switch (kr) {
- case KERN_SUCCESS:
- error = 0;
- break;
- default:
- error = EINVAL;
- goto done;
+ if (!task_for_pid_posix_check(targetproc)) {
+ error = EPERM;
+ goto out;
}
-done:
- if (ranges != NULL) {
- kmem_free(kernel_map,
- (vm_offset_t) ranges,
- ranges_size);
- ranges = NULL;
- }
-
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_INFO,
- ("shared_region: %p [%d(%s)] "
- "make_private(rangecount=%d) -> %d "
- "shared_region=%p[%x,%x,%x]\n",
- current_thread(), p->p_pid, p->p_comm,
- range_count, error, shared_region,
- task_mapping_info.fs_base,
- task_mapping_info.system,
- task_mapping_info.flags));
+ proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
+ networking_defunct_callout, args, NULL, NULL);
+out:
+ if (targetproc != PROC_NULL) {
+ proc_rele(targetproc);
+ }
+ *ret = error;
return error;
}
+#endif /* SOCKETS */
+
+static int
+sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
+ __unused int arg2, struct sysctl_req *req)
+{
+ int error = 0;
+ int new_value;
+
+ error = SYSCTL_OUT(req, arg1, sizeof(int));
+ if (error || req->newptr == USER_ADDR_NULL) {
+ return error;
+ }
+
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EPERM;
+ }
+
+ if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
+ goto out;
+ }
+ if ((new_value == KERN_TFP_POLICY_DENY)
+ || (new_value == KERN_TFP_POLICY_DEFAULT)) {
+ tfp_policy = new_value;
+ } else {
+ error = EINVAL;
+ }
+out:
+ return error;
+}
+
+#if defined(SECURE_KERNEL)
+static int kern_secure_kernel = 1;
+#else
+static int kern_secure_kernel = 0;
+#endif
+
+SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
+
+SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
+SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
+
+SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &shared_region_trace_level, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &shared_region_version, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &shared_region_persistence, 0, "");
/*
- * shared_region_map_file_np:
+ * shared_region_check_np:
+ *
+ * This system call is intended for dyld.
+ *
+ * dyld calls this when any process starts to see if the process's shared
+ * region is already set up and ready to use.
+ * This call returns the base address of the first mapping in the
+ * process's shared region.
+ * dyld will then check what's mapped at that address.
*
- * This system call is for "dyld" only.
+ * If the shared region is empty, dyld will then attempt to map the shared
+ * cache file in the shared region via the shared_region_map_np() system call.
+ *
+ * If something's already mapped in the shared region, dyld will check if it
+ * matches the shared cache it would like to use for that process.
+ * If it matches, everything's ready and the process can proceed and use the
+ * shared region.
+ * If it doesn't match, dyld will unmap the shared region and map the shared
+ * cache into the process's address space via mmap().
+ *
+ * ERROR VALUES
+ * EINVAL no shared region
+ * ENOMEM shared region is empty
+ * EFAULT bad address for "start_address"
+ */
+int
+shared_region_check_np(
+ __unused struct proc *p,
+ struct shared_region_check_np_args *uap,
+ __unused int *retvalp)
+{
+ vm_shared_region_t shared_region;
+ mach_vm_offset_t start_address = 0;
+ int error;
+ kern_return_t kr;
+
+ SHARED_REGION_TRACE_DEBUG(
+ ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (uint64_t)uap->start_address));
+
+ /* retrieve the current task's shared region */
+ shared_region = vm_shared_region_get(current_task());
+ if (shared_region != NULL) {
+ /* retrieve address of its first mapping... */
+ kr = vm_shared_region_start_address(shared_region,
+ &start_address);
+ if (kr != KERN_SUCCESS) {
+ error = ENOMEM;
+ } else {
+ /* ... and give it to the caller */
+ error = copyout(&start_address,
+ (user_addr_t) uap->start_address,
+ sizeof(start_address));
+ if (error) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] "
+ "check_np(0x%llx) "
+ "copyout(0x%llx) error %d\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (uint64_t)uap->start_address, (uint64_t)start_address,
+ error));
+ }
+ }
+ vm_shared_region_deallocate(shared_region);
+ } else {
+ /* no shared region ! */
+ error = EINVAL;
+ }
+
+ SHARED_REGION_TRACE_DEBUG(
+ ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (uint64_t)uap->start_address, (uint64_t)start_address, error));
+
+ return error;
+}
+
+
+int
+shared_region_copyin_mappings(
+ struct proc *p,
+ user_addr_t user_mappings,
+ unsigned int mappings_count,
+ struct shared_file_mapping_np *mappings)
+{
+ int error = 0;
+ vm_size_t mappings_size = 0;
+
+ /* get the list of mappings the caller wants us to establish */
+ mappings_size = (vm_size_t) (mappings_count * sizeof(mappings[0]));
+ error = copyin(user_mappings,
+ mappings,
+ mappings_size);
+ if (error) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(): "
+ "copyin(0x%llx, %d) failed (error=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (uint64_t)user_mappings, mappings_count, error));
+ }
+ return error;
+}
+/*
+ * shared_region_map_np()
*
- * "dyld" wants to map parts of a split library in the shared region.
- * We get a file descriptor on the split library to be mapped and a set
- * of mapping instructions, describing which parts of the file to map in\
- * which areas of the shared segment and with what protection.
- * The "shared region" is split in 2 areas:
- * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections),
- * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
+ * This system call is intended for dyld.
*
+ * dyld uses this to map a shared cache file into a shared region.
+ * This is usually done only the first time a shared cache is needed.
+ * Subsequent processes will just use the populated shared region without
+ * requiring any further setup.
*/
int
-shared_region_map_file_np(
- struct proc *p,
- struct shared_region_map_file_np_args *uap,
- __unused int *retvalp)
+_shared_region_map_and_slide(
+ struct proc *p,
+ int fd,
+ uint32_t mappings_count,
+ struct shared_file_mapping_np *mappings,
+ uint32_t slide,
+ user_addr_t slide_start,
+ user_addr_t slide_size)
{
- int error;
- kern_return_t kr;
- int fd;
- unsigned int mapping_count;
- user_addr_t user_mappings; /* 64-bit */
- user_addr_t user_slide_p; /* 64-bit */
- struct shared_file_mapping_np *mappings;
- vm_size_t mappings_size;
- struct fileproc *fp;
- mach_vm_offset_t slide;
- struct vnode *vp;
- struct vfs_context context;
- memory_object_control_t file_control;
- memory_object_size_t file_size;
- shared_region_mapping_t shared_region;
- struct shared_region_task_mappings task_mapping_info;
- shared_region_mapping_t next;
- shared_region_mapping_t default_shared_region;
- boolean_t using_default_region;
- unsigned int j;
- vm_prot_t max_prot;
- mach_vm_offset_t base_offset, end_offset;
- mach_vm_offset_t original_base_offset;
- boolean_t mappings_in_segment;
-#define SFM_MAX_STACK 6
- struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
-
- mappings_size = 0;
- mappings = NULL;
- mapping_count = 0;
+ int error;
+ kern_return_t kr;
+ struct fileproc *fp;
+ struct vnode *vp, *root_vp, *scdir_vp;
+ struct vnode_attr va;
+ off_t fs;
+ memory_object_size_t file_size;
+#if CONFIG_MACF
+ vm_prot_t maxprot = VM_PROT_ALL;
+#endif
+ memory_object_control_t file_control;
+ struct vm_shared_region *shared_region;
+ uint32_t i;
+
+ SHARED_REGION_TRACE_DEBUG(
+ ("shared_region: %p [%d(%s)] -> map\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm));
+
+ shared_region = NULL;
fp = NULL;
vp = NULL;
-
- /* get file descriptor for split library from arguments */
- fd = uap->fd;
+ scdir_vp = NULL;
/* get file structure from file descriptor */
error = fp_lookup(p, fd, &fp, 0);
if (error) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] map_file: "
- "fd=%d lookup failed (error=%d)\n",
- current_thread(), p->p_pid, p->p_comm, fd, error));
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map: "
+ "fd=%d lookup failed (error=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm, fd, error));
goto done;
}
/* make sure we're attempting to map a vnode */
- if (fp->f_fglob->fg_type != DTYPE_VNODE) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] map_file: "
- "fd=%d not a vnode (type=%d)\n",
- current_thread(), p->p_pid, p->p_comm,
- fd, fp->f_fglob->fg_type));
+ if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map: "
+ "fd=%d not a vnode (type=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ fd, FILEGLOB_DTYPE(fp->f_fglob)));
error = EINVAL;
goto done;
}
/* we need at least read permission on the file */
- if (! (fp->f_fglob->fg_flag & FREAD)) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] map_file: "
- "fd=%d not readable\n",
- current_thread(), p->p_pid, p->p_comm, fd));
+ if (!(fp->f_fglob->fg_flag & FREAD)) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map: "
+ "fd=%d not readable\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm, fd));
error = EPERM;
goto done;
}
/* get vnode from file structure */
- error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
+ error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
if (error) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] map_file: "
- "fd=%d getwithref failed (error=%d)\n",
- current_thread(), p->p_pid, p->p_comm, fd, error));
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map: "
+ "fd=%d getwithref failed (error=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm, fd, error));
goto done;
}
vp = (struct vnode *) fp->f_fglob->fg_data;
/* make sure the vnode is a regular file */
if (vp->v_type != VREG) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
- "not a file (type=%d)\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name, vp->v_type));
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "not a file (type=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp),
+ vp->v_name, vp->v_type));
error = EINVAL;
goto done;
}
- /* get vnode size */
- {
- off_t fs;
-
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
- if ((error = vnode_size(vp, &fs, &context)) != 0) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "vnode_size(%p) failed (error=%d)\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name, vp));
- goto done;
- }
- file_size = fs;
+#if CONFIG_MACF
+ /* pass in 0 for the offset argument because AMFI does not need the offset
+ * of the shared cache */
+ error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
+ fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
+ if (error) {
+ goto done;
}
+#endif /* CONFIG_MACF */
- /*
- * Get the list of mappings the caller wants us to establish.
- */
- mapping_count = uap->mappingCount; /* the number of mappings */
- mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
- if (mapping_count == 0) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_INFO,
- ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
- "no mappings\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name));
- error = 0; /* no mappings: we're done ! */
- goto done;
- } else if (mapping_count <= SFM_MAX_STACK) {
- mappings = &stack_mappings[0];
+ /* make sure vnode is on the process's root volume */
+ root_vp = p->p_fd->fd_rdir;
+ if (root_vp == NULL) {
+ root_vp = rootvnode;
} else {
- if (mapping_count > SHARED_REGION_MAX_MAPPINGS) {
- error = EINVAL;
- goto done;
- }
- if ((mach_vm_size_t) mappings_size !=
- (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
- /* 32-bit integer overflow */
- error = EINVAL;
- goto done;
- }
- kr = kmem_alloc(kernel_map,
- (vm_offset_t *) &mappings,
- mappings_size);
- if (kr != KERN_SUCCESS) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "failed to allocate %d mappings (kr=0x%x)\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name, mapping_count, kr));
- error = ENOMEM;
- goto done;
- }
+ /*
+ * Chroot-ed processes can't use the shared_region.
+ */
+ error = EINVAL;
+ goto done;
}
- user_mappings = uap->mappings; /* the mappings, in user space */
- error = copyin(user_mappings,
- mappings,
- mappings_size);
- if (error != 0) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
- "failed to copyin %d mappings (error=%d)\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name, mapping_count, error));
+ if (vp->v_mount != root_vp->v_mount) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "not on process's root volume\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
+ error = EPERM;
goto done;
}
- /*
- * If the caller provides a "slide" pointer, it means they're OK
- * with us moving the mappings around to make them fit.
- */
- user_slide_p = uap->slide_p;
+ /* make sure vnode is owned by "root" */
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_uid);
+ error = vnode_getattr(vp, &va, vfs_context_current());
+ if (error) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "vnode_getattr(%p) failed (error=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+ (void *)VM_KERNEL_ADDRPERM(vp), error));
+ goto done;
+ }
+ if (va.va_uid != 0) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "owned by uid=%d instead of 0\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp),
+ vp->v_name, va.va_uid));
+ error = EPERM;
+ goto done;
+ }
- /*
- * Make each mapping address relative to the beginning of the
- * shared region. Check that all mappings are in the shared region.
- * Compute the maximum set of protections required to tell the
- * buffer cache how we mapped the file (see call to ubc_map() below).
- */
- max_prot = VM_PROT_NONE;
- base_offset = -1LL;
- end_offset = 0;
- mappings_in_segment = TRUE;
- for (j = 0; j < mapping_count; j++) {
- mach_vm_offset_t segment;
- segment = (mappings[j].sfm_address &
- GLOBAL_SHARED_SEGMENT_MASK);
- if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
- segment != GLOBAL_SHARED_DATA_SEGMENT) {
- /* this mapping is not in the shared region... */
- if (user_slide_p == NULL) {
- /* ... and we can't slide it in: fail */
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_CONFLICT,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "mapping %p not in shared segment & "
- "no sliding\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name,
- mappings[j].sfm_address));
- error = EINVAL;
- goto done;
- }
- if (j == 0) {
- /* expect all mappings to be outside */
- mappings_in_segment = FALSE;
- } else if (mappings_in_segment != FALSE) {
- /* other mappings were not outside: fail */
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_CONFLICT,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "mapping %p not in shared segment & "
- "other mappings in shared segment\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name,
- mappings[j].sfm_address));
- error = EINVAL;
- goto done;
- }
- /* we'll try and slide that mapping in the segments */
- } else {
- if (j == 0) {
- /* expect all mappings to be inside */
- mappings_in_segment = TRUE;
- } else if (mappings_in_segment != TRUE) {
- /* other mappings were not inside: fail */
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_CONFLICT,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "mapping %p in shared segment & "
- "others in shared segment\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name,
- mappings[j].sfm_address));
- error = EINVAL;
- goto done;
- }
- /* get a relative offset inside the shared segments */
- mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;
- }
- if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
- < base_offset) {
- base_offset = (mappings[j].sfm_address &
- SHARED_TEXT_REGION_MASK);
+ if (scdir_enforce) {
+ /* get vnode for scdir_path */
+ error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
+ if (error) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "vnode_lookup(%s) failed (error=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+ scdir_path, error));
+ goto done;
}
- if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
- mappings[j].sfm_size > end_offset) {
- end_offset =
- (mappings[j].sfm_address &
- SHARED_TEXT_REGION_MASK) +
- mappings[j].sfm_size;
+
+ /* ensure parent is scdir_vp */
+ if (vnode_parent(vp) != scdir_vp) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "shared cache file not in %s\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp),
+ vp->v_name, scdir_path));
+ error = EPERM;
+ goto done;
}
- max_prot |= mappings[j].sfm_max_prot;
- }
- /* Make all mappings relative to the base_offset */
- base_offset = vm_map_trunc_page(base_offset);
- end_offset = vm_map_round_page(end_offset);
- for (j = 0; j < mapping_count; j++) {
- mappings[j].sfm_address -= base_offset;
}
- original_base_offset = base_offset;
- if (mappings_in_segment == FALSE) {
- /*
- * We're trying to map a library that was not pre-bound to
- * be in the shared segments. We want to try and slide it
- * back into the shared segments but as far back as possible,
- * so that it doesn't clash with pre-bound libraries. Set
- * the base_offset to the end of the region, so that it can't
- * possibly fit there and will have to be slid.
- */
- base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
+
+ /* get vnode size */
+ error = vnode_size(vp, &fs, vfs_context_current());
+ if (error) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "vnode_size(%p) failed (error=%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+ (void *)VM_KERNEL_ADDRPERM(vp), error));
+ goto done;
}
+ file_size = fs;
/* get the file's memory object handle */
- UBCINFOCHECK("shared_region_map_file_np", vp);
file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_ERROR,
- ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
- "ubc_getobject() failed\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name));
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "no memory object\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
error = EINVAL;
goto done;
}
- /*
- * Get info about the current process's shared region.
- * This might change if we decide we need to clone the shared region.
- */
- vm_get_shared_region(current_task(), &shared_region);
- task_mapping_info.self = (vm_offset_t) shared_region;
- shared_region_mapping_info(shared_region,
- &(task_mapping_info.text_region),
- &(task_mapping_info.text_size),
- &(task_mapping_info.data_region),
- &(task_mapping_info.data_size),
- &(task_mapping_info.region_mappings),
- &(task_mapping_info.client_base),
- &(task_mapping_info.alternate_base),
- &(task_mapping_info.alternate_next),
- &(task_mapping_info.fs_base),
- &(task_mapping_info.system),
- &(task_mapping_info.flags),
- &next);
-
- /*
- * Are we using the system's current shared region
- * for this environment ?
- */
- default_shared_region =
- lookup_default_shared_region(ENV_DEFAULT_ROOT,
- task_mapping_info.system);
- if (shared_region == default_shared_region) {
- using_default_region = TRUE;
+ /* check that the mappings are properly covered by code signatures */
+ if (!cs_system_enforcement()) {
+ /* code signing is not enforced: no need to check */
} else {
- using_default_region = FALSE;
- }
- shared_region_mapping_dealloc(default_shared_region);
-
- if (vp->v_mount != rootvnode->v_mount &&
- using_default_region) {
- /*
- * The split library is not on the root filesystem. We don't
- * want to polute the system-wide ("default") shared region
- * with it.
- * Reject the mapping. The caller (dyld) should "privatize"
- * (via shared_region_make_private()) the shared region and
- * try to establish the mapping privately for this process.
- */
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_CONFLICT,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "not on root volume\n",
- current_thread(), p->p_pid, p->p_comm,
- vp->v_name));
- error = EXDEV;
- goto done;
+ for (i = 0; i < mappings_count; i++) {
+ if (mappings[i].sfm_init_prot & VM_PROT_ZF) {
+ /* zero-filled mapping: not backed by the file */
+ continue;
+ }
+ if (ubc_cs_is_range_codesigned(vp,
+ mappings[i].sfm_file_offset,
+ mappings[i].sfm_size)) {
+ /* this mapping is fully covered by code signatures */
+ continue;
+ }
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
+ "is not code-signed\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+ i, mappings_count,
+ mappings[i].sfm_address,
+ mappings[i].sfm_size,
+ mappings[i].sfm_file_offset,
+ mappings[i].sfm_max_prot,
+ mappings[i].sfm_init_prot));
+ error = EINVAL;
+ goto done;
+ }
}
+ /* get the process's shared region (setup in vm_map_exec()) */
+ shared_region = vm_shared_region_trim_and_get(current_task());
+ if (shared_region == NULL) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "no shared region\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
+ error = EINVAL;
+ goto done;
+ }
- /*
- * Map the split library.
- */
- kr = map_shared_file(mapping_count,
- mappings,
- file_control,
- file_size,
- &task_mapping_info,
- base_offset,
- (user_slide_p) ? &slide : NULL);
-
- if (kr == KERN_SUCCESS) {
- /*
- * The mapping was successful. Let the buffer cache know
- * that we've mapped that file with these protections. This
- * prevents the vnode from getting recycled while it's mapped.
- */
- (void) ubc_map(vp, max_prot);
- error = 0;
- } else {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_CONFLICT,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "map_shared_file failed, kr=0x%x\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name, kr));
+ /* map the file into that shared region's submap */
+ kr = vm_shared_region_map_file(shared_region,
+ mappings_count,
+ mappings,
+ file_control,
+ file_size,
+ (void *) p->p_fd->fd_rdir,
+ slide,
+ slide_start,
+ slide_size);
+ if (kr != KERN_SUCCESS) {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+ "vm_shared_region_map_file() failed kr=0x%x\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
switch (kr) {
case KERN_INVALID_ADDRESS:
error = EFAULT;
- goto done;
+ break;
case KERN_PROTECTION_FAILURE:
error = EPERM;
- goto done;
+ break;
case KERN_NO_SPACE:
error = ENOMEM;
- goto done;
+ break;
case KERN_FAILURE:
case KERN_INVALID_ARGUMENT:
default:
error = EINVAL;
- goto done;
+ break;
}
+ goto done;
}
- if (p->p_flag & P_NOSHLIB) {
- /* signal that this process is now using split libraries */
- p->p_flag &= ~P_NOSHLIB;
+ error = 0;
+
+ vnode_lock_spin(vp);
+
+ vp->v_flag |= VSHARED_DYLD;
+
+ vnode_unlock(vp);
+
+ /* update the vnode's access time */
+ if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
+ VATTR_INIT(&va);
+ nanotime(&va.va_access_time);
+ VATTR_SET_ACTIVE(&va, va_access_time);
+ vnode_setattr(vp, &va, vfs_context_current());
}
- if (user_slide_p) {
- /*
- * The caller provided a pointer to a "slide" offset. Let
- * them know by how much we slid the mappings.
- */
- if (mappings_in_segment == FALSE) {
- /*
- * We faked the base_offset earlier, so undo that
- * and take into account the real base_offset.
- */
- slide += SHARED_TEXT_REGION_SIZE - end_offset;
- slide -= original_base_offset;
- /*
- * The mappings were slid into the shared segments
- * and "slide" is relative to the beginning of the
- * shared segments. Adjust it to be absolute.
- */
- slide += GLOBAL_SHARED_TEXT_SEGMENT;
- }
- error = copyout(&slide,
- user_slide_p,
- sizeof (slide));
- if (slide != 0) {
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_CONFLICT,
- ("shared_region: %p [%d(%s)] "
- "map_file(%p:'%s'): "
- "slid by 0x%llx\n",
- current_thread(), p->p_pid, p->p_comm,
- vp, vp->v_name, slide));
- }
+ if (p->p_flag & P_NOSHLIB) {
+ /* signal that this process is now using split libraries */
+ OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
}
done:
fp_drop(p, fd, fp, 0);
fp = NULL;
}
- if (mappings != NULL &&
- mappings != &stack_mappings[0]) {
- kmem_free(kernel_map,
- (vm_offset_t) mappings,
- mappings_size);
+ if (scdir_vp != NULL) {
+ (void)vnode_put(scdir_vp);
+ scdir_vp = NULL;
}
- mappings = NULL;
- return error;
-}
-
-int
-load_shared_file(
- __unused struct proc *p,
- __unused struct load_shared_file_args *uap,
- __unused int *retval)
-{
- return ENOSYS;
-}
+ if (shared_region != NULL) {
+ vm_shared_region_deallocate(shared_region);
+ }
-int
-reset_shared_file(
- __unused struct proc *p,
- __unused struct reset_shared_file_args *uap,
- __unused int *retval)
-{
- return ENOSYS;
-}
+ SHARED_REGION_TRACE_DEBUG(
+ ("shared_region: %p [%d(%s)] <- map\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm));
-int
-new_system_shared_regions(
- __unused struct proc *p,
- __unused struct new_system_shared_regions_args *uap,
- __unused int *retval)
-{
- return ENOSYS;
+ return error;
}
-
-
int
-clone_system_shared_regions(
- int shared_regions_active,
- int chain_regions,
- int base_vnode)
+shared_region_map_and_slide_np(
+ struct proc *p,
+ struct shared_region_map_and_slide_np_args *uap,
+ __unused int *retvalp)
{
- shared_region_mapping_t new_shared_region;
- shared_region_mapping_t next;
- shared_region_mapping_t old_shared_region;
- struct shared_region_task_mappings old_info;
- struct shared_region_task_mappings new_info;
-
- vm_get_shared_region(current_task(), &old_shared_region);
- old_info.self = (vm_offset_t)old_shared_region;
- shared_region_mapping_info(old_shared_region,
- &(old_info.text_region),
- &(old_info.text_size),
- &(old_info.data_region),
- &(old_info.data_size),
- &(old_info.region_mappings),
- &(old_info.client_base),
- &(old_info.alternate_base),
- &(old_info.alternate_next),
- &(old_info.fs_base),
- &(old_info.system),
- &(old_info.flags), &next);
-
- if (shared_regions_active ||
- base_vnode == ENV_DEFAULT_ROOT) {
- if (shared_file_create_system_region(&new_shared_region,
- old_info.fs_base,
- old_info.system))
- return ENOMEM;
- } else {
- if (old_shared_region &&
- base_vnode == ENV_DEFAULT_ROOT) {
- base_vnode = old_info.fs_base;
- }
- new_shared_region =
- lookup_default_shared_region(base_vnode,
- old_info.system);
- if (new_shared_region == NULL) {
- shared_file_boot_time_init(base_vnode,
- old_info.system);
- vm_get_shared_region(current_task(),
- &new_shared_region);
- } else {
- vm_set_shared_region(current_task(), new_shared_region);
- }
- if (old_shared_region)
- shared_region_mapping_dealloc(old_shared_region);
- }
- new_info.self = (vm_offset_t)new_shared_region;
- shared_region_mapping_info(new_shared_region,
- &(new_info.text_region),
- &(new_info.text_size),
- &(new_info.data_region),
- &(new_info.data_size),
- &(new_info.region_mappings),
- &(new_info.client_base),
- &(new_info.alternate_base),
- &(new_info.alternate_next),
- &(new_info.fs_base),
- &(new_info.system),
- &(new_info.flags), &next);
- if(shared_regions_active) {
- if(vm_region_clone(old_info.text_region, new_info.text_region)) {
- panic("clone_system_shared_regions: shared region mis-alignment 1");
- shared_region_mapping_dealloc(new_shared_region);
- return(EINVAL);
- }
- if (vm_region_clone(old_info.data_region, new_info.data_region)) {
- panic("clone_system_shared_regions: shared region mis-alignment 2");
- shared_region_mapping_dealloc(new_shared_region);
- return(EINVAL);
- }
- if (chain_regions) {
- /*
- * We want a "shadowed" clone, a private superset of the old
- * shared region. The info about the old mappings is still
- * valid for us.
- */
- shared_region_object_chain_attach(
- new_shared_region, old_shared_region);
- } else {
- /*
- * We want a completely detached clone with no link to
- * the old shared region. We'll be removing some mappings
- * in our private, cloned, shared region, so the old mappings
- * will become irrelevant to us. Since we have a private
- * "shared region" now, it isn't going to be shared with
- * anyone else and we won't need to maintain mappings info.
- */
- shared_region_object_chain_detached(new_shared_region);
- }
- }
- if (vm_map_region_replace(current_map(), old_info.text_region,
- new_info.text_region, old_info.client_base,
- old_info.client_base+old_info.text_size)) {
- panic("clone_system_shared_regions: shared region mis-alignment 3");
- shared_region_mapping_dealloc(new_shared_region);
- return(EINVAL);
- }
- if(vm_map_region_replace(current_map(), old_info.data_region,
- new_info.data_region,
- old_info.client_base + old_info.text_size,
- old_info.client_base
- + old_info.text_size + old_info.data_size)) {
- panic("clone_system_shared_regions: shared region mis-alignment 4");
- shared_region_mapping_dealloc(new_shared_region);
- return(EINVAL);
- }
- vm_set_shared_region(current_task(), new_shared_region);
-
- /* consume the reference which wasn't accounted for in object */
- /* chain attach */
- if (!shared_regions_active || !chain_regions)
- shared_region_mapping_dealloc(old_shared_region);
-
- SHARED_REGION_TRACE(
- SHARED_REGION_TRACE_INFO,
- ("shared_region: %p task=%p "
- "clone(active=%d, base=0x%x,chain=%d) "
- "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
- current_thread(), current_task(),
- shared_regions_active, base_vnode, chain_regions,
- old_shared_region,
- old_info.fs_base,
- old_info.system,
- old_info.flags,
- new_shared_region,
- new_info.fs_base,
- new_info.system,
- new_info.flags));
-
- return(0);
-
-}
-
-/* header for the profile name file. The profiled app info is held */
-/* in the data file and pointed to by elements in the name file */
-
-struct profile_names_header {
- unsigned int number_of_profiles;
- unsigned int user_id;
- unsigned int version;
- off_t element_array;
- unsigned int spare1;
- unsigned int spare2;
- unsigned int spare3;
-};
-
-struct profile_element {
- off_t addr;
- vm_size_t size;
- unsigned int mod_date;
- unsigned int inode;
- char name[12];
-};
-
-struct global_profile {
- struct vnode *names_vp;
- struct vnode *data_vp;
- vm_offset_t buf_ptr;
- unsigned int user;
- unsigned int age;
- unsigned int busy;
-};
-
-struct global_profile_cache {
- int max_ele;
- unsigned int age;
- struct global_profile profiles[3];
-};
+ struct shared_file_mapping_np *mappings;
+ unsigned int mappings_count = uap->count;
+ kern_return_t kr = KERN_SUCCESS;
+ uint32_t slide = uap->slide;
-/* forward declarations */
-int bsd_open_page_cache_files(unsigned int user,
- struct global_profile **profile);
-void bsd_close_page_cache_files(struct global_profile *profile);
-int bsd_search_page_cache_data_base(
- struct vnode *vp,
- struct profile_names_header *database,
- char *app_name,
- unsigned int mod_date,
- unsigned int inode,
- off_t *profile,
- unsigned int *profile_size);
-
-struct global_profile_cache global_user_profile_cache =
- {3, 0, {{NULL, NULL, 0, 0, 0, 0},
- {NULL, NULL, 0, 0, 0, 0},
- {NULL, NULL, 0, 0, 0, 0}} };
-
-/* BSD_OPEN_PAGE_CACHE_FILES: */
-/* Caller provides a user id. This id was used in */
-/* prepare_profile_database to create two unique absolute */
-/* file paths to the associated profile files. These files */
-/* are either opened or bsd_open_page_cache_files returns an */
-/* error. The header of the names file is then consulted. */
-/* The header and the vnodes for the names and data files are */
-/* returned. */
+#define SFM_MAX_STACK 8
+ struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
-int
-bsd_open_page_cache_files(
- unsigned int user,
- struct global_profile **profile)
-{
- const char *cache_path = "/var/vm/app_profile/";
- struct proc *p;
- int error;
- vm_size_t resid;
- off_t resid_off;
- unsigned int lru;
- vm_size_t size;
-
- struct vnode *names_vp;
- struct vnode *data_vp;
- vm_offset_t names_buf;
- vm_offset_t buf_ptr;
-
- int profile_names_length;
- int profile_data_length;
- char *profile_data_string;
- char *profile_names_string;
- char *substring;
-
- off_t file_size;
- struct vfs_context context;
-
- kern_return_t ret;
-
- struct nameidata nd_names;
- struct nameidata nd_data;
- int i;
-
-
- p = current_proc();
-
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
-
-restart:
- for(i = 0; i<global_user_profile_cache.max_ele; i++) {
- if((global_user_profile_cache.profiles[i].user == user)
- && (global_user_profile_cache.profiles[i].data_vp
- != NULL)) {
- *profile = &global_user_profile_cache.profiles[i];
- /* already in cache, we're done */
- if ((*profile)->busy) {
- /*
- * drop funnel and wait
- */
- (void)tsleep((void *)
- *profile,
- PRIBIO, "app_profile", 0);
- goto restart;
- }
- (*profile)->busy = 1;
- (*profile)->age = global_user_profile_cache.age;
+ /* Is the process chrooted? Chroot-ed processes can't use the shared region. */
+ if (p->p_fd->fd_rdir != NULL) {
+ kr = EINVAL;
+ goto done;
+ }
+ if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
+ if (kr == KERN_INVALID_ARGUMENT) {
/*
- * entries in cache are held with a valid
- * usecount... take an iocount which will
- * be dropped in "bsd_close_page_cache_files"
- * which is called after the read or writes to
- * these files are done
+ * This will happen if we request sliding again
+ * with the same slide value that was used earlier
+ * for the very first sliding.
*/
- if ( (vnode_getwithref((*profile)->data_vp)) ) {
-
- vnode_rele((*profile)->data_vp);
- vnode_rele((*profile)->names_vp);
-
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
-
- goto restart;
- }
- if ( (vnode_getwithref((*profile)->names_vp)) ) {
-
- vnode_put((*profile)->data_vp);
- vnode_rele((*profile)->data_vp);
- vnode_rele((*profile)->names_vp);
-
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
-
- goto restart;
- }
- global_user_profile_cache.age+=1;
- return 0;
- }
- }
-
- lru = global_user_profile_cache.age;
- *profile = NULL;
- for(i = 0; i<global_user_profile_cache.max_ele; i++) {
- /* Skip entry if it is in the process of being reused */
- if(global_user_profile_cache.profiles[i].data_vp ==
- (struct vnode *)0xFFFFFFFF)
- continue;
- /* Otherwise grab the first empty entry */
- if(global_user_profile_cache.profiles[i].data_vp == NULL) {
- *profile = &global_user_profile_cache.profiles[i];
- (*profile)->age = global_user_profile_cache.age;
- break;
- }
- /* Otherwise grab the oldest entry */
- if(global_user_profile_cache.profiles[i].age < lru) {
- lru = global_user_profile_cache.profiles[i].age;
- *profile = &global_user_profile_cache.profiles[i];
- }
- }
-
- /* Did we set it? */
- if (*profile == NULL) {
- /*
- * No entries are available; this can only happen if all
- * of them are currently in the process of being reused;
- * if this happens, we sleep on the address of the first
- * element, and restart. This is less than ideal, but we
- * know it will work because we know that there will be a
- * wakeup on any entry currently in the process of being
- * reused.
- *
- * XXX Reccomend a two handed clock and more than 3 total
- * XXX cache entries at some point in the future.
- */
- /*
- * drop funnel and wait
- */
- (void)tsleep((void *)
- &global_user_profile_cache.profiles[0],
- PRIBIO, "app_profile", 0);
- goto restart;
- }
-
- /*
- * If it's currently busy, we've picked the one at the end of the
- * LRU list, but it's currently being actively used. We sleep on
- * its address and restart.
- */
- if ((*profile)->busy) {
- /*
- * drop funnel and wait
- */
- (void)tsleep((void *)
- *profile,
- PRIBIO, "app_profile", 0);
- goto restart;
- }
- (*profile)->busy = 1;
- (*profile)->user = user;
-
- /*
- * put dummy value in for now to get competing request to wait
- * above until we are finished
- *
- * Save the data_vp before setting it, so we can set it before
- * we kmem_free() or vrele(). If we don't do this, then we
- * have a potential funnel race condition we have to deal with.
- */
- data_vp = (*profile)->data_vp;
- (*profile)->data_vp = (struct vnode *)0xFFFFFFFF;
-
- /*
- * Age the cache here in all cases; this guarantees that we won't
- * be reusing only one entry over and over, once the system reaches
- * steady-state.
- */
- global_user_profile_cache.age+=1;
-
- if(data_vp != NULL) {
- kmem_free(kernel_map,
- (*profile)->buf_ptr, 4 * PAGE_SIZE);
- if ((*profile)->names_vp) {
- vnode_rele((*profile)->names_vp);
- (*profile)->names_vp = NULL;
+ kr = KERN_SUCCESS;
}
- vnode_rele(data_vp);
- }
-
- /* Try to open the appropriate users profile files */
- /* If neither file is present, try to create them */
- /* If one file is present and the other not, fail. */
- /* If the files do exist, check them for the app_file */
- /* requested and read it in if present */
-
- ret = kmem_alloc(kernel_map,
- (vm_offset_t *)&profile_data_string, PATH_MAX);
-
- if(ret) {
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
- return ENOMEM;
+ goto done;
}
- /* Split the buffer in half since we know the size of */
- /* our file path and our allocation is adequate for */
- /* both file path names */
- profile_names_string = profile_data_string + (PATH_MAX/2);
-
-
- strcpy(profile_data_string, cache_path);
- strcpy(profile_names_string, cache_path);
- profile_names_length = profile_data_length
- = strlen(profile_data_string);
- substring = profile_data_string + profile_data_length;
- sprintf(substring, "%x_data", user);
- substring = profile_names_string + profile_names_length;
- sprintf(substring, "%x_names", user);
-
- /* We now have the absolute file names */
-
- ret = kmem_alloc(kernel_map,
- (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
- if(ret) {
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
- return ENOMEM;
+ if (mappings_count == 0) {
+ SHARED_REGION_TRACE_INFO(
+ ("shared_region: %p [%d(%s)] map(): "
+ "no mappings\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm));
+ kr = 0; /* no mappings: we're done ! */
+ goto done;
+ } else if (mappings_count <= SFM_MAX_STACK) {
+ mappings = &stack_mappings[0];
+ } else {
+ SHARED_REGION_TRACE_ERROR(
+ ("shared_region: %p [%d(%s)] map(): "
+ "too many mappings (%d)\n",
+ (void *)VM_KERNEL_ADDRPERM(current_thread()),
+ p->p_pid, p->p_comm,
+ mappings_count));
+ kr = KERN_FAILURE;
+ goto done;
}
- NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF,
- UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
- NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF,
- UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
-
- if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
-#ifdef notdef
- printf("bsd_open_page_cache_files: CacheData file not found %s\n",
- profile_data_string);
-#endif
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
- return error;
+ if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
+ goto done;
}
- data_vp = nd_data.ni_vp;
-
- if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
- printf("bsd_open_page_cache_files: NamesData file not found %s\n",
- profile_data_string);
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
- vnode_rele(data_vp);
- vnode_put(data_vp);
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
- return error;
+ kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
+ slide,
+ uap->slide_start, uap->slide_size);
+ if (kr != KERN_SUCCESS) {
+ return kr;
}
- names_vp = nd_names.ni_vp;
-
- if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
- printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
- vnode_rele(names_vp);
- vnode_put(names_vp);
- vnode_rele(data_vp);
- vnode_put(data_vp);
-
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
- return error;
- }
+done:
+ return kr;
+}
- size = file_size;
- if(size > 4 * PAGE_SIZE)
- size = 4 * PAGE_SIZE;
- buf_ptr = names_buf;
- resid_off = 0;
-
- while(size) {
- int resid_int;
- error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr,
- size, resid_off,
- UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
- &resid_int, p);
- resid = (vm_size_t) resid_int;
- if((error) || (size == resid)) {
- if(!error) {
- error = EINVAL;
- }
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-
- vnode_rele(names_vp);
- vnode_put(names_vp);
- vnode_rele(data_vp);
- vnode_put(data_vp);
-
- (*profile)->data_vp = NULL;
- (*profile)->busy = 0;
- wakeup(*profile);
- return error;
- }
- buf_ptr += size-resid;
- resid_off += size-resid;
- size = resid;
- }
- kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);
+/* sysctl overflow room */
- (*profile)->names_vp = names_vp;
- (*profile)->data_vp = data_vp;
- (*profile)->buf_ptr = names_buf;
+SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
+ (int *) &page_size, 0, "vm page size");
- /*
- * at this point, the both the names_vp and the data_vp have
- * both a valid usecount and an iocount held
- */
- return 0;
+/* vm_page_free_target is provided as a makeshift solution for applications that want to
+ * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
+ * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
+extern unsigned int vm_page_free_target;
+SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_free_target, 0, "Pageout daemon free target");
-}
+SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
-void
-bsd_close_page_cache_files(
- struct global_profile *profile)
+static int
+vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS /* sysctl handler: reports the value returned by mach_vm_ctl_page_free_wanted() */
{
- vnode_put(profile->data_vp);
- vnode_put(profile->names_vp);
+#pragma unused(oidp, arg1, arg2)
+ unsigned int page_free_wanted; /* local copy of the value handed to SYSCTL_OUT() */
- profile->busy = 0;
- wakeup(profile);
+ page_free_wanted = mach_vm_ctl_page_free_wanted();
+ return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted)); /* copy the value out through req; SYSCTL_OUT's result is returned as-is */
}
+SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, vm_ctl_page_free_wanted, "I", "");
-int
-bsd_read_page_cache_file(
- unsigned int user,
- int *fid,
- int *mod,
- char *app_name,
- struct vnode *app_vp,
- vm_offset_t *buffer,
- vm_offset_t *bufsize)
-{
-
- boolean_t funnel_state;
-
- struct proc *p;
- int error;
- unsigned int resid;
-
- off_t profile;
- unsigned int profile_size;
+extern unsigned int vm_page_purgeable_count;
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_purgeable_count, 0, "Purgeable page count");
- vm_offset_t names_buf;
- struct vnode_attr va;
- struct vfs_context context;
+extern unsigned int vm_page_purgeable_wired_count;
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
- kern_return_t ret;
+#if DEVELOPMENT || DEBUG
+extern uint64_t get_pages_grabbed_count(void);
- struct vnode *names_vp;
- struct vnode *data_vp;
-
- struct global_profile *uid_files;
-
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
- /* Try to open the appropriate users profile files */
- /* If neither file is present, try to create them */
- /* If one file is present and the other not, fail. */
- /* If the files do exist, check them for the app_file */
- /* requested and read it in if present */
+static int
+pages_grabbed SYSCTL_HANDLER_ARGS /* sysctl handler: reports get_pages_grabbed_count() as a 64-bit value */
+{
+#pragma unused(arg1, arg2, oidp)
+ uint64_t value = get_pages_grabbed_count(); /* read the counter once into a local */
+ return SYSCTL_OUT(req, &value, sizeof(value)); /* hand sizeof(value) bytes to the request; SYSCTL_OUT's result is returned as-is */
+}
+SYSCTL_PROC(_vm, OID_AUTO, pages_grabbed, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, &pages_grabbed, "QU", "Total pages grabbed");
+SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
+
+SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
+
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated) */
+SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
+#endif
- error = bsd_open_page_cache_files(user, &uid_files);
- if(error) {
- thread_funnel_set(kernel_flock, funnel_state);
- return EINVAL;
- }
+extern int madvise_free_debug;
+SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
+
+SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reusable_pages_success, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reusable_pages_failure, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reusable_pages_shared, "");
+SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.all_reusable_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.partial_reusable_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reuse_pages_success, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reuse_pages_failure, "");
+SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.all_reuse_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.partial_reuse_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.can_reuse_success, "");
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.can_reuse_failure, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reusable_reclaimed, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reusable_nonwritable, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.reusable_shared, "");
+SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vm_page_stats_reusable.free_shared, "");
+
+
+extern unsigned int vm_page_free_count, vm_page_speculative_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
+
+extern unsigned int vm_page_cleaned_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
+
+extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
+
+/* pageout counts */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
+
+SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
+SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
+SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, ""); /* NOTE(review): reports the same field as pageout_freed_speculative below - confirm the alias is intended */
+SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
+SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
+SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
+
+
+/* counts of pages prefaulted when entering a memory object */
+extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
+SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
+SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
+
+#if defined (__x86_64__)
+extern unsigned int vm_clump_promote_threshold;
+SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
+#if DEVELOPMENT || DEBUG
+extern unsigned long vm_clump_stats[];
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
+extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
+SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
+#endif /* if DEVELOPMENT || DEBUG */
+#endif /* #if defined (__x86_64__) */
+
+#if CONFIG_SECLUDED_MEMORY
+
+SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
+extern unsigned int vm_page_secluded_target;
+extern unsigned int vm_page_secluded_count;
+extern unsigned int vm_page_secluded_count_free;
+extern unsigned int vm_page_secluded_count_inuse;
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
+
+extern struct vm_page_secluded_data vm_page_secluded;
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
+
+#endif /* CONFIG_SECLUDED_MEMORY */
- p = current_proc();
+#include <kern/thread.h>
+#include <sys/user.h>
- names_vp = uid_files->names_vp;
- data_vp = uid_files->data_vp;
- names_buf = uid_files->buf_ptr;
+void vm_pageout_io_throttle(void);
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
+void
+vm_pageout_io_throttle(void)
+{
+ struct uthread *uthread = get_bsdthread_info(current_thread());
- VATTR_INIT(&va);
- VATTR_WANTED(&va, va_fileid);
- VATTR_WANTED(&va, va_modify_time);
-
- if ((error = vnode_getattr(app_vp, &va, &context))) {
- printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return error;
- }
+ /*
+ * If the thread is marked as a low-priority I/O type
+ * and the I/O it issued during this cleaning operation
+ * collided with normal I/O operations, delay here
+ * in order to mitigate the impact of this
+ * task on the normal operation of the system.
+ */
- *fid = (u_long)va.va_fileid;
- *mod = va.va_modify_time.tv_sec;
-
- if (bsd_search_page_cache_data_base(
- names_vp,
- (struct profile_names_header *)names_buf,
- app_name,
- (unsigned int) va.va_modify_time.tv_sec,
- (u_long)va.va_fileid, &profile, &profile_size) == 0) {
- /* profile is an offset in the profile data base */
- /* It is zero if no profile data was found */
-
- if(profile_size == 0) {
- *buffer = 0;
- *bufsize = 0;
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return 0;
- }
- ret = (vm_offset_t)(kmem_alloc(kernel_map, buffer, profile_size));
- if(ret) {
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return ENOMEM;
- }
- *bufsize = profile_size;
- while(profile_size) {
- int resid_int;
- error = vn_rdwr(UIO_READ, data_vp,
- (caddr_t) *buffer, profile_size,
- profile, UIO_SYSSPACE32, IO_NODELOCKED,
- kauth_cred_get(), &resid_int, p);
- resid = (vm_size_t) resid_int;
- if((error) || (profile_size == resid)) {
- bsd_close_page_cache_files(uid_files);
- kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
- thread_funnel_set(kernel_flock, funnel_state);
- return EINVAL;
- }
- profile += profile_size - resid;
- profile_size = resid;
- }
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return 0;
- } else {
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return EINVAL;
+ if (uthread->uu_lowpri_window) {
+ throttle_lowpri_io(1);
}
-
}
int
-bsd_search_page_cache_data_base(
- struct vnode *vp,
- struct profile_names_header *database,
- char *app_name,
- unsigned int mod_date,
- unsigned int inode,
- off_t *profile,
- unsigned int *profile_size)
+vm_pressure_monitor( /* forwards the request described by uap to mach_vm_pressure_monitor() */
+ __unused struct proc *p,
+ struct vm_pressure_monitor_args *uap,
+ int *retval) /* receives pages_wanted on success */
 {
+ kern_return_t kr;
+ uint32_t pages_reclaimed; /* its address is passed on only when uap->pages_reclaimed is set */
+ uint32_t pages_wanted; /* always requested from the callee */
- struct proc *p;
-
- unsigned int i;
- struct profile_element *element;
- unsigned int ele_total;
- unsigned int extended_list = 0;
- off_t file_off = 0;
- unsigned int size;
- off_t resid_off;
- unsigned int resid;
- vm_offset_t local_buf = 0;
+ kr = mach_vm_pressure_monitor(
+ (boolean_t) uap->wait_for_pressure,
+ uap->nsecs_monitored,
+ (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
+ &pages_wanted);
- int error;
- kern_return_t ret;
-
- p = current_proc();
-
- if(((vm_offset_t)database->element_array) !=
- sizeof(struct profile_names_header)) {
+ switch (kr) {
+ case KERN_SUCCESS:
+ break;
+ case KERN_ABORTED:
+ return EINTR;
+ default:
return EINVAL;
}
- element = (struct profile_element *)(
- (vm_offset_t)database->element_array +
- (vm_offset_t)database);
-
- ele_total = database->number_of_profiles;
-
- *profile = 0;
- *profile_size = 0;
- while(ele_total) {
- /* note: code assumes header + n*ele comes out on a page boundary */
- if(((local_buf == 0) && (sizeof(struct profile_names_header) +
- (ele_total * sizeof(struct profile_element)))
- > (PAGE_SIZE * 4)) ||
- ((local_buf != 0) &&
- (ele_total * sizeof(struct profile_element))
- > (PAGE_SIZE * 4))) {
- extended_list = ele_total;
- if(element == (struct profile_element *)
- ((vm_offset_t)database->element_array +
- (vm_offset_t)database)) {
- ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
- } else {
- ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
- }
- extended_list -= ele_total;
- }
- for (i=0; i<ele_total; i++) {
- if((mod_date == element[i].mod_date)
- && (inode == element[i].inode)) {
- if(strncmp(element[i].name, app_name, 12) == 0) {
- *profile = element[i].addr;
- *profile_size = element[i].size;
- if(local_buf != 0) {
- kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
- }
- return 0;
- }
- }
- }
- if(extended_list == 0)
- break;
- if(local_buf == 0) {
- ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
- if(ret != KERN_SUCCESS) {
- return ENOMEM;
- }
- }
- element = (struct profile_element *)local_buf;
- ele_total = extended_list;
- extended_list = 0;
- file_off += 4 * PAGE_SIZE;
- if((ele_total * sizeof(struct profile_element)) >
- (PAGE_SIZE * 4)) {
- size = PAGE_SIZE * 4;
- } else {
- size = ele_total * sizeof(struct profile_element);
- }
- resid_off = 0;
- while(size) {
- int resid_int;
- error = vn_rdwr(UIO_READ, vp,
- CAST_DOWN(caddr_t, (local_buf + resid_off)),
- size, file_off + resid_off, UIO_SYSSPACE32,
- IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
- resid = (vm_size_t) resid_int;
- if((error) || (size == resid)) {
- if(local_buf != 0) {
- kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
- }
- return EINVAL;
- }
- resid_off += size-resid;
- size = resid;
+
+ if (uap->pages_reclaimed) { /* caller supplied a destination for the reclaimed-page count */
+ if (copyout((void *)&pages_reclaimed,
+ uap->pages_reclaimed,
+ sizeof(pages_reclaimed)) != 0) {
+ return EFAULT; /* copyout to the caller-supplied address failed */
}
}
- if(local_buf != 0) {
- kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
- }
+
+ *retval = (int) pages_wanted;
return 0;
}
int
-bsd_write_page_cache_file(
- unsigned int user,
- char *file_name,
- caddr_t buffer,
- vm_size_t size,
- int mod,
- int fid)
+kas_info(struct proc *p,
+ struct kas_info_args *uap,
+ int *retval __unused)
{
- struct proc *p;
- int resid;
- off_t resid_off;
- int error;
- boolean_t funnel_state;
- off_t file_size;
- struct vfs_context context;
- off_t profile;
- unsigned int profile_size;
-
- vm_offset_t names_buf;
- struct vnode *names_vp;
- struct vnode *data_vp;
- struct profile_names_header *profile_header;
- off_t name_offset;
- struct global_profile *uid_files;
-
-
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
-
- error = bsd_open_page_cache_files(user, &uid_files);
- if(error) {
- thread_funnel_set(kernel_flock, funnel_state);
- return EINVAL;
+#ifdef SECURE_KERNEL
+ (void)p;
+ (void)uap;
+ return ENOTSUP; /* SECURE_KERNEL builds always refuse this call */
+#else /* !SECURE_KERNEL */
+ int selector = uap->selector;
+ user_addr_t valuep = uap->value;
+ user_addr_t sizep = uap->size; /* user address of the caller's size word (read below, written back per selector) */
+ user_size_t size;
+ int error;
+
+ if (!kauth_cred_issuser(kauth_cred_get())) { /* EPERM unless kauth_cred_issuser() accepts the caller's credential */
+ return EPERM;
+ }
+
+#if CONFIG_MACF
+ error = mac_system_check_kas_info(kauth_cred_get(), selector); /* MAC check for this selector; any error is returned unchanged */
+ if (error) {
+ return error;
 }
+#endif
- p = current_proc();
-
- names_vp = uid_files->names_vp;
- data_vp = uid_files->data_vp;
- names_buf = uid_files->buf_ptr;
+ if (IS_64BIT_PROCESS(p)) { /* the size word's width follows the caller's ABI (64- vs 32-bit process) */
+ user64_size_t size64;
+ error = copyin(sizep, &size64, sizeof(size64));
+ size = (user_size_t)size64;
+ } else {
+ user32_size_t size32;
+ error = copyin(sizep, &size32, sizeof(size32));
+ size = (user_size_t)size32;
+ }
+ if (error) { /* copyin of the size word failed */
+ return error;
+ }
- /* Stat data file for size */
+ switch (selector) {
+ case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
+ {
+ uint64_t slide = vm_kernel_slide;
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
+ if (sizeof(slide) != size) {
+ return EINVAL;
+ }
- if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
- printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return error;
- }
-
- if (bsd_search_page_cache_data_base(names_vp,
- (struct profile_names_header *)names_buf,
- file_name, (unsigned int) mod,
- fid, &profile, &profile_size) == 0) {
- /* profile is an offset in the profile data base */
- /* It is zero if no profile data was found */
-
- if(profile_size == 0) {
- unsigned int header_size;
- vm_offset_t buf_ptr;
-
- /* Our Write case */
-
- /* read header for last entry */
- profile_header =
- (struct profile_names_header *)names_buf;
- name_offset = sizeof(struct profile_names_header) +
- (sizeof(struct profile_element)
- * profile_header->number_of_profiles);
- profile_header->number_of_profiles += 1;
-
- if(name_offset < PAGE_SIZE * 4) {
- struct profile_element *name;
- /* write new entry */
- name = (struct profile_element *)
- (names_buf + (vm_offset_t)name_offset);
- name->addr = file_size;
- name->size = size;
- name->mod_date = mod;
- name->inode = fid;
- strncpy (name->name, file_name, 12);
- } else {
- unsigned int ele_size;
- struct profile_element name;
- /* write new entry */
- name.addr = file_size;
- name.size = size;
- name.mod_date = mod;
- name.inode = fid;
- strncpy (name.name, file_name, 12);
- /* write element out separately */
- ele_size = sizeof(struct profile_element);
- buf_ptr = (vm_offset_t)&name;
- resid_off = name_offset;
-
- while(ele_size) {
- error = vn_rdwr(UIO_WRITE, names_vp,
- (caddr_t)buf_ptr,
- ele_size, resid_off,
- UIO_SYSSPACE32, IO_NODELOCKED,
- kauth_cred_get(), &resid, p);
- if(error) {
- printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
- bsd_close_page_cache_files(
- uid_files);
- thread_funnel_set(
- kernel_flock,
- funnel_state);
- return error;
- }
- buf_ptr += (vm_offset_t)
- ele_size-resid;
- resid_off += ele_size-resid;
- ele_size = resid;
- }
- }
+ if (IS_64BIT_PROCESS(p)) {
+ user64_size_t size64 = (user64_size_t)size;
+ error = copyout(&size64, sizep, sizeof(size64));
+ } else {
+ user32_size_t size32 = (user32_size_t)size;
+ error = copyout(&size32, sizep, sizeof(size32));
+ }
+ if (error) {
+ return error;
+ }
- if(name_offset < PAGE_SIZE * 4) {
- header_size = name_offset +
- sizeof(struct profile_element);
-
- } else {
- header_size =
- sizeof(struct profile_names_header);
- }
- buf_ptr = (vm_offset_t)profile_header;
- resid_off = 0;
-
- /* write names file header */
- while(header_size) {
- error = vn_rdwr(UIO_WRITE, names_vp,
- (caddr_t)buf_ptr,
- header_size, resid_off,
- UIO_SYSSPACE32, IO_NODELOCKED,
- kauth_cred_get(), &resid, p);
- if(error) {
- printf("bsd_write_page_cache_file: Can't write header %x\n", user);
- bsd_close_page_cache_files(
- uid_files);
- thread_funnel_set(
- kernel_flock, funnel_state);
- return error;
- }
- buf_ptr += (vm_offset_t)header_size-resid;
- resid_off += header_size-resid;
- header_size = resid;
- }
- /* write profile to data file */
- resid_off = file_size;
- while(size) {
- error = vn_rdwr(UIO_WRITE, data_vp,
- (caddr_t)buffer, size, resid_off,
- UIO_SYSSPACE32, IO_NODELOCKED,
- kauth_cred_get(), &resid, p);
- if(error) {
- printf("bsd_write_page_cache_file: Can't write header %x\n", user);
- bsd_close_page_cache_files(
- uid_files);
- thread_funnel_set(
- kernel_flock, funnel_state);
- return error;
- }
- buffer += size-resid;
- resid_off += size-resid;
- size = resid;
- }
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return 0;
+ error = copyout(&slide, valuep, sizeof(slide));
+ if (error) {
+ return error;
}
- /* Someone else wrote a twin profile before us */
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
- return 0;
- } else {
- bsd_close_page_cache_files(uid_files);
- thread_funnel_set(kernel_flock, funnel_state);
+ }
+ break;
+ default:
return EINVAL;
}
-
-}
-int
-prepare_profile_database(int user)
-{
- const char *cache_path = "/var/vm/app_profile/";
- struct proc *p;
- int error;
- int resid;
- off_t resid_off;
- vm_size_t size;
+ return 0;
+#endif /* !SECURE_KERNEL */
+}
- struct vnode *names_vp;
- struct vnode *data_vp;
- vm_offset_t names_buf;
- vm_offset_t buf_ptr;
- int profile_names_length;
- int profile_data_length;
- char *profile_data_string;
- char *profile_names_string;
- char *substring;
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+#pragma clang diagnostic ignored "-Wunused-function"
- struct vnode_attr va;
- struct vfs_context context;
+static void /* holds compile-time checks only; apparently never called (the pragma above silences -Wunused-function) */
+asserts()
+{
+ static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long)); /* backs the (unsigned long *) cast in the SYSCTL_ULONG entry below */
+ static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long)); /* likewise for vm_max_kernel_address */
+}
- struct profile_names_header *profile_header;
- kern_return_t ret;
+SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
+SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
+#pragma clang diagnostic pop
- struct nameidata nd_names;
- struct nameidata nd_data;
+extern uint32_t vm_page_pages;
+SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
- p = current_proc();
+extern uint32_t vm_page_busy_absent_skipped;
+SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
+#if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
+extern int vm_footprint_suspend_allowed;
+SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
- ret = kmem_alloc(kernel_map,
- (vm_offset_t *)&profile_data_string, PATH_MAX);
+extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
+static int /* write handler for the vm.footprint_suspend sysctl registered after this function */
+sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int error = 0;
+ int new_value; /* filled in from the request by SYSCTL_IN */
- if(ret) {
- return ENOMEM;
+ if (req->newptr == USER_ADDR_NULL) { /* no new value supplied: succeed without doing anything */
+ return 0;
 }
-
- /* Split the buffer in half since we know the size of */
- /* our file path and our allocation is adequate for */
- /* both file path names */
- profile_names_string = profile_data_string + (PATH_MAX/2);
-
-
- strcpy(profile_data_string, cache_path);
- strcpy(profile_names_string, cache_path);
- profile_names_length = profile_data_length
- = strlen(profile_data_string);
- substring = profile_data_string + profile_data_length;
- sprintf(substring, "%x_data", user);
- substring = profile_names_string + profile_names_length;
- sprintf(substring, "%x_names", user);
-
- /* We now have the absolute file names */
-
- ret = kmem_alloc(kernel_map,
- (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
- if(ret) {
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
- return ENOMEM;
+ error = SYSCTL_IN(req, &new_value, sizeof(int));
+ if (error) {
+ return error;
}
-
- NDINIT(&nd_names, LOOKUP, FOLLOW,
- UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
- NDINIT(&nd_data, LOOKUP, FOLLOW,
- UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
-
- if ( (error = vn_open(&nd_data,
- O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
-
+ if (!vm_footprint_suspend_allowed) {
+ if (new_value != 0) {
+ /* suspends are not allowed... */
return 0;
- }
- data_vp = nd_data.ni_vp;
-
- if ( (error = vn_open(&nd_names,
- O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
- printf("prepare_profile_database: Can't create CacheNames %s\n",
- profile_data_string);
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
-
- vnode_rele(data_vp);
- vnode_put(data_vp);
-
- return error;
- }
- names_vp = nd_names.ni_vp;
-
- /* Write Header for new names file */
-
- profile_header = (struct profile_names_header *)names_buf;
-
- profile_header->number_of_profiles = 0;
- profile_header->user_id = user;
- profile_header->version = 1;
- profile_header->element_array =
- sizeof(struct profile_names_header);
- profile_header->spare1 = 0;
- profile_header->spare2 = 0;
- profile_header->spare3 = 0;
-
- size = sizeof(struct profile_names_header);
- buf_ptr = (vm_offset_t)profile_header;
- resid_off = 0;
-
- while(size) {
- error = vn_rdwr(UIO_WRITE, names_vp,
- (caddr_t)buf_ptr, size, resid_off,
- UIO_SYSSPACE32, IO_NODELOCKED,
- kauth_cred_get(), &resid, p);
- if(error) {
- printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string,
- PATH_MAX);
-
- vnode_rele(names_vp);
- vnode_put(names_vp);
- vnode_rele(data_vp);
- vnode_put(data_vp);
-
- return error;
}
- buf_ptr += size-resid;
- resid_off += size-resid;
- size = resid;
+ /* ... but let resumes proceed */
}
- VATTR_INIT(&va);
- VATTR_SET(&va, va_uid, user);
+ DTRACE_VM2(footprint_suspend,
+ vm_map_t, current_map(),
+ int, new_value);
- error = vnode_setattr(names_vp, &va, &context);
- if(error) {
- printf("prepare_profile_database: "
- "Can't set user %s\n", profile_names_string);
- }
- vnode_rele(names_vp);
- vnode_put(names_vp);
-
- VATTR_INIT(&va);
- VATTR_SET(&va, va_uid, user);
- error = vnode_setattr(data_vp, &va, &context);
- if(error) {
- printf("prepare_profile_database: "
- "Can't set user %s\n", profile_data_string);
- }
- vnode_rele(data_vp);
- vnode_put(data_vp);
-
- kmem_free(kernel_map,
- (vm_offset_t)profile_data_string, PATH_MAX);
- kmem_free(kernel_map,
- (vm_offset_t)names_buf, 4 * PAGE_SIZE);
- return 0;
+ pmap_footprint_suspend(current_map(), new_value);
+ return 0;
}
+SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
+ CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
+ 0, 0, &sysctl_vm_footprint_suspend, "I", "");
+#endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
+
+extern uint64_t vm_map_corpse_footprint_count;
+extern uint64_t vm_map_corpse_footprint_size_avg;
+extern uint64_t vm_map_corpse_footprint_size_max;
+extern uint64_t vm_map_corpse_footprint_full;
+extern uint64_t vm_map_corpse_footprint_no_buf;
+SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
+SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
+SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
+SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
+SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
+
+#if PMAP_CS
+extern uint64_t vm_cs_defer_to_pmap_cs;
+extern uint64_t vm_cs_defer_to_pmap_cs_not;
+SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_pmap_cs,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_pmap_cs, "");
+SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_pmap_cs_not,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_pmap_cs_not, "");
+#endif /* PMAP_CS */
+
+extern uint64_t shared_region_pager_copied;
+extern uint64_t shared_region_pager_slid;
+extern uint64_t shared_region_pager_slid_error;
+extern uint64_t shared_region_pager_reclaimed;
+SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
+SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
+SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
+SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
+
+#if MACH_ASSERT
+extern int pmap_ledgers_panic_leeway;
+SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
+#endif /* MACH_ASSERT */