xnu-3789.70.16.tar.gz

[apple/xnu.git] / bsd / vm / vm_unix.c
diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c

index 1250f48a3f116ee0f5ed78920ac9e22dff0a5079..69aeca4ab2a4fefc29221fede8d76de617d06c5e 100644 (file)
--- a/bsd/vm/vm_unix.c
+++ b/bsd/vm/vm_unix.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -31,23 +31,31 @@
   * All rights reserved.  The CMU software License Agreement specifies
   * the terms and conditions for use and redistribution.
   */
-
  /*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
   */
  
-
  #include <meta_features.h>
  
+#include <vm/vm_options.h>
+
  #include <kern/task.h>
  #include <kern/thread.h>
  #include <kern/debug.h>
-#include <kern/lock.h>
+#include <kern/extmod_statistics.h>
  #include <mach/mach_traps.h>
+#include <mach/port.h>
+#include <mach/sdt.h>
+#include <mach/task.h>
+#include <mach/task_access.h>
+#include <mach/task_special_ports.h>
  #include <mach/time_value.h>
  #include <mach/vm_map.h>
  #include <mach/vm_param.h>
  #include <mach/vm_prot.h>
-#include <mach/port.h>
  
  #include <sys/file_internal.h>
  #include <sys/param.h>
@@ -69,27 +77,226 @@
  #include <sys/sysproto.h>
  #include <sys/mman.h>
  #include <sys/sysctl.h>
-
-#include <bsm/audit_kernel.h>
+#include <sys/cprotect.h>
+#include <sys/kpi_socket.h>
+#include <sys/kas_info.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <security/audit/audit.h>
+#include <security/mac.h>
  #include <bsm/audit_kevents.h>
  
  #include <kern/kalloc.h>
  #include <vm/vm_map.h>
  #include <vm/vm_kern.h>
+#include <vm/vm_pageout.h>
  
  #include <machine/spl.h>
  
-#include <mach/shared_memory_server.h>
-#include <vm/vm_shared_memory_server.h>
+#include <mach/shared_region.h>
+#include <vm/vm_shared_region.h>
  
  #include <vm/vm_protos.h>
  
+#include <sys/kern_memorystatus.h>
+
+
+int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
+int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
+
+#if VM_MAP_DEBUG_APPLE_PROTECT
+SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
+#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
+
+#if VM_MAP_DEBUG_FOURK
+SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
+#endif /* VM_MAP_DEBUG_FOURK */
+
+#if DEVELOPMENT || DEBUG
+
+static int
+sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+       vm_offset_t     kaddr;
+       kern_return_t   kr;
+       int     error = 0;
+       int     size = 0;
+
+       error = sysctl_handle_int(oidp, &size, 0, req);
+       if (error || !req->newptr)
+               return (error);
+
+       kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size, 0, 0, 0, 0, VM_KERN_MEMORY_IOKIT);
+
+       if (kr == KERN_SUCCESS)
+               kmem_free(kernel_map, kaddr, size);
+
+       return error;
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
+           0, 0, &sysctl_kmem_alloc_contig, "I", "");
+
+extern int vm_region_footprint;
+SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
+
+#endif /* DEVELOPMENT || DEBUG */
+
+
+
+#if DEVELOPMENT || DEBUG
+extern int radar_20146450;
+SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
+
+extern int macho_printf;
+SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
+
+extern int apple_protect_pager_data_request_debug;
+SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
+
+
+#endif /* DEVELOPMENT || DEBUG */
+
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
+#if VM_SCAN_FOR_SHADOW_CHAIN
+static int vm_shadow_max_enabled = 0;    /* Disabled by default */
+extern int proc_shadow_max(void);
+static int
+vm_shadow_max SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2, oidp)
+       int value = 0;
+
+       if (vm_shadow_max_enabled)
+               value = proc_shadow_max();
+
+       return SYSCTL_OUT(req, &value, sizeof(value));
+}
+SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
+    0, 0, &vm_shadow_max, "I", "");
+
+SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
+
+#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
+
+SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
+
+__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
+       mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
+/*
+ * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
+ */
+
+#if DEVELOPMENT || DEBUG
+extern int allow_stack_exec, allow_data_exec;
+
+SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
+
+#endif /* DEVELOPMENT || DEBUG */
+
+static const char *prot_values[] = {
+       "none",
+       "read-only",
+       "write-only",
+       "read-write",
+       "execute-only",
+       "read-execute",
+       "write-execute",
+       "read-write-execute"
+};
+
  void
-log_nx_failure(addr64_t vaddr, vm_prot_t prot)
+log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
  {
-        printf("NX failure: %s  -  vaddr=%qx,  prot=%x\n", current_proc()->p_comm, vaddr, prot);
+       printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", 
+               current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
  }
  
+/*
+ * shared_region_unnest_logging: level of logging of unnesting events
+ * 0   - no logging
+ * 1   - throttled logging of unexpected unnesting events (default)
+ * 2   - unthrottled logging of unexpected unnesting events
+ * 3+  - unthrottled logging of all unnesting events
+ */
+int shared_region_unnest_logging = 1;
+
+SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
+          &shared_region_unnest_logging, 0, "");
+
+int vm_shared_region_unnest_log_interval = 10;
+int shared_region_unnest_log_count_threshold = 5;
+
+/*
+ * Shared cache path enforcement.
+ */
+
+static int scdir_enforce = 1;
+static char scdir_path[] = "/var/db/dyld/";
+
+#ifndef SECURE_KERNEL
+SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, "");
+#endif
+
+/* These log rate throttling state variables aren't thread safe, but
+ * are sufficient unto the task.
+ */
+static int64_t last_unnest_log_time = 0; 
+static int shared_region_unnest_log_count = 0;
+
+void
+log_unnest_badness(
+       vm_map_t        m,
+       vm_map_offset_t s,
+       vm_map_offset_t e,
+       boolean_t       is_nested_map,
+       vm_map_offset_t lowest_unnestable_addr)
+{
+       struct timeval  tv;
+
+       if (shared_region_unnest_logging == 0)
+               return;
+
+       if (shared_region_unnest_logging <= 2 &&
+           is_nested_map &&
+           s >= lowest_unnestable_addr) {
+               /*
+                * Unnesting of writable map entries is fine.
+                */
+               return;
+       }
+
+       if (shared_region_unnest_logging <= 1) {
+               microtime(&tv);
+               if ((tv.tv_sec - last_unnest_log_time) <
+                   vm_shared_region_unnest_log_interval) {
+                       if (shared_region_unnest_log_count++ >
+                           shared_region_unnest_log_count_threshold)
+                               return;
+               } else {
+                       last_unnest_log_time = tv.tv_sec;
+                       shared_region_unnest_log_count = 0;
+               }
+       }
+
+       DTRACE_VM4(log_unnest_badness,
+                  vm_map_t, m,
+                  vm_map_offset_t, s,
+                  vm_map_offset_t, e,
+                  vm_map_offset_t, lowest_unnestable_addr);
+       printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, current_proc()->p_pid, (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
+}
  
  int
  useracc(
@@ -97,9 +304,15 @@ useracc(
         user_size_t     len,
         int     prot)
  {
+       vm_map_t        map;
+
+       map = current_map();
         return (vm_map_check_protection(
-                       current_map(),
-                       vm_map_trunc_page(addr), vm_map_round_page(addr+len),
+                       map,
+                       vm_map_trunc_page(addr,
+                                         vm_map_page_mask(map)),
+                       vm_map_round_page(addr+len,
+                                         vm_map_page_mask(map)),
                         prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
  }
  
@@ -108,10 +321,17 @@ vslock(
         user_addr_t     addr,
         user_size_t     len)
  {
-       kern_return_t kret;
-       kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
-                       vm_map_round_page(addr+len), 
-                       VM_PROT_READ | VM_PROT_WRITE ,FALSE);
+       kern_return_t   kret;
+       vm_map_t        map;
+
+       map = current_map();
+       kret = vm_map_wire(map,
+                          vm_map_trunc_page(addr,
+                                            vm_map_page_mask(map)),
+                          vm_map_round_page(addr+len,
+                                            vm_map_page_mask(map)), 
+                          VM_PROT_READ | VM_PROT_WRITE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_BSD),
+                          FALSE);
  
         switch (kret) {
         case KERN_SUCCESS:
@@ -138,14 +358,17 @@ vsunlock(
         vm_map_offset_t vaddr;
         ppnum_t         paddr;
  #endif  /* FIXME ] */
-       kern_return_t kret;
+       kern_return_t   kret;
+       vm_map_t        map;
+
+       map = current_map();
  
  #if FIXME  /* [ */
         if (dirtied) {
                 pmap = get_task_pmap(current_task());
-               for (vaddr = vm_map_trunc_page(addr);
-                    vaddr < vm_map_round_page(addr+len);
-                               vaddr += PAGE_SIZE) {
+               for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
+                    vaddr < vm_map_round_page(addr+len, PAGE_MASK);
+                    vaddr += PAGE_SIZE) {
                         paddr = pmap_extract(pmap, vaddr);
                         pg = PHYS_TO_VM_PAGE(paddr);
                         vm_page_set_modified(pg);
@@ -155,8 +378,12 @@ vsunlock(
  #ifdef lint
         dirtied++;
  #endif /* lint */
-       kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
-                               vm_map_round_page(addr+len), FALSE);
+       kret = vm_map_unwire(map,
+                            vm_map_trunc_page(addr,
+                                              vm_map_page_mask(map)),
+                            vm_map_round_page(addr+len,
+                                              vm_map_page_mask(map)),
+                            FALSE);
         switch (kret) {
         case KERN_SUCCESS:
                 return (0);
@@ -220,7 +447,7 @@ suword(
  
  long fuword(user_addr_t addr)
  {
-       long word;
+       long word = 0;
  
         if (copyin(addr, (void *) &word, sizeof(int)))
                 return(-1);
@@ -239,7 +466,7 @@ suiword(
  
  long fuiword(user_addr_t addr)
  {
-       long word;
+       long word = 0;
  
         if (copyin(addr, (void *) &word, sizeof(int)))
                 return(-1);
@@ -282,7 +509,7 @@ suulong(user_addr_t addr, uint64_t uword)
         if (IS_64BIT_PROCESS(current_proc())) {
                 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
         } else {
-               return(suiword(addr, (u_long)uword));
+               return(suiword(addr, (uint32_t)uword));
         }
  }
  
@@ -301,29 +528,43 @@ fuulong(user_addr_t addr)
  }
  
  int
-swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
+swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
  {
         return(ENOTSUP);
  }
  
-
+/*
+ * pid_for_task
+ *
+ * Find the BSD process ID for the Mach task associated with the given Mach port 
+ * name
+ *
+ * Parameters: args            User argument descriptor (see below)
+ *
+ * Indirect parameters:        args->t         Mach port name
+ *                     args->pid       Process ID (returned value; see below)
+ *
+ * Returns:    KERN_SUCCESS    Success
+ *             KERN_FAILURE    Not success           
+ *
+ * Implicit returns: args->pid         Process ID
+ *
+ */
  kern_return_t
  pid_for_task(
         struct pid_for_task_args *args)
  {
         mach_port_name_t        t = args->t;
         user_addr_t             pid_addr  = args->pid;  
-       struct proc * p;
+       proc_t p;
         task_t          t1;
         int     pid = -1;
         kern_return_t   err = KERN_SUCCESS;
-       boolean_t funnel_state;
  
         AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
         AUDIT_ARG(mach_port1, t);
  
-       funnel_state = thread_funnel_set(kernel_flock, TRUE);
-       t1 = port_name_to_task(t);
+       t1 = port_name_to_task_inspect(t);
  
         if (t1 == TASK_NULL) {
                 err = KERN_FAILURE;
@@ -333,7 +574,10 @@ pid_for_task(
                 if (p) {
                         pid  = proc_pid(p);
                         err = KERN_SUCCESS;
-               } else {
+               } else if (is_corpsetask(t1)) {
+                       pid = task_pid(t1);
+                       err = KERN_SUCCESS;
+               }else {
                         err = KERN_FAILURE;
                 }
         }
@@ -341,11 +585,108 @@ pid_for_task(
  pftout:
         AUDIT_ARG(pid, pid);
         (void) copyout((char *) &pid, pid_addr, sizeof(int));
-       thread_funnel_set(kernel_flock, funnel_state);
         AUDIT_MACH_SYSCALL_EXIT(err);
         return(err);
  }
  
+/* 
+ *
+ * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: none allowed except for self (and root)
+ * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
+ *
+ */
+static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
+
+/*
+ *     Routine:        task_for_pid_posix_check
+ *     Purpose:
+ *                     Verify that the current process should be allowed to
+ *                     get the target process's task port. This is only 
+ *                     permitted if:
+ *                     - The current process is root
+ *                     OR all of the following are true:
+ *                     - The target process's real, effective, and saved uids
+ *                       are the same as the current proc's euid,
+ *                     - The target process's group set is a subset of the
+ *                       calling process's group set, and
+ *                     - The target process hasn't switched credentials.
+ *
+ *     Returns:        TRUE: permitted
+ *                     FALSE: denied
+ */
+static int
+task_for_pid_posix_check(proc_t target)
+{
+       kauth_cred_t targetcred, mycred;
+       uid_t myuid;
+       int allowed; 
+
+       /* No task_for_pid on bad targets */
+       if (target->p_stat == SZOMB) {
+               return FALSE;
+       }
+
+       mycred = kauth_cred_get();
+       myuid = kauth_cred_getuid(mycred);
+
+       /* If we're running as root, the check passes */
+       if (kauth_cred_issuser(mycred))
+               return TRUE;
+
+       /* We're allowed to get our own task port */
+       if (target == current_proc())
+               return TRUE;
+
+       /* 
+        * Under DENY, only root can get another proc's task port,
+        * so no more checks are needed.
+        */
+       if (tfp_policy == KERN_TFP_POLICY_DENY) { 
+               return FALSE;
+       }
+
+       targetcred = kauth_cred_proc_ref(target);
+       allowed = TRUE;
+
+       /* Do target's ruid, euid, and saved uid match my euid? */
+       if ((kauth_cred_getuid(targetcred) != myuid) || 
+                       (kauth_cred_getruid(targetcred) != myuid) ||
+                       (kauth_cred_getsvuid(targetcred) != myuid)) {
+               allowed = FALSE;
+               goto out;
+       }
+
+       /* Are target's groups a subset of my groups? */
+       if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
+                       allowed == 0) {
+               allowed = FALSE;
+               goto out;
+       }
+
+       /* Has target switched credentials? */
+       if (target->p_flag & P_SUGID) {
+               allowed = FALSE;
+               goto out;
+       }
+       
+out:
+       kauth_cred_unref(&targetcred);
+       return allowed;
+}
+
+/*
+ *     __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
+ *
+ *     Description:    Waits for the user space daemon to respond to the request
+ *                     we made. Declared noinline so that it stays visible in
+ *                     stackshots and spindumps, as well as when debugging.
+ */
+__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
+       mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
+{
+       return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
+}
+
  /*
   *     Routine:        task_for_pid
   *     Purpose:
@@ -355,21 +696,10 @@ pftout:
   *             Only permitted to privileged processes, or processes
   *             with the same user ID.
   *
- * XXX This should be a BSD system call, not a Mach trap!!!
- */
-/* 
- *
- * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
- * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: all permissive; related ones allowed or privileged
- * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self access allowed; setgid (to tfp_group) are allowed for other tasks
+ *             Note: if pid == 0, an error is returned no matter who is calling.
   *
+ * XXX This should be a BSD system call, not a Mach trap!!!
   */
-static  int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
-/* the groutp is inited to kmem group and is modifiable by sysctl */
-static int tfp_group_inited = 0; /* policy groups are loaded ... */
-static  gid_t tfp_group_ronly = 0; /* procview group */
-static  gid_t tfp_group_rw = 0; /* procmod group */
-
  kern_return_t
  task_for_pid(
         struct task_for_pid_args *args)
@@ -377,152 +707,95 @@ task_for_pid(
         mach_port_name_t        target_tport = args->target_tport;
         int                     pid = args->pid;
         user_addr_t             task_addr = args->t;
-       struct uthread          *uthread;
-       struct proc     *p;
-       struct proc *p1;
-       task_t          t1;
-       mach_port_name_t        tret;
+       proc_t                  p = PROC_NULL;
+       task_t                  t1 = TASK_NULL;
+       mach_port_name_t        tret = MACH_PORT_NULL;
+       ipc_port_t              tfpport;
         void * sright;
         int error = 0;
-       int is_member = 0;
-       boolean_t funnel_state;
-       boolean_t ispermitted = FALSE;
-#if DIAGNOSTIC
-       char procname[MAXCOMLEN+1];
-#endif /* DIAGNOSTIC */
  
         AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
         AUDIT_ARG(pid, pid);
         AUDIT_ARG(mach_port1, target_tport);
  
+       /* Always check if pid == 0 */
+       if (pid == 0) {
+               (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+               AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
+               return(KERN_FAILURE);
+       }
+
         t1 = port_name_to_task(target_tport);
         if (t1 == TASK_NULL) {
-               (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+               (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
                 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
                 return(KERN_FAILURE);
         } 
  
-       funnel_state = thread_funnel_set(kernel_flock, TRUE);
  
-       p1 = current_proc();
+       p = proc_find(pid);
+       if (p == PROC_NULL) {
+               error = KERN_FAILURE;
+               goto tfpout;
+       }
  
-       /*
-        * Delayed binding of thread credential to process credential, if we
-        * are not running with an explicitly set thread credential.
-        */
-       uthread = get_bsdthread_info(current_thread());
-       if (uthread->uu_ucred != p1->p_ucred &&
-           (uthread->uu_flag & UT_SETUID) == 0) {
-               kauth_cred_t old = uthread->uu_ucred;
-               proc_lock(p1);
-               kauth_cred_ref(p1->p_ucred);
-               uthread->uu_ucred = p1->p_ucred;
-               proc_unlock(p1);
-               if (IS_VALID_CRED(old))
-                       kauth_cred_unref(&old);
-       }
-
-       p = pfind(pid);
+#if CONFIG_AUDIT
         AUDIT_ARG(process, p);
+#endif
  
-       /*
-        * XXX p_ucred check can be bogus in multithreaded processes,
-        * XXX unless the funnel is held.
-        */
-       switch (tfp_policy) {
-
-               case KERN_TFP_POLICY_PERMISSIVE:
-                       /* self or suser or related ones */
-                       if ((p != (struct proc *) 0)
-                               && (p->p_stat != SZOMB)
-                               && (p1 != (struct proc *) 0)
-                               && (
-                                       (p1 == p)
-                                       || !(suser(kauth_cred_get(), 0))
-                                       || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) && 
-                                               ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
-                                               && ((p->p_flag & P_SUGID) == 0))
-                                       )
-                               )
-                                       ispermitted = TRUE;
-                       break;
-
-               case KERN_TFP_POLICY_RESTRICTED:
-                       /* self or suser or  setgid and related ones only */
-                       if ((p != (struct proc *) 0)
-                               && (p1 != (struct proc *) 0)
-                               && (p->p_stat != SZOMB)
-                               && (
-                                       (p1 == p)
-                                       || !(suser(kauth_cred_get(), 0))
-                                       || (((tfp_group_inited != 0) && 
-                                                       (
-                                                       ((kauth_cred_ismember_gid(kauth_cred_get(), 
-                                                                       tfp_group_ronly, &is_member) == 0) && is_member)
-                                                       ||((kauth_cred_ismember_gid(kauth_cred_get(), 
-                                                                       tfp_group_rw, &is_member) == 0) && is_member)
-                                                       )
-                                          )
-                                          && ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) && 
-                                                       ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
-                                                       && ((p->p_flag & P_SUGID) == 0))
-                                         )
-                                       )
-                               )
-                                       ispermitted = TRUE;
+       if (!(task_for_pid_posix_check(p))) {
+               error = KERN_FAILURE;
+               goto tfpout;
+       }
  
-                       break;
+       if (p->task != TASK_NULL) {
+               /* If we aren't root and target's task access port is set... */
+               if (!kauth_cred_issuser(kauth_cred_get()) &&
+                       p != current_proc() &&
+                       (task_get_task_access_port(p->task, &tfpport) == 0) &&
+                       (tfpport != IPC_PORT_NULL)) {
  
-               case KERN_TFP_POLICY_DENY:
-                       /* self or suser only */
-               default:
-                       /* do not return task port of other task at all */
-                       if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
-                                       && ((p1 == p)  || !(suser(kauth_cred_get(), 0))))
-                               ispermitted = TRUE;
-                       else
-                               ispermitted = FALSE;
-                       break;
-       };
+                       if (tfpport == IPC_PORT_DEAD) {
+                               error = KERN_PROTECTION_FAILURE;
+                               goto tfpout;
+                       }
  
+                       /* Call up to the task access server */
+                       error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
  
-       if (ispermitted == TRUE) {
-               if (p->task != TASK_NULL) {
-                       task_reference(p->task);
-                       sright = (void *)convert_task_to_port(p->task);
-                       tret = ipc_port_copyout_send(
-                                               sright, 
-                                               get_task_ipcspace(current_task()));
-                       } else
-                               tret  = MACH_PORT_NULL;
-                       AUDIT_ARG(mach_port2, tret);
-                       (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
-               task_deallocate(t1);
-                       error = KERN_SUCCESS;
+                       if (error != MACH_MSG_SUCCESS) {
+                               if (error == MACH_RCV_INTERRUPTED)
+                                       error = KERN_ABORTED;
+                               else
+                                       error = KERN_FAILURE;
+                               goto tfpout;
+                       }
+               }
+#if CONFIG_MACF
+               error = mac_proc_check_get_task(kauth_cred_get(), p);
+               if (error) {
+                       error = KERN_FAILURE;
                         goto tfpout;
-       }
-#if DIAGNOSTIC
-       else {
-               /* 
-                * There is no guarantee that p_comm is null terminated and
-                * kernel implementation of string functions are complete. So 
-                * ensure stale info is not leaked out, bzero the  buffer
-                */
-               bzero(&procname[0], MAXCOMLEN+1);
-               strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
-               if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
-                       log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
-                               ((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
-                               ((p != PROC_NULL)?(p->p_pid):0));
-       }
-#endif /* DIAGNOSTIC */
+               }
+#endif
+
+               /* Grant task port access */
+               task_reference(p->task);
+               extmod_statistics_incr_task_for_pid(p->task);
+
+               sright = (void *) convert_task_to_port(p->task);
+               tret = ipc_port_copyout_send(
+                               sright, 
+                               get_task_ipcspace(current_task()));
+       } 
+       error = KERN_SUCCESS;
  
-    task_deallocate(t1);
-       tret = MACH_PORT_NULL;
-       (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
-       error = KERN_FAILURE;
  tfpout:
-       thread_funnel_set(kernel_flock, funnel_state);
+       task_deallocate(t1);
+       AUDIT_ARG(mach_port2, tret);
+       (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
+       if (p != PROC_NULL)
+               proc_rele(p);
         AUDIT_MACH_SYSCALL_EXIT(error);
         return(error);
  }
@@ -546,14 +819,12 @@ task_name_for_pid(
         mach_port_name_t        target_tport = args->target_tport;
         int                     pid = args->pid;
         user_addr_t             task_addr = args->t;
-       struct uthread          *uthread;
-       struct proc     *p;
-       struct proc *p1;
+       proc_t          p = PROC_NULL;
         task_t          t1;
         mach_port_name_t        tret;
         void * sright;
-       int error = 0;
-       boolean_t funnel_state;
+       int error = 0, refheld = 0;
+       kauth_cred_t target_cred;
  
         AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
         AUDIT_ARG(pid, pid);
@@ -561,762 +832,706 @@ task_name_for_pid(
  
         t1 = port_name_to_task(target_tport);
         if (t1 == TASK_NULL) {
-               (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+               (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
                 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
                 return(KERN_FAILURE);
         } 
  
-       funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
-       p1 = current_proc();
-
-       /*
-        * Delayed binding of thread credential to process credential, if we
-        * are not running with an explicitly set thread credential.
-        */
-       /*
-        * XXX p_ucred check can be bogus in multithreaded processes,
-        * XXX unless the funnel is held.
-        */
-       uthread = get_bsdthread_info(current_thread());
-       if (uthread->uu_ucred != p1->p_ucred &&
-           (uthread->uu_flag & UT_SETUID) == 0) {
-               kauth_cred_t old = uthread->uu_ucred;
-               proc_lock(p1);
-               kauth_cred_ref(p1->p_ucred);
-               uthread->uu_ucred = p1->p_ucred;
-               proc_unlock(p1);
-               if (IS_VALID_CRED(old))
-                       kauth_cred_unref(&old);
-       }
-
-       p = pfind(pid);
-       AUDIT_ARG(process, p);
-
-       if ((p != (struct proc *) 0)
-           && (p->p_stat != SZOMB)
-           && (p1 != (struct proc *) 0)
-           && ((p1 == p)
-               || !(suser(kauth_cred_get(), 0))
-               || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) && 
-                   ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))
-       {
-               if (p->task != TASK_NULL)
-               {
-                       task_reference(p->task);
-                       sright = (void *)convert_task_name_to_port(p->task);
-                       tret = ipc_port_copyout_send(
-                                               sright, 
+       p = proc_find(pid);
+       if (p != PROC_NULL) {
+               AUDIT_ARG(process, p);
+               target_cred = kauth_cred_proc_ref(p);
+               refheld = 1;
+
+               if ((p->p_stat != SZOMB)
+                   && ((current_proc() == p)
+                       || kauth_cred_issuser(kauth_cred_get()) 
+                       || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 
+                           ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
+
+                       if (p->task != TASK_NULL) {
+                               task_reference(p->task);
+#if CONFIG_MACF
+                               error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
+                               if (error) {
+                                       task_deallocate(p->task);
+                                       goto noperm;
+                               }
+#endif
+                               sright = (void *)convert_task_name_to_port(p->task);
+                               tret = ipc_port_copyout_send(sright, 
                                                 get_task_ipcspace(current_task()));
-               } else
-                       tret  = MACH_PORT_NULL;
-               AUDIT_ARG(mach_port2, tret);
-               (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
-               task_deallocate(t1);
-               error = KERN_SUCCESS;
-               goto tnfpout;
+                       } else
+                               tret  = MACH_PORT_NULL;
+
+                       AUDIT_ARG(mach_port2, tret);
+                       (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
+                       task_deallocate(t1);
+                       error = KERN_SUCCESS;
+                       goto tnfpout;
+               }
         }
  
-       task_deallocate(t1);
+#if CONFIG_MACF
+noperm:
+#endif
+    task_deallocate(t1);
         tret = MACH_PORT_NULL;
         (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
         error = KERN_FAILURE;
  tnfpout:
-       thread_funnel_set(kernel_flock, funnel_state);
+       if (refheld != 0)
+               kauth_cred_unref(&target_cred);
+       if (p != PROC_NULL)
+               proc_rele(p);
         AUDIT_MACH_SYSCALL_EXIT(error);
         return(error);
  }
  
-static int
-sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
-    __unused int arg2, struct sysctl_req *req)
+kern_return_t
+pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
  {
-    int error = 0;
-       int new_value;
+       task_t  target = NULL;
+       proc_t  targetproc = PROC_NULL;
+       int     pid = args->pid;
+       int     error = 0;
  
-    error = SYSCTL_OUT(req, arg1, sizeof(int));
-    if (error || req->newptr == USER_ADDR_NULL)
-        return(error);
+#if CONFIG_MACF
+       error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
+       if (error) {
+               error = EPERM;
+               goto out;
+       }
+#endif
  
-       if (!is_suser())
-               return(EPERM);
+       if (pid == 0) {
+               error = EPERM;
+               goto out;
+       }
  
-       if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
+       targetproc = proc_find(pid);
+       if (targetproc == PROC_NULL) {
+               error = ESRCH;
                 goto out;
         }
-       if ((new_value == KERN_TFP_POLICY_DENY) 
-               || (new_value == KERN_TFP_POLICY_PERMISSIVE)
-               || (new_value == KERN_TFP_POLICY_RESTRICTED))
-                       tfp_policy = new_value;
-       else
-                       error = EINVAL;         
-out:
-    return(error);
  
-}
+       if (!task_for_pid_posix_check(targetproc)) {
+               error = EPERM;
+               goto out;
+       }
  
-static int
-sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
-    __unused int arg2, struct sysctl_req *req)
-{
-    int error = 0;
-       int new_value;
+       target = targetproc->task;
+       if (target != TASK_NULL) {
+               mach_port_t tfpport;
  
-    error = SYSCTL_OUT(req, arg1, sizeof(int));
-    if (error || req->newptr == USER_ADDR_NULL)
-        return(error);
+               /* If we aren't root and target's task access port is set... */
+               if (!kauth_cred_issuser(kauth_cred_get()) &&
+                       targetproc != current_proc() &&
+                       (task_get_task_access_port(target, &tfpport) == 0) &&
+                       (tfpport != IPC_PORT_NULL)) {
  
-       if (!is_suser())
-               return(EPERM);
+                       if (tfpport == IPC_PORT_DEAD) {
+                               error = EACCES;
+                               goto out;
+                       }
  
-       /* 
-        * Once set; cannot be reset till next boot. Launchd will set this
-        * in its pid 1 init and no one can set after that.
-        */
-       if (tfp_group_inited != 0)
-               return(EPERM);
-               
-       if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
-               goto out;
+                       /* Call up to the task access server */
+                       error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+
+                       if (error != MACH_MSG_SUCCESS) {
+                               if (error == MACH_RCV_INTERRUPTED)
+                                       error = EINTR;
+                               else
+                                       error = EPERM;
+                               goto out;
+                       }
+               }
         }
  
-       if (new_value >= 100) 
-                       error = EINVAL;         
-       else {
-               if (arg1 == &tfp_group_ronly) 
-                       tfp_group_ronly = new_value;
-               else if (arg1 == &tfp_group_rw)
-                       tfp_group_rw = new_value;
-               else
+       task_reference(target);
+       error = task_pidsuspend(target);
+       if (error) {
+               if (error == KERN_INVALID_ARGUMENT) {
                         error = EINVAL;
-               if ((tfp_group_ronly != 0 ) && (tfp_group_rw != 0 ))
-                       tfp_group_inited = 1;
+               } else {
+                       error = EPERM;
+               }
+       }
+#if CONFIG_MEMORYSTATUS
+       else {
+               memorystatus_on_suspend(targetproc);
         }
+#endif
+
+       task_deallocate(target);
  
  out:
-    return(error);
+       if (targetproc != PROC_NULL)
+               proc_rele(targetproc);
+       *ret = error;
+       return error;
  }
  
-SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
-    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
-    &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups ,"I","read_group");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
-    &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups ,"I","rw_group");
+kern_return_t
+pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
+{
+       task_t  target = NULL;
+       proc_t  targetproc = PROC_NULL;
+       int     pid = args->pid;
+       int     error = 0;
  
+#if CONFIG_MACF
+       error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
+       if (error) {
+               error = EPERM;
+               goto out;
+       }
+#endif
  
-SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW, &shared_region_trace_level, 0, "");
+       if (pid == 0) {
+               error = EPERM;
+               goto out;
+       }
  
-/*
- * Try and cap the number of mappings the user might be trying to deal with,
- * so that we don't end up allocating insane amounts of wired memory in the
- * kernel based on bogus user arguments.
- * There are 2 shared regions (TEXT and DATA). The size of each submap
- * is SHARED_TEXT_REGION_SIZE and we can have at most 1 VM map entry per page,
- * so the maximum number of mappings we could ever have to deal with is...
- */
-#define SHARED_REGION_MAX_MAPPINGS ((2 *SHARED_TEXT_REGION_SIZE) >> PAGE_SHIFT)
+       targetproc = proc_find(pid);
+       if (targetproc == PROC_NULL) {
+               error = ESRCH;
+               goto out;
+       }
+
+       if (!task_for_pid_posix_check(targetproc)) {
+               error = EPERM;
+               goto out;
+       }
+
+       target = targetproc->task;
+       if (target != TASK_NULL) {
+               mach_port_t tfpport;
+
+               /* If we aren't root and target's task access port is set... */
+               if (!kauth_cred_issuser(kauth_cred_get()) &&
+                       targetproc != current_proc() &&
+                       (task_get_task_access_port(target, &tfpport) == 0) &&
+                       (tfpport != IPC_PORT_NULL)) {
+
+                       if (tfpport == IPC_PORT_DEAD) {
+                               error = EACCES;
+                               goto out;
+                       }
+
+                       /* Call up to the task access server */
+                       error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+
+                       if (error != MACH_MSG_SUCCESS) {
+                               if (error == MACH_RCV_INTERRUPTED)
+                                       error = EINTR;
+                               else
+                                       error = EPERM;
+                               goto out;
+                       }
+               }
+       }
+
+
+       task_reference(target);
+
+#if CONFIG_MEMORYSTATUS
+       memorystatus_on_resume(targetproc);
+#endif
+
+       error = task_pidresume(target);
+       if (error) {
+               if (error == KERN_INVALID_ARGUMENT) {
+                       error = EINVAL;
+               } else {
+                       if (error == KERN_MEMORY_ERROR) {
+                               psignal(targetproc, SIGKILL);
+                               error = EIO;
+                       } else
+                               error = EPERM;
+               }
+       }
+       
+       task_deallocate(target);
+
+out:
+       if (targetproc != PROC_NULL)
+               proc_rele(targetproc);
+       
+       *ret = error;
+       return error;
+}
+
+
+static int
+sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
+    __unused int arg2, struct sysctl_req *req)
+{
+    int error = 0;
+       int new_value;
+
+    error = SYSCTL_OUT(req, arg1, sizeof(int));
+    if (error || req->newptr == USER_ADDR_NULL)
+        return(error);
+
+       if (!kauth_cred_issuser(kauth_cred_get()))
+               return(EPERM);
+
+       if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
+               goto out;
+       }
+       if ((new_value == KERN_TFP_POLICY_DENY) 
+               || (new_value == KERN_TFP_POLICY_DEFAULT))
+                       tfp_policy = new_value;
+       else
+                       error = EINVAL;         
+out:
+    return(error);
+
+}
+
+#if defined(SECURE_KERNEL)
+static int kern_secure_kernel = 1;
+#else
+static int kern_secure_kernel = 0;
+#endif
+
+SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
+
+SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
+SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
+
+SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
+          &shared_region_trace_level, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &shared_region_version, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
+          &shared_region_persistence, 0, "");
  
  /*
- * shared_region_make_private_np:
+ * shared_region_check_np:
   *
- * This system call is for "dyld" only.
- * 
- * It creates a private copy of the current process's "shared region" for
- * split libraries.  "dyld" uses this when the shared region is full or
- * it needs to load a split library that conflicts with an already loaded one
- * that this process doesn't need.  "dyld" specifies a set of address ranges
- * that it wants to keep in the now-private "shared region".  These cover
- * the set of split libraries that the process needs so far.  The kernel needs
- * to deallocate the rest of the shared region, so that it's available for 
- * more libraries for this process.
+ * This system call is intended for dyld.
+ *
+ * dyld calls this when any process starts to see if the process's shared
+ * region is already set up and ready to use.
+ * This call returns the base address of the first mapping in the
+ * process's shared region.
+ * dyld will then check what's mapped at that address.
+ *
+ * If the shared region is empty, dyld will then attempt to map the shared
+ * cache file in the shared region via the shared_region_map_np() system call.
+ *
+ * If something's already mapped in the shared region, dyld will check if it
+ * matches the shared cache it would like to use for that process.
+ * If it matches, everything's ready and the process can proceed and use the
+ * shared region.
+ * If it doesn't match, dyld will unmap the shared region and map the shared
+ * cache into the process's address space via mmap().
+ *
+ * ERROR VALUES
+ * EINVAL      no shared region
+ * ENOMEM      shared region is empty
+ * EFAULT      bad address for "start_address"
   */
  int
-shared_region_make_private_np(
-       struct proc                                     *p,
-       struct shared_region_make_private_np_args       *uap,
-       __unused int                                    *retvalp)
+shared_region_check_np(
+       __unused struct proc                    *p,
+       struct shared_region_check_np_args      *uap,
+       __unused int                            *retvalp)
  {
-       int                             error;
-       kern_return_t                   kr;
-       boolean_t                       using_shared_regions;
-       user_addr_t                     user_ranges;
-       unsigned int                    range_count;
-       vm_size_t                       ranges_size;
-       struct shared_region_range_np   *ranges;
-       shared_region_mapping_t         shared_region;
-       struct shared_region_task_mappings      task_mapping_info;
-       shared_region_mapping_t         next;
-
-       ranges = NULL;
-
-       range_count = uap->rangeCount;
-       user_ranges = uap->ranges;
-       ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));
-
-       SHARED_REGION_TRACE(
-               SHARED_REGION_TRACE_INFO,
-               ("shared_region: %p [%d(%s)] "
-                "make_private(rangecount=%d)\n",
-                current_thread(), p->p_pid, p->p_comm, range_count));
-
-       /* allocate kernel space for the "ranges" */
-       if (range_count != 0) {
-               if (range_count > SHARED_REGION_MAX_MAPPINGS) {
-                       error = EINVAL;
-                       goto done;
-               }
-               if ((mach_vm_size_t) ranges_size !=
-                   (mach_vm_size_t) range_count * sizeof (ranges[0])) {
-                       /* 32-bit integer overflow */
-                       error = EINVAL;
-                       goto done;
-               }
-               kr = kmem_alloc(kernel_map,
-                               (vm_offset_t *) &ranges,
-                               ranges_size);
+       vm_shared_region_t      shared_region;
+       mach_vm_offset_t        start_address = 0;
+       int                     error;
+       kern_return_t           kr;
+
+       SHARED_REGION_TRACE_DEBUG(
+               ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
+                (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                p->p_pid, p->p_comm,
+                (uint64_t)uap->start_address));
+
+       /* retrieve the current task's shared region */
+       shared_region = vm_shared_region_get(current_task());
+       if (shared_region != NULL) {
+               /* retrieve address of its first mapping... */
+               kr = vm_shared_region_start_address(shared_region,
+                                                   &start_address);
                 if (kr != KERN_SUCCESS) {
                         error = ENOMEM;
-                       goto done;
-               }
-
-               /* copy "ranges" from user-space */
-               error = copyin(user_ranges,
-                              ranges,
-                              ranges_size);
-               if (error) {
-                       goto done;
+               } else {
+                       /* ... and give it to the caller */
+                       error = copyout(&start_address,
+                                       (user_addr_t) uap->start_address,
+                                       sizeof (start_address));
+                       if (error) {
+                               SHARED_REGION_TRACE_ERROR(
+                                       ("shared_region: %p [%d(%s)] "
+                                        "check_np(0x%llx) "
+                                        "copyout(0x%llx) error %d\n",
+                                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                                        p->p_pid, p->p_comm,
+                                        (uint64_t)uap->start_address, (uint64_t)start_address,
+                                        error));
+                       }
                 }
-       }
-
-       if (p->p_flag & P_NOSHLIB) {
-               /* no split library has been mapped for this process so far */
-               using_shared_regions = FALSE;
+               vm_shared_region_deallocate(shared_region);
         } else {
-               /* this process has already mapped some split libraries */
-               using_shared_regions = TRUE;
-       }
-
-       /*
-        * Get a private copy of the current shared region.
-        * Do not chain it to the system-wide shared region, as we'll want
-        * to map other split libraries in place of the old ones.  We want
-        * to completely detach from the system-wide shared region and go our
-        * own way after this point, not sharing anything with other processes.
-        */
-       error = clone_system_shared_regions(using_shared_regions,
-                                           FALSE, /* chain_regions */
-                                           ENV_DEFAULT_ROOT);
-       if (error) {
-               goto done;
-       }
-
-       /* get info on the newly allocated shared region */
-       vm_get_shared_region(current_task(), &shared_region);
-       task_mapping_info.self = (vm_offset_t) shared_region;
-       shared_region_mapping_info(shared_region,
-                                  &(task_mapping_info.text_region),
-                                  &(task_mapping_info.text_size),
-                                  &(task_mapping_info.data_region),
-                                  &(task_mapping_info.data_size),
-                                  &(task_mapping_info.region_mappings),
-                                  &(task_mapping_info.client_base),
-                                  &(task_mapping_info.alternate_base),
-                                  &(task_mapping_info.alternate_next),
-                                  &(task_mapping_info.fs_base),
-                                  &(task_mapping_info.system),
-                                  &(task_mapping_info.flags),
-                                  &next);
-
-       /*
-        * We now have our private copy of the shared region, as it was before
-        * the call to clone_system_shared_regions().  We now need to clean it
-        * up and keep only the memory areas described by the "ranges" array.
-        */
-       kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);
-       switch (kr) {
-       case KERN_SUCCESS:
-               error = 0;
-               break;
-       default:
+               /* no shared region ! */
                 error = EINVAL;
-               goto done;
         }
  
-done:
-       if (ranges != NULL) {
-               kmem_free(kernel_map,
-                         (vm_offset_t) ranges,
-                         ranges_size);
-               ranges = NULL;
-       }
-
-       SHARED_REGION_TRACE(
-               SHARED_REGION_TRACE_INFO,
-               ("shared_region: %p [%d(%s)] "
-                "make_private(rangecount=%d) -> %d "
-                "shared_region=%p[%x,%x,%x]\n",
-                current_thread(), p->p_pid, p->p_comm,
-                range_count, error, shared_region,
-                task_mapping_info.fs_base,
-                task_mapping_info.system,
-                task_mapping_info.flags));
+       SHARED_REGION_TRACE_DEBUG(
+               ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
+                (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                p->p_pid, p->p_comm,
+                (uint64_t)uap->start_address, (uint64_t)start_address, error));
  
         return error;
  }
  
  
+int
+shared_region_copyin_mappings(
+               struct proc                     *p,
+               user_addr_t                     user_mappings,
+               unsigned int                    mappings_count,
+               struct shared_file_mapping_np   *mappings)
+{
+       int             error = 0;
+       vm_size_t       mappings_size = 0;
+
+       /* get the list of mappings the caller wants us to establish */
+       mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
+       error = copyin(user_mappings,
+                      mappings,
+                      mappings_size);
+       if (error) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(): "
+                        "copyin(0x%llx, %d) failed (error=%d)\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (uint64_t)user_mappings, mappings_count, error));
+       }
+       return error;
+}
  /*
- * shared_region_map_file_np:
- *
- * This system call is for "dyld" only.
+ * shared_region_map_np()
   *
- * "dyld" wants to map parts of a split library in the shared region.
- * We get a file descriptor on the split library to be mapped and a set
- * of mapping instructions, describing which parts of the file to map in\
- * which areas of the shared segment and with what protection.
- * The "shared region" is split in 2 areas:
- * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections), 
- * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
+ * This system call is intended for dyld.
   *
+ * dyld uses this to map a shared cache file into a shared region.
+ * This is usually done only the first time a shared cache is needed.
+ * Subsequent processes will just use the populated shared region without
+ * requiring any further setup.
   */
  int
-shared_region_map_file_np(
+_shared_region_map_and_slide(
         struct proc                             *p,
-       struct shared_region_map_file_np_args   *uap,
-       __unused int                            *retvalp)
+       int                                     fd,
+       uint32_t                                mappings_count,
+       struct shared_file_mapping_np           *mappings,
+       uint32_t                                slide,
+       user_addr_t                             slide_start,
+       user_addr_t                             slide_size)
  {
-       int                                     error;
-       kern_return_t                           kr;
-       int                                     fd;
-       unsigned int                            mapping_count;
-       user_addr_t                             user_mappings; /* 64-bit */
-       user_addr_t                             user_slide_p;  /* 64-bit */
-       struct shared_file_mapping_np           *mappings;
-       vm_size_t                               mappings_size;
-       struct fileproc                         *fp;
-       mach_vm_offset_t                        slide;
-       struct vnode                            *vp;
-       struct vfs_context                      context;
-       memory_object_control_t                 file_control;
-       memory_object_size_t                    file_size;
-       shared_region_mapping_t                 shared_region;
-       struct shared_region_task_mappings      task_mapping_info;
-       shared_region_mapping_t                 next;
-       shared_region_mapping_t                 default_shared_region;
-       boolean_t                               using_default_region;
-       unsigned int                            j;
-       vm_prot_t                               max_prot;
-       mach_vm_offset_t                        base_offset, end_offset;
-       mach_vm_offset_t                        original_base_offset;
-       boolean_t                               mappings_in_segment;
-#define SFM_MAX_STACK  6
-       struct shared_file_mapping_np           stack_mappings[SFM_MAX_STACK];
-
-       mappings_size = 0;
-       mappings = NULL;
-       mapping_count = 0;
+       int                             error;
+       kern_return_t                   kr;
+       struct fileproc                 *fp;
+       struct vnode                    *vp, *root_vp, *scdir_vp;
+       struct vnode_attr               va;
+       off_t                           fs;
+       memory_object_size_t            file_size;
+#if CONFIG_MACF
+       vm_prot_t                       maxprot = VM_PROT_ALL;
+#endif
+       memory_object_control_t         file_control;
+       struct vm_shared_region         *shared_region;
+       uint32_t                        i;
+
+       SHARED_REGION_TRACE_DEBUG(
+               ("shared_region: %p [%d(%s)] -> map\n",
+                (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                p->p_pid, p->p_comm));
+
+       shared_region = NULL;
         fp = NULL;
         vp = NULL;
-
-       /* get file descriptor for split library from arguments */
-       fd = uap->fd;
+       scdir_vp = NULL;
  
         /* get file structure from file descriptor */
         error = fp_lookup(p, fd, &fp, 0);
         if (error) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_ERROR,
-                       ("shared_region: %p [%d(%s)] map_file: "
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map: "
                          "fd=%d lookup failed (error=%d)\n",
-                        current_thread(), p->p_pid, p->p_comm, fd, error));
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm, fd, error));
                 goto done;
         }
  
         /* make sure we're attempting to map a vnode */
-       if (fp->f_fglob->fg_type != DTYPE_VNODE) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_ERROR,
-                       ("shared_region: %p [%d(%s)] map_file: "
+       if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map: "
                          "fd=%d not a vnode (type=%d)\n",
-                        current_thread(), p->p_pid, p->p_comm,
-                        fd, fp->f_fglob->fg_type));
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        fd, FILEGLOB_DTYPE(fp->f_fglob)));
                 error = EINVAL;
                 goto done;
         }
  
         /* we need at least read permission on the file */
         if (! (fp->f_fglob->fg_flag & FREAD)) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_ERROR,
-                       ("shared_region: %p [%d(%s)] map_file: "
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map: "
                          "fd=%d not readable\n",
-                        current_thread(), p->p_pid, p->p_comm, fd));
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm, fd));
                 error = EPERM;
                 goto done;
         }
  
         /* get vnode from file structure */
-       error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
+       error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
         if (error) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_ERROR,
-                       ("shared_region: %p [%d(%s)] map_file: "
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map: "
                          "fd=%d getwithref failed (error=%d)\n",
-                        current_thread(), p->p_pid, p->p_comm, fd, error));
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm, fd, error));
                 goto done;
         }
         vp = (struct vnode *) fp->f_fglob->fg_data;
  
         /* make sure the vnode is a regular file */
         if (vp->v_type != VREG) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_ERROR,
-                       ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
                          "not a file (type=%d)\n",
-                        current_thread(), p->p_pid, p->p_comm,
-                        vp, vp->v_name, vp->v_type));
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp),
+                        vp->v_name, vp->v_type));
                 error = EINVAL;
                 goto done;
         }
  
-       /* get vnode size */
-       {
-               off_t   fs;
-               
-               context.vc_proc = p;
-               context.vc_ucred = kauth_cred_get();
-               if ((error = vnode_size(vp, &fs, &context)) != 0) {
-                       SHARED_REGION_TRACE(
-                               SHARED_REGION_TRACE_ERROR,
-                               ("shared_region: %p [%d(%s)] "
-                                "map_file(%p:'%s'): "
-                                "vnode_size(%p) failed (error=%d)\n",
-                                current_thread(), p->p_pid, p->p_comm,
-                                vp, vp->v_name, vp));
-                       goto done;
-               }
-               file_size = fs;
+#if CONFIG_MACF
+       /* pass in 0 for the offset argument because AMFI does not need the offset
+               of the shared cache */
+       error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
+                       fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot);
+       if (error) {
+               goto done;
         }
+#endif /* MAC */
  
-       /*
-        * Get the list of mappings the caller wants us to establish.
-        */
-       mapping_count = uap->mappingCount; /* the number of mappings */
-       mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
-       if (mapping_count == 0) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_INFO,
-                       ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
-                        "no mappings\n",
-                        current_thread(), p->p_pid, p->p_comm,
-                        vp, vp->v_name));
-               error = 0;      /* no mappings: we're done ! */
-               goto done;
-       } else if (mapping_count <= SFM_MAX_STACK) {
-               mappings = &stack_mappings[0];
+       /* make sure vnode is on the process's root volume */
+       root_vp = p->p_fd->fd_rdir;
+       if (root_vp == NULL) {
+               root_vp = rootvnode;
         } else {
-               if (mapping_count > SHARED_REGION_MAX_MAPPINGS) {
-                       error = EINVAL;
-                       goto done;
-               }
-               if ((mach_vm_size_t) mappings_size !=
-                   (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
-                       /* 32-bit integer overflow */
-                       error = EINVAL;
-                       goto done;
-               }
-               kr = kmem_alloc(kernel_map,
-                               (vm_offset_t *) &mappings,
-                               mappings_size);
-               if (kr != KERN_SUCCESS) {
-                       SHARED_REGION_TRACE(
-                               SHARED_REGION_TRACE_ERROR,
-                               ("shared_region: %p [%d(%s)] "
-                                "map_file(%p:'%s'): "
-                                "failed to allocate %d mappings (kr=0x%x)\n",
-                                current_thread(), p->p_pid, p->p_comm,
-                                vp, vp->v_name, mapping_count, kr));
-                       error = ENOMEM;
-                       goto done;
-               }
+               /*
+                * Chroot-ed processes can't use the shared_region.
+                */
+               error = EINVAL;
+               goto done;
         }
  
-       user_mappings = uap->mappings;     /* the mappings, in user space */
-       error = copyin(user_mappings,
-                      mappings,
-                      mappings_size);
-       if (error != 0) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_ERROR,
-                       ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
-                        "failed to copyin %d mappings (error=%d)\n",
-                        current_thread(), p->p_pid, p->p_comm,
-                        vp, vp->v_name, mapping_count, error));
+       if (vp->v_mount != root_vp->v_mount) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "not on process's root volume\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
+               error = EPERM;
                 goto done;
         }
  
-       /*
-        * If the caller provides a "slide" pointer, it means they're OK
-        * with us moving the mappings around to make them fit.
-        */
-       user_slide_p = uap->slide_p;
+       /* make sure vnode is owned by "root" */
+       VATTR_INIT(&va);
+       VATTR_WANTED(&va, va_uid);
+       error = vnode_getattr(vp, &va, vfs_context_current());
+       if (error) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "vnode_getattr(%p) failed (error=%d)\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+                        (void *)VM_KERNEL_ADDRPERM(vp), error));
+               goto done;
+       }
+       if (va.va_uid != 0) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "owned by uid=%d instead of 0\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp),
+                        vp->v_name, va.va_uid));
+               error = EPERM;
+               goto done;
+       }
  
-       /*
-        * Make each mapping address relative to the beginning of the
-        * shared region.  Check that all mappings are in the shared region.
-        * Compute the maximum set of protections required to tell the
-        * buffer cache how we mapped the file (see call to ubc_map() below).
-        */
-       max_prot = VM_PROT_NONE;
-       base_offset = -1LL;
-       end_offset = 0;
-       mappings_in_segment = TRUE;
-       for (j = 0; j < mapping_count; j++) {
-               mach_vm_offset_t segment;
-               segment = (mappings[j].sfm_address &
-                          GLOBAL_SHARED_SEGMENT_MASK);
-               if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
-                   segment != GLOBAL_SHARED_DATA_SEGMENT) {
-                       /* this mapping is not in the shared region... */
-                       if (user_slide_p == NULL) {
-                               /* ... and we can't slide it in: fail */
-                               SHARED_REGION_TRACE(
-                                       SHARED_REGION_TRACE_CONFLICT,
-                                       ("shared_region: %p [%d(%s)] "
-                                        "map_file(%p:'%s'): "
-                                        "mapping %p not in shared segment & "
-                                        "no sliding\n",
-                                        current_thread(), p->p_pid, p->p_comm,
-                                        vp, vp->v_name,
-                                        mappings[j].sfm_address));
-                               error = EINVAL;
-                               goto done;
-                       }
-                       if (j == 0) {
-                               /* expect all mappings to be outside */
-                               mappings_in_segment = FALSE;
-                       } else if (mappings_in_segment != FALSE) {
-                               /* other mappings were not outside: fail */
-                               SHARED_REGION_TRACE(
-                                       SHARED_REGION_TRACE_CONFLICT,
-                                       ("shared_region: %p [%d(%s)] "
-                                        "map_file(%p:'%s'): "
-                                        "mapping %p not in shared segment & "
-                                        "other mappings in shared segment\n",
-                                        current_thread(), p->p_pid, p->p_comm,
-                                        vp, vp->v_name,
-                                        mappings[j].sfm_address));
-                               error = EINVAL;
-                               goto done;
-                       }
-                       /* we'll try and slide that mapping in the segments */
-               } else {
-                       if (j == 0) {
-                               /* expect all mappings to be inside */
-                               mappings_in_segment = TRUE;
-                       } else if (mappings_in_segment != TRUE) {
-                               /* other mappings were not inside: fail */
-                               SHARED_REGION_TRACE(
-                                       SHARED_REGION_TRACE_CONFLICT,
-                                       ("shared_region: %p [%d(%s)] "
-                                        "map_file(%p:'%s'): "
-                                        "mapping %p in shared segment & "
-                                        "others in shared segment\n",
-                                        current_thread(), p->p_pid, p->p_comm,
-                                        vp, vp->v_name,
-                                        mappings[j].sfm_address));
-                               error = EINVAL;
-                               goto done;
-                       }
-                       /* get a relative offset inside the shared segments */
-                       mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;
-               }
-               if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
-                   < base_offset) {
-                       base_offset = (mappings[j].sfm_address &
-                                      SHARED_TEXT_REGION_MASK);
+       if (scdir_enforce) {
+               /* get vnode for scdir_path */
+               error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
+               if (error) {
+                       SHARED_REGION_TRACE_ERROR(
+                               ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                                "vnode_lookup(%s) failed (error=%d)\n",
+                                (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                                p->p_pid, p->p_comm,
+                                (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+                                scdir_path, error));
+                       goto done;
                 }
-               if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
-                   mappings[j].sfm_size > end_offset) {
-                       end_offset =
-                               (mappings[j].sfm_address &
-                                SHARED_TEXT_REGION_MASK) +
-                               mappings[j].sfm_size;
+
+               /* ensure parent is scdir_vp */
+               if (vnode_parent(vp) != scdir_vp) {
+                       SHARED_REGION_TRACE_ERROR(
+                               ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                                "shared cache file not in %s\n",
+                                (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                                p->p_pid, p->p_comm,
+                                (void *)VM_KERNEL_ADDRPERM(vp),
+                                vp->v_name, scdir_path));
+                       error = EPERM;
+                       goto done;
                 }
-               max_prot |= mappings[j].sfm_max_prot;
-       }
-       /* Make all mappings relative to the base_offset */
-       base_offset = vm_map_trunc_page(base_offset);
-       end_offset = vm_map_round_page(end_offset);
-       for (j = 0; j < mapping_count; j++) {
-               mappings[j].sfm_address -= base_offset;
         }
-       original_base_offset = base_offset;
-       if (mappings_in_segment == FALSE) {
-               /*
-                * We're trying to map a library that was not pre-bound to
-                * be in the shared segments.  We want to try and slide it
-                * back into the shared segments but as far back as possible,
-                * so that it doesn't clash with pre-bound libraries.  Set
-                * the base_offset to the end of the region, so that it can't
-                * possibly fit there and will have to be slid.
-                */
-               base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
+
+       /* get vnode size */
+       error = vnode_size(vp, &fs, vfs_context_current());
+       if (error) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "vnode_size(%p) failed (error=%d)\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+                        (void *)VM_KERNEL_ADDRPERM(vp), error));
+               goto done;
         }
+       file_size = fs;
  
         /* get the file's memory object handle */
-       UBCINFOCHECK("shared_region_map_file_np", vp);
         file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
         if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_ERROR,
-                       ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
-                        "ubc_getobject() failed\n",
-                        current_thread(), p->p_pid, p->p_comm,
-                        vp, vp->v_name));
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "no memory object\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
                 error = EINVAL;
                 goto done;
         }
  
-       /*
-        * Get info about the current process's shared region.
-        * This might change if we decide we need to clone the shared region.
-        */
-       vm_get_shared_region(current_task(), &shared_region);
-       task_mapping_info.self = (vm_offset_t) shared_region;
-       shared_region_mapping_info(shared_region,
-                                  &(task_mapping_info.text_region),
-                                  &(task_mapping_info.text_size),
-                                  &(task_mapping_info.data_region),
-                                  &(task_mapping_info.data_size),
-                                  &(task_mapping_info.region_mappings),
-                                  &(task_mapping_info.client_base),
-                                  &(task_mapping_info.alternate_base),
-                                  &(task_mapping_info.alternate_next),
-                                  &(task_mapping_info.fs_base),
-                                  &(task_mapping_info.system),
-                                  &(task_mapping_info.flags),
-                                  &next);
-
-       /*
-        * Are we using the system's current shared region
-        * for this environment ?
-        */
-       default_shared_region =
-               lookup_default_shared_region(ENV_DEFAULT_ROOT,
-                                            task_mapping_info.system);
-       if (shared_region == default_shared_region) {
-               using_default_region = TRUE;
-       } else {
-               using_default_region = FALSE;
+       /* check that the mappings are properly covered by code signatures */
+       if (!cs_enforcement(NULL)) {
+               /* code signing is not enforced: no need to check */
+       } else for (i = 0; i < mappings_count; i++) {
+               if (mappings[i].sfm_init_prot & VM_PROT_ZF) {
+                       /* zero-filled mapping: not backed by the file */
+                       continue;
+               }
+               if (ubc_cs_is_range_codesigned(vp,
+                                              mappings[i].sfm_file_offset,
+                                              mappings[i].sfm_size)) {
+                       /* this mapping is fully covered by code signatures */
+                       continue;
+               }
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
+                        "is not code-signed\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
+                        i, mappings_count,
+                        mappings[i].sfm_address,
+                        mappings[i].sfm_size,
+                        mappings[i].sfm_file_offset,
+                        mappings[i].sfm_max_prot,
+                        mappings[i].sfm_init_prot));
+               error = EINVAL;
+               goto done;
         }
-       shared_region_mapping_dealloc(default_shared_region);
  
-       if (vp->v_mount != rootvnode->v_mount &&
-           using_default_region) {
-               /*
-                * The split library is not on the root filesystem.  We don't
-                * want to polute the system-wide ("default") shared region
-                * with it.
-                * Reject the mapping.  The caller (dyld) should "privatize"
-                * (via shared_region_make_private()) the shared region and
-                * try to establish the mapping privately for this process.
-                */
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_CONFLICT,
-                       ("shared_region: %p [%d(%s)] "
-                        "map_file(%p:'%s'): "
-                        "not on root volume\n",
-                        current_thread(), p->p_pid, p->p_comm,
-                        vp->v_name));
-               error = EXDEV;
+       /* get the process's shared region (setup in vm_map_exec()) */
+       shared_region = vm_shared_region_get(current_task());
+       if (shared_region == NULL) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "no shared region\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
                 goto done;
         }
  
-
-       /*
-        * Map the split library.
-        */
-       kr = map_shared_file(mapping_count,
-                            mappings,
-                            file_control,
-                            file_size,
-                            &task_mapping_info,
-                            base_offset,
-                            (user_slide_p) ? &slide : NULL);
-
-       if (kr == KERN_SUCCESS) {
-               /*
-                * The mapping was successful.  Let the buffer cache know
-                * that we've mapped that file with these protections.  This
-                * prevents the vnode from getting recycled while it's mapped.
-                */
-               (void) ubc_map(vp, max_prot);
-               error = 0;
-       } else {
-               SHARED_REGION_TRACE(
-                       SHARED_REGION_TRACE_CONFLICT,
-                       ("shared_region: %p [%d(%s)] "
-                        "map_file(%p:'%s'): "
-                        "map_shared_file failed, kr=0x%x\n",
-                        current_thread(), p->p_pid, p->p_comm,
-                        vp, vp->v_name, kr));
+       /* map the file into that shared region's submap */
+       kr = vm_shared_region_map_file(shared_region,
+                                      mappings_count,
+                                      mappings,
+                                      file_control,
+                                      file_size,
+                                      (void *) p->p_fd->fd_rdir,
+                                      slide,
+                                      slide_start,
+                                      slide_size);
+       if (kr != KERN_SUCCESS) {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(%p:'%s'): "
+                        "vm_shared_region_map_file() failed kr=0x%x\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
                 switch (kr) {
                 case KERN_INVALID_ADDRESS:
                         error = EFAULT;
-                       goto done;
+                       break;
                 case KERN_PROTECTION_FAILURE:
                         error = EPERM;
-                       goto done;
+                       break;
                 case KERN_NO_SPACE:
                         error = ENOMEM;
-                       goto done;
+                       break;
                 case KERN_FAILURE:
                 case KERN_INVALID_ARGUMENT:
                 default:
                         error = EINVAL;
-                       goto done;
+                       break;
                 }
+               goto done;
         }
  
-       if (p->p_flag & P_NOSHLIB) {
-               /* signal that this process is now using split libraries */
-               p->p_flag &= ~P_NOSHLIB;
+       error = 0;
+
+       vnode_lock_spin(vp);
+
+       vp->v_flag |= VSHARED_DYLD;
+
+       vnode_unlock(vp);
+
+       /* update the vnode's access time */
+       if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
+               VATTR_INIT(&va);
+               nanotime(&va.va_access_time);
+               VATTR_SET_ACTIVE(&va, va_access_time);
+               vnode_setattr(vp, &va, vfs_context_current());
         }
  
-       if (user_slide_p) {
-               /*
-                * The caller provided a pointer to a "slide" offset.  Let
-                * them know by how much we slid the mappings.
-                */
-               if (mappings_in_segment == FALSE) {
-                       /*
-                        * We faked the base_offset earlier, so undo that
-                        * and take into account the real base_offset.
-                        */
-                       slide += SHARED_TEXT_REGION_SIZE - end_offset;
-                       slide -= original_base_offset;
-                       /*
-                        * The mappings were slid into the shared segments
-                        * and "slide" is relative to the beginning of the
-                        * shared segments.  Adjust it to be absolute.
-                        */
-                       slide += GLOBAL_SHARED_TEXT_SEGMENT;
-               }
-               error = copyout(&slide,
-                               user_slide_p,
-                               sizeof (slide));
-               if (slide != 0) {
-                       SHARED_REGION_TRACE(
-                               SHARED_REGION_TRACE_CONFLICT,
-                               ("shared_region: %p [%d(%s)] "
-                                "map_file(%p:'%s'): "
-                                "slid by 0x%llx\n",
-                                current_thread(), p->p_pid, p->p_comm,
-                                vp, vp->v_name, slide));
-               }
+       if (p->p_flag & P_NOSHLIB) {
+               /* signal that this process is now using split libraries */
+               OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
         }
  
  done:
@@ -1333,1174 +1548,378 @@ done:
                 fp_drop(p, fd, fp, 0);
                 fp = NULL;
         }
-       if (mappings != NULL &&
-           mappings != &stack_mappings[0]) {
-               kmem_free(kernel_map,
-                         (vm_offset_t) mappings,
-                         mappings_size);
+       if (scdir_vp != NULL) {
+               (void)vnode_put(scdir_vp);
+               scdir_vp = NULL;
         }
-       mappings = NULL;
-
-       return error;
-}
  
-int
-load_shared_file(
-       __unused struct proc *p,
-       __unused struct load_shared_file_args *uap,
-       __unused int *retval)
-{
-       return ENOSYS;
-}
+       if (shared_region != NULL) {
+               vm_shared_region_deallocate(shared_region);
+       }
  
-int
-reset_shared_file(
-       __unused struct proc *p,
-       __unused struct reset_shared_file_args *uap,
-       __unused int *retval)
-{
-       return ENOSYS;
-}
+       SHARED_REGION_TRACE_DEBUG(
+               ("shared_region: %p [%d(%s)] <- map\n",
+                (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                p->p_pid, p->p_comm));
  
-int
-new_system_shared_regions(
-       __unused struct proc *p,
-       __unused struct new_system_shared_regions_args *uap,
-       __unused int *retval)
-{
-       return ENOSYS;
+       return error;
  }
  
-
-
  int
-clone_system_shared_regions(
-       int             shared_regions_active,
-       int             chain_regions,
-       int             base_vnode)
-{
-       shared_region_mapping_t new_shared_region;
-       shared_region_mapping_t next;
-       shared_region_mapping_t old_shared_region;
-       struct shared_region_task_mappings old_info;
-       struct shared_region_task_mappings new_info;
-
-       vm_get_shared_region(current_task(), &old_shared_region);
-       old_info.self = (vm_offset_t)old_shared_region;
-       shared_region_mapping_info(old_shared_region,
-               &(old_info.text_region),   
-               &(old_info.text_size),
-               &(old_info.data_region),
-               &(old_info.data_size),
-               &(old_info.region_mappings),
-               &(old_info.client_base),
-               &(old_info.alternate_base),
-               &(old_info.alternate_next), 
-               &(old_info.fs_base),
-               &(old_info.system),
-               &(old_info.flags), &next);
-
-       if (shared_regions_active ||
-           base_vnode == ENV_DEFAULT_ROOT) {
-               if (shared_file_create_system_region(&new_shared_region,
-                                                    old_info.fs_base,
-                                                    old_info.system))
-                       return ENOMEM;
-       } else {
-               if (old_shared_region &&
-                   base_vnode == ENV_DEFAULT_ROOT) {
-                       base_vnode = old_info.fs_base;
-               }
-               new_shared_region =
-                       lookup_default_shared_region(base_vnode,
-                                                    old_info.system);
-               if (new_shared_region == NULL) {
-                       shared_file_boot_time_init(base_vnode,
-                                                  old_info.system);
-                       vm_get_shared_region(current_task(),
-                                            &new_shared_region);
-               } else {
-                       vm_set_shared_region(current_task(), new_shared_region);
-               }
-               if (old_shared_region)
-                       shared_region_mapping_dealloc(old_shared_region);
-       }
-       new_info.self = (vm_offset_t)new_shared_region;
-       shared_region_mapping_info(new_shared_region,
-               &(new_info.text_region),   
-               &(new_info.text_size),
-               &(new_info.data_region),
-               &(new_info.data_size),
-               &(new_info.region_mappings),
-               &(new_info.client_base),
-               &(new_info.alternate_base),
-               &(new_info.alternate_next), 
-               &(new_info.fs_base),
-               &(new_info.system),
-               &(new_info.flags), &next);
-       if(shared_regions_active) {
-          if(vm_region_clone(old_info.text_region, new_info.text_region)) {
-          panic("clone_system_shared_regions: shared region mis-alignment 1");
-               shared_region_mapping_dealloc(new_shared_region);
-               return(EINVAL);
-          }
-          if (vm_region_clone(old_info.data_region, new_info.data_region)) {
-          panic("clone_system_shared_regions: shared region mis-alignment 2");
-               shared_region_mapping_dealloc(new_shared_region);
-               return(EINVAL);
-          }
-          if (chain_regions) {
-                  /*
-                   * We want a "shadowed" clone, a private superset of the old
-                   * shared region.  The info about the old mappings is still
-                   * valid for us.
-                   */
-                  shared_region_object_chain_attach(
-                          new_shared_region, old_shared_region);
-          } else {
-                  /*
-                   * We want a completely detached clone with no link to
-                   * the old shared region.  We'll be removing some mappings
-                   * in our private, cloned, shared region, so the old mappings
-                   * will become irrelevant to us.  Since we have a private
-                   * "shared region" now, it isn't going to be shared with
-                   * anyone else and we won't need to maintain mappings info.
-                   */
-                  shared_region_object_chain_detached(new_shared_region);
-          }
-       }
-       if (vm_map_region_replace(current_map(), old_info.text_region, 
-                       new_info.text_region, old_info.client_base, 
-                       old_info.client_base+old_info.text_size)) {
-       panic("clone_system_shared_regions: shared region mis-alignment 3");
-               shared_region_mapping_dealloc(new_shared_region);
-               return(EINVAL);
-       }
-       if(vm_map_region_replace(current_map(), old_info.data_region, 
-                       new_info.data_region, 
-                       old_info.client_base + old_info.text_size, 
-                       old_info.client_base
-                               + old_info.text_size + old_info.data_size)) {
-       panic("clone_system_shared_regions: shared region mis-alignment 4");
-               shared_region_mapping_dealloc(new_shared_region);
-               return(EINVAL);
-       }
-       vm_set_shared_region(current_task(), new_shared_region);
-
-       /* consume the reference which wasn't accounted for in object */
-       /* chain attach */
-       if (!shared_regions_active || !chain_regions)
-               shared_region_mapping_dealloc(old_shared_region);
-
-       SHARED_REGION_TRACE(
-               SHARED_REGION_TRACE_INFO,
-               ("shared_region: %p task=%p "
-                "clone(active=%d, base=0x%x,chain=%d) "
-                "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
-                current_thread(), current_task(), 
-                shared_regions_active, base_vnode, chain_regions,
-                old_shared_region,
-                old_info.fs_base,
-                old_info.system,
-                old_info.flags,
-                new_shared_region,
-                new_info.fs_base,
-                new_info.system,
-                new_info.flags));
-
-       return(0);
-
-}
-
-/* header for the profile name file.  The profiled app info is held */
-/* in the data file and pointed to by elements in the name file     */
-
-struct profile_names_header {
-       unsigned int    number_of_profiles;
-       unsigned int    user_id;
-       unsigned int    version;
-       off_t           element_array;
-       unsigned int    spare1;
-       unsigned int    spare2;
-       unsigned int    spare3;
-};
-
-struct profile_element {
-       off_t           addr;
-       vm_size_t       size;
-       unsigned int    mod_date;
-       unsigned int    inode;
-       char name[12];
-};
-
-struct global_profile {
-       struct vnode    *names_vp;
-       struct vnode    *data_vp;
-       vm_offset_t     buf_ptr;
-       unsigned int    user;
-       unsigned int    age;
-       unsigned int    busy;
-};
-
-struct global_profile_cache {
-       int                     max_ele;
-       unsigned int            age;
-       struct global_profile   profiles[3];
-};
-
-/* forward declarations */
-int bsd_open_page_cache_files(unsigned int user,
-                             struct global_profile **profile);
-void bsd_close_page_cache_files(struct global_profile *profile);
-int bsd_search_page_cache_data_base(
-       struct  vnode                   *vp,
-       struct profile_names_header     *database,
-       char                            *app_name,
-       unsigned int                    mod_date,
-       unsigned int                    inode,
-       off_t                           *profile,
-       unsigned int                    *profile_size);
-
-struct global_profile_cache global_user_profile_cache =
-       {3, 0, {{NULL, NULL, 0, 0, 0, 0},
-                   {NULL, NULL, 0, 0, 0, 0},
-                   {NULL, NULL, 0, 0, 0, 0}} };
-
-/* BSD_OPEN_PAGE_CACHE_FILES:                                 */
-/* Caller provides a user id.  This id was used in            */
-/* prepare_profile_database to create two unique absolute     */
-/* file paths to the associated profile files.  These files   */
-/* are either opened or bsd_open_page_cache_files returns an  */
-/* error.  The header of the names file is then consulted.    */
-/* The header and the vnodes for the names and data files are */
-/* returned. */
-
-int
-bsd_open_page_cache_files(
-       unsigned int    user,
-       struct global_profile **profile)
+shared_region_map_and_slide_np(
+       struct proc                             *p,
+       struct shared_region_map_and_slide_np_args      *uap,
+       __unused int                                    *retvalp)
  {
-       const char *cache_path = "/var/vm/app_profile/";
-       struct proc     *p;
-       int             error;
-       vm_size_t       resid;
-       off_t           resid_off;
-       unsigned int    lru;
-       vm_size_t       size;
-
-       struct  vnode   *names_vp;
-       struct  vnode   *data_vp;
-       vm_offset_t     names_buf;
-       vm_offset_t     buf_ptr;
-
-       int             profile_names_length;
-       int             profile_data_length;
-       char            *profile_data_string;
-       char            *profile_names_string;
-       char            *substring;
-
-       off_t           file_size;
-       struct vfs_context  context;
-
-       kern_return_t   ret;
-
-       struct nameidata nd_names;
-       struct nameidata nd_data;
-       int             i;
-
-
-       p = current_proc();
-
-       context.vc_proc = p;
-       context.vc_ucred = kauth_cred_get();
-
-restart:
-       for(i = 0; i<global_user_profile_cache.max_ele; i++) {
-               if((global_user_profile_cache.profiles[i].user == user) 
-                       &&  (global_user_profile_cache.profiles[i].data_vp 
-                                                               != NULL)) {
-                       *profile = &global_user_profile_cache.profiles[i];
-                       /* already in cache, we're done */
-                       if ((*profile)->busy) {
-                                       /*
-                                       * drop funnel and wait 
-                                       */
-                               (void)tsleep((void *)
-                                       *profile, 
-                                       PRIBIO, "app_profile", 0);
-                               goto restart;
-                       }
-                       (*profile)->busy = 1;
-                       (*profile)->age = global_user_profile_cache.age;
+       struct shared_file_mapping_np   *mappings;
+       unsigned int                    mappings_count = uap->count;
+       kern_return_t                   kr = KERN_SUCCESS;
+       uint32_t                        slide = uap->slide;
+       
+#define SFM_MAX_STACK  8
+       struct shared_file_mapping_np   stack_mappings[SFM_MAX_STACK];
  
+       /* Is the process chrooted?? */
+       if (p->p_fd->fd_rdir != NULL) {
+               kr = EINVAL;
+               goto done;
+       }
+               
+       if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
+               if (kr == KERN_INVALID_ARGUMENT) {
                         /*
-                        * entries in cache are held with a valid
-                        * usecount... take an iocount which will
-                        * be dropped in "bsd_close_page_cache_files"
-                        * which is called after the read or writes to
-                        * these files are done
+                        * This will happen if we request sliding again 
+                        * with the same slide value that was used earlier
+                        * for the very first sliding.
                          */
-                       if ( (vnode_getwithref((*profile)->data_vp)) ) {
-                         
-                               vnode_rele((*profile)->data_vp);
-                               vnode_rele((*profile)->names_vp);
-
-                               (*profile)->data_vp = NULL;
-                               (*profile)->busy = 0;
-                               wakeup(*profile);
-
-                               goto restart;
-                       }
-                       if ( (vnode_getwithref((*profile)->names_vp)) ) {
-
-                               vnode_put((*profile)->data_vp);
-                               vnode_rele((*profile)->data_vp);
-                               vnode_rele((*profile)->names_vp);
-
-                               (*profile)->data_vp = NULL;
-                               (*profile)->busy = 0;
-                               wakeup(*profile);
-
-                               goto restart;
-                       }
-                       global_user_profile_cache.age+=1;
-                       return 0;
+                       kr = KERN_SUCCESS;
                 }
+               goto done;
         }
  
-       lru = global_user_profile_cache.age;
-       *profile = NULL;
-       for(i = 0; i<global_user_profile_cache.max_ele; i++) {
-               /* Skip entry if it is in the process of being reused */
-               if(global_user_profile_cache.profiles[i].data_vp ==
-                                               (struct vnode *)0xFFFFFFFF)
-                       continue;
-               /* Otherwise grab the first empty entry */
-               if(global_user_profile_cache.profiles[i].data_vp == NULL) {
-                       *profile = &global_user_profile_cache.profiles[i];
-                       (*profile)->age = global_user_profile_cache.age;
-                       break;
-               }
-               /* Otherwise grab the oldest entry */
-               if(global_user_profile_cache.profiles[i].age < lru) {
-                       lru = global_user_profile_cache.profiles[i].age;
-                       *profile = &global_user_profile_cache.profiles[i];
-               }
+       if (mappings_count == 0) {
+               SHARED_REGION_TRACE_INFO(
+                       ("shared_region: %p [%d(%s)] map(): "
+                        "no mappings\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm));
+               kr = 0; /* no mappings: we're done ! */
+               goto done;
+       } else if (mappings_count <= SFM_MAX_STACK) {
+               mappings = &stack_mappings[0];
+       } else {
+               SHARED_REGION_TRACE_ERROR(
+                       ("shared_region: %p [%d(%s)] map(): "
+                        "too many mappings (%d)\n",
+                        (void *)VM_KERNEL_ADDRPERM(current_thread()),
+                        p->p_pid, p->p_comm,
+                        mappings_count));
+               kr = KERN_FAILURE;
+               goto done;
         }
  
-       /* Did we set it? */
-       if (*profile == NULL) {
-               /*
-                * No entries are available; this can only happen if all
-                * of them are currently in the process of being reused;
-                * if this happens, we sleep on the address of the first
-                * element, and restart.  This is less than ideal, but we
-                * know it will work because we know that there will be a
-                * wakeup on any entry currently in the process of being
-                * reused.
-                *
-                * XXX Reccomend a two handed clock and more than 3 total
-                * XXX cache entries at some point in the future.
-                */
-                       /*
-                       * drop funnel and wait 
-                       */
-               (void)tsleep((void *)
-                &global_user_profile_cache.profiles[0],
-                       PRIBIO, "app_profile", 0);
-               goto restart;
-       }
-
-       /*
-        * If it's currently busy, we've picked the one at the end of the
-        * LRU list, but it's currently being actively used.  We sleep on
-        * its address and restart.
-        */
-       if ((*profile)->busy) {
-                       /*
-                       * drop funnel and wait 
-                       */
-               (void)tsleep((void *)
-                       *profile, 
-                       PRIBIO, "app_profile", 0);
-               goto restart;
-       }
-       (*profile)->busy = 1;
-       (*profile)->user = user;
-
-       /*
-        * put dummy value in for now to get competing request to wait
-        * above until we are finished
-        *
-        * Save the data_vp before setting it, so we can set it before
-        * we kmem_free() or vrele().  If we don't do this, then we
-        * have a potential funnel race condition we have to deal with.
-        */
-       data_vp = (*profile)->data_vp;
-       (*profile)->data_vp = (struct vnode *)0xFFFFFFFF;
-
-       /*
-        * Age the cache here in all cases; this guarantees that we won't
-        * be reusing only one entry over and over, once the system reaches
-        * steady-state.
-        */
-       global_user_profile_cache.age+=1;
-
-       if(data_vp != NULL) {
-               kmem_free(kernel_map, 
-                               (*profile)->buf_ptr, 4 * PAGE_SIZE);
-               if ((*profile)->names_vp) {
-                       vnode_rele((*profile)->names_vp);
-                       (*profile)->names_vp = NULL;
-               }
-               vnode_rele(data_vp);
-       }
-       
-       /* Try to open the appropriate users profile files */
-       /* If neither file is present, try to create them  */
-       /* If one file is present and the other not, fail. */
-       /* If the files do exist, check them for the app_file */
-       /* requested and read it in if present */
-
-       ret = kmem_alloc(kernel_map,
-               (vm_offset_t *)&profile_data_string, PATH_MAX);
-
-       if(ret) {
-               (*profile)->data_vp = NULL;
-               (*profile)->busy = 0;
-               wakeup(*profile);
-               return ENOMEM;
-       }
-
-       /* Split the buffer in half since we know the size of */
-       /* our file path and our allocation is adequate for   */
-       /* both file path names */
-       profile_names_string = profile_data_string + (PATH_MAX/2);
-
-
-       strcpy(profile_data_string, cache_path);
-       strcpy(profile_names_string, cache_path);
-       profile_names_length = profile_data_length 
-                       = strlen(profile_data_string);
-       substring = profile_data_string + profile_data_length;
-       sprintf(substring, "%x_data", user);
-       substring = profile_names_string + profile_names_length;
-       sprintf(substring, "%x_names", user);
-
-       /* We now have the absolute file names */
-
-       ret = kmem_alloc(kernel_map,
-                               (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
-       if(ret) {
-               kmem_free(kernel_map, 
-                               (vm_offset_t)profile_data_string, PATH_MAX);
-               (*profile)->data_vp = NULL;
-               (*profile)->busy = 0;
-               wakeup(*profile);
-               return ENOMEM;
-       }
-
-       NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF, 
-                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
-       NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF, 
-                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
-
-       if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
-#ifdef notdef
-               printf("bsd_open_page_cache_files: CacheData file not found %s\n",
-                       profile_data_string);
-#endif
-               kmem_free(kernel_map, 
-                               (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-               kmem_free(kernel_map, 
-                       (vm_offset_t)profile_data_string, PATH_MAX);
-               (*profile)->data_vp = NULL;
-               (*profile)->busy = 0;
-               wakeup(*profile);
-               return error;
+       if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
+               goto done;
         }
-       data_vp = nd_data.ni_vp;
-
-       if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
-               printf("bsd_open_page_cache_files: NamesData file not found %s\n",
-                       profile_data_string);
-               kmem_free(kernel_map, 
-                               (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-               kmem_free(kernel_map, 
-                       (vm_offset_t)profile_data_string, PATH_MAX);
  
-               vnode_rele(data_vp);
-               vnode_put(data_vp);
  
-               (*profile)->data_vp = NULL;
-               (*profile)->busy = 0;
-               wakeup(*profile);
-               return error;
+       kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
+                                         slide,
+                                         uap->slide_start, uap->slide_size);
+       if (kr != KERN_SUCCESS) {
+               return kr;
         }
-       names_vp = nd_names.ni_vp;
-
-       if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
-               printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
-               kmem_free(kernel_map, 
-                       (vm_offset_t)profile_data_string, PATH_MAX);
-               kmem_free(kernel_map, 
-                       (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-
-               vnode_rele(names_vp);
-               vnode_put(names_vp);
-               vnode_rele(data_vp);
-               vnode_put(data_vp);
  
-               (*profile)->data_vp = NULL;
-               (*profile)->busy = 0;
-               wakeup(*profile);
-               return error;
-       }
+done:
+       return kr;
+}
  
-       size = file_size;
-       if(size > 4 * PAGE_SIZE) 
-               size = 4 * PAGE_SIZE;
-       buf_ptr = names_buf;
-       resid_off = 0;
-
-       while(size) {
-               int resid_int;
-               error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr, 
-                       size, resid_off,
-                       UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
-                       &resid_int, p);
-               resid = (vm_size_t) resid_int;
-               if((error) || (size == resid)) {
-                       if(!error) {
-                               error = EINVAL;
-                       }
-                       kmem_free(kernel_map, 
-                               (vm_offset_t)profile_data_string, PATH_MAX);
-                       kmem_free(kernel_map, 
-                               (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-
-                       vnode_rele(names_vp);
-                       vnode_put(names_vp);
-                       vnode_rele(data_vp);
-                       vnode_put(data_vp);
-
-                       (*profile)->data_vp = NULL;
-                       (*profile)->busy = 0;
-                       wakeup(*profile);
-                       return error;
-               }
-               buf_ptr += size-resid;
-               resid_off += size-resid;
-               size = resid;
-       }
-       kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);
+/* sysctl overflow room */
  
-       (*profile)->names_vp = names_vp;
-       (*profile)->data_vp = data_vp;
-       (*profile)->buf_ptr = names_buf;
+SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
+           (int *) &page_size, 0, "vm page size");
  
-       /*
-        * at this point, the both the names_vp and the data_vp have
-        * both a valid usecount and an iocount held
-        */
-       return 0;
+/* vm_page_free_target is provided as a makeshift solution for applications that want to
+       allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
+       reclaimed. It allows the app to calculate how much memory is free outside the free target. */
+extern unsigned int    vm_page_free_target;
+SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED, 
+                  &vm_page_free_target, 0, "Pageout daemon free target");
  
-}
+extern unsigned int    vm_memory_pressure;
+SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_memory_pressure, 0, "Memory pressure indicator");
  
-void
-bsd_close_page_cache_files(
-       struct global_profile *profile)
+static int
+vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
  {
-        vnode_put(profile->data_vp);
-       vnode_put(profile->names_vp);
+#pragma unused(oidp, arg1, arg2)
+       unsigned int page_free_wanted;
  
-       profile->busy = 0;
-       wakeup(profile);
+       page_free_wanted = mach_vm_ctl_page_free_wanted();
+       return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
  }
+SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
+           CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+           0, 0, vm_ctl_page_free_wanted, "I", "");
+
+extern unsigned int    vm_page_purgeable_count;
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_purgeable_count, 0, "Purgeable page count");
+
+extern unsigned int    vm_page_purgeable_wired_count;
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
+
+extern unsigned int    vm_pageout_purged_objects;
+SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_pageout_purged_objects, 0, "System purged object count");
+
+extern int madvise_free_debug;
+SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
+          &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
+
+SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reusable_pages_success, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reusable_pages_failure, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reusable_pages_shared, "");
+SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.all_reusable_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.partial_reusable_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reuse_pages_success, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reuse_pages_failure, "");
+SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.all_reuse_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.partial_reuse_calls, "");
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.can_reuse_success, "");
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.can_reuse_failure, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reusable_reclaimed, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reusable_nonwritable, "");
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.reusable_shared, "");
+SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
+          &vm_page_stats_reusable.free_shared, "");
+
+
+extern unsigned int vm_page_free_count, vm_page_speculative_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
+
+extern unsigned int vm_page_cleaned_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
+
+extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
+SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
+
+/* pageout counts */
+extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
+extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");
+
+extern unsigned int vm_pageout_freed_from_cleaned;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");
+
+/* counts of pages entering the cleaned queue */
+extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");
+
+/* counts of pages leaving the cleaned queue */
+extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed"); /* note: the OID name is "pageout_cleaned" but the backing variable is vm_pageout_cleaned_reclaimed */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated) */
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
+SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
+
+/* counts of pages prefaulted when entering a memory object */
+extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
+SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
+SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
+
+#if CONFIG_SECLUDED_MEMORY
+
+SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
+extern unsigned int vm_page_secluded_target;
+extern unsigned int vm_page_secluded_count;
+extern unsigned int vm_page_secluded_count_free;
+extern unsigned int vm_page_secluded_count_inuse;
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
+
+extern struct vm_page_secluded_data vm_page_secluded;
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
+
+extern uint64_t vm_pageout_freed_from_secluded;
+extern uint64_t vm_pageout_secluded_reactivated;
+extern uint64_t vm_pageout_secluded_burst_count;
+SYSCTL_QUAD(_vm, OID_AUTO, pageout_freed_from_secluded, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_secluded, "");
+SYSCTL_QUAD(_vm, OID_AUTO, pageout_secluded_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_secluded_reactivated, "Secluded pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated) -- NOTE(review): wording is identical to the pageout_cleaned_reactivated comment; confirm it applies to the secluded counter */
+SYSCTL_QUAD(_vm, OID_AUTO, pageout_secluded_burst_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_secluded_burst_count, "");
+
+#endif /* CONFIG_SECLUDED_MEMORY */
  
-int
-bsd_read_page_cache_file(
-       unsigned int    user,
-       int             *fid,
-       int             *mod,
-       char            *app_name,
-       struct vnode    *app_vp,
-       vm_offset_t     *buffer,
-       vm_offset_t     *bufsize)
-{
-
-       boolean_t       funnel_state;
-
-       struct proc     *p;
-       int             error;
-       unsigned int    resid;
-
-       off_t           profile;
-       unsigned int    profile_size;
-
-       vm_offset_t     names_buf;
-       struct vnode_attr       va;
-       struct vfs_context  context;
-
-       kern_return_t   ret;
-
-       struct  vnode   *names_vp;
-       struct  vnode   *data_vp;
-
-       struct global_profile *uid_files;
-
-       funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
-       /* Try to open the appropriate users profile files */
-       /* If neither file is present, try to create them  */
-       /* If one file is present and the other not, fail. */
-       /* If the files do exist, check them for the app_file */
-       /* requested and read it in if present */
-
-
-       error = bsd_open_page_cache_files(user, &uid_files);
-       if(error) {
-               thread_funnel_set(kernel_flock, funnel_state);
-               return EINVAL;
-       }
-
-       p = current_proc();
+#include <kern/thread.h>
+#include <sys/user.h>
  
-       names_vp = uid_files->names_vp;
-       data_vp = uid_files->data_vp;
-       names_buf = uid_files->buf_ptr;
+void vm_pageout_io_throttle(void);
  
-       context.vc_proc = p;
-       context.vc_ucred = kauth_cred_get();
+void vm_pageout_io_throttle(void) {
+       struct uthread *uthread = get_bsdthread_info(current_thread());
+ 
+               /*
+                * If the thread is marked as a low-priority I/O type
+                * and the I/O issued during this cleaning operation
+                * collided with normal I/O operations, delay it here
+                * in order to mitigate the impact of this task on the
+                * normal operation of the system.
+                */
  
-       VATTR_INIT(&va);
-       VATTR_WANTED(&va, va_fileid);
-       VATTR_WANTED(&va, va_modify_time);
-       
-       if ((error = vnode_getattr(app_vp, &va, &context))) {
-               printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
-               bsd_close_page_cache_files(uid_files);
-               thread_funnel_set(kernel_flock, funnel_state);
-               return error;
+       if (uthread->uu_lowpri_window) {
+               throttle_lowpri_io(1);
         }
  
-       *fid = (u_long)va.va_fileid;
-       *mod = va.va_modify_time.tv_sec;
-               
-       if (bsd_search_page_cache_data_base(
-                   names_vp,
-                   (struct profile_names_header *)names_buf,
-                   app_name, 
-                   (unsigned int) va.va_modify_time.tv_sec,  
-                   (u_long)va.va_fileid, &profile, &profile_size) == 0) {
-               /* profile is an offset in the profile data base */
-               /* It is zero if no profile data was found */
-               
-               if(profile_size == 0) {
-                       *buffer = 0;
-                       *bufsize = 0;
-                       bsd_close_page_cache_files(uid_files);
-                       thread_funnel_set(kernel_flock, funnel_state);
-                       return 0;
-               }
-               ret = (vm_offset_t)(kmem_alloc(kernel_map, buffer, profile_size));
-               if(ret) {
-                       bsd_close_page_cache_files(uid_files);
-                       thread_funnel_set(kernel_flock, funnel_state);
-                       return ENOMEM;
-               }
-               *bufsize = profile_size;
-               while(profile_size) {
-                       int resid_int;
-                       error = vn_rdwr(UIO_READ, data_vp, 
-                               (caddr_t) *buffer, profile_size, 
-                               profile, UIO_SYSSPACE32, IO_NODELOCKED, 
-                               kauth_cred_get(), &resid_int, p);
-                       resid = (vm_size_t) resid_int;
-                       if((error) || (profile_size == resid)) {
-                               bsd_close_page_cache_files(uid_files);
-                               kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
-                               thread_funnel_set(kernel_flock, funnel_state);
-                               return EINVAL;
-                       }
-                       profile += profile_size - resid;
-                       profile_size = resid;
-               }
-               bsd_close_page_cache_files(uid_files);
-               thread_funnel_set(kernel_flock, funnel_state);
-               return 0;
-       } else {
-               bsd_close_page_cache_files(uid_files);
-               thread_funnel_set(kernel_flock, funnel_state);
-               return EINVAL;
-       }
-       
  }
  
  int
-bsd_search_page_cache_data_base(
-       struct  vnode                   *vp,
-       struct profile_names_header     *database,
-       char                            *app_name,
-       unsigned int                    mod_date,
-       unsigned int                    inode,
-       off_t                           *profile,
-       unsigned int                    *profile_size)
+vm_pressure_monitor(
+       __unused struct proc *p,
+       struct vm_pressure_monitor_args *uap,
+       int *retval)
  {
+       kern_return_t   kr;
+       uint32_t        pages_reclaimed;
+       uint32_t        pages_wanted;
  
-       struct proc             *p;
-
-       unsigned int            i;
-       struct profile_element  *element;
-       unsigned int            ele_total;
-       unsigned int            extended_list = 0;
-       off_t                   file_off = 0;
-       unsigned int            size;
-       off_t                   resid_off;
-       unsigned int            resid;
-       vm_offset_t             local_buf = 0;
-
-       int                     error;
-       kern_return_t           ret;
-
-       p = current_proc();
+       kr = mach_vm_pressure_monitor(
+               (boolean_t) uap->wait_for_pressure,
+               uap->nsecs_monitored,
+               (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
+               &pages_wanted);
  
-       if(((vm_offset_t)database->element_array) !=
-                               sizeof(struct profile_names_header)) {
+       switch (kr) {
+       case KERN_SUCCESS:
+               break;
+       case KERN_ABORTED:
+               return EINTR;
+       default:
                 return EINVAL;
         }
-       element = (struct profile_element *)(
-                       (vm_offset_t)database->element_array + 
-                                               (vm_offset_t)database);
  
-       ele_total = database->number_of_profiles;
-       
-       *profile = 0;
-       *profile_size = 0;
-       while(ele_total) {
-               /* note: code assumes header + n*ele comes out on a page boundary */
-               if(((local_buf == 0) && (sizeof(struct profile_names_header) + 
-                       (ele_total * sizeof(struct profile_element))) 
-                                       > (PAGE_SIZE * 4)) ||
-                       ((local_buf != 0) && 
-                               (ele_total * sizeof(struct profile_element))
-                                        > (PAGE_SIZE * 4))) {
-                       extended_list = ele_total;
-                       if(element == (struct profile_element *)
-                               ((vm_offset_t)database->element_array + 
-                                               (vm_offset_t)database)) {
-                               ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
-                       } else {
-                               ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
-                       }
-                       extended_list -= ele_total;
-               }
-               for (i=0; i<ele_total; i++) {
-                       if((mod_date == element[i].mod_date) 
-                                       && (inode == element[i].inode)) {
-                               if(strncmp(element[i].name, app_name, 12) == 0) {
-                                       *profile = element[i].addr;
-                                       *profile_size = element[i].size;
-                                       if(local_buf != 0) {
-                                               kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
-                                       }
-                                       return 0;
-                               }
-                       }
+       if (uap->pages_reclaimed) {
+               if (copyout((void *)&pages_reclaimed,
+                           uap->pages_reclaimed,
+                           sizeof (pages_reclaimed)) != 0) {
+                       return EFAULT;
                 }
-               if(extended_list == 0)
-                       break;
-               if(local_buf == 0) {
-                       ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
-                       if(ret != KERN_SUCCESS) {
-                               return ENOMEM;
-                       }
-               }
-               element = (struct profile_element *)local_buf;
-               ele_total = extended_list;
-               extended_list = 0;
-               file_off +=  4 * PAGE_SIZE;
-               if((ele_total * sizeof(struct profile_element)) > 
-                                                       (PAGE_SIZE * 4)) {
-                       size = PAGE_SIZE * 4;
-               } else {
-                       size = ele_total * sizeof(struct profile_element);
-               }
-               resid_off = 0;
-               while(size) {
-                       int resid_int;
-                       error = vn_rdwr(UIO_READ, vp, 
-                               CAST_DOWN(caddr_t, (local_buf + resid_off)),
-                               size, file_off + resid_off, UIO_SYSSPACE32, 
-                               IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
-                       resid = (vm_size_t) resid_int;
-                       if((error) || (size == resid)) {
-                               if(local_buf != 0) {
-                                       kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
-                               }
-                               return EINVAL;
-                       }
-                       resid_off += size-resid;
-                       size = resid;
-               }
-       }
-       if(local_buf != 0) {
-               kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
         }
+
+       *retval = (int) pages_wanted;
         return 0;
  }
  
  int
-bsd_write_page_cache_file(
-       unsigned int    user,
-       char            *file_name,
-       caddr_t         buffer,
-       vm_size_t       size,
-       int             mod,
-       int             fid)
+kas_info(struct proc *p,
+                         struct kas_info_args *uap,
+                         int *retval __unused)
  {
-       struct proc             *p;
-       int                             resid;
-       off_t                   resid_off;
-       int                             error;
-       boolean_t               funnel_state;
-       off_t                   file_size;
-       struct vfs_context      context;
-       off_t                   profile;
-       unsigned int    profile_size;
-
-       vm_offset_t     names_buf;
-       struct  vnode   *names_vp;
-       struct  vnode   *data_vp;
-       struct  profile_names_header *profile_header;
-       off_t                   name_offset;
-       struct global_profile *uid_files;
-
-
-       funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
+#ifdef SECURE_KERNEL
+       (void)p;
+       (void)uap;
+       return ENOTSUP;
+#else /* !SECURE_KERNEL */
+       int                     selector = uap->selector;
+       user_addr_t     valuep = uap->value;
+       user_addr_t     sizep = uap->size;
+       user_size_t size;
+       int                     error;
  
-       error = bsd_open_page_cache_files(user, &uid_files);
-       if(error) {
-               thread_funnel_set(kernel_flock, funnel_state);
-               return EINVAL;
+       if (!kauth_cred_issuser(kauth_cred_get())) {
+               return EPERM;
         }
  
-       p = current_proc();
-
-       names_vp = uid_files->names_vp;
-       data_vp = uid_files->data_vp;
-       names_buf = uid_files->buf_ptr;
-
-       /* Stat data file for size */
-
-       context.vc_proc = p;
-       context.vc_ucred = kauth_cred_get();
+#if CONFIG_MACF
+       error = mac_system_check_kas_info(kauth_cred_get(), selector);
+       if (error) {
+               return error;
+       }
+#endif
  
-       if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
-               printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
-               bsd_close_page_cache_files(uid_files);
-               thread_funnel_set(kernel_flock, funnel_state);
+       if (IS_64BIT_PROCESS(p)) {
+               user64_size_t size64;
+               error = copyin(sizep, &size64, sizeof(size64));
+               size = (user_size_t)size64;
+       } else {
+               user32_size_t size32;
+               error = copyin(sizep, &size32, sizeof(size32));
+               size = (user_size_t)size32;
+       }
+       if (error) {
                 return error;
         }
-               
-       if (bsd_search_page_cache_data_base(names_vp, 
-                       (struct profile_names_header *)names_buf, 
-                       file_name, (unsigned int) mod,  
-                       fid, &profile, &profile_size) == 0) {
-               /* profile is an offset in the profile data base */
-               /* It is zero if no profile data was found */
-               
-               if(profile_size == 0) {
-                       unsigned int    header_size;
-                       vm_offset_t     buf_ptr;
-
-                       /* Our Write case */
-
-                       /* read header for last entry */
-                       profile_header = 
-                               (struct profile_names_header *)names_buf;
-                       name_offset = sizeof(struct profile_names_header) + 
-                               (sizeof(struct profile_element) 
-                                       * profile_header->number_of_profiles);
-                       profile_header->number_of_profiles += 1;
-
-                       if(name_offset < PAGE_SIZE * 4) {
-                               struct profile_element  *name;
-                               /* write new entry */
-                               name = (struct profile_element *)
-                                       (names_buf + (vm_offset_t)name_offset);
-                               name->addr =  file_size;
-                               name->size = size;
-                               name->mod_date = mod;
-                               name->inode = fid;
-                               strncpy (name->name, file_name, 12);
-                       } else {
-                               unsigned int    ele_size;
-                               struct profile_element  name;
-                               /* write new entry */
-                               name.addr = file_size;
-                               name.size = size;
-                               name.mod_date = mod;
-                               name.inode = fid;
-                               strncpy (name.name, file_name, 12);
-                               /* write element out separately */
-                               ele_size = sizeof(struct profile_element);
-                               buf_ptr = (vm_offset_t)&name;
-                               resid_off = name_offset;
-
-                               while(ele_size) {
-                                       error = vn_rdwr(UIO_WRITE, names_vp, 
-                                               (caddr_t)buf_ptr, 
-                                               ele_size, resid_off, 
-                                               UIO_SYSSPACE32, IO_NODELOCKED, 
-                                               kauth_cred_get(), &resid, p);
-                                       if(error) {
-                                               printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
-                                               bsd_close_page_cache_files(
-                                                       uid_files);
-                                               thread_funnel_set(
-                                                       kernel_flock, 
-                                                       funnel_state);
-                                               return error;
-                                       }
-                                       buf_ptr += (vm_offset_t)
-                                                       ele_size-resid;
-                                       resid_off += ele_size-resid;
-                                       ele_size = resid;
-                               }
-                       }
  
-                       if(name_offset < PAGE_SIZE * 4) {
-                               header_size = name_offset + 
-                                       sizeof(struct profile_element);
+       switch (selector) {
+               case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
+                       {
+                               uint64_t slide = vm_kernel_slide;
+
+                               if (sizeof(slide) != size) {
+                                       return EINVAL;
+                               }
                                 
-                       } else {
-                               header_size = 
-                                       sizeof(struct profile_names_header);
-                       }
-                       buf_ptr = (vm_offset_t)profile_header;
-                       resid_off = 0;
-
-                       /* write names file header */
-                       while(header_size) {
-                               error = vn_rdwr(UIO_WRITE, names_vp, 
-                                       (caddr_t)buf_ptr, 
-                                       header_size, resid_off, 
-                                       UIO_SYSSPACE32, IO_NODELOCKED, 
-                                       kauth_cred_get(), &resid, p);
-                               if(error) {
-                                       printf("bsd_write_page_cache_file: Can't write header %x\n", user);
-                                       bsd_close_page_cache_files(
-                                               uid_files);
-                                       thread_funnel_set(
-                                               kernel_flock, funnel_state);
+                               if (IS_64BIT_PROCESS(p)) {
+                                       user64_size_t size64 = (user64_size_t)size;
+                                       error = copyout(&size64, sizep, sizeof(size64));
+                               } else {
+                                       user32_size_t size32 = (user32_size_t)size;
+                                       error = copyout(&size32, sizep, sizeof(size32));
+                               }
+                               if (error) {
                                         return error;
                                 }
-                               buf_ptr += (vm_offset_t)header_size-resid;
-                               resid_off += header_size-resid;
-                               header_size = resid;
-                       }
-                       /* write profile to data file */
-                       resid_off = file_size;
-                       while(size) {
-                               error = vn_rdwr(UIO_WRITE, data_vp, 
-                                       (caddr_t)buffer, size, resid_off, 
-                                       UIO_SYSSPACE32, IO_NODELOCKED, 
-                                       kauth_cred_get(), &resid, p);
-                               if(error) {
-                                       printf("bsd_write_page_cache_file: Can't write header %x\n", user);
-                                       bsd_close_page_cache_files(
-                                               uid_files);
-                                       thread_funnel_set(
-                                               kernel_flock, funnel_state);
+                               
+                               error = copyout(&slide, valuep, sizeof(slide));
+                               if (error) {
                                         return error;
                                 }
-                               buffer += size-resid;
-                               resid_off += size-resid;
-                               size = resid;
                         }
-                       bsd_close_page_cache_files(uid_files);
-                       thread_funnel_set(kernel_flock, funnel_state);
-                       return 0;
-               }
-               /* Someone else wrote a twin profile before us */
-               bsd_close_page_cache_files(uid_files);
-               thread_funnel_set(kernel_flock, funnel_state);
-               return 0;
-       } else {                
-               bsd_close_page_cache_files(uid_files);
-               thread_funnel_set(kernel_flock, funnel_state);
-               return EINVAL;
-       }
-       
-}
-
-int
-prepare_profile_database(int   user)
-{
-       const char *cache_path = "/var/vm/app_profile/";
-       struct proc     *p;
-       int             error;
-       int             resid;
-       off_t           resid_off;
-       vm_size_t       size;
-
-       struct  vnode   *names_vp;
-       struct  vnode   *data_vp;
-       vm_offset_t     names_buf;
-       vm_offset_t     buf_ptr;
-
-       int             profile_names_length;
-       int             profile_data_length;
-       char            *profile_data_string;
-       char            *profile_names_string;
-       char            *substring;
-
-       struct vnode_attr va;
-       struct vfs_context context;
-
-       struct  profile_names_header *profile_header;
-       kern_return_t   ret;
-
-       struct nameidata nd_names;
-       struct nameidata nd_data;
-
-       p = current_proc();
-
-       context.vc_proc = p;
-       context.vc_ucred = kauth_cred_get();
-
-       ret = kmem_alloc(kernel_map,
-               (vm_offset_t *)&profile_data_string, PATH_MAX);
-
-       if(ret) {
-               return ENOMEM;
-       }
-
-       /* Split the buffer in half since we know the size of */
-       /* our file path and our allocation is adequate for   */
-       /* both file path names */
-       profile_names_string = profile_data_string + (PATH_MAX/2);
-
-
-       strcpy(profile_data_string, cache_path);
-       strcpy(profile_names_string, cache_path);
-       profile_names_length = profile_data_length 
-                       = strlen(profile_data_string);
-       substring = profile_data_string + profile_data_length;
-       sprintf(substring, "%x_data", user);
-       substring = profile_names_string + profile_names_length;
-       sprintf(substring, "%x_names", user);
-
-       /* We now have the absolute file names */
-
-       ret = kmem_alloc(kernel_map,
-                               (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
-       if(ret) {
-               kmem_free(kernel_map, 
-                               (vm_offset_t)profile_data_string, PATH_MAX);
-               return ENOMEM;
-       }
-
-       NDINIT(&nd_names, LOOKUP, FOLLOW, 
-                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
-       NDINIT(&nd_data, LOOKUP, FOLLOW,
-                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
-
-       if ( (error = vn_open(&nd_data, 
-                                                       O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
-                       kmem_free(kernel_map, 
-                                       (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-                       kmem_free(kernel_map, 
-                               (vm_offset_t)profile_data_string, PATH_MAX);
-                       
-                       return 0;
-       }
-       data_vp = nd_data.ni_vp;
-
-       if ( (error = vn_open(&nd_names, 
-                                                       O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
-                       printf("prepare_profile_database: Can't create CacheNames %s\n",
-                               profile_data_string);
-                       kmem_free(kernel_map, 
-                                       (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-                       kmem_free(kernel_map, 
-                               (vm_offset_t)profile_data_string, PATH_MAX);
-
-                       vnode_rele(data_vp);
-                       vnode_put(data_vp);
-
-                       return error;
-       }
-       names_vp = nd_names.ni_vp;
-
-       /* Write Header for new names file */
-
-       profile_header = (struct profile_names_header *)names_buf;
-
-       profile_header->number_of_profiles = 0;
-       profile_header->user_id =  user;
-       profile_header->version = 1;
-       profile_header->element_array = 
-                               sizeof(struct profile_names_header);
-       profile_header->spare1 = 0;
-       profile_header->spare2 = 0;
-       profile_header->spare3 = 0;
-
-       size = sizeof(struct profile_names_header);
-       buf_ptr = (vm_offset_t)profile_header;
-       resid_off = 0;
-
-       while(size) {
-               error = vn_rdwr(UIO_WRITE, names_vp, 
-                               (caddr_t)buf_ptr, size, resid_off,
-                               UIO_SYSSPACE32, IO_NODELOCKED, 
-                               kauth_cred_get(), &resid, p);
-               if(error) {
-                       printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
-                       kmem_free(kernel_map, 
-                               (vm_offset_t)names_buf, 4 * PAGE_SIZE);
-                       kmem_free(kernel_map, 
-                               (vm_offset_t)profile_data_string, 
-                               PATH_MAX);
-
-                       vnode_rele(names_vp);
-                       vnode_put(names_vp);
-                       vnode_rele(data_vp);
-                       vnode_put(data_vp);
-
-                       return error;
-               }
-               buf_ptr += size-resid;
-               resid_off += size-resid;
-               size = resid;
+                       break;
+               default:
+                       return EINVAL;
         }
-       VATTR_INIT(&va);
-       VATTR_SET(&va, va_uid, user);
  
-               error = vnode_setattr(names_vp, &va, &context);
-       if(error) {
-               printf("prepare_profile_database: "
-                       "Can't set user %s\n", profile_names_string);
-       }
-       vnode_rele(names_vp);
-       vnode_put(names_vp);
-       
-       VATTR_INIT(&va);
-       VATTR_SET(&va, va_uid, user);
-               error = vnode_setattr(data_vp, &va, &context);
-       if(error) {
-               printf("prepare_profile_database: "
-                       "Can't set user %s\n", profile_data_string);
-       }
-       vnode_rele(data_vp);
-       vnode_put(data_vp);
-
-       kmem_free(kernel_map, 
-                       (vm_offset_t)profile_data_string, PATH_MAX);
-       kmem_free(kernel_map, 
-                       (vm_offset_t)names_buf, 4 * PAGE_SIZE);
         return 0;
-
+#endif /* !SECURE_KERNEL */
  }