xnu-792.6.76.tar.gz

[apple/xnu.git] / bsd / vm / vm_unix.c
diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c

index 41d4714bc40192fbad24932d400bcd2d14c909c2..f20fc6e2c1d62578eac116881396c3a9eb1af4a9 100644 (file)
--- a/bsd/vm/vm_unix.c
+++ b/bsd/vm/vm_unix.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   *
   * @APPLE_LICENSE_HEADER_START@
   * 
@@ -28,74 +28,109 @@
  
  /*
   */
+
+
  #include <meta_features.h>
  
  #include <kern/task.h>
  #include <kern/thread.h>
  #include <kern/debug.h>
  #include <kern/lock.h>
+#include <mach/mach_traps.h>
  #include <mach/time_value.h>
+#include <mach/vm_map.h>
  #include <mach/vm_param.h>
  #include <mach/vm_prot.h>
  #include <mach/port.h>
  
+#include <sys/file_internal.h>
  #include <sys/param.h>
  #include <sys/systm.h>
  #include <sys/dir.h>
  #include <sys/namei.h>
-#include <sys/proc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
  #include <sys/vm.h>
  #include <sys/file.h>
-#include <sys/vnode.h>
-#include <sys/buf.h>
+#include <sys/vnode_internal.h>
  #include <sys/mount.h>
  #include <sys/trace.h>
  #include <sys/kernel.h>
-#include <sys/ubc.h>
+#include <sys/ubc_internal.h>
+#include <sys/user.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/mman.h>
+
+#include <bsm/audit_kernel.h>
+#include <bsm/audit_kevents.h>
  
  #include <kern/kalloc.h>
-#include <kern/parallel.h>
  #include <vm/vm_map.h>
  #include <vm/vm_kern.h>
  
  #include <machine/spl.h>
+
  #include <mach/shared_memory_server.h>
+#include <vm/vm_shared_memory_server.h>
  
-useracc(addr, len, prot)
-       caddr_t addr;
-       u_int   len;
-       int     prot;
+#include <vm/vm_protos.h>
+
+
+int
+useracc(
+       user_addr_t     addr,
+       user_size_t     len,
+       int     prot)
  {
         return (vm_map_check_protection(
                         current_map(),
-                       trunc_page(addr), round_page(addr+len),
+                       vm_map_trunc_page(addr), vm_map_round_page(addr+len),
                         prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
  }
  
-vslock(addr, len)
-       caddr_t addr;
-       int     len;
+int
+vslock(
+       user_addr_t     addr,
+       user_size_t     len)
  {
-       vm_map_wire(current_map(), trunc_page(addr),
-                       round_page(addr+len), 
+       kern_return_t kret;
+       kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
+                       vm_map_round_page(addr+len), 
                         VM_PROT_READ | VM_PROT_WRITE ,FALSE);
+
+       switch (kret) {
+       case KERN_SUCCESS:
+               return (0);
+       case KERN_INVALID_ADDRESS:
+       case KERN_NO_SPACE:
+               return (ENOMEM);
+       case KERN_PROTECTION_FAILURE:
+               return (EACCES);
+       default:
+               return (EINVAL);
+       }
  }
  
-vsunlock(addr, len, dirtied)
-       caddr_t addr;
-       int     len;
-       int dirtied;
+int
+vsunlock(
+       user_addr_t addr,
+       user_size_t len,
+       __unused int dirtied)
  {
-       pmap_t          pmap;
  #if FIXME  /* [ */
+       pmap_t          pmap;
         vm_page_t       pg;
+       vm_map_offset_t vaddr;
+       ppnum_t         paddr;
  #endif  /* FIXME ] */
-       vm_offset_t     vaddr, paddr;
+       kern_return_t kret;
  
  #if FIXME  /* [ */
         if (dirtied) {
                 pmap = get_task_pmap(current_task());
-               for (vaddr = trunc_page(addr); vaddr < round_page(addr+len);
+               for (vaddr = vm_map_trunc_page(addr);
+                    vaddr < vm_map_round_page(addr+len);
                                 vaddr += PAGE_SIZE) {
                         paddr = pmap_extract(pmap, vaddr);
                         pg = PHYS_TO_VM_PAGE(paddr);
@@ -106,15 +141,25 @@ vsunlock(addr, len, dirtied)
  #ifdef lint
         dirtied++;
  #endif /* lint */
-       vm_map_unwire(current_map(), trunc_page(addr),
-                               round_page(addr+len), FALSE);
+       kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
+                               vm_map_round_page(addr+len), FALSE);
+       switch (kret) {
+       case KERN_SUCCESS:
+               return (0);
+       case KERN_INVALID_ADDRESS:
+       case KERN_NO_SPACE:
+               return (ENOMEM);
+       case KERN_PROTECTION_FAILURE:
+               return (EACCES);
+       default:
+               return (EINVAL);
+       }
  }
  
-#if    defined(sun) || BALANCE || defined(m88k)
-#else  /*defined(sun) || BALANCE || defined(m88k)*/
-subyte(addr, byte)
-       void * addr;
-       int byte;
+int
+subyte(
+       user_addr_t addr,
+       int byte)
  {
         char character;
         
@@ -122,18 +167,18 @@ subyte(addr, byte)
         return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
  }
  
-suibyte(addr, byte)
-       void * addr;
-       int byte;
+int
+suibyte(
+       user_addr_t addr,
+       int byte)
  {
         char character;
         
         character = (char)byte;
-       return (copyout((void *) &(character), addr, sizeof(char)) == 0 ? 0 : -1);
+       return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
  }
  
-int fubyte(addr)
-       void * addr;
+int fubyte(user_addr_t addr)
  {
         unsigned char byte;
  
@@ -142,8 +187,7 @@ int fubyte(addr)
         return(byte);
  }
  
-int fuibyte(addr)
-       void * addr;
+int fuibyte(user_addr_t addr)
  {
         unsigned char byte;
  
@@ -152,15 +196,15 @@ int fuibyte(addr)
         return(byte);
  }
  
-suword(addr, word)
-       void * addr;
-       long word;
+int
+suword(
+       user_addr_t addr,
+       long word)
  {
         return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
  }
  
-long fuword(addr)
-       void * addr;
+long fuword(user_addr_t addr)
  {
         long word;
  
@@ -171,15 +215,15 @@ long fuword(addr)
  
  /* suiword and fuiword are the same as suword and fuword, respectively */
  
-suiword(addr, word)
-       void * addr;
-       long word;
+int
+suiword(
+       user_addr_t addr,
+       long word)
  {
         return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
  }
  
-long fuiword(addr)
-       void * addr;
+long fuiword(user_addr_t addr)
  {
         long word;
  
@@ -187,78 +231,104 @@ long fuiword(addr)
                 return(-1);
         return(word);
  }
-#endif /* defined(sun) || BALANCE || defined(m88k) || defined(i386) */
+
+/*
+ * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
+ * fetching and setting of process-sized size_t and pointer values.
+ */
+int
+sulong(user_addr_t addr, int64_t word)
+{
+
+       if (IS_64BIT_PROCESS(current_proc())) {
+               return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
+       } else {
+               return(suiword(addr, (long)word));
+       }
+}
+
+int64_t
+fulong(user_addr_t addr)
+{
+       int64_t longword;
+
+       if (IS_64BIT_PROCESS(current_proc())) {
+               if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
+                       return(-1);
+               return(longword);
+       } else {
+               return((int64_t)fuiword(addr));
+       }
+}
  
  int
-swapon()
+suulong(user_addr_t addr, uint64_t uword)
  {
-       return(EOPNOTSUPP);
+
+       if (IS_64BIT_PROCESS(current_proc())) {
+               return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
+       } else {
+               return(suiword(addr, (u_long)uword));
+       }
  }
  
-thread_t
-procdup(
-       struct proc             *child,
-       struct proc             *parent)
+uint64_t
+fuulong(user_addr_t addr)
  {
-       thread_t                thread;
-       task_t                  task;
-       kern_return_t   result;
-
-       if (parent->task == kernel_task)
-               result = task_create_local(TASK_NULL, FALSE, FALSE, &task);
-       else
-               result = task_create_local(parent->task, TRUE, FALSE, &task);
-       if (result != KERN_SUCCESS)
-           printf("fork/procdup: task_create failed. Code: 0x%x\n", result);
-       child->task = task;
-       /* task->proc = child; */
-       set_bsdtask_info(task, child);
-       result = thread_create(task, &thread);
-       if (result != KERN_SUCCESS)
-           printf("fork/procdup: thread_create failed. Code: 0x%x\n", result);
-
-#if FIXME /* [ */
-       thread_deallocate(thread); // extra ref
+       uint64_t ulongword;
  
-       /*
-        *      Don't need to lock thread here because it can't
-        *      possibly execute and no one else knows about it.
-        */
-       /* compute_priority(thread, FALSE); */
-#endif /* ] */
-       return(thread);
+       if (IS_64BIT_PROCESS(current_proc())) {
+               if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
+                       return(-1ULL);
+               return(ulongword);
+       } else {
+               return((uint64_t)fuiword(addr));
+       }
+}
+
+int
+swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
+{
+       return(ENOTSUP);
  }
  
+
  kern_return_t
-pid_for_task(t, x)
-       mach_port_t     t;
-       int     *x;
+pid_for_task(
+       struct pid_for_task_args *args)
  {
+       mach_port_name_t        t = args->t;
+       user_addr_t             pid_addr  = args->pid;  
         struct proc * p;
         task_t          t1;
-       extern task_t port_name_to_task(mach_port_t t);
         int     pid = -1;
-       kern_return_t   err;
+       kern_return_t   err = KERN_SUCCESS;
         boolean_t funnel_state;
  
+       AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
+       AUDIT_ARG(mach_port1, t);
+
         funnel_state = thread_funnel_set(kernel_flock, TRUE);
         t1 = port_name_to_task(t);
  
         if (t1 == TASK_NULL) {
                 err = KERN_FAILURE;
+               goto pftout;
         } else {
                 p = get_bsdtask_info(t1);
                 if (p) {
-                       pid  = p->p_pid;
+                       pid  = proc_pid(p);
                         err = KERN_SUCCESS;
                 } else {
                         err = KERN_FAILURE;
                 }
         }
         task_deallocate(t1);
-       (void) copyout((char *) &pid, (char *) x, sizeof(*x));
  pftout:
+       AUDIT_ARG(pid, pid);
+       (void) copyout((char *) &pid, pid_addr, sizeof(int));
         thread_funnel_set(kernel_flock, funnel_state);
+       AUDIT_MACH_SYSCALL_EXIT(err);
         return(err);
  }
  
@@ -270,80 +340,573 @@ pftout:
   *
   *             Only permitted to privileged processes, or processes
   *             with the same user ID.
+ *
+ * XXX This should be a BSD system call, not a Mach trap!!!
   */
  kern_return_t
-task_for_pid(target_tport, pid, t)
-       mach_port_t     target_tport;
-       int             pid;
-       mach_port_t     *t;
+task_for_pid(
+       struct task_for_pid_args *args)
  {
+       mach_port_name_t        target_tport = args->target_tport;
+       int                     pid = args->pid;
+       user_addr_t             task_addr = args->t;
+       struct uthread          *uthread;
         struct proc     *p;
         struct proc *p1;
         task_t          t1;
-       mach_port_t     tret;
-       extern task_t port_name_to_task(mach_port_t tp);
+       mach_port_name_t        tret;
         void * sright;
         int error = 0;
         boolean_t funnel_state;
  
+       AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
+       AUDIT_ARG(pid, pid);
+       AUDIT_ARG(mach_port1, target_tport);
+
         t1 = port_name_to_task(target_tport);
         if (t1 == TASK_NULL) {
-               (void ) copyout((char *)&t1, (char *)t, sizeof(mach_port_t));
-               error = KERN_FAILURE;
-               goto tfpout;
+               (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+               AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
+               return(KERN_FAILURE);
         } 
  
         funnel_state = thread_funnel_set(kernel_flock, TRUE);
  
- restart:
-       p1 = get_bsdtask_info(t1);
+       p1 = get_bsdtask_info(t1);      /* XXX current proc */
+
+       /*
+        * Delayed binding of thread credential to process credential, if we
+        * are not running with an explicitly set thread credential.
+        */
+       uthread = get_bsdthread_info(current_thread());
+       if (uthread->uu_ucred != p1->p_ucred &&
+           (uthread->uu_flag & UT_SETUID) == 0) {
+               kauth_cred_t old = uthread->uu_ucred;
+               proc_lock(p1);
+               uthread->uu_ucred = p1->p_ucred;
+               kauth_cred_ref(uthread->uu_ucred);
+               proc_unlock(p1);
+               if (old != NOCRED)
+                       kauth_cred_rele(old);
+       }
+
+       p = pfind(pid);
+       AUDIT_ARG(process, p);
+
         if (
-               ((p = pfind(pid)) != (struct proc *) 0)
+               (p != (struct proc *) 0)
                 && (p1 != (struct proc *) 0)
-               && ((p->p_ucred->cr_uid == p1->p_ucred->cr_uid)
-               || !(suser(p1->p_ucred, &p1->p_acflag)))
+               && (
+                       (p1 == p)
+                       || !(suser(kauth_cred_get(), 0))
+                        || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) 
+                               && (p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)
+                               && ((p->p_flag & P_SUGID) == 0))
+                 )
                 && (p->p_stat != SZOMB)
                 ) {
                         if (p->task != TASK_NULL) {
-                               if (!task_reference_try(p->task)) {
-                                       mutex_pause(); /* temp loss of funnel */
-                                       goto restart;
-                               }
-                               sright = convert_task_to_port(p->task);
-                               tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
+                               task_reference(p->task);
+                               sright = (void *)convert_task_to_port(p->task);
+                               tret = ipc_port_copyout_send(
+                                       sright, 
+                                       get_task_ipcspace(current_task()));
                         } else
                                 tret  = MACH_PORT_NULL;
-                       (void ) copyout((char *)&tret, (char *) t, sizeof(mach_port_t));
+                       AUDIT_ARG(mach_port2, tret);
+                       (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
                 task_deallocate(t1);
                         error = KERN_SUCCESS;
                         goto tfpout;
         }
      task_deallocate(t1);
         tret = MACH_PORT_NULL;
-       (void) copyout((char *) &tret, (char *) t, sizeof(mach_port_t));
+       (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
         error = KERN_FAILURE;
  tfpout:
         thread_funnel_set(kernel_flock, funnel_state);
+       AUDIT_MACH_SYSCALL_EXIT(error);
         return(error);
  }
  
  
-struct load_shared_file_args {
-               char            *filename;
-               caddr_t         mfa;
-               u_long          mfs;
-               caddr_t         *ba;
-               int             map_cnt;
-               sf_mapping_t    *mappings;
-               int             *flags;
-};
+/*
+ * shared_region_make_private_np:
+ *
+ * This system call is for "dyld" only.
+ * 
+ * It creates a private copy of the current process's "shared region" for
+ * split libraries.  "dyld" uses this when the shared region is full or
+ * it needs to load a split library that conflicts with an already loaded one
+ * that this process doesn't need.  "dyld" specifies a set of address ranges
+ * that it wants to keep in the now-private "shared region".  These cover
+ * the set of split libraries that the process needs so far.  The kernel needs
+ * to deallocate the rest of the shared region, so that it's available for 
+ * more libraries for this process.
+ */
+int
+shared_region_make_private_np(
+       struct proc                                     *p,
+       struct shared_region_make_private_np_args       *uap,
+       __unused int                                    *retvalp)
+{
+       int                             error;
+       kern_return_t                   kr;
+       boolean_t                       using_shared_regions;
+       user_addr_t                     user_ranges;
+       unsigned int                    range_count;
+       struct shared_region_range_np   *ranges;
+       shared_region_mapping_t         shared_region;
+       struct shared_region_task_mappings      task_mapping_info;
+       shared_region_mapping_t         next;
+
+       ranges = NULL;
+
+       range_count = uap->rangeCount;
+       user_ranges = uap->ranges;
+
+       /* allocate kernel space for the "ranges" */
+       if (range_count != 0) {
+               kr = kmem_alloc(kernel_map,
+                               (vm_offset_t *) &ranges,
+                               (vm_size_t) (range_count * sizeof (ranges[0])));
+               if (kr != KERN_SUCCESS) {
+                       error = ENOMEM;
+                       goto done;
+               }
+
+               /* copy "ranges" from user-space */
+               error = copyin(user_ranges,
+                              ranges,
+                              (range_count * sizeof (ranges[0])));
+               if (error) {
+                       goto done;
+               }
+       }
+
+       if (p->p_flag & P_NOSHLIB) {
+               /* no split library has been mapped for this process so far */
+               using_shared_regions = FALSE;
+       } else {
+               /* this process has already mapped some split libraries */
+               using_shared_regions = TRUE;
+       }
+
+       /*
+        * Get a private copy of the current shared region.
+        * Do not chain it to the system-wide shared region, as we'll want
+        * to map other split libraries in place of the old ones.  We want
+        * to completely detach from the system-wide shared region and go our
+        * own way after this point, not sharing anything with other processes.
+        */
+       error = clone_system_shared_regions(using_shared_regions,
+                                           FALSE, /* chain_regions */
+                                           ENV_DEFAULT_ROOT);
+       if (error) {
+               goto done;
+       }
+
+       /* get info on the newly allocated shared region */
+       vm_get_shared_region(current_task(), &shared_region);
+       task_mapping_info.self = (vm_offset_t) shared_region;
+       shared_region_mapping_info(shared_region,
+                                  &(task_mapping_info.text_region),
+                                  &(task_mapping_info.text_size),
+                                  &(task_mapping_info.data_region),
+                                  &(task_mapping_info.data_size),
+                                  &(task_mapping_info.region_mappings),
+                                  &(task_mapping_info.client_base),
+                                  &(task_mapping_info.alternate_base),
+                                  &(task_mapping_info.alternate_next),
+                                  &(task_mapping_info.fs_base),
+                                  &(task_mapping_info.system),
+                                  &(task_mapping_info.flags),
+                                  &next);
+
+       /*
+        * We now have our private copy of the shared region, as it was before
+        * the call to clone_system_shared_regions().  We now need to clean it
+        * up and keep only the memory areas described by the "ranges" array.
+        */
+       kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);
+       switch (kr) {
+       case KERN_SUCCESS:
+               error = 0;
+               break;
+       default:
+               error = EINVAL;
+               goto done;
+       }
+
+done:
+       if (ranges != NULL) {
+               kmem_free(kernel_map,
+                         (vm_offset_t) ranges,
+                         range_count * sizeof (ranges[0]));
+               ranges = NULL;
+       }
+       
+       return error;
+}
  
  
+/*
+ * shared_region_map_file_np:
+ *
+ * This system call is for "dyld" only.
+ *
+ * "dyld" wants to map parts of a split library in the shared region.
+ * We get a file descriptor on the split library to be mapped and a set
+ * of mapping instructions, describing which parts of the file to map in
+ * which areas of the shared segment and with what protection.
+ * The "shared region" is split in 2 areas:
+ * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections), 
+ * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
+ *
+ */
  int
-load_shared_file(
-       struct proc             *p,
-       struct load_shared_file_args *uap,
-       register                *retval)
+shared_region_map_file_np(
+       struct proc                             *p,
+       struct shared_region_map_file_np_args   *uap,
+       __unused int                            *retvalp)
+{
+       int                                     error;
+       kern_return_t                           kr;
+       int                                     fd;
+       unsigned int                            mapping_count;
+       user_addr_t                             user_mappings; /* 64-bit */
+       user_addr_t                             user_slide_p;  /* 64-bit */
+       struct shared_file_mapping_np           *mappings;
+       struct fileproc                         *fp;
+       mach_vm_offset_t                        slide;
+       struct vnode                            *vp;
+       struct vfs_context                      context;
+       memory_object_control_t                 file_control;
+       memory_object_size_t                    file_size;
+       shared_region_mapping_t                 shared_region;
+       struct shared_region_task_mappings      task_mapping_info;
+       shared_region_mapping_t                 next;
+       shared_region_mapping_t                 default_shared_region;
+       boolean_t                               using_default_region;
+       unsigned int                            j;
+       vm_prot_t                               max_prot;
+       mach_vm_offset_t                        base_offset, end_offset;
+       mach_vm_offset_t                        original_base_offset;
+       boolean_t                               mappings_in_segment;
+#define SFM_MAX_STACK  6
+       struct shared_file_mapping_np           stack_mappings[SFM_MAX_STACK];
+
+       mappings = NULL;
+       mapping_count = 0;
+       fp = NULL;
+       vp = NULL;
+
+       /* get file descriptor for split library from arguments */
+       fd = uap->fd;
+
+       /* get file structure from file descriptor */
+       error = fp_lookup(p, fd, &fp, 0);
+       if (error) {
+               goto done;
+       }
+
+       /* make sure we're attempting to map a vnode */
+       if (fp->f_fglob->fg_type != DTYPE_VNODE) {
+               error = EINVAL;
+               goto done;
+       }
+
+       /* we need at least read permission on the file */
+       if (! (fp->f_fglob->fg_flag & FREAD)) {
+               error = EPERM;
+               goto done;
+       }
+
+       /* get vnode from file structure */
+       error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
+       if (error) {
+               goto done;
+       }
+       vp = (struct vnode *) fp->f_fglob->fg_data;
+
+       /* make sure the vnode is a regular file */
+       if (vp->v_type != VREG) {
+               error = EINVAL;
+               goto done;
+       }
+
+       /* get vnode size */
+       {
+               off_t   fs;
+               
+               context.vc_proc = p;
+               context.vc_ucred = kauth_cred_get();
+               if ((error = vnode_size(vp, &fs, &context)) != 0)
+                       goto done;
+               file_size = fs;
+       }
+
+       /*
+        * Get the list of mappings the caller wants us to establish.
+        */
+       mapping_count = uap->mappingCount; /* the number of mappings */
+       if (mapping_count == 0) {
+               error = 0;      /* no mappings: we're done ! */
+               goto done;
+       } else if (mapping_count <= SFM_MAX_STACK) {
+               mappings = &stack_mappings[0];
+       } else {
+               kr = kmem_alloc(kernel_map,
+                               (vm_offset_t *) &mappings,
+                               (vm_size_t) (mapping_count *
+                                            sizeof (mappings[0])));
+               if (kr != KERN_SUCCESS) {
+                       error = ENOMEM;
+                       goto done;
+               }
+       }
+
+       user_mappings = uap->mappings;     /* the mappings, in user space */
+       error = copyin(user_mappings,
+                      mappings,
+                      (mapping_count * sizeof (mappings[0])));
+       if (error != 0) {
+               goto done;
+       }
+
+       /*
+        * If the caller provides a "slide" pointer, it means they're OK
+        * with us moving the mappings around to make them fit.
+        */
+       user_slide_p = uap->slide_p;
+
+       /*
+        * Make each mapping address relative to the beginning of the
+        * shared region.  Check that all mappings are in the shared region.
+        * Compute the maximum set of protections required to tell the
+        * buffer cache how we mapped the file (see call to ubc_map() below).
+        */
+       max_prot = VM_PROT_NONE;
+       base_offset = -1LL;
+       end_offset = 0;
+       mappings_in_segment = TRUE;
+       for (j = 0; j < mapping_count; j++) {
+               mach_vm_offset_t segment;
+               segment = (mappings[j].sfm_address &
+                          GLOBAL_SHARED_SEGMENT_MASK);
+               if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
+                   segment != GLOBAL_SHARED_DATA_SEGMENT) {
+                       /* this mapping is not in the shared region... */
+                       if (user_slide_p == NULL) {
+                               /* ... and we can't slide it in: fail */
+                               error = EINVAL;
+                               goto done;
+                       }
+                       if (j == 0) {
+                               /* expect all mappings to be outside */
+                               mappings_in_segment = FALSE;
+                       } else if (mappings_in_segment != FALSE) {
+                               /* other mappings were not outside: fail */
+                               error = EINVAL;
+                               goto done;
+                       }
+                       /* we'll try and slide that mapping in the segments */
+               } else {
+                       if (j == 0) {
+                               /* expect all mappings to be inside */
+                               mappings_in_segment = TRUE;
+                       } else if (mappings_in_segment != TRUE) {
+                               /* other mappings were not inside: fail */
+                               error = EINVAL;
+                               goto done;
+                       }
+                       /* get a relative offset inside the shared segments */
+                       mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;
+               }
+               if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
+                   < base_offset) {
+                       base_offset = (mappings[j].sfm_address &
+                                      SHARED_TEXT_REGION_MASK);
+               }
+               if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
+                   mappings[j].sfm_size > end_offset) {
+                       end_offset =
+                               (mappings[j].sfm_address &
+                                SHARED_TEXT_REGION_MASK) +
+                               mappings[j].sfm_size;
+               }
+               max_prot |= mappings[j].sfm_max_prot;
+       }
+       /* Make all mappings relative to the base_offset */
+       base_offset = vm_map_trunc_page(base_offset);
+       end_offset = vm_map_round_page(end_offset);
+       for (j = 0; j < mapping_count; j++) {
+               mappings[j].sfm_address -= base_offset;
+       }
+       original_base_offset = base_offset;
+       if (mappings_in_segment == FALSE) {
+               /*
+                * We're trying to map a library that was not pre-bound to
+                * be in the shared segments.  We want to try and slide it
+                * back into the shared segments but as far back as possible,
+                * so that it doesn't clash with pre-bound libraries.  Set
+                * the base_offset to the end of the region, so that it can't
+                * possibly fit there and will have to be slid.
+                */
+               base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
+       }
+
+       /* get the file's memory object handle */
+       UBCINFOCHECK("shared_region_map_file_np", vp);
+       file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
+       if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
+               error = EINVAL;
+               goto done;
+       }
+
+       /*
+        * Get info about the current process's shared region.
+        * This might change if we decide we need to clone the shared region.
+        */
+       vm_get_shared_region(current_task(), &shared_region);
+       task_mapping_info.self = (vm_offset_t) shared_region;
+       shared_region_mapping_info(shared_region,
+                                  &(task_mapping_info.text_region),
+                                  &(task_mapping_info.text_size),
+                                  &(task_mapping_info.data_region),
+                                  &(task_mapping_info.data_size),
+                                  &(task_mapping_info.region_mappings),
+                                  &(task_mapping_info.client_base),
+                                  &(task_mapping_info.alternate_base),
+                                  &(task_mapping_info.alternate_next),
+                                  &(task_mapping_info.fs_base),
+                                  &(task_mapping_info.system),
+                                  &(task_mapping_info.flags),
+                                  &next);
+
+       /*
+        * Are we using the system's current shared region
+        * for this environment ?
+        */
+       default_shared_region =
+               lookup_default_shared_region(ENV_DEFAULT_ROOT,
+                                            task_mapping_info.system);
+       if (shared_region == default_shared_region) {
+               using_default_region = TRUE;
+       } else {
+               using_default_region = FALSE;
+       }
+       shared_region_mapping_dealloc(default_shared_region);
+
+       if (vp->v_mount != rootvnode->v_mount &&
+           using_default_region) {
+               /*
+                * The split library is not on the root filesystem.  We don't
+                * want to pollute the system-wide ("default") shared region
+                * with it.
+                * Reject the mapping.  The caller (dyld) should "privatize"
+                * (via shared_region_make_private_np()) the shared region and
+                * try to establish the mapping privately for this process.
+                */
+               error = EXDEV;
+               goto done;
+       }
+
+
+       /*
+        * Map the split library.
+        */
+       kr = map_shared_file(mapping_count,
+                            mappings,
+                            file_control,
+                            file_size,
+                            &task_mapping_info,
+                            base_offset,
+                            (user_slide_p) ? &slide : NULL);
+
+       switch (kr) {
+       case KERN_SUCCESS:
+               /*
+                * The mapping was successful.  Let the buffer cache know
+                * that we've mapped that file with these protections.  This
+                * prevents the vnode from getting recycled while it's mapped.
+                */
+               (void) ubc_map(vp, max_prot);
+               error = 0;
+               break;
+       case KERN_INVALID_ADDRESS:
+               error = EFAULT;
+               goto done;
+       case KERN_PROTECTION_FAILURE:
+               error = EPERM;
+               goto done;
+       case KERN_NO_SPACE:
+               error = ENOMEM;
+               goto done;
+       case KERN_FAILURE:
+       case KERN_INVALID_ARGUMENT:
+       default:
+               error = EINVAL;
+               goto done;
+       }
+
+       if (p->p_flag & P_NOSHLIB) {
+               /* signal that this process is now using split libraries */
+               p->p_flag &= ~P_NOSHLIB;
+       }
+
+       if (user_slide_p) {
+               /*
+                * The caller provided a pointer to a "slide" offset.  Let
+                * them know by how much we slid the mappings.
+                */
+               if (mappings_in_segment == FALSE) {
+                       /*
+                        * We faked the base_offset earlier, so undo that
+                        * and take into account the real base_offset.
+                        */
+                       slide += SHARED_TEXT_REGION_SIZE - end_offset;
+                       slide -= original_base_offset;
+                       /*
+                        * The mappings were slid into the shared segments
+                        * and "slide" is relative to the beginning of the
+                        * shared segments.  Adjust it to be absolute.
+                        */
+                       slide += GLOBAL_SHARED_TEXT_SEGMENT;
+               }
+               error = copyout(&slide,
+                               user_slide_p,
+                               sizeof (int64_t));
+       }
+
+done:
+       if (vp != NULL) {
+               /*
+                * release the vnode...
+                * ubc_map() still holds it for us in the non-error case
+                */
+               (void) vnode_put(vp);
+               vp = NULL;
+       }
+       if (fp != NULL) {
+               /* release the file descriptor */
+               fp_drop(p, fd, fp, 0);
+               fp = NULL;
+       }
+       if (mappings != NULL &&
+           mappings != &stack_mappings[0]) {
+               kmem_free(kernel_map,
+                         (vm_offset_t) mappings,
+                         mapping_count * sizeof (mappings[0]));
+       }
+       mappings = NULL;
+
+       return error;
+}
+
+int
+load_shared_file(struct proc *p, struct load_shared_file_args *uap,
+                                       __unused int *retval)
  {
         caddr_t         mapped_file_addr=uap->mfa;
         u_long          mapped_file_size=uap->mfs;
@@ -358,14 +921,15 @@ load_shared_file(
         register int            error;
         kern_return_t           kr;
  
-       struct vattr    vattr;
-       void            *object;
-       void            *file_object;
+       struct vfs_context context;
+       off_t           file_size;
+       memory_object_control_t file_control;
          sf_mapping_t    *map_list;
          caddr_t                local_base;
         int             local_flags;
         int             caller_flags;
         int             i;
+       int             default_regions = 0;
         vm_size_t       dummy;
         kern_return_t   kret;
  
@@ -373,17 +937,52 @@ load_shared_file(
         struct shared_region_task_mappings      task_mapping_info;
         shared_region_mapping_t next;
  
-       ndp = &nd;
+       context.vc_proc = p;
+       context.vc_ucred = kauth_cred_get();
  
-       unix_master();
+       ndp = &nd;
  
+       AUDIT_ARG(addr, CAST_USER_ADDR_T(base_address));
         /* Retrieve the base address */
-       if (error = copyin(base_address, &local_base, sizeof (caddr_t))) {
-                       goto lsf_bailout;
-        }
-       if (error = copyin(flags, &local_flags, sizeof (int))) {
+       if ( (error = copyin(CAST_USER_ADDR_T(base_address), &local_base, sizeof (caddr_t))) ) {
+               goto lsf_bailout;
+       }
+       if ( (error = copyin(CAST_USER_ADDR_T(flags), &local_flags, sizeof (int))) ) {
+               goto lsf_bailout;
+       }
+
+       if(local_flags & QUERY_IS_SYSTEM_REGION) {
+                       shared_region_mapping_t default_shared_region;
+                       vm_get_shared_region(current_task(), &shared_region);
+                       task_mapping_info.self = (vm_offset_t)shared_region;
+
+                       shared_region_mapping_info(shared_region, 
+                                       &(task_mapping_info.text_region), 
+                                       &(task_mapping_info.text_size),
+                                       &(task_mapping_info.data_region), 
+                                       &(task_mapping_info.data_size), 
+                                       &(task_mapping_info.region_mappings),
+                                       &(task_mapping_info.client_base), 
+                                       &(task_mapping_info.alternate_base),
+                                       &(task_mapping_info.alternate_next), 
+                                       &(task_mapping_info.fs_base),
+                                       &(task_mapping_info.system),
+                                       &(task_mapping_info.flags), &next);
+
+                       default_shared_region =
+                               lookup_default_shared_region(
+                                       ENV_DEFAULT_ROOT, 
+                                       task_mapping_info.system);
+                       if (shared_region == default_shared_region) {
+                               local_flags = SYSTEM_REGION_BACKED;
+                       } else {
+                               local_flags = 0;
+                       }
+                       shared_region_mapping_dealloc(default_shared_region);
+                       error = 0;
+                       error = copyout(&local_flags, CAST_USER_ADDR_T(flags), sizeof (int));
                         goto lsf_bailout;
-        }
+       }
         caller_flags = local_flags;
         kret = kmem_alloc(kernel_map, (vm_offset_t *)&filename_str,
                         (vm_size_t)(MAXPATHLEN));
@@ -400,28 +999,28 @@ load_shared_file(
                         goto lsf_bailout;
                 }
  
-       if (error = 
-               copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) {
+       if ( (error = copyin(CAST_USER_ADDR_T(mappings), map_list, (map_cnt*sizeof(sf_mapping_t)))) ) {
                 goto lsf_bailout_free;
         }
  
-       if (error = copyinstr(filename, 
-                       filename_str, MAXPATHLEN, (size_t *)&dummy)) {
+       if ( (error = copyinstr(CAST_USER_ADDR_T(filename), filename_str, 
+                                                       MAXPATHLEN, (size_t *)&dummy)) ) {
                 goto lsf_bailout_free;
         }
  
         /*
          * Get a vnode for the target file
          */
-       NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
-           filename_str, p);
+       NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_SYSSPACE32,
+           CAST_USER_ADDR_T(filename_str), &context);
  
         if ((error = namei(ndp))) {
                 goto lsf_bailout_free;
         }
-
         vp = ndp->ni_vp;
  
+       nameidone(ndp);
+
         if (vp->v_type != VREG) {
                 error = EINVAL;
                 goto lsf_bailout_free_vput;
@@ -429,23 +1028,35 @@ load_shared_file(
  
         UBCINFOCHECK("load_shared_file", vp);
  
-       if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) {
+       if ((error = vnode_size(vp, &file_size, &context)) != 0)
                 goto lsf_bailout_free_vput;
-       }
-
  
-       file_object = ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT));
-       if (file_object == (void *)NULL) {
+       file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
+       if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
                 error = EINVAL;
                 goto lsf_bailout_free_vput;
         }
  
  #ifdef notdef
-       if(vattr.va_size != mapped_file_size) {
+       if(file_size != mapped_file_size) {
                 error = EINVAL;
                 goto lsf_bailout_free_vput;
         }
  #endif
+       if(p->p_flag & P_NOSHLIB) {
+               p->p_flag = p->p_flag & ~P_NOSHLIB;
+       }
+
+       /* load alternate regions if the caller has requested.  */
+       /* Note: the new regions are "clean slates" */
+       if (local_flags & NEW_LOCAL_SHARED_REGIONS) {
+               error = clone_system_shared_regions(FALSE,
+                                                   TRUE, /* chain_regions */
+                                                   ENV_DEFAULT_ROOT);
+               if (error) {
+                       goto lsf_bailout_free_vput;
+               }
+       }
  
         vm_get_shared_region(current_task(), &shared_region);
         task_mapping_info.self = (vm_offset_t)shared_region;
@@ -459,8 +1070,53 @@ load_shared_file(
                         &(task_mapping_info.client_base), 
                         &(task_mapping_info.alternate_base),
                         &(task_mapping_info.alternate_next), 
+                       &(task_mapping_info.fs_base),
+                       &(task_mapping_info.system),
                         &(task_mapping_info.flags), &next);
  
+       {
+               shared_region_mapping_t default_shared_region;
+               default_shared_region =
+                       lookup_default_shared_region(
+                               ENV_DEFAULT_ROOT, 
+                               task_mapping_info.system);
+               if(shared_region == default_shared_region) {
+                       default_regions = 1;
+               }
+               shared_region_mapping_dealloc(default_shared_region);
+       }
+       /* If we are running on a removable file system we must not */
+       /* be in a set of shared regions or the file system will not */
+       /* be removable. */
+       if(((vp->v_mount != rootvnode->v_mount) && (default_regions)) 
+               && (lsf_mapping_pool_gauge() < 75)) {
+                               /* We don't want to run out of shared memory */
+                               /* map entries by starting too many private versions */
+                               /* of the shared library structures */
+               int     error2;
+
+               error2 = clone_system_shared_regions(!(p->p_flag & P_NOSHLIB),
+                                                    TRUE, /* chain_regions */
+                                                    ENV_DEFAULT_ROOT);
+               if (error2) {
+                       goto lsf_bailout_free_vput;
+               }
+               local_flags = local_flags & ~NEW_LOCAL_SHARED_REGIONS;
+               vm_get_shared_region(current_task(), &shared_region);
+               shared_region_mapping_info(shared_region, 
+                       &(task_mapping_info.text_region), 
+                       &(task_mapping_info.text_size),
+                       &(task_mapping_info.data_region), 
+                       &(task_mapping_info.data_size), 
+                       &(task_mapping_info.region_mappings),
+                       &(task_mapping_info.client_base), 
+                       &(task_mapping_info.alternate_base),
+                       &(task_mapping_info.alternate_next), 
+                       &(task_mapping_info.fs_base),
+                       &(task_mapping_info.system),
+                       &(task_mapping_info.flags), &next);
+       }
+
         /*  This is a work-around to allow executables which have been */
         /*  built without knowledge of the proper shared segment to    */
         /*  load.  This code has been architected as a shared region   */
@@ -487,71 +1143,11 @@ load_shared_file(
                 }
         }
  
-       /* load alternate regions if the caller has requested.  */
-       /* Note: the new regions are "clean slates" */
-          
-       if (local_flags & NEW_LOCAL_SHARED_REGIONS) {
-
-               shared_region_mapping_t new_shared_region;
-               shared_region_mapping_t old_shared_region;
-               struct shared_region_task_mappings old_info;
-               struct shared_region_task_mappings new_info;
-
-               if(shared_file_create_system_region(&new_shared_region)) {
-                       error = ENOMEM;
-                       goto lsf_bailout_free_vput;
-               }
-               vm_get_shared_region(current_task(), &old_shared_region);
-
-               old_info.self = (vm_offset_t)old_shared_region;
-               shared_region_mapping_info(old_shared_region,
-                       &(old_info.text_region),   
-                       &(old_info.text_size),
-                       &(old_info.data_region),
-                       &(old_info.data_size),
-                       &(old_info.region_mappings),
-                       &(old_info.client_base),
-                       &(old_info.alternate_base),
-                       &(old_info.alternate_next), 
-                       &(old_info.flags), &next);
-               new_info.self = (vm_offset_t)new_shared_region;
-               shared_region_mapping_info(new_shared_region,
-                       &(new_info.text_region),   
-                       &(new_info.text_size),
-                       &(new_info.data_region),
-                       &(new_info.data_size),
-                       &(new_info.region_mappings),
-                       &(new_info.client_base),
-                       &(new_info.alternate_base),
-                       &(new_info.alternate_next), 
-                       &(new_info.flags), &next);
-               if (vm_map_region_replace(current_map(), old_info.text_region, 
-                               new_info.text_region, old_info.client_base, 
-                               old_info.client_base+old_info.text_size)) {
-                       panic("load_shared_file: shared region mis-alignment");
-                       shared_region_mapping_dealloc(new_shared_region);
-                       error = EINVAL;
-                       goto lsf_bailout_free_vput;
-               }
-               if(vm_map_region_replace(current_map(), old_info.data_region, 
-                               new_info.data_region, 
-                               old_info.client_base + old_info.text_size, 
-                               old_info.client_base
-                               + old_info.text_size + old_info.data_size)) {
-                       panic("load_shared_file: shared region mis-alignment 1");
-                       shared_region_mapping_dealloc(new_shared_region);
-                       error = EINVAL;
-                       goto lsf_bailout_free_vput;
-               }
-               vm_set_shared_region(current_task(), new_shared_region);
-               task_mapping_info = new_info;
-               shared_region_mapping_dealloc(old_shared_region);
-       }
  
         if((kr = copyin_shared_file((vm_offset_t)mapped_file_addr, 
                         mapped_file_size, 
                         (vm_offset_t *)&local_base,
-                       map_cnt, map_list, file_object, 
+                       map_cnt, map_list, file_control, 
                         &task_mapping_info, &local_flags))) {
                 switch (kr) {
                         case KERN_FAILURE:
@@ -561,7 +1157,7 @@ load_shared_file(
                                 error = EINVAL;
                                 break;
                         case KERN_INVALID_ADDRESS:
-                               error = EACCES;
+                               error = EFAULT;
                                 break;
                         case KERN_PROTECTION_FAILURE:
                                 /* save EAUTH for authentication in this */
@@ -575,7 +1171,7 @@ load_shared_file(
                                 error = EINVAL;
                 };
                 if((caller_flags & ALTERNATE_LOAD_SITE) && systemLogDiags) {
-                       printf("load_shared_file:  Failed to load shared file! error: 0x%x, Base_address: 0x%x, number of mappings: %d, file_object 0x%x\n", error, local_base, map_cnt, file_object);
+                       printf("load_shared_file:  Failed to load shared file! error: 0x%x, Base_address: 0x%x, number of mappings: %d, file_control 0x%x\n", error, local_base, map_cnt, file_control);
                         for(i=0; i<map_cnt; i++) {
                                 printf("load_shared_file: Mapping%d, mapping_offset: 0x%x, size: 0x%x, file_offset: 0x%x, protection: 0x%x\n"
                                         , i, map_list[i].mapping_offset, 
@@ -585,14 +1181,16 @@ load_shared_file(
                         }
                 }
         } else {
-               if(!(error = copyout(&local_flags, flags, sizeof (int)))) {
+               if(default_regions)
+                       local_flags |= SYSTEM_REGION_BACKED;
+               if(!(error = copyout(&local_flags, CAST_USER_ADDR_T(flags), sizeof (int)))) {
                         error = copyout(&local_base, 
-                               base_address, sizeof (caddr_t));
+                               CAST_USER_ADDR_T(base_address), sizeof (caddr_t));
                 }
         }
  
  lsf_bailout_free_vput:
-       vput(vp);
+       vnode_put(vp);
  
  lsf_bailout_free:
         kmem_free(kernel_map, (vm_offset_t)filename_str, 
@@ -601,41 +1199,27 @@ lsf_bailout_free:
                                 (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
  
  lsf_bailout:
-       unix_release();
         return error;
  }
  
-struct reset_shared_file_args {
-               caddr_t         *ba;
-               int             map_cnt;
-               sf_mapping_t    *mappings;
-};
-
  int
-reset_shared_file(
-       struct proc             *p,
-       struct reset_shared_file_args *uap,
-       register                *retval)
+reset_shared_file(__unused struct proc *p, struct reset_shared_file_args *uap,
+                                       __unused register int *retval)
  {
-        caddr_t                *base_address=uap->ba;
-        int             map_cnt=uap->map_cnt;
-        sf_mapping_t       *mappings=uap->mappings;
+       caddr_t                         *base_address=uap->ba;
+       int                     map_cnt=uap->map_cnt;
+       sf_mapping_t            *mappings=uap->mappings;
         register int            error;
-       kern_return_t           kr;
-
-        sf_mapping_t    *map_list;
-        caddr_t                local_base;
-       vm_offset_t     map_address;
-       int             i;
-       kern_return_t   kret;
-
-
-
  
-       unix_master();
+       sf_mapping_t            *map_list;
+       caddr_t                         local_base;
+       vm_offset_t                     map_address;
+       int                                     i;
+       kern_return_t           kret;
  
+       AUDIT_ARG(addr, CAST_DOWN(user_addr_t, base_address));
         /* Retrieve the base address */
-       if (error = copyin(base_address, &local_base, sizeof (caddr_t))) {
+       if ( (error = copyin(CAST_USER_ADDR_T(base_address), &local_base, sizeof (caddr_t))) ) {
                         goto rsf_bailout;
          }
  
@@ -652,8 +1236,8 @@ reset_shared_file(
                         goto rsf_bailout;
                 }
  
-       if (error = 
-               copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) {
+       if ( (error = 
+                 copyin(CAST_USER_ADDR_T(mappings), map_list, (map_cnt*sizeof(sf_mapping_t)))) ) {
  
                 kmem_free(kernel_map, (vm_offset_t)map_list, 
                                 (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
@@ -668,7 +1252,8 @@ reset_shared_file(
                                 map_address,
                                 map_list[i].size);
                         vm_map(current_map(), &map_address,
-                               map_list[i].size, 0, SHARED_LIB_ALIAS,
+                               map_list[i].size, 0,
+                               SHARED_LIB_ALIAS | VM_FLAGS_FIXED,
                                 shared_data_region_handle, 
                                 ((unsigned int)local_base 
                                    & SHARED_DATA_REGION_MASK) +
@@ -683,15 +1268,33 @@ reset_shared_file(
                                 (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
  
  rsf_bailout:
-       unix_release();
         return error;
  }
  
+int
+new_system_shared_regions(__unused struct proc *p,
+                         __unused struct new_system_shared_regions_args *uap,
+                         register int *retval)
+{      /* clears the default shared regions; refused unless is_suser() succeeds */
+       if(!(is_suser())) {     /* caller did not pass the is_suser() check */
+               *retval = EINVAL;       /* EINVAL is stored through retval ... */
+               return EINVAL;          /* ... and is also the return value */
+       }
+
+       /* clear all of our existing defaults */
+       remove_all_shared_regions();
+
+       *retval = 0;            /* success: *retval and the return value are both 0 */
+       return 0;
+}
  
  
  
  int
-clone_system_shared_regions()
+clone_system_shared_regions(
+       int             shared_regions_active,
+       int             chain_regions,
+       int             base_vnode)
  {
         shared_region_mapping_t new_shared_region;
         shared_region_mapping_t next;
@@ -699,8 +1302,6 @@ clone_system_shared_regions()
         struct shared_region_task_mappings old_info;
         struct shared_region_task_mappings new_info;
  
-       if (shared_file_create_system_region(&new_shared_region))
-               return (ENOMEM);
         vm_get_shared_region(current_task(), &old_shared_region);
         old_info.self = (vm_offset_t)old_shared_region;
         shared_region_mapping_info(old_shared_region,
@@ -712,7 +1313,27 @@ clone_system_shared_regions()
                 &(old_info.client_base),
                 &(old_info.alternate_base),
                 &(old_info.alternate_next), 
+               &(old_info.fs_base),
+               &(old_info.system),
                 &(old_info.flags), &next);
+       if ((shared_regions_active) ||
+               (base_vnode == ENV_DEFAULT_ROOT)) {
+          if (shared_file_create_system_region(&new_shared_region))
+               return (ENOMEM);
+       } else {
+          new_shared_region = 
+               lookup_default_shared_region(
+                       base_vnode, old_info.system);
+          if(new_shared_region == NULL) {
+               shared_file_boot_time_init(
+                       base_vnode, old_info.system);
+               vm_get_shared_region(current_task(), &new_shared_region);
+          } else {
+               vm_set_shared_region(current_task(), new_shared_region);
+          }
+          if(old_shared_region)
+               shared_region_mapping_dealloc(old_shared_region);
+       }
         new_info.self = (vm_offset_t)new_shared_region;
         shared_region_mapping_info(new_shared_region,
                 &(new_info.text_region),   
@@ -723,16 +1344,39 @@ clone_system_shared_regions()
                 &(new_info.client_base),
                 &(new_info.alternate_base),
                 &(new_info.alternate_next), 
+               &(new_info.fs_base),
+               &(new_info.system),
                 &(new_info.flags), &next);
-       if(vm_region_clone(old_info.text_region, new_info.text_region)) {
-       panic("clone_system_shared_regions: shared region mis-alignment 1");
+       if(shared_regions_active) {
+          if(vm_region_clone(old_info.text_region, new_info.text_region)) {
+          panic("clone_system_shared_regions: shared region mis-alignment 1");
                 shared_region_mapping_dealloc(new_shared_region);
                 return(EINVAL);
-       }
-       if (vm_region_clone(old_info.data_region, new_info.data_region)) {
-       panic("clone_system_shared_regions: shared region mis-alignment 2");
+          }
+          if (vm_region_clone(old_info.data_region, new_info.data_region)) {
+          panic("clone_system_shared_regions: shared region mis-alignment 2");
                 shared_region_mapping_dealloc(new_shared_region);
                 return(EINVAL);
+          }
+          if (chain_regions) {
+                  /*
+                   * We want a "shadowed" clone, a private superset of the old
+                   * shared region.  The info about the old mappings is still
+                   * valid for us.
+                   */
+                  shared_region_object_chain_attach(
+                          new_shared_region, old_shared_region);
+          } else {
+                  /*
+                   * We want a completely detached clone with no link to
+                   * the old shared region.  We'll be removing some mappings
+                   * in our private, cloned, shared region, so the old mappings
+                   * will become irrelevant to us.  Since we have a private
+                   * "shared region" now, it isn't going to be shared with
+                   * anyone else and we won't need to maintain mappings info.
+                   */
+                  shared_region_object_chain_detached(new_shared_region);
+          }
         }
         if (vm_map_region_replace(current_map(), old_info.text_region, 
                         new_info.text_region, old_info.client_base, 
@@ -751,7 +1395,998 @@ clone_system_shared_regions()
                 return(EINVAL);
         }
         vm_set_shared_region(current_task(), new_shared_region);
-       shared_region_object_chain_attach(new_shared_region, old_shared_region);
+
+       /* consume the reference which wasn't accounted for in object */
+       /* chain attach */
+       if (!shared_regions_active || !chain_regions)
+               shared_region_mapping_dealloc(old_shared_region);
+
         return(0);
  
  }
+
+/* header for the profile name file.  The profiled app info is held */
+/* in the data file and pointed to by elements in the name file     */
+
+struct profile_names_header {  /* header of the profile names file */
+       unsigned int    number_of_profiles;     /* NOTE(review): presumably the number of profile_element entries - confirm */
+       unsigned int    user_id;        /* NOTE(review): presumably the id of the user the file belongs to - confirm */
+       unsigned int    version;
+       off_t           element_array;  /* NOTE(review): presumably locates the profile_element array - confirm */
+       unsigned int    spare1;
+       unsigned int    spare2;
+       unsigned int    spare3;
+};
+
+struct profile_element {       /* NOTE(review): presumably one names-file element pointing into the data file - confirm */
+       off_t           addr;   /* NOTE(review): presumably where the profile starts in the data file - confirm */
+       vm_size_t       size;   /* NOTE(review): presumably the size of that profile - confirm */
+       unsigned int    mod_date;       /* NOTE(review): presumably compared with the mod_date argument of bsd_search_page_cache_data_base - confirm */
+       unsigned int    inode;  /* NOTE(review): presumably compared with the inode argument of bsd_search_page_cache_data_base - confirm */
+       char name[12];  /* NOTE(review): presumably compared with app_name; fixed 12-byte field - confirm */
+};
+
+struct global_profile {        /* one slot of the global per-user profile cache */
+       struct vnode    *names_vp;      /* vnode of the names file for this slot */
+       struct vnode    *data_vp;       /* vnode of the data file; NULL = empty slot, 0xFFFFFFFF = slot is being reused */
+       vm_offset_t     buf_ptr;        /* freed with kmem_free(kernel_map, buf_ptr, 4 * PAGE_SIZE) when the slot is recycled */
+       unsigned int    user;   /* user id the slot was set up for; compared against the requested user on lookup */
+       unsigned int    age;    /* cache-wide age copied in when the slot is used; the smallest age is replaced first */
+       unsigned int    busy;   /* nonzero while a caller owns the slot; other callers tsleep() on the slot's address */
+};
+
+struct global_profile_cache {  /* fixed-size cache of per-user profile slots */
+       int                     max_ele;        /* number of profiles[] slots the lookup loops scan */
+       unsigned int            age;    /* counter incremented on every cache hit and every slot replacement */
+       struct global_profile   profiles[3];    /* the slots themselves */
+};
+
+/* forward declarations */
+int bsd_open_page_cache_files(unsigned int user,
+                             struct global_profile **profile);
+void bsd_close_page_cache_files(struct global_profile *profile);
+int bsd_search_page_cache_data_base(
+       struct  vnode                   *vp,
+       struct profile_names_header     *database,
+       char                            *app_name,
+       unsigned int                    mod_date,
+       unsigned int                    inode,
+       off_t                           *profile,
+       unsigned int                    *profile_size);
+
+struct global_profile_cache global_user_profile_cache =
+       {3, 0, {{NULL, NULL, 0, 0, 0, 0},       /* max_ele = 3, age = 0; slot 0 empty (data_vp == NULL) */
+                   {NULL, NULL, 0, 0, 0, 0},   /* slot 1 empty */
+                   {NULL, NULL, 0, 0, 0, 0}} };        /* slot 2 empty */
+
+/* BSD_OPEN_PAGE_CACHE_FILES:                                 */
+/* Caller provides a user id.  This id was used in            */
+/* prepare_profile_database to create two unique absolute     */
+/* file paths to the associated profile files.  These files   */
+/* are either opened or bsd_open_page_cache_files returns an  */
+/* error.  The header of the names file is then consulted.    */
+/* The header and the vnodes for the names and data files are */
+/* returned. */
+
+int
+bsd_open_page_cache_files(
+       unsigned int    user,
+       struct global_profile **profile)
+{
+       const char *cache_path = "/var/vm/app_profile/";
+       struct proc     *p;
+       int             error;
+       vm_size_t       resid;
+       off_t           resid_off;
+       unsigned int    lru;
+       vm_size_t       size;
+
+       struct  vnode   *names_vp;
+       struct  vnode   *data_vp;
+       vm_offset_t     names_buf;
+       vm_offset_t     buf_ptr;
+
+       int             profile_names_length;
+       int             profile_data_length;
+       char            *profile_data_string;
+       char            *profile_names_string;
+       char            *substring;
+
+       off_t           file_size;
+       struct vfs_context  context;
+
+       kern_return_t   ret;
+
+       struct nameidata nd_names;
+       struct nameidata nd_data;
+       int             i;
+
+
+       p = current_proc();
+
+       context.vc_proc = p;
+       context.vc_ucred = kauth_cred_get();
+
+restart:
+       for(i = 0; i<global_user_profile_cache.max_ele; i++) {
+               if((global_user_profile_cache.profiles[i].user == user) 
+                       &&  (global_user_profile_cache.profiles[i].data_vp 
+                                                               != NULL)) {
+                       *profile = &global_user_profile_cache.profiles[i];
+                       /* already in cache, we're done */
+                       if ((*profile)->busy) {
+                                       /*
+                                       * drop funnel and wait 
+                                       */
+                               (void)tsleep((void *)
+                                       *profile, 
+                                       PRIBIO, "app_profile", 0);
+                               goto restart;
+                       }
+                       (*profile)->busy = 1;
+                       (*profile)->age = global_user_profile_cache.age;
+
+                       /*
+                        * entries in cache are held with a valid
+                        * usecount... take an iocount which will
+                        * be dropped in "bsd_close_page_cache_files"
+                        * which is called after the read or writes to
+                        * these files are done
+                        */
+                       if ( (vnode_getwithref((*profile)->data_vp)) ) {
+                         
+                               vnode_rele((*profile)->data_vp);
+                               vnode_rele((*profile)->names_vp);
+
+                               (*profile)->data_vp = NULL;
+                               (*profile)->busy = 0;
+                               wakeup(*profile);
+
+                               goto restart;
+                       }
+                       if ( (vnode_getwithref((*profile)->names_vp)) ) {
+
+                               vnode_put((*profile)->data_vp);
+                               vnode_rele((*profile)->data_vp);
+                               vnode_rele((*profile)->names_vp);
+
+                               (*profile)->data_vp = NULL;
+                               (*profile)->busy = 0;
+                               wakeup(*profile);
+
+                               goto restart;
+                       }
+                       global_user_profile_cache.age+=1;
+                       return 0;
+               }
+       }
+
+       lru = global_user_profile_cache.age;
+       *profile = NULL;
+       for(i = 0; i<global_user_profile_cache.max_ele; i++) {
+               /* Skip entry if it is in the process of being reused */
+               if(global_user_profile_cache.profiles[i].data_vp ==
+                                               (struct vnode *)0xFFFFFFFF)
+                       continue;
+               /* Otherwise grab the first empty entry */
+               if(global_user_profile_cache.profiles[i].data_vp == NULL) {
+                       *profile = &global_user_profile_cache.profiles[i];
+                       (*profile)->age = global_user_profile_cache.age;
+                       break;
+               }
+               /* Otherwise grab the oldest entry */
+               if(global_user_profile_cache.profiles[i].age < lru) {
+                       lru = global_user_profile_cache.profiles[i].age;
+                       *profile = &global_user_profile_cache.profiles[i];
+               }
+       }
+
+       /* Did we set it? */
+       if (*profile == NULL) {
+               /*
+                * No entries are available; this can only happen if all
+                * of them are currently in the process of being reused;
+                * if this happens, we sleep on the address of the first
+                * element, and restart.  This is less than ideal, but we
+                * know it will work because we know that there will be a
+                * wakeup on any entry currently in the process of being
+                * reused.
+                *
+                * XXX Recommend a two handed clock and more than 3 total
+                * XXX cache entries at some point in the future.
+                */
+                       /*
+                       * drop funnel and wait 
+                       */
+               (void)tsleep((void *)
+                &global_user_profile_cache.profiles[0],
+                       PRIBIO, "app_profile", 0);
+               goto restart;
+       }
+
+       /*
+        * If it's currently busy, we've picked the one at the end of the
+        * LRU list, but it's currently being actively used.  We sleep on
+        * its address and restart.
+        */
+       if ((*profile)->busy) {
+                       /*
+                       * drop funnel and wait 
+                       */
+               (void)tsleep((void *)
+                       *profile, 
+                       PRIBIO, "app_profile", 0);
+               goto restart;
+       }
+       (*profile)->busy = 1;
+       (*profile)->user = user;
+
+       /*
+        * put dummy value in for now to get competing request to wait
+        * above until we are finished
+        *
+        * Save the data_vp before setting it, so we can set it before
+        * we kmem_free() or vnode_rele().  If we don't do this, then we
+        * have a potential funnel race condition we have to deal with.
+        */
+       data_vp = (*profile)->data_vp;
+       (*profile)->data_vp = (struct vnode *)0xFFFFFFFF;
+
+       /*
+        * Age the cache here in all cases; this guarantees that we won't
+        * be reusing only one entry over and over, once the system reaches
+        * steady-state.
+        */
+       global_user_profile_cache.age+=1;
+
+       if(data_vp != NULL) {
+               kmem_free(kernel_map, 
+                               (*profile)->buf_ptr, 4 * PAGE_SIZE);
+               if ((*profile)->names_vp) {
+                       vnode_rele((*profile)->names_vp);
+                       (*profile)->names_vp = NULL;
+               }
+               vnode_rele(data_vp);
+       }
+       
+       /* Try to open the appropriate users profile files */
+       /* If neither file is present, try to create them  */
+       /* If one file is present and the other not, fail. */
+       /* If the files do exist, check them for the app_file */
+       /* requested and read it in if present */
+
+       ret = kmem_alloc(kernel_map,
+               (vm_offset_t *)&profile_data_string, PATH_MAX);
+
+       if(ret) {
+               (*profile)->data_vp = NULL;
+               (*profile)->busy = 0;
+               wakeup(*profile);
+               return ENOMEM;
+       }
+
+       /* Split the buffer in half since we know the size of */
+       /* our file path and our allocation is adequate for   */
+       /* both file path names */
+       profile_names_string = profile_data_string + (PATH_MAX/2);
+
+
+       strcpy(profile_data_string, cache_path);
+       strcpy(profile_names_string, cache_path);
+       profile_names_length = profile_data_length 
+                       = strlen(profile_data_string);
+       substring = profile_data_string + profile_data_length;
+       sprintf(substring, "%x_data", user);
+       substring = profile_names_string + profile_names_length;
+       sprintf(substring, "%x_names", user);
+
+       /* We now have the absolute file names */
+
+       ret = kmem_alloc(kernel_map,
+                               (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
+       if(ret) {
+               kmem_free(kernel_map, 
+                               (vm_offset_t)profile_data_string, PATH_MAX);
+               (*profile)->data_vp = NULL;
+               (*profile)->busy = 0;
+               wakeup(*profile);
+               return ENOMEM;
+       }
+
+       NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF, 
+                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
+       NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF, 
+                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
+
+       if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
+#ifdef notdef
+               printf("bsd_open_page_cache_files: CacheData file not found %s\n",
+                       profile_data_string);
+#endif
+               kmem_free(kernel_map, 
+                               (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+               kmem_free(kernel_map, 
+                       (vm_offset_t)profile_data_string, PATH_MAX);
+               (*profile)->data_vp = NULL;
+               (*profile)->busy = 0;
+               wakeup(*profile);
+               return error;
+       }
+       data_vp = nd_data.ni_vp;
+
+       if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
+               printf("bsd_open_page_cache_files: NamesData file not found %s\n",
+                       profile_data_string);
+               kmem_free(kernel_map, 
+                               (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+               kmem_free(kernel_map, 
+                       (vm_offset_t)profile_data_string, PATH_MAX);
+
+               vnode_rele(data_vp);
+               vnode_put(data_vp);
+
+               (*profile)->data_vp = NULL;
+               (*profile)->busy = 0;
+               wakeup(*profile);
+               return error;
+       }
+       names_vp = nd_names.ni_vp;
+
+       if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
+               printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
+               kmem_free(kernel_map, 
+                       (vm_offset_t)profile_data_string, PATH_MAX);
+               kmem_free(kernel_map, 
+                       (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+
+               vnode_rele(names_vp);
+               vnode_put(names_vp);
+               vnode_rele(data_vp);
+               vnode_put(data_vp);
+
+               (*profile)->data_vp = NULL;
+               (*profile)->busy = 0;
+               wakeup(*profile);
+               return error;
+       }
+
+       size = file_size;
+       if(size > 4 * PAGE_SIZE) 
+               size = 4 * PAGE_SIZE;
+       buf_ptr = names_buf;
+       resid_off = 0;
+
+       while(size) {
+               error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr, 
+                       size, resid_off,
+                       UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(), &resid, p);
+               if((error) || (size == resid)) {
+                       if(!error) {
+                               error = EINVAL;
+                       }
+                       kmem_free(kernel_map, 
+                               (vm_offset_t)profile_data_string, PATH_MAX);
+                       kmem_free(kernel_map, 
+                               (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+
+                       vnode_rele(names_vp);
+                       vnode_put(names_vp);
+                       vnode_rele(data_vp);
+                       vnode_put(data_vp);
+
+                       (*profile)->data_vp = NULL;
+                       (*profile)->busy = 0;
+                       wakeup(*profile);
+                       return error;
+               }
+               buf_ptr += size-resid;
+               resid_off += size-resid;
+               size = resid;
+       }
+       kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);
+
+       (*profile)->names_vp = names_vp;
+       (*profile)->data_vp = data_vp;
+       (*profile)->buf_ptr = names_buf;
+
+       /*
+        * At this point, both the names_vp and the data_vp have
+        * a valid usecount and an iocount held.
+        */
+       return 0;
+
+}
+
+/*
+ * bsd_close_page_cache_files
+ *
+ * Finish using a profile cache entry: vnode_put() the data vnode and
+ * then the names vnode, clear the entry's busy flag, and wakeup() on
+ * the entry's address (the address that bsd_open_page_cache_files
+ * sleeps on while an entry is busy).
+ *
+ * Parameters: profile         The cache entry being released
+ */
+void
+bsd_close_page_cache_files(
+       struct global_profile *profile)
+{
+       struct global_profile   *entry = profile;
+
+       /* drop the data vnode first, then the names vnode */
+       vnode_put(entry->data_vp);
+       vnode_put(entry->names_vp);
+
+       /* entry is no longer in use; let any sleeper retry */
+       entry->busy = 0;
+       wakeup(entry);
+}
+
+/*
+ * bsd_read_page_cache_file
+ *
+ * Look up the profile recorded for an application in the per-user
+ * profile cache and, when one exists, read it from the data file into
+ * a newly kmem_alloc()'ed kernel buffer.
+ *
+ * Parameters: user            User id selecting the cache files
+ *             fid             Out: file id of app_vp (va_fileid)
+ *             mod             Out: modification time of app_vp (seconds)
+ *             app_name        Name used as the search key
+ *             app_vp          Vnode of the application being looked up
+ *             buffer          Out: address of the profile buffer, or 0
+ *             bufsize         Out: size of the profile buffer, or 0
+ *
+ * Returns:    0               Success; *buffer and *bufsize are both 0
+ *                             when no profile is recorded
+ *             EINVAL          Cache files could not be opened, the
+ *                             names data base search failed, or the
+ *                             profile could not be read completely
+ *             ENOMEM          The profile buffer could not be allocated
+ *             other           Error returned by vnode_getattr()
+ *
+ * Notes:      On success with a non-zero *bufsize the caller owns the
+ *             buffer.  On a read failure the buffer is freed here and
+ *             *buffer and *bufsize are reset to 0 so that the caller is
+ *             never handed a pointer to freed memory.
+ */
+int
+bsd_read_page_cache_file(
+       unsigned int    user,
+       int             *fid,
+       int             *mod,
+       char            *app_name,
+       struct vnode    *app_vp,
+       vm_offset_t     *buffer,
+       vm_offset_t     *bufsize)
+{
+
+       boolean_t       funnel_state;
+
+       struct proc     *p;
+       int             error;
+       unsigned int    resid;
+
+       off_t           profile;
+       unsigned int    profile_size;
+       unsigned int    remaining;
+       vm_offset_t     read_ptr;
+
+       vm_offset_t     names_buf;
+       struct vnode_attr       va;
+       struct vfs_context  context;
+
+       kern_return_t   ret;
+
+       struct  vnode   *names_vp;
+       struct  vnode   *data_vp;
+
+       struct global_profile *uid_files;
+
+       funnel_state = thread_funnel_set(kernel_flock, TRUE);
+
+       /*
+        * Get the (busy) cache entry for this user.  Any failure here is
+        * reported to the caller as EINVAL.
+        */
+       error = bsd_open_page_cache_files(user, &uid_files);
+       if(error) {
+               thread_funnel_set(kernel_flock, funnel_state);
+               return EINVAL;
+       }
+
+       p = current_proc();
+
+       names_vp = uid_files->names_vp;
+       data_vp = uid_files->data_vp;
+       names_buf = uid_files->buf_ptr;
+
+       context.vc_proc = p;
+       context.vc_ucred = kauth_cred_get();
+
+       VATTR_INIT(&va);
+       VATTR_WANTED(&va, va_fileid);
+       VATTR_WANTED(&va, va_modify_time);
+
+       if ((error = vnode_getattr(app_vp, &va, &context))) {
+               printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
+               goto out;
+       }
+
+       *fid = (u_long)va.va_fileid;
+       *mod = va.va_modify_time.tv_sec;
+
+       if (bsd_search_page_cache_data_base(
+                   names_vp,
+                   (struct profile_names_header *)names_buf,
+                   app_name,
+                   (unsigned int) va.va_modify_time.tv_sec,
+                   (u_long)va.va_fileid, &profile, &profile_size) != 0) {
+               error = EINVAL;
+               goto out;
+       }
+
+       /* profile is an offset in the profile data base */
+       /* profile_size is zero if no profile data was found */
+       if(profile_size == 0) {
+               *buffer = 0;
+               *bufsize = 0;
+               error = 0;
+               goto out;
+       }
+
+       ret = kmem_alloc(kernel_map, buffer, profile_size);
+       if(ret) {
+               error = ENOMEM;
+               goto out;
+       }
+       *bufsize = profile_size;
+
+       /*
+        * Read until the whole profile is in.  Both the file offset and
+        * the destination pointer advance after a partial read, so later
+        * reads do not overwrite data that has already been transferred.
+        */
+       read_ptr = *buffer;
+       remaining = profile_size;
+       while(remaining) {
+               error = vn_rdwr(UIO_READ, data_vp,
+                       (caddr_t) read_ptr, remaining,
+                       profile, UIO_SYSSPACE32, IO_NODELOCKED,
+                       kauth_cred_get(), &resid, p);
+               if((error) || (remaining == resid)) {
+                       /*
+                        * Free with the size that was allocated, not
+                        * with the count still left to read.
+                        */
+                       kmem_free(kernel_map, *buffer, profile_size);
+                       *buffer = 0;
+                       *bufsize = 0;
+                       error = EINVAL;
+                       goto out;
+               }
+               read_ptr += remaining - resid;
+               profile += remaining - resid;
+               remaining = resid;
+       }
+       error = 0;
+
+out:
+       /* common exit: release the cache entry, restore the funnel */
+       bsd_close_page_cache_files(uid_files);
+       thread_funnel_set(kernel_flock, funnel_state);
+       return error;
+}
+
+/*
+ * bsd_search_page_cache_data_base
+ *
+ * Scan the names data base for an element whose mod_date and inode
+ * match and whose name matches app_name in its first 12 characters.
+ * The elements that follow the header in the caller's buffer are
+ * examined first; if the header advertises more elements than fit in
+ * a 4 page window, further windows are read from vp into a temporary
+ * 4 page buffer and examined in turn.
+ *
+ * Parameters: vp              Names file vnode, used for later windows
+ *             database        In-memory copy of the start of the file
+ *             app_name        Name to match
+ *             mod_date        Modification date to match
+ *             inode           File id to match
+ *             profile         Out: element's addr, or 0 if no match
+ *             profile_size    Out: element's size, or 0 if no match
+ *
+ * Returns:    0               Search completed (match or no match)
+ *             EINVAL          element_array is not the header size, or
+ *                             a window could not be read
+ *             ENOMEM          The temporary buffer allocation failed
+ */
+int
+bsd_search_page_cache_data_base(
+       struct  vnode                   *vp,
+       struct profile_names_header     *database,
+       char                            *app_name,
+       unsigned int                    mod_date,
+       unsigned int                    inode,
+       off_t                           *profile,
+       unsigned int                    *profile_size)
+{
+       struct proc             *cur_proc = current_proc();
+       struct profile_element  *first_window;
+       struct profile_element  *window;
+       struct profile_element  *cand;
+       unsigned int            idx;
+       unsigned int            in_window;
+       unsigned int            pending = 0;
+       unsigned int            chunk;
+       unsigned int            leftover;
+       off_t                   window_off = 0;
+       off_t                   done_off;
+       vm_offset_t             scratch = 0;
+       int                     too_big;
+       int                     rd_error;
+       kern_return_t           kr;
+
+       /* the element array must start right after the header */
+       if(((vm_offset_t)database->element_array) !=
+                               sizeof(struct profile_names_header)) {
+               return EINVAL;
+       }
+       first_window = (struct profile_element *)(
+                       (vm_offset_t)database->element_array +
+                                               (vm_offset_t)database);
+       window = first_window;
+       in_window = database->number_of_profiles;
+
+       *profile = 0;
+       *profile_size = 0;
+
+       while(in_window) {
+               /* note: code assumes header + n*ele comes out on a page boundary */
+               if(scratch == 0) {
+                       /* first window also holds the header */
+                       too_big = (sizeof(struct profile_names_header) +
+                               (in_window * sizeof(struct profile_element)))
+                                       > (PAGE_SIZE * 4);
+               } else {
+                       too_big = (in_window * sizeof(struct profile_element))
+                                       > (PAGE_SIZE * 4);
+               }
+               if(too_big) {
+                       /* clip to one window; remember what is left over */
+                       pending = in_window;
+                       in_window = (window == first_window) ?
+                               ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1 :
+                               (PAGE_SIZE * 4)/sizeof(struct profile_element);
+                       pending -= in_window;
+               }
+
+               /* examine every element of the current window */
+               for(idx = 0; idx < in_window; idx++) {
+                       cand = &window[idx];
+                       if(mod_date != cand->mod_date)
+                               continue;
+                       if(inode != cand->inode)
+                               continue;
+                       if(strncmp(cand->name, app_name, 12) != 0)
+                               continue;
+                       *profile = cand->addr;
+                       *profile_size = cand->size;
+                       if(scratch != 0) {
+                               kmem_free(kernel_map, scratch, 4 * PAGE_SIZE);
+                       }
+                       return 0;
+               }
+
+               if(pending == 0)
+                       break;
+
+               /* more elements remain in the file; fetch the next window */
+               if(scratch == 0) {
+                       kr = kmem_alloc(kernel_map, &scratch, 4 * PAGE_SIZE);
+                       if(kr != KERN_SUCCESS) {
+                               return ENOMEM;
+                       }
+               }
+               window = (struct profile_element *)scratch;
+               in_window = pending;
+               pending = 0;
+               window_off += 4 * PAGE_SIZE;
+
+               chunk = in_window * sizeof(struct profile_element);
+               if((in_window * sizeof(struct profile_element)) >
+                                                       (PAGE_SIZE * 4)) {
+                       chunk = PAGE_SIZE * 4;
+               }
+
+               for(done_off = 0; chunk != 0; ) {
+                       rd_error = vn_rdwr(UIO_READ, vp,
+                               CAST_DOWN(caddr_t, (scratch + done_off)),
+                               chunk, window_off + done_off, UIO_SYSSPACE32,
+                               IO_NODELOCKED, kauth_cred_get(), &leftover,
+                               cur_proc);
+                       /* an error, or a read making no progress, ends it */
+                       if((rd_error) || (chunk == leftover)) {
+                               if(scratch != 0) {
+                                       kmem_free(kernel_map, scratch, 4 * PAGE_SIZE);
+                               }
+                               return EINVAL;
+                       }
+                       done_off += chunk-leftover;
+                       chunk = leftover;
+               }
+       }
+
+       /* no match: *profile and *profile_size stay 0 */
+       if(scratch != 0) {
+               kmem_free(kernel_map, scratch, 4 * PAGE_SIZE);
+       }
+       return 0;
+}
+
+/*
+ * bsd_write_page_cache_file
+ *
+ * Append a new profile to the per-user profile cache: add a names
+ * element describing it, rewrite the names file header, and append the
+ * profile bytes to the end of the data file.  Nothing is written if
+ * the names data base already holds a matching element.
+ *
+ * Parameters: user            User id selecting the cache files
+ *             file_name       Name recorded in the element (12 chars)
+ *             buffer          Profile data to append
+ *             size            Number of bytes in buffer
+ *             mod             Modification date recorded in the element
+ *             fid             File id recorded in the element
+ *
+ * Returns:    0               Profile written, or a matching profile
+ *                             was already present
+ *             EINVAL          Cache files could not be opened, the
+ *                             names data base search failed, or a
+ *                             write made no progress
+ *             other           Error from vnode_size() or vn_rdwr()
+ *
+ * Notes:      The names file is updated before the data file.  If the
+ *             names file update fails, the cached element count is
+ *             rolled back so that the cached header does not advertise
+ *             an element that was not recorded on disk.  A failure
+ *             while writing the data file is not rolled back.
+ */
+int
+bsd_write_page_cache_file(
+       unsigned int    user,
+       char            *file_name,
+       caddr_t         buffer,
+       vm_size_t       size,
+       int             mod,
+       int             fid)
+{
+       struct proc             *p;
+       int                             resid;
+       off_t                   resid_off;
+       int                             error;
+       boolean_t               funnel_state;
+       off_t                   file_size;
+       struct vfs_context      context;
+       off_t                   profile;
+       unsigned int    profile_size;
+       unsigned int    header_size;
+       vm_offset_t     buf_ptr;
+
+       vm_offset_t     names_buf;
+       struct  vnode   *names_vp;
+       struct  vnode   *data_vp;
+       struct  profile_names_header *profile_header;
+       off_t                   name_offset;
+       struct global_profile *uid_files;
+
+
+       funnel_state = thread_funnel_set(kernel_flock, TRUE);
+
+       /* any failure to get the cache entry is reported as EINVAL */
+       error = bsd_open_page_cache_files(user, &uid_files);
+       if(error) {
+               thread_funnel_set(kernel_flock, funnel_state);
+               return EINVAL;
+       }
+
+       p = current_proc();
+
+       names_vp = uid_files->names_vp;
+       data_vp = uid_files->data_vp;
+       names_buf = uid_files->buf_ptr;
+
+       /* Stat data file for size */
+
+       context.vc_proc = p;
+       context.vc_ucred = kauth_cred_get();
+
+       if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
+               printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
+               goto out;
+       }
+
+       if (bsd_search_page_cache_data_base(names_vp,
+                       (struct profile_names_header *)names_buf,
+                       file_name, (unsigned int) mod,
+                       fid, &profile, &profile_size) != 0) {
+               error = EINVAL;
+               goto out;
+       }
+
+       /* profile is an offset in the profile data base */
+       /* profile_size is zero if no profile data was found */
+       if(profile_size != 0) {
+               /* Someone else wrote a twin profile before us */
+               error = 0;
+               goto out;
+       }
+
+       /* Our Write case */
+
+       /* the new element goes right after the last existing one */
+       profile_header = (struct profile_names_header *)names_buf;
+       name_offset = sizeof(struct profile_names_header) +
+               (sizeof(struct profile_element)
+                       * profile_header->number_of_profiles);
+       profile_header->number_of_profiles += 1;
+
+       if(name_offset < PAGE_SIZE * 4) {
+               struct profile_element  *name;
+               /*
+                * The element lands inside the cached window: fill it in
+                * in place and write header and elements out together.
+                */
+               name = (struct profile_element *)
+                       (names_buf + (vm_offset_t)name_offset);
+               name->addr =  file_size;
+               name->size = size;
+               name->mod_date = mod;
+               name->inode = fid;
+               strncpy (name->name, file_name, 12);
+
+               header_size = name_offset +
+                       sizeof(struct profile_element);
+       } else {
+               unsigned int    ele_size;
+               struct profile_element  name;
+               /*
+                * The element lies beyond the cached window: write it
+                * out separately, then rewrite only the header.
+                */
+               name.addr = file_size;
+               name.size = size;
+               name.mod_date = mod;
+               name.inode = fid;
+               strncpy (name.name, file_name, 12);
+
+               ele_size = sizeof(struct profile_element);
+               buf_ptr = (vm_offset_t)&name;
+               resid_off = name_offset;
+
+               while(ele_size) {
+                       error = vn_rdwr(UIO_WRITE, names_vp,
+                               (caddr_t)buf_ptr,
+                               ele_size, resid_off,
+                               UIO_SYSSPACE32, IO_NODELOCKED,
+                               kauth_cred_get(), &resid, p);
+                       /* a write that makes no progress would loop forever */
+                       if(!error && (ele_size == (unsigned int)resid)) {
+                               error = EINVAL;
+                       }
+                       if(error) {
+                               printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
+                               /* element not recorded; undo the count */
+                               profile_header->number_of_profiles -= 1;
+                               goto out;
+                       }
+                       buf_ptr += (vm_offset_t)
+                                       ele_size-resid;
+                       resid_off += ele_size-resid;
+                       ele_size = resid;
+               }
+
+               header_size = sizeof(struct profile_names_header);
+       }
+
+       buf_ptr = (vm_offset_t)profile_header;
+       resid_off = 0;
+
+       /* write names file header */
+       while(header_size) {
+               error = vn_rdwr(UIO_WRITE, names_vp,
+                       (caddr_t)buf_ptr,
+                       header_size, resid_off,
+                       UIO_SYSSPACE32, IO_NODELOCKED,
+                       kauth_cred_get(), &resid, p);
+               if(!error && (header_size == (unsigned int)resid)) {
+                       error = EINVAL;
+               }
+               if(error) {
+                       printf("bsd_write_page_cache_file: Can't write header %x\n", user);
+                       /* header not updated; undo the count */
+                       profile_header->number_of_profiles -= 1;
+                       goto out;
+               }
+               buf_ptr += (vm_offset_t)header_size-resid;
+               resid_off += header_size-resid;
+               header_size = resid;
+       }
+
+       /* write profile to data file, starting at its previous end */
+       resid_off = file_size;
+       while(size) {
+               error = vn_rdwr(UIO_WRITE, data_vp,
+                       (caddr_t)buffer, size, resid_off,
+                       UIO_SYSSPACE32, IO_NODELOCKED,
+                       kauth_cred_get(), &resid, p);
+               if(!error && (size == (vm_size_t)resid)) {
+                       error = EINVAL;
+               }
+               if(error) {
+                       printf("bsd_write_page_cache_file: Can't write profile data %x\n", user);
+                       goto out;
+               }
+               buffer += size-resid;
+               resid_off += size-resid;
+               size = resid;
+       }
+       error = 0;
+
+out:
+       /* common exit: release the cache entry, restore the funnel */
+       bsd_close_page_cache_files(uid_files);
+       thread_funnel_set(kernel_flock, funnel_state);
+       return error;
+}
+
+/*
+ * prepare_profile_database
+ *
+ * Create the pair of profile cache files for a user under
+ * /var/vm/app_profile/ ("<user in hex>_data" and "<user in hex>_names"),
+ * write an empty names header into the names file, and set the owner
+ * of both files to the user.
+ *
+ * Parameters: user            User id the cache files are created for
+ *
+ * Returns:    0               Files created, or the data file could not
+ *                             be created (both files are opened with
+ *                             O_CREAT | O_EXCL, and a failure to open
+ *                             the data file is deliberately not
+ *                             reported as an error)
+ *             ENOMEM          A working buffer could not be allocated
+ *             EINVAL          The header write made no progress
+ *             other           Error from creating the names file or
+ *                             from writing its header
+ *
+ * Notes:      Failure to set the owner of either file is logged but
+ *             does not change the return value.
+ */
+int
+prepare_profile_database(int   user)
+{
+       const char *cache_path = "/var/vm/app_profile/";
+       struct proc     *p;
+       int             error;
+       int             resid;
+       off_t           resid_off;
+       vm_size_t       size;
+
+       struct  vnode   *names_vp;
+       struct  vnode   *data_vp;
+       vm_offset_t     names_buf;
+       vm_offset_t     buf_ptr;
+
+       int             profile_names_length;
+       int             profile_data_length;
+       char            *profile_data_string;
+       char            *profile_names_string;
+       char            *substring;
+
+       struct vnode_attr va;
+       struct vfs_context context;
+
+       struct  profile_names_header *profile_header;
+       kern_return_t   ret;
+
+       struct nameidata nd_names;
+       struct nameidata nd_data;
+
+       p = current_proc();
+
+       context.vc_proc = p;
+       context.vc_ucred = kauth_cred_get();
+
+       ret = kmem_alloc(kernel_map,
+               (vm_offset_t *)&profile_data_string, PATH_MAX);
+
+       if(ret) {
+               return ENOMEM;
+       }
+
+       /* Split the buffer in half since we know the size of */
+       /* our file path and our allocation is adequate for   */
+       /* both file path names */
+       profile_names_string = profile_data_string + (PATH_MAX/2);
+
+
+       strcpy(profile_data_string, cache_path);
+       strcpy(profile_names_string, cache_path);
+       profile_names_length = profile_data_length
+                       = strlen(profile_data_string);
+       substring = profile_data_string + profile_data_length;
+       sprintf(substring, "%x_data", user);
+       substring = profile_names_string + profile_names_length;
+       sprintf(substring, "%x_names", user);
+
+       /* We now have the absolute file names */
+
+       ret = kmem_alloc(kernel_map,
+                               (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
+       if(ret) {
+               kmem_free(kernel_map,
+                               (vm_offset_t)profile_data_string, PATH_MAX);
+               return ENOMEM;
+       }
+
+       NDINIT(&nd_names, LOOKUP, FOLLOW,
+                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
+       NDINIT(&nd_data, LOOKUP, FOLLOW,
+                       UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
+
+       if ( (error = vn_open(&nd_data,
+                       O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
+               /*
+                * Not reported as a failure: with O_EXCL this is the
+                * outcome when the data file is already there.
+                */
+               error = 0;
+               goto out_free;
+       }
+       data_vp = nd_data.ni_vp;
+
+       if ( (error = vn_open(&nd_names,
+                       O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
+               /* report the path that actually failed: the names file */
+               printf("prepare_profile_database: Can't create CacheNames %s\n",
+                       profile_names_string);
+               goto out_data;
+       }
+       names_vp = nd_names.ni_vp;
+
+       /* Write Header for new names file */
+
+       profile_header = (struct profile_names_header *)names_buf;
+
+       profile_header->number_of_profiles = 0;
+       profile_header->user_id =  user;
+       profile_header->version = 1;
+       profile_header->element_array =
+                               sizeof(struct profile_names_header);
+       profile_header->spare1 = 0;
+       profile_header->spare2 = 0;
+       profile_header->spare3 = 0;
+
+       size = sizeof(struct profile_names_header);
+       buf_ptr = (vm_offset_t)profile_header;
+       resid_off = 0;
+
+       while(size) {
+               error = vn_rdwr(UIO_WRITE, names_vp,
+                               (caddr_t)buf_ptr, size, resid_off,
+                               UIO_SYSSPACE32, IO_NODELOCKED,
+                               kauth_cred_get(), &resid, p);
+               /* a write that makes no progress would loop forever */
+               if(!error && (size == (vm_size_t)resid)) {
+                       error = EINVAL;
+               }
+               if(error) {
+                       printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
+                       goto out_names;
+               }
+               buf_ptr += size-resid;
+               resid_off += size-resid;
+               size = resid;
+       }
+
+       /* hand both files to the user; failures are only logged */
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_uid, user);
+       error = vnode_setattr(names_vp, &va, &context);
+       if(error) {
+               printf("prepare_profile_database: "
+                       "Can't set user %s\n", profile_names_string);
+       }
+
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_uid, user);
+       error = vnode_setattr(data_vp, &va, &context);
+       if(error) {
+               printf("prepare_profile_database: "
+                       "Can't set user %s\n", profile_data_string);
+       }
+       error = 0;
+
+       /* unwind in reverse order of acquisition */
+out_names:
+       vnode_rele(names_vp);
+       vnode_put(names_vp);
+out_data:
+       vnode_rele(data_vp);
+       vnode_put(data_vp);
+out_free:
+       kmem_free(kernel_map,
+                       (vm_offset_t)profile_data_string, PATH_MAX);
+       kmem_free(kernel_map,
+                       (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+       return error;
+
+}