/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the
+ * License may not be used to create, or enable the creation or
+ * redistribution of, unlawful or unlicensed copies of an Apple operating
+ * system, or to circumvent, violate, or enable the circumvention or
+ * violation of, any terms of an Apple operating system software license
+ * agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
*/
/*
* Mach Operating System
/*
*/
+
+
#include <meta_features.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
+#include <mach/mach_traps.h>
#include <mach/time_value.h>
+#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/port.h>
+#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
-#include <sys/proc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
-#include <sys/vnode.h>
-#include <sys/buf.h>
+#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
-#include <sys/ubc.h>
+#include <sys/ubc_internal.h>
+#include <sys/user.h>
+#include <sys/syslog.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/mman.h>
+#include <sys/sysctl.h>
+
+#include <bsm/audit_kernel.h>
+#include <bsm/audit_kevents.h>
#include <kern/kalloc.h>
-#include <kern/parallel.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <machine/spl.h>
+
#include <mach/shared_memory_server.h>
+#include <vm/vm_shared_memory_server.h>
+
+#include <vm/vm_protos.h>
-useracc(addr, len, prot)
- caddr_t addr;
- u_int len;
- int prot;
+void
+log_nx_failure(addr64_t vaddr, vm_prot_t prot)
+{
+ printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot);
+}
+
+
+int
+useracc(
+ user_addr_t addr,
+ user_size_t len,
+ int prot)
{
return (vm_map_check_protection(
current_map(),
- trunc_page(addr), round_page(addr+len),
+ vm_map_trunc_page(addr), vm_map_round_page(addr+len),
prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}
-vslock(addr, len)
- caddr_t addr;
- int len;
+int
+vslock(
+ user_addr_t addr,
+ user_size_t len)
{
-kern_return_t kret;
- kret = vm_map_wire(current_map(), trunc_page(addr),
- round_page(addr+len),
+ kern_return_t kret;
+ kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
+ vm_map_round_page(addr+len),
VM_PROT_READ | VM_PROT_WRITE ,FALSE);
switch (kret) {
}
}
-vsunlock(addr, len, dirtied)
- caddr_t addr;
- int len;
- int dirtied;
+int
+vsunlock(
+ user_addr_t addr,
+ user_size_t len,
+ __unused int dirtied)
{
- pmap_t pmap;
#if FIXME /* [ */
+ pmap_t pmap;
vm_page_t pg;
+ vm_map_offset_t vaddr;
+ ppnum_t paddr;
#endif /* FIXME ] */
- vm_offset_t vaddr, paddr;
kern_return_t kret;
#if FIXME /* [ */
if (dirtied) {
pmap = get_task_pmap(current_task());
- for (vaddr = trunc_page(addr); vaddr < round_page(addr+len);
+ for (vaddr = vm_map_trunc_page(addr);
+ vaddr < vm_map_round_page(addr+len);
vaddr += PAGE_SIZE) {
paddr = pmap_extract(pmap, vaddr);
pg = PHYS_TO_VM_PAGE(paddr);
#ifdef lint
dirtied++;
#endif /* lint */
- kret = vm_map_unwire(current_map(), trunc_page(addr),
- round_page(addr+len), FALSE);
+ kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
+ vm_map_round_page(addr+len), FALSE);
switch (kret) {
case KERN_SUCCESS:
return (0);
}
}
-#if defined(sun) || BALANCE || defined(m88k)
-#else /*defined(sun) || BALANCE || defined(m88k)*/
-subyte(addr, byte)
- void * addr;
- int byte;
+int
+subyte(
+ user_addr_t addr,
+ int byte)
{
char character;
return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}
-suibyte(addr, byte)
- void * addr;
- int byte;
+int
+suibyte(
+ user_addr_t addr,
+ int byte)
{
char character;
character = (char)byte;
- return (copyout((void *) &(character), addr, sizeof(char)) == 0 ? 0 : -1);
+ return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}
-int fubyte(addr)
- void * addr;
+int fubyte(user_addr_t addr)
{
unsigned char byte;
return(byte);
}
-int fuibyte(addr)
- void * addr;
+int fuibyte(user_addr_t addr)
{
unsigned char byte;
return(byte);
}
-suword(addr, word)
- void * addr;
- long word;
+int
+suword(
+ user_addr_t addr,
+ long word)
{
return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}
-long fuword(addr)
- void * addr;
+long fuword(user_addr_t addr)
{
long word;
/* suiword and fuiword are the same as suword and fuword, respectively */
-suiword(addr, word)
- void * addr;
- long word;
+int
+suiword(
+ user_addr_t addr,
+ long word)
{
return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}
-long fuiword(addr)
- void * addr;
+long fuiword(user_addr_t addr)
{
long word;
return(-1);
return(word);
}
-#endif /* defined(sun) || BALANCE || defined(m88k) || defined(i386) */
+
+/*
+ * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
+ * fetching and setting of process-sized size_t and pointer values.
+ */
+int
+sulong(user_addr_t addr, int64_t word)
+{
+
+ if (IS_64BIT_PROCESS(current_proc())) {
+ return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
+ } else {
+ return(suiword(addr, (long)word));
+ }
+}
+
+int64_t
+fulong(user_addr_t addr)
+{
+ int64_t longword;
+
+ if (IS_64BIT_PROCESS(current_proc())) {
+ if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
+ return(-1);
+ return(longword);
+ } else {
+ return((int64_t)fuiword(addr));
+ }
+}
int
-swapon()
+suulong(user_addr_t addr, uint64_t uword)
{
- return(EOPNOTSUPP);
+
+ if (IS_64BIT_PROCESS(current_proc())) {
+ return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
+ } else {
+ return(suiword(addr, (u_long)uword));
+ }
+}
+
+uint64_t
+fuulong(user_addr_t addr)
+{
+ uint64_t ulongword;
+
+ if (IS_64BIT_PROCESS(current_proc())) {
+ if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
+ return(-1ULL);
+ return(ulongword);
+ } else {
+ return((uint64_t)fuiword(addr));
+ }
+}
+
+int
+swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
+{
+ return(ENOTSUP);
}
kern_return_t
-pid_for_task(t, x)
- mach_port_t t;
- int *x;
+pid_for_task(
+ struct pid_for_task_args *args)
{
+ mach_port_name_t t = args->t;
+ user_addr_t pid_addr = args->pid;
struct proc * p;
task_t t1;
- extern task_t port_name_to_task(mach_port_t t);
int pid = -1;
kern_return_t err = KERN_SUCCESS;
boolean_t funnel_state;
+ AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
+ AUDIT_ARG(mach_port1, t);
+
funnel_state = thread_funnel_set(kernel_flock, TRUE);
t1 = port_name_to_task(t);
} else {
p = get_bsdtask_info(t1);
if (p) {
- pid = p->p_pid;
+ pid = proc_pid(p);
err = KERN_SUCCESS;
} else {
err = KERN_FAILURE;
}
task_deallocate(t1);
pftout:
- (void) copyout((char *) &pid, (char *) x, sizeof(*x));
+ AUDIT_ARG(pid, pid);
+ (void) copyout((char *) &pid, pid_addr, sizeof(int));
thread_funnel_set(kernel_flock, funnel_state);
+ AUDIT_MACH_SYSCALL_EXIT(err);
return(err);
}
*
* Only permitted to privileged processes, or processes
* with the same user ID.
+ *
+ * XXX This should be a BSD system call, not a Mach trap!!!
+ */
+/*
+ *
+ * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
+ * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: all permissive; related ones allowed or privileged
+ * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self access allowed; setgid (to tfp_group) are allowed for other tasks
+ *
*/
+static int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
+/* the groutp is inited to kmem group and is modifiable by sysctl */
+static int tfp_group_inited = 0; /* policy groups are loaded ... */
+static gid_t tfp_group_ronly = 0; /* procview group */
+static gid_t tfp_group_rw = 0; /* procmod group */
+
kern_return_t
-task_for_pid(target_tport, pid, t)
- mach_port_t target_tport;
- int pid;
- mach_port_t *t;
+task_for_pid(
+ struct task_for_pid_args *args)
{
+ mach_port_name_t target_tport = args->target_tport;
+ int pid = args->pid;
+ user_addr_t task_addr = args->t;
+ struct uthread *uthread;
struct proc *p;
struct proc *p1;
task_t t1;
- mach_port_t tret;
- extern task_t port_name_to_task(mach_port_t tp);
+ mach_port_name_t tret;
void * sright;
int error = 0;
+ int is_member = 0;
boolean_t funnel_state;
+ boolean_t ispermitted = FALSE;
+ char procname[MAXCOMLEN+1];
+
+ AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
+ AUDIT_ARG(pid, pid);
+ AUDIT_ARG(mach_port1, target_tport);
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void ) copyout((char *)&t1, (char *)t, sizeof(mach_port_t));
+ (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return(KERN_FAILURE);
}
funnel_state = thread_funnel_set(kernel_flock, TRUE);
- restart:
- p1 = get_bsdtask_info(t1);
- if (
- ((p = pfind(pid)) != (struct proc *) 0)
- && (p1 != (struct proc *) 0)
- && ((p->p_ucred->cr_uid == p1->p_ucred->cr_uid)
- || !(suser(p1->p_ucred, &p1->p_acflag)))
- && (p->p_stat != SZOMB)
- ) {
- if (p->task != TASK_NULL) {
- if (!task_reference_try(p->task)) {
- mutex_pause(); /* temp loss of funnel */
- goto restart;
- }
- sright = convert_task_to_port(p->task);
- tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
+ p1 = current_proc();
+
+ /*
+ * Delayed binding of thread credential to process credential, if we
+ * are not running with an explicitly set thread credential.
+ */
+ uthread = get_bsdthread_info(current_thread());
+ if (uthread->uu_ucred != p1->p_ucred &&
+ (uthread->uu_flag & UT_SETUID) == 0) {
+ kauth_cred_t old = uthread->uu_ucred;
+ proc_lock(p1);
+ uthread->uu_ucred = p1->p_ucred;
+ kauth_cred_ref(uthread->uu_ucred);
+ proc_unlock(p1);
+ if (old != NOCRED)
+ kauth_cred_rele(old);
+ }
+
+ p = pfind(pid);
+ AUDIT_ARG(process, p);
+
+ switch (tfp_policy) {
+
+ case KERN_TFP_POLICY_PERMISSIVE:
+ /* self or suser or related ones */
+ if ((p != (struct proc *) 0)
+ && (p1 != (struct proc *) 0)
+ && (
+ (p1 == p)
+ || !(suser(kauth_cred_get(), 0))
+ || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
+ ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
+ && ((p->p_flag & P_SUGID) == 0))
+ )
+ && (p->p_stat != SZOMB)
+ )
+ ispermitted = TRUE;
+ break;
+
+ case KERN_TFP_POLICY_RESTRICTED:
+ /* self or suser or setgid and related ones only */
+ if ((p != (struct proc *) 0)
+ && (p1 != (struct proc *) 0)
+ && (
+ (p1 == p)
+ || !(suser(kauth_cred_get(), 0))
+ || (((tfp_group_inited != 0) &&
+ (
+ ((kauth_cred_ismember_gid(kauth_cred_get(),
+ tfp_group_ronly, &is_member) == 0) && is_member)
+ ||((kauth_cred_ismember_gid(kauth_cred_get(),
+ tfp_group_rw, &is_member) == 0) && is_member)
+ )
+ )
+ && ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
+ ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
+ && ((p->p_flag & P_SUGID) == 0))
+ )
+ )
+ && (p->p_stat != SZOMB)
+ )
+ ispermitted = TRUE;
+
+ break;
+
+ case KERN_TFP_POLICY_DENY:
+ /* self or suser only */
+ default:
+ /* do not return task port of other task at all */
+ if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
+ && ((p1 == p) || !(suser(kauth_cred_get(), 0))))
+ ispermitted = TRUE;
+ else
+ ispermitted = FALSE;
+ break;
+ };
+
+
+ if (ispermitted == TRUE) {
+ if (p->task != TASK_NULL) {
+ task_reference(p->task);
+ sright = (void *)convert_task_to_port(p->task);
+ tret = ipc_port_copyout_send(
+ sright,
+ get_task_ipcspace(current_task()));
} else
tret = MACH_PORT_NULL;
- (void ) copyout((char *)&tret, (char *) t, sizeof(mach_port_t));
+ AUDIT_ARG(mach_port2, tret);
+ (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
task_deallocate(t1);
error = KERN_SUCCESS;
goto tfpout;
+ } else {
+ /*
+ * There is no guarantee that p_comm is null terminated and
+ * kernel implementation of string functions are complete. So
+ * ensure stale info is not leaked out, bzero the buffer
+ */
+ bzero(&procname[0], MAXCOMLEN+1);
+ strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
+ if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
+ log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
+ ((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
+ ((p != PROC_NULL)?(p->p_pid):0));
}
+
task_deallocate(t1);
tret = MACH_PORT_NULL;
- (void) copyout((char *) &tret, (char *) t, sizeof(mach_port_t));
+ (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
error = KERN_FAILURE;
tfpout:
thread_funnel_set(kernel_flock, funnel_state);
+ AUDIT_MACH_SYSCALL_EXIT(error);
return(error);
}
+/*
+ * Routine: task_name_for_pid
+ * Purpose:
+ * Get the task name port for another "process", named by its
+ * process ID on the same host as "target_task".
+ *
+ * Only permitted to privileged processes, or processes
+ * with the same user ID.
+ *
+ * XXX This should be a BSD system call, not a Mach trap!!!
+ */
-struct load_shared_file_args {
- char *filename;
- caddr_t mfa;
- u_long mfs;
- caddr_t *ba;
- int map_cnt;
- sf_mapping_t *mappings;
- int *flags;
-};
+kern_return_t
+task_name_for_pid(
+ struct task_name_for_pid_args *args)
+{
+ mach_port_name_t target_tport = args->target_tport;
+ int pid = args->pid;
+ user_addr_t task_addr = args->t;
+ struct uthread *uthread;
+ struct proc *p;
+ struct proc *p1;
+ task_t t1;
+ mach_port_name_t tret;
+ void * sright;
+ int error = 0;
+ boolean_t funnel_state;
-int ws_disabled = 1;
+ AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
+ AUDIT_ARG(pid, pid);
+ AUDIT_ARG(mach_port1, target_tport);
-int
-load_shared_file(
- struct proc *p,
- struct load_shared_file_args *uap,
- register *retval)
-{
- caddr_t mapped_file_addr=uap->mfa;
- u_long mapped_file_size=uap->mfs;
- caddr_t *base_address=uap->ba;
- int map_cnt=uap->map_cnt;
- sf_mapping_t *mappings=uap->mappings;
- char *filename=uap->filename;
- int *flags=uap->flags;
- struct vnode *vp = 0;
- struct nameidata nd, *ndp;
- char *filename_str;
- register int error;
- kern_return_t kr;
-
- struct vattr vattr;
- memory_object_control_t file_control;
- sf_mapping_t *map_list;
- caddr_t local_base;
- int local_flags;
- int caller_flags;
- int i;
- vm_size_t dummy;
- kern_return_t kret;
+ t1 = port_name_to_task(target_tport);
+ if (t1 == TASK_NULL) {
+ (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
+ return(KERN_FAILURE);
+ }
+
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+
+ p1 = current_proc();
+
+ /*
+ * Delayed binding of thread credential to process credential, if we
+ * are not running with an explicitly set thread credential.
+ */
+ uthread = get_bsdthread_info(current_thread());
+ if (uthread->uu_ucred != p1->p_ucred &&
+ (uthread->uu_flag & UT_SETUID) == 0) {
+ kauth_cred_t old = uthread->uu_ucred;
+ proc_lock(p1);
+ uthread->uu_ucred = p1->p_ucred;
+ kauth_cred_ref(uthread->uu_ucred);
+ proc_unlock(p1);
+ if (old != NOCRED)
+ kauth_cred_rele(old);
+ }
+
+ p = pfind(pid);
+ AUDIT_ARG(process, p);
+
+ if ((p != (struct proc *) 0)
+ && (p->p_stat != SZOMB)
+ && (p1 != (struct proc *) 0)
+ && ((p1 == p)
+ || !(suser(kauth_cred_get(), 0))
+ || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
+ ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))
+ {
+ if (p->task != TASK_NULL)
+ {
+ task_reference(p->task);
+ sright = (void *)convert_task_name_to_port(p->task);
+ tret = ipc_port_copyout_send(
+ sright,
+ get_task_ipcspace(current_task()));
+ } else
+ tret = MACH_PORT_NULL;
+ AUDIT_ARG(mach_port2, tret);
+ (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
+ task_deallocate(t1);
+ error = KERN_SUCCESS;
+ goto tnfpout;
+ }
+
+ task_deallocate(t1);
+ tret = MACH_PORT_NULL;
+ (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
+ error = KERN_FAILURE;
+tnfpout:
+ thread_funnel_set(kernel_flock, funnel_state);
+ AUDIT_MACH_SYSCALL_EXIT(error);
+ return(error);
+}
+
+static int
+sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
+ __unused int arg2, struct sysctl_req *req)
+{
+ int error = 0;
+ int new_value;
- shared_region_mapping_t shared_region;
+ error = SYSCTL_OUT(req, arg1, sizeof(int));
+ if (error || req->newptr == USER_ADDR_NULL)
+ return(error);
+
+ if (!is_suser())
+ return(EPERM);
+
+ if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
+ goto out;
+ }
+ if ((new_value == KERN_TFP_POLICY_DENY)
+ || (new_value == KERN_TFP_POLICY_PERMISSIVE)
+ || (new_value == KERN_TFP_POLICY_RESTRICTED))
+ tfp_policy = new_value;
+ else
+ error = EINVAL;
+out:
+ return(error);
+
+}
+
+static int
+sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
+ __unused int arg2, struct sysctl_req *req)
+{
+ int error = 0;
+ int new_value;
+
+ error = SYSCTL_OUT(req, arg1, sizeof(int));
+ if (error || req->newptr == USER_ADDR_NULL)
+ return(error);
+
+ if (!is_suser())
+ return(EPERM);
+
+ /*
+ * Once set; cannot be reset till next boot. Launchd will set this
+ * in its pid 1 init and no one can set after that.
+ */
+ if (tfp_group_inited != 0)
+ return(EPERM);
+
+ if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
+ goto out;
+ }
+
+ if (new_value >= 100)
+ error = EINVAL;
+ else {
+ if (arg1 == &tfp_group_ronly)
+ tfp_group_ronly = new_value;
+ else if (arg1 == &tfp_group_rw)
+ tfp_group_rw = new_value;
+ else
+ error = EINVAL;
+ if ((tfp_group_ronly != 0 ) && (tfp_group_rw != 0 ))
+ tfp_group_inited = 1;
+ }
+
+out:
+ return(error);
+}
+
+SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
+SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
+ &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
+SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
+ &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups ,"I","read_group");
+SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
+ &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups ,"I","rw_group");
+
+
+SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW, &shared_region_trace_level, 0, "");
+
+/*
+ * shared_region_make_private_np:
+ *
+ * This system call is for "dyld" only.
+ *
+ * It creates a private copy of the current process's "shared region" for
+ * split libraries. "dyld" uses this when the shared region is full or
+ * it needs to load a split library that conflicts with an already loaded one
+ * that this process doesn't need. "dyld" specifies a set of address ranges
+ * that it wants to keep in the now-private "shared region". These cover
+ * the set of split libraries that the process needs so far. The kernel needs
+ * to deallocate the rest of the shared region, so that it's available for
+ * more libraries for this process.
+ */
+int
+shared_region_make_private_np(
+ struct proc *p,
+ struct shared_region_make_private_np_args *uap,
+ __unused int *retvalp)
+{
+ int error;
+ kern_return_t kr;
+ boolean_t using_shared_regions;
+ user_addr_t user_ranges;
+ unsigned int range_count;
+ vm_size_t ranges_size;
+ struct shared_region_range_np *ranges;
+ shared_region_mapping_t shared_region;
struct shared_region_task_mappings task_mapping_info;
- shared_region_mapping_t next;
+ shared_region_mapping_t next;
- ndp = &nd;
-
- unix_master();
-
- /* Retrieve the base address */
- if (error = copyin(base_address, &local_base, sizeof (caddr_t))) {
- goto lsf_bailout;
- }
- if (error = copyin(flags, &local_flags, sizeof (int))) {
- goto lsf_bailout;
- }
- caller_flags = local_flags;
- kret = kmem_alloc(kernel_map, (vm_offset_t *)&filename_str,
- (vm_size_t)(MAXPATHLEN));
- if (kret != KERN_SUCCESS) {
- error = ENOMEM;
- goto lsf_bailout;
+ ranges = NULL;
+
+ range_count = uap->rangeCount;
+ user_ranges = uap->ranges;
+ ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));
+
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_INFO,
+ ("shared_region: %p [%d(%s)] "
+ "make_private(rangecount=%d)\n",
+ current_thread(), p->p_pid, p->p_comm, range_count));
+
+ /* allocate kernel space for the "ranges" */
+ if (range_count != 0) {
+ if ((mach_vm_size_t) ranges_size !=
+ (mach_vm_size_t) range_count * sizeof (ranges[0])) {
+ /* 32-bit integer overflow */
+ error = EINVAL;
+ goto done;
}
- kret = kmem_alloc(kernel_map, (vm_offset_t *)&map_list,
- (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
- if (kret != KERN_SUCCESS) {
- kmem_free(kernel_map, (vm_offset_t)filename_str,
- (vm_size_t)(MAXPATHLEN));
+ kr = kmem_alloc(kernel_map,
+ (vm_offset_t *) &ranges,
+ ranges_size);
+ if (kr != KERN_SUCCESS) {
error = ENOMEM;
- goto lsf_bailout;
+ goto done;
}
- if (error =
- copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) {
- goto lsf_bailout_free;
+ /* copy "ranges" from user-space */
+ error = copyin(user_ranges,
+ ranges,
+ ranges_size);
+ if (error) {
+ goto done;
+ }
}
- if (error = copyinstr(filename,
- filename_str, MAXPATHLEN, (size_t *)&dummy)) {
- goto lsf_bailout_free;
+ if (p->p_flag & P_NOSHLIB) {
+ /* no split library has been mapped for this process so far */
+ using_shared_regions = FALSE;
+ } else {
+ /* this process has already mapped some split libraries */
+ using_shared_regions = TRUE;
}
/*
- * Get a vnode for the target file
+ * Get a private copy of the current shared region.
+ * Do not chain it to the system-wide shared region, as we'll want
+ * to map other split libraries in place of the old ones. We want
+ * to completely detach from the system-wide shared region and go our
+ * own way after this point, not sharing anything with other processes.
*/
- NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
- filename_str, p);
-
- if ((error = namei(ndp))) {
- goto lsf_bailout_free;
+ error = clone_system_shared_regions(using_shared_regions,
+ FALSE, /* chain_regions */
+ ENV_DEFAULT_ROOT);
+ if (error) {
+ goto done;
}
- vp = ndp->ni_vp;
+ /* get info on the newly allocated shared region */
+ vm_get_shared_region(current_task(), &shared_region);
+ task_mapping_info.self = (vm_offset_t) shared_region;
+ shared_region_mapping_info(shared_region,
+ &(task_mapping_info.text_region),
+ &(task_mapping_info.text_size),
+ &(task_mapping_info.data_region),
+ &(task_mapping_info.data_size),
+ &(task_mapping_info.region_mappings),
+ &(task_mapping_info.client_base),
+ &(task_mapping_info.alternate_base),
+ &(task_mapping_info.alternate_next),
+ &(task_mapping_info.fs_base),
+ &(task_mapping_info.system),
+ &(task_mapping_info.flags),
+ &next);
- if (vp->v_type != VREG) {
+ /*
+ * We now have our private copy of the shared region, as it was before
+ * the call to clone_system_shared_regions(). We now need to clean it
+ * up and keep only the memory areas described by the "ranges" array.
+ */
+ kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);
+ switch (kr) {
+ case KERN_SUCCESS:
+ error = 0;
+ break;
+ default:
error = EINVAL;
- goto lsf_bailout_free_vput;
+ goto done;
}
- UBCINFOCHECK("load_shared_file", vp);
-
- if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) {
- goto lsf_bailout_free_vput;
+done:
+ if (ranges != NULL) {
+ kmem_free(kernel_map,
+ (vm_offset_t) ranges,
+ ranges_size);
+ ranges = NULL;
}
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_INFO,
+ ("shared_region: %p [%d(%s)] "
+ "make_private(rangecount=%d) -> %d "
+ "shared_region=%p[%x,%x,%x]\n",
+ current_thread(), p->p_pid, p->p_comm,
+ range_count, error, shared_region,
+ task_mapping_info.fs_base,
+ task_mapping_info.system,
+ task_mapping_info.flags));
- file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
- if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
- error = EINVAL;
- goto lsf_bailout_free_vput;
+ return error;
+}
+
+
+/*
+ * shared_region_map_file_np:
+ *
+ * This system call is for "dyld" only.
+ *
+ * "dyld" wants to map parts of a split library in the shared region.
+ * We get a file descriptor on the split library to be mapped and a set
+ * of mapping instructions, describing which parts of the file to map in\
+ * which areas of the shared segment and with what protection.
+ * The "shared region" is split in 2 areas:
+ * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections),
+ * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
+ *
+ */
+int
+shared_region_map_file_np(
+ struct proc *p,
+ struct shared_region_map_file_np_args *uap,
+ __unused int *retvalp)
+{
+ int error;
+ kern_return_t kr;
+ int fd;
+ unsigned int mapping_count;
+ user_addr_t user_mappings; /* 64-bit */
+ user_addr_t user_slide_p; /* 64-bit */
+ struct shared_file_mapping_np *mappings;
+ vm_size_t mappings_size;
+ struct fileproc *fp;
+ mach_vm_offset_t slide;
+ struct vnode *vp;
+ struct vfs_context context;
+ memory_object_control_t file_control;
+ memory_object_size_t file_size;
+ shared_region_mapping_t shared_region;
+ struct shared_region_task_mappings task_mapping_info;
+ shared_region_mapping_t next;
+ shared_region_mapping_t default_shared_region;
+ boolean_t using_default_region;
+ unsigned int j;
+ vm_prot_t max_prot;
+ mach_vm_offset_t base_offset, end_offset;
+ mach_vm_offset_t original_base_offset;
+ boolean_t mappings_in_segment;
+#define SFM_MAX_STACK 6
+ struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
+
+ mappings_size = 0;
+ mappings = NULL;
+ mapping_count = 0;
+ fp = NULL;
+ vp = NULL;
+
+ /* get file descriptor for split library from arguments */
+ fd = uap->fd;
+
+ /* get file structure from file descriptor */
+ error = fp_lookup(p, fd, &fp, 0);
+ if (error) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] map_file: "
+ "fd=%d lookup failed (error=%d)\n",
+ current_thread(), p->p_pid, p->p_comm, fd, error));
+ goto done;
}
-#ifdef notdef
- if(vattr.va_size != mapped_file_size) {
+ /* make sure we're attempting to map a vnode */
+ if (fp->f_fglob->fg_type != DTYPE_VNODE) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] map_file: "
+ "fd=%d not a vnode (type=%d)\n",
+ current_thread(), p->p_pid, p->p_comm,
+ fd, fp->f_fglob->fg_type));
error = EINVAL;
- goto lsf_bailout_free_vput;
+ goto done;
}
-#endif
-
- vm_get_shared_region(current_task(), &shared_region);
- task_mapping_info.self = (vm_offset_t)shared_region;
-
- shared_region_mapping_info(shared_region,
- &(task_mapping_info.text_region),
- &(task_mapping_info.text_size),
- &(task_mapping_info.data_region),
- &(task_mapping_info.data_size),
- &(task_mapping_info.region_mappings),
- &(task_mapping_info.client_base),
- &(task_mapping_info.alternate_base),
- &(task_mapping_info.alternate_next),
- &(task_mapping_info.flags), &next);
-
- /* This is a work-around to allow executables which have been */
- /* built without knowledge of the proper shared segment to */
- /* load. This code has been architected as a shared region */
- /* handler, the knowledge of where the regions are loaded is */
- /* problematic for the extension of shared regions as it will */
- /* not be easy to know what region an item should go into. */
- /* The code below however will get around a short term problem */
- /* with executables which believe they are loading at zero. */
- {
- if (((unsigned int)local_base &
- (~(task_mapping_info.text_size - 1))) !=
- task_mapping_info.client_base) {
- if(local_flags & ALTERNATE_LOAD_SITE) {
- local_base = (caddr_t)(
- (unsigned int)local_base &
- (task_mapping_info.text_size - 1));
- local_base = (caddr_t)((unsigned int)local_base
- | task_mapping_info.client_base);
- } else {
- error = EINVAL;
- goto lsf_bailout_free_vput;
- }
- }
+ /* we need at least read permission on the file */
+ if (! (fp->f_fglob->fg_flag & FREAD)) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] map_file: "
+ "fd=%d not readable\n",
+ current_thread(), p->p_pid, p->p_comm, fd));
+ error = EPERM;
+ goto done;
}
- /* load alternate regions if the caller has requested. */
- /* Note: the new regions are "clean slates" */
-
- if (local_flags & NEW_LOCAL_SHARED_REGIONS) {
+ /* get vnode from file structure */
+ error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
+ if (error) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] map_file: "
+ "fd=%d getwithref failed (error=%d)\n",
+ current_thread(), p->p_pid, p->p_comm, fd, error));
+ goto done;
+ }
+ vp = (struct vnode *) fp->f_fglob->fg_data;
- shared_region_mapping_t new_shared_region;
- shared_region_mapping_t old_shared_region;
- struct shared_region_task_mappings old_info;
- struct shared_region_task_mappings new_info;
+ /* make sure the vnode is a regular file */
+ if (vp->v_type != VREG) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
+ "not a file (type=%d)\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name, vp->v_type));
+ error = EINVAL;
+ goto done;
+ }
- if(shared_file_create_system_region(&new_shared_region)) {
- error = ENOMEM;
- goto lsf_bailout_free_vput;
+ /* get vnode size */
+ {
+ off_t fs;
+
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_get();
+ if ((error = vnode_size(vp, &fs, &context)) != 0) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "vnode_size(%p) failed (error=%d)\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name, vp));
+ goto done;
}
- vm_get_shared_region(current_task(), &old_shared_region);
-
- old_info.self = (vm_offset_t)old_shared_region;
- shared_region_mapping_info(old_shared_region,
- &(old_info.text_region),
- &(old_info.text_size),
- &(old_info.data_region),
- &(old_info.data_size),
- &(old_info.region_mappings),
- &(old_info.client_base),
- &(old_info.alternate_base),
- &(old_info.alternate_next),
- &(old_info.flags), &next);
- new_info.self = (vm_offset_t)new_shared_region;
- shared_region_mapping_info(new_shared_region,
- &(new_info.text_region),
- &(new_info.text_size),
- &(new_info.data_region),
- &(new_info.data_size),
- &(new_info.region_mappings),
- &(new_info.client_base),
- &(new_info.alternate_base),
- &(new_info.alternate_next),
- &(new_info.flags), &next);
- if (vm_map_region_replace(current_map(), old_info.text_region,
- new_info.text_region, old_info.client_base,
- old_info.client_base+old_info.text_size)) {
- panic("load_shared_file: shared region mis-alignment");
- shared_region_mapping_dealloc(new_shared_region);
+ file_size = fs;
+ }
+
+ /*
+ * Get the list of mappings the caller wants us to establish.
+ */
+ mapping_count = uap->mappingCount; /* the number of mappings */
+ mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
+ if (mapping_count == 0) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_INFO,
+ ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
+ "no mappings\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name));
+ error = 0; /* no mappings: we're done ! */
+ goto done;
+ } else if (mapping_count <= SFM_MAX_STACK) {
+ mappings = &stack_mappings[0];
+ } else {
+ if ((mach_vm_size_t) mappings_size !=
+ (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
+ /* 32-bit integer overflow */
error = EINVAL;
- goto lsf_bailout_free_vput;
+ goto done;
}
- if(vm_map_region_replace(current_map(), old_info.data_region,
- new_info.data_region,
- old_info.client_base + old_info.text_size,
- old_info.client_base
- + old_info.text_size + old_info.data_size)) {
- panic("load_shared_file: shared region mis-alignment 1");
- shared_region_mapping_dealloc(new_shared_region);
- error = EINVAL;
- goto lsf_bailout_free_vput;
+ kr = kmem_alloc(kernel_map,
+ (vm_offset_t *) &mappings,
+ mappings_size);
+ if (kr != KERN_SUCCESS) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "failed to allocate %d mappings (kr=0x%x)\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name, mapping_count, kr));
+ error = ENOMEM;
+ goto done;
}
- vm_set_shared_region(current_task(), new_shared_region);
- task_mapping_info = new_info;
- shared_region_mapping_dealloc(old_shared_region);
}
- if((kr = copyin_shared_file((vm_offset_t)mapped_file_addr,
- mapped_file_size,
- (vm_offset_t *)&local_base,
- map_cnt, map_list, file_control,
- &task_mapping_info, &local_flags))) {
- switch (kr) {
- case KERN_FAILURE:
+ user_mappings = uap->mappings; /* the mappings, in user space */
+ error = copyin(user_mappings,
+ mappings,
+ mappings_size);
+ if (error != 0) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
+ "failed to copyin %d mappings (error=%d)\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name, mapping_count, error));
+ goto done;
+ }
+
+ /*
+ * If the caller provides a "slide" pointer, it means they're OK
+ * with us moving the mappings around to make them fit.
+ */
+ user_slide_p = uap->slide_p;
+
+ /*
+ * Make each mapping address relative to the beginning of the
+ * shared region. Check that all mappings are in the shared region.
+ * Compute the maximum set of protections required to tell the
+ * buffer cache how we mapped the file (see call to ubc_map() below).
+ */
+ max_prot = VM_PROT_NONE;
+ base_offset = -1LL;
+ end_offset = 0;
+ mappings_in_segment = TRUE;
+ for (j = 0; j < mapping_count; j++) {
+ mach_vm_offset_t segment;
+ segment = (mappings[j].sfm_address &
+ GLOBAL_SHARED_SEGMENT_MASK);
+ if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
+ segment != GLOBAL_SHARED_DATA_SEGMENT) {
+ /* this mapping is not in the shared region... */
+ if (user_slide_p == NULL) {
+ /* ... and we can't slide it in: fail */
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_CONFLICT,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "mapping %p not in shared segment & "
+ "no sliding\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name,
+ mappings[j].sfm_address));
error = EINVAL;
- break;
- case KERN_INVALID_ARGUMENT:
+ goto done;
+ }
+ if (j == 0) {
+ /* expect all mappings to be outside */
+ mappings_in_segment = FALSE;
+ } else if (mappings_in_segment != FALSE) {
+ /* other mappings were not outside: fail */
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_CONFLICT,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "mapping %p not in shared segment & "
+ "other mappings in shared segment\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name,
+ mappings[j].sfm_address));
error = EINVAL;
- break;
- case KERN_INVALID_ADDRESS:
- error = EACCES;
- break;
- case KERN_PROTECTION_FAILURE:
- /* save EAUTH for authentication in this */
- /* routine */
- error = EPERM;
- break;
- case KERN_NO_SPACE:
- error = ENOMEM;
- break;
- default:
+ goto done;
+ }
+ /* we'll try and slide that mapping in the segments */
+ } else {
+ if (j == 0) {
+ /* expect all mappings to be inside */
+ mappings_in_segment = TRUE;
+ } else if (mappings_in_segment != TRUE) {
+ /* other mappings were not inside: fail */
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_CONFLICT,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "mapping %p in shared segment & "
+ "others in shared segment\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name,
+ mappings[j].sfm_address));
error = EINVAL;
- };
- if((caller_flags & ALTERNATE_LOAD_SITE) && systemLogDiags) {
- printf("load_shared_file: Failed to load shared file! error: 0x%x, Base_address: 0x%x, number of mappings: %d, file_control 0x%x\n", error, local_base, map_cnt, file_control);
- for(i=0; i<map_cnt; i++) {
- printf("load_shared_file: Mapping%d, mapping_offset: 0x%x, size: 0x%x, file_offset: 0x%x, protection: 0x%x\n"
- , i, map_list[i].mapping_offset,
- map_list[i].size,
- map_list[i].file_offset,
- map_list[i].protection);
+ goto done;
}
+ /* get a relative offset inside the shared segments */
+ mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;
}
- } else {
- if(!(error = copyout(&local_flags, flags, sizeof (int)))) {
- error = copyout(&local_base,
- base_address, sizeof (caddr_t));
+ if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
+ < base_offset) {
+ base_offset = (mappings[j].sfm_address &
+ SHARED_TEXT_REGION_MASK);
}
+ if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
+ mappings[j].sfm_size > end_offset) {
+ end_offset =
+ (mappings[j].sfm_address &
+ SHARED_TEXT_REGION_MASK) +
+ mappings[j].sfm_size;
+ }
+ max_prot |= mappings[j].sfm_max_prot;
+ }
+ /* Make all mappings relative to the base_offset */
+ base_offset = vm_map_trunc_page(base_offset);
+ end_offset = vm_map_round_page(end_offset);
+ for (j = 0; j < mapping_count; j++) {
+ mappings[j].sfm_address -= base_offset;
+ }
+ original_base_offset = base_offset;
+ if (mappings_in_segment == FALSE) {
+ /*
+ * We're trying to map a library that was not pre-bound to
+ * be in the shared segments. We want to try and slide it
+ * back into the shared segments but as far back as possible,
+ * so that it doesn't clash with pre-bound libraries. Set
+ * the base_offset to the end of the region, so that it can't
+ * possibly fit there and will have to be slid.
+ */
+ base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
}
-lsf_bailout_free_vput:
- vput(vp);
-
-lsf_bailout_free:
- kmem_free(kernel_map, (vm_offset_t)filename_str,
- (vm_size_t)(MAXPATHLEN));
- kmem_free(kernel_map, (vm_offset_t)map_list,
- (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
-
-lsf_bailout:
- unix_release();
- return error;
-}
-
-struct reset_shared_file_args {
- caddr_t *ba;
- int map_cnt;
- sf_mapping_t *mappings;
-};
-
-int
-reset_shared_file(
- struct proc *p,
- struct reset_shared_file_args *uap,
- register *retval)
-{
- caddr_t *base_address=uap->ba;
- int map_cnt=uap->map_cnt;
- sf_mapping_t *mappings=uap->mappings;
- register int error;
- kern_return_t kr;
-
- sf_mapping_t *map_list;
- caddr_t local_base;
- vm_offset_t map_address;
- int i;
- kern_return_t kret;
-
+ /* get the file's memory object handle */
+ UBCINFOCHECK("shared_region_map_file_np", vp);
+ file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
+ if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_ERROR,
+ ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
+ "ubc_getobject() failed\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name));
+ error = EINVAL;
+ goto done;
+ }
+ /*
+ * Get info about the current process's shared region.
+ * This might change if we decide we need to clone the shared region.
+ */
+ vm_get_shared_region(current_task(), &shared_region);
+ task_mapping_info.self = (vm_offset_t) shared_region;
+ shared_region_mapping_info(shared_region,
+ &(task_mapping_info.text_region),
+ &(task_mapping_info.text_size),
+ &(task_mapping_info.data_region),
+ &(task_mapping_info.data_size),
+ &(task_mapping_info.region_mappings),
+ &(task_mapping_info.client_base),
+ &(task_mapping_info.alternate_base),
+ &(task_mapping_info.alternate_next),
+ &(task_mapping_info.fs_base),
+ &(task_mapping_info.system),
+ &(task_mapping_info.flags),
+ &next);
+ /*
+ * Are we using the system's current shared region
+ * for this environment ?
+ */
+ default_shared_region =
+ lookup_default_shared_region(ENV_DEFAULT_ROOT,
+ task_mapping_info.system);
+ if (shared_region == default_shared_region) {
+ using_default_region = TRUE;
+ } else {
+ using_default_region = FALSE;
+ }
+ shared_region_mapping_dealloc(default_shared_region);
+
+ if (vp->v_mount != rootvnode->v_mount &&
+ using_default_region) {
+ /*
+ * The split library is not on the root filesystem. We don't
+ * want to polute the system-wide ("default") shared region
+ * with it.
+ * Reject the mapping. The caller (dyld) should "privatize"
+ * (via shared_region_make_private()) the shared region and
+ * try to establish the mapping privately for this process.
+ */
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_CONFLICT,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "not on root volume\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp->v_name));
+ error = EXDEV;
+ goto done;
+ }
- unix_master();
- /* Retrieve the base address */
- if (error = copyin(base_address, &local_base, sizeof (caddr_t))) {
- goto rsf_bailout;
- }
+ /*
+ * Map the split library.
+ */
+ kr = map_shared_file(mapping_count,
+ mappings,
+ file_control,
+ file_size,
+ &task_mapping_info,
+ base_offset,
+ (user_slide_p) ? &slide : NULL);
+
+ if (kr == KERN_SUCCESS) {
+ /*
+ * The mapping was successful. Let the buffer cache know
+ * that we've mapped that file with these protections. This
+ * prevents the vnode from getting recycled while it's mapped.
+ */
+ (void) ubc_map(vp, max_prot);
+ error = 0;
+ } else {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_CONFLICT,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "map_shared_file failed, kr=0x%x\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name, kr));
+ switch (kr) {
+ case KERN_INVALID_ADDRESS:
+ error = EFAULT;
+ goto done;
+ case KERN_PROTECTION_FAILURE:
+ error = EPERM;
+ goto done;
+ case KERN_NO_SPACE:
+ error = ENOMEM;
+ goto done;
+ case KERN_FAILURE:
+ case KERN_INVALID_ARGUMENT:
+ default:
+ error = EINVAL;
+ goto done;
+ }
+ }
- if (((unsigned int)local_base & GLOBAL_SHARED_SEGMENT_MASK)
- != GLOBAL_SHARED_TEXT_SEGMENT) {
- error = EINVAL;
- goto rsf_bailout;
+ if (p->p_flag & P_NOSHLIB) {
+ /* signal that this process is now using split libraries */
+ p->p_flag &= ~P_NOSHLIB;
}
- kret = kmem_alloc(kernel_map, (vm_offset_t *)&map_list,
- (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
- if (kret != KERN_SUCCESS) {
- error = ENOMEM;
- goto rsf_bailout;
+ if (user_slide_p) {
+ /*
+ * The caller provided a pointer to a "slide" offset. Let
+ * them know by how much we slid the mappings.
+ */
+ if (mappings_in_segment == FALSE) {
+ /*
+ * We faked the base_offset earlier, so undo that
+ * and take into account the real base_offset.
+ */
+ slide += SHARED_TEXT_REGION_SIZE - end_offset;
+ slide -= original_base_offset;
+ /*
+ * The mappings were slid into the shared segments
+ * and "slide" is relative to the beginning of the
+ * shared segments. Adjust it to be absolute.
+ */
+ slide += GLOBAL_SHARED_TEXT_SEGMENT;
}
-
- if (error =
- copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) {
-
- kmem_free(kernel_map, (vm_offset_t)map_list,
- (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
- goto rsf_bailout;
- }
- for (i = 0; i<map_cnt; i++) {
- if((map_list[i].mapping_offset
- & GLOBAL_SHARED_SEGMENT_MASK) == 0x10000000) {
- map_address = (vm_offset_t)
- (local_base + map_list[i].mapping_offset);
- vm_deallocate(current_map(),
- map_address,
- map_list[i].size);
- vm_map(current_map(), &map_address,
- map_list[i].size, 0, SHARED_LIB_ALIAS,
- shared_data_region_handle,
- ((unsigned int)local_base
- & SHARED_DATA_REGION_MASK) +
- (map_list[i].mapping_offset
- & SHARED_DATA_REGION_MASK),
- TRUE, VM_PROT_READ,
- VM_PROT_READ, VM_INHERIT_SHARE);
+ error = copyout(&slide,
+ user_slide_p,
+ sizeof (slide));
+ if (slide != 0) {
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_CONFLICT,
+ ("shared_region: %p [%d(%s)] "
+ "map_file(%p:'%s'): "
+ "slid by 0x%llx\n",
+ current_thread(), p->p_pid, p->p_comm,
+ vp, vp->v_name, slide));
}
}
- kmem_free(kernel_map, (vm_offset_t)map_list,
- (vm_size_t)(map_cnt*sizeof(sf_mapping_t)));
+done:
+ if (vp != NULL) {
+ /*
+ * release the vnode...
+ * ubc_map() still holds it for us in the non-error case
+ */
+ (void) vnode_put(vp);
+ vp = NULL;
+ }
+ if (fp != NULL) {
+ /* release the file descriptor */
+ fp_drop(p, fd, fp, 0);
+ fp = NULL;
+ }
+ if (mappings != NULL &&
+ mappings != &stack_mappings[0]) {
+ kmem_free(kernel_map,
+ (vm_offset_t) mappings,
+ mappings_size);
+ }
+ mappings = NULL;
-rsf_bailout:
- unix_release();
return error;
}
+int
+load_shared_file(
+ __unused struct proc *p,
+ __unused struct load_shared_file_args *uap,
+ __unused int *retval)
+{
+ return ENOSYS;
+}
+
+int
+reset_shared_file(
+ __unused struct proc *p,
+ __unused struct reset_shared_file_args *uap,
+ __unused int *retval)
+{
+ return ENOSYS;
+}
+
+int
+new_system_shared_regions(
+ __unused struct proc *p,
+ __unused struct new_system_shared_regions_args *uap,
+ __unused int *retval)
+{
+ return ENOSYS;
+}
int
-clone_system_shared_regions()
+clone_system_shared_regions(
+ int shared_regions_active,
+ int chain_regions,
+ int base_vnode)
{
shared_region_mapping_t new_shared_region;
shared_region_mapping_t next;
struct shared_region_task_mappings old_info;
struct shared_region_task_mappings new_info;
- if (shared_file_create_system_region(&new_shared_region))
- return (ENOMEM);
vm_get_shared_region(current_task(), &old_shared_region);
old_info.self = (vm_offset_t)old_shared_region;
shared_region_mapping_info(old_shared_region,
&(old_info.client_base),
&(old_info.alternate_base),
&(old_info.alternate_next),
+ &(old_info.fs_base),
+ &(old_info.system),
&(old_info.flags), &next);
+
+ if (shared_regions_active ||
+ base_vnode == ENV_DEFAULT_ROOT) {
+ if (shared_file_create_system_region(&new_shared_region,
+ old_info.fs_base,
+ old_info.system))
+ return ENOMEM;
+ } else {
+ if (old_shared_region &&
+ base_vnode == ENV_DEFAULT_ROOT) {
+ base_vnode = old_info.fs_base;
+ }
+ new_shared_region =
+ lookup_default_shared_region(base_vnode,
+ old_info.system);
+ if (new_shared_region == NULL) {
+ shared_file_boot_time_init(base_vnode,
+ old_info.system);
+ vm_get_shared_region(current_task(),
+ &new_shared_region);
+ } else {
+ vm_set_shared_region(current_task(), new_shared_region);
+ }
+ if (old_shared_region)
+ shared_region_mapping_dealloc(old_shared_region);
+ }
new_info.self = (vm_offset_t)new_shared_region;
shared_region_mapping_info(new_shared_region,
&(new_info.text_region),
&(new_info.client_base),
&(new_info.alternate_base),
&(new_info.alternate_next),
+ &(new_info.fs_base),
+ &(new_info.system),
&(new_info.flags), &next);
- if(vm_region_clone(old_info.text_region, new_info.text_region)) {
- panic("clone_system_shared_regions: shared region mis-alignment 1");
+ if(shared_regions_active) {
+ if(vm_region_clone(old_info.text_region, new_info.text_region)) {
+ panic("clone_system_shared_regions: shared region mis-alignment 1");
shared_region_mapping_dealloc(new_shared_region);
return(EINVAL);
- }
- if (vm_region_clone(old_info.data_region, new_info.data_region)) {
- panic("clone_system_shared_regions: shared region mis-alignment 2");
+ }
+ if (vm_region_clone(old_info.data_region, new_info.data_region)) {
+ panic("clone_system_shared_regions: shared region mis-alignment 2");
shared_region_mapping_dealloc(new_shared_region);
return(EINVAL);
+ }
+ if (chain_regions) {
+ /*
+ * We want a "shadowed" clone, a private superset of the old
+ * shared region. The info about the old mappings is still
+ * valid for us.
+ */
+ shared_region_object_chain_attach(
+ new_shared_region, old_shared_region);
+ } else {
+ /*
+ * We want a completely detached clone with no link to
+ * the old shared region. We'll be removing some mappings
+ * in our private, cloned, shared region, so the old mappings
+ * will become irrelevant to us. Since we have a private
+ * "shared region" now, it isn't going to be shared with
+ * anyone else and we won't need to maintain mappings info.
+ */
+ shared_region_object_chain_detached(new_shared_region);
+ }
}
if (vm_map_region_replace(current_map(), old_info.text_region,
new_info.text_region, old_info.client_base,
return(EINVAL);
}
vm_set_shared_region(current_task(), new_shared_region);
- shared_region_object_chain_attach(new_shared_region, old_shared_region);
+
+ /* consume the reference which wasn't accounted for in object */
+ /* chain attach */
+ if (!shared_regions_active || !chain_regions)
+ shared_region_mapping_dealloc(old_shared_region);
+
+ SHARED_REGION_TRACE(
+ SHARED_REGION_TRACE_INFO,
+ ("shared_region: %p task=%p "
+ "clone(active=%d, base=0x%x,chain=%d) "
+ "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
+ current_thread(), current_task(),
+ shared_regions_active, base_vnode, chain_regions,
+ old_shared_region,
+ old_info.fs_base,
+ old_info.system,
+ old_info.flags,
+ new_shared_region,
+ new_info.fs_base,
+ new_info.system,
+ new_info.flags));
+
return(0);
}
+
+/* header for the profile name file. The profiled app info is held */
+/* in the data file and pointed to by elements in the name file */
+
+struct profile_names_header {
+ unsigned int number_of_profiles;
+ unsigned int user_id;
+ unsigned int version;
+ off_t element_array;
+ unsigned int spare1;
+ unsigned int spare2;
+ unsigned int spare3;
+};
+
+struct profile_element {
+ off_t addr;
+ vm_size_t size;
+ unsigned int mod_date;
+ unsigned int inode;
+ char name[12];
+};
+
+struct global_profile {
+ struct vnode *names_vp;
+ struct vnode *data_vp;
+ vm_offset_t buf_ptr;
+ unsigned int user;
+ unsigned int age;
+ unsigned int busy;
+};
+
+struct global_profile_cache {
+ int max_ele;
+ unsigned int age;
+ struct global_profile profiles[3];
+};
+
+/* forward declarations */
+int bsd_open_page_cache_files(unsigned int user,
+ struct global_profile **profile);
+void bsd_close_page_cache_files(struct global_profile *profile);
+int bsd_search_page_cache_data_base(
+ struct vnode *vp,
+ struct profile_names_header *database,
+ char *app_name,
+ unsigned int mod_date,
+ unsigned int inode,
+ off_t *profile,
+ unsigned int *profile_size);
+
+struct global_profile_cache global_user_profile_cache =
+ {3, 0, {{NULL, NULL, 0, 0, 0, 0},
+ {NULL, NULL, 0, 0, 0, 0},
+ {NULL, NULL, 0, 0, 0, 0}} };
+
+/* BSD_OPEN_PAGE_CACHE_FILES: */
+/* Caller provides a user id. This id was used in */
+/* prepare_profile_database to create two unique absolute */
+/* file paths to the associated profile files. These files */
+/* are either opened or bsd_open_page_cache_files returns an */
+/* error. The header of the names file is then consulted. */
+/* The header and the vnodes for the names and data files are */
+/* returned. */
+
+int
+bsd_open_page_cache_files(
+ unsigned int user,
+ struct global_profile **profile)
+{
+ const char *cache_path = "/var/vm/app_profile/";
+ struct proc *p;
+ int error;
+ vm_size_t resid;
+ off_t resid_off;
+ unsigned int lru;
+ vm_size_t size;
+
+ struct vnode *names_vp;
+ struct vnode *data_vp;
+ vm_offset_t names_buf;
+ vm_offset_t buf_ptr;
+
+ int profile_names_length;
+ int profile_data_length;
+ char *profile_data_string;
+ char *profile_names_string;
+ char *substring;
+
+ off_t file_size;
+ struct vfs_context context;
+
+ kern_return_t ret;
+
+ struct nameidata nd_names;
+ struct nameidata nd_data;
+ int i;
+
+
+ p = current_proc();
+
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_get();
+
+restart:
+ for(i = 0; i<global_user_profile_cache.max_ele; i++) {
+ if((global_user_profile_cache.profiles[i].user == user)
+ && (global_user_profile_cache.profiles[i].data_vp
+ != NULL)) {
+ *profile = &global_user_profile_cache.profiles[i];
+ /* already in cache, we're done */
+ if ((*profile)->busy) {
+ /*
+ * drop funnel and wait
+ */
+ (void)tsleep((void *)
+ *profile,
+ PRIBIO, "app_profile", 0);
+ goto restart;
+ }
+ (*profile)->busy = 1;
+ (*profile)->age = global_user_profile_cache.age;
+
+ /*
+ * entries in cache are held with a valid
+ * usecount... take an iocount which will
+ * be dropped in "bsd_close_page_cache_files"
+ * which is called after the read or writes to
+ * these files are done
+ */
+ if ( (vnode_getwithref((*profile)->data_vp)) ) {
+
+ vnode_rele((*profile)->data_vp);
+ vnode_rele((*profile)->names_vp);
+
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+
+ goto restart;
+ }
+ if ( (vnode_getwithref((*profile)->names_vp)) ) {
+
+ vnode_put((*profile)->data_vp);
+ vnode_rele((*profile)->data_vp);
+ vnode_rele((*profile)->names_vp);
+
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+
+ goto restart;
+ }
+ global_user_profile_cache.age+=1;
+ return 0;
+ }
+ }
+
+ lru = global_user_profile_cache.age;
+ *profile = NULL;
+ for(i = 0; i<global_user_profile_cache.max_ele; i++) {
+ /* Skip entry if it is in the process of being reused */
+ if(global_user_profile_cache.profiles[i].data_vp ==
+ (struct vnode *)0xFFFFFFFF)
+ continue;
+ /* Otherwise grab the first empty entry */
+ if(global_user_profile_cache.profiles[i].data_vp == NULL) {
+ *profile = &global_user_profile_cache.profiles[i];
+ (*profile)->age = global_user_profile_cache.age;
+ break;
+ }
+ /* Otherwise grab the oldest entry */
+ if(global_user_profile_cache.profiles[i].age < lru) {
+ lru = global_user_profile_cache.profiles[i].age;
+ *profile = &global_user_profile_cache.profiles[i];
+ }
+ }
+
+ /* Did we set it? */
+ if (*profile == NULL) {
+ /*
+ * No entries are available; this can only happen if all
+ * of them are currently in the process of being reused;
+ * if this happens, we sleep on the address of the first
+ * element, and restart. This is less than ideal, but we
+ * know it will work because we know that there will be a
+ * wakeup on any entry currently in the process of being
+ * reused.
+ *
+ * XXX Reccomend a two handed clock and more than 3 total
+ * XXX cache entries at some point in the future.
+ */
+ /*
+ * drop funnel and wait
+ */
+ (void)tsleep((void *)
+ &global_user_profile_cache.profiles[0],
+ PRIBIO, "app_profile", 0);
+ goto restart;
+ }
+
+ /*
+ * If it's currently busy, we've picked the one at the end of the
+ * LRU list, but it's currently being actively used. We sleep on
+ * its address and restart.
+ */
+ if ((*profile)->busy) {
+ /*
+ * drop funnel and wait
+ */
+ (void)tsleep((void *)
+ *profile,
+ PRIBIO, "app_profile", 0);
+ goto restart;
+ }
+ (*profile)->busy = 1;
+ (*profile)->user = user;
+
+ /*
+ * put dummy value in for now to get competing request to wait
+ * above until we are finished
+ *
+ * Save the data_vp before setting it, so we can set it before
+ * we kmem_free() or vrele(). If we don't do this, then we
+ * have a potential funnel race condition we have to deal with.
+ */
+ data_vp = (*profile)->data_vp;
+ (*profile)->data_vp = (struct vnode *)0xFFFFFFFF;
+
+ /*
+ * Age the cache here in all cases; this guarantees that we won't
+ * be reusing only one entry over and over, once the system reaches
+ * steady-state.
+ */
+ global_user_profile_cache.age+=1;
+
+ if(data_vp != NULL) {
+ kmem_free(kernel_map,
+ (*profile)->buf_ptr, 4 * PAGE_SIZE);
+ if ((*profile)->names_vp) {
+ vnode_rele((*profile)->names_vp);
+ (*profile)->names_vp = NULL;
+ }
+ vnode_rele(data_vp);
+ }
+
+ /* Try to open the appropriate users profile files */
+ /* If neither file is present, try to create them */
+ /* If one file is present and the other not, fail. */
+ /* If the files do exist, check them for the app_file */
+ /* requested and read it in if present */
+
+ ret = kmem_alloc(kernel_map,
+ (vm_offset_t *)&profile_data_string, PATH_MAX);
+
+ if(ret) {
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+ return ENOMEM;
+ }
+
+ /* Split the buffer in half since we know the size of */
+ /* our file path and our allocation is adequate for */
+ /* both file path names */
+ profile_names_string = profile_data_string + (PATH_MAX/2);
+
+
+ strcpy(profile_data_string, cache_path);
+ strcpy(profile_names_string, cache_path);
+ profile_names_length = profile_data_length
+ = strlen(profile_data_string);
+ substring = profile_data_string + profile_data_length;
+ sprintf(substring, "%x_data", user);
+ substring = profile_names_string + profile_names_length;
+ sprintf(substring, "%x_names", user);
+
+ /* We now have the absolute file names */
+
+ ret = kmem_alloc(kernel_map,
+ (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
+ if(ret) {
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+ return ENOMEM;
+ }
+
+ NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF,
+ UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
+ NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF,
+ UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
+
+ if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
+#ifdef notdef
+ printf("bsd_open_page_cache_files: CacheData file not found %s\n",
+ profile_data_string);
+#endif
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+ return error;
+ }
+ data_vp = nd_data.ni_vp;
+
+ if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
+ printf("bsd_open_page_cache_files: NamesData file not found %s\n",
+ profile_data_string);
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+
+ vnode_rele(data_vp);
+ vnode_put(data_vp);
+
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+ return error;
+ }
+ names_vp = nd_names.ni_vp;
+
+ if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
+ printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+
+ vnode_rele(names_vp);
+ vnode_put(names_vp);
+ vnode_rele(data_vp);
+ vnode_put(data_vp);
+
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+ return error;
+ }
+
+ size = file_size;
+ if(size > 4 * PAGE_SIZE)
+ size = 4 * PAGE_SIZE;
+ buf_ptr = names_buf;
+ resid_off = 0;
+
+ while(size) {
+ int resid_int;
+ error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr,
+ size, resid_off,
+ UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
+ &resid_int, p);
+ resid = (vm_size_t) resid_int;
+ if((error) || (size == resid)) {
+ if(!error) {
+ error = EINVAL;
+ }
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+
+ vnode_rele(names_vp);
+ vnode_put(names_vp);
+ vnode_rele(data_vp);
+ vnode_put(data_vp);
+
+ (*profile)->data_vp = NULL;
+ (*profile)->busy = 0;
+ wakeup(*profile);
+ return error;
+ }
+ buf_ptr += size-resid;
+ resid_off += size-resid;
+ size = resid;
+ }
+ kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);
+
+ (*profile)->names_vp = names_vp;
+ (*profile)->data_vp = data_vp;
+ (*profile)->buf_ptr = names_buf;
+
+ /*
+ * at this point, the both the names_vp and the data_vp have
+ * both a valid usecount and an iocount held
+ */
+ return 0;
+
+}
+
+void
+bsd_close_page_cache_files(
+ struct global_profile *profile)
+{
+ vnode_put(profile->data_vp);
+ vnode_put(profile->names_vp);
+
+ profile->busy = 0;
+ wakeup(profile);
+}
+
+int
+bsd_read_page_cache_file(
+ unsigned int user,
+ int *fid,
+ int *mod,
+ char *app_name,
+ struct vnode *app_vp,
+ vm_offset_t *buffer,
+ vm_offset_t *bufsize)
+{
+
+ boolean_t funnel_state;
+
+ struct proc *p;
+ int error;
+ unsigned int resid;
+
+ off_t profile;
+ unsigned int profile_size;
+
+ vm_offset_t names_buf;
+ struct vnode_attr va;
+ struct vfs_context context;
+
+ kern_return_t ret;
+
+ struct vnode *names_vp;
+ struct vnode *data_vp;
+
+ struct global_profile *uid_files;
+
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+
+ /* Try to open the appropriate users profile files */
+ /* If neither file is present, try to create them */
+ /* If one file is present and the other not, fail. */
+ /* If the files do exist, check them for the app_file */
+ /* requested and read it in if present */
+
+
+ error = bsd_open_page_cache_files(user, &uid_files);
+ if(error) {
+ thread_funnel_set(kernel_flock, funnel_state);
+ return EINVAL;
+ }
+
+ p = current_proc();
+
+ names_vp = uid_files->names_vp;
+ data_vp = uid_files->data_vp;
+ names_buf = uid_files->buf_ptr;
+
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_get();
+
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_fileid);
+ VATTR_WANTED(&va, va_modify_time);
+
+ if ((error = vnode_getattr(app_vp, &va, &context))) {
+ printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return error;
+ }
+
+ *fid = (u_long)va.va_fileid;
+ *mod = va.va_modify_time.tv_sec;
+
+ if (bsd_search_page_cache_data_base(
+ names_vp,
+ (struct profile_names_header *)names_buf,
+ app_name,
+ (unsigned int) va.va_modify_time.tv_sec,
+ (u_long)va.va_fileid, &profile, &profile_size) == 0) {
+ /* profile is an offset in the profile data base */
+ /* It is zero if no profile data was found */
+
+ if(profile_size == 0) {
+ *buffer = 0;
+ *bufsize = 0;
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return 0;
+ }
+ ret = (vm_offset_t)(kmem_alloc(kernel_map, buffer, profile_size));
+ if(ret) {
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return ENOMEM;
+ }
+ *bufsize = profile_size;
+ while(profile_size) {
+ int resid_int;
+ error = vn_rdwr(UIO_READ, data_vp,
+ (caddr_t) *buffer, profile_size,
+ profile, UIO_SYSSPACE32, IO_NODELOCKED,
+ kauth_cred_get(), &resid_int, p);
+ resid = (vm_size_t) resid_int;
+ if((error) || (profile_size == resid)) {
+ bsd_close_page_cache_files(uid_files);
+ kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return EINVAL;
+ }
+ profile += profile_size - resid;
+ profile_size = resid;
+ }
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return 0;
+ } else {
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return EINVAL;
+ }
+
+}
+
+int
+bsd_search_page_cache_data_base(
+ struct vnode *vp,
+ struct profile_names_header *database,
+ char *app_name,
+ unsigned int mod_date,
+ unsigned int inode,
+ off_t *profile,
+ unsigned int *profile_size)
+{
+
+ struct proc *p;
+
+ unsigned int i;
+ struct profile_element *element;
+ unsigned int ele_total;
+ unsigned int extended_list = 0;
+ off_t file_off = 0;
+ unsigned int size;
+ off_t resid_off;
+ unsigned int resid;
+ vm_offset_t local_buf = 0;
+
+ int error;
+ kern_return_t ret;
+
+ p = current_proc();
+
+ if(((vm_offset_t)database->element_array) !=
+ sizeof(struct profile_names_header)) {
+ return EINVAL;
+ }
+ element = (struct profile_element *)(
+ (vm_offset_t)database->element_array +
+ (vm_offset_t)database);
+
+ ele_total = database->number_of_profiles;
+
+ *profile = 0;
+ *profile_size = 0;
+ while(ele_total) {
+ /* note: code assumes header + n*ele comes out on a page boundary */
+ if(((local_buf == 0) && (sizeof(struct profile_names_header) +
+ (ele_total * sizeof(struct profile_element)))
+ > (PAGE_SIZE * 4)) ||
+ ((local_buf != 0) &&
+ (ele_total * sizeof(struct profile_element))
+ > (PAGE_SIZE * 4))) {
+ extended_list = ele_total;
+ if(element == (struct profile_element *)
+ ((vm_offset_t)database->element_array +
+ (vm_offset_t)database)) {
+ ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
+ } else {
+ ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
+ }
+ extended_list -= ele_total;
+ }
+ for (i=0; i<ele_total; i++) {
+ if((mod_date == element[i].mod_date)
+ && (inode == element[i].inode)) {
+ if(strncmp(element[i].name, app_name, 12) == 0) {
+ *profile = element[i].addr;
+ *profile_size = element[i].size;
+ if(local_buf != 0) {
+ kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
+ }
+ return 0;
+ }
+ }
+ }
+ if(extended_list == 0)
+ break;
+ if(local_buf == 0) {
+ ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
+ if(ret != KERN_SUCCESS) {
+ return ENOMEM;
+ }
+ }
+ element = (struct profile_element *)local_buf;
+ ele_total = extended_list;
+ extended_list = 0;
+ file_off += 4 * PAGE_SIZE;
+ if((ele_total * sizeof(struct profile_element)) >
+ (PAGE_SIZE * 4)) {
+ size = PAGE_SIZE * 4;
+ } else {
+ size = ele_total * sizeof(struct profile_element);
+ }
+ resid_off = 0;
+ while(size) {
+ int resid_int;
+ error = vn_rdwr(UIO_READ, vp,
+ CAST_DOWN(caddr_t, (local_buf + resid_off)),
+ size, file_off + resid_off, UIO_SYSSPACE32,
+ IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
+ resid = (vm_size_t) resid_int;
+ if((error) || (size == resid)) {
+ if(local_buf != 0) {
+ kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
+ }
+ return EINVAL;
+ }
+ resid_off += size-resid;
+ size = resid;
+ }
+ }
+ if(local_buf != 0) {
+ kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
+ }
+ return 0;
+}
+
+int
+bsd_write_page_cache_file(
+ unsigned int user,
+ char *file_name,
+ caddr_t buffer,
+ vm_size_t size,
+ int mod,
+ int fid)
+{
+ struct proc *p;
+ int resid;
+ off_t resid_off;
+ int error;
+ boolean_t funnel_state;
+ off_t file_size;
+ struct vfs_context context;
+ off_t profile;
+ unsigned int profile_size;
+
+ vm_offset_t names_buf;
+ struct vnode *names_vp;
+ struct vnode *data_vp;
+ struct profile_names_header *profile_header;
+ off_t name_offset;
+ struct global_profile *uid_files;
+
+
+ funnel_state = thread_funnel_set(kernel_flock, TRUE);
+
+
+ error = bsd_open_page_cache_files(user, &uid_files);
+ if(error) {
+ thread_funnel_set(kernel_flock, funnel_state);
+ return EINVAL;
+ }
+
+ p = current_proc();
+
+ names_vp = uid_files->names_vp;
+ data_vp = uid_files->data_vp;
+ names_buf = uid_files->buf_ptr;
+
+ /* Stat data file for size */
+
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_get();
+
+ if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
+ printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return error;
+ }
+
+ if (bsd_search_page_cache_data_base(names_vp,
+ (struct profile_names_header *)names_buf,
+ file_name, (unsigned int) mod,
+ fid, &profile, &profile_size) == 0) {
+ /* profile is an offset in the profile data base */
+ /* It is zero if no profile data was found */
+
+ if(profile_size == 0) {
+ unsigned int header_size;
+ vm_offset_t buf_ptr;
+
+ /* Our Write case */
+
+ /* read header for last entry */
+ profile_header =
+ (struct profile_names_header *)names_buf;
+ name_offset = sizeof(struct profile_names_header) +
+ (sizeof(struct profile_element)
+ * profile_header->number_of_profiles);
+ profile_header->number_of_profiles += 1;
+
+ if(name_offset < PAGE_SIZE * 4) {
+ struct profile_element *name;
+ /* write new entry */
+ name = (struct profile_element *)
+ (names_buf + (vm_offset_t)name_offset);
+ name->addr = file_size;
+ name->size = size;
+ name->mod_date = mod;
+ name->inode = fid;
+ strncpy (name->name, file_name, 12);
+ } else {
+ unsigned int ele_size;
+ struct profile_element name;
+ /* write new entry */
+ name.addr = file_size;
+ name.size = size;
+ name.mod_date = mod;
+ name.inode = fid;
+ strncpy (name.name, file_name, 12);
+ /* write element out separately */
+ ele_size = sizeof(struct profile_element);
+ buf_ptr = (vm_offset_t)&name;
+ resid_off = name_offset;
+
+ while(ele_size) {
+ error = vn_rdwr(UIO_WRITE, names_vp,
+ (caddr_t)buf_ptr,
+ ele_size, resid_off,
+ UIO_SYSSPACE32, IO_NODELOCKED,
+ kauth_cred_get(), &resid, p);
+ if(error) {
+ printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
+ bsd_close_page_cache_files(
+ uid_files);
+ thread_funnel_set(
+ kernel_flock,
+ funnel_state);
+ return error;
+ }
+ buf_ptr += (vm_offset_t)
+ ele_size-resid;
+ resid_off += ele_size-resid;
+ ele_size = resid;
+ }
+ }
+
+ if(name_offset < PAGE_SIZE * 4) {
+ header_size = name_offset +
+ sizeof(struct profile_element);
+
+ } else {
+ header_size =
+ sizeof(struct profile_names_header);
+ }
+ buf_ptr = (vm_offset_t)profile_header;
+ resid_off = 0;
+
+ /* write names file header */
+ while(header_size) {
+ error = vn_rdwr(UIO_WRITE, names_vp,
+ (caddr_t)buf_ptr,
+ header_size, resid_off,
+ UIO_SYSSPACE32, IO_NODELOCKED,
+ kauth_cred_get(), &resid, p);
+ if(error) {
+ printf("bsd_write_page_cache_file: Can't write header %x\n", user);
+ bsd_close_page_cache_files(
+ uid_files);
+ thread_funnel_set(
+ kernel_flock, funnel_state);
+ return error;
+ }
+ buf_ptr += (vm_offset_t)header_size-resid;
+ resid_off += header_size-resid;
+ header_size = resid;
+ }
+ /* write profile to data file */
+ resid_off = file_size;
+ while(size) {
+ error = vn_rdwr(UIO_WRITE, data_vp,
+ (caddr_t)buffer, size, resid_off,
+ UIO_SYSSPACE32, IO_NODELOCKED,
+ kauth_cred_get(), &resid, p);
+ if(error) {
+ printf("bsd_write_page_cache_file: Can't write header %x\n", user);
+ bsd_close_page_cache_files(
+ uid_files);
+ thread_funnel_set(
+ kernel_flock, funnel_state);
+ return error;
+ }
+ buffer += size-resid;
+ resid_off += size-resid;
+ size = resid;
+ }
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return 0;
+ }
+ /* Someone else wrote a twin profile before us */
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return 0;
+ } else {
+ bsd_close_page_cache_files(uid_files);
+ thread_funnel_set(kernel_flock, funnel_state);
+ return EINVAL;
+ }
+
+}
+
+int
+prepare_profile_database(int user)
+{
+ const char *cache_path = "/var/vm/app_profile/";
+ struct proc *p;
+ int error;
+ int resid;
+ off_t resid_off;
+ vm_size_t size;
+
+ struct vnode *names_vp;
+ struct vnode *data_vp;
+ vm_offset_t names_buf;
+ vm_offset_t buf_ptr;
+
+ int profile_names_length;
+ int profile_data_length;
+ char *profile_data_string;
+ char *profile_names_string;
+ char *substring;
+
+ struct vnode_attr va;
+ struct vfs_context context;
+
+ struct profile_names_header *profile_header;
+ kern_return_t ret;
+
+ struct nameidata nd_names;
+ struct nameidata nd_data;
+
+ p = current_proc();
+
+ context.vc_proc = p;
+ context.vc_ucred = kauth_cred_get();
+
+ ret = kmem_alloc(kernel_map,
+ (vm_offset_t *)&profile_data_string, PATH_MAX);
+
+ if(ret) {
+ return ENOMEM;
+ }
+
+ /* Split the buffer in half since we know the size of */
+ /* our file path and our allocation is adequate for */
+ /* both file path names */
+ profile_names_string = profile_data_string + (PATH_MAX/2);
+
+
+ strcpy(profile_data_string, cache_path);
+ strcpy(profile_names_string, cache_path);
+ profile_names_length = profile_data_length
+ = strlen(profile_data_string);
+ substring = profile_data_string + profile_data_length;
+ sprintf(substring, "%x_data", user);
+ substring = profile_names_string + profile_names_length;
+ sprintf(substring, "%x_names", user);
+
+ /* We now have the absolute file names */
+
+ ret = kmem_alloc(kernel_map,
+ (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
+ if(ret) {
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+ return ENOMEM;
+ }
+
+ NDINIT(&nd_names, LOOKUP, FOLLOW,
+ UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
+ NDINIT(&nd_data, LOOKUP, FOLLOW,
+ UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
+
+ if ( (error = vn_open(&nd_data,
+ O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+
+ return 0;
+ }
+ data_vp = nd_data.ni_vp;
+
+ if ( (error = vn_open(&nd_names,
+ O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
+ printf("prepare_profile_database: Can't create CacheNames %s\n",
+ profile_data_string);
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+
+ vnode_rele(data_vp);
+ vnode_put(data_vp);
+
+ return error;
+ }
+ names_vp = nd_names.ni_vp;
+
+ /* Write Header for new names file */
+
+ profile_header = (struct profile_names_header *)names_buf;
+
+ profile_header->number_of_profiles = 0;
+ profile_header->user_id = user;
+ profile_header->version = 1;
+ profile_header->element_array =
+ sizeof(struct profile_names_header);
+ profile_header->spare1 = 0;
+ profile_header->spare2 = 0;
+ profile_header->spare3 = 0;
+
+ size = sizeof(struct profile_names_header);
+ buf_ptr = (vm_offset_t)profile_header;
+ resid_off = 0;
+
+ while(size) {
+ error = vn_rdwr(UIO_WRITE, names_vp,
+ (caddr_t)buf_ptr, size, resid_off,
+ UIO_SYSSPACE32, IO_NODELOCKED,
+ kauth_cred_get(), &resid, p);
+ if(error) {
+ printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string,
+ PATH_MAX);
+
+ vnode_rele(names_vp);
+ vnode_put(names_vp);
+ vnode_rele(data_vp);
+ vnode_put(data_vp);
+
+ return error;
+ }
+ buf_ptr += size-resid;
+ resid_off += size-resid;
+ size = resid;
+ }
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_uid, user);
+
+ error = vnode_setattr(names_vp, &va, &context);
+ if(error) {
+ printf("prepare_profile_database: "
+ "Can't set user %s\n", profile_names_string);
+ }
+ vnode_rele(names_vp);
+ vnode_put(names_vp);
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_uid, user);
+ error = vnode_setattr(data_vp, &va, &context);
+ if(error) {
+ printf("prepare_profile_database: "
+ "Can't set user %s\n", profile_data_string);
+ }
+ vnode_rele(data_vp);
+ vnode_put(data_vp);
+
+ kmem_free(kernel_map,
+ (vm_offset_t)profile_data_string, PATH_MAX);
+ kmem_free(kernel_map,
+ (vm_offset_t)names_buf, 4 * PAGE_SIZE);
+ return 0;
+
+}