X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/39236c6e673c41db228275375ab7fdb0f837b292..ecc0ceb4089d506a0b8d16686a95817b331af9cb:/bsd/kern/kern_exec.c

diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c
index 0fb4a347a..20b1f0317 100644
--- a/bsd/kern/kern_exec.c
+++ b/bsd/kern/kern_exec.c
@@ -33,8 +33,6 @@
  * the terms and conditions for use and redistribution.
  */
  
-#include <cputypes.h>
-
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -104,6 +102,7 @@
 #include <sys/signal.h>
 #include <sys/aio_kern.h>
 #include <sys/sysproto.h>
+#include <sys/persona.h>
 #if SYSV_SHM
 #include <sys/shm_internal.h>		/* shmexec() */
 #endif
@@ -133,6 +132,8 @@
 #include <kern/affinity.h>
 #include <kern/assert.h>
 #include <kern/task.h>
+#include <kern/coalition.h>
+#include <kern/kalloc.h>
 
 #if CONFIG_MACF
 #include <security/mac.h>
@@ -159,16 +160,22 @@
 #if CONFIG_DTRACE
 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
 extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
+extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);
 extern void (*dtrace_helpers_cleanup)(proc_t);
 extern void dtrace_lazy_dofs_destroy(proc_t);
 
+/*
+ * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
+ * we will store its value before actually calling it.
+ */
+static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;
+
 #include <sys/dtrace_ptss.h>
 #endif
 
 /* support for child creation in exec after vfork */
-thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit);
+thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit);
 void vfork_exit(proc_t p, int rv);
-int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
 extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
 
 /*
@@ -185,8 +192,14 @@ kern_return_t ipc_object_copyin(
 	ipc_object_t		*objectp);
 void ipc_port_release_send(ipc_port_t);
 
+#if DEVELOPMENT || DEBUG
+void task_importance_update_owner_info(task_t);
+#endif
+
 extern struct savearea *get_user_regs(thread_t);
+extern kern_return_t machine_thread_neon_state_initialize(thread_t thread);
 
+__attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);
 
 #include <kern/thread.h>
 #include <kern/task.h>
@@ -206,7 +219,7 @@ extern struct savearea *get_user_regs(thread_t);
  *			activator in exec_activate_image() before treating
  *			it as malformed/corrupt.
  */
-#define EAI_ITERLIMIT		10
+#define EAI_ITERLIMIT		3
 
 /*
  * For #! interpreter parsing
@@ -217,9 +230,9 @@ extern struct savearea *get_user_regs(thread_t);
 extern vm_map_t bsd_pageable_map;
 extern const struct fileops vnops;
 
-#define	ROUND_PTR(type, addr)	\
-	(type *)( ( (uintptr_t)(addr) + 16 - 1) \
-		  & ~(16 - 1) )
+#define	USER_ADDR_ALIGN(addr, val) \
+	( ( (user_addr_t)(addr) + (val) - 1) \
+		& ~((val) - 1) )
 
 struct image_params;	/* Forward */
 static int exec_activate_image(struct image_params *imgp);
@@ -238,8 +251,9 @@ static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
 static void exec_resettextvp(proc_t, struct image_params *);
 static int check_for_signature(proc_t, struct image_params *);
 static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
-static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch);
-static errno_t exec_handle_spawnattr_apptype(proc_t p, int psa_apptype);
+static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports);
+static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
+                             ipc_port_t * portwatch_ports, int portwatch_count);
 
 /*
  * exec_add_user_string
@@ -295,6 +309,12 @@ exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolea
 	return error;
 }
 
+/*
+ * dyld is now passed the executable path as a getenv-like variable
+ * in the same fashion as the stack_guard and malloc_entropy keys.
+ */
+#define	EXECUTABLE_KEY "executable_path="
+
 /*
  * exec_save_path
  *
@@ -330,22 +350,26 @@ exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolea
  *		unacceptable for dyld.
  */
 static int
-exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
+exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath)
 {
 	int error;
-	size_t	len;
+	size_t len;
 	char *kpath;
 
+	// imgp->ip_strings can come out of a cache, so we need to obliterate the
+	// old path.
+	memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);
+
 	len = MIN(MAXPATHLEN, imgp->ip_strspace);
 
 	switch(seg) {
 	case UIO_USERSPACE32:
 	case UIO_USERSPACE64:	/* Same for copyin()... */
-		error = copyinstr(path, imgp->ip_strings, len, &len);
+		error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
 		break;
 	case UIO_SYSSPACE:
 		kpath = CAST_DOWN(char *,path);	/* SAFE */
-		error = copystr(kpath, imgp->ip_strings, len, &len);
+		error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
 		break;
 	default:
 		error = EFAULT;
@@ -353,8 +377,15 @@ exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
 	}
 
 	if (!error) {
+		bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
+		len += strlen(EXECUTABLE_KEY);
+
 		imgp->ip_strendp += len;
 		imgp->ip_strspace -= len;
+
+		if (excpath) {
+			*excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
+		}
 	}
 
 	return(error);
@@ -390,14 +421,14 @@ exec_reset_save_path(struct image_params *imgp)
 /*
  * exec_shell_imgact
  *
- * Image activator for interpreter scripts.  If the image begins with the
- * characters "#!", then it is an interpreter script.  Verify that we are
- * not already executing in PowerPC mode, and that the length of the script
- * line indicating the interpreter is not in excess of the maximum allowed
- * size.  If this is the case, then break out the arguments, if any, which
- * are separated by white space, and copy them into the argument save area
- * as if they were provided on the command line before all other arguments.
- * The line ends when we encounter a comment character ('#') or newline.
+ * Image activator for interpreter scripts.  If the image begins with
+ * the characters "#!", then it is an interpreter script.  Verify the
+ * length of the script line indicating the interpreter is not in
+ * excess of the maximum allowed size.  If this is the case, then
+ * break out the arguments, if any, which are separated by white
+ * space, and copy them into the argument save area as if they were
+ * provided on the command line before all other arguments.  The line
+ * ends when we encounter a comment character ('#') or newline.
  *
  * Parameters;	struct image_params *	image parameter block
  *
@@ -423,10 +454,6 @@ exec_shell_imgact(struct image_params *imgp)
 	/*
 	 * Make sure it's a shell script.  If we've already redirected
 	 * from an interpreted file once, don't do it again.
-	 *
-	 * Note: We disallow PowerPC, since the expectation is that we
-	 * may run a PowerPC interpreter, but not an interpret a PowerPC 
-	 * image.  This is consistent with historical behaviour.
 	 */
 	if (vdata[0] != '#' ||
 	    vdata[1] != '!' ||
@@ -434,6 +461,11 @@ exec_shell_imgact(struct image_params *imgp)
 		return (-1);
 	}
 
+	if (imgp->ip_origcputype != 0) {
+		/* Fat header previously matched, don't allow shell script inside */
+		return (-1);
+	}
+
 	imgp->ip_flags |= IMGPF_INTERPRET;
 	imgp->ip_interp_sugid_fd = -1;
 	imgp->ip_interp_buffer[0] = '\0';
@@ -504,7 +536,7 @@ exec_shell_imgact(struct image_params *imgp)
 
 	exec_reset_save_path(imgp);
 	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
-							UIO_SYSSPACE);
+							UIO_SYSSPACE, NULL);
 
 	/* Copy the entire interpreter + args for later processing into argv[] */
 	interp = imgp->ip_interp_buffer;
@@ -578,20 +610,29 @@ exec_fat_imgact(struct image_params *imgp)
 	int resid, error;
 	load_return_t lret;
 
+	if (imgp->ip_origcputype != 0) {
+		/* Fat header previously matched, don't allow another fat file inside */
+		return (-1);
+	}
+
 	/* Make sure it's a fat binary */
-	if ((fat_header->magic != FAT_MAGIC) &&
-            (fat_header->magic != FAT_CIGAM)) {
-	    	error = -1;
+	if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
+		error = -1; /* not claimed */
+		goto bad;
+	}
+
+	/* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */
+	lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
+	if (lret != LOAD_SUCCESS) {
+		error = load_return_to_errno(lret);
 		goto bad;
 	}
 
 	/* If posix_spawn binprefs exist, respect those prefs. */
 	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
 	if (psa != NULL && psa->psa_binprefs[0] != 0) {
-		struct fat_arch *arches = (struct fat_arch *) (fat_header + 1);
-		int nfat_arch = 0, pr = 0, f = 0;
+		uint32_t pr = 0;
 
-		nfat_arch = OSSwapBigToHostInt32(fat_header->nfat_arch);
 		/* Check each preference listed against all arches in header */
 		for (pr = 0; pr < NBINPREFS; pr++) {
 			cpu_type_t pref = psa->psa_binprefs[pr];
@@ -603,36 +644,28 @@ exec_fat_imgact(struct image_params *imgp)
 
 			if (pref == CPU_TYPE_ANY) {
 				/* Fall through to regular grading */
-				break;
+				goto regular_grading;
 			}
 
-			for (f = 0; f < nfat_arch; f++) {
-				cpu_type_t archtype = OSSwapBigToHostInt32(
-						arches[f].cputype);
-				cpu_type_t archsubtype = OSSwapBigToHostInt32(
-						arches[f].cpusubtype) & ~CPU_SUBTYPE_MASK;
-				if (pref == archtype &&
-					grade_binary(archtype, archsubtype)) {
-					/* We have a winner! */
-					fat_arch.cputype = archtype; 
-					fat_arch.cpusubtype = archsubtype; 
-					fat_arch.offset = OSSwapBigToHostInt32(
-							arches[f].offset);
-					fat_arch.size = OSSwapBigToHostInt32(
-							arches[f].size);
-					fat_arch.align = OSSwapBigToHostInt32(
-							arches[f].align);
-					goto use_arch;
-				}
+			lret = fatfile_getbestarch_for_cputype(pref,
+							(vm_offset_t)fat_header,
+							PAGE_SIZE,
+							&fat_arch);
+			if (lret == LOAD_SUCCESS) {
+				goto use_arch;
 			}
 		}
+
+		/* Requested binary preference was not honored */
+		error = EBADEXEC;
+		goto bad;
 	}
 
+regular_grading:
 	/* Look up our preferred architecture in the fat file. */
-	lret = fatfile_getarch_affinity(imgp->ip_vp,
-					(vm_offset_t)fat_header,
-					&fat_arch,
-					(p->p_flag & P_AFFINITY));
+	lret = fatfile_getbestarch((vm_offset_t)fat_header,
+				PAGE_SIZE,
+				&fat_arch);
 	if (lret != LOAD_SUCCESS) {
 		error = load_return_to_errno(lret);
 		goto bad;
@@ -648,22 +681,56 @@ use_arch:
 		goto bad;
 	}
 
-	/* Did we read a complete header? */
 	if (resid) {
-		error = EBADEXEC;
-		goto bad;
+		memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
 	}
 
 	/* Success.  Indicate we have identified an encapsulated binary */
 	error = -2;
 	imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
 	imgp->ip_arch_size = (user_size_t)fat_arch.size;
+	imgp->ip_origcputype = fat_arch.cputype;
+	imgp->ip_origcpusubtype = fat_arch.cpusubtype;
 
 bad:
 	kauth_cred_unref(&cred);
 	return (error);
 }
 
+/* Initialize the thread's state, apply the image's saved thread-state records, then set its entry point. */
+static int
+activate_thread_state(thread_t thread, load_result_t *result)
+{
+	int ret;
+
+	ret = thread_state_initialize(thread);
+	if (ret != KERN_SUCCESS) {
+		return ret;
+	}
+
+	/* threadstate is read as back-to-back records: [flavor][size][size 32-bit words of state]. */
+	if (result->threadstate) {
+		uint32_t *ts = result->threadstate;
+		uint32_t total_size = result->threadstate_sz;	/* bytes still to consume */
+
+		while (total_size > 0) {	/* NOTE(review): unsigned countdown; presumably the loader validated the record sizes -- confirm */
+			uint32_t flavor = *ts++;
+			uint32_t size = *ts++;	/* payload length, in 32-bit words */
+
+			ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
+			if (ret) {
+				return ret;	/* first failure is returned as-is */
+			}
+			ts += size;
+			total_size -= (size + 2) * sizeof(uint32_t);	/* payload plus the two header words */
+		}
+	}
+
+	thread_setentrypoint(thread, result->entry_point);
+
+	return KERN_SUCCESS;
+}
+
 /*
  * exec_mach_imgact
  *
@@ -693,7 +760,6 @@ exec_mach_imgact(struct image_params *imgp)
 	struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
 	proc_t			p = vfs_context_proc(imgp->ip_vfs_context);
 	int			error = 0;
-	int			vfexec = 0;
 	task_t			task;
 	task_t			new_task = NULL; /* protected by vfexec */
 	thread_t		thread;
@@ -703,7 +769,9 @@ exec_mach_imgact(struct image_params *imgp)
 	load_return_t		lret;
 	load_result_t		load_result;
 	struct _posix_spawnattr *psa = NULL;
-	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
+	int			spawn = (imgp->ip_flags & IMGPF_SPAWN);
+	int			vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
+	int			p_name_len;
 
 	/*
 	 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
@@ -723,14 +791,19 @@ exec_mach_imgact(struct image_params *imgp)
 		goto bad;
 	}
 
-	switch (mach_header->filetype) {
-	case MH_DYLIB:
-	case MH_BUNDLE:
+	if (mach_header->filetype != MH_EXECUTE) {
 		error = -1;
 		goto bad;
 	}
 
-	if (!imgp->ip_origcputype) {
+	if (imgp->ip_origcputype != 0) {
+		/* Fat header previously had an idea about this thin file */
+		if (imgp->ip_origcputype != mach_header->cputype ||
+			imgp->ip_origcpusubtype != mach_header->cpusubtype) {
+			error = EBADARCH;
+			goto bad;
+		}
+	} else {
 		imgp->ip_origcputype = mach_header->cputype;
 		imgp->ip_origcpusubtype = mach_header->cpusubtype;
 	}
@@ -739,15 +812,6 @@ exec_mach_imgact(struct image_params *imgp)
 	thread = current_thread();
 	uthread = get_bsdthread_info(thread);
 
-	/*
-	 * Save off the vfexec state up front; we have to do this, because
-	 * we need to know if we were in this state initially subsequent to
-	 * creating the backing task, thread, and uthread for the child
-	 * process (from the vfs_context_t from in img_parms).
-	 */
-	if (uthread->uu_flag & UT_VFORK)
-		vfexec = 1;	 /* Mark in exec */
-
 	if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
 		imgp->ip_flags |= IMGPF_IS_64BIT;
 
@@ -805,7 +869,7 @@ grade:
 	 */
 	if (vfexec || spawn) {
 		if (vfexec) {
-			imgp->ip_new_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
+			imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
 			if (imgp->ip_new_thread == NULL) {
 				error = ENOMEM;
 				goto bad;
@@ -840,19 +904,28 @@ grade:
 	 * NOTE: An error after this point  indicates we have potentially
 	 * destroyed or overwritten some process state while attempting an
 	 * execve() following a vfork(), which is an unrecoverable condition.
+	 * We send the new process an immediate SIGKILL to avoid it executing
+	 * any instructions in the mutated address space. For true spawns,
+	 * this is not the case, and "too late" is still not too late to
+	 * return an error code to the parent process.
 	 */
 
 	/*
 	 * Actually load the image file we previously decided to load.
 	 */
-	lret = load_machfile(imgp, mach_header, thread, map, &load_result);
+	lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
 
 	if (lret != LOAD_SUCCESS) {
 		error = load_return_to_errno(lret);
 		goto badtoolate;
 	}
 
-	vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
+	proc_lock(p);
+	p->p_cputype = imgp->ip_origcputype;
+	p->p_cpusubtype = imgp->ip_origcpusubtype;
+	proc_unlock(p);
+
+	vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 
 	/* 
 	 * Set code-signing flags if this binary is signed, or if parent has
@@ -861,7 +934,7 @@ grade:
 	if (load_result.csflags & CS_VALID) {
 		imgp->ip_csflags |= load_result.csflags & 
 			(CS_VALID|
-			 CS_HARD|CS_KILL|CS_ENFORCEMENT|
+			 CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM|
 			 CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
 	} else {
 		imgp->ip_csflags &= ~CS_VALID;
@@ -873,16 +946,14 @@ grade:
 		imgp->ip_csflags |= CS_KILL;
 	if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
 		imgp->ip_csflags |= CS_ENFORCEMENT;
-
+	if (p->p_csflags & CS_EXEC_SET_INSTALLER)
+		imgp->ip_csflags |= CS_INSTALLER;
 
 	/*
 	 * Set up the system reserved areas in the new address space.
 	 */
-	vm_map_exec(get_task_map(task),
-		    task,
-		    (void *) p->p_fd->fd_rdir,
-		    cpu_type());
-	
+	vm_map_exec(map, task, (void *)p->p_fd->fd_rdir, cpu_type());
+
 	/*
 	 * Close file descriptors which specify close-on-exec.
 	 */
@@ -892,15 +963,40 @@ grade:
 	 * deal with set[ug]id.
 	 */
 	error = exec_handle_sugid(imgp);
+	if (error) {
+		if (spawn || !vfexec) {
+			vm_map_deallocate(map);
+		}
+		goto badtoolate;
+	}
+
+	/*
+	 * Commit to new map.
+	 *
+	 * Swap the new map for the old, which consumes our new map reference but
+	 * leaves us responsible for the old_map reference.  That lets us get
+	 * off the pmap associated with it, and then we can release it.
+	 */
+	if (!vfexec) {
+		old_map = swap_task_map(task, thread, map, !spawn);
+		vm_map_deallocate(old_map);
+	}
+
+	lret = activate_thread_state(thread, &load_result);
+	if (lret != KERN_SUCCESS) {
+		goto badtoolate;
+	}
+
+	/*
+	 * deal with voucher on exec-calling thread.
+	 */
+	if (imgp->ip_new_thread == NULL)
+		thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
 
 	/* Make sure we won't interrupt ourself signalling a partial process */
 	if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
 		psignal(p, SIGTRAP);
 
-	if (error) {
-		goto badtoolate;
-	}
-	
 	if (load_result.unixproc &&
 		create_unix_stack(get_task_map(task),
 				  &load_result,
@@ -930,7 +1026,7 @@ grade:
 		/* Set the stack */
 		thread_setuserstack(thread, ap);
 	}
-	
+
 	if (load_result.dynlinker) {
 		uint64_t	ap;
 		int			new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
@@ -940,8 +1036,8 @@ grade:
 		error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
 
 		if (error) {
-		        if (vfexec || spawn)
-			        vm_map_switch(old_map);
+			if (vfexec || spawn)
+				vm_map_switch(old_map);
 			goto badtoolate;
 		}
 		task_set_dyld_info(task, load_result.all_image_info_addr,
@@ -954,8 +1050,6 @@ grade:
 	if (vfexec || spawn) {
 		vm_map_switch(old_map);
 	}
-	/* Set the entry point */
-	thread_setentrypoint(thread, load_result.entry_point);
 
 	/* Stop profiling */
 	stopprofclock(p);
@@ -985,23 +1079,32 @@ grade:
 	 * Remember file name for accounting.
 	 */
 	p->p_acflag &= ~AFORK;
-	/* If the translated name isn't NULL, then we want to use
-	 * that translated name as the name we show as the "real" name.
-	 * Otherwise, use the name passed into exec.
+
+	/*
+	 * Set p->p_comm and p->p_name to the name passed to exec
 	 */
-	if (0 != imgp->ip_p_comm[0]) {
-		bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
-			sizeof(p->p_comm));
-	} else {
-		if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
-			imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
-		bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
-			(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
-		p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
-	}
+	p_name_len = sizeof(p->p_name) - 1;
+	if(imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len)
+		imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
+	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
+		(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
+	p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
+
+	if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
+		imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
+	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
+		(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
+	p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
 
 	pal_dbg_set_task_name( p->task );
 
+#if DEVELOPMENT || DEBUG
+	/* 
+	 * Update the pid and proc name for importance base if any
+	 */
+	task_importance_update_owner_info(p->task);
+#endif
+
 	memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
 
 // <rdar://6598155> dtrace code cleanup needed
@@ -1036,8 +1139,8 @@ grade:
     	 */
 	proc_lock(p);
 	if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
-    		(*dtrace_fasttrap_exec_ptr)(p);
-    	}
+		(*dtrace_fasttrap_exec_ptr)(p);
+	}
 	proc_unlock(p);
 #endif
 
@@ -1050,24 +1153,18 @@ grade:
 		kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
 
 		if (vfexec || spawn) {
-			KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
+			KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE,
 					p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread));
-			KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
+			KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE,
 					dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
 		} else {
-			KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
+			KERNEL_DEBUG_CONSTANT(TRACE_DATA_EXEC | DBG_FUNC_NONE,
 					p->p_pid ,0,0,0,0);
-			KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
+			KERNEL_DEBUG_CONSTANT(TRACE_STRING_EXEC | DBG_FUNC_NONE,
 					dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
 		}
 	}
 
-	/*
-	 * Ensure the 'translated' and 'affinity' flags are cleared, since we
-	 * no longer run PowerPC binaries.
-	 */
-	OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag);
-
 	/*
 	 * If posix_spawned with the START_SUSPENDED flag, stop the
 	 * process before it runs.
@@ -1078,19 +1175,10 @@ grade:
 			proc_lock(p);
 			p->p_stat = SSTOP;
 			proc_unlock(p);
-			(void) task_suspend(p->task);
+			(void) task_suspend_internal(p->task);
 		}
 	}
 
-	/*
-	 * Apply the apptype state (which primes the task for importance donation)
-	 * This must be done after the exec so that the child's thread is ready 
-	 */
-	if (imgp->ip_px_sa != NULL) {
-		psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
-		exec_handle_spawnattr_apptype(p, psa->psa_apptype);
-	}
-
 	/*
 	 * mark as execed, wakeup the process that vforked (if any) and tell
 	 * it that it now has its own resources back
@@ -1112,17 +1200,37 @@ grade:
 		psignal_vfork(p, new_task, thread, SIGTRAP);
 	}
 
+	goto done;
+
 badtoolate:
-if (!spawn)
-	/* notify only if it has not failed due to FP Key error */
-	if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
-		proc_knote(p, NOTE_EXEC);
+	/* Don't allow child process to execute any instructions */
+	if (!spawn) {
+		if (vfexec) {
+			psignal_vfork(p, new_task, thread, SIGKILL);
+		} else {
+			psignal(p, SIGKILL);
+		}
 
-	if (vfexec || spawn) {
+		/* We can't stop this system call at this point, so just pretend we succeeded */
+		error = 0;
+	}
+	
+done:
+	if (!spawn) {
+		/* notify only if it has not failed due to FP Key error */
+		if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
+			proc_knote(p, NOTE_EXEC);
+	}
+
+	/* Drop extra references for cases where we don't expect the caller to clean up */
+	if (vfexec || (spawn && error == 0)) {
 		task_deallocate(new_task);
 		thread_deallocate(thread);
-		if (error)
-			error = 0;
+	}
+
+	if (load_result.threadstate) {
+		kfree(load_result.threadstate, load_result.threadstate_sz);
+		load_result.threadstate = NULL;
 	}
 
 bad:
@@ -1174,39 +1282,47 @@ struct execsw {
  *	namei:???
  *	vn_rdwr:???			[anything vn_rdwr can return]
  *	<ex_imgact>:???			[anything an imgact can return]
+ *	EDEADLK				Process is being terminated
  */
 static int
 exec_activate_image(struct image_params *imgp)
 {
-	struct nameidata nd;
+	struct nameidata *ndp = NULL;
+	const char *excpath;
 	int error;
 	int resid;
 	int once = 1;	/* save SGUID-ness for interpreted files */
 	int i;
-	int iterlimit = EAI_ITERLIMIT;
+	int itercount = 0;
 	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
 
 	error = execargs_alloc(imgp);
 	if (error)
 		goto bad_notrans;
 	
-	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
+	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
 	if (error) {
 		goto bad_notrans;
 	}
 
-	/* Use imgp->ip_strings, which contains the copyin-ed exec path */
-	DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
+	/* Use excpath, which contains the copyin-ed exec path */
+	DTRACE_PROC1(exec, uintptr_t, excpath);
 
-	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
-		   UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
+	MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
+	if (ndp == NULL) {
+		error = ENOMEM;
+		goto bad_notrans;
+	}
+
+	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+		   UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
 
 again:
-	error = namei(&nd);
+	error = namei(ndp);
 	if (error)
 		goto bad_notrans;
-	imgp->ip_ndp = &nd;	/* successful namei(); call nameidone() later */
-	imgp->ip_vp = nd.ni_vp;	/* if set, need to vnode_put() at some point */
+	imgp->ip_ndp = ndp;	/* successful namei(); call nameidone() later */
+	imgp->ip_vp = ndp->ni_vp;	/* if set, need to vnode_put() at some point */
 
 	/*
 	 * Before we start the transition from binary A to binary B, make
@@ -1217,10 +1333,11 @@ again:
 	 */
 	proc_lock(p);
 	if (p->p_lflag & P_LEXIT) {
+		error = EDEADLK;
 		proc_unlock(p);
 		goto bad_notrans;
 	}
-	error = proc_transstart(p, 1);
+	error = proc_transstart(p, 1, 0);
 	proc_unlock(p);
 	if (error)
 		goto bad_notrans;
@@ -1241,10 +1358,14 @@ again:
 			&resid, vfs_context_proc(imgp->ip_vfs_context));
 	if (error)
 		goto bad;
-		
+
+	if (resid) {
+		memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
+	}
+
 encapsulated_binary:
 	/* Limit the number of iterations we will attempt on each binary */
-	if (--iterlimit == 0) {
+	if (++itercount > EAI_ITERLIMIT) {
 		error = EBADEXEC;
 		goto bad;
 	}
@@ -1255,7 +1376,7 @@ encapsulated_binary:
 
 		switch (error) {
 		/* case -1: not claimed: continue */
-		case -2:		/* Encapsulated binary */
+		case -2:		/* Encapsulated binary, imgp->ip_XXX set for next iteration */
 			goto encapsulated_binary;
 
 		case -3:		/* Interpreter */
@@ -1284,15 +1405,15 @@ encapsulated_binary:
 				imgp->ip_scriptvp = imgp->ip_vp;
 #endif
 
-			nameidone(&nd);
+			nameidone(ndp);
 
 			vnode_put(imgp->ip_vp);
 			imgp->ip_vp = NULL;	/* already put */
 			imgp->ip_ndp = NULL; /* already nameidone */
 
-			/* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */
-			NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
-				   UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
+			/* Use excpath, which exec_shell_imgact reset to the interpreter */
+			NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
+				   UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
 
 			proc_transend(p, 0);
 			goto again;
@@ -1309,9 +1430,20 @@ encapsulated_binary:
 	if (error == 0 && kauth_authorize_fileop_has_listeners()) {
 		kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
 					KAUTH_FILEOP_EXEC,
-					(uintptr_t)nd.ni_vp, 0);
+					(uintptr_t)ndp->ni_vp, 0);
 	}
 
+	if (error == 0) {
+		/*
+		 * Reset atm context from task
+		 */
+		task_atm_reset(p->task);
+
+		/*
+		 * Reset old bank context from task
+		 */
+		task_bank_reset(p->task);
+	}
 bad:
 	proc_transend(p, 0);
 
@@ -1320,15 +1452,17 @@ bad_notrans:
 		execargs_free(imgp);
 	if (imgp->ip_ndp)
 		nameidone(imgp->ip_ndp);
+	if (ndp)
+		FREE(ndp, M_TEMP);
 
 	return (error);
 }
 
 
 /*
- * exec_handle_spawnattr_apptype
+ * exec_handle_spawnattr_policy
  *
- * Description: Decode and apply the posix_spawn apptype to the task.
+ * Description: Decode and apply the posix_spawn apptype, qos clamp, darwin role, and watchport ports to the task.
  *
  * Parameters:  proc_t p                process to apply attributes to
  *              int psa_apptype         posix spawn attribute apptype
@@ -1336,10 +1470,14 @@ bad_notrans:
  * Returns:     0                       Success
  */
 static errno_t
-exec_handle_spawnattr_apptype(proc_t p, int psa_apptype)
+exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
+                             ipc_port_t * portwatch_ports, int portwatch_count)
 {
+	int apptype     = TASK_APPTYPE_NONE;
+	int qos_clamp   = THREAD_QOS_UNSPECIFIED;
+	int role        = TASK_UNSPECIFIED;
+
 	if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
-		int apptype = TASK_APPTYPE_NONE;
 		int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
 
 		switch(proctype) {
@@ -1363,13 +1501,38 @@ exec_handle_spawnattr_apptype(proc_t p, int psa_apptype)
 				break;
 			default:
 				apptype = TASK_APPTYPE_NONE;
+				/* TODO: Should an invalid value here fail the spawn? */
 				break;
 		}
+	}
 
-		proc_set_task_apptype(p->task, apptype);
+	if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
+		switch (psa_qos_clamp) {
+			case POSIX_SPAWN_PROC_CLAMP_UTILITY:
+				qos_clamp = THREAD_QOS_UTILITY;
+				break;
+			case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
+				qos_clamp = THREAD_QOS_BACKGROUND;
+				break;
+			case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
+				qos_clamp = THREAD_QOS_MAINTENANCE;
+				break;
+			default:
+				qos_clamp = THREAD_QOS_UNSPECIFIED;
+				/* TODO: Should an invalid value here fail the spawn? */
+				break;
+		}
+	}
+
+	if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
+		proc_darwin_role_to_task_role(psa_darwin_role, &role);
+	}
 
-		/* TODO: Should an invalid value here fail the spawn? */
-		return (0);
+	if (apptype   != TASK_APPTYPE_NONE      ||
+	    qos_clamp != THREAD_QOS_UNSPECIFIED ||
+	    role      != TASK_UNSPECIFIED) {
+		proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, role,
+		                          portwatch_ports, portwatch_count);
 	}
 
 	return (0);
@@ -1391,7 +1554,7 @@ exec_handle_spawnattr_apptype(proc_t p, int psa_apptype)
  * 		ENOTSUP			Illegal posix_spawn attr flag was set
  */
 static errno_t
-exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch_ports)
+exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports)
 {
 	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
 	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
@@ -1401,8 +1564,7 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_
 	errno_t ret = 0;
 	int i;
 
-	if (need_portwatch != NULL)
-		*need_portwatch  = 0;
+	*portwatch_present = FALSE;
 
 	for (i = 0; i < pacts->pspa_count; i++) {
 		act = &pacts->pspa_actions[i];
@@ -1440,8 +1602,7 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_
 #endif
 		case PSPA_IMP_WATCHPORTS:
 			if (portwatch_ports != NULL) {
-				if (need_portwatch != NULL)
-					*need_portwatch  = 1;
+				*portwatch_present = TRUE;
 				/* hold on to this till end of spawn */
 				portwatch_ports[i] = port;
 				ret = 0;
@@ -1507,29 +1668,41 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags)
 			 * context of UIO_SYSSPACE, and casts the address
 			 * argument to a user_addr_t.
 			 */
-			struct vnode_attr va;
-			struct nameidata nd;
+			char *bufp = NULL;
+			struct vnode_attr *vap;
+			struct nameidata *ndp;
 			int mode = psfa->psfaa_openargs.psfao_mode;
 			struct dup2_args dup2a;
 			struct close_nocancel_args ca;
 			int origfd;
 
-			VATTR_INIT(&va);
+			MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
+			if (bufp == NULL) {
+				error = ENOMEM;
+				break;
+			}
+
+			vap = (struct vnode_attr *) bufp;
+			ndp = (struct nameidata *) (bufp + sizeof(*vap));
+
+			VATTR_INIT(vap);
 			/* Mask off all but regular access permissions */
 			mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
-			VATTR_SET(&va, va_mode, mode & ACCESSPERMS);
+			VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
 
-			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
+			NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
 			       CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
 			       imgp->ip_vfs_context);
 
 			error = open1(imgp->ip_vfs_context, 
-					&nd,
+					ndp,
 					psfa->psfaa_openargs.psfao_oflag,
-					&va,
+					vap,
 					fileproc_alloc_init, NULL,
 					ival);
 
+			FREE(bufp, M_TEMP);
+
 			/*
 			 * If there's an error, or we get the right fd by
 			 * accident, then drop out here.  This is easier than
@@ -1767,6 +1940,182 @@ spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
 }
 #endif /* CONFIG_MACF */
 
+#if CONFIG_COALITIONS
+static inline void spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
+{
+	for (int c = 0; c < COALITION_NUM_TYPES; c++) {
+		if (coal[c]) {
+			coalition_remove_active(coal[c]);
+			coalition_release(coal[c]);
+		}
+	}
+}
+#endif
+
+#if CONFIG_PERSONAS
+static int spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
+{
+	int error = 0;
+	struct persona *persona = NULL;
+	int verify = px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_VERIFY;
+
+	/*
+	 * TODO: rdar://problem/19981151
+	 * Add entitlement check!
+	 */
+	if (!kauth_cred_issuser(kauth_cred_get()))
+		return EPERM;
+
+	persona = persona_lookup(px_persona->pspi_id);
+	if (!persona) {
+		error = ESRCH;
+		goto out;
+	}
+
+	if (verify) {
+		if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
+			if (px_persona->pspi_uid != persona_get_uid(persona)) {
+				error = EINVAL;
+				goto out;
+			}
+		}
+		if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
+			if (px_persona->pspi_gid != persona_get_gid(persona)) {
+				error = EINVAL;
+				goto out;
+			}
+		}
+		if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
+			int ngroups = 0;
+			gid_t groups[NGROUPS_MAX];
+
+			if (persona_get_groups(persona, &ngroups, groups,
+					       px_persona->pspi_ngroups) != 0) {
+				error = EINVAL;
+				goto out;
+			}
+			if (ngroups != (int)px_persona->pspi_ngroups) {
+				error = EINVAL;
+				goto out;
+			}
+			while (ngroups--) {
+				if (px_persona->pspi_groups[ngroups] != groups[ngroups]) {
+					error = EINVAL;
+					goto out;
+				}
+			}
+			if (px_persona->pspi_gmuid != persona_get_gmuid(persona)) {
+				error = EINVAL;
+				goto out;
+			}
+		}
+	}
+
+out:
+	if (persona)
+		persona_put(persona);
+
+	return error;
+}
+
+static int spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
+{
+	int ret;
+	kauth_cred_t cred;
+	struct persona *persona = NULL;
+	int override = !!(px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
+
+	if (!override)
+		return persona_proc_adopt_id(p, px_persona->pspi_id, NULL);
+
+	/*
+	 * we want to spawn into the given persona, but we want to override
+	 * the kauth with a different UID/GID combo
+	 */
+	persona = persona_lookup(px_persona->pspi_id);
+	if (!persona)
+		return ESRCH;
+
+	cred = persona_get_cred(persona);
+	if (!cred) {
+		ret = EINVAL;
+		goto out;
+	}
+
+	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
+		cred = kauth_cred_setresuid(cred,
+					    px_persona->pspi_uid,
+					    px_persona->pspi_uid,
+					    px_persona->pspi_uid,
+					    KAUTH_UID_NONE);
+	}
+
+	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
+		cred = kauth_cred_setresgid(cred,
+					    px_persona->pspi_gid,
+					    px_persona->pspi_gid,
+					    px_persona->pspi_gid);
+	}
+
+	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
+		cred = kauth_cred_setgroups(cred,
+					    px_persona->pspi_groups,
+					    px_persona->pspi_ngroups,
+					    px_persona->pspi_gmuid);
+	}
+
+	ret = persona_proc_adopt(p, persona, cred);
+
+out:
+	persona_put(persona);
+	return ret;
+}
+#endif
+
+void
+proc_set_return_wait(proc_t p)
+{
+	proc_lock(p);
+	p->p_lflag |= P_LRETURNWAIT;
+	proc_unlock(p);
+}
+
+void
+proc_clear_return_wait(proc_t p, thread_t child_thread)
+{
+	proc_lock(p);
+
+	p->p_lflag &= ~P_LRETURNWAIT;
+	if (p->p_lflag & P_LRETURNWAITER) {
+		wakeup(&p->p_lflag);
+	}
+
+	proc_unlock(p);
+
+	(void)thread_resume(child_thread);
+}
+
+void
+proc_wait_to_return()
+{
+	proc_t	p;
+
+	p = current_proc();
+	proc_lock(p);
+
+	if (p->p_lflag & P_LRETURNWAIT) {
+		p->p_lflag |= P_LRETURNWAITER;
+		do {
+			msleep(&p->p_lflag, &p->p_mlock, 0,
+				"thread_check_setup_complete", NULL);
+		} while (p->p_lflag & P_LRETURNWAIT);
+		p->p_lflag &= ~P_LRETURNWAITER;
+	}
+
+	proc_unlock(p);
+	thread_bootstrap_return();
+}
+
 /*
  * posix_spawn
  *
@@ -1807,7 +2156,6 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	struct vnode_attr *origvap;
 	struct uthread	*uthread = 0;	/* compiler complains if not set to 0*/
 	int error, sig;
-	char alt_p_comm[sizeof(p->p_comm)] = {0};	/* for PowerPC */
 	int is_64 = IS_64BIT_PROCESS(p);
 	struct vfs_context context;
 	struct user__posix_spawn_args_desc px_args;
@@ -1818,9 +2166,12 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	boolean_t spawn_no_exec = FALSE;
 	boolean_t proc_transit_set = TRUE;
 	boolean_t exec_done = FALSE;
-	int need_portwatch = 0, portwatch_count = 0;
+	int portwatch_count = 0;
 	ipc_port_t * portwatch_ports = NULL;
 	vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports); 
+#if CONFIG_PERSONAS
+	struct _posix_spawn_persona_info *px_persona = NULL;
+#endif
 
 	/*
 	 * Allocate a big chunk for locals instead of using stack since these  
@@ -1843,8 +2194,9 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	imgp->ip_origvattr = origvap;
 	imgp->ip_vfs_context = &context;
 	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
-	imgp->ip_p_comm = alt_p_comm;		/* for PowerPC */
 	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
+	imgp->ip_mac_return = 0;
+	imgp->ip_px_persona = NULL;
 
 	if (uap->adesc != USER_ADDR_NULL) {
 		if(is_64) {
@@ -1866,6 +2218,10 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 			px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
 			px_args.mac_extensions_size = px_args32.mac_extensions_size;
 			px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
+			px_args.coal_info_size = px_args32.coal_info_size;
+			px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
+			px_args.persona_info_size = px_args32.persona_info_size;
+			px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
 		}
 		if (error)
 			goto bad;
@@ -1874,8 +2230,8 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 			/* 
 			 * We are not copying the port_actions pointer, 
 			 * because we already have it from px_args. 
+			 * This is a bit fragile: <rdar://problem/16427422>
 			 */
-			 
 
 			if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0)) 
 			goto bad;
@@ -1935,6 +2291,29 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 				goto bad;
 			}
 		}
+#if CONFIG_PERSONAS
+		/* copy in the persona info */
+		if (px_args.persona_info_size != 0 && px_args.persona_info != 0) {
+			/* for now, we need the exact same struct in user space */
+			if (px_args.persona_info_size != sizeof(*px_persona)) {
+				error = ERANGE;
+				goto bad;
+			}
+
+			MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK|M_ZERO);
+			if (px_persona == NULL) {
+				error = ENOMEM;
+				goto bad;
+			}
+			imgp->ip_px_persona = px_persona;
+
+			if ((error = copyin(px_args.persona_info, px_persona,
+					    px_args.persona_info_size)) != 0)
+				goto bad;
+			if ((error = spawn_validate_persona(px_persona)) != 0)
+				goto bad;
+		}
+#endif
 #if CONFIG_MACF
 		if (px_args.mac_extensions_size != 0) {
 			if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
@@ -1964,15 +2343,151 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	 * and execve().
 	 */
 	if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
-		if ((error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN)) != 0)
+
+		/* Set the new task's coalition, if it is requested.  */
+		coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
+#if CONFIG_COALITIONS
+		int i, ncoals;
+		kern_return_t kr = KERN_SUCCESS;
+		struct _posix_spawn_coalition_info coal_info;
+		int coal_role[COALITION_NUM_TYPES];
+
+		if (imgp->ip_px_sa == NULL || !px_args.coal_info)
+			goto do_fork1;
+
+		memset(&coal_info, 0, sizeof(coal_info));
+
+		if (px_args.coal_info_size > sizeof(coal_info))
+			px_args.coal_info_size = sizeof(coal_info);
+		error = copyin(px_args.coal_info,
+			       &coal_info, px_args.coal_info_size);
+		if (error != 0)
+			goto bad;
+
+		ncoals = 0;
+		for (i = 0; i < COALITION_NUM_TYPES; i++) {
+			uint64_t cid = coal_info.psci_info[i].psci_id;
+			if (cid != 0) {
+				/*
+				 * don't allow tasks which are not in a
+				 * privileged coalition to spawn processes
+				 * into coalitions other than their own
+				 */
+				if (!task_is_in_privileged_coalition(p->task, i)) {
+					coal_dbg("ERROR: %d not in privilegd "
+						 "coalition of type %d",
+						 p->p_pid, i);
+					spawn_coalitions_release_all(coal);
+					error = EPERM;
+					goto bad;
+				}
+
+				coal_dbg("searching for coalition id:%llu", cid);
+				/*
+				 * take a reference and activation on the
+				 * coalition to guard against free-while-spawn
+				 * races
+				 */
+				coal[i] = coalition_find_and_activate_by_id(cid);
+				if (coal[i] == COALITION_NULL) {
+					coal_dbg("could not find coalition id:%llu "
+						 "(perhaps it has been terminated or reaped)", cid);
+					/*
+					 * release any other coalitions we
+					 * may have a reference to
+					 */
+					spawn_coalitions_release_all(coal);
+					error = ESRCH;
+					goto bad;
+				}
+				if (coalition_type(coal[i]) != i) {
+					coal_dbg("coalition with id:%lld is not of type:%d"
+						 " (it's type:%d)", cid, i, coalition_type(coal[i]));
+					error = ESRCH;
+					goto bad;
+				}
+				coal_role[i] = coal_info.psci_info[i].psci_role;
+				ncoals++;
+			}
+		}
+		if (ncoals < COALITION_NUM_TYPES) {
+			/*
+			 * If the user is attempting to spawn into a subset of
+			 * the known coalition types, then make sure they have
+			 * _at_least_ specified a resource coalition. If not,
+			 * the following fork1() call will implicitly force an
+			 * inheritance from 'p' and won't actually spawn the
+			 * new task into the coalitions the user specified.
+			 * (also the call to coalitions_set_roles will panic)
+			 */
+			if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
+				spawn_coalitions_release_all(coal);
+				error = EINVAL;
+				goto bad;
+			}
+		}
+do_fork1:
+#endif /* CONFIG_COALITIONS */
+
+		/*
+		 * note that this will implicitly inherit the
+		 * caller's persona (if it exists)
+		 */
+		error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
+
+#if CONFIG_COALITIONS
+		/* set the roles of this task within each given coalition */
+		if (error == 0) {
+			kr = coalitions_set_roles(coal, get_threadtask(imgp->ip_new_thread), coal_role);
+			if (kr != KERN_SUCCESS)
+				error = EINVAL;
+		}
+
+		/* drop our references and activations - fork1() now holds them */
+		spawn_coalitions_release_all(coal);
+#endif /* CONFIG_COALITIONS */
+		if (error != 0) {
 			goto bad;
+		}
 		imgp->ip_flags |= IMGPF_SPAWN;	/* spawn w/o exec */
 		spawn_no_exec = TRUE;		/* used in later tests */
-		DTRACE_PROC1(create, proc_t, p);
+
+#if CONFIG_PERSONAS
+		/*
+		 * If the parent isn't in a persona (launchd), and
+		 * hasn't specified a new persona for the process,
+		 * then we'll put the process into the system persona
+		 *
+		 * TODO: this will have to be re-worked because as of
+		 *       now, without any launchd adoption, the resulting
+		 *       xpcproxy process will not have sufficient
+		 *       privileges to setuid/gid.
+		 */
+#if 0
+		if (!proc_has_persona(p) && imgp->ip_px_persona == NULL) {
+			MALLOC(px_persona, struct _posix_spawn_persona_info *,
+			       sizeof(*px_persona), M_TEMP, M_WAITOK|M_ZERO);
+			if (px_persona == NULL) {
+				error = ENOMEM;
+				goto bad;
+			}
+			px_persona->pspi_id = persona_get_id(g_system_persona);
+			imgp->ip_px_persona = px_persona;
+		}
+#endif /* 0 */
+#endif /* CONFIG_PERSONAS */
 	}
 
-	if (spawn_no_exec)
+	if (spawn_no_exec) {
 		p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
+		
+		/*
+		 * We had to wait until this point before firing the
+		 * proc:::create probe, otherwise p would not point to the
+		 * child process.
+		 */
+		DTRACE_PROC1(create, proc_t, p);
+	}
 	assert(p != NULL);
 
 	/* By default, the thread everyone plays with is the parent */
@@ -2007,6 +2522,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	/* Has spawn port actions? */
 	if (imgp->ip_px_spa != NULL) {
 		boolean_t is_adaptive = FALSE;
+		boolean_t portwatch_present = FALSE;
 
 		/* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
 		if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
@@ -2025,8 +2541,14 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 		}
 
 		if ((error = exec_handle_port_actions(imgp,
-		    imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &need_portwatch, portwatch_ports)) != 0) 
+		    imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &portwatch_present, portwatch_ports)) != 0) 
 			goto bad;
+
+		if (portwatch_present == FALSE && portwatch_ports != NULL) {
+			FREE(portwatch_ports, M_TEMP);
+			portwatch_ports = NULL;
+			portwatch_count = 0;
+		}
 	}
 
 	/* Has spawn attr? */
@@ -2070,16 +2592,35 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 			}
 		}
 
+#if CONFIG_PERSONAS
+		if (spawn_no_exec && imgp->ip_px_persona != NULL) {
+			/*
+			 * If we were asked to spawn a process into a new persona,
+			 * do the credential switch now (which may override the UID/GID
+			 * inherit done just above). It's important to do this switch
+			 * before image activation both for reasons stated above, and
+			 * to ensure that the new persona has access to the image/file
+			 * being executed.
+			 */
+			error = spawn_persona_adopt(p, imgp->ip_px_persona);
+			if (error != 0)
+				goto bad;
+		}
+#endif /* CONFIG_PERSONAS */
+#if !SECURE_KERNEL
 		/*
 		 * Disable ASLR for the spawned process.
-		 */
-		/*
-		 * But only do so if we are not embedded; embedded allows for a
-		 * boot-arg (-disable_aslr) to deal with this (which itself is
-		 * only honored on DEVELOPMENT or DEBUG builds of xnu).
+		 *
+		 * But only do so if we are not embedded + RELEASE.
+		 * While embedded allows for a boot-arg (-disable_aslr)
+		 * to deal with this (which itself is only honored on
+		 * DEVELOPMENT or DEBUG builds of xnu), it is often
+		 * useful or necessary to disable ASLR on a per-process
+		 * basis for unit testing and debugging.
 		 */
 		if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
 			OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
+#endif /* !SECURE_KERNEL */
 
 		/*
 		 * Forcibly disallow execution from data pages for the spawned process
@@ -2120,7 +2661,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	 * Activate the image
 	 */
 	error = exec_activate_image(imgp);
-
+	
 	if (error == 0) {
 		/* process completed the exec */
 		exec_done = TRUE;
@@ -2189,56 +2730,17 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 			 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
 			 * an entitlement to configure the monitor a certain way seems silly, since
 			 * whomever is turning it on could just as easily choose not to do so.
-			 *
-			 * XXX - Ignore the parameters that we get from userland. The spawnattr method of
-			 * activating the monitor always gets the system default parameters. Once we have
-			 * an explicit spawn SPI for configuring the defaults, we can revert this to
-			 * respect the params passed in from userland.
 			 */
 			error = proc_set_task_ruse_cpu(p->task,
 					TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
-					PROC_POLICY_CPUMON_DEFAULTS, 0,
+					px_sa.psa_cpumonitor_percent,
+					px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
 					0, TRUE);
 		}
 	}
 
 bad:
 
-	if (portwatch_ports != NULL) {
-		int needboost = 0;
-
-		/*
-		 * Mark the ports as destined to be handed off to the new task, and
-		 * transfer any boosts to the new task.
-		 * We need to release the rights even if the posix_spawn has failed.
-		 */
-		if (need_portwatch != 0) {
-			for (int i = 0; i < portwatch_count; i++) {
-				ipc_port_t port = NULL;
-				
-				if ((port = portwatch_ports[i]) != NULL) {
-					int boost = 0;
-					if (error == 0)
-						task_add_importance_watchport(p->task, p->p_pid, port, &boost);
-					ipc_port_release_send(port);
-					needboost += boost;
-				}
-			}
-		}
-
-		if (needboost != 0) {			
-			/*
-			 * Apply the boost count found on the ports, which will keep the
-			 * newly created process out of background until it handles the incoming messages.
-			 */
-			task_hold_multiple_assertion(p->task, needboost);
-		}
-
-		FREE(portwatch_ports, M_TEMP);
-		portwatch_ports = NULL;
-		portwatch_count = 0;
-	}
-
 	if (error == 0) {
 		/* reset delay idle sleep status if set */
 		if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
@@ -2266,10 +2768,36 @@ bad:
 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
 		/* Has jetsam attributes? */
 		if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
-			memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
-			    TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
+			/*
+			 * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
+			 * longer relevant, as background limits are described via the inactive limit slots.
+			 * At the kernel layer, the flag is ignored.
+			 *
+			 * That said, if POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
+			 * we attempt to mimic previous behavior by forcing the BG limit data into the
+			 * inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode.
+			 * The kernel layer will flag this mapping.
+			 */
+			if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
+				memorystatus_update(p, px_sa.psa_priority, 0,
+					    (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
+					    TRUE,
+					    -1, TRUE,
+					    px_sa.psa_memlimit_inactive, FALSE,
+					    (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
+			} else {
+				memorystatus_update(p, px_sa.psa_priority, 0,
+					    (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
+					    TRUE,
+					    px_sa.psa_memlimit_active,
+					    (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
+					    px_sa.psa_memlimit_inactive,
+					    (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL),
+					    (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
+			}
+
 		}
-#endif
+#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
 	}
 
 	/*
@@ -2297,6 +2825,64 @@ bad:
 		/* notify only if it has not failed due to FP Key error */
 		if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
 			proc_knote(p, NOTE_EXEC);
+	} else if (error == 0) {
+		/* reset the importance attribute from our previous life */
+		task_importance_reset(p->task);
+	}
+
+	if (error == 0) {
+		/*
+		 * We need to initialize the bank context behind the protection of
+		 * the proc_trans lock to prevent a race with exit. We can't do this during
+		 * exec_activate_image because task_bank_init checks entitlements that
+		 * aren't loaded until subsequent calls (including exec_resettextvp).
+		 */
+		error = proc_transstart(p, 0, 0);
+
+		if (error == 0) {
+			task_bank_init(p->task);
+			proc_transend(p, 0);
+		}
+	}
+
+
+	/*
+	 * Apply the spawnattr policy, apptype (which primes the task for importance donation),
+	 * and bind any portwatch ports to the new task.
+	 * This must be done after the exec so that the child's thread is ready,
+	 * and after the in transit state has been released, because priority is
+	 * dropped here so we need to be prepared for a potentially long preemption interval
+	 *
+	 * TODO: Consider splitting this up into separate phases
+	 */
+	if (error == 0 && imgp->ip_px_sa != NULL) {
+		struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
+
+		exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, psa->psa_darwin_role,
+		                              portwatch_ports, portwatch_count);
+	}
+
+	/* Apply the main thread qos */
+	if (error == 0) {
+		thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread();
+
+		task_set_main_thread_qos(p->task, main_thread);
+	}
+
+	/*
+	 * Release any ports we kept around for binding to the new task.
+	 * We need to release the rights even if the posix_spawn has failed.
+	 */
+	if (portwatch_ports != NULL) {
+		for (int i = 0; i < portwatch_count; i++) {
+			ipc_port_t port = NULL;
+			if ((port = portwatch_ports[i]) != NULL) {
+				ipc_port_release_send(port);
+			}
+		}
+		FREE(portwatch_ports, M_TEMP);
+		portwatch_ports = NULL;
+		portwatch_count = 0;
 	}
 
 	/*
@@ -2329,7 +2915,10 @@ bad:
 			FREE(imgp->ip_px_sfa, M_TEMP);
 		if (imgp->ip_px_spa != NULL)
 			FREE(imgp->ip_px_spa, M_TEMP);
-		
+#if CONFIG_PERSONAS
+		if (imgp->ip_px_persona != NULL)
+			FREE(imgp->ip_px_persona, M_TEMP);
+#endif
 #if CONFIG_MACF
 		if (imgp->ip_px_smpx != NULL)
 			spawn_free_macpolicyinfo(imgp->ip_px_smpx);
@@ -2377,6 +2966,9 @@ bad:
 			DTRACE_PROC(exec__success);
 		}
 	}
+
+	if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
+		(*dtrace_proc_waitfor_hook)(p);
 #endif
 
 	/* Return to both the parent and the child? */
@@ -2402,6 +2994,7 @@ bad:
 				p->exit_thread = current_thread();
 				proc_unlock(p);
 				exit1(p, 1, (int *)NULL);
+				proc_clear_return_wait(p, imgp->ip_new_thread);
 				if (exec_done == FALSE) {
 					task_deallocate(get_threadtask(imgp->ip_new_thread));
 					thread_deallocate(imgp->ip_new_thread);
@@ -2409,6 +3002,7 @@ bad:
 			} else {
 				/* someone is doing it for us; just skip it */
 				proc_unlock(p);
+				proc_clear_return_wait(p, imgp->ip_new_thread);
 			}
 		} else {
 
@@ -2421,7 +3015,7 @@ bad:
 			 * queue references on them, so we should be fine
 			 * with the delayed resume of the thread here.
 			 */
-			(void)thread_resume(imgp->ip_new_thread);
+			proc_clear_return_wait(p, imgp->ip_new_thread);
 		}
 	}
 	if (bufp != NULL) {
@@ -2505,9 +3099,9 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
 	struct vnode_attr *vap;
 	struct vnode_attr *origvap;
 	int error;
-	char alt_p_comm[sizeof(p->p_comm)] = {0};	/* for PowerPC */
 	int is_64 = IS_64BIT_PROCESS(p);
 	struct vfs_context context;
+	struct uthread	*uthread;
 
 	context.vc_thread = current_thread();
 	context.vc_ucred = kauth_cred_proc_ref(p);	/* XXX must NOT be kauth_cred_get() */
@@ -2532,8 +3126,13 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
 	imgp->ip_origvattr = origvap;
 	imgp->ip_vfs_context = &context;
 	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
-	imgp->ip_p_comm = alt_p_comm;		/* for PowerPC */
 	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
+	imgp->ip_mac_return = 0;
+
+	uthread = get_bsdthread_info(current_thread());
+	if (uthread->uu_flag & UT_VFORK) {
+		imgp->ip_flags |= IMGPF_VFORK_EXEC;
+	}
 
 #if CONFIG_MACF
 	if (uap->mac_p != USER_ADDR_NULL) {
@@ -2569,17 +3168,43 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
 	if (imgp->ip_scriptlabelp)
 		mac_vnode_label_free(imgp->ip_scriptlabelp);
 #endif
+
 	if (!error) {
-		struct uthread	*uthread;
+		/*
+		 * We need to initialize the bank context behind the protection of
+		 * the proc_trans lock to prevent a race with exit. We can't do this during
+		 * exec_activate_image because task_bank_init checks entitlements that
+		 * aren't loaded until subsequent calls (including exec_resettextvp).
+		 */
+		error = proc_transstart(p, 0, 0);
+
+		if (!error) {
+			task_bank_init(p->task);
+			proc_transend(p, 0);
+		}
+	}
 
+	if (!error) {
 		/* Sever any extant thread affinity */
 		thread_affinity_exec(current_thread());
 
+		thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread();
+
+		task_set_main_thread_qos(p->task, main_thread);
+
+		/* reset task importance */
+		task_importance_reset(p->task);
+
 		DTRACE_PROC(exec__success);
-		uthread = get_bsdthread_info(current_thread());
-		if (uthread->uu_flag & UT_VFORK) {
+
+#if CONFIG_DTRACE
+		if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
+			(*dtrace_proc_waitfor_hook)(p);
+#endif
+
+		if (imgp->ip_flags & IMGPF_VFORK_EXEC) {
 			vfork_return(p, retval, p->p_pid);
-			(void)thread_resume(imgp->ip_new_thread);
+			proc_clear_return_wait(p, imgp->ip_new_thread);
 		}
 	} else {
 		DTRACE_PROC1(exec__failure, int, error);
@@ -3227,17 +3852,17 @@ exec_add_apple_strings(struct image_params *imgp)
 
 	/* adding the NANO_ENGAGE_KEY key */
 	if (imgp->ip_px_sa) {
-	    int proc_type = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_apptype) & POSIX_SPAWN_PROC_TYPE_MASK;
+		int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
 
-	    if (proc_type == POSIX_SPAWN_PROC_TYPE_APP_DEFAULT || proc_type == POSIX_SPAWN_PROC_TYPE_APP_TAL) {
-		char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1];
+		if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
+			char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1];
 
-		snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY);
-		error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE);
-		if(error)
-			goto bad;
-		imgp->ip_applec++;
-	    }
+			snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY);
+			error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE);
+			if (error)
+				goto bad;
+			imgp->ip_applec++;
+		}
 	}
 
 	/*
@@ -3419,9 +4044,11 @@ exec_handle_sugid(struct image_params *imgp)
 	proc_t			p = vfs_context_proc(imgp->ip_vfs_context);
 	int			i;
 	int			leave_sugid_clear = 0;
+	int			mac_reset_ipc = 0;
 	int			error = 0;
 #if CONFIG_MACF
-	int			mac_transition;
+	int			mac_transition, disjoint_cred = 0;
+	int 		label_update_return = 0;
 
 	/*
 	 * Determine whether a call to update the MAC label will result in the
@@ -3435,6 +4062,7 @@ exec_handle_sugid(struct image_params *imgp)
 	mac_transition = mac_cred_check_label_update_execve(
 							imgp->ip_vfs_context,
 							imgp->ip_vp,
+							imgp->ip_arch_offset,
 							imgp->ip_scriptvp,
 							imgp->ip_scriptlabelp,
 							imgp->ip_execlabelp,
@@ -3504,14 +4132,27 @@ handle_mac_transition:
 		 * modifying any others sharing it.
 		 */
 		if (mac_transition) { 
-			kauth_cred_t	my_cred;
-			if (kauth_proc_label_update_execve(p,
+			/*
+			 * This hook may generate upcalls that require
+			 * importance donation from the kernel.
+			 * (23925818)
+			 */
+			thread_t thread = current_thread();
+			thread_enable_send_importance(thread, TRUE);
+			kauth_proc_label_update_execve(p,
 						imgp->ip_vfs_context,
 						imgp->ip_vp, 
+						imgp->ip_arch_offset,
 						imgp->ip_scriptvp,
 						imgp->ip_scriptlabelp,
 						imgp->ip_execlabelp,
-						imgp->ip_px_smpx)) {
+						&imgp->ip_csflags,
+						imgp->ip_px_smpx,
+						&disjoint_cred, /* will be non zero if disjoint */
+						&label_update_return);
+			thread_enable_send_importance(thread, FALSE);
+
+			if (disjoint_cred) {
 				/*
 				 * If updating the MAC label resulted in a
 				 * disjoint credential, flag that we need to
@@ -3523,11 +4164,12 @@ handle_mac_transition:
 				 */
 				leave_sugid_clear = 0;
 			}
-
-			my_cred = kauth_cred_proc_ref(p);
-			mac_task_label_update_cred(my_cred, p->task);
-			kauth_cred_unref(&my_cred);
+			
+			imgp->ip_mac_return = label_update_return;
 		}
+		
+		mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
+
 #endif	/* CONFIG_MACF */
 
 		/*
@@ -3539,17 +4181,19 @@ handle_mac_transition:
 		 * So we don't set the P_SUGID or reset mach ports and fds 
 		 * on the basis of simply running this code.
 		 */
-		if (!leave_sugid_clear) {
+		if (mac_reset_ipc || !leave_sugid_clear) {
 			/*
-		 	 * Have mach reset the task and thread ports.
-		 	 * We don't want anyone who had the ports before
-		 	 * a setuid exec to be able to access/control the
-		 	 * task/thread after.
-		 	 */
+			 * Have mach reset the task and thread ports.
+			 * We don't want anyone who had the ports before
+			 * a setuid exec to be able to access/control the
+			 * task/thread after.
+			 */
 			ipc_task_reset(p->task);
 			ipc_thread_reset((imgp->ip_new_thread != NULL) ?
 				 	 imgp->ip_new_thread : current_thread());
+		}
 
+		if (!leave_sugid_clear) {
 			/*
 			 * Flag the process as setuid.
 			 */
@@ -3580,6 +4224,7 @@ handle_mac_transition:
 				struct fileproc *fp;
 				int indx;
 				int flag;
+				struct nameidata *ndp = NULL;
 
 				if (i == 0)
 					flag = FREAD;
@@ -3590,13 +4235,17 @@ handle_mac_transition:
 				    &fp, &indx, imgp->ip_vfs_context)) != 0)
 					continue;
 
-				struct nameidata nd1;
+				MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
+				if (ndp == NULL) {
+					error = ENOMEM;
+					break;
+				}
 
-				NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
+				NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
 				    CAST_USER_ADDR_T("/dev/null"),
 				    imgp->ip_vfs_context);
 
-				if ((error = vn_open(&nd1, flag, 0)) != 0) {
+				if ((error = vn_open(ndp, flag, 0)) != 0) {
 					fp_free(p, indx, fp);
 					break;
 				}
@@ -3605,14 +4254,16 @@ handle_mac_transition:
 
 				fg->fg_flag = flag;
 				fg->fg_ops = &vnops;
-				fg->fg_data = nd1.ni_vp;
+				fg->fg_data = ndp->ni_vp;
 
-				vnode_put(nd1.ni_vp);
+				vnode_put(ndp->ni_vp);
 
 				proc_fdlock(p);
 				procfdtbl_releasefd(p, indx, NULL);
 				fp_drop(p, indx, fp, 1);
 				proc_fdunlock(p);
+
+				FREE(ndp, M_TEMP);
 			}
 		}
 	}
@@ -3630,6 +4281,7 @@ handle_mac_transition:
 			goto handle_mac_transition;
 		}
 	}
+
 #endif	/* CONFIG_MACF */
 
 	/*
@@ -3737,66 +4389,68 @@ create_unix_stack(vm_map_t map, load_result_t* load_result,
 
 #include <sys/reboot.h>
 
-static char		init_program_name[128] = "/sbin/launchd";
-
-struct execve_args	init_exec_args;
-
 /*
- * load_init_program
+ * load_init_program_at_path
  *
  * Description:	Load the "init" program; in most cases, this will be "launchd"
  *
  * Parameters:	p			Process to call execve() to create
  *					the "init" program
+ *		scratch_addr		Page in p, scratch space
+ *		path			NUL-terminated path
  *
- * Returns:	(void)
+ * Returns:	0			Success
+ *		!0			See execve/mac_execve for error codes
  *
  * Notes:	The process that is passed in is the first manufactured
  *		process on the system, and gets here via bsd_ast() firing
  *		for the first time.  This is done to ensure that bsd_init()
  *		has run to completion.
+ *
+ *		The address map of the first manufactured process is 32 bit.
+ *		WHEN this becomes 64b, this code will fail; it needs to be
+ *		made 64b capable.
  */
-void
-load_init_program(proc_t p)
+static int
+load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
 {
-	vm_offset_t	init_addr;
-	int		argc = 0;
 	uint32_t argv[3];
-	int			error;
-	int 		retval[2];
+ 	uint32_t argc = 0;
+	int retval[2];
+	struct execve_args init_exec_args;
 
 	/*
-	 * Copy out program name.
+	 * Validate inputs and pre-conditions
 	 */
+	assert(p);
+	assert(scratch_addr);
+	assert(path);
 
-	init_addr = VM_MIN_ADDRESS;
-	(void) vm_allocate(current_map(), &init_addr, PAGE_SIZE,
-				VM_FLAGS_ANYWHERE);
-	if (init_addr == 0)
-		init_addr++;
+	if (IS_64BIT_PROCESS(p)) {
+		panic("Init against 64b primordial proc not implemented");
+	}
 
-	(void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr),
-			(unsigned) sizeof(init_program_name)+1);
+	/*
+	 * Copy out program name.
+	 */
+	size_t path_length = strlen(path) + 1;
+	(void) copyout(path, scratch_addr, path_length);
 
-	argv[argc++] = (uint32_t)init_addr;
-	init_addr += sizeof(init_program_name);
-	init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
+	argv[argc++] = (uint32_t)scratch_addr;
+	scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, 16);
 
 	/*
 	 * Put out first (and only) argument, similarly.
-	 * Assumes everything fits in a page as allocated
-	 * above.
+	 * Assumes everything fits in the scratch page supplied by the caller.
 	 */
 	if (boothowto & RB_SINGLE) {
 		const char *init_args = "-s";
+		size_t init_args_length = strlen(init_args)+1;
 
-		copyout(init_args, CAST_USER_ADDR_T(init_addr),
-			strlen(init_args));
-
-		argv[argc++] = (uint32_t)init_addr;
-		init_addr += strlen(init_args);
-		init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
+		copyout(init_args, scratch_addr, init_args_length);
 
+		argv[argc++] = (uint32_t)scratch_addr;
+		scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, 16);
 	}
 
 	/*
@@ -3807,27 +4461,113 @@ load_init_program(proc_t p)
 	/*
 	 * Copy out the argument list.
 	 */
-	
-	(void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr),
-			(unsigned) sizeof(argv));
+	(void) copyout(argv, scratch_addr, sizeof(argv));
 
 	/*
 	 * Set up argument block for fake call to execve.
 	 */
-
 	init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
-	init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
-	init_exec_args.envp = CAST_USER_ADDR_T(0);
-	
+	init_exec_args.argp = scratch_addr;
+	init_exec_args.envp = USER_ADDR_NULL;
+
 	/*
-	 * So that mach_init task is set with uid,gid 0 token 
+	 * So that init task is set with uid,gid 0 token
 	 */
 	set_security_token(p);
 
-	error = execve(p,&init_exec_args,retval);
-	if (error)
-		panic("Process 1 exec of %s failed, errno %d",
-		      init_program_name, error);
+	return execve(p, &init_exec_args, retval);
+}
+
+static const char * init_programs[] = {	/* fallback paths, tried in array order by load_init_program() */
+#if DEBUG
+	"/usr/local/sbin/launchd.debug",
+#endif
+#if DEVELOPMENT || DEBUG
+	/* Remove DEBUG conditional when <rdar://problem/17931977> is fixed */
+	"/usr/local/sbin/launchd.development",
+#endif
+	"/sbin/launchd",
+};
+
+/*
+ * load_init_program
+ *
+ * Description:	Load the "init" program; in most cases, this will be "launchd"
+ *
+ * Parameters:	p			Process to call execve() to create
+ *					the "init" program
+ *
+ * Returns:	(void); panics if the selected init program(s) cannot be exec'ed
+ *
+ * Notes:	The process that is passed in is the first manufactured
+ *		process on the system, and gets here via bsd_ast() firing
+ *		for the first time.  This is done to ensure that bsd_init()
+ *		has run to completion.
+ *
+ *		In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
+ *		may be used to select a specific launchd executable. As with
+ *		the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
+ *		will force /sbin/launchd to be selected.
+ *
+ *		The DEBUG kernel will continue to check for a .development
+ *		version until <rdar://problem/17931977> is fixed.
+ *
+ *              Search order by build:
+ *
+ * DEBUG	DEVELOPMENT	RELEASE		PATH
+ * ----------------------------------------------------------------------------------
+ * 1		1		NA		/usr/local/sbin/launchd.$LAUNCHDSUFFIX
+ * 2		NA		NA		/usr/local/sbin/launchd.debug
+ * 3		2		NA		/usr/local/sbin/launchd.development
+ * 4		3		1		/sbin/launchd
+ */
+void
+load_init_program(proc_t p)
+{
+	uint32_t i;
+	int error;
+	vm_offset_t scratch_addr = VM_MIN_ADDRESS;
+	/* One page of scratch space for the path and argv. NOTE(review): the result is not checked. */
+	(void) vm_allocate(current_map(), &scratch_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE);
+#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
+	(void) memorystatus_init_at_boot_snapshot();
+#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
+
+#if DEBUG || DEVELOPMENT
+	/* Check for boot-arg suffix first */
+	char launchd_suffix[64];
+	if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
+		char launchd_path[128];
+		boolean_t is_release_suffix = ((launchd_suffix[0] == 0) ||
+					       (strcmp(launchd_suffix, "release") == 0));
+		/* An empty suffix or "release" means: exec /sbin/launchd, or panic if that fails. */
+		if (is_release_suffix) {
+			error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), "/sbin/launchd");
+			if (!error)
+				return;
+
+			panic("Process 1 exec of launchd.release failed, errno %d", error);
+		} else {
+			strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path));
+			strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
+
+			/* All the error data is lost in the loop below, don't
+			 * attempt to save it. */
+			if (!load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), launchd_path)) {
+				return;
+			}
+		}
+	}
+#endif
+	/* Fall back to the built-in candidates, in order; the first successful exec wins. */
+	error = ENOENT;
+	for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) {
+		error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), init_programs[i]);
+		if (!error)
+			return;
+	}
+	/* Every candidate failed: report the last path tried and its errno. */
+	panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i-1]), error);
 }
 
 /*
@@ -4119,6 +4859,16 @@ taskgated_required(proc_t p, boolean_t *require_success)
 	void *blob;
 	int error;
 
+	if (cs_debug > 2)
+		csvnode_print_debug(p->p_textvp);
+
+	const int can_skip_taskgated = csproc_get_platform_binary(p) && !csproc_get_platform_path(p);
+	if (can_skip_taskgated) {
+		if (cs_debug) printf("taskgated not required for: %s\n", p->p_name);
+		*require_success = FALSE;
+		return FALSE;
+	}
+
 	if ((p->p_csflags & CS_VALID) == 0) {
 		*require_success = FALSE;
 		return TRUE;
@@ -4126,14 +4876,41 @@ taskgated_required(proc_t p, boolean_t *require_success)
 
 	error = cs_entitlements_blob_get(p, &blob, &length);
 	if (error == 0 && blob != NULL) {
-		*require_success = TRUE; /* fatal on the desktop when entitlements are present */
+		/*
+		 * fatal on the desktop when entitlements are present,
+		 * unless we started in single-user mode 
+		 */
+		if ((boothowto & RB_SINGLE) == 0)
+			*require_success = TRUE;
+		/*
+		 * Allow initproc to run without causing taskgated to launch
+		 */
+		if (p == initproc) {
+			*require_success = FALSE;
+			return FALSE;
+		}
+
+		if (cs_debug) printf("taskgated required for: %s\n", p->p_name);
+
 		return TRUE;
 	}
 
 	*require_success = FALSE;
-	return 0;
+	return FALSE;
 }
 
+/*
+ * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
+ *
+ * Description: Forwards to find_code_signature() and returns its result,
+ *		i.e. waits for the userspace daemon to answer our request.
+ *		Kept noinline so it is visible in stackshots, spindumps and debuggers.
+ */
+__attribute__((noinline)) int 
+__EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
+{
+	return find_code_signature(task_access_port, new_pid);
+}
 
 static int
 check_for_signature(proc_t p, struct image_params *imgp)
@@ -4144,6 +4921,8 @@ check_for_signature(proc_t p, struct image_params *imgp)
 	boolean_t unexpected_failure = FALSE;
 	unsigned char hash[SHA1_RESULTLEN];
 	boolean_t require_success = FALSE;
+	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
+	int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
 
 	/*
 	 * Override inherited code signing flags with the
@@ -4158,6 +4937,17 @@ check_for_signature(proc_t p, struct image_params *imgp)
 	if(p->p_csflags & (CS_HARD|CS_KILL)) {
 		vm_map_switch_protect(get_task_map(p->task), TRUE);
 	}
+	
+	/*
+	 * Image activation may have been failed by policy: this is
+	 * unexpected, but the security framework did not approve the
+	 * exec, so kill the process and return immediately.
+	 */
+	if (imgp->ip_mac_return != 0) {
+		error = imgp->ip_mac_return;
+		unexpected_failure = TRUE;
+		goto done;
+	}
 
 	/* check if callout to taskgated is needed */
 	if (!taskgated_required(p, &require_success)) {
@@ -4181,7 +4971,7 @@ check_for_signature(proc_t p, struct image_params *imgp)
 	 * rpc call, taskgated died, mig server died etc.).
 	 */
 
-	kr = find_code_signature(port, p->p_pid);
+	kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
 	switch (kr) {
 	case KERN_SUCCESS:
 		error = 0;
@@ -4213,7 +5003,12 @@ done:
 		if (!unexpected_failure)
 			p->p_csflags |= CS_KILLED;
 		/* make very sure execution fails */
-		psignal(p, SIGKILL);
+		if (vfexec || spawn) {
+			psignal_vfork(p, p->task, imgp->ip_new_thread, SIGKILL);
+			error = 0;
+		} else {
+			psignal(p, SIGKILL);
+		}
 	}
 	return error;
 }