git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2019 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
	29	/*
	30	* Mach Operating System
	31	* Copyright (c) 1987 Carnegie-Mellon University
	32	* All rights reserved. The CMU software License Agreement specifies
	33	* the terms and conditions for use and redistribution.
	34	*/
	35
	36	/*-
	37	* Copyright (c) 1982, 1986, 1991, 1993
	38	* The Regents of the University of California. All rights reserved.
	39	* (c) UNIX System Laboratories, Inc.
	40	* All or some portions of this file are derived from material licensed
	41	* to the University of California by American Telephone and Telegraph
	42	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	43	* the permission of UNIX System Laboratories, Inc.
	44	*
	45	* Redistribution and use in source and binary forms, with or without
	46	* modification, are permitted provided that the following conditions
	47	* are met:
	48	* 1. Redistributions of source code must retain the above copyright
	49	* notice, this list of conditions and the following disclaimer.
	50	* 2. Redistributions in binary form must reproduce the above copyright
	51	* notice, this list of conditions and the following disclaimer in the
	52	* documentation and/or other materials provided with the distribution.
	53	* 3. All advertising materials mentioning features or use of this software
	54	* must display the following acknowledgement:
	55	* This product includes software developed by the University of
	56	* California, Berkeley and its contributors.
	57	* 4. Neither the name of the University nor the names of its contributors
	58	* may be used to endorse or promote products derived from this software
	59	* without specific prior written permission.
	60	*
	61	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	62	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	63	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	64	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	65	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	66	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	67	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	68	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	69	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	70	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	71	* SUCH DAMAGE.
	72	*
	73	* from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
	74	*/
	75	/*
	76	* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
	77	* support for mandatory and extensible security protections. This notice
	78	* is included in support of clause 2.2 (b) of the Apple Public License,
	79	* Version 2.0.
	80	*/
	81	#include <machine/reg.h>
	82	#include <machine/cpu_capabilities.h>
	83
	84	#include <sys/param.h>
	85	#include <sys/systm.h>
	86	#include <sys/filedesc.h>
	87	#include <sys/kernel.h>
	88	#include <sys/proc_internal.h>
	89	#include <sys/kauth.h>
	90	#include <sys/user.h>
	91	#include <sys/socketvar.h>
	92	#include <sys/malloc.h>
	93	#include <sys/namei.h>
	94	#include <sys/mount_internal.h>
	95	#include <sys/vnode_internal.h>
	96	#include <sys/file_internal.h>
	97	#include <sys/stat.h>
	98	#include <sys/uio_internal.h>
	99	#include <sys/acct.h>
	100	#include <sys/exec.h>
	101	#include <sys/kdebug.h>
	102	#include <sys/signal.h>
	103	#include <sys/aio_kern.h>
	104	#include <sys/sysproto.h>
	105	#include <sys/persona.h>
	106	#include <sys/reason.h>
	107	#if SYSV_SHM
	108	#include <sys/shm_internal.h> /* shmexec() */
	109	#endif
	110	#include <sys/ubc_internal.h> /* ubc_map() */
	111	#include <sys/spawn.h>
	112	#include <sys/spawn_internal.h>
	113	#include <sys/process_policy.h>
	114	#include <sys/codesign.h>
	115	#include <sys/random.h>
	116	#include <crypto/sha1.h>
	117
	118	#include <libkern/libkern.h>
	119
	120	#include <security/audit/audit.h>
	121
	122	#include <ipc/ipc_types.h>
	123
	124	#include <mach/mach_param.h>
	125	#include <mach/mach_types.h>
	126	#include <mach/port.h>
	127	#include <mach/task.h>
	128	#include <mach/task_access.h>
	129	#include <mach/thread_act.h>
	130	#include <mach/vm_map.h>
	131	#include <mach/mach_vm.h>
	132	#include <mach/vm_param.h>
	133
	134	#include <kern/sched_prim.h> /* thread_wakeup() */
	135	#include <kern/affinity.h>
	136	#include <kern/assert.h>
	137	#include <kern/task.h>
	138	#include <kern/coalition.h>
	139	#include <kern/policy_internal.h>
	140	#include <kern/kalloc.h>
	141
	142	#include <os/log.h>
	143
	144	#if CONFIG_MACF
	145	#include <security/mac_framework.h>
	146	#include <security/mac_mach_internal.h>
	147	#endif
	148
	149	#if CONFIG_AUDIT
	150	#include <bsm/audit_kevents.h>
	151	#endif
	152
	153	#if CONFIG_ARCADE
	154	#include <kern/arcade.h>
	155	#endif
	156
	157	#include <vm/vm_map.h>
	158	#include <vm/vm_kern.h>
	159	#include <vm/vm_protos.h>
	160	#include <vm/vm_kern.h>
	161	#include <vm/vm_fault.h>
	162	#include <vm/vm_pageout.h>
	163
	164	#include <kdp/kdp_dyld.h>
	165
	166	#include <machine/machine_routines.h>
	167	#include <machine/pal_routines.h>
	168
	169	#include <pexpert/pexpert.h>
	170
	171	#if CONFIG_MEMORYSTATUS
	172	#include <sys/kern_memorystatus.h>
	173	#endif
	174
	175	#include <IOKit/IOBSD.h>
	176
	177	extern boolean_t vm_darkwake_mode;
	178
	179	extern int bootarg_execfailurereports; /* bsd_init.c */
	180
	181	#if CONFIG_DTRACE
	182	/* Do not include dtrace.h, it redefines kmem_[alloc/free] */
	183	extern void dtrace_proc_exec(proc_t);
	184	extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);
	185
	186	/*
	187	* Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
	188	* we will store its value before actually calling it.
	189	*/
	190	static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;
	191
	192	#include <sys/dtrace_ptss.h>
	193	#endif
	194
	195	/* support for child creation in exec after vfork */
	196	thread_t fork_create_child(task_t parent_task,
	197	coalition_t *parent_coalition,
	198	proc_t child_proc,
	199	int inherit_memory,
	200	int is_64bit_addr,
	201	int is_64bit_data,
	202	int in_exec);
	203	void vfork_exit(proc_t p, int rv);
	204	extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
	205	extern void task_set_did_exec_flag(task_t task);
	206	extern void task_clear_exec_copy_flag(task_t task);
	207	proc_t proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread);
	208	boolean_t task_is_active(task_t);
	209	boolean_t thread_is_active(thread_t thread);
	210	void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread);
	211	void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task);
	212	extern void ipc_importance_release(void *elem);
	213	extern boolean_t task_has_watchports(task_t task);
	214
	215	/*
	216	* Mach things for which prototypes are unavailable from Mach headers
	217	*/
	218	#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
	219	void ipc_task_reset(
	220	task_t task);
	221	void ipc_thread_reset(
	222	thread_t thread);
	223	kern_return_t ipc_object_copyin(
	224	ipc_space_t space,
	225	mach_port_name_t name,
	226	mach_msg_type_name_t msgt_name,
	227	ipc_object_t *objectp,
	228	mach_port_context_t context,
	229	mach_msg_guard_flags_t *guard_flags,
	230	uint32_t kmsg_flags);
	231	void ipc_port_release_send(ipc_port_t);
	232
	#if DEVELOPMENT || DEBUG
	/* Prototype is only visible on DEVELOPMENT and DEBUG kernel builds. */
	void task_importance_update_owner_info(task_t);
	#endif
	236
	237	extern struct savearea *get_user_regs(thread_t);
	238
	239	__attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);
	240
	241	#include <kern/thread.h>
	242	#include <kern/task.h>
	243	#include <kern/ast.h>
	244	#include <kern/mach_loader.h>
	245	#include <kern/mach_fat.h>
	246	#include <mach-o/fat.h>
	247	#include <mach-o/loader.h>
	248	#include <machine/vmparam.h>
	249	#include <sys/imgact.h>
	250
	251	#include <sys/sdt.h>
	252
	253
	254	/*
	255	* EAI_ITERLIMIT The maximum number of times to iterate an image
	256	* activator in exec_activate_image() before treating
	257	* it as malformed/corrupt.
	258	*/
	259	#define EAI_ITERLIMIT 3
	260
	/*
	 * For #! interpreter parsing
	 *
	 * IS_WHITESPACE(ch)	true when ch is a space or a tab
	 * IS_EOL(ch)		true when ch is '#' or a newline
	 *
	 * The argument is parenthesized so that an expression argument (for
	 * example a conditional expression) is compared as a whole.  Note that
	 * the argument is still evaluated twice; do not pass expressions with
	 * side effects.
	 */
	#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
	#define IS_EOL(ch) (((ch) == '#') || ((ch) == '\n'))
	266
	267	extern vm_map_t bsd_pageable_map;
	268	extern const struct fileops vnops;
	269	extern int nextpidversion;
	270
	/*
	 * Add (val - 1) to addr (as a user_addr_t) and mask off the low bits
	 * selected by (val - 1).  This is a round-up to a multiple of val only
	 * when val is a power of two.
	 */
	#define USER_ADDR_ALIGN(addr, val) \
		((((user_addr_t)(addr)) + (val) - 1) & ~((val) - 1))
	274
	275	/* Platform Code Exec Logging */
	276	static int platform_exec_logging = 0;
	277
	278	SYSCTL_DECL(_security_mac);
	279
	280	SYSCTL_INT(_security_mac, OID_AUTO, platform_exec_logging, CTLFLAG_RW, &platform_exec_logging, 0,
	281	"log cdhashes for all platform binary executions");
	282
	283	static os_log_t peLog = OS_LOG_DEFAULT;
	284
	285	struct exec_port_actions {
	286	uint32_t portwatch_count;
	287	uint32_t registered_count;
	288	ipc_port_t *portwatch_array;
	289	ipc_port_t *registered_array;
	290	};
	291
	292	struct image_params; /* Forward */
	293	static int exec_activate_image(struct image_params *imgp);
	294	static int exec_copyout_strings(struct image_params imgp, user_addr_t stackp);
	295	static int load_return_to_errno(load_return_t lrtn);
	296	static int execargs_alloc(struct image_params *imgp);
	297	static int execargs_free(struct image_params *imgp);
	298	static int exec_check_permissions(struct image_params *imgp);
	299	static int exec_extract_strings(struct image_params *imgp);
	300	static int exec_add_apple_strings(struct image_params imgp, const load_result_t load_result);
	301	static int exec_handle_sugid(struct image_params *imgp);
	302	static int sugid_scripts = 0;
	303	SYSCTL_INT(_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW \| CTLFLAG_LOCKED, &sugid_scripts, 0, "");
	304	static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
	305	static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
	306	static void exec_resettextvp(proc_t, struct image_params *);
	307	static int check_for_signature(proc_t, struct image_params *);
	308	static void exec_prefault_data(proc_t, struct image_params , load_result_t );
	309	static errno_t exec_handle_port_actions(struct image_params *imgp,
	310	struct exec_port_actions *port_actions);
	311	static errno_t exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp,
	312	uint64_t psa_darwin_role, struct exec_port_actions *port_actions);
	313	static void exec_port_actions_destroy(struct exec_port_actions *port_actions);
	314
	315	/*
	316	* exec_add_user_string
	317	*
	318	* Add the requested string to the string space area.
	319	*
	320	* Parameters; struct image_params * image parameter block
	321	* user_addr_t string to add to strings area
	322	* int segment from which string comes
	323	* boolean_t TRUE if string contributes to NCARGS
	324	*
	325	* Returns: 0 Success
	326	* !0 Failure errno from copyinstr()
	327	*
	328	* Implicit returns:
	329	* (imgp->ip_strendp) updated location of next add, if any
	330	* (imgp->ip_strspace) updated byte count of space remaining
	331	* (imgp->ip_argspace) updated byte count of space in NCARGS
	332	*/
	333	static int
	334	exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
	335	{
	336	int error = 0;
	337
	338	do {
	339	size_t len = 0;
	340	int space;
	341
	342	if (is_ncargs) {
	343	space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
	344	} else {
	345	space = imgp->ip_strspace;
	346	}
	347
	348	if (space <= 0) {
	349	error = E2BIG;
	350	break;
	351	}
	352
	353	if (!UIO_SEG_IS_USER_SPACE(seg)) {
	354	char kstr = CAST_DOWN(char , str); /* SAFE */
	355	error = copystr(kstr, imgp->ip_strendp, space, &len);
	356	} else {
	357	error = copyinstr(str, imgp->ip_strendp, space, &len);
	358	}
	359
	360	imgp->ip_strendp += len;
	361	imgp->ip_strspace -= len;
	362	if (is_ncargs) {
	363	imgp->ip_argspace -= len;
	364	}
	365	} while (error == ENAMETOOLONG);
	366
	367	return error;
	368	}
	369
	370	/*
	371	* dyld is now passed the executable path as a getenv-like variable
	372	* in the same fashion as the stack_guard and malloc_entropy keys.
	373	*/
	374	#define EXECUTABLE_KEY "executable_path="
	375
	376	/*
	377	* exec_save_path
	378	*
	379	* To support new app package launching for Mac OS X, the dyld needs the
	380	* first argument to execve() stored on the user stack.
	381	*
	382	* Save the executable path name at the bottom of the strings area and set
	383	* the argument vector pointer to the location following that to indicate
	384	* the start of the argument and environment tuples, setting the remaining
	385	* string space count to the size of the string area minus the path length.
	386	*
	387	* Parameters; struct image_params * image parameter block
	388	* char * path used to invoke program
	389	* int segment from which path comes
	390	*
	391	* Returns: int 0 Success
	392	* EFAULT Bad address
	393	* copy[in]str:EFAULT Bad address
	394	* copy[in]str:ENAMETOOLONG Filename too long
	395	*
	396	* Implicit returns:
	397	* (imgp->ip_strings) saved path
	398	* (imgp->ip_strspace) space remaining in ip_strings
	399	* (imgp->ip_strendp) start of remaining copy area
	400	* (imgp->ip_argspace) space remaining of NCARGS
	401	* (imgp->ip_applec) Initial applev[0]
	402	*
	403	* Note: We have to do this before the initial namei() since in the
	404	* path contains symbolic links, namei() will overwrite the
	405	* original path buffer contents. If the last symbolic link
	406	* resolved was a relative pathname, we would lose the original
	407	* "path", which could be an absolute pathname. This might be
	408	* unacceptable for dyld.
	409	*/
	410	static int
	411	exec_save_path(struct image_params imgp, user_addr_t path, int seg, const char *excpath)
	412	{
	413	int error;
	414	size_t len;
	415	char *kpath;
	416
	417	// imgp->ip_strings can come out of a cache, so we need to obliterate the
	418	// old path.
	419	memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);
	420
	421	len = MIN(MAXPATHLEN, imgp->ip_strspace);
	422
	423	switch (seg) {
	424	case UIO_USERSPACE32:
	425	case UIO_USERSPACE64: /* Same for copyin()... */
	426	error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
	427	break;
	428	case UIO_SYSSPACE:
	429	kpath = CAST_DOWN(char , path); / SAFE */
	430	error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
	431	break;
	432	default:
	433	error = EFAULT;
	434	break;
	435	}
	436
	437	if (!error) {
	438	bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
	439	len += strlen(EXECUTABLE_KEY);
	440
	441	imgp->ip_strendp += len;
	442	imgp->ip_strspace -= len;
	443
	444	if (excpath) {
	445	*excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
	446	}
	447	}
	448
	449	return error;
	450	}
	451
	452	/*
	453	* exec_reset_save_path
	454	*
	455	* If we detect a shell script, we need to reset the string area
	456	* state so that the interpreter can be saved onto the stack.
	457	*
	458	* Parameters; struct image_params * image parameter block
	459	*
	460	* Returns: int 0 Success
	461	*
	462	* Implicit returns:
	463	* (imgp->ip_strings) saved path
	464	* (imgp->ip_strspace) space remaining in ip_strings
	465	* (imgp->ip_strendp) start of remaining copy area
	466	* (imgp->ip_argspace) space remaining of NCARGS
	467	*
	468	*/
	469	static int
	470	exec_reset_save_path(struct image_params *imgp)
	471	{
	472	imgp->ip_strendp = imgp->ip_strings;
	473	imgp->ip_argspace = NCARGS;
	474	imgp->ip_strspace = (NCARGS + PAGE_SIZE);
	475
	476	return 0;
	477	}
	478
	479	/*
	480	* exec_shell_imgact
	481	*
	482	* Image activator for interpreter scripts. If the image begins with
	483	* the characters "#!", then it is an interpreter script. Verify the
	484	* length of the script line indicating the interpreter is not in
	485	* excess of the maximum allowed size. If this is the case, then
	486	* break out the arguments, if any, which are separated by white
	487	* space, and copy them into the argument save area as if they were
	488	* provided on the command line before all other arguments. The line
	489	* ends when we encounter a comment character ('#') or newline.
	490	*
	491	* Parameters; struct image_params * image parameter block
	492	*
	493	* Returns: -1 not an interpreter (keep looking)
	494	* -3 Success: interpreter: relookup
	495	* >0 Failure: interpreter: error number
	496	*
	497	* A return value other than -1 indicates subsequent image activators should
	498	* not be given the opportunity to attempt to activate the image.
	499	*/
	500	static int
	501	exec_shell_imgact(struct image_params *imgp)
	502	{
	503	char *vdata = imgp->ip_vdata;
	504	char *ihp;
	505	char line_startp, line_endp;
	506	char *interp;
	507
	508	/*
	509	* Make sure it's a shell script. If we've already redirected
	510	* from an interpreted file once, don't do it again.
	511	*/
	512	if (vdata[0] != '#' \|\|
	513	vdata[1] != '!' \|\|
	514	(imgp->ip_flags & IMGPF_INTERPRET) != 0) {
	515	return -1;
	516	}
	517
	518	if (imgp->ip_origcputype != 0) {
	519	/* Fat header previously matched, don't allow shell script inside */
	520	return -1;
	521	}
	522
	523	imgp->ip_flags \|= IMGPF_INTERPRET;
	524	imgp->ip_interp_sugid_fd = -1;
	525	imgp->ip_interp_buffer[0] = '\0';
	526
	527	/* Check to see if SUGID scripts are permitted. If they aren't then
	528	* clear the SUGID bits.
	529	* imgp->ip_vattr is known to be valid.
	530	*/
	531	if (sugid_scripts == 0) {
	532	imgp->ip_origvattr->va_mode &= ~(VSUID \| VSGID);
	533	}
	534
	535	/* Try to find the first non-whitespace character */
	536	for (ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++) {
	537	if (IS_EOL(*ihp)) {
	538	/* Did not find interpreter, "#!\n" */
	539	return ENOEXEC;
	540	} else if (IS_WHITESPACE(*ihp)) {
	541	/* Whitespace, like "#! /bin/sh\n", keep going. */
	542	} else {
	543	/* Found start of interpreter */
	544	break;
	545	}
	546	}
	547
	548	if (ihp == &vdata[IMG_SHSIZE]) {
	549	/* All whitespace, like "#! " */
	550	return ENOEXEC;
	551	}
	552
	553	line_startp = ihp;
	554
	555	/* Try to find the end of the interpreter+args string */
	556	for (; ihp < &vdata[IMG_SHSIZE]; ihp++) {
	557	if (IS_EOL(*ihp)) {
	558	/* Got it */
	559	break;
	560	} else {
	561	/* Still part of interpreter or args */
	562	}
	563	}
	564
	565	if (ihp == &vdata[IMG_SHSIZE]) {
	566	/* A long line, like "#! blah blah blah" without end */
	567	return ENOEXEC;
	568	}
	569
	570	/* Backtrack until we find the last non-whitespace */
	571	while (IS_EOL(ihp) \|\| IS_WHITESPACE(ihp)) {
	572	ihp--;
	573	}
	574
	575	/* The character after the last non-whitespace is our logical end of line */
	576	line_endp = ihp + 1;
	577
	578	/*
	579	* Now we have pointers to the usable part of:
	580	*
	581	* "#! /usr/bin/int first second third \n"
	582	* ^ line_startp ^ line_endp
	583	*/
	584
	585	/* copy the interpreter name */
	586	interp = imgp->ip_interp_buffer;
	587	for (ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++) {
	588	interp++ = ihp;
	589	}
	590	*interp = '\0';
	591
	592	exec_reset_save_path(imgp);
	593	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
	594	UIO_SYSSPACE, NULL);
	595
	596	/* Copy the entire interpreter + args for later processing into argv[] */
	597	interp = imgp->ip_interp_buffer;
	598	for (ihp = line_startp; (ihp < line_endp); ihp++) {
	599	interp++ = ihp;
	600	}
	601	*interp = '\0';
	602
	603	#if !SECURE_KERNEL
	604	/*
	605	* If we have an SUID or SGID script, create a file descriptor
	606	* from the vnode and pass /dev/fd/%d instead of the actual
	607	* path name so that the script does not get opened twice
	608	*/
	609	if (imgp->ip_origvattr->va_mode & (VSUID \| VSGID)) {
	610	proc_t p;
	611	struct fileproc *fp;
	612	int fd;
	613	int error;
	614
	615	p = vfs_context_proc(imgp->ip_vfs_context);
	616	error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
	617	if (error) {
	618	return error;
	619	}
	620
	621	fp->f_fglob->fg_flag = FREAD;
	622	fp->f_fglob->fg_ops = &vnops;
	623	fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
	624
	625	proc_fdlock(p);
	626	procfdtbl_releasefd(p, fd, NULL);
	627	fp_drop(p, fd, fp, 1);
	628	proc_fdunlock(p);
	629	vnode_ref(imgp->ip_vp);
	630
	631	imgp->ip_interp_sugid_fd = fd;
	632	}
	633	#endif
	634
	635	return -3;
	636	}
	637
	638
	639
	640	/*
	641	* exec_fat_imgact
	642	*
	643	* Image activator for fat 1.0 binaries. If the binary is fat, then we
	644	* need to select an image from it internally, and make that the image
	645	* we are going to attempt to execute. At present, this consists of
	646	* reloading the first page for the image with a first page from the
	647	* offset location indicated by the fat header.
	648	*
	649	* Parameters; struct image_params * image parameter block
	650	*
	651	* Returns: -1 not a fat binary (keep looking)
	652	* -2 Success: encapsulated binary: reread
	653	* >0 Failure: error number
	654	*
	655	* Important: This image activator is byte order neutral.
	656	*
	657	* Note: A return value other than -1 indicates subsequent image
	658	* activators should not be given the opportunity to attempt
	659	* to activate the image.
	660	*
	661	* If we find an encapsulated binary, we make no assertions
	662	* about its validity; instead, we leave that up to a rescan
	663	* for an activator to claim it, and, if it is claimed by one,
	664	* that activator is responsible for determining validity.
	665	*/
	666	static int
	667	exec_fat_imgact(struct image_params *imgp)
	668	{
	669	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	670	kauth_cred_t cred = kauth_cred_proc_ref(p);
	671	struct fat_header fat_header = (struct fat_header )imgp->ip_vdata;
	672	struct _posix_spawnattr *psa = NULL;
	673	struct fat_arch fat_arch;
	674	int resid, error;
	675	load_return_t lret;
	676
	677	if (imgp->ip_origcputype != 0) {
	678	/* Fat header previously matched, don't allow another fat file inside */
	679	error = -1; /* not claimed */
	680	goto bad;
	681	}
	682
	683	/* Make sure it's a fat binary */
	684	if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
	685	error = -1; /* not claimed */
	686	goto bad;
	687	}
	688
	689	/* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */
	690	lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
	691	if (lret != LOAD_SUCCESS) {
	692	error = load_return_to_errno(lret);
	693	goto bad;
	694	}
	695
	696	/* If posix_spawn binprefs exist, respect those prefs. */
	697	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
	698	if (psa != NULL && psa->psa_binprefs[0] != 0) {
	699	uint32_t pr = 0;
	700
	701	/* Check each preference listed against all arches in header */
	702	for (pr = 0; pr < NBINPREFS; pr++) {
	703	cpu_type_t pref = psa->psa_binprefs[pr];
	704	if (pref == 0) {
	705	/* No suitable arch in the pref list */
	706	error = EBADARCH;
	707	goto bad;
	708	}
	709
	710	if (pref == CPU_TYPE_ANY) {
	711	/* Fall through to regular grading */
	712	goto regular_grading;
	713	}
	714
	715	lret = fatfile_getbestarch_for_cputype(pref,
	716	(vm_offset_t)fat_header,
	717	PAGE_SIZE,
	718	imgp,
	719	&fat_arch);
	720	if (lret == LOAD_SUCCESS) {
	721	goto use_arch;
	722	}
	723	}
	724
	725	/* Requested binary preference was not honored */
	726	error = EBADEXEC;
	727	goto bad;
	728	}
	729
	730	regular_grading:
	731	/* Look up our preferred architecture in the fat file. */
	732	lret = fatfile_getbestarch((vm_offset_t)fat_header,
	733	PAGE_SIZE,
	734	imgp,
	735	&fat_arch);
	736	if (lret != LOAD_SUCCESS) {
	737	error = load_return_to_errno(lret);
	738	goto bad;
	739	}
	740
	741	use_arch:
	742	/* Read the Mach-O header out of fat_arch */
	743	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
	744	PAGE_SIZE, fat_arch.offset,
	745	UIO_SYSSPACE, (IO_UNIT \| IO_NODELOCKED),
	746	cred, &resid, p);
	747	if (error) {
	748	goto bad;
	749	}
	750
	751	if (resid) {
	752	memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
	753	}
	754
	755	/* Success. Indicate we have identified an encapsulated binary */
	756	error = -2;
	757	imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
	758	imgp->ip_arch_size = (user_size_t)fat_arch.size;
	759	imgp->ip_origcputype = fat_arch.cputype;
	760	imgp->ip_origcpusubtype = fat_arch.cpusubtype;
	761
	762	bad:
	763	kauth_cred_unref(&cred);
	764	return error;
	765	}
	766
	767	static int
	768	activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result)
	769	{
	770	int ret;
	771
	772	task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
	773	task_set_64bit(task, result->is_64bit_addr, result->is_64bit_data);
	774	if (result->is_64bit_addr) {
	775	OSBitOrAtomic(P_LP64, &p->p_flag);
	776	} else {
	777	OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
	778	}
	779	task_set_mach_header_address(task, result->mach_header);
	780
	781	ret = thread_state_initialize(thread);
	782	if (ret != KERN_SUCCESS) {
	783	return ret;
	784	}
	785
	786	if (result->threadstate) {
	787	uint32_t *ts = result->threadstate;
	788	uint32_t total_size = result->threadstate_sz;
	789
	790	while (total_size > 0) {
	791	uint32_t flavor = *ts++;
	792	uint32_t size = *ts++;
	793
	794	ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
	795	if (ret) {
	796	return ret;
	797	}
	798	ts += size;
	799	total_size -= (size + 2) * sizeof(uint32_t);
	800	}
	801	}
	802
	803	thread_setentrypoint(thread, result->entry_point);
	804
	805	return KERN_SUCCESS;
	806	}
	807
	808
	809	/*
	810	* Set p->p_comm and p->p_name to the name passed to exec
	811	*/
	812	static void
	813	set_proc_name(struct image_params *imgp, proc_t p)
	814	{
	815	int p_name_len = sizeof(p->p_name) - 1;
	816
	817	if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len) {
	818	imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
	819	}
	820
	821	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
	822	(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
	823	p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
	824
	825	if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) {
	826	imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
	827	}
	828
	829	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
	830	(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
	831	p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
	832	}
	833
	834	/*
	835	* exec_mach_imgact
	836	*
	837	* Image activator for mach-o 1.0 binaries.
	838	*
	839	* Parameters; struct image_params * image parameter block
	840	*
	841	* Returns: -1 not a Mach-O executable (keep looking)
	842	* -2 Success: encapsulated binary: reread
	843	* >0 Failure: error number
	844	* EBADARCH Mach-o binary, but with an unrecognized
	845	* architecture
	846	* ENOMEM No memory for child process after -
	847	* can only happen after vfork()
	848	*
	849	* Important: This image activator is NOT byte order neutral.
	850	*
	851	* Note: A return value other than -1 indicates subsequent image
	852	* activators should not be given the opportunity to attempt
	853	* to activate the image.
	854	*
	855	* TODO: More gracefully handle failures after vfork
	856	*/
	857	static int
	858	exec_mach_imgact(struct image_params *imgp)
	859	{
	860	struct mach_header mach_header = (struct mach_header )imgp->ip_vdata;
	861	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	862	int error = 0;
	863	task_t task;
	864	task_t new_task = NULL; /* protected by vfexec */
	865	thread_t thread;
	866	struct uthread *uthread;
	867	vm_map_t old_map = VM_MAP_NULL;
	868	vm_map_t map = VM_MAP_NULL;
	869	load_return_t lret;
	870	load_result_t load_result = {};
	871	struct _posix_spawnattr *psa = NULL;
	872	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
	873	int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
	874	int exec = (imgp->ip_flags & IMGPF_EXEC);
	875	os_reason_t exec_failure_reason = OS_REASON_NULL;
	876
	877	/*
	878	* make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
	879	* is a reserved field on the end, so for the most part, we can
	880	* treat them as if they were identical. Reverse-endian Mach-O
	881	* binaries are recognized but not compatible.
	882	*/
	883	if ((mach_header->magic == MH_CIGAM) \|\|
	884	(mach_header->magic == MH_CIGAM_64)) {
	885	error = EBADARCH;
	886	goto bad;
	887	}
	888
	889	if ((mach_header->magic != MH_MAGIC) &&
	890	(mach_header->magic != MH_MAGIC_64)) {
	891	error = -1;
	892	goto bad;
	893	}
	894
	895	if (mach_header->filetype != MH_EXECUTE) {
	896	error = -1;
	897	goto bad;
	898	}
	899
	900	if (imgp->ip_origcputype != 0) {
	901	/* Fat header previously had an idea about this thin file */
	902	if (imgp->ip_origcputype != mach_header->cputype \|\|
	903	imgp->ip_origcpusubtype != mach_header->cpusubtype) {
	904	error = EBADARCH;
	905	goto bad;
	906	}
	907	} else {
	908	imgp->ip_origcputype = mach_header->cputype;
	909	imgp->ip_origcpusubtype = mach_header->cpusubtype;
	910	}
	911
	912	task = current_task();
	913	thread = current_thread();
	914	uthread = get_bsdthread_info(thread);
	915
	916	if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) {
	917	imgp->ip_flags \|= IMGPF_IS_64BIT_ADDR \| IMGPF_IS_64BIT_DATA;
	918	}
	919
	920	/* If posix_spawn binprefs exist, respect those prefs. */
	921	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
	922	if (psa != NULL && psa->psa_binprefs[0] != 0) {
	923	int pr = 0;
	924	for (pr = 0; pr < NBINPREFS; pr++) {
	925	cpu_type_t pref = psa->psa_binprefs[pr];
	926	if (pref == 0) {
	927	/* No suitable arch in the pref list */
	928	error = EBADARCH;
	929	goto bad;
	930	}
	931
	932	if (pref == CPU_TYPE_ANY) {
	933	/* Jump to regular grading */
	934	goto grade;
	935	}
	936
	937	if (pref == imgp->ip_origcputype) {
	938	/* We have a match! */
	939	goto grade;
	940	}
	941	}
	942	error = EBADARCH;
	943	goto bad;
	944	}
	945	grade:
	946	if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK, TRUE)) {
	947	error = EBADARCH;
	948	goto bad;
	949	}
	950
	951	if (validate_potential_simulator_binary(imgp->ip_origcputype, imgp,
	952	imgp->ip_arch_offset, imgp->ip_arch_size) != LOAD_SUCCESS) {
	953	#if __x86_64__
	954	const char *excpath;
	955	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
	956	os_log_error(OS_LOG_DEFAULT, "Unsupported 32-bit executable: \"%s\"", (error) ? imgp->ip_vp->v_name : excpath);
	957	#endif
	958	error = EBADARCH;
	959	goto bad;
	960	}
	961
	962	#if defined(HAS_APPLE_PAC)
	963	assert(mach_header->cputype == CPU_TYPE_ARM64
	964	);
	965
	966	if (((mach_header->cputype == CPU_TYPE_ARM64 &&
	967	(mach_header->cpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E)
	968	) && (CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(mach_header->cpusubtype) == 0)) {
	969	imgp->ip_flags &= ~IMGPF_NOJOP;
	970	} else {
	971	imgp->ip_flags \|= IMGPF_NOJOP;
	972	}
	973	#endif
	974
	975	/* Copy in arguments/environment from the old process */
	976	error = exec_extract_strings(imgp);
	977	if (error) {
	978	goto bad;
	979	}
	980
	981	AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
	982	imgp->ip_endargv - imgp->ip_startargv);
	983	AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
	984	imgp->ip_endenvv - imgp->ip_endargv);
	985
	986	/*
	987	* We are being called to activate an image subsequent to a vfork()
	988	* operation; in this case, we know that our task, thread, and
	989	* uthread are actually those of our parent, and our proc, which we
	990	* obtained indirectly from the image_params vfs_context_t, is the
	991	* new child process.
	992	*/
	993	if (vfexec) {
	994	imgp->ip_new_thread = fork_create_child(task,
	995	NULL,
	996	p,
	997	FALSE,
	998	(imgp->ip_flags & IMGPF_IS_64BIT_ADDR),
	999	(imgp->ip_flags & IMGPF_IS_64BIT_DATA),
	1000	FALSE);
	1001	/* task and thread ref returned, will be released in __mac_execve */
	1002	if (imgp->ip_new_thread == NULL) {
	1003	error = ENOMEM;
	1004	goto bad;
	1005	}
	1006	}
	1007
	1008
	1009	/* reset local idea of thread, uthread, task */
	1010	thread = imgp->ip_new_thread;
	1011	uthread = get_bsdthread_info(thread);
	1012	task = new_task = get_threadtask(thread);
	1013
	1014	/*
	1015	* Load the Mach-O file.
	1016	*
	1017	* NOTE: An error after this point indicates we have potentially
	1018	* destroyed or overwritten some process state while attempting an
	1019	* execve() following a vfork(), which is an unrecoverable condition.
	1020	* We send the new process an immediate SIGKILL to avoid it executing
	1021	* any instructions in the mutated address space. For true spawns,
	1022	* this is not the case, and "too late" is still not too late to
	1023	* return an error code to the parent process.
	1024	*/
	1025
	1026	/*
	1027	* Actually load the image file we previously decided to load.
	1028	*/
	1029	lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
	1030	if (lret != LOAD_SUCCESS) {
	1031	error = load_return_to_errno(lret);
	1032
	1033	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	1034	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO, 0, 0);
	1035	if (lret == LOAD_BADMACHO_UPX) {
	1036	set_proc_name(imgp, p);
	1037	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_UPX);
	1038	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1039	} else {
	1040	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
	1041
	1042	if (bootarg_execfailurereports) {
	1043	set_proc_name(imgp, p);
	1044	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1045	}
	1046	}
	1047
	1048	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_CONSISTENT_FAILURE;
	1049
	1050	goto badtoolate;
	1051	}
	1052
	1053	proc_lock(p);
	1054	p->p_cputype = imgp->ip_origcputype;
	1055	p->p_cpusubtype = imgp->ip_origcpusubtype;
	1056	p->p_platform = load_result.ip_platform;
	1057	p->p_sdk = load_result.lr_sdk;
	1058	proc_unlock(p);
	1059
	1060	vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	1061
	1062	/*
	1063	* Set code-signing flags if this binary is signed, or if parent has
	1064	* requested them on exec.
	1065	*/
	1066	if (load_result.csflags & CS_VALID) {
	1067	imgp->ip_csflags \|= load_result.csflags &
	1068	(CS_VALID \| CS_SIGNED \| CS_DEV_CODE \|
	1069	CS_HARD \| CS_KILL \| CS_RESTRICT \| CS_ENFORCEMENT \| CS_REQUIRE_LV \|
	1070	CS_FORCED_LV \| CS_ENTITLEMENTS_VALIDATED \| CS_DYLD_PLATFORM \| CS_RUNTIME \|
	1071	CS_ENTITLEMENT_FLAGS \|
	1072	CS_EXEC_SET_HARD \| CS_EXEC_SET_KILL \| CS_EXEC_SET_ENFORCEMENT);
	1073	} else {
	1074	imgp->ip_csflags &= ~CS_VALID;
	1075	}
	1076
	1077	if (p->p_csflags & CS_EXEC_SET_HARD) {
	1078	imgp->ip_csflags \|= CS_HARD;
	1079	}
	1080	if (p->p_csflags & CS_EXEC_SET_KILL) {
	1081	imgp->ip_csflags \|= CS_KILL;
	1082	}
	1083	if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT) {
	1084	imgp->ip_csflags \|= CS_ENFORCEMENT;
	1085	}
	1086	if (p->p_csflags & CS_EXEC_INHERIT_SIP) {
	1087	if (p->p_csflags & CS_INSTALLER) {
	1088	imgp->ip_csflags \|= CS_INSTALLER;
	1089	}
	1090	if (p->p_csflags & CS_DATAVAULT_CONTROLLER) {
	1091	imgp->ip_csflags \|= CS_DATAVAULT_CONTROLLER;
	1092	}
	1093	if (p->p_csflags & CS_NVRAM_UNRESTRICTED) {
	1094	imgp->ip_csflags \|= CS_NVRAM_UNRESTRICTED;
	1095	}
	1096	}
	1097
	1098	/*
	1099	* Set up the system reserved areas in the new address space.
	1100	*/
	1101	int cpu_subtype;
	1102	cpu_subtype = 0; /* all cpu_subtypes use the same shared region */
	1103	#if defined(HAS_APPLE_PAC)
	1104	if (cpu_type() == CPU_TYPE_ARM64 &&
	1105	(p->p_cpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E) {
	1106	assertf(p->p_cputype == CPU_TYPE_ARM64,
	1107	"p %p cpu_type() 0x%x p->p_cputype 0x%x p->p_cpusubtype 0x%x",
	1108	p, cpu_type(), p->p_cputype, p->p_cpusubtype);
	1109	/*
	1110	* arm64e uses pointer authentication, so request a separate
	1111	* shared region for this CPU subtype.
	1112	*/
	1113	cpu_subtype = p->p_cpusubtype & ~CPU_SUBTYPE_MASK;
	1114	}
	1115	#endif /* HAS_APPLE_PAC */
	1116	vm_map_exec(map, task, load_result.is_64bit_addr, (void *)p->p_fd->fd_rdir, cpu_type(), cpu_subtype);
	1117
	1118	/*
	1119	* Close file descriptors which specify close-on-exec.
	1120	*/
	1121	fdexec(p, psa != NULL ? psa->psa_flags : 0, exec);
	1122
	1123	/*
	1124	* deal with set[ug]id.
	1125	*/
	1126	error = exec_handle_sugid(imgp);
	1127	if (error) {
	1128	vm_map_deallocate(map);
	1129
	1130	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	1131	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, 0, 0);
	1132
	1133	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE);
	1134	if (bootarg_execfailurereports) {
	1135	set_proc_name(imgp, p);
	1136	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1137	}
	1138
	1139	goto badtoolate;
	1140	}
	1141
	1142	/*
	1143	* Commit to new map.
	1144	*
	1145	* Swap the new map for the old for target task, which consumes
	1146	* our new map reference but each leaves us responsible for the
	1147	* old_map reference. That lets us get off the pmap associated
	1148	* with it, and then we can release it.
	1149	*
	1150	* The map needs to be set on the target task which is different
	1151	* than current task, thus swap_task_map is used instead of
	1152	* vm_map_switch.
	1153	*/
	1154	old_map = swap_task_map(task, thread, map);
	1155	vm_map_deallocate(old_map);
	1156	old_map = NULL;
	1157
	1158	lret = activate_exec_state(task, p, thread, &load_result);
	1159	if (lret != KERN_SUCCESS) {
	1160	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	1161	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE, 0, 0);
	1162
	1163	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE);
	1164	if (bootarg_execfailurereports) {
	1165	set_proc_name(imgp, p);
	1166	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1167	}
	1168
	1169	goto badtoolate;
	1170	}
	1171
	1172	/*
	1173	* deal with voucher on exec-calling thread.
	1174	*/
	1175	if (imgp->ip_new_thread == NULL) {
	1176	thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
	1177	}
	1178
	1179	/* Make sure we won't interrupt ourself signalling a partial process */
	1180	if (!vfexec && !spawn && (p->p_lflag & P_LTRACED)) {
	1181	psignal(p, SIGTRAP);
	1182	}
	1183
	1184	if (load_result.unixproc &&
	1185	create_unix_stack(get_task_map(task),
	1186	&load_result,
	1187	p) != KERN_SUCCESS) {
	1188	error = load_return_to_errno(LOAD_NOSPACE);
	1189
	1190	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	1191	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC, 0, 0);
	1192
	1193	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC);
	1194	if (bootarg_execfailurereports) {
	1195	set_proc_name(imgp, p);
	1196	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1197	}
	1198
	1199	goto badtoolate;
	1200	}
	1201
	1202	error = exec_add_apple_strings(imgp, &load_result);
	1203	if (error) {
	1204	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	1205	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT, 0, 0);
	1206
	1207	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT);
	1208	if (bootarg_execfailurereports) {
	1209	set_proc_name(imgp, p);
	1210	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1211	}
	1212	goto badtoolate;
	1213	}
	1214
	1215	/* Switch to target task's map to copy out strings */
	1216	old_map = vm_map_switch(get_task_map(task));
	1217
	1218	if (load_result.unixproc) {
	1219	user_addr_t ap;
	1220
	1221	/*
	1222	* Copy the strings area out into the new process address
	1223	* space.
	1224	*/
	1225	ap = p->user_stack;
	1226	error = exec_copyout_strings(imgp, &ap);
	1227	if (error) {
	1228	vm_map_switch(old_map);
	1229
	1230	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	1231	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, 0, 0);
	1232
	1233	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS);
	1234	if (bootarg_execfailurereports) {
	1235	set_proc_name(imgp, p);
	1236	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1237	}
	1238	goto badtoolate;
	1239	}
	1240	/* Set the stack */
	1241	thread_setuserstack(thread, ap);
	1242	}
	1243
	1244	if (load_result.dynlinker) {
	1245	uint64_t ap;
	1246	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
	1247
	1248	/* Adjust the stack */
	1249	ap = thread_adjuserstack(thread, -new_ptr_size);
	1250	error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
	1251
	1252	if (error) {
	1253	vm_map_switch(old_map);
	1254
	1255	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	1256	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, 0, 0);
	1257
	1258	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER);
	1259	if (bootarg_execfailurereports) {
	1260	set_proc_name(imgp, p);
	1261	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
	1262	}
	1263	goto badtoolate;
	1264	}
	1265	task_set_dyld_info(task, load_result.all_image_info_addr,
	1266	load_result.all_image_info_size);
	1267	}
	1268
	1269	/* Avoid immediate VM faults back into kernel */
	1270	exec_prefault_data(p, imgp, &load_result);
	1271
	1272	vm_map_switch(old_map);
	1273
	1274	/*
	1275	* Reset signal state.
	1276	*/
	1277	execsigs(p, thread);
	1278
	1279	/*
	1280	* need to cancel async IO requests that can be cancelled and wait for those
	1281	* already active. MAY BLOCK!
	1282	*/
	1283	_aio_exec( p );
	1284
	1285	#if SYSV_SHM
	1286	/* FIXME: Till vmspace inherit is fixed: */
	1287	if (!vfexec && p->vm_shm) {
	1288	shmexec(p);
	1289	}
	1290	#endif
	1291	#if SYSV_SEM
	1292	/* Clean up the semaphores */
	1293	semexit(p);
	1294	#endif
	1295
	1296	/*
	1297	* Remember file name for accounting.
	1298	*/
	1299	p->p_acflag &= ~AFORK;
	1300
	1301	set_proc_name(imgp, p);
	1302
	1303	#if CONFIG_SECLUDED_MEMORY
	1304	if (secluded_for_apps &&
	1305	load_result.platform_binary) {
	1306	if (strncmp(p->p_name,
	1307	"Camera",
	1308	sizeof(p->p_name)) == 0) {
	1309	task_set_could_use_secluded_mem(task, TRUE);
	1310	} else {
	1311	task_set_could_use_secluded_mem(task, FALSE);
	1312	}
	1313	if (strncmp(p->p_name,
	1314	"mediaserverd",
	1315	sizeof(p->p_name)) == 0) {
	1316	task_set_could_also_use_secluded_mem(task, TRUE);
	1317	}
	1318	}
	1319	#endif /* CONFIG_SECLUDED_MEMORY */
	1320
	1321	#if __arm64__
	1322	if (load_result.legacy_footprint) {
	1323	task_set_legacy_footprint(task);
	1324	}
	1325	#endif /* __arm64__ */
	1326
	1327	pal_dbg_set_task_name(task);
	1328
	1329	/*
	1330	* The load result will have already been munged by AMFI to include the
	1331	* platform binary flag if boot-args dictated it (AMFI will mark anything
	1332	* that doesn't go through the upcall path as a platform binary if its
	1333	* enforcement is disabled).
	1334	*/
	1335	if (load_result.platform_binary) {
	1336	if (cs_debug) {
	1337	printf("setting platform binary on task: pid = %d\n", p->p_pid);
	1338	}
	1339
	1340	/*
	1341	* We must use 'task' here because the proc's task has not yet been
	1342	* switched to the new one.
	1343	*/
	1344	task_set_platform_binary(task, TRUE);
	1345	} else {
	1346	if (cs_debug) {
	1347	printf("clearing platform binary on task: pid = %d\n", p->p_pid);
	1348	}
	1349
	1350	task_set_platform_binary(task, FALSE);
	1351	}
	1352
	1353	#if DEVELOPMENT \|\| DEBUG
	1354	/*
	1355	* Update the pid and proc name for importance base if any
	1356	*/
	1357	task_importance_update_owner_info(task);
	1358	#endif
	1359
	1360	memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
	1361
	1362	#if CONFIG_DTRACE
	1363	dtrace_proc_exec(p);
	1364	#endif
	1365
	1366	if (kdebug_enable) {
	1367	long args[4] = {};
	1368
	1369	uintptr_t fsid = 0, fileid = 0;
	1370	if (imgp->ip_vattr) {
	1371	uint64_t fsid64 = vnode_get_va_fsid(imgp->ip_vattr);
	1372	fsid = fsid64;
	1373	fileid = imgp->ip_vattr->va_fileid;
	1374	// check for (unexpected) overflow and trace zero in that case
	1375	if (fsid != fsid64 \|\| fileid != imgp->ip_vattr->va_fileid) {
	1376	fsid = fileid = 0;
	1377	}
	1378	}
	1379	KERNEL_DEBUG_CONSTANT_IST1(TRACE_DATA_EXEC, p->p_pid, fsid, fileid, 0,
	1380	(uintptr_t)thread_tid(thread));
	1381
	1382	/*
	1383	* Collect the pathname for tracing
	1384	*/
	1385	kdbg_trace_string(p, &args[0], &args[1], &args[2], &args[3]);
	1386	KERNEL_DEBUG_CONSTANT_IST1(TRACE_STRING_EXEC, args[0], args[1],
	1387	args[2], args[3], (uintptr_t)thread_tid(thread));
	1388	}
	1389
	1390	/*
	1391	* If posix_spawned with the START_SUSPENDED flag, stop the
	1392	* process before it runs.
	1393	*/
	1394	if (imgp->ip_px_sa != NULL) {
	1395	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
	1396	if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
	1397	proc_lock(p);
	1398	p->p_stat = SSTOP;
	1399	proc_unlock(p);
	1400	(void) task_suspend_internal(task);
	1401	}
	1402	}
	1403
	1404	/*
	1405	* mark as execed, wakeup the process that vforked (if any) and tell
	1406	* it that it now has its own resources back
	1407	*/
	1408	OSBitOrAtomic(P_EXEC, &p->p_flag);
	1409	proc_resetregister(p);
	1410	if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
	1411	proc_lock(p);
	1412	p->p_lflag &= ~P_LPPWAIT;
	1413	proc_unlock(p);
	1414	wakeup((caddr_t)p->p_pptr);
	1415	}
	1416
	1417	/*
	1418	* Pay for our earlier safety; deliver the delayed signals from
	1419	* the incomplete vfexec process now that it's complete.
	1420	*/
	1421	if (vfexec && (p->p_lflag & P_LTRACED)) {
	1422	psignal_vfork(p, new_task, thread, SIGTRAP);
	1423	}
	1424
	1425	goto done;
	1426
	1427	badtoolate:
	1428	/* Don't allow child process to execute any instructions */
	1429	if (!spawn) {
	1430	if (vfexec) {
	1431	assert(exec_failure_reason != OS_REASON_NULL);
	1432	psignal_vfork_with_reason(p, new_task, thread, SIGKILL, exec_failure_reason);
	1433	exec_failure_reason = OS_REASON_NULL;
	1434	} else {
	1435	assert(exec_failure_reason != OS_REASON_NULL);
	1436	psignal_with_reason(p, SIGKILL, exec_failure_reason);
	1437	exec_failure_reason = OS_REASON_NULL;
	1438
	1439	if (exec) {
	1440	/* Terminate the exec copy task */
	1441	task_terminate_internal(task);
	1442	}
	1443	}
	1444
	1445	/* We can't stop this system call at this point, so just pretend we succeeded */
	1446	error = 0;
	1447	} else {
	1448	os_reason_free(exec_failure_reason);
	1449	exec_failure_reason = OS_REASON_NULL;
	1450	}
	1451
	1452	done:
	1453	if (load_result.threadstate) {
	1454	kfree(load_result.threadstate, load_result.threadstate_sz);
	1455	load_result.threadstate = NULL;
	1456	}
	1457
	1458	bad:
	1459	/* If we hit this, we likely would have leaked an exit reason */
	1460	assert(exec_failure_reason == OS_REASON_NULL);
	1461	return error;
	1462	}
	1463
	1464
	1465
	1466
	1467	/*
	1468	* Our image activator table; this is the table of the image types we are
	1469	* capable of loading. We list them in order of preference to ensure the
	1470	* fastest image load speed.
	1471	*
	1472	* XXX hardcoded, for now; should use linker sets
	1473	*/
	1474	struct execsw {
	1475	int(const ex_imgact)(struct image_params );
	1476	const char *ex_name;
	1477	}const execsw[] = {
	1478	{ exec_mach_imgact, "Mach-o Binary" },
	1479	{ exec_fat_imgact, "Fat Binary" },
	1480	{ exec_shell_imgact, "Interpreter Script" },
	1481	{ NULL, NULL}
	1482	};
	1483
	1484
	1485	/*
	1486	* exec_activate_image
	1487	*
	1488	* Description: Iterate through the available image activators, and activate
	1489	* the image associated with the imgp structure. We start with
	1490	* the activator for Mach-o binaries followed by that for Fat binaries
	1491	* for Interpreter scripts.
	1492	*
	1493	* Parameters: struct image_params * Image parameter block
	1494	*
	1495	* Returns: 0 Success
	1496	* EBADEXEC The executable is corrupt/unknown
	1497	* execargs_alloc:EINVAL Invalid argument
	1498	* execargs_alloc:EACCES Permission denied
	1499	* execargs_alloc:EINTR Interrupted function
	1500	* execargs_alloc:ENOMEM Not enough space
	1501	* exec_save_path:EFAULT Bad address
	1502	* exec_save_path:ENAMETOOLONG Filename too long
	1503	* exec_check_permissions:EACCES Permission denied
	1504	* exec_check_permissions:ENOEXEC Executable file format error
	1505	* exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
	1506	* exec_check_permissions:???
	1507	* namei:???
	1508	* vn_rdwr:??? [anything vn_rdwr can return]
	1509	* <ex_imgact>:??? [anything an imgact can return]
	1510	* EDEADLK Process is being terminated
	1511	*/
	1512	static int
	1513	exec_activate_image(struct image_params *imgp)
	1514	{
	1515	struct nameidata *ndp = NULL;
	1516	const char *excpath;
	1517	int error;
	1518	int resid;
	1519	int once = 1; /* save SGUID-ness for interpreted files */
	1520	int i;
	1521	int itercount = 0;
	1522	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	1523
	1524	error = execargs_alloc(imgp);
	1525	if (error) {
	1526	goto bad_notrans;
	1527	}
	1528
	1529	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
	1530	if (error) {
	1531	goto bad_notrans;
	1532	}
	1533
	1534	/* Use excpath, which contains the copyin-ed exec path */
	1535	DTRACE_PROC1(exec, uintptr_t, excpath);
	1536
	1537	MALLOC(ndp, struct nameidata , sizeof(ndp), M_TEMP, M_WAITOK \| M_ZERO);
	1538	if (ndp == NULL) {
	1539	error = ENOMEM;
	1540	goto bad_notrans;
	1541	}
	1542
	1543	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW \| LOCKLEAF \| AUDITVNPATH1,
	1544	UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
	1545
	1546	again:
	1547	error = namei(ndp);
	1548	if (error) {
	1549	goto bad_notrans;
	1550	}
	1551	imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */
	1552	imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */
	1553
	1554	/*
	1555	* Before we start the transition from binary A to binary B, make
	1556	* sure another thread hasn't started exiting the process. We grab
	1557	* the proc lock to check p_lflag initially, and the transition
	1558	* mechanism ensures that the value doesn't change after we release
	1559	* the lock.
	1560	*/
	1561	proc_lock(p);
	1562	if (p->p_lflag & P_LEXIT) {
	1563	error = EDEADLK;
	1564	proc_unlock(p);
	1565	goto bad_notrans;
	1566	}
	1567	error = proc_transstart(p, 1, 0);
	1568	proc_unlock(p);
	1569	if (error) {
	1570	goto bad_notrans;
	1571	}
	1572
	1573	error = exec_check_permissions(imgp);
	1574	if (error) {
	1575	goto bad;
	1576	}
	1577
	1578	/* Copy; avoid invocation of an interpreter overwriting the original */
	1579	if (once) {
	1580	once = 0;
	1581	imgp->ip_origvattr = imgp->ip_vattr;
	1582	}
	1583
	1584	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
	1585	UIO_SYSSPACE, IO_NODELOCKED,
	1586	vfs_context_ucred(imgp->ip_vfs_context),
	1587	&resid, vfs_context_proc(imgp->ip_vfs_context));
	1588	if (error) {
	1589	goto bad;
	1590	}
	1591
	1592	if (resid) {
	1593	memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
	1594	}
	1595
	1596	encapsulated_binary:
	1597	/* Limit the number of iterations we will attempt on each binary */
	1598	if (++itercount > EAI_ITERLIMIT) {
	1599	error = EBADEXEC;
	1600	goto bad;
	1601	}
	1602	error = -1;
	1603	for (i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
	1604	error = (*execsw[i].ex_imgact)(imgp);
	1605
	1606	switch (error) {
	1607	/* case -1: not claimed: continue */
	1608	case -2: /* Encapsulated binary, imgp->ip_XXX set for next iteration */
	1609	goto encapsulated_binary;
	1610
	1611	case -3: /* Interpreter */
	1612	#if CONFIG_MACF
	1613	/*
	1614	* Copy the script label for later use. Note that
	1615	* the label can be different when the script is
	1616	* actually read by the interpreter.
	1617	*/
	1618	if (imgp->ip_scriptlabelp) {
	1619	mac_vnode_label_free(imgp->ip_scriptlabelp);
	1620	}
	1621	imgp->ip_scriptlabelp = mac_vnode_label_alloc();
	1622	if (imgp->ip_scriptlabelp == NULL) {
	1623	error = ENOMEM;
	1624	break;
	1625	}
	1626	mac_vnode_label_copy(imgp->ip_vp->v_label,
	1627	imgp->ip_scriptlabelp);
	1628
	1629	/*
	1630	* Take a ref of the script vnode for later use.
	1631	*/
	1632	if (imgp->ip_scriptvp) {
	1633	vnode_put(imgp->ip_scriptvp);
	1634	imgp->ip_scriptvp = NULLVP;
	1635	}
	1636	if (vnode_getwithref(imgp->ip_vp) == 0) {
	1637	imgp->ip_scriptvp = imgp->ip_vp;
	1638	}
	1639	#endif
	1640
	1641	nameidone(ndp);
	1642
	1643	vnode_put(imgp->ip_vp);
	1644	imgp->ip_vp = NULL; /* already put */
	1645	imgp->ip_ndp = NULL; /* already nameidone */
	1646
	1647	/* Use excpath, which exec_shell_imgact reset to the interpreter */
	1648	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW \| LOCKLEAF,
	1649	UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
	1650
	1651	proc_transend(p, 0);
	1652	goto again;
	1653
	1654	default:
	1655	break;
	1656	}
	1657	}
	1658
	1659	if (error == 0) {
	1660	if (imgp->ip_flags & IMGPF_INTERPRET && ndp->ni_vp) {
	1661	AUDIT_ARG(vnpath, ndp->ni_vp, ARG_VNODE2);
	1662	}
	1663
	1664	/*
	1665	* Call out to allow 3rd party notification of exec.
	1666	* Ignore result of kauth_authorize_fileop call.
	1667	*/
	1668	if (kauth_authorize_fileop_has_listeners()) {
	1669	kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
	1670	KAUTH_FILEOP_EXEC,
	1671	(uintptr_t)ndp->ni_vp, 0);
	1672	}
	1673	}
	1674	bad:
	1675	proc_transend(p, 0);
	1676
	1677	bad_notrans:
	1678	if (imgp->ip_strings) {
	1679	execargs_free(imgp);
	1680	}
	1681	if (imgp->ip_ndp) {
	1682	nameidone(imgp->ip_ndp);
	1683	}
	1684	if (ndp) {
	1685	FREE(ndp, M_TEMP);
	1686	}
	1687
	1688	return error;
	1689	}
	1690
	1691	/*
	1692	* exec_validate_spawnattr_policy
	1693	*
	1694	* Description: Validates the entitlements required to set the apptype.
	1695	*
	1696	* Parameters: int psa_apptype posix spawn attribute apptype
	1697	*
	1698	* Returns: 0 Success
	1699	* EPERM Failure
	1700	*/
	1701	static errno_t
	1702	exec_validate_spawnattr_policy(int psa_apptype)
	1703	{
	1704	if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
	1705	int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
	1706	if (proctype == POSIX_SPAWN_PROC_TYPE_DRIVER) {
	1707	if (!IOTaskHasEntitlement(current_task(), POSIX_SPAWN_ENTITLEMENT_DRIVER)) {
	1708	return EPERM;
	1709	}
	1710	}
	1711	}
	1712
	1713	return 0;
	1714	}
	1715
	1716	/*
	1717	* exec_handle_spawnattr_policy
	1718	*
	1719	* Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task.
	1720	*
	1721	* Parameters: proc_t p process to apply attributes to
	1722	* int psa_apptype posix spawn attribute apptype
	1723	*
	1724	* Returns: 0 Success
	1725	*/
	1726	static errno_t
	1727	exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp,
	1728	uint64_t psa_darwin_role, struct exec_port_actions *port_actions)
	1729	{
	1730	int apptype = TASK_APPTYPE_NONE;
	1731	int qos_clamp = THREAD_QOS_UNSPECIFIED;
	1732	int role = TASK_UNSPECIFIED;
	1733
	1734	if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
	1735	int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
	1736
	1737	switch (proctype) {
	1738	case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
	1739	apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
	1740	break;
	1741	case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
	1742	apptype = TASK_APPTYPE_DAEMON_STANDARD;
	1743	break;
	1744	case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
	1745	apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
	1746	break;
	1747	case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
	1748	apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
	1749	break;
	1750	case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
	1751	apptype = TASK_APPTYPE_APP_DEFAULT;
	1752	break;
	1753	#if !CONFIG_EMBEDDED
	1754	case POSIX_SPAWN_PROC_TYPE_APP_TAL:
	1755	apptype = TASK_APPTYPE_APP_TAL;
	1756	break;
	1757	#endif /* !CONFIG_EMBEDDED */
	1758	case POSIX_SPAWN_PROC_TYPE_DRIVER:
	1759	apptype = TASK_APPTYPE_DRIVER;
	1760	break;
	1761	default:
	1762	apptype = TASK_APPTYPE_NONE;
	1763	/* TODO: Should an invalid value here fail the spawn? */
	1764	break;
	1765	}
	1766	}
	1767
	1768	if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
	1769	switch (psa_qos_clamp) {
	1770	case POSIX_SPAWN_PROC_CLAMP_UTILITY:
	1771	qos_clamp = THREAD_QOS_UTILITY;
	1772	break;
	1773	case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
	1774	qos_clamp = THREAD_QOS_BACKGROUND;
	1775	break;
	1776	case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
	1777	qos_clamp = THREAD_QOS_MAINTENANCE;
	1778	break;
	1779	default:
	1780	qos_clamp = THREAD_QOS_UNSPECIFIED;
	1781	/* TODO: Should an invalid value here fail the spawn? */
	1782	break;
	1783	}
	1784	}
	1785
	1786	if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
	1787	proc_darwin_role_to_task_role(psa_darwin_role, &role);
	1788	}
	1789
	1790	if (apptype != TASK_APPTYPE_NONE \|\|
	1791	qos_clamp != THREAD_QOS_UNSPECIFIED \|\|
	1792	role != TASK_UNSPECIFIED \|\|
	1793	port_actions->portwatch_count) {
	1794	proc_set_task_spawnpolicy(p->task, thread, apptype, qos_clamp, role,
	1795	port_actions->portwatch_array, port_actions->portwatch_count);
	1796	}
	1797
	1798	if (port_actions->registered_count) {
	1799	if (mach_ports_register(p->task, port_actions->registered_array,
	1800	port_actions->registered_count)) {
	1801	return EINVAL;
	1802	}
	1803	/* mach_ports_register() consumed the array */
	1804	port_actions->registered_array = NULL;
	1805	port_actions->registered_count = 0;
	1806	}
	1807
	1808	return 0;
	1809	}
	1810
	1811	static void
	1812	exec_port_actions_destroy(struct exec_port_actions *port_actions)
	1813	{
	1814	if (port_actions->portwatch_array) {
	1815	for (uint32_t i = 0; i < port_actions->portwatch_count; i++) {
	1816	ipc_port_t port = NULL;
	1817	if ((port = port_actions->portwatch_array[i]) != NULL) {
	1818	ipc_port_release_send(port);
	1819	}
	1820	}
	1821	kfree(port_actions->portwatch_array,
	1822	port_actions->portwatch_count * sizeof(ipc_port_t *));
	1823	}
	1824
	1825	if (port_actions->registered_array) {
	1826	for (uint32_t i = 0; i < port_actions->registered_count; i++) {
	1827	ipc_port_t port = NULL;
	1828	if ((port = port_actions->registered_array[i]) != NULL) {
	1829	ipc_port_release_send(port);
	1830	}
	1831	}
	1832	kfree(port_actions->registered_array,
	1833	port_actions->registered_count * sizeof(ipc_port_t *));
	1834	}
	1835	}
	1836
	1837	/*
	1838	* exec_handle_port_actions
	1839	*
	1840	* Description: Go through the _posix_port_actions_t contents,
	1841	* calling task_set_special_port, task_set_exception_ports
	1842	* and/or audit_session_spawnjoin for the current task.
	1843	*
	1844	* Parameters: struct image_params * Image parameter block
	1845	*
	1846	* Returns: 0 Success
	1847	* EINVAL Failure
	1848	* ENOTSUP Illegal posix_spawn attr flag was set
	1849	*/
	1850	static errno_t
	1851	exec_handle_port_actions(struct image_params *imgp,
	1852	struct exec_port_actions *actions)
	1853	{
	1854	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
	1855	#if CONFIG_AUDIT
	1856	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	1857	#endif
	1858	_ps_port_action_t *act = NULL;
	1859	task_t task = get_threadtask(imgp->ip_new_thread);
	1860	ipc_port_t port = NULL;
	1861	errno_t ret = 0;
	1862	int i, portwatch_i = 0, registered_i = 0;
	1863	kern_return_t kr;
	1864	boolean_t task_has_watchport_boost = task_has_watchports(current_task());
	1865	boolean_t in_exec = (imgp->ip_flags & IMGPF_EXEC);
	1866
	1867	for (i = 0; i < pacts->pspa_count; i++) {
	1868	act = &pacts->pspa_actions[i];
	1869
	1870	switch (act->port_type) {
	1871	case PSPA_SPECIAL:
	1872	case PSPA_EXCEPTION:
	1873	#if CONFIG_AUDIT
	1874	case PSPA_AU_SESSION:
	1875	#endif
	1876	break;
	1877	case PSPA_IMP_WATCHPORTS:
	1878	if (++actions->portwatch_count > TASK_MAX_WATCHPORT_COUNT) {
	1879	ret = EINVAL;
	1880	goto done;
	1881	}
	1882	break;
	1883	case PSPA_REGISTERED_PORTS:
	1884	if (++actions->registered_count > TASK_PORT_REGISTER_MAX) {
	1885	ret = EINVAL;
	1886	goto done;
	1887	}
	1888	break;
	1889	default:
	1890	ret = EINVAL;
	1891	goto done;
	1892	}
	1893	}
	1894
	1895	if (actions->portwatch_count) {
	1896	if (in_exec && task_has_watchport_boost) {
	1897	ret = EINVAL;
	1898	goto done;
	1899	}
	1900	actions->portwatch_array =
	1901	kalloc(sizeof(ipc_port_t ) actions->portwatch_count);
	1902	if (actions->portwatch_array == NULL) {
	1903	ret = ENOMEM;
	1904	goto done;
	1905	}
	1906	bzero(actions->portwatch_array,
	1907	sizeof(ipc_port_t ) actions->portwatch_count);
	1908	}
	1909
	1910	if (actions->registered_count) {
	1911	actions->registered_array =
	1912	kalloc(sizeof(ipc_port_t ) actions->registered_count);
	1913	if (actions->registered_array == NULL) {
	1914	ret = ENOMEM;
	1915	goto done;
	1916	}
	1917	bzero(actions->registered_array,
	1918	sizeof(ipc_port_t ) actions->registered_count);
	1919	}
	1920
	1921	for (i = 0; i < pacts->pspa_count; i++) {
	1922	act = &pacts->pspa_actions[i];
	1923
	1924	if (MACH_PORT_VALID(act->new_port)) {
	1925	kr = ipc_object_copyin(get_task_ipcspace(current_task()),
	1926	act->new_port, MACH_MSG_TYPE_COPY_SEND,
	1927	(ipc_object_t *) &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
	1928
	1929	if (kr != KERN_SUCCESS) {
	1930	ret = EINVAL;
	1931	goto done;
	1932	}
	1933	} else {
	1934	/* it's NULL or DEAD */
	1935	port = CAST_MACH_NAME_TO_PORT(act->new_port);
	1936	}
	1937
	1938	switch (act->port_type) {
	1939	case PSPA_SPECIAL:
	1940	kr = task_set_special_port(task, act->which, port);
	1941
	1942	if (kr != KERN_SUCCESS) {
	1943	ret = EINVAL;
	1944	}
	1945	break;
	1946
	1947	case PSPA_EXCEPTION:
	1948	kr = task_set_exception_ports(task, act->mask, port,
	1949	act->behavior, act->flavor);
	1950	if (kr != KERN_SUCCESS) {
	1951	ret = EINVAL;
	1952	}
	1953	break;
	1954	#if CONFIG_AUDIT
	1955	case PSPA_AU_SESSION:
	1956	ret = audit_session_spawnjoin(p, task, port);
	1957	if (ret) {
	1958	/* audit_session_spawnjoin() has already dropped the reference in case of error. */
	1959	goto done;
	1960	}
	1961
	1962	break;
	1963	#endif
	1964	case PSPA_IMP_WATCHPORTS:
	1965	if (actions->portwatch_array) {
	1966	/* hold on to this till end of spawn */
	1967	actions->portwatch_array[portwatch_i++] = port;
	1968	} else {
	1969	ipc_port_release_send(port);
	1970	}
	1971	break;
	1972	case PSPA_REGISTERED_PORTS:
	1973	/* hold on to this till end of spawn */
	1974	actions->registered_array[registered_i++] = port;
	1975	break;
	1976	default:
	1977	ret = EINVAL;
	1978	break;
	1979	}
	1980
	1981	if (ret) {
	1982	/* action failed, so release port resources */
	1983	ipc_port_release_send(port);
	1984	break;
	1985	}
	1986	}
	1987
	1988	done:
	1989	if (0 != ret) {
	1990	DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
	1991	}
	1992	return ret;
	1993	}
	1994
	1995	/*
	1996	* exec_handle_file_actions
	1997	*
	1998	* Description: Go through the _posix_file_actions_t contents applying the
	1999	* open, close, and dup2 operations to the open file table for
	2000	* the current process.
	2001	*
	2002	* Parameters: struct image_params * Image parameter block
	2003	*
	2004	* Returns: 0 Success
	2005	* ???
	2006	*
	2007	* Note: Actions are applied in the order specified, with the credential
	2008	* of the parent process. This is done to permit the parent
	2009	* process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
	2010	* the child following operations the child may in fact not be
	2011	* normally permitted to perform.
	2012	*/
	2013	static int
	2014	exec_handle_file_actions(struct image_params *imgp, short psa_flags)
	2015	{
	2016	int error = 0;
	2017	int action;
	2018	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	2019	_posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
	2020	int ival[2]; /* dummy retval for system calls) */
	2021	#if CONFIG_AUDIT
	2022	struct uthread *uthread = get_bsdthread_info(current_thread());
	2023	#endif
	2024
	2025	for (action = 0; action < px_sfap->psfa_act_count; action++) {
	2026	_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
	2027
	2028	switch (psfa->psfaa_type) {
	2029	case PSFA_OPEN: {
	2030	/*
	2031	* Open is different, in that it requires the use of
	2032	* a path argument, which is normally copied in from
	2033	* user space; because of this, we have to support an
	2034	* open from kernel space that passes an address space
	2035	* context of UIO_SYSSPACE, and casts the address
	2036	* argument to a user_addr_t.
	2037	*/
	2038	char *bufp = NULL;
	2039	struct vnode_attr *vap;
	2040	struct nameidata *ndp;
	2041	int mode = psfa->psfaa_openargs.psfao_mode;
	2042	struct dup2_args dup2a;
	2043	struct close_nocancel_args ca;
	2044	int origfd;
	2045
	2046	MALLOC(bufp, char , sizeof(vap) + sizeof(*ndp), M_TEMP, M_WAITOK \| M_ZERO);
	2047	if (bufp == NULL) {
	2048	error = ENOMEM;
	2049	break;
	2050	}
	2051
	2052	vap = (struct vnode_attr *) bufp;
	2053	ndp = (struct nameidata ) (bufp + sizeof(vap));
	2054
	2055	VATTR_INIT(vap);
	2056	/* Mask off all but regular access permissions */
	2057	mode = ((mode & ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	2058	VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
	2059
	2060	AUDIT_SUBCALL_ENTER(OPEN, p, uthread);
	2061
	2062	NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW \| AUDITVNPATH1, UIO_SYSSPACE,
	2063	CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
	2064	imgp->ip_vfs_context);
	2065
	2066	error = open1(imgp->ip_vfs_context,
	2067	ndp,
	2068	psfa->psfaa_openargs.psfao_oflag,
	2069	vap,
	2070	fileproc_alloc_init, NULL,
	2071	ival);
	2072
	2073	FREE(bufp, M_TEMP);
	2074
	2075	AUDIT_SUBCALL_EXIT(uthread, error);
	2076
	2077	/*
	2078	* If there's an error, or we get the right fd by
	2079	* accident, then drop out here. This is easier than
	2080	* reworking all the open code to preallocate fd
	2081	* slots, and internally taking one as an argument.
	2082	*/
	2083	if (error \|\| ival[0] == psfa->psfaa_filedes) {
	2084	break;
	2085	}
	2086
	2087	origfd = ival[0];
	2088	/*
	2089	* If we didn't fall out from an error, we ended up
	2090	* with the wrong fd; so now we've got to try to dup2
	2091	* it to the right one.
	2092	*/
	2093	dup2a.from = origfd;
	2094	dup2a.to = psfa->psfaa_filedes;
	2095
	2096	/*
	2097	* The dup2() system call implementation sets
	2098	* ival to newfd in the success case, but we
	2099	* can ignore that, since if we didn't get the
	2100	* fd we wanted, the error will stop us.
	2101	*/
	2102	AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
	2103	error = dup2(p, &dup2a, ival);
	2104	AUDIT_SUBCALL_EXIT(uthread, error);
	2105	if (error) {
	2106	break;
	2107	}
	2108
	2109	/*
	2110	* Finally, close the original fd.
	2111	*/
	2112	ca.fd = origfd;
	2113
	2114	AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
	2115	error = close_nocancel(p, &ca, ival);
	2116	AUDIT_SUBCALL_EXIT(uthread, error);
	2117	}
	2118	break;
	2119
	2120	case PSFA_DUP2: {
	2121	struct dup2_args dup2a;
	2122
	2123	dup2a.from = psfa->psfaa_filedes;
	2124	dup2a.to = psfa->psfaa_dup2args.psfad_newfiledes;
	2125
	2126	/*
	2127	* The dup2() system call implementation sets
	2128	* ival to newfd in the success case, but we
	2129	* can ignore that, since if we didn't get the
	2130	* fd we wanted, the error will stop us.
	2131	*/
	2132	AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
	2133	error = dup2(p, &dup2a, ival);
	2134	AUDIT_SUBCALL_EXIT(uthread, error);
	2135	}
	2136	break;
	2137
	2138	case PSFA_FILEPORT_DUP2: {
	2139	ipc_port_t port;
	2140	kern_return_t kr;
	2141	struct dup2_args dup2a;
	2142	struct close_nocancel_args ca;
	2143
	2144	if (!MACH_PORT_VALID(psfa->psfaa_fileport)) {
	2145	error = EINVAL;
	2146	break;
	2147	}
	2148
	2149	kr = ipc_object_copyin(get_task_ipcspace(current_task()),
	2150	psfa->psfaa_fileport, MACH_MSG_TYPE_COPY_SEND,
	2151	(ipc_object_t *) &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
	2152
	2153	if (kr != KERN_SUCCESS) {
	2154	error = EINVAL;
	2155	break;
	2156	}
	2157
	2158	error = fileport_makefd_internal(p, port, 0, ival);
	2159
	2160	if (IPC_PORT_NULL != port) {
	2161	ipc_port_release_send(port);
	2162	}
	2163
	2164	if (error \|\| ival[0] == psfa->psfaa_dup2args.psfad_newfiledes) {
	2165	break;
	2166	}
	2167
	2168	dup2a.from = ca.fd = ival[0];
	2169	dup2a.to = psfa->psfaa_dup2args.psfad_newfiledes;
	2170	AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
	2171	error = dup2(p, &dup2a, ival);
	2172	AUDIT_SUBCALL_EXIT(uthread, error);
	2173	if (error) {
	2174	break;
	2175	}
	2176
	2177	AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
	2178	error = close_nocancel(p, &ca, ival);
	2179	AUDIT_SUBCALL_EXIT(uthread, error);
	2180	}
	2181	break;
	2182
	2183	case PSFA_CLOSE: {
	2184	struct close_nocancel_args ca;
	2185
	2186	ca.fd = psfa->psfaa_filedes;
	2187
	2188	AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
	2189	error = close_nocancel(p, &ca, ival);
	2190	AUDIT_SUBCALL_EXIT(uthread, error);
	2191	}
	2192	break;
	2193
	2194	case PSFA_INHERIT: {
	2195	struct fcntl_nocancel_args fcntla;
	2196
	2197	/*
	2198	* Check to see if the descriptor exists, and
	2199	* ensure it's -not- marked as close-on-exec.
	2200	*
	2201	* Attempting to "inherit" a guarded fd will
	2202	* result in a error.
	2203	*/
	2204	fcntla.fd = psfa->psfaa_filedes;
	2205	fcntla.cmd = F_GETFD;
	2206	if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0) {
	2207	break;
	2208	}
	2209
	2210	if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
	2211	fcntla.fd = psfa->psfaa_filedes;
	2212	fcntla.cmd = F_SETFD;
	2213	fcntla.arg = ival[0] & ~FD_CLOEXEC;
	2214	error = fcntl_nocancel(p, &fcntla, ival);
	2215	}
	2216	}
	2217	break;
	2218
	2219	case PSFA_CHDIR: {
	2220	/*
	2221	* Chdir is different, in that it requires the use of
	2222	* a path argument, which is normally copied in from
	2223	* user space; because of this, we have to support a
	2224	* chdir from kernel space that passes an address space
	2225	* context of UIO_SYSSPACE, and casts the address
	2226	* argument to a user_addr_t.
	2227	*/
	2228	struct nameidata nd;
	2229
	2230	AUDIT_SUBCALL_ENTER(CHDIR, p, uthread);
	2231	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW \| AUDITVNPATH1, UIO_SYSSPACE,
	2232	CAST_USER_ADDR_T(psfa->psfaa_chdirargs.psfac_path),
	2233	imgp->ip_vfs_context);
	2234
	2235	error = chdir_internal(p, imgp->ip_vfs_context, &nd, 0);
	2236	AUDIT_SUBCALL_EXIT(uthread, error);
	2237	}
	2238	break;
	2239
	2240	case PSFA_FCHDIR: {
	2241	struct fchdir_args fchdira;
	2242
	2243	fchdira.fd = psfa->psfaa_filedes;
	2244
	2245	AUDIT_SUBCALL_ENTER(FCHDIR, p, uthread);
	2246	error = fchdir(p, &fchdira, ival);
	2247	AUDIT_SUBCALL_EXIT(uthread, error);
	2248	}
	2249	break;
	2250
	2251	default:
	2252	error = EINVAL;
	2253	break;
	2254	}
	2255
	2256	/* All file actions failures are considered fatal, per POSIX */
	2257
	2258	if (error) {
	2259	if (PSFA_OPEN == psfa->psfaa_type) {
	2260	DTRACE_PROC1(spawn__open__failure, uintptr_t,
	2261	psfa->psfaa_openargs.psfao_path);
	2262	} else {
	2263	DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
	2264	}
	2265	break;
	2266	}
	2267	}
	2268
	2269	if (error != 0 \|\| (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0) {
	2270	return error;
	2271	}
	2272
	2273	/*
	2274	* If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
	2275	* this spawn only) as if "close on exec" is the default
	2276	* disposition of all pre-existing file descriptors. In this case,
	2277	* the list of file descriptors mentioned in the file actions
	2278	* are the only ones that can be inherited, so mark them now.
	2279	*
	2280	* The actual closing part comes later, in fdexec().
	2281	*/
	2282	proc_fdlock(p);
	2283	for (action = 0; action < px_sfap->psfa_act_count; action++) {
	2284	_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
	2285	int fd = psfa->psfaa_filedes;
	2286
	2287	switch (psfa->psfaa_type) {
	2288	case PSFA_DUP2:
	2289	case PSFA_FILEPORT_DUP2:
	2290	fd = psfa->psfaa_dup2args.psfad_newfiledes;
	2291	/FALLTHROUGH/
	2292	case PSFA_OPEN:
	2293	case PSFA_INHERIT:
	2294	*fdflags(p, fd) \|= UF_INHERIT;
	2295	break;
	2296
	2297	case PSFA_CLOSE:
	2298	case PSFA_CHDIR:
	2299	case PSFA_FCHDIR:
	2300	/*
	2301	* Although PSFA_FCHDIR does have a file descriptor, it is not
	2302	* creating one, thus we do not automatically mark it for
	2303	* inheritance under POSIX_SPAWN_CLOEXEC_DEFAULT. A client that
	2304	* wishes it to be inherited should use the PSFA_INHERIT action
	2305	* explicitly.
	2306	*/
	2307	break;
	2308	}
	2309	}
	2310	proc_fdunlock(p);
	2311
	2312	return 0;
	2313	}
	2314
	2315	#if CONFIG_MACF
	2316	/*
	2317	* exec_spawnattr_getmacpolicyinfo
	2318	*/
	2319	void *
	2320	exec_spawnattr_getmacpolicyinfo(const void macextensions, const char policyname, size_t *lenp)
	2321	{
	2322	const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
	2323	int i;
	2324
	2325	if (psmx == NULL) {
	2326	return NULL;
	2327	}
	2328
	2329	for (i = 0; i < psmx->psmx_count; i++) {
	2330	const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
	2331	if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
	2332	if (lenp != NULL) {
	2333	*lenp = extension->datalen;
	2334	}
	2335	return extension->datap;
	2336	}
	2337	}
	2338
	2339	if (lenp != NULL) {
	2340	*lenp = 0;
	2341	}
	2342	return NULL;
	2343	}
	2344
	2345	static int
	2346	spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc px_args, _posix_spawn_mac_policy_extensions_t psmxp)
	2347	{
	2348	_posix_spawn_mac_policy_extensions_t psmx = NULL;
	2349	int error = 0;
	2350	int copycnt = 0;
	2351	int i = 0;
	2352
	2353	*psmxp = NULL;
	2354
	2355	if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) \|\|
	2356	px_args->mac_extensions_size > PAGE_SIZE) {
	2357	error = EINVAL;
	2358	goto bad;
	2359	}
	2360
	2361	MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
	2362	if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0) {
	2363	goto bad;
	2364	}
	2365
	2366	size_t extsize = PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count);
	2367	if (extsize == 0 \|\| extsize > px_args->mac_extensions_size) {
	2368	error = EINVAL;
	2369	goto bad;
	2370	}
	2371
	2372	for (i = 0; i < psmx->psmx_count; i++) {
	2373	_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
	2374	if (extension->datalen == 0 \|\| extension->datalen > PAGE_SIZE) {
	2375	error = EINVAL;
	2376	goto bad;
	2377	}
	2378	}
	2379
	2380	for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
	2381	_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
	2382	void *data = NULL;
	2383
	2384	MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
	2385	if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
	2386	FREE(data, M_TEMP);
	2387	goto bad;
	2388	}
	2389	extension->datap = data;
	2390	}
	2391
	2392	*psmxp = psmx;
	2393	return 0;
	2394
	2395	bad:
	2396	if (psmx != NULL) {
	2397	for (i = 0; i < copycnt; i++) {
	2398	FREE(psmx->psmx_extensions[i].datap, M_TEMP);
	2399	}
	2400	FREE(psmx, M_TEMP);
	2401	}
	2402	return error;
	2403	}
	2404
	2405	static void
	2406	spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
	2407	{
	2408	int i;
	2409
	2410	if (psmx == NULL) {
	2411	return;
	2412	}
	2413	for (i = 0; i < psmx->psmx_count; i++) {
	2414	FREE(psmx->psmx_extensions[i].datap, M_TEMP);
	2415	}
	2416	FREE(psmx, M_TEMP);
	2417	}
	2418	#endif /* CONFIG_MACF */
	2419
	2420	#if CONFIG_COALITIONS
	2421	static inline void
	2422	spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
	2423	{
	2424	for (int c = 0; c < COALITION_NUM_TYPES; c++) {
	2425	if (coal[c]) {
	2426	coalition_remove_active(coal[c]);
	2427	coalition_release(coal[c]);
	2428	}
	2429	}
	2430	}
	2431	#endif
	2432
	2433	#if CONFIG_PERSONAS
	2434	static int
	2435	spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
	2436	{
	2437	int error = 0;
	2438	struct persona *persona = NULL;
	2439	int verify = px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_VERIFY;
	2440
	2441	if (!IOTaskHasEntitlement(current_task(), PERSONA_MGMT_ENTITLEMENT)) {
	2442	return EPERM;
	2443	}
	2444
	2445	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
	2446	if (px_persona->pspi_ngroups > NGROUPS_MAX) {
	2447	return EINVAL;
	2448	}
	2449	}
	2450
	2451	persona = persona_lookup(px_persona->pspi_id);
	2452	if (!persona) {
	2453	error = ESRCH;
	2454	goto out;
	2455	}
	2456
	2457	if (verify) {
	2458	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
	2459	if (px_persona->pspi_uid != persona_get_uid(persona)) {
	2460	error = EINVAL;
	2461	goto out;
	2462	}
	2463	}
	2464	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
	2465	if (px_persona->pspi_gid != persona_get_gid(persona)) {
	2466	error = EINVAL;
	2467	goto out;
	2468	}
	2469	}
	2470	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
	2471	unsigned ngroups = 0;
	2472	gid_t groups[NGROUPS_MAX];
	2473
	2474	if (persona_get_groups(persona, &ngroups, groups,
	2475	px_persona->pspi_ngroups) != 0) {
	2476	error = EINVAL;
	2477	goto out;
	2478	}
	2479	if (ngroups != px_persona->pspi_ngroups) {
	2480	error = EINVAL;
	2481	goto out;
	2482	}
	2483	while (ngroups--) {
	2484	if (px_persona->pspi_groups[ngroups] != groups[ngroups]) {
	2485	error = EINVAL;
	2486	goto out;
	2487	}
	2488	}
	2489	if (px_persona->pspi_gmuid != persona_get_gmuid(persona)) {
	2490	error = EINVAL;
	2491	goto out;
	2492	}
	2493	}
	2494	}
	2495
	2496	out:
	2497	if (persona) {
	2498	persona_put(persona);
	2499	}
	2500
	2501	return error;
	2502	}
	2503
	2504	static int
	2505	spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
	2506	{
	2507	int ret;
	2508	kauth_cred_t cred;
	2509	struct persona *persona = NULL;
	2510	int override = !!(px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
	2511
	2512	if (!override) {
	2513	return persona_proc_adopt_id(p, px_persona->pspi_id, NULL);
	2514	}
	2515
	2516	/*
	2517	* we want to spawn into the given persona, but we want to override
	2518	* the kauth with a different UID/GID combo
	2519	*/
	2520	persona = persona_lookup(px_persona->pspi_id);
	2521	if (!persona) {
	2522	return ESRCH;
	2523	}
	2524
	2525	cred = persona_get_cred(persona);
	2526	if (!cred) {
	2527	ret = EINVAL;
	2528	goto out;
	2529	}
	2530
	2531	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
	2532	cred = kauth_cred_setresuid(cred,
	2533	px_persona->pspi_uid,
	2534	px_persona->pspi_uid,
	2535	px_persona->pspi_uid,
	2536	KAUTH_UID_NONE);
	2537	}
	2538
	2539	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
	2540	cred = kauth_cred_setresgid(cred,
	2541	px_persona->pspi_gid,
	2542	px_persona->pspi_gid,
	2543	px_persona->pspi_gid);
	2544	}
	2545
	2546	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
	2547	cred = kauth_cred_setgroups(cred,
	2548	px_persona->pspi_groups,
	2549	px_persona->pspi_ngroups,
	2550	px_persona->pspi_gmuid);
	2551	}
	2552
	2553	ret = persona_proc_adopt(p, persona, cred);
	2554
	2555	out:
	2556	persona_put(persona);
	2557	return ret;
	2558	}
	2559	#endif
	2560
	2561	#if __arm64__
	2562	extern int legacy_footprint_entitlement_mode;
	2563	static inline void
	2564	proc_legacy_footprint_entitled(proc_t p, task_t task, const char *caller)
	2565	{
	2566	#pragma unused(p, caller)
	2567	boolean_t legacy_footprint_entitled;
	2568
	2569	switch (legacy_footprint_entitlement_mode) {
	2570	case LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE:
	2571	/* the entitlement is ignored */
	2572	break;
	2573	case LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT:
	2574	/* the entitlement grants iOS11 legacy accounting */
	2575	legacy_footprint_entitled = IOTaskHasEntitlement(task,
	2576	"com.apple.private.memory.legacy_footprint");
	2577	if (legacy_footprint_entitled) {
	2578	task_set_legacy_footprint(task);
	2579	}
	2580	break;
	2581	case LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE:
	2582	/* the entitlement grants a footprint limit increase */
	2583	legacy_footprint_entitled = IOTaskHasEntitlement(task,
	2584	"com.apple.private.memory.legacy_footprint");
	2585	if (legacy_footprint_entitled) {
	2586	task_set_extra_footprint_limit(task);
	2587	}
	2588	break;
	2589	default:
	2590	break;
	2591	}
	2592	}
	2593
	2594	static inline void
	2595	proc_ios13extended_footprint_entitled(proc_t p, task_t task, const char *caller)
	2596	{
	2597	#pragma unused(p, caller)
	2598	boolean_t ios13extended_footprint_entitled;
	2599
	2600	/* the entitlement grants a footprint limit increase */
	2601	ios13extended_footprint_entitled = IOTaskHasEntitlement(task,
	2602	"com.apple.developer.memory.ios13extended_footprint");
	2603	if (ios13extended_footprint_entitled) {
	2604	task_set_ios13extended_footprint_limit(task);
	2605	}
	2606	}
	2607	#endif /* __arm64__ */
	2608
	2609	/*
	2610	* Apply a modification on the proc's kauth cred until it converges.
	2611	*
	2612	* `update` consumes its argument to return a new kauth cred.
	2613	*/
	2614	static void
	2615	apply_kauth_cred_update(proc_t p,
	2616	kauth_cred_t (^update)(kauth_cred_t orig_cred))
	2617	{
	2618	kauth_cred_t my_cred, my_new_cred;
	2619
	2620	my_cred = kauth_cred_proc_ref(p);
	2621	for (;;) {
	2622	my_new_cred = update(my_cred);
	2623	if (my_cred == my_new_cred) {
	2624	kauth_cred_unref(&my_new_cred);
	2625	break;
	2626	}
	2627
	2628	/* try update cred on proc */
	2629	proc_ucred_lock(p);
	2630
	2631	if (p->p_ucred == my_cred) {
	2632	/* base pointer didn't change, donate our ref */
	2633	p->p_ucred = my_new_cred;
	2634	PROC_UPDATE_CREDS_ONPROC(p);
	2635	proc_ucred_unlock(p);
	2636
	2637	/* drop p->p_ucred reference */
	2638	kauth_cred_unref(&my_cred);
	2639	break;
	2640	}
	2641
	2642	/* base pointer changed, retry */
	2643	my_cred = p->p_ucred;
	2644	kauth_cred_ref(my_cred);
	2645	proc_ucred_unlock(p);
	2646
	2647	kauth_cred_unref(&my_new_cred);
	2648	}
	2649	}
	2650
	2651	static int
	2652	spawn_posix_cred_adopt(proc_t p,
	2653	struct _posix_spawn_posix_cred_info *px_pcred_info)
	2654	{
	2655	int error = 0;
	2656
	2657	if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GID) {
	2658	struct setgid_args args = {
	2659	.gid = px_pcred_info->pspci_gid,
	2660	};
	2661	error = setgid(p, &args, NULL);
	2662	if (error) {
	2663	return error;
	2664	}
	2665	}
	2666
	2667	if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) {
	2668	error = setgroups_internal(p,
	2669	px_pcred_info->pspci_ngroups,
	2670	px_pcred_info->pspci_groups,
	2671	px_pcred_info->pspci_gmuid);
	2672	if (error) {
	2673	return error;
	2674	}
	2675	}
	2676
	2677	if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_UID) {
	2678	struct setuid_args args = {
	2679	.uid = px_pcred_info->pspci_uid,
	2680	};
	2681	error = setuid(p, &args, NULL);
	2682	if (error) {
	2683	return error;
	2684	}
	2685	}
	2686	return 0;
	2687	}
	2688
	2689	/*
	2690	* posix_spawn
	2691	*
	2692	* Parameters: uap->pid Pointer to pid return area
	2693	* uap->fname File name to exec
	2694	* uap->argp Argument list
	2695	* uap->envp Environment list
	2696	*
	2697	* Returns: 0 Success
	2698	* EINVAL Invalid argument
	2699	* ENOTSUP Not supported
	2700	* ENOEXEC Executable file format error
	2701	* exec_activate_image:EINVAL Invalid argument
	2702	* exec_activate_image:EACCES Permission denied
	2703	* exec_activate_image:EINTR Interrupted function
	2704	* exec_activate_image:ENOMEM Not enough space
	2705	* exec_activate_image:EFAULT Bad address
	2706	* exec_activate_image:ENAMETOOLONG Filename too long
	2707	* exec_activate_image:ENOEXEC Executable file format error
	2708	* exec_activate_image:ETXTBSY Text file busy [misuse of error code]
	2709	* exec_activate_image:EAUTH Image decryption failed
	2710	* exec_activate_image:EBADEXEC The executable is corrupt/unknown
	2711	* exec_activate_image:???
	2712	* mac_execve_enter:???
	2713	*
	2714	* TODO: Expect to need __mac_posix_spawn() at some point...
	2715	* Handle posix_spawnattr_t
	2716	* Handle posix_spawn_file_actions_t
	2717	*/
	2718	int
	2719	posix_spawn(proc_t ap, struct posix_spawn_args uap, int32_t retval)
	2720	{
	2721	proc_t p = ap; /* quiet bogus GCC vfork() warning */
	2722	user_addr_t pid = uap->pid;
	2723	int ival[2]; /* dummy retval for setpgid() */
	2724	char *bufp = NULL;
	2725	struct image_params *imgp;
	2726	struct vnode_attr *vap;
	2727	struct vnode_attr *origvap;
	2728	struct uthread uthread = 0; / compiler complains if not set to 0*/
	2729	int error, sig;
	2730	int is_64 = IS_64BIT_PROCESS(p);
	2731	struct vfs_context context;
	2732	struct user__posix_spawn_args_desc px_args;
	2733	struct _posix_spawnattr px_sa;
	2734	_posix_spawn_file_actions_t px_sfap = NULL;
	2735	_posix_spawn_port_actions_t px_spap = NULL;
	2736	struct __kern_sigaction vec;
	2737	boolean_t spawn_no_exec = FALSE;
	2738	boolean_t proc_transit_set = TRUE;
	2739	boolean_t exec_done = FALSE;
	2740	struct exec_port_actions port_actions = { };
	2741	vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
	2742	task_t old_task = current_task();
	2743	task_t new_task = NULL;
	2744	boolean_t should_release_proc_ref = FALSE;
	2745	void *inherit = NULL;
	2746	#if CONFIG_PERSONAS
	2747	struct _posix_spawn_persona_info *px_persona = NULL;
	2748	#endif
	2749	struct _posix_spawn_posix_cred_info *px_pcred_info = NULL;
	2750
	2751	/*
	2752	* Allocate a big chunk for locals instead of using stack since these
	2753	* structures are pretty big.
	2754	*/
	2755	MALLOC(bufp, char , (sizeof(imgp) + sizeof(vap) + sizeof(origvap)), M_TEMP, M_WAITOK \| M_ZERO);
	2756	imgp = (struct image_params *) bufp;
	2757	if (bufp == NULL) {
	2758	error = ENOMEM;
	2759	goto bad;
	2760	}
	2761	vap = (struct vnode_attr ) (bufp + sizeof(imgp));
	2762	origvap = (struct vnode_attr ) (bufp + sizeof(imgp) + sizeof(*vap));
	2763
	2764	/* Initialize the common data in the image_params structure */
	2765	imgp->ip_user_fname = uap->path;
	2766	imgp->ip_user_argv = uap->argv;
	2767	imgp->ip_user_envv = uap->envp;
	2768	imgp->ip_vattr = vap;
	2769	imgp->ip_origvattr = origvap;
	2770	imgp->ip_vfs_context = &context;
	2771	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE);
	2772	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
	2773	imgp->ip_mac_return = 0;
	2774	imgp->ip_px_persona = NULL;
	2775	imgp->ip_px_pcred_info = NULL;
	2776	imgp->ip_cs_error = OS_REASON_NULL;
	2777	imgp->ip_simulator_binary = IMGPF_SB_DEFAULT;
	2778
	2779	if (uap->adesc != USER_ADDR_NULL) {
	2780	if (is_64) {
	2781	error = copyin(uap->adesc, &px_args, sizeof(px_args));
	2782	} else {
	2783	struct user32__posix_spawn_args_desc px_args32;
	2784
	2785	error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
	2786
	2787	/*
	2788	* Convert arguments descriptor from external 32 bit
	2789	* representation to internal 64 bit representation
	2790	*/
	2791	px_args.attr_size = px_args32.attr_size;
	2792	px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
	2793	px_args.file_actions_size = px_args32.file_actions_size;
	2794	px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
	2795	px_args.port_actions_size = px_args32.port_actions_size;
	2796	px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
	2797	px_args.mac_extensions_size = px_args32.mac_extensions_size;
	2798	px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
	2799	px_args.coal_info_size = px_args32.coal_info_size;
	2800	px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
	2801	px_args.persona_info_size = px_args32.persona_info_size;
	2802	px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
	2803	px_args.posix_cred_info_size = px_args32.posix_cred_info_size;
	2804	px_args.posix_cred_info = CAST_USER_ADDR_T(px_args32.posix_cred_info);
	2805	}
	2806	if (error) {
	2807	goto bad;
	2808	}
	2809
	2810	if (px_args.attr_size != 0) {
	2811	/*
	2812	* We are not copying the port_actions pointer,
	2813	* because we already have it from px_args.
	2814	* This is a bit fragile: <rdar://problem/16427422>
	2815	*/
	2816
	2817	if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset)) != 0) {
	2818	goto bad;
	2819	}
	2820
	2821	bzero((void *)((unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
	2822
	2823	imgp->ip_px_sa = &px_sa;
	2824	}
	2825	if (px_args.file_actions_size != 0) {
	2826	/* Limit file_actions to allowed number of open files */
	2827	int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
	2828	size_t maxfa_size = PSF_ACTIONS_SIZE(maxfa);
	2829	if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) \|\|
	2830	maxfa_size == 0 \|\| px_args.file_actions_size > maxfa_size) {
	2831	error = EINVAL;
	2832	goto bad;
	2833	}
	2834	MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
	2835	if (px_sfap == NULL) {
	2836	error = ENOMEM;
	2837	goto bad;
	2838	}
	2839	imgp->ip_px_sfa = px_sfap;
	2840
	2841	if ((error = copyin(px_args.file_actions, px_sfap,
	2842	px_args.file_actions_size)) != 0) {
	2843	goto bad;
	2844	}
	2845
	2846	/* Verify that the action count matches the struct size */
	2847	size_t psfsize = PSF_ACTIONS_SIZE(px_sfap->psfa_act_count);
	2848	if (psfsize == 0 \|\| psfsize != px_args.file_actions_size) {
	2849	error = EINVAL;
	2850	goto bad;
	2851	}
	2852	}
	2853	if (px_args.port_actions_size != 0) {
	2854	/* Limit port_actions to one page of data */
	2855	if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) \|\|
	2856	px_args.port_actions_size > PAGE_SIZE) {
	2857	error = EINVAL;
	2858	goto bad;
	2859	}
	2860
	2861	MALLOC(px_spap, _posix_spawn_port_actions_t,
	2862	px_args.port_actions_size, M_TEMP, M_WAITOK);
	2863	if (px_spap == NULL) {
	2864	error = ENOMEM;
	2865	goto bad;
	2866	}
	2867	imgp->ip_px_spa = px_spap;
	2868
	2869	if ((error = copyin(px_args.port_actions, px_spap,
	2870	px_args.port_actions_size)) != 0) {
	2871	goto bad;
	2872	}
	2873
	2874	/* Verify that the action count matches the struct size */
	2875	size_t pasize = PS_PORT_ACTIONS_SIZE(px_spap->pspa_count);
	2876	if (pasize == 0 \|\| pasize != px_args.port_actions_size) {
	2877	error = EINVAL;
	2878	goto bad;
	2879	}
	2880	}
	2881	#if CONFIG_PERSONAS
	2882	/* copy in the persona info */
	2883	if (px_args.persona_info_size != 0 && px_args.persona_info != 0) {
	2884	/* for now, we need the exact same struct in user space */
	2885	if (px_args.persona_info_size != sizeof(*px_persona)) {
	2886	error = ERANGE;
	2887	goto bad;
	2888	}
	2889
	2890	MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK \| M_ZERO);
	2891	if (px_persona == NULL) {
	2892	error = ENOMEM;
	2893	goto bad;
	2894	}
	2895	imgp->ip_px_persona = px_persona;
	2896
	2897	if ((error = copyin(px_args.persona_info, px_persona,
	2898	px_args.persona_info_size)) != 0) {
	2899	goto bad;
	2900	}
	2901	if ((error = spawn_validate_persona(px_persona)) != 0) {
	2902	goto bad;
	2903	}
	2904	}
	2905	#endif
	2906	/* copy in the posix cred info */
	2907	if (px_args.posix_cred_info_size != 0 && px_args.posix_cred_info != 0) {
	2908	/* for now, we need the exact same struct in user space */
	2909	if (px_args.posix_cred_info_size != sizeof(*px_pcred_info)) {
	2910	error = ERANGE;
	2911	goto bad;
	2912	}
	2913
	2914	if (!kauth_cred_issuser(kauth_cred_get())) {
	2915	error = EPERM;
	2916	goto bad;
	2917	}
	2918
	2919	MALLOC(px_pcred_info, struct _posix_spawn_posix_cred_info *,
	2920	px_args.posix_cred_info_size, M_TEMP, M_WAITOK \| M_ZERO);
	2921	if (px_pcred_info == NULL) {
	2922	error = ENOMEM;
	2923	goto bad;
	2924	}
	2925	imgp->ip_px_pcred_info = px_pcred_info;
	2926
	2927	if ((error = copyin(px_args.posix_cred_info, px_pcred_info,
	2928	px_args.posix_cred_info_size)) != 0) {
	2929	goto bad;
	2930	}
	2931
	2932	if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) {
	2933	if (px_pcred_info->pspci_ngroups > NGROUPS_MAX) {
	2934	error = EINVAL;
	2935	goto bad;
	2936	}
	2937	}
	2938	}
	2939	#if CONFIG_MACF
	2940	if (px_args.mac_extensions_size != 0) {
	2941	if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0) {
	2942	goto bad;
	2943	}
	2944	}
	2945	#endif /* CONFIG_MACF */
	2946	}
	2947
	2948	/* set uthread to parent */
	2949	uthread = get_bsdthread_info(current_thread());
	2950
	2951	/*
	2952	* <rdar://6640530>; this does not result in a behaviour change
	2953	* relative to Leopard, so there should not be any existing code
	2954	* which depends on it.
	2955	*/
	2956	if (uthread->uu_flag & UT_VFORK) {
	2957	error = EINVAL;
	2958	goto bad;
	2959	}
	2960
	2961	if (imgp->ip_px_sa != NULL) {
	2962	struct _posix_spawnattr psa = (struct _posix_spawnattr ) imgp->ip_px_sa;
	2963	if ((error = exec_validate_spawnattr_policy(psa->psa_apptype)) != 0) {
	2964	goto bad;
	2965	}
	2966	}
	2967
	2968	/*
	2969	* If we don't have the extension flag that turns "posix_spawn()"
	2970	* into "execve() with options", then we will be creating a new
	2971	* process which does not inherit memory from the parent process,
	2972	* which is one of the most expensive things about using fork()
	2973	* and execve().
	2974	*/
	2975	if (imgp->ip_px_sa == NULL \|\| !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) {
	2976	/* Set the new task's coalition, if it is requested. */
	2977	coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
	2978	#if CONFIG_COALITIONS
	2979	int i, ncoals;
	2980	kern_return_t kr = KERN_SUCCESS;
	2981	struct _posix_spawn_coalition_info coal_info;
	2982	int coal_role[COALITION_NUM_TYPES];
	2983
	2984	if (imgp->ip_px_sa == NULL \|\| !px_args.coal_info) {
	2985	goto do_fork1;
	2986	}
	2987
	2988	memset(&coal_info, 0, sizeof(coal_info));
	2989
	2990	if (px_args.coal_info_size > sizeof(coal_info)) {
	2991	px_args.coal_info_size = sizeof(coal_info);
	2992	}
	2993	error = copyin(px_args.coal_info,
	2994	&coal_info, px_args.coal_info_size);
	2995	if (error != 0) {
	2996	goto bad;
	2997	}
	2998
	2999	ncoals = 0;
	3000	for (i = 0; i < COALITION_NUM_TYPES; i++) {
	3001	uint64_t cid = coal_info.psci_info[i].psci_id;
	3002	if (cid != 0) {
	3003	/*
	3004	* don't allow tasks which are not in a
	3005	* privileged coalition to spawn processes
	3006	* into coalitions other than their own
	3007	*/
	3008	if (!task_is_in_privileged_coalition(p->task, i)) {
	3009	coal_dbg("ERROR: %d not in privilegd "
	3010	"coalition of type %d",
	3011	p->p_pid, i);
	3012	spawn_coalitions_release_all(coal);
	3013	error = EPERM;
	3014	goto bad;
	3015	}
	3016
	3017	coal_dbg("searching for coalition id:%llu", cid);
	3018	/*
	3019	* take a reference and activation on the
	3020	* coalition to guard against free-while-spawn
	3021	* races
	3022	*/
	3023	coal[i] = coalition_find_and_activate_by_id(cid);
	3024	if (coal[i] == COALITION_NULL) {
	3025	coal_dbg("could not find coalition id:%llu "
	3026	"(perhaps it has been terminated or reaped)", cid);
	3027	/*
	3028	* release any other coalition's we
	3029	* may have a reference to
	3030	*/
	3031	spawn_coalitions_release_all(coal);
	3032	error = ESRCH;
	3033	goto bad;
	3034	}
	3035	if (coalition_type(coal[i]) != i) {
	3036	coal_dbg("coalition with id:%lld is not of type:%d"
	3037	" (it's type:%d)", cid, i, coalition_type(coal[i]));
	3038	error = ESRCH;
	3039	goto bad;
	3040	}
	3041	coal_role[i] = coal_info.psci_info[i].psci_role;
	3042	ncoals++;
	3043	}
	3044	}
	3045	if (ncoals < COALITION_NUM_TYPES) {
	3046	/*
	3047	* If the user is attempting to spawn into a subset of
	3048	* the known coalition types, then make sure they have
	3049	* _at_least_ specified a resource coalition. If not,
	3050	* the following fork1() call will implicitly force an
	3051	* inheritance from 'p' and won't actually spawn the
	3052	* new task into the coalitions the user specified.
	3053	* (also the call to coalitions_set_roles will panic)
	3054	*/
	3055	if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
	3056	spawn_coalitions_release_all(coal);
	3057	error = EINVAL;
	3058	goto bad;
	3059	}
	3060	}
	3061	do_fork1:
	3062	#endif /* CONFIG_COALITIONS */
	3063
	3064	/*
	3065	* note that this will implicitly inherit the
	3066	* caller's persona (if it exists)
	3067	*/
	3068	error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
	3069	/* returns a thread and task reference */
	3070
	3071	if (error == 0) {
	3072	new_task = get_threadtask(imgp->ip_new_thread);
	3073	}
	3074	#if CONFIG_COALITIONS
	3075	/* set the roles of this task within each given coalition */
	3076	if (error == 0) {
	3077	kr = coalitions_set_roles(coal, new_task, coal_role);
	3078	if (kr != KERN_SUCCESS) {
	3079	error = EINVAL;
	3080	}
	3081	if (kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_COALITION,
	3082	MACH_COALITION_ADOPT))) {
	3083	for (i = 0; i < COALITION_NUM_TYPES; i++) {
	3084	if (coal[i] != COALITION_NULL) {
	3085	/*
	3086	* On 32-bit targets, uniqueid
	3087	* will get truncated to 32 bits
	3088	*/
	3089	KDBG_RELEASE(MACHDBG_CODE(
	3090	DBG_MACH_COALITION,
	3091	MACH_COALITION_ADOPT),
	3092	coalition_id(coal[i]),
	3093	get_task_uniqueid(new_task));
	3094	}
	3095	}
	3096	}
	3097	}
	3098
	3099	/* drop our references and activations - fork1() now holds them */
	3100	spawn_coalitions_release_all(coal);
	3101	#endif /* CONFIG_COALITIONS */
	3102	if (error != 0) {
	3103	goto bad;
	3104	}
	3105	imgp->ip_flags \|= IMGPF_SPAWN; /* spawn w/o exec */
	3106	spawn_no_exec = TRUE; /* used in later tests */
	3107	} else {
	3108	/*
	3109	* For execve case, create a new task and thread
	3110	* which points to current_proc. The current_proc will point
	3111	* to the new task after image activation and proc ref drain.
	3112	*
	3113	* proc (current_proc) <----- old_task (current_task)
	3114	* ^ \| ^
	3115	* \| \| \|
	3116	* \| ----------------------------------
	3117	* \|
	3118	* --------- new_task (task marked as TF_EXEC_COPY)
	3119	*
	3120	* After image activation, the proc will point to the new task
	3121	* and would look like following.
	3122	*
	3123	* proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
	3124	* ^ \|
	3125	* \| \|
	3126	* \| ----------> new_task
	3127	* \| \|
	3128	* -----------------
	3129	*
	3130	* During exec any transition from new_task -> proc is fine, but don't allow
	3131	* transition from proc->task, since it will modify old_task.
	3132	*/
	3133	imgp->ip_new_thread = fork_create_child(old_task,
	3134	NULL,
	3135	p,
	3136	FALSE,
	3137	p->p_flag & P_LP64,
	3138	task_get_64bit_data(old_task),
	3139	TRUE);
	3140	/* task and thread ref returned by fork_create_child */
	3141	if (imgp->ip_new_thread == NULL) {
	3142	error = ENOMEM;
	3143	goto bad;
	3144	}
	3145
	3146	new_task = get_threadtask(imgp->ip_new_thread);
	3147	imgp->ip_flags \|= IMGPF_EXEC;
	3148	}
	3149
	3150	if (spawn_no_exec) {
	3151	p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
	3152
	3153	/*
	3154	* We had to wait until this point before firing the
	3155	* proc:::create probe, otherwise p would not point to the
	3156	* child process.
	3157	*/
	3158	DTRACE_PROC1(create, proc_t, p);
	3159	}
	3160	assert(p != NULL);
	3161
	3162	context.vc_thread = imgp->ip_new_thread;
	3163	context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
	3164
	3165	/*
	3166	* Post fdcopy(), pre exec_handle_sugid() - this is where we want
	3167	* to handle the file_actions. Since vfork() also ends up setting
	3168	* us into the parent process group, and saved off the signal flags,
	3169	* this is also where we want to handle the spawn flags.
	3170	*/
	3171
	3172	/* Has spawn file actions? */
	3173	if (imgp->ip_px_sfa != NULL) {
	3174	/*
	3175	* The POSIX_SPAWN_CLOEXEC_DEFAULT flag
	3176	* is handled in exec_handle_file_actions().
	3177	*/
	3178	#if CONFIG_AUDIT
	3179	/*
	3180	* The file actions auditing can overwrite the upath of
	3181	* AUE_POSIX_SPAWN audit record. Save the audit record.
	3182	*/
	3183	struct kaudit_record *save_uu_ar = uthread->uu_ar;
	3184	uthread->uu_ar = NULL;
	3185	#endif
	3186	error = exec_handle_file_actions(imgp,
	3187	imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0);
	3188	#if CONFIG_AUDIT
	3189	/* Restore the AUE_POSIX_SPAWN audit record. */
	3190	uthread->uu_ar = save_uu_ar;
	3191	#endif
	3192	if (error != 0) {
	3193	goto bad;
	3194	}
	3195	}
	3196
	3197	/* Has spawn port actions? */
	3198	if (imgp->ip_px_spa != NULL) {
	3199	#if CONFIG_AUDIT
	3200	/*
	3201	* Do the same for the port actions as we did for the file
	3202	* actions. Save the AUE_POSIX_SPAWN audit record.
	3203	*/
	3204	struct kaudit_record *save_uu_ar = uthread->uu_ar;
	3205	uthread->uu_ar = NULL;
	3206	#endif
	3207	error = exec_handle_port_actions(imgp, &port_actions);
	3208	#if CONFIG_AUDIT
	3209	/* Restore the AUE_POSIX_SPAWN audit record. */
	3210	uthread->uu_ar = save_uu_ar;
	3211	#endif
	3212	if (error != 0) {
	3213	goto bad;
	3214	}
	3215	}
	3216
	3217	/* Has spawn attr? */
	3218	if (imgp->ip_px_sa != NULL) {
	3219	/*
	3220	* Reset UID/GID to parent's RUID/RGID; This works only
	3221	* because the operation occurs after the vfork() and
	3222	* before the call to exec_handle_sugid() by the image
	3223	* activator called from exec_activate_image(). POSIX
	3224	* requires that any setuid/setgid bits on the process
	3225	* image will take precedence over the spawn attributes
	3226	* (re)setting them.
	3227	*
	3228	* Modifications to p_ucred must be guarded using the
	3229	* proc's ucred lock. This prevents others from accessing
	3230	* a garbage credential.
	3231	*/
	3232	if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
	3233	apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred){
	3234	return kauth_cred_setuidgid(my_cred,
	3235	kauth_cred_getruid(my_cred),
	3236	kauth_cred_getrgid(my_cred));
	3237	});
	3238	}
	3239
	3240	if (imgp->ip_px_pcred_info) {
	3241	if (!spawn_no_exec) {
	3242	error = ENOTSUP;
	3243	goto bad;
	3244	}
	3245
	3246	error = spawn_posix_cred_adopt(p, imgp->ip_px_pcred_info);
	3247	if (error != 0) {
	3248	goto bad;
	3249	}
	3250	}
	3251
	3252	#if CONFIG_PERSONAS
	3253	if (imgp->ip_px_persona != NULL) {
	3254	if (!spawn_no_exec) {
	3255	error = ENOTSUP;
	3256	goto bad;
	3257	}
	3258
	3259	/*
	3260	* If we were asked to spawn a process into a new persona,
	3261	* do the credential switch now (which may override the UID/GID
	3262	* inherit done just above). It's important to do this switch
	3263	* before image activation both for reasons stated above, and
	3264	* to ensure that the new persona has access to the image/file
	3265	* being executed.
	3266	*/
	3267	error = spawn_persona_adopt(p, imgp->ip_px_persona);
	3268	if (error != 0) {
	3269	goto bad;
	3270	}
	3271	}
	3272	#endif /* CONFIG_PERSONAS */
	3273	#if !SECURE_KERNEL
	3274	/*
	3275	* Disable ASLR for the spawned process.
	3276	*
	3277	* But only do so if we are not embedded + RELEASE.
	3278	* While embedded allows for a boot-arg (-disable_aslr)
	3279	* to deal with this (which itself is only honored on
	3280	* DEVELOPMENT or DEBUG builds of xnu), it is often
	3281	* useful or necessary to disable ASLR on a per-process
	3282	* basis for unit testing and debugging.
	3283	*/
	3284	if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR) {
	3285	OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
	3286	}
	3287	#endif /* !SECURE_KERNEL */
	3288
	3289	/* Randomize high bits of ASLR slide */
	3290	if (px_sa.psa_flags & _POSIX_SPAWN_HIGH_BITS_ASLR) {
	3291	imgp->ip_flags \|= IMGPF_HIGH_BITS_ASLR;
	3292	}
	3293
	3294	#if !SECURE_KERNEL
	3295	/*
	3296	* Forcibly disallow execution from data pages for the spawned process
	3297	* even if it would otherwise be permitted by the architecture default.
	3298	*/
	3299	if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC) {
	3300	imgp->ip_flags \|= IMGPF_ALLOW_DATA_EXEC;
	3301	}
	3302	#endif /* !SECURE_KERNEL */
	3303
	3304	if ((px_sa.psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) ==
	3305	POSIX_SPAWN_PROC_TYPE_DRIVER) {
	3306	imgp->ip_flags \|= IMGPF_DRIVER;
	3307	}
	3308	}
	3309
	3310	/*
	3311	* Disable ASLR during image activation. This occurs either if the
	3312	* _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
	3313	* P_DISABLE_ASLR was inherited from the parent process.
	3314	*/
	3315	if (p->p_flag & P_DISABLE_ASLR) {
	3316	imgp->ip_flags \|= IMGPF_DISABLE_ASLR;
	3317	}
	3318
	3319	/*
	3320	* Clear transition flag so we won't hang if exec_activate_image() causes
	3321	* an automount (and launchd does a proc sysctl to service it).
	3322	*
	3323	* <rdar://problem/6848672>, <rdar://problem/5959568>.
	3324	*/
	3325	if (spawn_no_exec) {
	3326	proc_transend(p, 0);
	3327	proc_transit_set = 0;
	3328	}
	3329
	3330	#if MAC_SPAWN /* XXX */
	3331	if (uap->mac_p != USER_ADDR_NULL) {
	3332	error = mac_execve_enter(uap->mac_p, imgp);
	3333	if (error) {
	3334	goto bad;
	3335	}
	3336	}
	3337	#endif
	3338
	3339	/*
	3340	* Activate the image
	3341	*/
	3342	error = exec_activate_image(imgp);
	3343	#if defined(HAS_APPLE_PAC)
	3344	ml_task_set_disable_user_jop(new_task, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
	3345	ml_thread_set_disable_user_jop(imgp->ip_new_thread, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
	3346	#endif
	3347
	3348	if (error == 0 && !spawn_no_exec) {
	3349	p = proc_exec_switch_task(p, old_task, new_task, imgp->ip_new_thread);
	3350	/* proc ref returned */
	3351	should_release_proc_ref = TRUE;
	3352
	3353	/*
	3354	* Need to transfer pending watch port boosts to the new task while still making
	3355	* sure that the old task remains in the importance linkage. Create an importance
	3356	* linkage from old task to new task, then switch the task importance base
	3357	* of old task and new task. After the switch the port watch boost will be
	3358	* boosting the new task and new task will be donating importance to old task.
	3359	*/
	3360	inherit = ipc_importance_exec_switch_task(old_task, new_task);
	3361	}
	3362
	3363	if (error == 0) {
	3364	/* process completed the exec */
	3365	exec_done = TRUE;
	3366	} else if (error == -1) {
	3367	/* Image not claimed by any activator? */
	3368	error = ENOEXEC;
	3369	}
	3370
	3371	if (!error && imgp->ip_px_sa != NULL) {
	3372	thread_t child_thread = imgp->ip_new_thread;
	3373	uthread_t child_uthread = get_bsdthread_info(child_thread);
	3374
	3375	/*
	3376	* Because of POSIX_SPAWN_SETEXEC, we need to handle this after image
	3377	* activation, else when image activation fails (before the point of no
	3378	* return) would leave the parent process in a modified state.
	3379	*/
	3380	if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
	3381	struct setpgid_args spga;
	3382	spga.pid = p->p_pid;
	3383	spga.pgid = px_sa.psa_pgroup;
	3384	/*
	3385	* Effectively, call setpgid() system call; works
	3386	* because there are no pointer arguments.
	3387	*/
	3388	if ((error = setpgid(p, &spga, ival)) != 0) {
	3389	goto bad;
	3390	}
	3391	}
	3392
	3393	if (px_sa.psa_flags & POSIX_SPAWN_SETSID) {
	3394	error = setsid_internal(p);
	3395	if (error != 0) {
	3396	goto bad;
	3397	}
	3398	}
	3399
	3400	/*
	3401	* If we have a spawn attr, and it contains signal related flags,
	3402	* then we need to process them in the "context" of the new child
	3403	* process, so we have to process it following image activation,
	3404	* prior to making the thread runnable in user space. This is
	3405	* necessitated by some signal information being per-thread rather
	3406	* than per-process, and we don't have the new allocation in hand
	3407	* until after the image is activated.
	3408	*/
	3409
	3410	/*
	3411	* Mask a list of signals, instead of them being unmasked, if
	3412	* they were unmasked in the parent; note that some signals
	3413	* are not maskable.
	3414	*/
	3415	if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK) {
	3416	child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
	3417	}
	3418	/*
	3419	* Default a list of signals instead of ignoring them, if
	3420	* they were ignored in the parent. Note that we pass
	3421	* spawn_no_exec to setsigvec() to indicate that we called
	3422	* fork1() and therefore do not need to call proc_signalstart()
	3423	* internally.
	3424	*/
	3425	if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
	3426	vec.sa_handler = SIG_DFL;
	3427	vec.sa_tramp = 0;
	3428	vec.sa_mask = 0;
	3429	vec.sa_flags = 0;
	3430	for (sig = 1; sig < NSIG; sig++) {
	3431	if (px_sa.psa_sigdefault & (1 << (sig - 1))) {
	3432	error = setsigvec(p, child_thread, sig, &vec, spawn_no_exec);
	3433	}
	3434	}
	3435	}
	3436
	3437	/*
	3438	* Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
	3439	* usage limit, which will generate a resource exceeded exception if any one thread exceeds the
	3440	* limit.
	3441	*
	3442	* Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
	3443	*/
	3444	if (px_sa.psa_cpumonitor_percent != 0) {
	3445	/*
	3446	* Always treat a CPU monitor activation coming from spawn as entitled. Requiring
	3447	* an entitlement to configure the monitor a certain way seems silly, since
	3448	* whomever is turning it on could just as easily choose not to do so.
	3449	*/
	3450	error = proc_set_task_ruse_cpu(p->task,
	3451	TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
	3452	px_sa.psa_cpumonitor_percent,
	3453	px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
	3454	0, TRUE);
	3455	}
	3456
	3457
	3458	if (px_pcred_info &&
	3459	(px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_LOGIN)) {
	3460	/*
	3461	* setlogin() must happen after setsid()
	3462	*/
	3463	setlogin_internal(p, px_pcred_info->pspci_login);
	3464	}
	3465	}
	3466
	3467	bad:
	3468
	3469	if (error == 0) {
	3470	/* reset delay idle sleep status if set */
	3471	#if !CONFIG_EMBEDDED
	3472	if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) {
	3473	OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
	3474	}
	3475	#endif /* !CONFIG_EMBEDDED */
	3476	/* upon successful spawn, re/set the proc control state */
	3477	if (imgp->ip_px_sa != NULL) {
	3478	switch (px_sa.psa_pcontrol) {
	3479	case POSIX_SPAWN_PCONTROL_THROTTLE:
	3480	p->p_pcaction = P_PCTHROTTLE;
	3481	break;
	3482	case POSIX_SPAWN_PCONTROL_SUSPEND:
	3483	p->p_pcaction = P_PCSUSP;
	3484	break;
	3485	case POSIX_SPAWN_PCONTROL_KILL:
	3486	p->p_pcaction = P_PCKILL;
	3487	break;
	3488	case POSIX_SPAWN_PCONTROL_NONE:
	3489	default:
	3490	p->p_pcaction = 0;
	3491	break;
	3492	}
	3493	;
	3494	}
	3495	exec_resettextvp(p, imgp);
	3496
	3497	#if CONFIG_MEMORYSTATUS
	3498	/* Set jetsam priority for DriverKit processes */
	3499	if (px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DRIVER) {
	3500	px_sa.psa_priority = JETSAM_PRIORITY_DRIVER_APPLE;
	3501	}
	3502
	3503	/* Has jetsam attributes? */
	3504	if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
	3505	/*
	3506	* With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
	3507	* longer relevant, as background limits are described via the inactive limit slots.
	3508	*
	3509	* That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
	3510	* we attempt to mimic previous behavior by forcing the BG limit data into the
	3511	* inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode.
	3512	*/
	3513
	3514	if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
	3515	memorystatus_update(p, px_sa.psa_priority, 0, FALSE, /* assertion priority */
	3516	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
	3517	TRUE,
	3518	-1, TRUE,
	3519	px_sa.psa_memlimit_inactive, FALSE);
	3520	} else {
	3521	memorystatus_update(p, px_sa.psa_priority, 0, FALSE, /* assertion priority */
	3522	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
	3523	TRUE,
	3524	px_sa.psa_memlimit_active,
	3525	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
	3526	px_sa.psa_memlimit_inactive,
	3527	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL));
	3528	}
	3529	}
	3530
	3531	/* Has jetsam relaunch behavior? */
	3532	if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK)) {
	3533	/*
	3534	* Launchd has passed in data indicating the behavior of this process in response to jetsam.
	3535	* This data would be used by the jetsam subsystem to determine the position and protection
	3536	* offered to this process on dirty -> clean transitions.
	3537	*/
	3538	int relaunch_flags = P_MEMSTAT_RELAUNCH_UNKNOWN;
	3539	switch (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK) {
	3540	case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_LOW:
	3541	relaunch_flags = P_MEMSTAT_RELAUNCH_LOW;
	3542	break;
	3543	case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MED:
	3544	relaunch_flags = P_MEMSTAT_RELAUNCH_MED;
	3545	break;
	3546	case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_HIGH:
	3547	relaunch_flags = P_MEMSTAT_RELAUNCH_HIGH;
	3548	break;
	3549	default:
	3550	break;
	3551	}
	3552	memorystatus_relaunch_flags_update(p, relaunch_flags);
	3553	}
	3554
	3555	#endif /* CONFIG_MEMORYSTATUS */
	3556	if (imgp->ip_px_sa != NULL && px_sa.psa_thread_limit > 0) {
	3557	task_set_thread_limit(new_task, (uint16_t)px_sa.psa_thread_limit);
	3558	}
	3559	}
	3560
	3561	/*
	3562	* If we successfully called fork1(), we always need to do this;
	3563	* we identify this case by noting the IMGPF_SPAWN flag. This is
	3564	* because we come back from that call with signals blocked in the
	3565	* child, and we have to unblock them, but we want to wait until
	3566	* after we've performed any spawn actions. This has to happen
	3567	* before check_for_signature(), which uses psignal.
	3568	*/
	3569	if (spawn_no_exec) {
	3570	if (proc_transit_set) {
	3571	proc_transend(p, 0);
	3572	}
	3573
	3574	/*
	3575	* Drop the signal lock on the child which was taken on our
	3576	* behalf by forkproc()/cloneproc() to prevent signals being
	3577	* received by the child in a partially constructed state.
	3578	*/
	3579	proc_signalend(p, 0);
	3580
	3581	/* flag the 'fork' has occurred */
	3582	proc_knote(p->p_pptr, NOTE_FORK \| p->p_pid);
	3583	}
	3584
	3585	/* flag exec has occurred, notify only if it has not failed due to FP Key error */
	3586	if (!error && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) {
	3587	proc_knote(p, NOTE_EXEC);
	3588	}
	3589
	3590
	3591	if (error == 0) {
	3592	/*
	3593	* We need to initialize the bank context behind the protection of
	3594	* the proc_trans lock to prevent a race with exit. We can't do this during
	3595	* exec_activate_image because task_bank_init checks entitlements that
	3596	* aren't loaded until subsequent calls (including exec_resettextvp).
	3597	*/
	3598	error = proc_transstart(p, 0, 0);
	3599
	3600	if (error == 0) {
	3601	task_bank_init(new_task);
	3602	proc_transend(p, 0);
	3603	}
	3604
	3605	#if __arm64__
	3606	proc_legacy_footprint_entitled(p, new_task, __FUNCTION__);
	3607	proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__);
	3608	#endif /* __arm64__ */
	3609	}
	3610
	3611	/* Inherit task role from old task to new task for exec */
	3612	if (error == 0 && !spawn_no_exec) {
	3613	proc_inherit_task_role(new_task, old_task);
	3614	}
	3615
	3616	#if CONFIG_ARCADE
	3617	if (error == 0) {
	3618	/*
	3619	* Check to see if we need to trigger an arcade upcall AST now
	3620	* that the vnode has been reset on the task.
	3621	*/
	3622	arcade_prepare(new_task, imgp->ip_new_thread);
	3623	}
	3624	#endif /* CONFIG_ARCADE */
	3625
	3626	/* Clear the initial wait on the thread before handling spawn policy */
	3627	if (imgp && imgp->ip_new_thread) {
	3628	task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT);
	3629	}
	3630
	3631	/*
	3632	* Apply the spawnattr policy, apptype (which primes the task for importance donation),
	3633	* and bind any portwatch ports to the new task.
	3634	* This must be done after the exec so that the child's thread is ready,
	3635	* and after the in transit state has been released, because priority is
	3636	* dropped here so we need to be prepared for a potentially long preemption interval
	3637	*
	3638	* TODO: Consider splitting this up into separate phases
	3639	*/
	3640	if (error == 0 && imgp->ip_px_sa != NULL) {
	3641	struct _posix_spawnattr psa = (struct _posix_spawnattr ) imgp->ip_px_sa;
	3642
	3643	error = exec_handle_spawnattr_policy(p, imgp->ip_new_thread, psa->psa_apptype, psa->psa_qos_clamp,
	3644	psa->psa_darwin_role, &port_actions);
	3645	}
	3646
	3647	/* Transfer the turnstile watchport boost to new task if in exec */
	3648	if (error == 0 && !spawn_no_exec) {
	3649	task_transfer_turnstile_watchports(old_task, new_task, imgp->ip_new_thread);
	3650	}
	3651
	3652	/*
	3653	* Apply the requested maximum address.
	3654	*/
	3655	if (error == 0 && imgp->ip_px_sa != NULL) {
	3656	struct _posix_spawnattr psa = (struct _posix_spawnattr ) imgp->ip_px_sa;
	3657
	3658	if (psa->psa_max_addr) {
	3659	vm_map_set_max_addr(get_task_map(new_task), psa->psa_max_addr);
	3660	}
	3661	}
	3662
	3663	if (error == 0) {
	3664	/* Apply the main thread qos */
	3665	thread_t main_thread = imgp->ip_new_thread;
	3666	task_set_main_thread_qos(new_task, main_thread);
	3667
	3668	#if CONFIG_MACF
	3669	/*
	3670	* Processes with the MAP_JIT entitlement are permitted to have
	3671	* a jumbo-size map.
	3672	*/
	3673	if (mac_proc_check_map_anon(p, 0, 0, 0, MAP_JIT, NULL) == 0) {
	3674	vm_map_set_jumbo(get_task_map(new_task));
	3675	vm_map_set_jit_entitled(get_task_map(new_task));
	3676	}
	3677	#endif /* CONFIG_MACF */
	3678	}
	3679
	3680	/*
	3681	* Release any ports we kept around for binding to the new task
	3682	* We need to release the rights even if the posix_spawn has failed.
	3683	*/
	3684	if (imgp->ip_px_spa != NULL) {
	3685	exec_port_actions_destroy(&port_actions);
	3686	}
	3687
	3688	/*
	3689	* We have to delay operations which might throw a signal until after
	3690	* the signals have been unblocked; however, we want that to happen
	3691	* after exec_resettextvp() so that the textvp is correct when they
	3692	* fire.
	3693	*/
	3694	if (error == 0) {
	3695	error = check_for_signature(p, imgp);
	3696
	3697	/*
	3698	* Pay for our earlier safety; deliver the delayed signals from
	3699	* the incomplete spawn process now that it's complete.
	3700	*/
	3701	if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
	3702	psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
	3703	}
	3704
	3705	if (error == 0 && !spawn_no_exec) {
	3706	KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
	3707	p->p_pid);
	3708	}
	3709	}
	3710
	3711
	3712	if (imgp != NULL) {
	3713	if (imgp->ip_vp) {
	3714	vnode_put(imgp->ip_vp);
	3715	}
	3716	if (imgp->ip_scriptvp) {
	3717	vnode_put(imgp->ip_scriptvp);
	3718	}
	3719	if (imgp->ip_strings) {
	3720	execargs_free(imgp);
	3721	}
	3722	if (imgp->ip_px_sfa != NULL) {
	3723	FREE(imgp->ip_px_sfa, M_TEMP);
	3724	}
	3725	if (imgp->ip_px_spa != NULL) {
	3726	FREE(imgp->ip_px_spa, M_TEMP);
	3727	}
	3728	#if CONFIG_PERSONAS
	3729	if (imgp->ip_px_persona != NULL) {
	3730	FREE(imgp->ip_px_persona, M_TEMP);
	3731	}
	3732	#endif
	3733	if (imgp->ip_px_pcred_info != NULL) {
	3734	FREE(imgp->ip_px_pcred_info, M_TEMP);
	3735	}
	3736	#if CONFIG_MACF
	3737	if (imgp->ip_px_smpx != NULL) {
	3738	spawn_free_macpolicyinfo(imgp->ip_px_smpx);
	3739	}
	3740	if (imgp->ip_execlabelp) {
	3741	mac_cred_label_free(imgp->ip_execlabelp);
	3742	}
	3743	if (imgp->ip_scriptlabelp) {
	3744	mac_vnode_label_free(imgp->ip_scriptlabelp);
	3745	}
	3746	if (imgp->ip_cs_error != OS_REASON_NULL) {
	3747	os_reason_free(imgp->ip_cs_error);
	3748	imgp->ip_cs_error = OS_REASON_NULL;
	3749	}
	3750	#endif
	3751	}
	3752
	3753	#if CONFIG_DTRACE
	3754	if (spawn_no_exec) {
	3755	/*
	3756	* In the original DTrace reference implementation,
	3757	* posix_spawn() was a libc routine that just
	3758	* did vfork(2) then exec(2). Thus the proc::: probes
	3759	* are very fork/exec oriented. The details of this
	3760	* in-kernel implementation of posix_spawn() is different
	3761	* (while producing the same process-observable effects)
	3762	* particularly w.r.t. errors, and which thread/process
	3763	* is constructing what on behalf of whom.
	3764	*/
	3765	if (error) {
	3766	DTRACE_PROC1(spawn__failure, int, error);
	3767	} else {
	3768	DTRACE_PROC(spawn__success);
	3769	/*
	3770	* Some DTrace scripts, e.g. newproc.d in
	3771	* /usr/bin, rely on the 'exec-success'
	3772	* probe being fired in the child after the
	3773	* new process image has been constructed
	3774	* in order to determine the associated pid.
	3775	*
	3776	* So, even though the parent built the image
	3777	* here, for compatibility, mark the new thread
	3778	* so 'exec-success' fires on it as it leaves
	3779	* the kernel.
	3780	*/
	3781	dtrace_thread_didexec(imgp->ip_new_thread);
	3782	}
	3783	} else {
	3784	if (error) {
	3785	DTRACE_PROC1(exec__failure, int, error);
	3786	} else {
	3787	dtrace_thread_didexec(imgp->ip_new_thread);
	3788	}
	3789	}
	3790
	3791	if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
	3792	(*dtrace_proc_waitfor_hook)(p);
	3793	}
	3794	#endif
	3795
	3796	#if CONFIG_AUDIT
	3797	if (!error && AUDIT_ENABLED() && p) {
	3798	/* Add the CDHash of the new process to the audit record */
	3799	uint8_t *cdhash = cs_get_cdhash(p);
	3800	if (cdhash) {
	3801	AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
	3802	}
	3803	}
	3804	#endif
	3805
	3806	/*
	3807	* clear bsd_info from old task if it did exec.
	3808	*/
	3809	if (task_did_exec(old_task)) {
	3810	set_bsdtask_info(old_task, NULL);
	3811	}
	3812
	3813	/* clear bsd_info from new task and terminate it if exec failed */
	3814	if (new_task != NULL && task_is_exec_copy(new_task)) {
	3815	set_bsdtask_info(new_task, NULL);
	3816	task_terminate_internal(new_task);
	3817	}
	3818
	3819	/* Return to both the parent and the child? */
	3820	if (imgp != NULL && spawn_no_exec) {
	3821	/*
	3822	* If the parent wants the pid, copy it out
	3823	*/
	3824	if (pid != USER_ADDR_NULL) {
	3825	_Static_assert(sizeof(p->p_pid) == 4, "posix_spawn() assumes a 32-bit pid_t");
	3826	bool aligned = (pid & 3) == 0;
	3827	if (aligned) {
	3828	(void)copyout_atomic32(p->p_pid, pid);
	3829	} else {
	3830	(void)suword(pid, p->p_pid);
	3831	}
	3832	}
	3833	retval[0] = error;
	3834
	3835	/*
	3836	* If we had an error, perform an internal reap ; this is
	3837	* entirely safe, as we have a real process backing us.
	3838	*/
	3839	if (error) {
	3840	proc_list_lock();
	3841	p->p_listflag \|= P_LIST_DEADPARENT;
	3842	proc_list_unlock();
	3843	proc_lock(p);
	3844	/* make sure no one else has killed it off... */
	3845	if (p->p_stat != SZOMB && p->exit_thread == NULL) {
	3846	p->exit_thread = current_thread();
	3847	proc_unlock(p);
	3848	exit1(p, 1, (int *)NULL);
	3849	} else {
	3850	/* someone is doing it for us; just skip it */
	3851	proc_unlock(p);
	3852	}
	3853	}
	3854	}
	3855
	3856	/*
	3857	* Do not terminate the current task, if proc_exec_switch_task did not
	3858	* switch the tasks, terminating the current task without the switch would
	3859	* result in losing the SIGKILL status.
	3860	*/
	3861	if (task_did_exec(old_task)) {
	3862	/* Terminate the current task, since exec will start in new task */
	3863	task_terminate_internal(old_task);
	3864	}
	3865
	3866	/* Release the thread ref returned by fork_create_child/fork1 */
	3867	if (imgp != NULL && imgp->ip_new_thread) {
	3868	/* wake up the new thread */
	3869	task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_FINAL_WAIT);
	3870	thread_deallocate(imgp->ip_new_thread);
	3871	imgp->ip_new_thread = NULL;
	3872	}
	3873
	3874	/* Release the ref returned by fork_create_child/fork1 */
	3875	if (new_task) {
	3876	task_deallocate(new_task);
	3877	new_task = NULL;
	3878	}
	3879
	3880	if (should_release_proc_ref) {
	3881	proc_rele(p);
	3882	}
	3883
	3884	if (bufp != NULL) {
	3885	FREE(bufp, M_TEMP);
	3886	}
	3887
	3888	if (inherit != NULL) {
	3889	ipc_importance_release(inherit);
	3890	}
	3891
	3892	return error;
	3893	}
	3894
	3895	/*
	3896	* proc_exec_switch_task
	3897	*
	3898	* Parameters: p proc
	3899	* old_task task before exec
	3900	* new_task task after exec
	3901	* new_thread thread in new task
	3902	*
	3903	* Returns: proc.
	3904	*
	3905	* Note: The function will switch the task pointer of proc
	3906	* from old task to new task. The switch needs to happen
	3907	* after draining all proc refs and inside a proc translock.
	3908	* In the case of failure to switch the task, which might happen
	3909	* if the process received a SIGKILL or jetsam killed it, it will make
	3910	* sure that the new tasks terminates. User proc ref returned
	3911	* to caller.
	3912	*
	3913	* This function is called after point of no return, in the case
	3914	* failure to switch, it will terminate the new task and swallow the
	3915	* error and let the terminated process complete exec and die.
	3916	*/
	3917	proc_t
	3918	proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread)
	3919	{
	3920	int error = 0;
	3921	boolean_t task_active;
	3922	boolean_t proc_active;
	3923	boolean_t thread_active;
	3924	thread_t old_thread = current_thread();
	3925
	3926	/*
	3927	* Switch the task pointer of proc to new task.
	3928	* Before switching the task, wait for proc_refdrain.
	3929	* After the switch happens, the proc can disappear,
	3930	* take a ref before it disappears. Waiting for
	3931	* proc_refdrain in exec will block all other threads
	3932	* trying to take a proc ref, boost the current thread
	3933	* to avoid priority inversion.
	3934	*/
	3935	thread_set_exec_promotion(old_thread);
	3936	p = proc_refdrain_with_refwait(p, TRUE);
	3937	/* extra proc ref returned to the caller */
	3938
	3939	assert(get_threadtask(new_thread) == new_task);
	3940	task_active = task_is_active(new_task);
	3941
	3942	/* Take the proc_translock to change the task ptr */
	3943	proc_lock(p);
	3944	proc_active = !(p->p_lflag & P_LEXIT);
	3945
	3946	/* Check if the current thread is not aborted due to SIGKILL */
	3947	thread_active = thread_is_active(old_thread);
	3948
	3949	/*
	3950	* Do not switch the task if the new task or proc is already terminated
	3951	* as a result of error in exec past point of no return
	3952	*/
	3953	if (proc_active && task_active && thread_active) {
	3954	error = proc_transstart(p, 1, 0);
	3955	if (error == 0) {
	3956	uthread_t new_uthread = get_bsdthread_info(new_thread);
	3957	uthread_t old_uthread = get_bsdthread_info(current_thread());
	3958
	3959	/*
	3960	* bsd_info of old_task will get cleared in execve and posix_spawn
	3961	* after firing exec-success/error dtrace probe.
	3962	*/
	3963	p->task = new_task;
	3964
	3965	/* Clear dispatchqueue and workloop ast offset */
	3966	p->p_dispatchqueue_offset = 0;
	3967	p->p_dispatchqueue_serialno_offset = 0;
	3968	p->p_dispatchqueue_label_offset = 0;
	3969	p->p_return_to_kernel_offset = 0;
	3970
	3971	/* Copy the signal state, dtrace state and set bsd ast on new thread */
	3972	act_set_astbsd(new_thread);
	3973	new_uthread->uu_siglist = old_uthread->uu_siglist;
	3974	new_uthread->uu_sigwait = old_uthread->uu_sigwait;
	3975	new_uthread->uu_sigmask = old_uthread->uu_sigmask;
	3976	new_uthread->uu_oldmask = old_uthread->uu_oldmask;
	3977	new_uthread->uu_vforkmask = old_uthread->uu_vforkmask;
	3978	new_uthread->uu_exit_reason = old_uthread->uu_exit_reason;
	3979	#if CONFIG_DTRACE
	3980	new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig;
	3981	new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop;
	3982	new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid;
	3983	assert(new_uthread->t_dtrace_scratch == NULL);
	3984	new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch;
	3985
	3986	old_uthread->t_dtrace_sig = 0;
	3987	old_uthread->t_dtrace_stop = 0;
	3988	old_uthread->t_dtrace_resumepid = 0;
	3989	old_uthread->t_dtrace_scratch = NULL;
	3990	#endif
	3991	/* Copy the resource accounting info */
	3992	thread_copy_resource_info(new_thread, current_thread());
	3993
	3994	/* Clear the exit reason and signal state on old thread */
	3995	old_uthread->uu_exit_reason = NULL;
	3996	old_uthread->uu_siglist = 0;
	3997
	3998	/* Add the new uthread to proc uthlist and remove the old one */
	3999	TAILQ_INSERT_TAIL(&p->p_uthlist, new_uthread, uu_list);
	4000	TAILQ_REMOVE(&p->p_uthlist, old_uthread, uu_list);
	4001
	4002	task_set_did_exec_flag(old_task);
	4003	task_clear_exec_copy_flag(new_task);
	4004
	4005	task_copy_fields_for_exec(new_task, old_task);
	4006
	4007	proc_transend(p, 1);
	4008	}
	4009	}
	4010
	4011	proc_unlock(p);
	4012	proc_refwake(p);
	4013	thread_clear_exec_promotion(old_thread);
	4014
	4015	if (error != 0 \|\| !task_active \|\| !proc_active \|\| !thread_active) {
	4016	task_terminate_internal(new_task);
	4017	}
	4018
	4019	return p;
	4020	}
	4021
	4022	/*
	4023	* execve
	4024	*
	4025	* Parameters: uap->fname File name to exec
	4026	* uap->argp Argument list
	4027	* uap->envp Environment list
	4028	*
	4029	* Returns: 0 Success
	4030	* __mac_execve:EINVAL Invalid argument
	4031	* __mac_execve:ENOTSUP Invalid argument
	4032	* __mac_execve:EACCES Permission denied
	4033	* __mac_execve:EINTR Interrupted function
	4034	* __mac_execve:ENOMEM Not enough space
	4035	* __mac_execve:EFAULT Bad address
	4036	* __mac_execve:ENAMETOOLONG Filename too long
	4037	* __mac_execve:ENOEXEC Executable file format error
	4038	* __mac_execve:ETXTBSY Text file busy [misuse of error code]
	4039	* __mac_execve:???
	4040	*
	4041	* TODO: Dynamic linker header address on stack is copied via suword()
	4042	*/
	4043	/* ARGSUSED */
	4044	int
	4045	execve(proc_t p, struct execve_args uap, int32_t retval)
	4046	{
	4047	struct __mac_execve_args muap;
	4048	int err;
	4049
	4050	memoryshot(VM_EXECVE, DBG_FUNC_NONE);
	4051
	4052	muap.fname = uap->fname;
	4053	muap.argp = uap->argp;
	4054	muap.envp = uap->envp;
	4055	muap.mac_p = USER_ADDR_NULL;
	4056	err = __mac_execve(p, &muap, retval);
	4057
	4058	return err;
	4059	}
	4060
	4061	/*
	4062	* __mac_execve
	4063	*
	4064	* Parameters: uap->fname File name to exec
	4065	* uap->argp Argument list
	4066	* uap->envp Environment list
	4067	* uap->mac_p MAC label supplied by caller
	4068	*
	4069	* Returns: 0 Success
	4070	* EINVAL Invalid argument
	4071	* ENOTSUP Not supported
	4072	* ENOEXEC Executable file format error
	4073	* exec_activate_image:EINVAL Invalid argument
	4074	* exec_activate_image:EACCES Permission denied
	4075	* exec_activate_image:EINTR Interrupted function
	4076	* exec_activate_image:ENOMEM Not enough space
	4077	* exec_activate_image:EFAULT Bad address
	4078	* exec_activate_image:ENAMETOOLONG Filename too long
	4079	* exec_activate_image:ENOEXEC Executable file format error
	4080	* exec_activate_image:ETXTBSY Text file busy [misuse of error code]
	4081	* exec_activate_image:EBADEXEC The executable is corrupt/unknown
	4082	* exec_activate_image:???
	4083	* mac_execve_enter:???
	4084	*
	4085	* TODO: Dynamic linker header address on stack is copied via suword()
	4086	*/
	4087	int
	4088	__mac_execve(proc_t p, struct __mac_execve_args uap, int32_t retval)
	4089	{
	4090	char *bufp = NULL;
	4091	struct image_params *imgp;
	4092	struct vnode_attr *vap;
	4093	struct vnode_attr *origvap;
	4094	int error;
	4095	int is_64 = IS_64BIT_PROCESS(p);
	4096	struct vfs_context context;
	4097	struct uthread *uthread;
	4098	task_t old_task = current_task();
	4099	task_t new_task = NULL;
	4100	boolean_t should_release_proc_ref = FALSE;
	4101	boolean_t exec_done = FALSE;
	4102	boolean_t in_vfexec = FALSE;
	4103	void *inherit = NULL;
	4104
	4105	context.vc_thread = current_thread();
	4106	context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
	4107
	4108	/* Allocate a big chunk for locals instead of using stack since these
	4109	* structures a pretty big.
	4110	*/
	4111	MALLOC(bufp, char , (sizeof(imgp) + sizeof(vap) + sizeof(origvap)), M_TEMP, M_WAITOK \| M_ZERO);
	4112	imgp = (struct image_params *) bufp;
	4113	if (bufp == NULL) {
	4114	error = ENOMEM;
	4115	goto exit_with_error;
	4116	}
	4117	vap = (struct vnode_attr ) (bufp + sizeof(imgp));
	4118	origvap = (struct vnode_attr ) (bufp + sizeof(imgp) + sizeof(*vap));
	4119
	4120	/* Initialize the common data in the image_params structure */
	4121	imgp->ip_user_fname = uap->fname;
	4122	imgp->ip_user_argv = uap->argp;
	4123	imgp->ip_user_envv = uap->envp;
	4124	imgp->ip_vattr = vap;
	4125	imgp->ip_origvattr = origvap;
	4126	imgp->ip_vfs_context = &context;
	4127	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE) \| ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
	4128	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
	4129	imgp->ip_mac_return = 0;
	4130	imgp->ip_cs_error = OS_REASON_NULL;
	4131	imgp->ip_simulator_binary = IMGPF_SB_DEFAULT;
	4132
	4133	#if CONFIG_MACF
	4134	if (uap->mac_p != USER_ADDR_NULL) {
	4135	error = mac_execve_enter(uap->mac_p, imgp);
	4136	if (error) {
	4137	kauth_cred_unref(&context.vc_ucred);
	4138	goto exit_with_error;
	4139	}
	4140	}
	4141	#endif
	4142	uthread = get_bsdthread_info(current_thread());
	4143	if (uthread->uu_flag & UT_VFORK) {
	4144	imgp->ip_flags \|= IMGPF_VFORK_EXEC;
	4145	in_vfexec = TRUE;
	4146	} else {
	4147	imgp->ip_flags \|= IMGPF_EXEC;
	4148
	4149	/*
	4150	* For execve case, create a new task and thread
	4151	* which points to current_proc. The current_proc will point
	4152	* to the new task after image activation and proc ref drain.
	4153	*
	4154	* proc (current_proc) <----- old_task (current_task)
	4155	* ^ \| ^
	4156	* \| \| \|
	4157	* \| ----------------------------------
	4158	* \|
	4159	* --------- new_task (task marked as TF_EXEC_COPY)
	4160	*
	4161	* After image activation, the proc will point to the new task
	4162	* and would look like following.
	4163	*
	4164	* proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
	4165	* ^ \|
	4166	* \| \|
	4167	* \| ----------> new_task
	4168	* \| \|
	4169	* -----------------
	4170	*
	4171	* During exec any transition from new_task -> proc is fine, but don't allow
	4172	* transition from proc->task, since it will modify old_task.
	4173	*/
	4174	imgp->ip_new_thread = fork_create_child(old_task,
	4175	NULL,
	4176	p,
	4177	FALSE,
	4178	p->p_flag & P_LP64,
	4179	task_get_64bit_data(old_task),
	4180	TRUE);
	4181	/* task and thread ref returned by fork_create_child */
	4182	if (imgp->ip_new_thread == NULL) {
	4183	error = ENOMEM;
	4184	goto exit_with_error;
	4185	}
	4186
	4187	new_task = get_threadtask(imgp->ip_new_thread);
	4188	context.vc_thread = imgp->ip_new_thread;
	4189	}
	4190
	4191	error = exec_activate_image(imgp);
	4192	/* thread and task ref returned for vfexec case */
	4193
	4194	if (imgp->ip_new_thread != NULL) {
	4195	/*
	4196	* task reference might be returned by exec_activate_image
	4197	* for vfexec.
	4198	*/
	4199	new_task = get_threadtask(imgp->ip_new_thread);
	4200	#if defined(HAS_APPLE_PAC)
	4201	ml_task_set_disable_user_jop(new_task, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
	4202	ml_thread_set_disable_user_jop(imgp->ip_new_thread, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
	4203	#endif
	4204	}
	4205
	4206	if (!error && !in_vfexec) {
	4207	p = proc_exec_switch_task(p, old_task, new_task, imgp->ip_new_thread);
	4208	/* proc ref returned */
	4209	should_release_proc_ref = TRUE;
	4210
	4211	/*
	4212	* Need to transfer pending watch port boosts to the new task while still making
	4213	* sure that the old task remains in the importance linkage. Create an importance
	4214	* linkage from old task to new task, then switch the task importance base
	4215	* of old task and new task. After the switch the port watch boost will be
	4216	* boosting the new task and new task will be donating importance to old task.
	4217	*/
	4218	inherit = ipc_importance_exec_switch_task(old_task, new_task);
	4219	}
	4220
	4221	kauth_cred_unref(&context.vc_ucred);
	4222
	4223	/* Image not claimed by any activator? */
	4224	if (error == -1) {
	4225	error = ENOEXEC;
	4226	}
	4227
	4228	if (!error) {
	4229	exec_done = TRUE;
	4230	assert(imgp->ip_new_thread != NULL);
	4231
	4232	exec_resettextvp(p, imgp);
	4233	error = check_for_signature(p, imgp);
	4234	}
	4235
	4236	/* flag exec has occurred, notify only if it has not failed due to FP Key error */
	4237	if (exec_done && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) {
	4238	proc_knote(p, NOTE_EXEC);
	4239	}
	4240
	4241	if (imgp->ip_vp != NULLVP) {
	4242	vnode_put(imgp->ip_vp);
	4243	}
	4244	if (imgp->ip_scriptvp != NULLVP) {
	4245	vnode_put(imgp->ip_scriptvp);
	4246	}
	4247	if (imgp->ip_strings) {
	4248	execargs_free(imgp);
	4249	}
	4250	#if CONFIG_MACF
	4251	if (imgp->ip_execlabelp) {
	4252	mac_cred_label_free(imgp->ip_execlabelp);
	4253	}
	4254	if (imgp->ip_scriptlabelp) {
	4255	mac_vnode_label_free(imgp->ip_scriptlabelp);
	4256	}
	4257	#endif
	4258	if (imgp->ip_cs_error != OS_REASON_NULL) {
	4259	os_reason_free(imgp->ip_cs_error);
	4260	imgp->ip_cs_error = OS_REASON_NULL;
	4261	}
	4262
	4263	if (!error) {
	4264	/*
	4265	* We need to initialize the bank context behind the protection of
	4266	* the proc_trans lock to prevent a race with exit. We can't do this during
	4267	* exec_activate_image because task_bank_init checks entitlements that
	4268	* aren't loaded until subsequent calls (including exec_resettextvp).
	4269	*/
	4270	error = proc_transstart(p, 0, 0);
	4271	}
	4272
	4273	if (!error) {
	4274	task_bank_init(new_task);
	4275	proc_transend(p, 0);
	4276
	4277	#if __arm64__
	4278	proc_legacy_footprint_entitled(p, new_task, __FUNCTION__);
	4279	proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__);
	4280	#endif /* __arm64__ */
	4281
	4282	/* Sever any extant thread affinity */
	4283	thread_affinity_exec(current_thread());
	4284
	4285	/* Inherit task role from old task to new task for exec */
	4286	if (!in_vfexec) {
	4287	proc_inherit_task_role(new_task, old_task);
	4288	}
	4289
	4290	thread_t main_thread = imgp->ip_new_thread;
	4291
	4292	task_set_main_thread_qos(new_task, main_thread);
	4293
	4294	#if CONFIG_ARCADE
	4295	/*
	4296	* Check to see if we need to trigger an arcade upcall AST now
	4297	* that the vnode has been reset on the task.
	4298	*/
	4299	arcade_prepare(new_task, imgp->ip_new_thread);
	4300	#endif /* CONFIG_ARCADE */
	4301
	4302	#if CONFIG_MACF
	4303	/*
	4304	* Processes with the MAP_JIT entitlement are permitted to have
	4305	* a jumbo-size map.
	4306	*/
	4307	if (mac_proc_check_map_anon(p, 0, 0, 0, MAP_JIT, NULL) == 0) {
	4308	vm_map_set_jumbo(get_task_map(new_task));
	4309	vm_map_set_jit_entitled(get_task_map(new_task));
	4310	}
	4311	#endif /* CONFIG_MACF */
	4312
	4313	if (vm_darkwake_mode == TRUE) {
	4314	/*
	4315	* This process is being launched when the system
	4316	* is in darkwake. So mark it specially. This will
	4317	* cause all its pages to be entered in the background Q.
	4318	*/
	4319	task_set_darkwake_mode(new_task, vm_darkwake_mode);
	4320	}
	4321
	4322	#if CONFIG_DTRACE
	4323	dtrace_thread_didexec(imgp->ip_new_thread);
	4324
	4325	if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
	4326	(*dtrace_proc_waitfor_hook)(p);
	4327	}
	4328	#endif
	4329
	4330	#if CONFIG_AUDIT
	4331	if (!error && AUDIT_ENABLED() && p) {
	4332	/* Add the CDHash of the new process to the audit record */
	4333	uint8_t *cdhash = cs_get_cdhash(p);
	4334	if (cdhash) {
	4335	AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
	4336	}
	4337	}
	4338	#endif
	4339
	4340	if (in_vfexec) {
	4341	vfork_return(p, retval, p->p_pid);
	4342	}
	4343	} else {
	4344	DTRACE_PROC1(exec__failure, int, error);
	4345	}
	4346
	4347	exit_with_error:
	4348
	4349	/*
	4350	* clear bsd_info from old task if it did exec.
	4351	*/
	4352	if (task_did_exec(old_task)) {
	4353	set_bsdtask_info(old_task, NULL);
	4354	}
	4355
	4356	/* clear bsd_info from new task and terminate it if exec failed */
	4357	if (new_task != NULL && task_is_exec_copy(new_task)) {
	4358	set_bsdtask_info(new_task, NULL);
	4359	task_terminate_internal(new_task);
	4360	}
	4361
	4362	if (imgp != NULL) {
	4363	/* Clear the initial wait on the thread transferring watchports */
	4364	if (imgp->ip_new_thread) {
	4365	task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT);
	4366	}
	4367
	4368	/* Transfer the watchport boost to new task */
	4369	if (!error && !in_vfexec) {
	4370	task_transfer_turnstile_watchports(old_task,
	4371	new_task, imgp->ip_new_thread);
	4372	}
	4373	/*
	4374	* Do not terminate the current task, if proc_exec_switch_task did not
	4375	* switch the tasks, terminating the current task without the switch would
	4376	* result in loosing the SIGKILL status.
	4377	*/
	4378	if (task_did_exec(old_task)) {
	4379	/* Terminate the current task, since exec will start in new task */
	4380	task_terminate_internal(old_task);
	4381	}
	4382
	4383	/* Release the thread ref returned by fork_create_child */
	4384	if (imgp->ip_new_thread) {
	4385	/* wake up the new exec thread */
	4386	task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_FINAL_WAIT);
	4387	thread_deallocate(imgp->ip_new_thread);
	4388	imgp->ip_new_thread = NULL;
	4389	}
	4390	}
	4391
	4392	/* Release the ref returned by fork_create_child */
	4393	if (new_task) {
	4394	task_deallocate(new_task);
	4395	new_task = NULL;
	4396	}
	4397
	4398	if (should_release_proc_ref) {
	4399	proc_rele(p);
	4400	}
	4401
	4402	if (bufp != NULL) {
	4403	FREE(bufp, M_TEMP);
	4404	}
	4405
	4406	if (inherit != NULL) {
	4407	ipc_importance_release(inherit);
	4408	}
	4409
	4410	return error;
	4411	}
	4412
	4413
	4414	/*
	4415	* copyinptr
	4416	*
	4417	* Description: Copy a pointer in from user space to a user_addr_t in kernel
	4418	* space, based on 32/64 bitness of the user space
	4419	*
	4420	* Parameters: froma User space address
	4421	* toptr Address of kernel space user_addr_t
	4422	* ptr_size 4/8, based on 'froma' address space
	4423	*
	4424	* Returns: 0 Success
	4425	* EFAULT Bad 'froma'
	4426	*
	4427	* Implicit returns:
	4428	* *ptr_size Modified
	4429	*/
	4430	static int
	4431	copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
	4432	{
	4433	int error;
	4434
	4435	if (ptr_size == 4) {
	4436	/* 64 bit value containing 32 bit address */
	4437	unsigned int i = 0;
	4438
	4439	error = copyin(froma, &i, 4);
	4440	toptr = CAST_USER_ADDR_T(i); / SAFE */
	4441	} else {
	4442	error = copyin(froma, toptr, 8);
	4443	}
	4444	return error;
	4445	}
	4446
	4447
	4448	/*
	4449	* copyoutptr
	4450	*
	4451	* Description: Copy a pointer out from a user_addr_t in kernel space to
	4452	* user space, based on 32/64 bitness of the user space
	4453	*
	4454	* Parameters: ua User space address to copy to
	4455	* ptr Address of kernel space user_addr_t
	4456	* ptr_size 4/8, based on 'ua' address space
	4457	*
	4458	* Returns: 0 Success
	4459	* EFAULT Bad 'ua'
	4460	*
	4461	*/
	4462	static int
	4463	copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
	4464	{
	4465	int error;
	4466
	4467	if (ptr_size == 4) {
	4468	/* 64 bit value containing 32 bit address */
	4469	unsigned int i = CAST_DOWN_EXPLICIT(unsigned int, ua); /* SAFE */
	4470
	4471	error = copyout(&i, ptr, 4);
	4472	} else {
	4473	error = copyout(&ua, ptr, 8);
	4474	}
	4475	return error;
	4476	}
	4477
	4478
	4479	/*
	4480	* exec_copyout_strings
	4481	*
	4482	* Copy out the strings segment to user space. The strings segment is put
	4483	* on a preinitialized stack frame.
	4484	*
	4485	* Parameters: struct image_params * the image parameter block
	4486	* int * a pointer to the stack offset variable
	4487	*
	4488	* Returns: 0 Success
	4489	* !0 Faiure: errno
	4490	*
	4491	* Implicit returns:
	4492	* (*stackp) The stack offset, modified
	4493	*
	4494	* Note: The strings segment layout is backward, from the beginning
	4495	* of the top of the stack to consume the minimal amount of
	4496	* space possible; the returned stack pointer points to the
	4497	* end of the area consumed (stacks grow downward).
	4498	*
	4499	* argc is an int; arg[i] are pointers; env[i] are pointers;
	4500	* the 0's are (void *)NULL's
	4501	*
	4502	* The stack frame layout is:
	4503	*
	4504	* +-------------+ <- p->user_stack
	4505	* \| 16b \|
	4506	* +-------------+
	4507	* \| STRING AREA \|
	4508	* \| : \|
	4509	* \| : \|
	4510	* \| : \|
	4511	* +- -- -- -- --+
	4512	* \| PATH AREA \|
	4513	* +-------------+
	4514	* \| 0 \|
	4515	* +-------------+
	4516	* \| applev[n] \|
	4517	* +-------------+
	4518	* :
	4519	* :
	4520	* +-------------+
	4521	* \| applev[1] \|
	4522	* +-------------+
	4523	* \| exec_path / \|
	4524	* \| applev[0] \|
	4525	* +-------------+
	4526	* \| 0 \|
	4527	* +-------------+
	4528	* \| env[n] \|
	4529	* +-------------+
	4530	* :
	4531	* :
	4532	* +-------------+
	4533	* \| env[0] \|
	4534	* +-------------+
	4535	* \| 0 \|
	4536	* +-------------+
	4537	* \| arg[argc-1] \|
	4538	* +-------------+
	4539	* :
	4540	* :
	4541	* +-------------+
	4542	* \| arg[0] \|
	4543	* +-------------+
	4544	* \| argc \|
	4545	* sp-> +-------------+
	4546	*
	4547	* Although technically a part of the STRING AREA, we treat the PATH AREA as
	4548	* a separate entity. This allows us to align the beginning of the PATH AREA
	4549	* to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
	4550	* which preceed it on the stack are properly aligned.
	4551	*/
	4552
	4553	static int
	4554	exec_copyout_strings(struct image_params imgp, user_addr_t stackp)
	4555	{
	4556	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	4557	int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
	4558	int ptr_area_size;
	4559	void ptr_buffer_start, ptr_buffer;
	4560	int string_size;
	4561
	4562	user_addr_t string_area; /* argv[], env[] */
	4563	user_addr_t ptr_area; /* argv[], env[], applev[] */
	4564	user_addr_t argc_area; /* argc */
	4565	user_addr_t stack;
	4566	int error;
	4567
	4568	unsigned i;
	4569	struct copyout_desc {
	4570	char *start_string;
	4571	int count;
	4572	#if CONFIG_DTRACE
	4573	user_addr_t *dtrace_cookie;
	4574	#endif
	4575	boolean_t null_term;
	4576	} descriptors[] = {
	4577	{
	4578	.start_string = imgp->ip_startargv,
	4579	.count = imgp->ip_argc,
	4580	#if CONFIG_DTRACE
	4581	.dtrace_cookie = &p->p_dtrace_argv,
	4582	#endif
	4583	.null_term = TRUE
	4584	},
	4585	{
	4586	.start_string = imgp->ip_endargv,
	4587	.count = imgp->ip_envc,
	4588	#if CONFIG_DTRACE
	4589	.dtrace_cookie = &p->p_dtrace_envp,
	4590	#endif
	4591	.null_term = TRUE
	4592	},
	4593	{
	4594	.start_string = imgp->ip_strings,
	4595	.count = 1,
	4596	#if CONFIG_DTRACE
	4597	.dtrace_cookie = NULL,
	4598	#endif
	4599	.null_term = FALSE
	4600	},
	4601	{
	4602	.start_string = imgp->ip_endenvv,
	4603	.count = imgp->ip_applec - 1, /* exec_path handled above */
	4604	#if CONFIG_DTRACE
	4605	.dtrace_cookie = NULL,
	4606	#endif
	4607	.null_term = TRUE
	4608	}
	4609	};
	4610
	4611	stack = *stackp;
	4612
	4613	/*
	4614	* All previous contributors to the string area
	4615	* should have aligned their sub-area
	4616	*/
	4617	if (imgp->ip_strspace % ptr_size != 0) {
	4618	error = EINVAL;
	4619	goto bad;
	4620	}
	4621
	4622	/* Grow the stack down for the strings we've been building up */
	4623	string_size = imgp->ip_strendp - imgp->ip_strings;
	4624	stack -= string_size;
	4625	string_area = stack;
	4626
	4627	/*
	4628	* Need room for one pointer for each string, plus
	4629	* one for the NULLs terminating the argv, envv, and apple areas.
	4630	*/
	4631	ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) * ptr_size;
	4632	stack -= ptr_area_size;
	4633	ptr_area = stack;
	4634
	4635	/* We'll construct all the pointer arrays in our string buffer,
	4636	* which we already know is aligned properly, and ip_argspace
	4637	* was used to verify we have enough space.
	4638	*/
	4639	ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
	4640
	4641	/*
	4642	* Need room for pointer-aligned argc slot.
	4643	*/
	4644	stack -= ptr_size;
	4645	argc_area = stack;
	4646
	4647	/*
	4648	* Record the size of the arguments area so that sysctl_procargs()
	4649	* can return the argument area without having to parse the arguments.
	4650	*/
	4651	proc_lock(p);
	4652	p->p_argc = imgp->ip_argc;
	4653	p->p_argslen = (int)(*stackp - string_area);
	4654	proc_unlock(p);
	4655
	4656	/* Return the initial stack address: the location of argc */
	4657	*stackp = stack;
	4658
	4659	/*
	4660	* Copy out the entire strings area.
	4661	*/
	4662	error = copyout(imgp->ip_strings, string_area,
	4663	string_size);
	4664	if (error) {
	4665	goto bad;
	4666	}
	4667
	4668	for (i = 0; i < sizeof(descriptors) / sizeof(descriptors[0]); i++) {
	4669	char *cur_string = descriptors[i].start_string;
	4670	int j;
	4671
	4672	#if CONFIG_DTRACE
	4673	if (descriptors[i].dtrace_cookie) {
	4674	proc_lock(p);
	4675	descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); / dtrace convenience */
	4676	proc_unlock(p);
	4677	}
	4678	#endif /* CONFIG_DTRACE */
	4679
	4680	/*
	4681	* For each segment (argv, envv, applev), copy as many pointers as requested
	4682	* to our pointer buffer.
	4683	*/
	4684	for (j = 0; j < descriptors[i].count; j++) {
	4685	user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
	4686
	4687	/* Copy out the pointer to the current string. Alignment has been verified */
	4688	if (ptr_size == 8) {
	4689	(uint64_t )ptr_buffer = (uint64_t)cur_address;
	4690	} else {
	4691	(uint32_t )ptr_buffer = (uint32_t)cur_address;
	4692	}
	4693
	4694	ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
	4695	cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
	4696	}
	4697
	4698	if (descriptors[i].null_term) {
	4699	if (ptr_size == 8) {
	4700	(uint64_t )ptr_buffer = 0ULL;
	4701	} else {
	4702	(uint32_t )ptr_buffer = 0;
	4703	}
	4704
	4705	ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
	4706	}
	4707	}
	4708
	4709	/*
	4710	* Copy out all our pointer arrays in bulk.
	4711	*/
	4712	error = copyout(ptr_buffer_start, ptr_area,
	4713	ptr_area_size);
	4714	if (error) {
	4715	goto bad;
	4716	}
	4717
	4718	/* argc (int32, stored in a ptr_size area) */
	4719	error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
	4720	if (error) {
	4721	goto bad;
	4722	}
	4723
	4724	bad:
	4725	return error;
	4726	}
	4727
	4728
	4729	/*
	4730	* exec_extract_strings
	4731	*
	4732	* Copy arguments and environment from user space into work area; we may
	4733	* have already copied some early arguments into the work area, and if
	4734	* so, any arguments opied in are appended to those already there.
	4735	* This function is the primary manipulator of ip_argspace, since
	4736	* these are the arguments the client of execve(2) knows about. After
	4737	* each argv[]/envv[] string is copied, we charge the string length
	4738	* and argv[]/envv[] pointer slot to ip_argspace, so that we can
	4739	* full preflight the arg list size.
	4740	*
	4741	* Parameters: struct image_params * the image parameter block
	4742	*
	4743	* Returns: 0 Success
	4744	* !0 Failure: errno
	4745	*
	4746	* Implicit returns;
	4747	* (imgp->ip_argc) Count of arguments, updated
	4748	* (imgp->ip_envc) Count of environment strings, updated
	4749	* (imgp->ip_argspace) Count of remaining of NCARGS
	4750	* (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
	4751	*
	4752	*
	4753	* Note: The argument and environment vectors are user space pointers
	4754	* to arrays of user space pointers.
	4755	*/
	4756	static int
	4757	exec_extract_strings(struct image_params *imgp)
	4758	{
	4759	int error = 0;
	4760	int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT_ADDR) ? 8 : 4;
	4761	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
	4762	user_addr_t argv = imgp->ip_user_argv;
	4763	user_addr_t envv = imgp->ip_user_envv;
	4764
	4765	/*
	4766	* Adjust space reserved for the path name by however much padding it
	4767	* needs. Doing this here since we didn't know if this would be a 32-
	4768	* or 64-bit process back in exec_save_path.
	4769	*/
	4770	while (imgp->ip_strspace % new_ptr_size != 0) {
	4771	*imgp->ip_strendp++ = '\0';
	4772	imgp->ip_strspace--;
	4773	/* imgp->ip_argspace--; not counted towards exec args total */
	4774	}
	4775
	4776	/*
	4777	* From now on, we start attributing string space to ip_argspace
	4778	*/
	4779	imgp->ip_startargv = imgp->ip_strendp;
	4780	imgp->ip_argc = 0;
	4781
	4782	if ((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
	4783	user_addr_t arg;
	4784	char argstart, ch;
	4785
	4786	/* First, the arguments in the "#!" string are tokenized and extracted. */
	4787	argstart = imgp->ip_interp_buffer;
	4788	while (argstart) {
	4789	ch = argstart;
	4790	while (ch && !IS_WHITESPACE(ch)) {
	4791	ch++;
	4792	}
	4793
	4794	if (*ch == '\0') {
	4795	/* last argument, no need to NUL-terminate */
	4796	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
	4797	argstart = NULL;
	4798	} else {
	4799	/* NUL-terminate */
	4800	*ch = '\0';
	4801	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
	4802
	4803	/*
	4804	* Find the next string. We know spaces at the end of the string have already
	4805	* been stripped.
	4806	*/
	4807	argstart = ch + 1;
	4808	while (IS_WHITESPACE(*argstart)) {
	4809	argstart++;
	4810	}
	4811	}
	4812
	4813	/* Error-check, regardless of whether this is the last interpreter arg or not */
	4814	if (error) {
	4815	goto bad;
	4816	}
	4817	if (imgp->ip_argspace < new_ptr_size) {
	4818	error = E2BIG;
	4819	goto bad;
	4820	}
	4821	imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
	4822	imgp->ip_argc++;
	4823	}
	4824
	4825	if (argv != 0LL) {
	4826	/*
	4827	* If we are running an interpreter, replace the av[0] that was
	4828	* passed to execve() with the path name that was
	4829	* passed to execve() for interpreters which do not use the PATH
	4830	* to locate their script arguments.
	4831	*/
	4832	error = copyinptr(argv, &arg, ptr_size);
	4833	if (error) {
	4834	goto bad;
	4835	}
	4836	if (arg != 0LL) {
	4837	argv += ptr_size; /* consume without using */
	4838	}
	4839	}
	4840
	4841	if (imgp->ip_interp_sugid_fd != -1) {
	4842	char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
	4843	snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
	4844	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
	4845	} else {
	4846	error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
	4847	}
	4848
	4849	if (error) {
	4850	goto bad;
	4851	}
	4852	if (imgp->ip_argspace < new_ptr_size) {
	4853	error = E2BIG;
	4854	goto bad;
	4855	}
	4856	imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
	4857	imgp->ip_argc++;
	4858	}
	4859
	4860	while (argv != 0LL) {
	4861	user_addr_t arg;
	4862
	4863	error = copyinptr(argv, &arg, ptr_size);
	4864	if (error) {
	4865	goto bad;
	4866	}
	4867
	4868	if (arg == 0LL) {
	4869	break;
	4870	}
	4871
	4872	argv += ptr_size;
	4873
	4874	/*
	4875	* av[n...] = arg[n]
	4876	*/
	4877	error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
	4878	if (error) {
	4879	goto bad;
	4880	}
	4881	if (imgp->ip_argspace < new_ptr_size) {
	4882	error = E2BIG;
	4883	goto bad;
	4884	}
	4885	imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
	4886	imgp->ip_argc++;
	4887	}
	4888
	4889	/* Save space for argv[] NULL terminator */
	4890	if (imgp->ip_argspace < new_ptr_size) {
	4891	error = E2BIG;
	4892	goto bad;
	4893	}
	4894	imgp->ip_argspace -= new_ptr_size;
	4895
	4896	/* Note where the args ends and env begins. */
	4897	imgp->ip_endargv = imgp->ip_strendp;
	4898	imgp->ip_envc = 0;
	4899
	4900	/* Now, get the environment */
	4901	while (envv != 0LL) {
	4902	user_addr_t env;
	4903
	4904	error = copyinptr(envv, &env, ptr_size);
	4905	if (error) {
	4906	goto bad;
	4907	}
	4908
	4909	envv += ptr_size;
	4910	if (env == 0LL) {
	4911	break;
	4912	}
	4913	/*
	4914	* av[n...] = env[n]
	4915	*/
	4916	error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
	4917	if (error) {
	4918	goto bad;
	4919	}
	4920	if (imgp->ip_argspace < new_ptr_size) {
	4921	error = E2BIG;
	4922	goto bad;
	4923	}
	4924	imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
	4925	imgp->ip_envc++;
	4926	}
	4927
	4928	/* Save space for envv[] NULL terminator */
	4929	if (imgp->ip_argspace < new_ptr_size) {
	4930	error = E2BIG;
	4931	goto bad;
	4932	}
	4933	imgp->ip_argspace -= new_ptr_size;
	4934
	4935	/* Align the tail of the combined argv+envv area */
	4936	while (imgp->ip_strspace % new_ptr_size != 0) {
	4937	if (imgp->ip_argspace < 1) {
	4938	error = E2BIG;
	4939	goto bad;
	4940	}
	4941	*imgp->ip_strendp++ = '\0';
	4942	imgp->ip_strspace--;
	4943	imgp->ip_argspace--;
	4944	}
	4945
	4946	/* Note where the envv ends and applev begins. */
	4947	imgp->ip_endenvv = imgp->ip_strendp;
	4948
	4949	/*
	4950	* From now on, we are no longer charging argument
	4951	* space to ip_argspace.
	4952	*/
	4953
	4954	bad:
	4955	return error;
	4956	}
	4957
	4958	/*
	4959	* Libc has an 8-element array set up for stack guard values. It only fills
	4960	* in one of those entries, and both gcc and llvm seem to use only a single
	4961	* 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
	4962	* do the work to construct them.
	4963	*/
	4964	#define GUARD_VALUES 1
	4965	#define GUARD_KEY "stack_guard="
	4966
	4967	/*
	4968	* System malloc needs some entropy when it is initialized.
	4969	*/
	4970	#define ENTROPY_VALUES 2
	4971	#define ENTROPY_KEY "malloc_entropy="
	4972
	4973	/*
	4974	* libplatform needs a random pointer-obfuscation value when it is initialized.
	4975	*/
	4976	#define PTR_MUNGE_VALUES 1
	4977	#define PTR_MUNGE_KEY "ptr_munge="
	4978
	4979	/*
	4980	* System malloc engages nanozone for UIAPP.
	4981	*/
	4982	#define NANO_ENGAGE_KEY "MallocNanoZone=1"
	4983
	/* apple[] key under which exec_add_apple_strings() emits the commpage text location as hex. */
	4984	#define PFZ_KEY "pfz="
	/* Declared here, defined outside this chunk; the 32- or 64-bit one is formatted after PFZ_KEY. */
	4985	extern user32_addr_t commpage_text32_location;
	4986	extern user64_addr_t commpage_text64_location;
	4987
	/* NOTE(review): users of the keys below are outside this chunk; presumably further apple[] entries - confirm. */
	4988	#define MAIN_STACK_VALUES 4
	4989	#define MAIN_STACK_KEY "main_stack="
	4990
	4991	#define FSID_KEY "executable_file="
	4992	#define DYLD_FSID_KEY "dyld_file="
	4993	#define CDHASH_KEY "executable_cdhash="
	4994	#define DYLD_FLAGS_KEY "dyld_flags="
	4995
	/* NOTE(review): looks like a sizing template (two 64-bit hex values joined by a comma) - confirm against its users. */
	4996	#define FSID_MAX_STRING "0x1234567890abcdef,0x1234567890abcdef"
	4997
	/* Used below to size the stack buffers that hold "key0x<hex>" strings. */
	4998	#define HEX_STR_LEN 18 // 64-bit hex value "0x0123456701234567"
	4999
	5000	static int
	5001	exec_add_entropy_key(struct image_params *imgp,
	5002	const char *key,
	5003	int values,
	5004	boolean_t embedNUL)
	5005	{
	5006	const int limit = 8;
	5007	uint64_t entropy[limit];
	5008	char str[strlen(key) + (HEX_STR_LEN + 1) * limit + 1];
	5009	if (values > limit) {
	5010	values = limit;
	5011	}
	5012
	5013	read_random(entropy, sizeof(entropy[0]) * values);
	5014
	5015	if (embedNUL) {
	5016	entropy[0] &= ~(0xffull << 8);
	5017	}
	5018
	5019	int len = scnprintf(str, sizeof(str), "%s0x%llx", key, entropy[0]);
	5020	int remaining = sizeof(str) - len;
	5021	for (int i = 1; i < values && remaining > 0; ++i) {
	5022	int start = sizeof(str) - remaining;
	5023	len = scnprintf(&str[start], remaining, ",0x%llx", entropy[i]);
	5024	remaining -= len;
	5025	}
	5026
	5027	return exec_add_user_string(imgp, CAST_USER_ADDR_T(str), UIO_SYSSPACE, FALSE);
	5028	}
	5029
	5030	/*
	5031	* Build up the contents of the apple[] string vector
	5032	*/
	5033	#if (DEVELOPMENT \|\| DEBUG)
	5034	uint64_t dyld_flags = 0;
	5035	#endif
	5036
	5037	static int
	5038	exec_add_apple_strings(struct image_params *imgp,
	5039	const load_result_t *load_result)
	5040	{
	5041	int error;
	5042	int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
	5043
	5044	/* exec_save_path stored the first string */
	5045	imgp->ip_applec = 1;
	5046
	5047	/* adding the pfz string */
	5048	{
	5049	char pfz_string[strlen(PFZ_KEY) + HEX_STR_LEN + 1];
	5050
	5051	if (img_ptr_size == 8) {
	5052	snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%llx", commpage_text64_location);
	5053	} else {
	5054	snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%x", commpage_text32_location);
	5055	}
	5056	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), UIO_SYSSPACE, FALSE);
	5057	if (error) {
	5058	goto bad;
	5059	}
	5060	imgp->ip_applec++;
	5061	}
	5062
	5063	/* adding the NANO_ENGAGE_KEY key */
	5064	if (imgp->ip_px_sa) {
	5065	int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
	5066
	5067	if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
	5068	const char *nano_string = NANO_ENGAGE_KEY;
	5069	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(nano_string), UIO_SYSSPACE, FALSE);
	5070	if (error) {
	5071	goto bad;
	5072	}
	5073	imgp->ip_applec++;
	5074	}
	5075	}
	5076
	5077	/*
	5078	* Supply libc with a collection of random values to use when
	5079	* implementing -fstack-protector.
	5080	*
	5081	* (The first random string always contains an embedded NUL so that
	5082	* __stack_chk_guard also protects against C string vulnerabilities)
	5083	*/
	5084	error = exec_add_entropy_key(imgp, GUARD_KEY, GUARD_VALUES, TRUE);
	5085	if (error) {
	5086	goto bad;
	5087	}
	5088	imgp->ip_applec++;
	5089
	5090	/*
	5091	* Supply libc with entropy for system malloc.
	5092	*/
	5093	error = exec_add_entropy_key(imgp, ENTROPY_KEY, ENTROPY_VALUES, FALSE);
	5094	if (error) {
	5095	goto bad;
	5096	}
	5097	imgp->ip_applec++;
	5098
	5099	/*
	5100	* Supply libpthread & libplatform with a random value to use for pointer
	5101	* obfuscation.
	5102	*/
	5103	error = exec_add_entropy_key(imgp, PTR_MUNGE_KEY, PTR_MUNGE_VALUES, FALSE);
	5104	if (error) {
	5105	goto bad;
	5106	}
	5107	imgp->ip_applec++;
	5108
	5109	/*
	5110	* Add MAIN_STACK_KEY: Supplies the address and size of the main thread's
	5111	* stack if it was allocated by the kernel.
	5112	*
	5113	* The guard page is not included in this stack size as libpthread
	5114	* expects to add it back in after receiving this value.
	5115	*/
	5116	if (load_result->unixproc) {
	5117	char stack_string[strlen(MAIN_STACK_KEY) + (HEX_STR_LEN + 1) * MAIN_STACK_VALUES + 1];
	5118	snprintf(stack_string, sizeof(stack_string),
	5119	MAIN_STACK_KEY "0x%llx,0x%llx,0x%llx,0x%llx",
	5120	(uint64_t)load_result->user_stack,
	5121	(uint64_t)load_result->user_stack_size,
	5122	(uint64_t)load_result->user_stack_alloc,
	5123	(uint64_t)load_result->user_stack_alloc_size);
	5124	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(stack_string), UIO_SYSSPACE, FALSE);
	5125	if (error) {
	5126	goto bad;
	5127	}
	5128	imgp->ip_applec++;
	5129	}
	5130
	5131	if (imgp->ip_vattr) {
	5132	uint64_t fsid = vnode_get_va_fsid(imgp->ip_vattr);
	5133	uint64_t fsobjid = imgp->ip_vattr->va_fileid;
	5134
	5135	char fsid_string[strlen(FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
	5136	snprintf(fsid_string, sizeof(fsid_string),
	5137	FSID_KEY "0x%llx,0x%llx", fsid, fsobjid);
	5138	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
	5139	if (error) {
	5140	goto bad;
	5141	}
	5142	imgp->ip_applec++;
	5143	}
	5144
	5145	if (imgp->ip_dyld_fsid \|\| imgp->ip_dyld_fsobjid) {
	5146	char fsid_string[strlen(DYLD_FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
	5147	snprintf(fsid_string, sizeof(fsid_string),
	5148	DYLD_FSID_KEY "0x%llx,0x%llx", imgp->ip_dyld_fsid, imgp->ip_dyld_fsobjid);
	5149	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
	5150	if (error) {
	5151	goto bad;
	5152	}
	5153	imgp->ip_applec++;
	5154	}
	5155
	5156	uint8_t cdhash[SHA1_RESULTLEN];
	5157	int cdhash_errror = ubc_cs_getcdhash(imgp->ip_vp, imgp->ip_arch_offset, cdhash);
	5158	if (cdhash_errror == 0) {
	5159	char hash_string[strlen(CDHASH_KEY) + 2 * SHA1_RESULTLEN + 1];
	5160	strncpy(hash_string, CDHASH_KEY, sizeof(hash_string));
	5161	char *p = hash_string + sizeof(CDHASH_KEY) - 1;
	5162	for (int i = 0; i < SHA1_RESULTLEN; i++) {
	5163	snprintf(p, 3, "%02x", (int) cdhash[i]);
	5164	p += 2;
	5165	}
	5166	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hash_string), UIO_SYSSPACE, FALSE);
	5167	if (error) {
	5168	goto bad;
	5169	}
	5170	imgp->ip_applec++;
	5171	}
	5172	#if (DEVELOPMENT \|\| DEBUG)
	5173	if (dyld_flags) {
	5174	char dyld_flags_string[strlen(DYLD_FLAGS_KEY) + HEX_STR_LEN + 1];
	5175	snprintf(dyld_flags_string, sizeof(dyld_flags_string), DYLD_FLAGS_KEY "0x%llx", dyld_flags);
	5176	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_flags_string), UIO_SYSSPACE, FALSE);
	5177	if (error) {
	5178	goto bad;
	5179	}
	5180	imgp->ip_applec++;
	5181	}
	5182	#endif
	5183
	5184	/* Align the tail of the combined applev area */
	5185	while (imgp->ip_strspace % img_ptr_size != 0) {
	5186	*imgp->ip_strendp++ = '\0';
	5187	imgp->ip_strspace--;
	5188	}
	5189
	5190	bad:
	5191	return error;
	5192	}
	5193
	5194	#define unix_stack_size(p) (p->p_rlimit[RLIMIT_STACK].rlim_cur)
	5195
	5196	/*
	5197	* exec_check_permissions
	5198	*
	5199	* Description: Verify that the file that is being attempted to be executed
	5200	* is in fact allowed to be executed based on it POSIX file
	5201	* permissions and other access control criteria
	5202	*
	5203	* Parameters: struct image_params * the image parameter block
	5204	*
	5205	* Returns: 0 Success
	5206	* EACCES Permission denied
	5207	* ENOEXEC Executable file format error
	5208	* ETXTBSY Text file busy [misuse of error code]
	5209	* vnode_getattr:???
	5210	* vnode_authorize:???
	5211	*/
	5212	static int
	5213	exec_check_permissions(struct image_params *imgp)
	5214	{
	5215	struct vnode *vp = imgp->ip_vp;
	5216	struct vnode_attr *vap = imgp->ip_vattr;
	5217	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	5218	int error;
	5219	kauth_action_t action;
	5220
	5221	/* Only allow execution of regular files */
	5222	if (!vnode_isreg(vp)) {
	5223	return EACCES;
	5224	}
	5225
	5226	/* Get the file attributes that we will be using here and elsewhere */
	5227	VATTR_INIT(vap);
	5228	VATTR_WANTED(vap, va_uid);
	5229	VATTR_WANTED(vap, va_gid);
	5230	VATTR_WANTED(vap, va_mode);
	5231	VATTR_WANTED(vap, va_fsid);
	5232	VATTR_WANTED(vap, va_fsid64);
	5233	VATTR_WANTED(vap, va_fileid);
	5234	VATTR_WANTED(vap, va_data_size);
	5235	if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0) {
	5236	return error;
	5237	}
	5238
	5239	/*
	5240	* Ensure that at least one execute bit is on - otherwise root
	5241	* will always succeed, and we don't want to happen unless the
	5242	* file really is executable.
	5243	*/
	5244	if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR \| S_IXGRP \| S_IXOTH)) == 0)) {
	5245	return EACCES;
	5246	}
	5247
	5248	/* Disallow zero length files */
	5249	if (vap->va_data_size == 0) {
	5250	return ENOEXEC;
	5251	}
	5252
	5253	imgp->ip_arch_offset = (user_size_t)0;
	5254	imgp->ip_arch_size = vap->va_data_size;
	5255
	5256	/* Disable setuid-ness for traced programs or if MNT_NOSUID */
	5257	if ((vp->v_mount->mnt_flag & MNT_NOSUID) \|\| (p->p_lflag & P_LTRACED)) {
	5258	vap->va_mode &= ~(VSUID \| VSGID);
	5259	}
	5260
	5261	/*
	5262	* Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
	5263	* flags for setuid/setgid binaries.
	5264	*/
	5265	if (vap->va_mode & (VSUID \| VSGID)) {
	5266	imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC \| IMGPF_DISABLE_ASLR);
	5267	}
	5268
	5269	#if CONFIG_MACF
	5270	error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
	5271	if (error) {
	5272	return error;
	5273	}
	5274	#endif
	5275
	5276	/* Check for execute permission */
	5277	action = KAUTH_VNODE_EXECUTE;
	5278	/* Traced images must also be readable */
	5279	if (p->p_lflag & P_LTRACED) {
	5280	action \|= KAUTH_VNODE_READ_DATA;
	5281	}
	5282	if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0) {
	5283	return error;
	5284	}
	5285
	5286	#if 0
	5287	/* Don't let it run if anyone had it open for writing */
	5288	vnode_lock(vp);
	5289	if (vp->v_writecount) {
	5290	panic("going to return ETXTBSY %x", vp);
	5291	vnode_unlock(vp);
	5292	return ETXTBSY;
	5293	}
	5294	vnode_unlock(vp);
	5295	#endif
	5296
	5297
	5298	/* XXX May want to indicate to underlying FS that vnode is open */
	5299
	5300	return error;
	5301	}
	5302
	5303
	5304	/*
	5305	* exec_handle_sugid
	5306	*
	5307	* Initially clear the P_SUGID in the process flags; if an SUGID process is
	5308	* exec'ing a non-SUGID image, then this is the point of no return.
	5309	*
	5310	* If the image being activated is SUGID, then replace the credential with a
	5311	* copy, disable tracing (unless the tracing process is root), reset the
	5312	* mach task port to revoke it, set the P_SUGID bit,
	5313	*
	5314	* If the saved user and group ID will be changing, then make sure it happens
	5315	* to a new credential, rather than a shared one.
	5316	*
	5317	* Set the security token (this is probably obsolete, given that the token
	5318	* should not technically be separate from the credential itself).
	5319	*
	5320	* Parameters: struct image_params * the image parameter block
	5321	*
	5322	* Returns: void No failure indication
	5323	*
	5324	* Implicit returns:
	5325	* <process credential> Potentially modified/replaced
	5326	* <task port> Potentially revoked
	5327	* <process flags> P_SUGID bit potentially modified
	5328	* <security token> Potentially modified
	5329	*/
	5330	static int
	5331	exec_handle_sugid(struct image_params *imgp)
	5332	{
	5333	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	5334	kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
	5335	int i;
	5336	int leave_sugid_clear = 0;
	5337	int mac_reset_ipc = 0;
	5338	int error = 0;
	5339	task_t task = NULL;
	5340	#if CONFIG_MACF
	5341	int mac_transition, disjoint_cred = 0;
	5342	int label_update_return = 0;
	5343
	5344	/*
	5345	* Determine whether a call to update the MAC label will result in the
	5346	* credential changing.
	5347	*
	5348	* Note: MAC policies which do not actually end up modifying
	5349	* the label subsequently are strongly encouraged to
	5350	* return 0 for this check, since a non-zero answer will
	5351	* slow down the exec fast path for normal binaries.
	5352	*/
	5353	mac_transition = mac_cred_check_label_update_execve(
	5354	imgp->ip_vfs_context,
	5355	imgp->ip_vp,
	5356	imgp->ip_arch_offset,
	5357	imgp->ip_scriptvp,
	5358	imgp->ip_scriptlabelp,
	5359	imgp->ip_execlabelp,
	5360	p,
	5361	imgp->ip_px_smpx);
	5362	#endif
	5363
	5364	OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
	5365
	5366	/*
	5367	* Order of the following is important; group checks must go last,
	5368	* as we use the success of the 'ismember' check combined with the
	5369	* failure of the explicit match to indicate that we will be setting
	5370	* the egid of the process even though the new process did not
	5371	* require VSUID/VSGID bits in order for it to set the new group as
	5372	* its egid.
	5373	*
	5374	* Note: Technically, by this we are implying a call to
	5375	* setegid() in the new process, rather than implying
	5376	* it used its VSGID bit to set the effective group,
	5377	* even though there is no code in that process to make
	5378	* such a call.
	5379	*/
	5380	if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
	5381	kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) \|\|
	5382	((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
	5383	((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) \|\| !leave_sugid_clear) \|\|
	5384	(kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
	5385	#if CONFIG_MACF
	5386	/* label for MAC transition and neither VSUID nor VSGID */
	5387	handle_mac_transition:
	5388	#endif
	5389
	5390	#if !SECURE_KERNEL
	5391	/*
	5392	* Replace the credential with a copy of itself if euid or
	5393	* egid change.
	5394	*
	5395	* Note: setuid binaries will automatically opt out of
	5396	* group resolver participation as a side effect
	5397	* of this operation. This is an intentional
	5398	* part of the security model, which requires a
	5399	* participating credential be established by
	5400	* escalating privilege, setting up all other
	5401	* aspects of the credential including whether
	5402	* or not to participate in external group
	5403	* membership resolution, then dropping their
	5404	* effective privilege to that of the desired
	5405	* final credential state.
	5406	*
	5407	* Modifications to p_ucred must be guarded using the
	5408	* proc's ucred lock. This prevents others from accessing
	5409	* a garbage credential.
	5410	*/
	5411	if (imgp->ip_origvattr->va_mode & VSUID) {
	5412	apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
	5413	return kauth_cred_setresuid(my_cred,
	5414	KAUTH_UID_NONE,
	5415	imgp->ip_origvattr->va_uid,
	5416	imgp->ip_origvattr->va_uid,
	5417	KAUTH_UID_NONE);
	5418	});
	5419	}
	5420
	5421	if (imgp->ip_origvattr->va_mode & VSGID) {
	5422	apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
	5423	return kauth_cred_setresgid(my_cred,
	5424	KAUTH_GID_NONE,
	5425	imgp->ip_origvattr->va_gid,
	5426	imgp->ip_origvattr->va_gid);
	5427	});
	5428	}
	5429	#endif /* !SECURE_KERNEL */
	5430
	5431	#if CONFIG_MACF
	5432	/*
	5433	* If a policy has indicated that it will transition the label,
	5434	* before making the call into the MAC policies, get a new
	5435	* duplicate credential, so they can modify it without
	5436	* modifying any others sharing it.
	5437	*/
	5438	if (mac_transition) {
	5439	/*
	5440	* This hook may generate upcalls that require
	5441	* importance donation from the kernel.
	5442	* (23925818)
	5443	*/
	5444	thread_t thread = current_thread();
	5445	thread_enable_send_importance(thread, TRUE);
	5446	kauth_proc_label_update_execve(p,
	5447	imgp->ip_vfs_context,
	5448	imgp->ip_vp,
	5449	imgp->ip_arch_offset,
	5450	imgp->ip_scriptvp,
	5451	imgp->ip_scriptlabelp,
	5452	imgp->ip_execlabelp,
	5453	&imgp->ip_csflags,
	5454	imgp->ip_px_smpx,
	5455	&disjoint_cred, /* will be non zero if disjoint */
	5456	&label_update_return);
	5457	thread_enable_send_importance(thread, FALSE);
	5458
	5459	if (disjoint_cred) {
	5460	/*
	5461	* If updating the MAC label resulted in a
	5462	* disjoint credential, flag that we need to
	5463	* set the P_SUGID bit. This protects
	5464	* against debuggers being attached by an
	5465	* insufficiently privileged process onto the
	5466	* result of a transition to a more privileged
	5467	* credential.
	5468	*/
	5469	leave_sugid_clear = 0;
	5470	}
	5471
	5472	imgp->ip_mac_return = label_update_return;
	5473	}
	5474
	5475	mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
	5476
	5477	#endif /* CONFIG_MACF */
	5478
	5479	/*
	5480	* If 'leave_sugid_clear' is non-zero, then we passed the
	5481	* VSUID and MACF checks, and successfully determined that
	5482	* the previous cred was a member of the VSGID group, but
	5483	* that it was not the default at the time of the execve,
	5484	* and that the post-labelling credential was not disjoint.
	5485	* So we don't set the P_SUGID or reset mach ports and fds
	5486	* on the basis of simply running this code.
	5487	*/
	5488	if (mac_reset_ipc \|\| !leave_sugid_clear) {
	5489	/*
	5490	* Have mach reset the task and thread ports.
	5491	* We don't want anyone who had the ports before
	5492	* a setuid exec to be able to access/control the
	5493	* task/thread after.
	5494	*/
	5495	ipc_task_reset((imgp->ip_new_thread != NULL) ?
	5496	get_threadtask(imgp->ip_new_thread) : p->task);
	5497	ipc_thread_reset((imgp->ip_new_thread != NULL) ?
	5498	imgp->ip_new_thread : current_thread());
	5499	}
	5500
	5501	if (!leave_sugid_clear) {
	5502	/*
	5503	* Flag the process as setuid.
	5504	*/
	5505	OSBitOrAtomic(P_SUGID, &p->p_flag);
	5506
	5507	/*
	5508	* Radar 2261856; setuid security hole fix
	5509	* XXX For setuid processes, attempt to ensure that
	5510	* stdin, stdout, and stderr are already allocated.
	5511	* We do not want userland to accidentally allocate
	5512	* descriptors in this range which has implied meaning
	5513	* to libc.
	5514	*/
	5515	for (i = 0; i < 3; i++) {
	5516	if (p->p_fd->fd_ofiles[i] != NULL) {
	5517	continue;
	5518	}
	5519
	5520	/*
	5521	* Do the kernel equivalent of
	5522	*
	5523	* if i == 0
	5524	* (void) open("/dev/null", O_RDONLY);
	5525	* else
	5526	* (void) open("/dev/null", O_WRONLY);
	5527	*/
	5528
	5529	struct fileproc *fp;
	5530	int indx;
	5531	int flag;
	5532	struct nameidata *ndp = NULL;
	5533
	5534	if (i == 0) {
	5535	flag = FREAD;
	5536	} else {
	5537	flag = FWRITE;
	5538	}
	5539
	5540	if ((error = falloc(p,
	5541	&fp, &indx, imgp->ip_vfs_context)) != 0) {
	5542	continue;
	5543	}
	5544
	5545	MALLOC(ndp, struct nameidata , sizeof(ndp), M_TEMP, M_WAITOK \| M_ZERO);
	5546	if (ndp == NULL) {
	5547	fp_free(p, indx, fp);
	5548	error = ENOMEM;
	5549	break;
	5550	}
	5551
	5552	NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
	5553	CAST_USER_ADDR_T("/dev/null"),
	5554	imgp->ip_vfs_context);
	5555
	5556	if ((error = vn_open(ndp, flag, 0)) != 0) {
	5557	fp_free(p, indx, fp);
	5558	FREE(ndp, M_TEMP);
	5559	break;
	5560	}
	5561
	5562	struct fileglob *fg = fp->f_fglob;
	5563
	5564	fg->fg_flag = flag;
	5565	fg->fg_ops = &vnops;
	5566	fg->fg_data = ndp->ni_vp;
	5567
	5568	vnode_put(ndp->ni_vp);
	5569
	5570	proc_fdlock(p);
	5571	procfdtbl_releasefd(p, indx, NULL);
	5572	fp_drop(p, indx, fp, 1);
	5573	proc_fdunlock(p);
	5574
	5575	FREE(ndp, M_TEMP);
	5576	}
	5577	}
	5578	}
	5579	#if CONFIG_MACF
	5580	else {
	5581	/*
	5582	* We are here because we were told that the MAC label will
	5583	* be transitioned, and the binary is not VSUID or VSGID; to
	5584	* deal with this case, we could either duplicate a lot of
	5585	* code, or we can indicate we want to default the P_SUGID
	5586	* bit clear and jump back up.
	5587	*/
	5588	if (mac_transition) {
	5589	leave_sugid_clear = 1;
	5590	goto handle_mac_transition;
	5591	}
	5592	}
	5593
	5594	#endif /* CONFIG_MACF */
	5595
	5596	/*
	5597	* Implement the semantic where the effective user and group become
	5598	* the saved user and group in exec'ed programs.
	5599	*
	5600	* Modifications to p_ucred must be guarded using the
	5601	* proc's ucred lock. This prevents others from accessing
	5602	* a garbage credential.
	5603	*/
	5604	apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
	5605	return kauth_cred_setsvuidgid(my_cred,
	5606	kauth_cred_getuid(my_cred),
	5607	kauth_cred_getgid(my_cred));
	5608	});
	5609
	5610	/* Update the process' identity version and set the security token */
	5611	p->p_idversion = OSIncrementAtomic(&nextpidversion);
	5612
	5613	if (imgp->ip_new_thread != NULL) {
	5614	task = get_threadtask(imgp->ip_new_thread);
	5615	} else {
	5616	task = p->task;
	5617	}
	5618	set_security_token_task_internal(p, task);
	5619
	5620	return error;
	5621	}
	5622
	5623
	5624	/*
	5625	* create_unix_stack
	5626	*
	5627	* Description: Set the user stack address for the process to the provided
	5628	* address. If a custom stack was not set as a result of the
	5629	* load process (i.e. as specified by the image file for the
	5630	* executable), then allocate the stack in the provided map and
	5631	* set up appropriate guard pages for enforcing administrative
	5632	* limits on stack growth, if they end up being needed.
	5633	*
	5634	* Parameters: p Process to set stack on
	5635	* load_result Information from mach-o load commands
	5636	* map Address map in which to allocate the new stack
	5637	*
	5638	* Returns: KERN_SUCCESS Stack successfully created
	5639	* !KERN_SUCCESS Mach failure code
	5640	*/
	5641	static kern_return_t
	5642	create_unix_stack(vm_map_t map, load_result_t* load_result,
	5643	proc_t p)
	5644	{
	5645	mach_vm_size_t size, prot_size;
	5646	mach_vm_offset_t addr, prot_addr;
	5647	kern_return_t kr;
	5648
	5649	mach_vm_address_t user_stack = load_result->user_stack;
	5650
	5651	proc_lock(p);
	5652	p->user_stack = user_stack;
	5653	if (load_result->custom_stack) {
	5654	p->p_lflag \|= P_LCUSTOM_STACK;
	5655	}
	5656	proc_unlock(p);
	5657
	5658	if (load_result->user_stack_alloc_size > 0) {
	5659	/*
	5660	* Allocate enough space for the maximum stack size we
	5661	* will ever authorize and an extra page to act as
	5662	* a guard page for stack overflows. For default stacks,
	5663	* vm_initial_limit_stack takes care of the extra guard page.
	5664	* Otherwise we must allocate it ourselves.
	5665	*/
	5666	if (mach_vm_round_page_overflow(load_result->user_stack_alloc_size, &size)) {
	5667	return KERN_INVALID_ARGUMENT;
	5668	}
	5669	addr = mach_vm_trunc_page(load_result->user_stack - size);
	5670	kr = mach_vm_allocate_kernel(map, &addr, size,
	5671	VM_FLAGS_FIXED, VM_MEMORY_STACK);
	5672	if (kr != KERN_SUCCESS) {
	5673	// Can't allocate at default location, try anywhere
	5674	addr = 0;
	5675	kr = mach_vm_allocate_kernel(map, &addr, size,
	5676	VM_FLAGS_ANYWHERE, VM_MEMORY_STACK);
	5677	if (kr != KERN_SUCCESS) {
	5678	return kr;
	5679	}
	5680
	5681	user_stack = addr + size;
	5682	load_result->user_stack = user_stack;
	5683
	5684	proc_lock(p);
	5685	p->user_stack = user_stack;
	5686	proc_unlock(p);
	5687	}
	5688
	5689	load_result->user_stack_alloc = addr;
	5690
	5691	/*
	5692	* And prevent access to what's above the current stack
	5693	* size limit for this process.
	5694	*/
	5695	if (load_result->user_stack_size == 0) {
	5696	proc_list_lock();
	5697	load_result->user_stack_size = unix_stack_size(p);
	5698	proc_list_unlock();
	5699	prot_size = mach_vm_trunc_page(size - load_result->user_stack_size);
	5700	} else {
	5701	prot_size = PAGE_SIZE;
	5702	}
	5703
	5704	prot_addr = addr;
	5705	kr = mach_vm_protect(map,
	5706	prot_addr,
	5707	prot_size,
	5708	FALSE,
	5709	VM_PROT_NONE);
	5710	if (kr != KERN_SUCCESS) {
	5711	(void)mach_vm_deallocate(map, addr, size);
	5712	return kr;
	5713	}
	5714	}
	5715
	5716	return KERN_SUCCESS;
	5717	}
	5718
	5719	#include <sys/reboot.h>
	5720
	5721	/*
	5722	* load_init_program_at_path
	5723	*
	5724	* Description: Load the "init" program; in most cases, this will be "launchd"
	5725	*
	5726	* Parameters: p Process to call execve() to create
	5727	* the "init" program
	5728	* scratch_addr Page in p, scratch space
	5729	* path NULL terminated path
	5730	*
	5731	* Returns: KERN_SUCCESS Success
	5732	* !KERN_SUCCESS See execve/mac_execve for error codes
	5733	*
	5734	* Notes: The process that is passed in is the first manufactured
	5735	* process on the system, and gets here via bsd_ast() firing
	5736	* for the first time. This is done to ensure that bsd_init()
	5737	* has run to completion.
	5738	*
	5739	* The address map of the first manufactured process matches the
	5740	* word width of the kernel. Once the self-exec completes, the
	5741	* initproc might be different.
	5742	*/
	5743	static int
	5744	load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
	5745	{
	5746	int retval[2];
	5747	int error;
	5748	struct execve_args init_exec_args;
	5749	user_addr_t argv0 = USER_ADDR_NULL, argv1 = USER_ADDR_NULL;
	5750
	5751	/*
	5752	* Validate inputs and pre-conditions
	5753	*/
	5754	assert(p);
	5755	assert(scratch_addr);
	5756	assert(path);
	5757
	5758	/*
	5759	* Copy out program name.
	5760	*/
	5761	size_t path_length = strlen(path) + 1;
	5762	argv0 = scratch_addr;
	5763	error = copyout(path, argv0, path_length);
	5764	if (error) {
	5765	return error;
	5766	}
	5767
	5768	scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, sizeof(user_addr_t));
	5769
	5770	/*
	5771	* Put out first (and only) argument, similarly.
	5772	* Assumes everything fits in a page as allocated above.
	5773	*/
	5774	if (boothowto & RB_SINGLE) {
	5775	const char *init_args = "-s";
	5776	size_t init_args_length = strlen(init_args) + 1;
	5777
	5778	argv1 = scratch_addr;
	5779	error = copyout(init_args, argv1, init_args_length);
	5780	if (error) {
	5781	return error;
	5782	}
	5783
	5784	scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, sizeof(user_addr_t));
	5785	}
	5786
	5787	if (proc_is64bit(p)) {
	5788	user64_addr_t argv64bit[3] = {};
	5789
	5790	argv64bit[0] = argv0;
	5791	argv64bit[1] = argv1;
	5792	argv64bit[2] = USER_ADDR_NULL;
	5793
	5794	error = copyout(argv64bit, scratch_addr, sizeof(argv64bit));
	5795	if (error) {
	5796	return error;
	5797	}
	5798	} else {
	5799	user32_addr_t argv32bit[3] = {};
	5800
	5801	argv32bit[0] = (user32_addr_t)argv0;
	5802	argv32bit[1] = (user32_addr_t)argv1;
	5803	argv32bit[2] = USER_ADDR_NULL;
	5804
	5805	error = copyout(argv32bit, scratch_addr, sizeof(argv32bit));
	5806	if (error) {
	5807	return error;
	5808	}
	5809	}
	5810
	5811	/*
	5812	* Set up argument block for fake call to execve.
	5813	*/
	5814	init_exec_args.fname = argv0;
	5815	init_exec_args.argp = scratch_addr;
	5816	init_exec_args.envp = USER_ADDR_NULL;
	5817
	5818	/*
	5819	* So that init task is set with uid,gid 0 token
	5820	*/
	5821	set_security_token(p);
	5822
	5823	return execve(p, &init_exec_args, retval);
	5824	}
	5825
	5826	static const char * init_programs[] = {
	5827	#if DEBUG
	5828	"/usr/local/sbin/launchd.debug",
	5829	#endif
	5830	#if DEVELOPMENT \|\| DEBUG
	5831	"/usr/local/sbin/launchd.development",
	5832	#endif
	5833	"/sbin/launchd",
	5834	};
	5835
	5836	/*
	5837	* load_init_program
	5838	*
	5839	* Description: Load the "init" program; in most cases, this will be "launchd"
	5840	*
	5841	* Parameters: p Process to call execve() to create
	5842	* the "init" program
	5843	*
	5844	* Returns: (void)
	5845	*
	5846	* Notes: The process that is passed in is the first manufactured
	5847	* process on the system, and gets here via bsd_ast() firing
	5848	* for the first time. This is done to ensure that bsd_init()
	5849	* has run to completion.
	5850	*
	5851	* In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
	5852	* may be used to select a specific launchd executable. As with
	5853	* the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
	5854	* will force /sbin/launchd to be selected.
	5855	*
	5856	* Search order by build:
	5857	*
	5858	* DEBUG DEVELOPMENT RELEASE PATH
	5859	* ----------------------------------------------------------------------------------
	5860	* 1 1 NA /usr/local/sbin/launchd.$LAUNCHDSUFFIX
	5861	* 2 NA NA /usr/local/sbin/launchd.debug
	5862	* 3 2 NA /usr/local/sbin/launchd.development
	5863	* 4 3 1 /sbin/launchd
	5864	*/
	5865	void
	5866	load_init_program(proc_t p)
	5867	{
	5868	uint32_t i;
	5869	int error;
	5870	vm_map_t map = current_map();
	5871	mach_vm_offset_t scratch_addr = 0;
	5872	mach_vm_size_t map_page_size = vm_map_page_size(map);
	5873
	5874	(void) mach_vm_allocate_kernel(map, &scratch_addr, map_page_size, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE);
	5875	#if CONFIG_MEMORYSTATUS
	5876	(void) memorystatus_init_at_boot_snapshot();
	5877	#endif /* CONFIG_MEMORYSTATUS */
	5878
	5879	#if DEBUG \|\| DEVELOPMENT
	5880	/* Check for boot-arg suffix first */
	5881	char launchd_suffix[64];
	5882	if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
	5883	char launchd_path[128];
	5884	boolean_t is_release_suffix = ((launchd_suffix[0] == 0) \|\|
	5885	(strcmp(launchd_suffix, "release") == 0));
	5886
	5887	if (is_release_suffix) {
	5888	printf("load_init_program: attempting to load /sbin/launchd\n");
	5889	error = load_init_program_at_path(p, (user_addr_t)scratch_addr, "/sbin/launchd");
	5890	if (!error) {
	5891	return;
	5892	}
	5893
	5894	panic("Process 1 exec of launchd.release failed, errno %d", error);
	5895	} else {
	5896	strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path));
	5897	strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
	5898
	5899	printf("load_init_program: attempting to load %s\n", launchd_path);
	5900	error = load_init_program_at_path(p, (user_addr_t)scratch_addr, launchd_path);
	5901	if (!error) {
	5902	return;
	5903	} else {
	5904	printf("load_init_program: failed loading %s: errno %d\n", launchd_path, error);
	5905	}
	5906	}
	5907	}
	5908	#endif
	5909
	5910	error = ENOENT;
	5911	for (i = 0; i < sizeof(init_programs) / sizeof(init_programs[0]); i++) {
	5912	printf("load_init_program: attempting to load %s\n", init_programs[i]);
	5913	error = load_init_program_at_path(p, (user_addr_t)scratch_addr, init_programs[i]);
	5914	if (!error) {
	5915	return;
	5916	} else {
	5917	printf("load_init_program: failed loading %s: errno %d\n", init_programs[i], error);
	5918	}
	5919	}
	5920
	5921	panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i - 1]), error);
	5922	}
	5923
	5924	/*
	5925	* load_return_to_errno
	5926	*
	5927	* Description: Convert a load_return_t (Mach error) to an errno (BSD error)
	5928	*
	5929	* Parameters: lrtn Mach error number
	5930	*
	5931	* Returns: (int) BSD error number
	5932	* 0 Success
	5933	* EBADARCH Bad architecture
	5934	* EBADMACHO Bad Mach object file
	5935	* ESHLIBVERS Bad shared library version
	5936	* ENOMEM Out of memory/resource shortage
	5937	* EACCES Access denied
	5938	* ENOENT Entry not found (usually "file does
	5939	* does not exist")
	5940	* EIO An I/O error occurred
	5941	* EBADEXEC The executable is corrupt/unknown
	5942	*/
	5943	static int
	5944	load_return_to_errno(load_return_t lrtn)
	5945	{
	5946	switch (lrtn) {
	5947	case LOAD_SUCCESS:
	5948	return 0;
	5949	case LOAD_BADARCH:
	5950	return EBADARCH;
	5951	case LOAD_BADMACHO:
	5952	case LOAD_BADMACHO_UPX:
	5953	return EBADMACHO;
	5954	case LOAD_SHLIB:
	5955	return ESHLIBVERS;
	5956	case LOAD_NOSPACE:
	5957	case LOAD_RESOURCE:
	5958	return ENOMEM;
	5959	case LOAD_PROTECT:
	5960	return EACCES;
	5961	case LOAD_ENOENT:
	5962	return ENOENT;
	5963	case LOAD_IOERROR:
	5964	return EIO;
	5965	case LOAD_DECRYPTFAIL:
	5966	return EAUTH;
	5967	case LOAD_FAILURE:
	5968	default:
	5969	return EBADEXEC;
	5970	}
	5971	}
	5972
	5973	#include <mach/mach_types.h>
	5974	#include <mach/vm_prot.h>
	5975	#include <mach/semaphore.h>
	5976	#include <mach/sync_policy.h>
	5977	#include <kern/clock.h>
	5978	#include <mach/kern_return.h>
	5979
	5980	/*
	5981	* execargs_alloc
	5982	*
	5983	* Description: Allocate the block of memory used by the execve arguments.
	5984	* At the same time, we allocate a page so that we can read in
	5985	* the first page of the image.
	5986	*
	5987	* Parameters: struct image_params * the image parameter block
	5988	*
	5989	* Returns: 0 Success
	5990	* EINVAL Invalid argument
	5991	* EACCES Permission denied
	5992	* EINTR Interrupted function
	5993	* ENOMEM Not enough space
	5994	*
	5995	* Notes: This is a temporary allocation into the kernel address space
	5996	* to enable us to copy arguments in from user space. This is
	5997	* necessitated by not mapping the process calling execve() into
	5998	* the kernel address space during the execve() system call.
	5999	*
	6000	* We assemble the argument and environment, etc., into this
	6001	* region before copying it as a single block into the child
	6002	* process address space (at the top or bottom of the stack,
	6003	* depending on which way the stack grows; see the function
	6004	* exec_copyout_strings() for details).
	6005	*
	6006	* This ends up with a second (possibly unnecessary) copy compared
	6007	* with assembling the data directly into the child address space,
	6008	* instead, but since we cannot be guaranteed that the parent has
	6009	* not modified its environment, we can't really know that it's
	6010	* really a block there as well.
	6011	*/
	6012
	6013
	6014	static int execargs_waiters = 0;
	6015	lck_mtx_t *execargs_cache_lock;
	6016
	6017	static void
	6018	execargs_lock_lock(void)
	6019	{
	6020	lck_mtx_lock_spin(execargs_cache_lock);
	6021	}
	6022
	6023	static void
	6024	execargs_lock_unlock(void)
	6025	{
	6026	lck_mtx_unlock(execargs_cache_lock);
	6027	}
	6028
	6029	static wait_result_t
	6030	execargs_lock_sleep(void)
	6031	{
	6032	return lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE);
	6033	}
	6034
	6035	static kern_return_t
	6036	execargs_purgeable_allocate(char **execarg_address)
	6037	{
	6038	kern_return_t kr = vm_allocate_kernel(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE \| VM_FLAGS_PURGABLE, VM_KERN_MEMORY_NONE);
	6039	assert(kr == KERN_SUCCESS);
	6040	return kr;
	6041	}
	6042
	6043	static kern_return_t
	6044	execargs_purgeable_reference(void *execarg_address)
	6045	{
	6046	int state = VM_PURGABLE_NONVOLATILE;
	6047	kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
	6048
	6049	assert(kr == KERN_SUCCESS);
	6050	return kr;
	6051	}
	6052
	6053	static kern_return_t
	6054	execargs_purgeable_volatilize(void *execarg_address)
	6055	{
	6056	int state = VM_PURGABLE_VOLATILE \| VM_PURGABLE_ORDERING_OBSOLETE;
	6057	kern_return_t kr;
	6058	kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
	6059
	6060	assert(kr == KERN_SUCCESS);
	6061
	6062	return kr;
	6063	}
	6064
	6065	static void
	6066	execargs_wakeup_waiters(void)
	6067	{
	6068	thread_wakeup(&execargs_free_count);
	6069	}
	6070
	6071	static int
	6072	execargs_alloc(struct image_params *imgp)
	6073	{
	6074	kern_return_t kret;
	6075	wait_result_t res;
	6076	int i, cache_index = -1;
	6077
	6078	execargs_lock_lock();
	6079
	6080	while (execargs_free_count == 0) {
	6081	execargs_waiters++;
	6082	res = execargs_lock_sleep();
	6083	execargs_waiters--;
	6084	if (res != THREAD_AWAKENED) {
	6085	execargs_lock_unlock();
	6086	return EINTR;
	6087	}
	6088	}
	6089
	6090	execargs_free_count--;
	6091
	6092	for (i = 0; i < execargs_cache_size; i++) {
	6093	vm_offset_t element = execargs_cache[i];
	6094	if (element) {
	6095	cache_index = i;
	6096	imgp->ip_strings = (char *)(execargs_cache[i]);
	6097	execargs_cache[i] = 0;
	6098	break;
	6099	}
	6100	}
	6101
	6102	assert(execargs_free_count >= 0);
	6103
	6104	execargs_lock_unlock();
	6105
	6106	if (cache_index == -1) {
	6107	kret = execargs_purgeable_allocate(&imgp->ip_strings);
	6108	} else {
	6109	kret = execargs_purgeable_reference(imgp->ip_strings);
	6110	}
	6111
	6112	assert(kret == KERN_SUCCESS);
	6113	if (kret != KERN_SUCCESS) {
	6114	return ENOMEM;
	6115	}
	6116
	6117	/* last page used to read in file headers */
	6118	imgp->ip_vdata = imgp->ip_strings + (NCARGS + PAGE_SIZE);
	6119	imgp->ip_strendp = imgp->ip_strings;
	6120	imgp->ip_argspace = NCARGS;
	6121	imgp->ip_strspace = (NCARGS + PAGE_SIZE);
	6122
	6123	return 0;
	6124	}
	6125
	6126	/*
	6127	* execargs_free
	6128	*
	6129	* Description: Free the block of memory used by the execve arguments and the
	6130	* first page of the executable by a previous call to the function
	6131	* execargs_alloc().
	6132	*
	6133	* Parameters: struct image_params * the image parameter block
	6134	*
	6135	* Returns: 0 Success
	6136	* EINVAL Invalid argument
	6137	* EINTR Oeration interrupted
	6138	*/
	6139	static int
	6140	execargs_free(struct image_params *imgp)
	6141	{
	6142	kern_return_t kret;
	6143	int i;
	6144	boolean_t needs_wakeup = FALSE;
	6145
	6146	kret = execargs_purgeable_volatilize(imgp->ip_strings);
	6147
	6148	execargs_lock_lock();
	6149	execargs_free_count++;
	6150
	6151	for (i = 0; i < execargs_cache_size; i++) {
	6152	vm_offset_t element = execargs_cache[i];
	6153	if (element == 0) {
	6154	execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
	6155	imgp->ip_strings = NULL;
	6156	break;
	6157	}
	6158	}
	6159
	6160	assert(imgp->ip_strings == NULL);
	6161
	6162	if (execargs_waiters > 0) {
	6163	needs_wakeup = TRUE;
	6164	}
	6165
	6166	execargs_lock_unlock();
	6167
	6168	if (needs_wakeup == TRUE) {
	6169	execargs_wakeup_waiters();
	6170	}
	6171
	6172	return kret == KERN_SUCCESS ? 0 : EINVAL;
	6173	}
	6174
	6175	static void
	6176	exec_resettextvp(proc_t p, struct image_params *imgp)
	6177	{
	6178	vnode_t vp;
	6179	off_t offset;
	6180	vnode_t tvp = p->p_textvp;
	6181	int ret;
	6182
	6183	vp = imgp->ip_vp;
	6184	offset = imgp->ip_arch_offset;
	6185
	6186	if (vp == NULLVP) {
	6187	panic("exec_resettextvp: expected valid vp");
	6188	}
	6189
	6190	ret = vnode_ref(vp);
	6191	proc_lock(p);
	6192	if (ret == 0) {
	6193	p->p_textvp = vp;
	6194	p->p_textoff = offset;
	6195	} else {
	6196	p->p_textvp = NULLVP; /* this is paranoia */
	6197	p->p_textoff = 0;
	6198	}
	6199	proc_unlock(p);
	6200
	6201	if (tvp != NULLVP) {
	6202	if (vnode_getwithref(tvp) == 0) {
	6203	vnode_rele(tvp);
	6204	vnode_put(tvp);
	6205	}
	6206	}
	6207	}
	6208
	6209	// Includes the 0-byte (therefore "SIZE" instead of "LEN").
	6210	static const size_t CS_CDHASH_STRING_SIZE = CS_CDHASH_LEN * 2 + 1;
	6211
	6212	static void
	6213	cdhash_to_string(char str[CS_CDHASH_STRING_SIZE], uint8_t const * const cdhash)
	6214	{
	6215	static char const nibble[] = "0123456789abcdef";
	6216
	6217	/* Apparently still the safest way to get a hex representation
	6218	* of binary data.
	6219	* xnu's printf routines have %*D/%20D in theory, but "not really", see:
	6220	* <rdar://problem/33328859> confusion around %*D/%nD in printf
	6221	*/
	6222	for (int i = 0; i < CS_CDHASH_LEN; ++i) {
	6223	str[i * 2] = nibble[(cdhash[i] & 0xf0) >> 4];
	6224	str[i * 2 + 1] = nibble[cdhash[i] & 0x0f];
	6225	}
	6226	str[CS_CDHASH_STRING_SIZE - 1] = 0;
	6227	}
	6228
	6229	/*
	6230	* __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
	6231	*
	6232	* Description: Waits for the userspace daemon to respond to the request
	6233	* we made. Function declared non inline to be visible in
	6234	* stackshots and spindumps as well as debugging.
	6235	*/
	6236	__attribute__((noinline)) int
	6237	__EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
	6238	{
	6239	return find_code_signature(task_access_port, new_pid);
	6240	}
	6241
	6242	static int
	6243	check_for_signature(proc_t p, struct image_params *imgp)
	6244	{
	6245	mach_port_t port = IPC_PORT_NULL;
	6246	kern_return_t kr = KERN_FAILURE;
	6247	int error = EACCES;
	6248	boolean_t unexpected_failure = FALSE;
	6249	struct cs_blob *csb;
	6250	boolean_t require_success = FALSE;
	6251	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
	6252	int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
	6253	os_reason_t signature_failure_reason = OS_REASON_NULL;
	6254
	6255	/*
	6256	* Override inherited code signing flags with the
	6257	* ones for the process that is being successfully
	6258	* loaded
	6259	*/
	6260	proc_lock(p);
	6261	p->p_csflags = imgp->ip_csflags;
	6262	proc_unlock(p);
	6263
	6264	/* Set the switch_protect flag on the map */
	6265	if (p->p_csflags & (CS_HARD \| CS_KILL)) {
	6266	vm_map_switch_protect(get_task_map(p->task), TRUE);
	6267	}
	6268
	6269	/*
	6270	* image activation may be failed due to policy
	6271	* which is unexpected but security framework does not
	6272	* approve of exec, kill and return immediately.
	6273	*/
	6274	if (imgp->ip_mac_return != 0) {
	6275	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	6276	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY, 0, 0);
	6277	signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
	6278	error = imgp->ip_mac_return;
	6279	unexpected_failure = TRUE;
	6280	goto done;
	6281	}
	6282
	6283	if (imgp->ip_cs_error != OS_REASON_NULL) {
	6284	signature_failure_reason = imgp->ip_cs_error;
	6285	imgp->ip_cs_error = OS_REASON_NULL;
	6286	error = EACCES;
	6287	goto done;
	6288	}
	6289
	6290	/* If the code signature came through the image activation path, we skip the
	6291	* taskgated / externally attached path. */
	6292	if (imgp->ip_csflags & CS_SIGNED) {
	6293	error = 0;
	6294	goto done;
	6295	}
	6296
	6297	/* The rest of the code is for signatures that either already have been externally
	6298	* attached (likely, but not necessarily by a previous run through the taskgated
	6299	* path), or that will now be attached by taskgated. */
	6300
	6301	kr = task_get_task_access_port(p->task, &port);
	6302	if (KERN_SUCCESS != kr \|\| !IPC_PORT_VALID(port)) {
	6303	error = 0;
	6304	if (require_success) {
	6305	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	6306	p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT, 0, 0);
	6307	signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT);
	6308	error = EACCES;
	6309	}
	6310	goto done;
	6311	}
	6312
	6313	/*
	6314	* taskgated returns KERN_SUCCESS if it has completed its work
	6315	* and the exec should continue, KERN_FAILURE if the exec should
	6316	* fail, or it may error out with different error code in an
	6317	* event of mig failure (e.g. process was signalled during the
	6318	* rpc call, taskgated died, mig server died etc.).
	6319	*/
	6320
	6321	kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
	6322	switch (kr) {
	6323	case KERN_SUCCESS:
	6324	error = 0;
	6325	break;
	6326	case KERN_FAILURE:
	6327	error = EACCES;
	6328
	6329	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	6330	p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG, 0, 0);
	6331	signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG);
	6332	goto done;
	6333	default:
	6334	error = EACCES;
	6335
	6336	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
	6337	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER, 0, 0);
	6338	signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER);
	6339	unexpected_failure = TRUE;
	6340	goto done;
	6341	}
	6342
	6343	/* Only do this if exec_resettextvp() did not fail */
	6344	if (p->p_textvp != NULLVP) {
	6345	csb = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff);
	6346
	6347	if (csb != NULL) {
	6348	/* As the enforcement we can do here is very limited, we only allow things that
	6349	* are the only reason why this code path still exists:
	6350	* Adhoc signed non-platform binaries without special cs_flags and without any
	6351	* entitlements (unrestricted ones still pass AMFI). */
	6352	if (
	6353	/* Revalidate the blob if necessary through bumped generation count. */
	6354	(ubc_cs_generation_check(p->p_textvp) == 0 \|\|
	6355	ubc_cs_blob_revalidate(p->p_textvp, csb, imgp, 0) == 0) &&
	6356	/* Only CS_ADHOC, no CS_KILL, CS_HARD etc. */
	6357	(csb->csb_flags & CS_ALLOWED_MACHO) == CS_ADHOC &&
	6358	/* If it has a CMS blob, it's not adhoc. The CS_ADHOC flag can lie. */
	6359	csblob_find_blob_bytes((const uint8_t *)csb->csb_mem_kaddr, csb->csb_mem_size,
	6360	CSSLOT_SIGNATURESLOT,
	6361	CSMAGIC_BLOBWRAPPER) == NULL &&
	6362	/* It could still be in a trust cache (unlikely with CS_ADHOC), or a magic path. */
	6363	csb->csb_platform_binary == 0 &&
	6364	/* No entitlements, not even unrestricted ones. */
	6365	csb->csb_entitlements_blob == NULL) {
	6366	proc_lock(p);
	6367	p->p_csflags \|= CS_SIGNED \| CS_VALID;
	6368	proc_unlock(p);
	6369	} else {
	6370	uint8_t cdhash[CS_CDHASH_LEN];
	6371	char cdhash_string[CS_CDHASH_STRING_SIZE];
	6372	proc_getcdhash(p, cdhash);
	6373	cdhash_to_string(cdhash_string, cdhash);
	6374	printf("ignoring detached code signature on '%s' with cdhash '%s' "
	6375	"because it is invalid, or not a simple adhoc signature.\n",
	6376	p->p_name, cdhash_string);
	6377	}
	6378	}
	6379	}
	6380
	6381	done:
	6382	if (0 == error) {
	6383	/* The process's code signature related properties are
	6384	* fully set up, so this is an opportune moment to log
	6385	* platform binary execution, if desired. */
	6386	if (platform_exec_logging != 0 && csproc_get_platform_binary(p)) {
	6387	uint8_t cdhash[CS_CDHASH_LEN];
	6388	char cdhash_string[CS_CDHASH_STRING_SIZE];
	6389	proc_getcdhash(p, cdhash);
	6390	cdhash_to_string(cdhash_string, cdhash);
	6391
	6392	os_log(peLog, "CS Platform Exec Logging: Executing platform signed binary "
	6393	"'%s' with cdhash %s\n", p->p_name, cdhash_string);
	6394	}
	6395	} else {
	6396	if (!unexpected_failure) {
	6397	p->p_csflags \|= CS_KILLED;
	6398	}
	6399	/* make very sure execution fails */
	6400	if (vfexec \|\| spawn) {
	6401	assert(signature_failure_reason != OS_REASON_NULL);
	6402	psignal_vfork_with_reason(p, p->task, imgp->ip_new_thread,
	6403	SIGKILL, signature_failure_reason);
	6404	signature_failure_reason = OS_REASON_NULL;
	6405	error = 0;
	6406	} else {
	6407	assert(signature_failure_reason != OS_REASON_NULL);
	6408	psignal_with_reason(p, SIGKILL, signature_failure_reason);
	6409	signature_failure_reason = OS_REASON_NULL;
	6410	}
	6411	}
	6412
	6413	if (port != IPC_PORT_NULL) {
	6414	ipc_port_release_send(port);
	6415	}
	6416
	6417	/* If we hit this, we likely would have leaked an exit reason */
	6418	assert(signature_failure_reason == OS_REASON_NULL);
	6419	return error;
	6420	}
	6421
	6422	/*
	6423	* Typically as soon as we start executing this process, the
	6424	* first instruction will trigger a VM fault to bring the text
	6425	* pages (as executable) into the address space, followed soon
	6426	* thereafter by dyld data structures (for dynamic executable).
	6427	* To optimize this, as well as improve support for hardware
	6428	* debuggers that can only access resident pages present
	6429	* in the process' page tables, we prefault some pages if
	6430	* possible. Errors are non-fatal.
	6431	*/
	6432	static void
	6433	exec_prefault_data(proc_t p __unused, struct image_params imgp, load_result_t load_result)
	6434	{
	6435	int ret;
	6436	size_t expected_all_image_infos_size;
	6437
	6438	/*
	6439	* Prefault executable or dyld entry point.
	6440	*/
	6441	vm_fault(current_map(),
	6442	vm_map_trunc_page(load_result->entry_point,
	6443	vm_map_page_mask(current_map())),
	6444	VM_PROT_READ \| VM_PROT_EXECUTE,
	6445	FALSE, VM_KERN_MEMORY_NONE,
	6446	THREAD_UNINT, NULL, 0);
	6447
	6448	if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
	6449	expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
	6450	} else {
	6451	expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
	6452	}
	6453
	6454	/* Decode dyld anchor structure from <mach-o/dyld_images.h> */
	6455	if (load_result->dynlinker &&
	6456	load_result->all_image_info_addr &&
	6457	load_result->all_image_info_size >= expected_all_image_infos_size) {
	6458	union {
	6459	struct user64_dyld_all_image_infos infos64;
	6460	struct user32_dyld_all_image_infos infos32;
	6461	} all_image_infos;
	6462
	6463	/*
	6464	* Pre-fault to avoid copyin() going through the trap handler
	6465	* and recovery path.
	6466	*/
	6467	vm_fault(current_map(),
	6468	vm_map_trunc_page(load_result->all_image_info_addr,
	6469	vm_map_page_mask(current_map())),
	6470	VM_PROT_READ \| VM_PROT_WRITE,
	6471	FALSE, VM_KERN_MEMORY_NONE,
	6472	THREAD_UNINT, NULL, 0);
	6473	if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
	6474	/* all_image_infos straddles a page */
	6475	vm_fault(current_map(),
	6476	vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
	6477	vm_map_page_mask(current_map())),
	6478	VM_PROT_READ \| VM_PROT_WRITE,
	6479	FALSE, VM_KERN_MEMORY_NONE,
	6480	THREAD_UNINT, NULL, 0);
	6481	}
	6482
	6483	ret = copyin(load_result->all_image_info_addr,
	6484	&all_image_infos,
	6485	expected_all_image_infos_size);
	6486	if (ret == 0 && all_image_infos.infos32.version >= DYLD_ALL_IMAGE_INFOS_ADDRESS_MINIMUM_VERSION) {
	6487	user_addr_t notification_address;
	6488	user_addr_t dyld_image_address;
	6489	user_addr_t dyld_version_address;
	6490	user_addr_t dyld_all_image_infos_address;
	6491	user_addr_t dyld_slide_amount;
	6492
	6493	if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
	6494	notification_address = all_image_infos.infos64.notification;
	6495	dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
	6496	dyld_version_address = all_image_infos.infos64.dyldVersion;
	6497	dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
	6498	} else {
	6499	notification_address = all_image_infos.infos32.notification;
	6500	dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
	6501	dyld_version_address = all_image_infos.infos32.dyldVersion;
	6502	dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
	6503	}
	6504
	6505	/*
	6506	* dyld statically sets up the all_image_infos in its Mach-O
	6507	* binary at static link time, with pointers relative to its default
	6508	* load address. Since ASLR might slide dyld before its first
	6509	* instruction is executed, "dyld_slide_amount" tells us how far
	6510	* dyld was loaded compared to its default expected load address.
	6511	* All other pointers into dyld's image should be adjusted by this
	6512	* amount. At some point later, dyld will fix up pointers to take
	6513	* into account the slide, at which point the all_image_infos_address
	6514	* field in the structure will match the runtime load address, and
	6515	* "dyld_slide_amount" will be 0, if we were to consult it again.
	6516	*/
	6517
	6518	dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
	6519
	6520	#if 0
	6521	kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
	6522	(uint64_t)load_result->all_image_info_addr,
	6523	all_image_infos.infos32.version,
	6524	(uint64_t)notification_address,
	6525	(uint64_t)dyld_image_address,
	6526	(uint64_t)dyld_version_address,
	6527	(uint64_t)dyld_all_image_infos_address);
	6528	#endif
	6529
	6530	vm_fault(current_map(),
	6531	vm_map_trunc_page(notification_address + dyld_slide_amount,
	6532	vm_map_page_mask(current_map())),
	6533	VM_PROT_READ \| VM_PROT_EXECUTE,
	6534	FALSE, VM_KERN_MEMORY_NONE,
	6535	THREAD_UNINT, NULL, 0);
	6536	vm_fault(current_map(),
	6537	vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
	6538	vm_map_page_mask(current_map())),
	6539	VM_PROT_READ \| VM_PROT_EXECUTE,
	6540	FALSE, VM_KERN_MEMORY_NONE,
	6541	THREAD_UNINT, NULL, 0);
	6542	vm_fault(current_map(),
	6543	vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
	6544	vm_map_page_mask(current_map())),
	6545	VM_PROT_READ,
	6546	FALSE, VM_KERN_MEMORY_NONE,
	6547	THREAD_UNINT, NULL, 0);
	6548	vm_fault(current_map(),
	6549	vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
	6550	vm_map_page_mask(current_map())),
	6551	VM_PROT_READ \| VM_PROT_WRITE,
	6552	FALSE, VM_KERN_MEMORY_NONE,
	6553	THREAD_UNINT, NULL, 0);
	6554	}
	6555	}
	6556	}