/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)kern_exec.c	8.1 (Berkeley) 6/10/93
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <machine/reg.h>
#include <machine/cpu_capabilities.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/user.h>
#include <sys/socketvar.h>
#include <sys/malloc.h>
#include <sys/namei.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/uio_internal.h>
#include <sys/acct.h>
#include <sys/exec.h>
#include <sys/kdebug.h>
#include <sys/signal.h>
#include <sys/aio_kern.h>
#include <sys/sysproto.h>
#include <sys/persona.h>
#include <sys/reason.h>
#if SYSV_SHM
#include <sys/shm_internal.h>		/* shmexec() */
#endif
#include <sys/ubc_internal.h>		/* ubc_map() */
#include <sys/spawn.h>
#include <sys/spawn_internal.h>
#include <sys/process_policy.h>
#include <sys/codesign.h>
#include <sys/random.h>
#include <crypto/sha1.h>

#include <libkern/libkern.h>
#include <libkern/crypto/sha2.h>
#include <security/audit/audit.h>

#include <ipc/ipc_types.h>

#include <mach/mach_param.h>
#include <mach/mach_types.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/thread_act.h>
#include <mach/vm_map.h>
#include <mach/mach_vm.h>
#include <mach/vm_param.h>

#include <kern/sched_prim.h>		/* thread_wakeup() */
#include <kern/affinity.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/coalition.h>
#include <kern/policy_internal.h>
#include <kern/kalloc.h>

#include <os/log.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#include <security/mac_mach_internal.h>
#endif

#if CONFIG_AUDIT
#include <bsm/audit_kevents.h>
#endif

#if CONFIG_ARCADE
#include <kern/arcade.h>
#endif

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_pageout.h>

#include <kdp/kdp_dyld.h>

#include <machine/machine_routines.h>
#include <machine/pal_routines.h>

#include <pexpert/pexpert.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif

#include <IOKit/IOBSD.h>

extern boolean_t vm_darkwake_mode;

extern int bootarg_execfailurereports; /* bsd_init.c */
boolean_t unentitled_ios_sim_launch = FALSE;

#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
static TUNABLE(bool, bootarg_arm64e_preview_abi, "-arm64e_preview_abi", false);
#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */

#if CONFIG_DTRACE
/* Do not include dtrace.h, it redefines kmem_[alloc/free] */
extern void dtrace_proc_exec(proc_t);
extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);

/*
 * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
 * we will store its value before actually calling it.
 */
static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;

#include <sys/dtrace_ptss.h>
#endif

#if __has_feature(ptrauth_calls)
static int vm_shared_region_per_team_id = 1;
static int vm_shared_region_by_entitlement = 1;

/* Flag to control whether shared cache randomized resliding is enabled */
#if DEVELOPMENT || DEBUG || XNU_TARGET_OS_IOS
static int vm_shared_region_reslide_aslr = 1;
#else /* DEVELOPMENT || DEBUG || XNU_TARGET_OS_IOS */
static int vm_shared_region_reslide_aslr = 0;
#endif /* DEVELOPMENT || DEBUG || XNU_TARGET_OS_IOS */
/*
 * Flag to control which processes should get a new randomized reslide of
 * the shared cache after a fault in the shared cache region:
 *
 * 0 - all processes get a new randomized slide
 * 1 - only platform processes get a new randomized slide
 */
int vm_shared_region_reslide_restrict = 1;

#if DEVELOPMENT || DEBUG
SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_per_team_id, CTLFLAG_RW, &vm_shared_region_per_team_id, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_by_entitlement, CTLFLAG_RW, &vm_shared_region_by_entitlement, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_reslide_restrict, CTLFLAG_RW, &vm_shared_region_reslide_restrict, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_reslide_aslr, CTLFLAG_RW, &vm_shared_region_reslide_aslr, 0, "");
#endif

#endif /* __has_feature(ptrauth_calls) */

/* support for child creation in exec after vfork */
thread_t fork_create_child(task_t parent_task,
    coalition_t *parent_coalition,
    proc_t child_proc,
    int inherit_memory,
    int is_64bit_addr,
    int is_64bit_data,
    int in_exec);
void vfork_exit(proc_t p, int rv);
extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
extern void task_set_did_exec_flag(task_t task);
extern void task_clear_exec_copy_flag(task_t task);
proc_t proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread, void **inherit);
boolean_t task_is_active(task_t);
boolean_t thread_is_active(thread_t thread);
void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread);
void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task);
extern void ipc_importance_release(void *elem);
extern boolean_t task_has_watchports(task_t task);
extern void task_set_no_smt(task_t task);
#if defined(HAS_APPLE_PAC)
char *task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid);
#endif
task_t convert_port_to_task(ipc_port_t port);

/*
 * Mach things for which prototypes are unavailable from Mach headers
 */
#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
void ipc_task_reset(
	task_t task);
void ipc_thread_reset(
	thread_t thread);
kern_return_t ipc_object_copyin(
	ipc_space_t space,
	mach_port_name_t name,
	mach_msg_type_name_t msgt_name,
	ipc_object_t *objectp,
	mach_port_context_t context,
	mach_msg_guard_flags_t *guard_flags,
	uint32_t kmsg_flags);
void ipc_port_release_send(ipc_port_t);

#if DEVELOPMENT || DEBUG
void task_importance_update_owner_info(task_t);
#endif

extern struct savearea *get_user_regs(thread_t);

__attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);

#include <kern/thread.h>
#include <kern/task.h>
#include <kern/ast.h>
#include <kern/mach_loader.h>
#include <kern/mach_fat.h>
#include <mach-o/fat.h>
#include <mach-o/loader.h>
#include <machine/vmparam.h>
#include <sys/imgact.h>

#include <sys/sdt.h>


/*
 * EAI_ITERLIMIT	The maximum number of times to iterate an image
 *			activator in exec_activate_image() before treating
 *			it as malformed/corrupt.
 */
#define EAI_ITERLIMIT		3

/*
 * For #! interpreter parsing
 */
#define IS_WHITESPACE(ch) ((ch == ' ') || (ch == '\t'))
#define IS_EOL(ch) ((ch == '#') || (ch == '\n'))

extern vm_map_t bsd_pageable_map;
extern const struct fileops vnops;
extern int nextpidversion;


#define USER_ADDR_ALIGN(addr, val) \
	( ( (user_addr_t)(addr) + (val) - 1) \
	& ~((val) - 1) )
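
/*
 * Worked example (for illustration): USER_ADDR_ALIGN rounds a user address
 * up to the next multiple of a power-of-two 'val', so:
 *
 *	USER_ADDR_ALIGN(0x1003, 8) == 0x1008
 *	USER_ADDR_ALIGN(0x1000, 8) == 0x1000	(already aligned)
 */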

/*
 * For subsystem root support
 */
#define SPAWN_SUBSYSTEM_ROOT_ENTITLEMENT "com.apple.private.spawn-subsystem-root"

/* Platform Code Exec Logging */
static int platform_exec_logging = 0;

SYSCTL_DECL(_security_mac);

SYSCTL_INT(_security_mac, OID_AUTO, platform_exec_logging, CTLFLAG_RW, &platform_exec_logging, 0,
    "log cdhashes for all platform binary executions");

static os_log_t peLog = OS_LOG_DEFAULT;

struct exec_port_actions {
	uint32_t portwatch_count;
	uint32_t registered_count;
	ipc_port_t *portwatch_array;
	ipc_port_t *registered_array;
};

struct image_params;    /* Forward */
static int exec_activate_image(struct image_params *imgp);
static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
static int load_return_to_errno(load_return_t lrtn);
static int execargs_alloc(struct image_params *imgp);
static int execargs_free(struct image_params *imgp);
static int exec_check_permissions(struct image_params *imgp);
static int exec_extract_strings(struct image_params *imgp);
static int exec_add_apple_strings(struct image_params *imgp, const load_result_t *load_result);
static int exec_handle_sugid(struct image_params *imgp);
static int sugid_scripts = 0;
SYSCTL_INT(_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
static void exec_resettextvp(proc_t, struct image_params *);
static int check_for_signature(proc_t, struct image_params *);
static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
static errno_t exec_handle_port_actions(struct image_params *imgp,
    struct exec_port_actions *port_actions);
static errno_t exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp,
    task_role_t psa_darwin_role, struct exec_port_actions *port_actions);
static void exec_port_actions_destroy(struct exec_port_actions *port_actions);

/*
 * exec_add_user_string
 *
 * Add the requested string to the string space area.
 *
 * Parameters:	struct image_params *	image parameter block
 *		user_addr_t		string to add to strings area
 *		int			segment from which string comes
 *		boolean_t		TRUE if string contributes to NCARGS
 *
 * Returns:	0			Success
 *		!0			Failure errno from copyinstr()
 *
 * Implicit returns:
 *		(imgp->ip_strendp)	updated location of next add, if any
 *		(imgp->ip_strspace)	updated byte count of space remaining
 *		(imgp->ip_argspace)	updated byte count of space in NCARGS
 */
__attribute__((noinline))
static int
exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
{
	int error = 0;

	do {
		size_t len = 0;
		int space;

		if (is_ncargs) {
			space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
		} else {
			space = imgp->ip_strspace;
		}

		if (space <= 0) {
			error = E2BIG;
			break;
		}

		if (!UIO_SEG_IS_USER_SPACE(seg)) {
			char *kstr = CAST_DOWN(char *, str); /* SAFE */
			error = copystr(kstr, imgp->ip_strendp, space, &len);
		} else {
			error = copyinstr(str, imgp->ip_strendp, space, &len);
		}

		imgp->ip_strendp += len;
		imgp->ip_strspace -= len;
		if (is_ncargs) {
			imgp->ip_argspace -= len;
		}
	} while (error == ENAMETOOLONG);

	return error;
}
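
/*
 * Worked example (for illustration): copyinstr()/copystr() report a length
 * that includes the terminating NUL, so adding the 13-character argument
 * "PATH=/usr/bin" with is_ncargs == TRUE advances ip_strendp by 14 bytes
 * and debits both ip_strspace and ip_argspace by 14; the next call is then
 * checked for E2BIG against the space that actually remains.
 */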

/*
 * dyld is now passed the executable path as a getenv-like variable
 * in the same fashion as the stack_guard and malloc_entropy keys.
 */
#define EXECUTABLE_KEY "executable_path="

/*
 * exec_save_path
 *
 * To support new app package launching for Mac OS X, the dyld needs the
 * first argument to execve() stored on the user stack.
 *
 * Save the executable path name at the bottom of the strings area and set
 * the argument vector pointer to the location following that to indicate
 * the start of the argument and environment tuples, setting the remaining
 * string space count to the size of the string area minus the path length.
 *
 * Parameters:	struct image_params *	image parameter block
 *		char *			path used to invoke program
 *		int			segment from which path comes
 *
 * Returns:	int		0	Success
 *		EFAULT			Bad address
 *	copy[in]str:EFAULT		Bad address
 *	copy[in]str:ENAMETOOLONG	Filename too long
 *
 * Implicit returns:
 *		(imgp->ip_strings)	saved path
 *		(imgp->ip_strspace)	space remaining in ip_strings
 *		(imgp->ip_strendp)	start of remaining copy area
 *		(imgp->ip_argspace)	space remaining of NCARGS
 *		(imgp->ip_applec)	Initial applev[0]
 *
 * Note:	We have to do this before the initial namei() since if the
 *		path contains symbolic links, namei() will overwrite the
 *		original path buffer contents.  If the last symbolic link
 *		resolved was a relative pathname, we would lose the original
 *		"path", which could be an absolute pathname.  This might be
 *		unacceptable for dyld.
 */
static int
exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath)
{
	int error;
	size_t len;
	char *kpath;

	// imgp->ip_strings can come out of a cache, so we need to obliterate the
	// old path.
	memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);

	len = MIN(MAXPATHLEN, imgp->ip_strspace);

	switch (seg) {
	case UIO_USERSPACE32:
	case UIO_USERSPACE64:   /* Same for copyin()... */
		error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
		break;
	case UIO_SYSSPACE:
		kpath = CAST_DOWN(char *, path); /* SAFE */
		error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
		break;
	default:
		error = EFAULT;
		break;
	}

	if (!error) {
		bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
		len += strlen(EXECUTABLE_KEY);

		imgp->ip_strendp += len;
		imgp->ip_strspace -= len;

		if (excpath) {
			*excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
		}
	}

	return error;
}
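
/*
 * Worked example (for illustration): after exec_save_path(imgp, "/bin/ls",
 * ...), the strings area begins with the single string
 * "executable_path=/bin/ls"; *excpath points at the "/bin/ls" suffix within
 * it, and ip_strendp points one byte past the terminating NUL, where the
 * argument and environment tuples will be appended.
 */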

/*
 * exec_reset_save_path
 *
 * If we detect a shell script, we need to reset the string area
 * state so that the interpreter can be saved onto the stack.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	int		0	Success
 *
 * Implicit returns:
 *		(imgp->ip_strings)	saved path
 *		(imgp->ip_strspace)	space remaining in ip_strings
 *		(imgp->ip_strendp)	start of remaining copy area
 *		(imgp->ip_argspace)	space remaining of NCARGS
 *
 */
static int
exec_reset_save_path(struct image_params *imgp)
{
	imgp->ip_strendp = imgp->ip_strings;
	imgp->ip_argspace = NCARGS;
	imgp->ip_strspace = (NCARGS + PAGE_SIZE);

	return 0;
}

/*
 * exec_shell_imgact
 *
 * Image activator for interpreter scripts.  If the image begins with
 * the characters "#!", then it is an interpreter script.  Verify that
 * the length of the script line indicating the interpreter is not in
 * excess of the maximum allowed size.  If so, break out the arguments,
 * if any, which are separated by white space, and copy them into the
 * argument save area as if they were provided on the command line
 * before all other arguments.  The line ends when we encounter a
 * comment character ('#') or newline.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	-1			not an interpreter (keep looking)
 *		-3			Success: interpreter: relookup
 *		>0			Failure: interpreter: error number
 *
 * A return value other than -1 indicates subsequent image activators should
 * not be given the opportunity to attempt to activate the image.
 */
static int
exec_shell_imgact(struct image_params *imgp)
{
	char *vdata = imgp->ip_vdata;
	char *ihp;
	char *line_startp, *line_endp;
	char *interp;

	/*
	 * Make sure it's a shell script.  If we've already redirected
	 * from an interpreted file once, don't do it again.
	 */
	if (vdata[0] != '#' ||
	    vdata[1] != '!' ||
	    (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
		return -1;
	}

	if (imgp->ip_origcputype != 0) {
		/* Fat header previously matched, don't allow shell script inside */
		return -1;
	}

	imgp->ip_flags |= IMGPF_INTERPRET;
	imgp->ip_interp_sugid_fd = -1;
	imgp->ip_interp_buffer[0] = '\0';

	/* Check to see if SUGID scripts are permitted.  If they aren't then
	 * clear the SUGID bits.
	 * imgp->ip_vattr is known to be valid.
	 */
	if (sugid_scripts == 0) {
		imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
	}

	/* Try to find the first non-whitespace character */
	for (ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++) {
		if (IS_EOL(*ihp)) {
			/* Did not find interpreter, "#!\n" */
			return ENOEXEC;
		} else if (IS_WHITESPACE(*ihp)) {
			/* Whitespace, like "#!    /bin/sh\n", keep going. */
		} else {
			/* Found start of interpreter */
			break;
		}
	}

	if (ihp == &vdata[IMG_SHSIZE]) {
		/* All whitespace, like "#!           " */
		return ENOEXEC;
	}

	line_startp = ihp;

	/* Try to find the end of the interpreter+args string */
	for (; ihp < &vdata[IMG_SHSIZE]; ihp++) {
		if (IS_EOL(*ihp)) {
			/* Got it */
			break;
		} else {
			/* Still part of interpreter or args */
		}
	}

	if (ihp == &vdata[IMG_SHSIZE]) {
		/* A long line, like "#! blah blah blah" without end */
		return ENOEXEC;
	}

	/* Backtrack until we find the last non-whitespace */
	while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
		ihp--;
	}

	/* The character after the last non-whitespace is our logical end of line */
	line_endp = ihp + 1;

	/*
	 * Now we have pointers to the usable part of:
	 *
	 * "#!  /usr/bin/int first    second   third    \n"
	 *      ^ line_startp                       ^ line_endp
	 */

	/* copy the interpreter name */
	interp = imgp->ip_interp_buffer;
	for (ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++) {
		*interp++ = *ihp;
	}
	*interp = '\0';

	exec_reset_save_path(imgp);
	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
	    UIO_SYSSPACE, NULL);

	/* Copy the entire interpreter + args for later processing into argv[] */
	interp = imgp->ip_interp_buffer;
	for (ihp = line_startp; (ihp < line_endp); ihp++) {
		*interp++ = *ihp;
	}
	*interp = '\0';

#if CONFIG_SETUID
	/*
	 * If we have an SUID or SGID script, create a file descriptor
	 * from the vnode and pass /dev/fd/%d instead of the actual
	 * path name so that the script does not get opened twice
	 */
	if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
		proc_t p;
		struct fileproc *fp;
		int fd;
		int error;

		p = vfs_context_proc(imgp->ip_vfs_context);
		error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
		if (error) {
			return error;
		}

		fp->fp_glob->fg_flag = FREAD;
		fp->fp_glob->fg_ops = &vnops;
		fp->fp_glob->fg_data = (caddr_t)imgp->ip_vp;

		proc_fdlock(p);
		procfdtbl_releasefd(p, fd, NULL);
		fp_drop(p, fd, fp, 1);
		proc_fdunlock(p);
		vnode_ref(imgp->ip_vp);

		imgp->ip_interp_sugid_fd = fd;
	}
#endif /* CONFIG_SETUID */

	return -3;
}
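
/*
 * Worked example (for illustration): for a script beginning
 * "#! /bin/sh -x\n", line_startp/line_endp bracket "/bin/sh -x".  The first
 * copy above places just "/bin/sh" in ip_interp_buffer so exec_save_path()
 * can record the interpreter path; the buffer is then overwritten with the
 * full "/bin/sh -x", which is split into argv[] entries during later
 * argument processing.
 */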



/*
 * exec_fat_imgact
 *
 * Image activator for fat 1.0 binaries.  If the binary is fat, then we
 * need to select an image from it internally, and make that the image
 * we are going to attempt to execute.  At present, this consists of
 * reloading the first page for the image with a first page from the
 * offset location indicated by the fat header.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	-1			not a fat binary (keep looking)
 *		-2			Success: encapsulated binary: reread
 *		>0			Failure: error number
 *
 * Important:	This image activator is byte order neutral.
 *
 * Note:	A return value other than -1 indicates subsequent image
 *		activators should not be given the opportunity to attempt
 *		to activate the image.
 *
 *		If we find an encapsulated binary, we make no assertions
 *		about its validity; instead, we leave that up to a rescan
 *		for an activator to claim it, and, if it is claimed by one,
 *		that activator is responsible for determining validity.
 */
static int
exec_fat_imgact(struct image_params *imgp)
{
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	kauth_cred_t cred = kauth_cred_proc_ref(p);
	struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
	struct _posix_spawnattr *psa = NULL;
	struct fat_arch fat_arch;
	int resid, error;
	load_return_t lret;

	if (imgp->ip_origcputype != 0) {
		/* Fat header previously matched, don't allow another fat file inside */
		error = -1; /* not claimed */
		goto bad;
	}

	/* Make sure it's a fat binary */
	if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
		error = -1; /* not claimed */
		goto bad;
	}

	/* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */
	lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
	if (lret != LOAD_SUCCESS) {
		error = load_return_to_errno(lret);
		goto bad;
	}

	/* If posix_spawn binprefs exist, respect those prefs. */
	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
	if (psa != NULL && psa->psa_binprefs[0] != 0) {
		uint32_t pr = 0;

		/* Check each preference listed against all arches in header */
		for (pr = 0; pr < NBINPREFS; pr++) {
			cpu_type_t pref = psa->psa_binprefs[pr];
			cpu_subtype_t subpref = psa->psa_subcpuprefs[pr];

			if (pref == 0) {
				/* No suitable arch in the pref list */
				error = EBADARCH;
				goto bad;
			}

			if (pref == CPU_TYPE_ANY) {
				/* Fall through to regular grading */
				goto regular_grading;
			}

			lret = fatfile_getbestarch_for_cputype(pref,
			    subpref,
			    (vm_offset_t)fat_header,
			    PAGE_SIZE,
			    imgp,
			    &fat_arch);
			if (lret == LOAD_SUCCESS) {
				goto use_arch;
			}
		}

		/* Requested binary preference was not honored */
		error = EBADEXEC;
		goto bad;
	}

regular_grading:
	/* Look up our preferred architecture in the fat file. */
	lret = fatfile_getbestarch((vm_offset_t)fat_header,
	    PAGE_SIZE,
	    imgp,
	    &fat_arch,
	    (p->p_flag & P_AFFINITY) != 0);
	if (lret != LOAD_SUCCESS) {
		error = load_return_to_errno(lret);
		goto bad;
	}

use_arch:
	/* Read the Mach-O header out of fat_arch */
	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
	    PAGE_SIZE, fat_arch.offset,
	    UIO_SYSSPACE, (IO_UNIT | IO_NODELOCKED),
	    cred, &resid, p);
	if (error) {
		goto bad;
	}

	if (resid) {
		memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
	}

	/* Success.  Indicate we have identified an encapsulated binary */
	error = -2;
	imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
	imgp->ip_arch_size = (user_size_t)fat_arch.size;
	imgp->ip_origcputype = fat_arch.cputype;
	imgp->ip_origcpusubtype = fat_arch.cpusubtype;

bad:
	kauth_cred_unref(&cred);
	return error;
}

static int
activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result)
{
	int ret;

	task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
	task_set_64bit(task, result->is_64bit_addr, result->is_64bit_data);
	if (result->is_64bit_addr) {
		OSBitOrAtomic(P_LP64, &p->p_flag);
	} else {
		OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
	}
	task_set_mach_header_address(task, result->mach_header);

	ret = thread_state_initialize(thread);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	if (result->threadstate) {
		uint32_t *ts = result->threadstate;
		uint32_t total_size = (uint32_t)result->threadstate_sz;

		while (total_size > 0) {
			uint32_t flavor = *ts++;
			uint32_t size = *ts++;

			ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
			if (ret) {
				return ret;
			}
			ts += size;
			total_size -= (size + 2) * sizeof(uint32_t);
		}
	}

	thread_setentrypoint(thread, result->entry_point);

	return KERN_SUCCESS;
}
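
/*
 * Illustrative note: result->threadstate is a packed sequence of
 * (flavor, size, size 32-bit words of register state) tuples, so an entry
 * whose size field is N occupies (N + 2) * sizeof(uint32_t) bytes; the
 * loop above consumes tuples until total_size reaches zero.
 */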


/*
 * Set p->p_comm and p->p_name to the name passed to exec
 */
static void
set_proc_name(struct image_params *imgp, proc_t p)
{
	int p_name_len = sizeof(p->p_name) - 1;

	if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len) {
		imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
	}

	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
	    (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
	p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';

	if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) {
		imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
	}

	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
	    (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
	p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
}

#if __has_feature(ptrauth_calls)
/**
 * Returns a team ID string that may be used to assign a shared region.
 *
 * Platform binaries do not have team IDs and will return NULL.  Non-platform
 * binaries without a team ID will be assigned an artificial team ID of ""
 * (empty string) so that they will not be assigned to the default shared
 * region.
 *
 * @param imgp image parameter block
 * @return NULL if this is a platform binary, or an appropriate team ID string
 *         otherwise
 */
static inline const char *
get_teamid_for_shared_region(struct image_params *imgp)
{
	assert(imgp->ip_vp != NULL);

	const char *ret = csvnode_get_teamid(imgp->ip_vp, imgp->ip_arch_offset);
	if (ret) {
		return ret;
	}

	struct cs_blob *blob = csvnode_get_blob(imgp->ip_vp, imgp->ip_arch_offset);
	if (csblob_get_platform_binary(blob)) {
		return NULL;
	} else {
		static const char *NO_TEAM_ID = "";
		return NO_TEAM_ID;
	}
}

/**
 * Determines whether ptrauth should be enabled for the provided arm64 CPU subtype.
 *
 * @param cpusubtype Mach-O style CPU subtype
 * @return whether the CPU subtype matches arm64e with the current ptrauth ABI
 */
static inline bool
arm64_cpusubtype_uses_ptrauth(cpu_subtype_t cpusubtype)
{
	return (cpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E &&
	       CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(cpusubtype) == CPU_SUBTYPE_ARM64_PTR_AUTH_CURRENT_VERSION;
}

#endif /* __has_feature(ptrauth_calls) */

/**
 * Returns whether a type/subtype slice matches the requested
 * type/subtype.
 *
 * @param mask Bits to mask from the requested/tested cpu type
 * @param req_cpu Requested cpu type
 * @param req_subcpu Requested cpu subtype
 * @param test_cpu Tested slice cpu type
 * @param test_subcpu Tested slice cpu subtype
 */
boolean_t
binary_match(cpu_type_t mask, cpu_type_t req_cpu,
    cpu_subtype_t req_subcpu, cpu_type_t test_cpu,
    cpu_subtype_t test_subcpu)
{
	if ((test_cpu & ~mask) != (req_cpu & ~mask)) {
		return FALSE;
	}

	test_subcpu &= ~CPU_SUBTYPE_MASK;
	req_subcpu &= ~CPU_SUBTYPE_MASK;

	if (test_subcpu != req_subcpu && req_subcpu != (CPU_SUBTYPE_ANY & ~CPU_SUBTYPE_MASK)) {
		return FALSE;
	}

	return TRUE;
}
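
/*
 * Worked example (for illustration): with mask == CPU_ARCH_MASK only the
 * low-order CPU type bits are compared, so a CPU_TYPE_X86_64 slice
 * satisfies a CPU_TYPE_X86 request (and vice versa), and a request whose
 * subtype is CPU_SUBTYPE_ANY matches a slice of any subtype.
 */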


/*
 * exec_mach_imgact
 *
 * Image activator for mach-o 1.0 binaries.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	-1			not a Mach-O binary (keep looking)
 *		-2			Success: encapsulated binary: reread
 *		>0			Failure: error number
 *		EBADARCH		Mach-o binary, but with an unrecognized
 *					architecture
 *		ENOMEM			No memory for child process;
 *					can only happen after vfork()
 *
 * Important:	This image activator is NOT byte order neutral.
 *
 * Note:	A return value other than -1 indicates subsequent image
 *		activators should not be given the opportunity to attempt
 *		to activate the image.
 *
 * TODO:	More gracefully handle failures after vfork
 */
static int
exec_mach_imgact(struct image_params *imgp)
{
	struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	int error = 0;
	task_t task;
	task_t new_task = NULL; /* protected by vfexec */
	thread_t thread;
	struct uthread *uthread;
	vm_map_t old_map = VM_MAP_NULL;
	vm_map_t map = VM_MAP_NULL;
	load_return_t lret;
	load_result_t load_result = {};
	struct _posix_spawnattr *psa = NULL;
	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
	int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
	int exec = (imgp->ip_flags & IMGPF_EXEC);
	os_reason_t exec_failure_reason = OS_REASON_NULL;
	boolean_t reslide = FALSE;

	/*
	 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
	 * is a reserved field on the end, so for the most part, we can
	 * treat them as if they were identical.  Reverse-endian Mach-O
	 * binaries are recognized but not compatible.
	 */
	if ((mach_header->magic == MH_CIGAM) ||
	    (mach_header->magic == MH_CIGAM_64)) {
		error = EBADARCH;
		goto bad;
	}

	if ((mach_header->magic != MH_MAGIC) &&
	    (mach_header->magic != MH_MAGIC_64)) {
		error = -1;
		goto bad;
	}

	if (mach_header->filetype != MH_EXECUTE) {
		error = -1;
		goto bad;
	}

	if (imgp->ip_origcputype != 0) {
		/* Fat header previously had an idea about this thin file */
		if (imgp->ip_origcputype != mach_header->cputype ||
		    imgp->ip_origcpusubtype != mach_header->cpusubtype) {
			error = EBADARCH;
			goto bad;
		}
	} else {
		imgp->ip_origcputype = mach_header->cputype;
		imgp->ip_origcpusubtype = mach_header->cpusubtype;
	}

	task = current_task();
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) {
		imgp->ip_flags |= IMGPF_IS_64BIT_ADDR | IMGPF_IS_64BIT_DATA;
	}


	/* If posix_spawn binprefs exist, respect those prefs. */
	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
	if (psa != NULL && psa->psa_binprefs[0] != 0) {
		int pr = 0;
		for (pr = 0; pr < NBINPREFS; pr++) {
			cpu_type_t pref = psa->psa_binprefs[pr];
			cpu_subtype_t subpref = psa->psa_subcpuprefs[pr];

			if (pref == 0) {
				/* No suitable arch in the pref list */
				error = EBADARCH;
				goto bad;
			}

			if (pref == CPU_TYPE_ANY) {
				/* Jump to regular grading */
				goto grade;
			}

			if (binary_match(CPU_ARCH_MASK, pref, subpref,
			    imgp->ip_origcputype, imgp->ip_origcpusubtype)) {
				goto grade;
			}
		}
		error = EBADARCH;
		goto bad;
	}
grade:
	if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK,
	    imgp->ip_origcpusubtype & CPU_SUBTYPE_MASK, TRUE)) {
		error = EBADARCH;
		goto bad;
	}

	if (validate_potential_simulator_binary(imgp->ip_origcputype, imgp,
	    imgp->ip_arch_offset, imgp->ip_arch_size) != LOAD_SUCCESS) {
#if __x86_64__
		const char *excpath;
		error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
		os_log_error(OS_LOG_DEFAULT, "Unsupported 32-bit executable: \"%s\"", (error) ? imgp->ip_vp->v_name : excpath);
#endif
		error = EBADARCH;
		goto bad;
	}

#if defined(HAS_APPLE_PAC)
	assert(mach_header->cputype == CPU_TYPE_ARM64);

	if (mach_header->cputype == CPU_TYPE_ARM64 &&
	    arm64_cpusubtype_uses_ptrauth(mach_header->cpusubtype)) {
		imgp->ip_flags &= ~IMGPF_NOJOP;
	} else {
		imgp->ip_flags |= IMGPF_NOJOP;
	}
#endif

	/* Copy in arguments/environment from the old process */
	error = exec_extract_strings(imgp);
	if (error) {
		goto bad;
	}

	AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
	    imgp->ip_endargv - imgp->ip_startargv);
	AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
	    imgp->ip_endenvv - imgp->ip_endargv);

	/*
	 * We are being called to activate an image subsequent to a vfork()
	 * operation; in this case, we know that our task, thread, and
	 * uthread are actually those of our parent, and our proc, which we
	 * obtained indirectly from the image_params vfs_context_t, is the
	 * new child process.
	 */
	if (vfexec) {
		imgp->ip_new_thread = fork_create_child(task,
		    NULL,
		    p,
		    FALSE,
		    (imgp->ip_flags & IMGPF_IS_64BIT_ADDR),
		    (imgp->ip_flags & IMGPF_IS_64BIT_DATA),
		    FALSE);
		/* task and thread ref returned, will be released in __mac_execve */
		if (imgp->ip_new_thread == NULL) {
			error = ENOMEM;
			goto bad;
		}
	}


	/* reset local idea of thread, uthread, task */
	thread = imgp->ip_new_thread;
	uthread = get_bsdthread_info(thread);
	task = new_task = get_threadtask(thread);

	/*
	 * Load the Mach-O file.
	 *
	 * NOTE: An error after this point indicates we have potentially
	 * destroyed or overwritten some process state while attempting an
	 * execve() following a vfork(), which is an unrecoverable condition.
	 * We send the new process an immediate SIGKILL to avoid it executing
	 * any instructions in the mutated address space.  For true spawns,
	 * this is not the case, and "too late" is still not too late to
	 * return an error code to the parent process.
	 */

	/*
	 * Actually load the image file we previously decided to load.
	 */
	lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
	if (lret != LOAD_SUCCESS) {
		error = load_return_to_errno(lret);

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO, 0, 0);
		if (lret == LOAD_BADMACHO_UPX) {
			set_proc_name(imgp, p);
			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_UPX);
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		} else {
			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);

			if (bootarg_execfailurereports) {
				set_proc_name(imgp, p);
				exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
			}
		}

		exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;

		goto badtoolate;
	}

	proc_lock(p);
	{
		p->p_cputype = imgp->ip_origcputype;
		p->p_cpusubtype = imgp->ip_origcpusubtype;
	}
	p->p_platform = load_result.ip_platform;
	p->p_min_sdk = load_result.lr_min_sdk;
	p->p_sdk = load_result.lr_sdk;
	vm_map_set_user_wire_limit(map, (vm_size_t)proc_limitgetcur(p, RLIMIT_MEMLOCK, FALSE));
#if XNU_TARGET_OS_OSX
	if (p->p_platform == PLATFORM_IOS) {
		vm_map_mark_alien(map);
	}
#endif /* XNU_TARGET_OS_OSX */
	proc_unlock(p);

	/*
	 * Set code-signing flags if this binary is signed, or if parent has
	 * requested them on exec.
	 */
	if (load_result.csflags & CS_VALID) {
		imgp->ip_csflags |= load_result.csflags &
		    (CS_VALID | CS_SIGNED | CS_DEV_CODE | CS_LINKER_SIGNED |
		    CS_HARD | CS_KILL | CS_RESTRICT | CS_ENFORCEMENT | CS_REQUIRE_LV |
		    CS_FORCED_LV | CS_ENTITLEMENTS_VALIDATED | CS_DYLD_PLATFORM | CS_RUNTIME |
		    CS_ENTITLEMENT_FLAGS |
		    CS_EXEC_SET_HARD | CS_EXEC_SET_KILL | CS_EXEC_SET_ENFORCEMENT);
	} else {
		imgp->ip_csflags &= ~CS_VALID;
	}

	if (p->p_csflags & CS_EXEC_SET_HARD) {
		imgp->ip_csflags |= CS_HARD;
	}
	if (p->p_csflags & CS_EXEC_SET_KILL) {
		imgp->ip_csflags |= CS_KILL;
	}
	if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT) {
		imgp->ip_csflags |= CS_ENFORCEMENT;
	}
	if (p->p_csflags & CS_EXEC_INHERIT_SIP) {
		if (p->p_csflags & CS_INSTALLER) {
			imgp->ip_csflags |= CS_INSTALLER;
		}
		if (p->p_csflags & CS_DATAVAULT_CONTROLLER) {
			imgp->ip_csflags |= CS_DATAVAULT_CONTROLLER;
		}
		if (p->p_csflags & CS_NVRAM_UNRESTRICTED) {
			imgp->ip_csflags |= CS_NVRAM_UNRESTRICTED;
		}
	}

#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
	/*
	 * ptrauth version 0 is a preview ABI.  Developers can opt into running
	 * their own arm64e binaries for local testing, with the understanding
	 * that future OSes may break ABI.
	 */
	if ((imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E &&
	    CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(imgp->ip_origcpusubtype) == 0 &&
	    !load_result.platform_binary &&
	    !bootarg_arm64e_preview_abi) {
		static bool logged_once = false;
		set_proc_name(imgp, p);

		printf("%s: not running binary \"%s\" built against preview arm64e ABI\n", __func__, p->p_name);
		if (!os_atomic_xchg(&logged_once, true, relaxed)) {
			printf("%s: (to allow this, add \"-arm64e_preview_abi\" to boot-args)\n", __func__);
		}

		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
		if (bootarg_execfailurereports) {
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
		}
		goto badtoolate;
	}

	if ((imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E &&
	    imgp->ip_origcputype == CPU_TYPE_ARM64 &&
	    load_result.platform_binary &&
	    (imgp->ip_flags & IMGPF_DRIVER) != 0) {
		set_proc_name(imgp, p);
		printf("%s: disallowing arm64 platform driverkit binary \"%s\", should be arm64e\n", __func__, p->p_name);
		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
		if (bootarg_execfailurereports) {
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
		}
		goto badtoolate;
	}
#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */

	/*
	 * Set up the shared cache region in the new process.
	 *
	 * Normally there is a single shared region per architecture.
	 * However on systems with Pointer Authentication, we can create
	 * multiple shared caches with the amount of sharing determined
	 * by team-id or entitlement.  Inherited shared region IDs are used
	 * for system processes that need to match and be able to inspect
	 * a pre-existing task.
	 */
	int cpu_subtype = 0; /* all cpu_subtypes use the same shared region */
#if __has_feature(ptrauth_calls)
	char *shared_region_id = NULL;
	size_t len;
	char *base;
	const char *cbase;
#define TEAM_ID_PREFIX "T-"
#define ENTITLE_PREFIX "E-"
#define SR_PREFIX_LEN 2
#define SR_ENTITLEMENT "com.apple.pac.shared_region_id"

	if (cpu_type() == CPU_TYPE_ARM64 &&
	    arm64_cpusubtype_uses_ptrauth(p->p_cpusubtype) &&
	    (imgp->ip_flags & IMGPF_NOJOP) == 0) {
		assertf(p->p_cputype == CPU_TYPE_ARM64,
		    "p %p cpu_type() 0x%x p->p_cputype 0x%x p->p_cpusubtype 0x%x",
		    p, cpu_type(), p->p_cputype, p->p_cpusubtype);

		/*
		 * arm64e uses pointer authentication, so request a separate
		 * shared region for this CPU subtype.
		 */
		cpu_subtype = p->p_cpusubtype & ~CPU_SUBTYPE_MASK;

		/*
		 * Determine which shared cache to select based on being told,
		 * matching a team-id or matching an entitlement.
		 */
		if (imgp->ip_inherited_shared_region_id) {
			len = strlen(imgp->ip_inherited_shared_region_id);
			shared_region_id = kheap_alloc(KHEAP_DATA_BUFFERS,
			    len + 1, Z_WAITOK);
			memcpy(shared_region_id, imgp->ip_inherited_shared_region_id, len + 1);
		} else if ((cbase = get_teamid_for_shared_region(imgp)) != NULL) {
			len = strlen(cbase);
			if (vm_shared_region_per_team_id) {
				shared_region_id = kheap_alloc(KHEAP_DATA_BUFFERS,
				    len + SR_PREFIX_LEN + 1, Z_WAITOK);
				memcpy(shared_region_id, TEAM_ID_PREFIX, SR_PREFIX_LEN);
				memcpy(shared_region_id + SR_PREFIX_LEN, cbase, len + 1);
			}
		} else if ((base = IOVnodeGetEntitlement(imgp->ip_vp,
		    (int64_t)imgp->ip_arch_offset, SR_ENTITLEMENT)) != NULL) {
			len = strlen(base);
			if (vm_shared_region_by_entitlement) {
				shared_region_id = kheap_alloc(KHEAP_DATA_BUFFERS,
				    len + SR_PREFIX_LEN + 1, Z_WAITOK);
				memcpy(shared_region_id, ENTITLE_PREFIX, SR_PREFIX_LEN);
				memcpy(shared_region_id + SR_PREFIX_LEN, base, len + 1);
			}
			/* Discard the copy of the entitlement */
			kheap_free(KHEAP_DATA_BUFFERS, base, len + 1);
		}
	}
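
	/*
	 * Worked example (for illustration): a non-platform binary signed
	 * with team ID "ABCDE12345" is assigned shared_region_id
	 * "T-ABCDE12345", while one carrying the
	 * com.apple.pac.shared_region_id entitlement value "foo" gets
	 * "E-foo"; binaries with unrelated team IDs therefore never share
	 * an arm64e shared region or its pointer signing key.
	 */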

	if (imgp->ip_flags & IMGPF_RESLIDE) {
		reslide = TRUE;
	}

	/* use "" as the default shared_region_id */
	if (shared_region_id == NULL) {
		shared_region_id = kheap_alloc(KHEAP_DATA_BUFFERS, 1, Z_WAITOK);
		*shared_region_id = 0;
	}

	/* ensure there's a unique pointer signing key for this shared_region_id */
	shared_region_key_alloc(shared_region_id,
	    imgp->ip_inherited_shared_region_id != NULL, imgp->ip_inherited_jop_pid);
	task_set_shared_region_id(task, shared_region_id);
	shared_region_id = NULL;
#endif /* __has_feature(ptrauth_calls) */

	int cputype = cpu_type();
	vm_map_exec(map, task, load_result.is_64bit_addr, (void *)p->p_fd->fd_rdir, cputype, cpu_subtype, reslide);

	/*
	 * Close file descriptors which specify close-on-exec.
	 */
	fdexec(p, psa != NULL ? psa->psa_flags : 0, exec);

	/*
	 * deal with set[ug]id.
	 */
	error = exec_handle_sugid(imgp);
	if (error) {
		vm_map_deallocate(map);

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, 0, 0);

		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE);
		if (bootarg_execfailurereports) {
			set_proc_name(imgp, p);
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		}

		goto badtoolate;
	}

	/*
	 * Commit to new map.
	 *
	 * Swap the new map for the old one on the target task, which
	 * consumes our new map reference but leaves us responsible for
	 * the old_map reference.  That lets us get off the pmap associated
	 * with it, and then we can release it.
	 *
	 * The map needs to be set on the target task, which is different
	 * from the current task, thus swap_task_map is used instead of
	 * vm_map_switch.
	 */
	old_map = swap_task_map(task, thread, map);
	vm_map_deallocate(old_map);
	old_map = NULL;

	lret = activate_exec_state(task, p, thread, &load_result);
	if (lret != KERN_SUCCESS) {
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE, 0, 0);

		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE);
		if (bootarg_execfailurereports) {
			set_proc_name(imgp, p);
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		}

		goto badtoolate;
	}

	/*
	 * deal with voucher on exec-calling thread.
	 */
	if (imgp->ip_new_thread == NULL) {
		thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
	}

	/* Make sure we won't interrupt ourself signalling a partial process */
	if (!vfexec && !spawn && (p->p_lflag & P_LTRACED)) {
		psignal(p, SIGTRAP);
	}

	if (load_result.unixproc &&
	    create_unix_stack(get_task_map(task),
	    &load_result,
	    p) != KERN_SUCCESS) {
		error = load_return_to_errno(LOAD_NOSPACE);

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC, 0, 0);

		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC);
		if (bootarg_execfailurereports) {
			set_proc_name(imgp, p);
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		}

		goto badtoolate;
	}

	error = exec_add_apple_strings(imgp, &load_result);
	if (error) {
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT, 0, 0);

		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT);
		if (bootarg_execfailurereports) {
			set_proc_name(imgp, p);
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		}
		goto badtoolate;
	}

	/* Switch to target task's map to copy out strings */
	old_map = vm_map_switch(get_task_map(task));

	if (load_result.unixproc) {
		user_addr_t ap;

		/*
		 * Copy the strings area out into the new process address
		 * space.
		 */
		ap = p->user_stack;
		error = exec_copyout_strings(imgp, &ap);
		if (error) {
			vm_map_switch(old_map);

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
			    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, 0, 0);

			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS);
			if (bootarg_execfailurereports) {
				set_proc_name(imgp, p);
				exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
			}
			goto badtoolate;
		}
		/* Set the stack */
		thread_setuserstack(thread, ap);
	}

	if (load_result.dynlinker || load_result.is_cambria) {
		user_addr_t ap;
		int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;

		/* Adjust the stack */
		ap = thread_adjuserstack(thread, -new_ptr_size);
		error = copyoutptr(load_result.mach_header, ap, new_ptr_size);

		if (error) {
			vm_map_switch(old_map);

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
			    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, 0, 0);

			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER);
			if (bootarg_execfailurereports) {
				set_proc_name(imgp, p);
				exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
			}
			goto badtoolate;
		}
		task_set_dyld_info(task, load_result.all_image_info_addr,
		    load_result.all_image_info_size);
	}


	/* Avoid immediate VM faults back into kernel */
	exec_prefault_data(p, imgp, &load_result);

	vm_map_switch(old_map);

	/*
	 * Reset signal state.
	 */
	execsigs(p, thread);

	/*
	 * need to cancel async IO requests that can be cancelled and wait for those
	 * already active.  MAY BLOCK!
	 */
	_aio_exec(p);

#if SYSV_SHM
	/* FIXME: Till vmspace inherit is fixed: */
	if (!vfexec && p->vm_shm) {
		shmexec(p);
	}
#endif
#if SYSV_SEM
	/* Clean up the semaphores */
	semexit(p);
#endif

	/*
	 * Remember file name for accounting.
	 */
	p->p_acflag &= ~AFORK;

	set_proc_name(imgp, p);

#if CONFIG_SECLUDED_MEMORY
	if (secluded_for_apps &&
	    load_result.platform_binary) {
		if (strncmp(p->p_name,
		    "Camera",
		    sizeof(p->p_name)) == 0) {
			task_set_could_use_secluded_mem(task, TRUE);
		} else {
			task_set_could_use_secluded_mem(task, FALSE);
		}
		if (strncmp(p->p_name,
		    "mediaserverd",
		    sizeof(p->p_name)) == 0) {
			task_set_could_also_use_secluded_mem(task, TRUE);
		}
	}
#endif /* CONFIG_SECLUDED_MEMORY */

#if __arm64__
	if (load_result.legacy_footprint) {
		task_set_legacy_footprint(task);
	}
#endif /* __arm64__ */

	pal_dbg_set_task_name(task);

	/*
	 * The load result will have already been munged by AMFI to include the
	 * platform binary flag if boot-args dictated it (AMFI will mark anything
	 * that doesn't go through the upcall path as a platform binary if its
	 * enforcement is disabled).
	 */
	if (load_result.platform_binary) {
		if (cs_debug) {
			printf("setting platform binary on task: pid = %d\n", p->p_pid);
		}

		/*
		 * We must use 'task' here because the proc's task has not yet been
		 * switched to the new one.
		 */
		task_set_platform_binary(task, TRUE);
	} else {
		if (cs_debug) {
			printf("clearing platform binary on task: pid = %d\n", p->p_pid);
		}

		task_set_platform_binary(task, FALSE);
	}

#if DEVELOPMENT || DEBUG
	/*
	 * Update the pid and proc name for the importance base, if any
	 */
	task_importance_update_owner_info(task);
#endif

	memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));

#if CONFIG_DTRACE
	dtrace_proc_exec(p);
#endif

	if (kdebug_enable) {
		long args[4] = {};

		uintptr_t fsid = 0, fileid = 0;
		if (imgp->ip_vattr) {
			uint64_t fsid64 = vnode_get_va_fsid(imgp->ip_vattr);
			fsid = (uintptr_t)fsid64;
			fileid = (uintptr_t)imgp->ip_vattr->va_fileid;
			// check for (unexpected) overflow and trace zero in that case
			if (fsid != fsid64 || fileid != imgp->ip_vattr->va_fileid) {
				fsid = fileid = 0;
			}
		}
		KERNEL_DEBUG_CONSTANT_IST1(TRACE_DATA_EXEC, p->p_pid, fsid, fileid, 0,
		    (uintptr_t)thread_tid(thread));

		/*
		 * Collect the pathname for tracing
		 */
		kdbg_trace_string(p, &args[0], &args[1], &args[2], &args[3]);
		KERNEL_DEBUG_CONSTANT_IST1(TRACE_STRING_EXEC, args[0], args[1],
		    args[2], args[3], (uintptr_t)thread_tid(thread));
	}


	/*
	 * If posix_spawned with the START_SUSPENDED flag, stop the
	 * process before it runs.
	 */
	if (imgp->ip_px_sa != NULL) {
		psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
		if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
			proc_lock(p);
			p->p_stat = SSTOP;
			proc_unlock(p);
			(void) task_suspend_internal(task);
		}
	}

	/*
	 * mark as execed, wakeup the process that vforked (if any) and tell
	 * it that it now has its own resources back
	 */
	OSBitOrAtomic(P_EXEC, &p->p_flag);
	proc_resetregister(p);
	if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
		proc_lock(p);
		p->p_lflag &= ~P_LPPWAIT;
		proc_unlock(p);
		wakeup((caddr_t)p->p_pptr);
	}

	/*
	 * Pay for our earlier safety; deliver the delayed signals from
	 * the incomplete vfexec process now that it's complete.
	 */
	if (vfexec && (p->p_lflag & P_LTRACED)) {
		psignal_vfork(p, new_task, thread, SIGTRAP);
	}

	goto done;

badtoolate:
	/* Don't allow child process to execute any instructions */
	if (!spawn) {
		if (vfexec) {
			assert(exec_failure_reason != OS_REASON_NULL);
			psignal_vfork_with_reason(p, new_task, thread, SIGKILL, exec_failure_reason);
			exec_failure_reason = OS_REASON_NULL;
		} else {
			assert(exec_failure_reason != OS_REASON_NULL);
			psignal_with_reason(p, SIGKILL, exec_failure_reason);
			exec_failure_reason = OS_REASON_NULL;

			if (exec) {
				/* Terminate the exec copy task */
				task_terminate_internal(task);
			}
		}

		/* We can't stop this system call at this point, so just pretend we succeeded */
		error = 0;
	} else {
		os_reason_free(exec_failure_reason);
		exec_failure_reason = OS_REASON_NULL;
	}

done:
	if (load_result.threadstate) {
		kfree(load_result.threadstate, load_result.threadstate_sz);
		load_result.threadstate = NULL;
	}

bad:
	/* If we hit this, we likely would have leaked an exit reason */
	assert(exec_failure_reason == OS_REASON_NULL);
	return error;
}

1710
1711
1712
1713 /*
1714 * Our image activator table; this is the table of the image types we are
1715 * capable of loading. We list them in order of preference to ensure the
1716 * fastest image load speed.
1717 *
1718 * XXX hardcoded, for now; should use linker sets
1719 */
1720 struct execsw {
1721 int (*const ex_imgact)(struct image_params *);
1722 const char *ex_name;
1723 } const execsw[] = {
1724 { exec_mach_imgact, "Mach-o Binary" },
1725 { exec_fat_imgact, "Fat Binary" },
1726 { exec_shell_imgact, "Interpreter Script" },
1727 { NULL, NULL}
1728 };
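/*
 * The activator contract (dispatched by exec_activate_image() below): an
 * ex_imgact returns -1 if it does not claim the image, -2 for an
 * encapsulated binary (imgp updated for another pass), -3 for an
 * interpreter script, 0 on success, or an errno on failure. A minimal
 * hypothetical activator would follow this sketch (the magic bytes and
 * the function name are illustrative only):
 *
 *	static int
 *	exec_example_imgact(struct image_params *imgp)
 *	{
 *		if (memcmp(imgp->ip_vdata, "\x7fEXM", 4) != 0) {
 *			return -1;	// not ours; let the next activator try
 *		}
 *		// ... validate headers, map the image, set up imgp ...
 *		return 0;	// claimed and activated
 *	}
 */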
1729
1730
1731 /*
1732 * exec_activate_image
1733 *
1734 * Description: Iterate through the available image activators, and activate
1735 * the image associated with the imgp structure. We start with
1736 * the activator for Mach-O binaries, followed by the one for Fat
1737 * binaries, and then the one for Interpreter scripts.
1738 *
1739 * Parameters: struct image_params * Image parameter block
1740 *
1741 * Returns: 0 Success
1742 * EBADEXEC The executable is corrupt/unknown
1743 * execargs_alloc:EINVAL Invalid argument
1744 * execargs_alloc:EACCES Permission denied
1745 * execargs_alloc:EINTR Interrupted function
1746 * execargs_alloc:ENOMEM Not enough space
1747 * exec_save_path:EFAULT Bad address
1748 * exec_save_path:ENAMETOOLONG Filename too long
1749 * exec_check_permissions:EACCES Permission denied
1750 * exec_check_permissions:ENOEXEC Executable file format error
1751 * exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
1752 * exec_check_permissions:???
1753 * namei:???
1754 * vn_rdwr:??? [anything vn_rdwr can return]
1755 * <ex_imgact>:??? [anything an imgact can return]
1756 * EDEADLK Process is being terminated
1757 */
1758 static int
1759 exec_activate_image(struct image_params *imgp)
1760 {
1761 struct nameidata *ndp = NULL;
1762 const char *excpath;
1763 int error;
1764 int resid;
1765 int once = 1; /* save SGUID-ness for interpreted files */
1766 int i;
1767 int itercount = 0;
1768 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1769
1770 error = execargs_alloc(imgp);
1771 if (error) {
1772 goto bad_notrans;
1773 }
1774
1775 error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
1776 if (error) {
1777 goto bad_notrans;
1778 }
1779
1780 /* Use excpath, which contains the copyin-ed exec path */
1781 DTRACE_PROC1(exec, uintptr_t, excpath);
1782
1783 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
1784 if (ndp == NULL) {
1785 error = ENOMEM;
1786 goto bad_notrans;
1787 }
1788
1789 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1790 UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1791
1792 again:
1793 error = namei(ndp);
1794 if (error) {
1795 goto bad_notrans;
1796 }
1797 imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */
1798 imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */
1799
1800 /*
1801 * Before we start the transition from binary A to binary B, make
1802 * sure another thread hasn't started exiting the process. We grab
1803 * the proc lock to check p_lflag initially, and the transition
1804 * mechanism ensures that the value doesn't change after we release
1805 * the lock.
1806 */
1807 proc_lock(p);
1808 if (p->p_lflag & P_LEXIT) {
1809 error = EDEADLK;
1810 proc_unlock(p);
1811 goto bad_notrans;
1812 }
1813 error = proc_transstart(p, 1, 0);
1814 proc_unlock(p);
1815 if (error) {
1816 goto bad_notrans;
1817 }
1818
1819 error = exec_check_permissions(imgp);
1820 if (error) {
1821 goto bad;
1822 }
1823
1824 /* Copy the attributes once; an interpreter invocation would otherwise overwrite the original's */
1825 if (once) {
1826 once = 0;
1827 *imgp->ip_origvattr = *imgp->ip_vattr;
1828 }
1829
1830 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1831 UIO_SYSSPACE, IO_NODELOCKED,
1832 vfs_context_ucred(imgp->ip_vfs_context),
1833 &resid, vfs_context_proc(imgp->ip_vfs_context));
1834 if (error) {
1835 goto bad;
1836 }
1837
1838 if (resid) {
1839 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
1840 }
1841
1842 encapsulated_binary:
1843 /* Limit the number of iterations we will attempt on each binary */
1844 if (++itercount > EAI_ITERLIMIT) {
1845 error = EBADEXEC;
1846 goto bad;
1847 }
1848 error = -1;
1849 for (i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1850 error = (*execsw[i].ex_imgact)(imgp);
1851
1852 switch (error) {
1853 /* case -1: not claimed: continue */
1854 case -2: /* Encapsulated binary, imgp->ip_XXX set for next iteration */
1855 goto encapsulated_binary;
1856
1857 case -3: /* Interpreter */
1858 #if CONFIG_MACF
1859 /*
1860 * Copy the script label for later use. Note that
1861 * the label can be different when the script is
1862 * actually read by the interpreter.
1863 */
1864 if (imgp->ip_scriptlabelp) {
1865 mac_vnode_label_free(imgp->ip_scriptlabelp);
1866 }
1867 imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1868 if (imgp->ip_scriptlabelp == NULL) {
1869 error = ENOMEM;
1870 break;
1871 }
1872 mac_vnode_label_copy(imgp->ip_vp->v_label,
1873 imgp->ip_scriptlabelp);
1874
1875 /*
1876 * Take a ref of the script vnode for later use.
1877 */
1878 if (imgp->ip_scriptvp) {
1879 vnode_put(imgp->ip_scriptvp);
1880 imgp->ip_scriptvp = NULLVP;
1881 }
1882 if (vnode_getwithref(imgp->ip_vp) == 0) {
1883 imgp->ip_scriptvp = imgp->ip_vp;
1884 }
1885 #endif
1886
1887 nameidone(ndp);
1888
1889 vnode_put(imgp->ip_vp);
1890 imgp->ip_vp = NULL; /* already put */
1891 imgp->ip_ndp = NULL; /* already nameidone */
1892
1893 /* Use excpath, which exec_shell_imgact reset to the interpreter */
1894 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
1895 UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1896
1897 proc_transend(p, 0);
1898 goto again;
1899
1900 default:
1901 break;
1902 }
1903 }
1904
1905 if (error == 0) {
1906 if (imgp->ip_flags & IMGPF_INTERPRET && ndp->ni_vp) {
1907 AUDIT_ARG(vnpath, ndp->ni_vp, ARG_VNODE2);
1908 }
1909
1910 /*
1911 * Call out to allow 3rd party notification of exec.
1912 * Ignore result of kauth_authorize_fileop call.
1913 */
1914 if (kauth_authorize_fileop_has_listeners()) {
1915 kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1916 KAUTH_FILEOP_EXEC,
1917 (uintptr_t)ndp->ni_vp, 0);
1918 }
1919 }
1920 bad:
1921 proc_transend(p, 0);
1922
1923 bad_notrans:
1924 if (imgp->ip_strings) {
1925 execargs_free(imgp);
1926 }
1927 if (imgp->ip_ndp) {
1928 nameidone(imgp->ip_ndp);
1929 }
1930 if (ndp) {
1931 FREE(ndp, M_TEMP);
1932 }
1933
1934 return error;
1935 }
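/*
 * Worked example of the interpreter path above: for a script whose first
 * page begins "#!/bin/sh", exec_shell_imgact() claims the image with -3
 * and rewrites excpath to "/bin/sh"; the "case -3" code then drops the
 * script vnode, re-runs namei() on the interpreter, and iterates again,
 * at which point exec_mach_imgact() should claim the Mach-O interpreter.
 * The EAI_ITERLIMIT check bounds pathological interpreter chains.
 */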
1936
1937 /*
1938 * exec_validate_spawnattr_policy
1939 *
1940 * Description: Validates the entitlements required to set the apptype.
1941 *
1942 * Parameters: int psa_apptype posix spawn attribute apptype
1943 *
1944 * Returns: 0 Success
1945 * EPERM Failure
1946 */
1947 static errno_t
1948 exec_validate_spawnattr_policy(int psa_apptype)
1949 {
1950 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1951 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1952 if (proctype == POSIX_SPAWN_PROC_TYPE_DRIVER) {
1953 if (!IOTaskHasEntitlement(current_task(), POSIX_SPAWN_ENTITLEMENT_DRIVER)) {
1954 return EPERM;
1955 }
1956 }
1957 }
1958
1959 return 0;
1960 }
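/*
 * Illustrative userspace sketch (assumes the Apple SPI
 * posix_spawnattr_setprocesstype_np() declared in <spawn_private.h>):
 * requesting the DRIVER apptype reaches the entitlement check above, so
 * the spawn fails with EPERM unless the spawning task holds
 * POSIX_SPAWN_ENTITLEMENT_DRIVER.
 *
 *	posix_spawnattr_t attr;
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_setprocesstype_np(&attr, POSIX_SPAWN_PROC_TYPE_DRIVER);
 *	// posix_spawn(...) now requires the driver entitlement
 */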
1961
1962 /*
1963 * exec_handle_spawnattr_policy
1964 *
1965 * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task.
1966 *
1967 * Parameters: proc_t p process to apply attributes to
1968 * int psa_apptype posix spawn attribute apptype
1969 *
1970 * Returns: 0 Success
1971 */
1972 static errno_t
1973 exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp,
1974 task_role_t psa_darwin_role, struct exec_port_actions *port_actions)
1975 {
1976 int apptype = TASK_APPTYPE_NONE;
1977 int qos_clamp = THREAD_QOS_UNSPECIFIED;
1978 task_role_t role = TASK_UNSPECIFIED;
1979
1980 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1981 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1982
1983 switch (proctype) {
1984 case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1985 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1986 break;
1987 case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1988 apptype = TASK_APPTYPE_DAEMON_STANDARD;
1989 break;
1990 case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1991 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1992 break;
1993 case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1994 apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1995 break;
1996 case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1997 apptype = TASK_APPTYPE_APP_DEFAULT;
1998 break;
1999 case POSIX_SPAWN_PROC_TYPE_DRIVER:
2000 apptype = TASK_APPTYPE_DRIVER;
2001 break;
2002 default:
2003 apptype = TASK_APPTYPE_NONE;
2004 /* TODO: Should an invalid value here fail the spawn? */
2005 break;
2006 }
2007 }
2008
2009 if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
2010 switch (psa_qos_clamp) {
2011 case POSIX_SPAWN_PROC_CLAMP_UTILITY:
2012 qos_clamp = THREAD_QOS_UTILITY;
2013 break;
2014 case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
2015 qos_clamp = THREAD_QOS_BACKGROUND;
2016 break;
2017 case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
2018 qos_clamp = THREAD_QOS_MAINTENANCE;
2019 break;
2020 default:
2021 qos_clamp = THREAD_QOS_UNSPECIFIED;
2022 /* TODO: Should an invalid value here fail the spawn? */
2023 break;
2024 }
2025 }
2026
2027 if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
2028 proc_darwin_role_to_task_role(psa_darwin_role, &role);
2029 }
2030
2031 if (apptype != TASK_APPTYPE_NONE ||
2032 qos_clamp != THREAD_QOS_UNSPECIFIED ||
2033 role != TASK_UNSPECIFIED ||
2034 port_actions->portwatch_count) {
2035 proc_set_task_spawnpolicy(p->task, thread, apptype, qos_clamp, role,
2036 port_actions->portwatch_array, port_actions->portwatch_count);
2037 }
2038
2039 if (port_actions->registered_count) {
2040 if (mach_ports_register(p->task, port_actions->registered_array,
2041 port_actions->registered_count)) {
2042 return EINVAL;
2043 }
2044 /* mach_ports_register() consumed the array */
2045 port_actions->registered_array = NULL;
2046 port_actions->registered_count = 0;
2047 }
2048
2049 return 0;
2050 }
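/*
 * Illustrative userspace sketch (assumes the Apple SPI
 * posix_spawnattr_set_qos_clamp_np() declared in <spawn_private.h>): a
 * launcher can cap the child at UTILITY QoS, which the switch above maps
 * to THREAD_QOS_UTILITY before proc_set_task_spawnpolicy() is called.
 *
 *	posix_spawnattr_t attr;
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_set_qos_clamp_np(&attr, POSIX_SPAWN_PROC_CLAMP_UTILITY);
 */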
2051
2052 static void
2053 exec_port_actions_destroy(struct exec_port_actions *port_actions)
2054 {
2055 if (port_actions->portwatch_array) {
2056 for (uint32_t i = 0; i < port_actions->portwatch_count; i++) {
2057 ipc_port_t port = NULL;
2058 if ((port = port_actions->portwatch_array[i]) != NULL) {
2059 ipc_port_release_send(port);
2060 }
2061 }
2062 kfree(port_actions->portwatch_array,
2063 port_actions->portwatch_count * sizeof(ipc_port_t *));
2064 }
2065
2066 if (port_actions->registered_array) {
2067 for (uint32_t i = 0; i < port_actions->registered_count; i++) {
2068 ipc_port_t port = NULL;
2069 if ((port = port_actions->registered_array[i]) != NULL) {
2070 ipc_port_release_send(port);
2071 }
2072 }
2073 kfree(port_actions->registered_array,
2074 port_actions->registered_count * sizeof(ipc_port_t *));
2075 }
2076 }
2077
2078 /*
2079 * exec_handle_port_actions
2080 *
2081 * Description: Go through the _posix_spawn_port_actions_t contents,
2082 * calling task_set_special_port, task_set_exception_ports
2083 * and/or audit_session_spawnjoin for the current task.
2084 *
2085 * Parameters: struct image_params * Image parameter block
2086 *
2087 * Returns: 0 Success
2088 * EINVAL Failure
2089 * ENOTSUP Illegal posix_spawn attr flag was set
2090 */
2091 static errno_t
2092 exec_handle_port_actions(struct image_params *imgp,
2093 struct exec_port_actions *actions)
2094 {
2095 _posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
2096 #if CONFIG_AUDIT
2097 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
2098 #endif
2099 _ps_port_action_t *act = NULL;
2100 task_t task = get_threadtask(imgp->ip_new_thread);
2101 ipc_port_t port = NULL;
2102 errno_t ret = 0;
2103 int i, portwatch_i = 0, registered_i = 0;
2104 kern_return_t kr;
2105 boolean_t task_has_watchport_boost = task_has_watchports(current_task());
2106 boolean_t in_exec = (imgp->ip_flags & IMGPF_EXEC);
2107 int ptrauth_task_port_count = 0;
2108 boolean_t suid_cred_specified = FALSE;
2109
2110 for (i = 0; i < pacts->pspa_count; i++) {
2111 act = &pacts->pspa_actions[i];
2112
2113 switch (act->port_type) {
2114 case PSPA_SPECIAL:
2115 case PSPA_EXCEPTION:
2116 #if CONFIG_AUDIT
2117 case PSPA_AU_SESSION:
2118 #endif
2119 break;
2120 case PSPA_IMP_WATCHPORTS:
2121 if (++actions->portwatch_count > TASK_MAX_WATCHPORT_COUNT) {
2122 ret = EINVAL;
2123 goto done;
2124 }
2125 break;
2126 case PSPA_REGISTERED_PORTS:
2127 if (++actions->registered_count > TASK_PORT_REGISTER_MAX) {
2128 ret = EINVAL;
2129 goto done;
2130 }
2131 break;
2132
2133 case PSPA_PTRAUTH_TASK_PORT:
2134 if (++ptrauth_task_port_count > 1) {
2135 ret = EINVAL;
2136 goto done;
2137 }
2138 break;
2139
2140 case PSPA_SUID_CRED:
2141 /* Only a single suid credential can be specified. */
2142 if (suid_cred_specified) {
2143 ret = EINVAL;
2144 goto done;
2145 }
2146 suid_cred_specified = TRUE;
2147 break;
2148
2149 default:
2150 ret = EINVAL;
2151 goto done;
2152 }
2153 }
2154
2155 if (actions->portwatch_count) {
2156 if (in_exec && task_has_watchport_boost) {
2157 ret = EINVAL;
2158 goto done;
2159 }
2160 actions->portwatch_array =
2161 kalloc(sizeof(ipc_port_t *) * actions->portwatch_count);
2162 if (actions->portwatch_array == NULL) {
2163 ret = ENOMEM;
2164 goto done;
2165 }
2166 bzero(actions->portwatch_array,
2167 sizeof(ipc_port_t *) * actions->portwatch_count);
2168 }
2169
2170 if (actions->registered_count) {
2171 actions->registered_array =
2172 kalloc(sizeof(ipc_port_t *) * actions->registered_count);
2173 if (actions->registered_array == NULL) {
2174 ret = ENOMEM;
2175 goto done;
2176 }
2177 bzero(actions->registered_array,
2178 sizeof(ipc_port_t *) * actions->registered_count);
2179 }
2180
2181 for (i = 0; i < pacts->pspa_count; i++) {
2182 act = &pacts->pspa_actions[i];
2183
2184 if (MACH_PORT_VALID(act->new_port)) {
2185 kr = ipc_object_copyin(get_task_ipcspace(current_task()),
2186 act->new_port, MACH_MSG_TYPE_COPY_SEND,
2187 (ipc_object_t *) &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
2188
2189 if (kr != KERN_SUCCESS) {
2190 ret = EINVAL;
2191 goto done;
2192 }
2193 } else {
2194 /* it's NULL or DEAD */
2195 port = CAST_MACH_NAME_TO_PORT(act->new_port);
2196 }
2197
2198 switch (act->port_type) {
2199 case PSPA_SPECIAL:
2200 kr = task_set_special_port(task, act->which, port);
2201
2202 if (kr != KERN_SUCCESS) {
2203 ret = EINVAL;
2204 }
2205 break;
2206
2207 case PSPA_EXCEPTION:
2208 kr = task_set_exception_ports(task, act->mask, port,
2209 act->behavior, act->flavor);
2210 if (kr != KERN_SUCCESS) {
2211 ret = EINVAL;
2212 }
2213 break;
2214 #if CONFIG_AUDIT
2215 case PSPA_AU_SESSION:
2216 ret = audit_session_spawnjoin(p, task, port);
2217 if (ret) {
2218 /* audit_session_spawnjoin() has already dropped the reference in case of error. */
2219 goto done;
2220 }
2221
2222 break;
2223 #endif
2224 case PSPA_IMP_WATCHPORTS:
2225 if (actions->portwatch_array) {
2226 /* hold on to this till end of spawn */
2227 actions->portwatch_array[portwatch_i++] = port;
2228 } else {
2229 ipc_port_release_send(port);
2230 }
2231 break;
2232 case PSPA_REGISTERED_PORTS:
2233 /* hold on to this till end of spawn */
2234 actions->registered_array[registered_i++] = port;
2235 break;
2236
2237 case PSPA_PTRAUTH_TASK_PORT:
2238 #if defined(HAS_APPLE_PAC)
2239 {
2240 task_t ptr_auth_task = convert_port_to_task(port);
2241
2242 if (ptr_auth_task == TASK_NULL) {
2243 ret = EINVAL;
2244 break;
2245 }
2246
2247 imgp->ip_inherited_shared_region_id =
2248 task_get_vm_shared_region_id_and_jop_pid(ptr_auth_task,
2249 &imgp->ip_inherited_jop_pid);
2250
2251 /* Deallocate task ref returned by convert_port_to_task */
2252 task_deallocate(ptr_auth_task);
2253 }
2254 #endif /* HAS_APPLE_PAC */
2255
2256 /* consume the port right in case of success */
2257 ipc_port_release_send(port);
2258 break;
2259
2260 case PSPA_SUID_CRED:
2261 imgp->ip_sc_port = port;
2262 break;
2263
2264 default:
2265 ret = EINVAL;
2266 break;
2267 }
2268
2269 if (ret) {
2270 /* action failed, so release port resources */
2271 ipc_port_release_send(port);
2272 break;
2273 }
2274 }
2275
2276 done:
2277 if (0 != ret) {
2278 DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
2279 }
2280 return ret;
2281 }
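/*
 * Illustrative userspace sketch (assumes the Apple SPI
 * posix_spawnattr_setexceptionports_np() declared in <spawn_private.h>):
 * each call appends a PSPA_EXCEPTION action, which the loop above replays
 * against the new task via task_set_exception_ports(). my_exc_port is a
 * hypothetical send right already owned by the caller.
 *
 *	posix_spawnattr_t attr;
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_setexceptionports_np(&attr, EXC_MASK_BAD_ACCESS,
 *	    my_exc_port, EXCEPTION_DEFAULT, MACHINE_THREAD_STATE);
 */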
2282
2283 /*
2284 * exec_handle_file_actions
2285 *
2286 * Description: Go through the _posix_file_actions_t contents applying the
2287 * open, close, and dup2 operations to the open file table for
2288 * the current process.
2289 *
2290 * Parameters: struct image_params * Image parameter block
2291 *
2292 * Returns: 0 Success
2293 * ???
2294 *
2295 * Note: Actions are applied in the order specified, with the credential
2296 * of the parent process. This is done to permit the parent
2297 * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
2298 * the child after performing operations that the child itself
2299 * would not normally be permitted to perform.
2300 */
2301 static int
2302 exec_handle_file_actions(struct image_params *imgp, short psa_flags)
2303 {
2304 int error = 0;
2305 int action;
2306 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
2307 _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
2308 int ival[2]; /* dummy retval for system calls */
2309 #if CONFIG_AUDIT
2310 struct uthread *uthread = get_bsdthread_info(current_thread());
2311 #endif
2312
2313 for (action = 0; action < px_sfap->psfa_act_count; action++) {
2314 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
2315
2316 switch (psfa->psfaa_type) {
2317 case PSFA_OPEN: {
2318 /*
2319 * Open is different, in that it requires the use of
2320 * a path argument, which is normally copied in from
2321 * user space; because of this, we have to support an
2322 * open from kernel space that passes an address space
2323 * context of UIO_SYSSPACE, and casts the address
2324 * argument to a user_addr_t.
2325 */
2326 char *bufp = NULL;
2327 struct vnode_attr *vap;
2328 struct nameidata *ndp;
2329 int mode = psfa->psfaa_openargs.psfao_mode;
2330 int origfd;
2331
2332 MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
2333 if (bufp == NULL) {
2334 error = ENOMEM;
2335 break;
2336 }
2337
2338 vap = (struct vnode_attr *) bufp;
2339 ndp = (struct nameidata *) (bufp + sizeof(*vap));
2340
2341 VATTR_INIT(vap);
2342 /* Mask off all but regular access permissions */
2343 mode = ((mode & ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2344 VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
2345
2346 AUDIT_SUBCALL_ENTER(OPEN, p, uthread);
2347
2348 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
2349 CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
2350 imgp->ip_vfs_context);
2351
2352 error = open1(imgp->ip_vfs_context,
2353 ndp,
2354 psfa->psfaa_openargs.psfao_oflag,
2355 vap,
2356 fileproc_alloc_init, NULL,
2357 &origfd);
2358
2359 FREE(bufp, M_TEMP);
2360
2361 AUDIT_SUBCALL_EXIT(uthread, error);
2362
2363 /*
2364 * If there's an error, or we get the right fd by
2365 * accident, then drop out here. This is easier than
2366 * reworking all the open code to preallocate fd
2367 * slots, and internally taking one as an argument.
2368 */
2369 if (error || origfd == psfa->psfaa_filedes) {
2370 break;
2371 }
2372
2373 /*
2374 * If we didn't fall out from an error, we ended up
2375 * with the wrong fd; so now we've got to try to dup2
2376 * it to the right one.
2377 */
2378 AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
2379 error = dup2(p, origfd, psfa->psfaa_filedes, ival);
2380 AUDIT_SUBCALL_EXIT(uthread, error);
2381 if (error) {
2382 break;
2383 }
2384
2385 /*
2386 * Finally, close the original fd.
2387 */
2388 AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
2389 error = close_nocancel(p, origfd);
2390 AUDIT_SUBCALL_EXIT(uthread, error);
2391 }
2392 break;
2393
2394 case PSFA_DUP2: {
2395 AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
2396 error = dup2(p, psfa->psfaa_filedes,
2397 psfa->psfaa_dup2args.psfad_newfiledes, ival);
2398 AUDIT_SUBCALL_EXIT(uthread, error);
2399 }
2400 break;
2401
2402 case PSFA_FILEPORT_DUP2: {
2403 ipc_port_t port;
2404 kern_return_t kr;
2405 int origfd;
2406
2407 if (!MACH_PORT_VALID(psfa->psfaa_fileport)) {
2408 error = EINVAL;
2409 break;
2410 }
2411
2412 kr = ipc_object_copyin(get_task_ipcspace(current_task()),
2413 psfa->psfaa_fileport, MACH_MSG_TYPE_COPY_SEND,
2414 (ipc_object_t *) &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
2415
2416 if (kr != KERN_SUCCESS) {
2417 error = EINVAL;
2418 break;
2419 }
2420
2421 error = fileport_makefd(p, port, 0, &origfd);
2422
2423 if (IPC_PORT_NULL != port) {
2424 ipc_port_release_send(port);
2425 }
2426
2427 if (error || origfd == psfa->psfaa_dup2args.psfad_newfiledes) {
2428 break;
2429 }
2430
2431 AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
2432 error = dup2(p, origfd,
2433 psfa->psfaa_dup2args.psfad_newfiledes, ival);
2434 AUDIT_SUBCALL_EXIT(uthread, error);
2435 if (error) {
2436 break;
2437 }
2438
2439 AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
2440 error = close_nocancel(p, origfd);
2441 AUDIT_SUBCALL_EXIT(uthread, error);
2442 }
2443 break;
2444
2445 case PSFA_CLOSE: {
2446 AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
2447 error = close_nocancel(p, psfa->psfaa_filedes);
2448 AUDIT_SUBCALL_EXIT(uthread, error);
2449 }
2450 break;
2451
2452 case PSFA_INHERIT: {
2453 struct fileproc *fp;
2454
2455 /*
2456 * Check to see if the descriptor exists, and
2457 * ensure it's -not- marked as close-on-exec.
2458 *
2459 * Attempting to "inherit" a guarded fd will
2460 * result in an error.
2461 */
2462
2463 proc_fdlock(p);
2464 if ((fp = fp_get_noref_locked(p, psfa->psfaa_filedes)) == NULL) {
2465 error = EBADF;
2466 } else if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
2467 error = fp_guard_exception(p, psfa->psfaa_filedes,
2468 fp, kGUARD_EXC_NOCLOEXEC);
2469 } else {
2470 p->p_fd->fd_ofileflags[psfa->psfaa_filedes] &= ~UF_EXCLOSE;
2471 error = 0;
2472 }
2473 proc_fdunlock(p);
2474 }
2475 break;
2476
2477 case PSFA_CHDIR: {
2478 /*
2479 * Chdir is different, in that it requires the use of
2480 * a path argument, which is normally copied in from
2481 * user space; because of this, we have to support a
2482 * chdir from kernel space that passes an address space
2483 * context of UIO_SYSSPACE, and casts the address
2484 * argument to a user_addr_t.
2485 */
2486 struct nameidata *nd;
2487 nd = kheap_alloc(KHEAP_TEMP, sizeof(*nd), Z_WAITOK | Z_ZERO);
2488 if (nd == NULL) {
2489 error = ENOMEM;
2490 break;
2491 }
2492
2493 AUDIT_SUBCALL_ENTER(CHDIR, p, uthread);
2494 NDINIT(nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
2495 CAST_USER_ADDR_T(psfa->psfaa_chdirargs.psfac_path),
2496 imgp->ip_vfs_context);
2497
2498 error = chdir_internal(p, imgp->ip_vfs_context, nd, 0);
2499 kheap_free(KHEAP_TEMP, nd, sizeof(*nd));
2500 AUDIT_SUBCALL_EXIT(uthread, error);
2501 }
2502 break;
2503
2504 case PSFA_FCHDIR: {
2505 struct fchdir_args fchdira;
2506
2507 fchdira.fd = psfa->psfaa_filedes;
2508
2509 AUDIT_SUBCALL_ENTER(FCHDIR, p, uthread);
2510 error = fchdir(p, &fchdira, ival);
2511 AUDIT_SUBCALL_EXIT(uthread, error);
2512 }
2513 break;
2514
2515 default:
2516 error = EINVAL;
2517 break;
2518 }
2519
2520 /* All file action failures are considered fatal, per POSIX */
2521
2522 if (error) {
2523 if (PSFA_OPEN == psfa->psfaa_type) {
2524 DTRACE_PROC1(spawn__open__failure, uintptr_t,
2525 psfa->psfaa_openargs.psfao_path);
2526 } else {
2527 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
2528 }
2529 break;
2530 }
2531 }
2532
2533 if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0) {
2534 return error;
2535 }
2536
2537 /*
2538 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
2539 * this spawn only) as if "close on exec" is the default
2540 * disposition of all pre-existing file descriptors. In this case,
2541 * the list of file descriptors mentioned in the file actions
2542 * are the only ones that can be inherited, so mark them now.
2543 *
2544 * The actual closing part comes later, in fdexec().
2545 */
2546 proc_fdlock(p);
2547 for (action = 0; action < px_sfap->psfa_act_count; action++) {
2548 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
2549 int fd = psfa->psfaa_filedes;
2550
2551 switch (psfa->psfaa_type) {
2552 case PSFA_DUP2:
2553 case PSFA_FILEPORT_DUP2:
2554 fd = psfa->psfaa_dup2args.psfad_newfiledes;
2555 OS_FALLTHROUGH;
2556 case PSFA_OPEN:
2557 case PSFA_INHERIT:
2558 *fdflags(p, fd) |= UF_INHERIT;
2559 break;
2560
2561 case PSFA_CLOSE:
2562 case PSFA_CHDIR:
2563 case PSFA_FCHDIR:
2564 /*
2565 * Although PSFA_FCHDIR does have a file descriptor, it is not
2566 * *creating* one, thus we do not automatically mark it for
2567 * inheritance under POSIX_SPAWN_CLOEXEC_DEFAULT. A client that
2568 * wishes it to be inherited should use the PSFA_INHERIT action
2569 * explicitly.
2570 */
2571 break;
2572 }
2573 }
2574 proc_fdunlock(p);
2575
2576 return 0;
2577 }
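/*
 * Illustrative userspace sketch using the public file-action APIs handled
 * above; with POSIX_SPAWN_CLOEXEC_DEFAULT set, only descriptors named in
 * the actions (fds 0-2 here) survive into the child. log_fd, child, path,
 * child_argv and child_envp are hypothetical caller state.
 *
 *	posix_spawn_file_actions_t fa;
 *	posix_spawnattr_t attr;
 *
 *	posix_spawn_file_actions_init(&fa);
 *	posix_spawn_file_actions_addopen(&fa, 0, "/dev/null", O_RDONLY, 0);
 *	posix_spawn_file_actions_adddup2(&fa, log_fd, 1);
 *	posix_spawn_file_actions_adddup2(&fa, log_fd, 2);
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
 *	posix_spawn(&child, path, &fa, &attr, child_argv, child_envp);
 */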
2578
2579 #if CONFIG_MACF
2580 /*
2581 * exec_spawnattr_getmacpolicyinfo
2582 */
2583 void *
2584 exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
2585 {
2586 const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
2587 int i;
2588
2589 if (psmx == NULL) {
2590 return NULL;
2591 }
2592
2593 for (i = 0; i < psmx->psmx_count; i++) {
2594 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
2595 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
2596 if (lenp != NULL) {
2597 *lenp = (size_t)extension->datalen;
2598 }
2599 return extension->datap;
2600 }
2601 }
2602
2603 if (lenp != NULL) {
2604 *lenp = 0;
2605 }
2606 return NULL;
2607 }
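/*
 * Usage sketch: a MAC policy module's spawn hook can retrieve its
 * per-spawn payload by name; the policy name "Quarantine" below is
 * purely illustrative.
 *
 *	size_t len = 0;
 *	void *data = exec_spawnattr_getmacpolicyinfo(macextensions,
 *	    "Quarantine", &len);
 *	if (data != NULL && len != 0) {
 *		// consume the len bytes copied in by
 *		// spawn_copyin_macpolicyinfo()
 *	}
 */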
2608
2609 static int
2610 spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
2611 {
2612 _posix_spawn_mac_policy_extensions_t psmx = NULL;
2613 int error = 0;
2614 int copycnt = 0;
2615 int i = 0;
2616
2617 *psmxp = NULL;
2618
2619 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
2620 px_args->mac_extensions_size > PAGE_SIZE) {
2621 error = EINVAL;
2622 goto bad;
2623 }
2624
2625 MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
2626 if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0) {
2627 goto bad;
2628 }
2629
2630 size_t extsize = PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count);
2631 if (extsize == 0 || extsize > px_args->mac_extensions_size) {
2632 error = EINVAL;
2633 goto bad;
2634 }
2635
2636 for (i = 0; i < psmx->psmx_count; i++) {
2637 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
2638 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
2639 error = EINVAL;
2640 goto bad;
2641 }
2642 }
2643
2644 for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
2645 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
2646 void *data = NULL;
2647
2648 #if !__LP64__
2649 if (extension->data > UINT32_MAX) {
2650 error = EINVAL; goto bad; /* a 64-bit user pointer would truncate; fail rather than return success */
2651 }
2652 #endif
2653 MALLOC(data, void *, (size_t)extension->datalen, M_TEMP, M_WAITOK);
2654 if ((error = copyin((user_addr_t)extension->data, data, (size_t)extension->datalen)) != 0) {
2655 FREE(data, M_TEMP);
2656 goto bad;
2657 }
2658 extension->datap = data;
2659 }
2660
2661 *psmxp = psmx;
2662 return 0;
2663
2664 bad:
2665 if (psmx != NULL) {
2666 for (i = 0; i < copycnt; i++) {
2667 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
2668 }
2669 FREE(psmx, M_TEMP);
2670 }
2671 return error;
2672 }
2673
2674 static void
2675 spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
2676 {
2677 int i;
2678
2679 if (psmx == NULL) {
2680 return;
2681 }
2682 for (i = 0; i < psmx->psmx_count; i++) {
2683 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
2684 }
2685 FREE(psmx, M_TEMP);
2686 }
2687 #endif /* CONFIG_MACF */
2688
2689 #if CONFIG_COALITIONS
2690 static inline void
2691 spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
2692 {
2693 for (int c = 0; c < COALITION_NUM_TYPES; c++) {
2694 if (coal[c]) {
2695 coalition_remove_active(coal[c]);
2696 coalition_release(coal[c]);
2697 }
2698 }
2699 }
2700 #endif
2701
2702 #if CONFIG_PERSONAS
2703 static int
2704 spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
2705 {
2706 int error = 0;
2707 struct persona *persona = NULL;
2708 int verify = px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_VERIFY;
2709
2710 if (!IOTaskHasEntitlement(current_task(), PERSONA_MGMT_ENTITLEMENT)) {
2711 return EPERM;
2712 }
2713
2714 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2715 if (px_persona->pspi_ngroups > NGROUPS_MAX) {
2716 return EINVAL;
2717 }
2718 }
2719
2720 persona = persona_lookup(px_persona->pspi_id);
2721 if (!persona) {
2722 error = ESRCH;
2723 goto out;
2724 }
2725
2726 if (verify) {
2727 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2728 if (px_persona->pspi_uid != persona_get_uid(persona)) {
2729 error = EINVAL;
2730 goto out;
2731 }
2732 }
2733 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2734 if (px_persona->pspi_gid != persona_get_gid(persona)) {
2735 error = EINVAL;
2736 goto out;
2737 }
2738 }
2739 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2740 size_t ngroups = 0;
2741 gid_t groups[NGROUPS_MAX];
2742
2743 if (persona_get_groups(persona, &ngroups, groups,
2744 px_persona->pspi_ngroups) != 0) {
2745 error = EINVAL;
2746 goto out;
2747 }
2748 if (ngroups != px_persona->pspi_ngroups) {
2749 error = EINVAL;
2750 goto out;
2751 }
2752 while (ngroups--) {
2753 if (px_persona->pspi_groups[ngroups] != groups[ngroups]) {
2754 error = EINVAL;
2755 goto out;
2756 }
2757 }
2758 if (px_persona->pspi_gmuid != persona_get_gmuid(persona)) {
2759 error = EINVAL;
2760 goto out;
2761 }
2762 }
2763 }
2764
2765 out:
2766 if (persona) {
2767 persona_put(persona);
2768 }
2769
2770 return error;
2771 }
2772
2773 static int
2774 spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
2775 {
2776 int ret;
2777 kauth_cred_t cred;
2778 struct persona *persona = NULL;
2779 int override = !!(px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
2780
2781 if (!override) {
2782 return persona_proc_adopt_id(p, px_persona->pspi_id, NULL);
2783 }
2784
2785 /*
2786 * We want to spawn into the given persona, but override the kauth
2787 * cred with a different UID/GID combination.
2788 */
2789 persona = persona_lookup(px_persona->pspi_id);
2790 if (!persona) {
2791 return ESRCH;
2792 }
2793
2794 cred = persona_get_cred(persona);
2795 if (!cred) {
2796 ret = EINVAL;
2797 goto out;
2798 }
2799
2800 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2801 cred = kauth_cred_setresuid(cred,
2802 px_persona->pspi_uid,
2803 px_persona->pspi_uid,
2804 px_persona->pspi_uid,
2805 KAUTH_UID_NONE);
2806 }
2807
2808 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2809 cred = kauth_cred_setresgid(cred,
2810 px_persona->pspi_gid,
2811 px_persona->pspi_gid,
2812 px_persona->pspi_gid);
2813 }
2814
2815 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2816 cred = kauth_cred_setgroups(cred,
2817 px_persona->pspi_groups,
2818 px_persona->pspi_ngroups,
2819 px_persona->pspi_gmuid);
2820 }
2821
2822 ret = persona_proc_adopt(p, persona, cred);
2823
2824 out:
2825 persona_put(persona);
2826 return ret;
2827 }
2828 #endif
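/*
 * Illustrative userspace sketch (assumes the Apple SPIs
 * posix_spawnattr_set_persona_np() and posix_spawnattr_set_persona_uid_np()
 * declared in <spawn_private.h>): spawning into persona id 200 with an
 * overridden uid exercises the OVERRIDE branch of spawn_persona_adopt()
 * above; the id and uid values are examples only.
 *
 *	posix_spawnattr_t attr;
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_set_persona_np(&attr, 200,
 *	    POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
 *	posix_spawnattr_set_persona_uid_np(&attr, 501);
 */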
2829
2830 #if __arm64__
2831 extern int legacy_footprint_entitlement_mode;
2832 static inline void
2833 proc_legacy_footprint_entitled(proc_t p, task_t task)
2834 {
2835 #pragma unused(p)
2836 boolean_t legacy_footprint_entitled;
2837
2838 switch (legacy_footprint_entitlement_mode) {
2839 case LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE:
2840 /* the entitlement is ignored */
2841 break;
2842 case LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT:
2843 /* the entitlement grants iOS11 legacy accounting */
2844 legacy_footprint_entitled = IOTaskHasEntitlement(task,
2845 "com.apple.private.memory.legacy_footprint");
2846 if (legacy_footprint_entitled) {
2847 task_set_legacy_footprint(task);
2848 }
2849 break;
2850 case LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE:
2851 /* the entitlement grants a footprint limit increase */
2852 legacy_footprint_entitled = IOTaskHasEntitlement(task,
2853 "com.apple.private.memory.legacy_footprint");
2854 if (legacy_footprint_entitled) {
2855 task_set_extra_footprint_limit(task);
2856 }
2857 break;
2858 default:
2859 break;
2860 }
2861 }
2862
2863 static inline void
2864 proc_ios13extended_footprint_entitled(proc_t p, task_t task)
2865 {
2866 #pragma unused(p)
2867 boolean_t ios13extended_footprint_entitled;
2868
2869 /* the entitlement grants a footprint limit increase */
2870 ios13extended_footprint_entitled = IOTaskHasEntitlement(task,
2871 "com.apple.developer.memory.ios13extended_footprint");
2872 if (ios13extended_footprint_entitled) {
2873 task_set_ios13extended_footprint_limit(task);
2874 }
2875 }
2876 static inline void
2877 proc_increased_memory_limit_entitled(proc_t p, task_t task)
2878 {
2879 static const char kIncreasedMemoryLimitEntitlement[] = "com.apple.developer.kernel.increased-memory-limit";
2880 bool entitled = false;
2881
2882 entitled = IOTaskHasEntitlement(task, kIncreasedMemoryLimitEntitlement);
2883 if (entitled) {
2884 memorystatus_act_on_entitled_task_limit(p);
2885 }
2886 }
2887
2888 /*
2889 * Check for any of the various entitlements that permit a higher
2890 * task footprint limit or alternate accounting and apply them.
2891 */
2892 static inline void
2893 proc_footprint_entitlement_hacks(proc_t p, task_t task)
2894 {
2895 proc_legacy_footprint_entitled(p, task);
2896 proc_ios13extended_footprint_entitled(p, task);
2897 proc_increased_memory_limit_entitled(p, task);
2898 }
2899 #endif /* __arm64__ */
2900
2901 #if CONFIG_MACF
2902 /*
2903 * Processes with certain entitlements are granted a jumbo-size VM map.
2904 */
2905 static inline void
2906 proc_apply_jit_and_jumbo_va_policies(proc_t p, task_t task)
2907 {
2908 bool jit_entitled;
2909 jit_entitled = (mac_proc_check_map_anon(p, 0, 0, 0, MAP_JIT, NULL) == 0);
2910 if (jit_entitled || (IOTaskHasEntitlement(task,
2911 "com.apple.developer.kernel.extended-virtual-addressing"))) {
2912 vm_map_set_jumbo(get_task_map(task));
2913 if (jit_entitled) {
2914 vm_map_set_jit_entitled(get_task_map(task));
2915 }
2916 }
2917 }
2918 #endif /* CONFIG_MACF */
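/*
 * Illustrative userspace consequence of the policy above: a process whose
 * MAC policy allows MAP_JIT (i.e. one holding the JIT entitlement) gets a
 * JIT-entitled, jumbo VM map and can create RWX JIT regions; len is a
 * hypothetical size.
 *
 *	#include <sys/mman.h>
 *
 *	void *rgn = mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_EXEC,
 *	    MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
 */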
2919
2920 /*
2921 * Apply a modification to the proc's kauth cred until it converges.
2922 *
2923 * `update` consumes its argument to return a new kauth cred.
2924 */
2925 static void
2926 apply_kauth_cred_update(proc_t p,
2927 kauth_cred_t (^update)(kauth_cred_t orig_cred))
2928 {
2929 kauth_cred_t my_cred, my_new_cred;
2930
2931 my_cred = kauth_cred_proc_ref(p);
2932 for (;;) {
2933 my_new_cred = update(my_cred);
2934 if (my_cred == my_new_cred) {
2935 kauth_cred_unref(&my_new_cred);
2936 break;
2937 }
2938
2939 /* try to update the cred on the proc */
2940 proc_ucred_lock(p);
2941
2942 if (p->p_ucred == my_cred) {
2943 /* base pointer didn't change, donate our ref */
2944 p->p_ucred = my_new_cred;
2945 PROC_UPDATE_CREDS_ONPROC(p);
2946 proc_ucred_unlock(p);
2947
2948 /* drop p->p_ucred reference */
2949 kauth_cred_unref(&my_cred);
2950 break;
2951 }
2952
2953 /* base pointer changed, retry */
2954 my_cred = p->p_ucred;
2955 kauth_cred_ref(my_cred);
2956 proc_ucred_unlock(p);
2957
2958 kauth_cred_unref(&my_new_cred);
2959 }
2960 }
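/*
 * Usage sketch, mirroring the POSIX_SPAWN_RESETIDS handling later in this
 * file: the block derives a new cred from the old one, and the loop above
 * retries until the swap of p->p_ucred wins any race with concurrent
 * updaters.
 *
 *	apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t cred) {
 *		return kauth_cred_setuidgid(cred,
 *		    kauth_cred_getruid(cred), kauth_cred_getrgid(cred));
 *	});
 */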
2961
2962 static int
2963 spawn_posix_cred_adopt(proc_t p,
2964 struct _posix_spawn_posix_cred_info *px_pcred_info)
2965 {
2966 int error = 0;
2967
2968 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GID) {
2969 struct setgid_args args = {
2970 .gid = px_pcred_info->pspci_gid,
2971 };
2972 error = setgid(p, &args, NULL);
2973 if (error) {
2974 return error;
2975 }
2976 }
2977
2978 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) {
2979 error = setgroups_internal(p,
2980 px_pcred_info->pspci_ngroups,
2981 px_pcred_info->pspci_groups,
2982 px_pcred_info->pspci_gmuid);
2983 if (error) {
2984 return error;
2985 }
2986 }
2987
2988 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_UID) {
2989 struct setuid_args args = {
2990 .uid = px_pcred_info->pspci_uid,
2991 };
2992 error = setuid(p, &args, NULL);
2993 if (error) {
2994 return error;
2995 }
2996 }
2997 return 0;
2998 }
2999
3000 /*
3001 * posix_spawn
3002 *
3003 * Parameters: uap->pid Pointer to pid return area
3004 * uap->fname File name to exec
3005 * uap->argp Argument list
3006 * uap->envp Environment list
3007 *
3008 * Returns: 0 Success
3009 * EINVAL Invalid argument
3010 * ENOTSUP Not supported
3011 * ENOEXEC Executable file format error
3012 * exec_activate_image:EINVAL Invalid argument
3013 * exec_activate_image:EACCES Permission denied
3014 * exec_activate_image:EINTR Interrupted function
3015 * exec_activate_image:ENOMEM Not enough space
3016 * exec_activate_image:EFAULT Bad address
3017 * exec_activate_image:ENAMETOOLONG Filename too long
3018 * exec_activate_image:ENOEXEC Executable file format error
3019 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
3020 * exec_activate_image:EAUTH Image decryption failed
3021 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
3022 * exec_activate_image:???
3023 * mac_execve_enter:???
3024 *
3025 * TODO: Expect to need __mac_posix_spawn() at some point...
3026 * Handle posix_spawnattr_t
3027 * Handle posix_spawn_file_actions_t
3028 */
3029 int
3030 posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
3031 {
3032 proc_t p = ap; /* quiet bogus GCC vfork() warning */
3033 user_addr_t pid = uap->pid;
3034 int ival[2]; /* dummy retval for setpgid() */
3035 char *bufp = NULL;
3036 char *subsystem_root_path = NULL;
3037 struct image_params *imgp;
3038 struct vnode_attr *vap;
3039 struct vnode_attr *origvap;
3040 struct uthread *uthread = 0; /* compiler complains if not set to 0 */
3041 int error, sig;
3042 int is_64 = IS_64BIT_PROCESS(p);
3043 struct vfs_context context;
3044 struct user__posix_spawn_args_desc px_args;
3045 struct _posix_spawnattr px_sa;
3046 _posix_spawn_file_actions_t px_sfap = NULL;
3047 _posix_spawn_port_actions_t px_spap = NULL;
3048 struct __kern_sigaction vec;
3049 boolean_t spawn_no_exec = FALSE;
3050 boolean_t proc_transit_set = TRUE;
3051 boolean_t exec_done = FALSE;
3052 struct exec_port_actions port_actions = { };
3053 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
3054 task_t old_task = current_task();
3055 task_t new_task = NULL;
3056 boolean_t should_release_proc_ref = FALSE;
3057 void *inherit = NULL;
3058 #if CONFIG_PERSONAS
3059 struct _posix_spawn_persona_info *px_persona = NULL;
3060 #endif
3061 struct _posix_spawn_posix_cred_info *px_pcred_info = NULL;
3062
3063 /*
3064 * Allocate a big chunk for locals instead of using stack since these
3065 * structures are pretty big.
3066 */
3067 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
3068 imgp = (struct image_params *) bufp;
3069 if (bufp == NULL) {
3070 error = ENOMEM;
3071 goto bad;
3072 }
3073 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
3074 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
3075
3076 /* Initialize the common data in the image_params structure */
3077 imgp->ip_user_fname = uap->path;
3078 imgp->ip_user_argv = uap->argv;
3079 imgp->ip_user_envv = uap->envp;
3080 imgp->ip_vattr = vap;
3081 imgp->ip_origvattr = origvap;
3082 imgp->ip_vfs_context = &context;
3083 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE);
3084 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
3085 imgp->ip_mac_return = 0;
3086 imgp->ip_px_persona = NULL;
3087 imgp->ip_px_pcred_info = NULL;
3088 imgp->ip_cs_error = OS_REASON_NULL;
3089 imgp->ip_simulator_binary = IMGPF_SB_DEFAULT;
3090 imgp->ip_subsystem_root_path = NULL;
3091 imgp->ip_inherited_shared_region_id = NULL;
3092 imgp->ip_inherited_jop_pid = 0;
3093
3094 if (uap->adesc != USER_ADDR_NULL) {
3095 if (is_64) {
3096 error = copyin(uap->adesc, &px_args, sizeof(px_args));
3097 } else {
3098 struct user32__posix_spawn_args_desc px_args32;
3099
3100 error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
3101
3102 /*
3103 * Convert arguments descriptor from external 32 bit
3104 * representation to internal 64 bit representation
3105 */
3106 px_args.attr_size = px_args32.attr_size;
3107 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
3108 px_args.file_actions_size = px_args32.file_actions_size;
3109 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
3110 px_args.port_actions_size = px_args32.port_actions_size;
3111 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
3112 px_args.mac_extensions_size = px_args32.mac_extensions_size;
3113 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
3114 px_args.coal_info_size = px_args32.coal_info_size;
3115 px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
3116 px_args.persona_info_size = px_args32.persona_info_size;
3117 px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
3118 px_args.posix_cred_info_size = px_args32.posix_cred_info_size;
3119 px_args.posix_cred_info = CAST_USER_ADDR_T(px_args32.posix_cred_info);
3120 px_args.subsystem_root_path_size = px_args32.subsystem_root_path_size;
3121 px_args.subsystem_root_path = CAST_USER_ADDR_T(px_args32.subsystem_root_path);
3122 }
3123 if (error) {
3124 goto bad;
3125 }
3126
3127 if (px_args.attr_size != 0) {
3128 /*
3129 * We are not copying the port_actions pointer,
3130 * because we already have it from px_args.
3131 * This is a bit fragile: <rdar://problem/16427422>
3132 */
3133
3134 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset)) != 0) {
3135 goto bad;
3136 }
3137
3138 bzero((void *)((unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset);
3139
3140 imgp->ip_px_sa = &px_sa;
3141 }
3142 if (px_args.file_actions_size != 0) {
3143 /* Limit file_actions to allowed number of open files */
3144 rlim_t maxfa = (p->p_limit ? MIN(proc_limitgetcur(p, RLIMIT_NOFILE, TRUE), maxfilesperproc) : NOFILE);
3145 size_t maxfa_size = PSF_ACTIONS_SIZE(maxfa);
3146 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
3147 maxfa_size == 0 || px_args.file_actions_size > maxfa_size) {
3148 error = EINVAL;
3149 goto bad;
3150 }
3151 MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
3152 if (px_sfap == NULL) {
3153 error = ENOMEM;
3154 goto bad;
3155 }
3156 imgp->ip_px_sfa = px_sfap;
3157
3158 if ((error = copyin(px_args.file_actions, px_sfap,
3159 px_args.file_actions_size)) != 0) {
3160 goto bad;
3161 }
3162
3163 /* Verify that the action count matches the struct size */
3164 size_t psfsize = PSF_ACTIONS_SIZE(px_sfap->psfa_act_count);
3165 if (psfsize == 0 || psfsize != px_args.file_actions_size) {
3166 error = EINVAL;
3167 goto bad;
3168 }
3169 }
3170 if (px_args.port_actions_size != 0) {
3171 /* Limit port_actions to one page of data */
3172 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
3173 px_args.port_actions_size > PAGE_SIZE) {
3174 error = EINVAL;
3175 goto bad;
3176 }
3177
3178 MALLOC(px_spap, _posix_spawn_port_actions_t,
3179 px_args.port_actions_size, M_TEMP, M_WAITOK);
3180 if (px_spap == NULL) {
3181 error = ENOMEM;
3182 goto bad;
3183 }
3184 imgp->ip_px_spa = px_spap;
3185
3186 if ((error = copyin(px_args.port_actions, px_spap,
3187 px_args.port_actions_size)) != 0) {
3188 goto bad;
3189 }
3190
3191 /* Verify that the action count matches the struct size */
3192 size_t pasize = PS_PORT_ACTIONS_SIZE(px_spap->pspa_count);
3193 if (pasize == 0 || pasize != px_args.port_actions_size) {
3194 error = EINVAL;
3195 goto bad;
3196 }
3197 }
3198 #if CONFIG_PERSONAS
3199 /* copy in the persona info */
3200 if (px_args.persona_info_size != 0 && px_args.persona_info != 0) {
3201 /* for now, we need the exact same struct in user space */
3202 if (px_args.persona_info_size != sizeof(*px_persona)) {
3203 error = ERANGE;
3204 goto bad;
3205 }
3206
3207 MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK | M_ZERO);
3208 if (px_persona == NULL) {
3209 error = ENOMEM;
3210 goto bad;
3211 }
3212 imgp->ip_px_persona = px_persona;
3213
3214 if ((error = copyin(px_args.persona_info, px_persona,
3215 px_args.persona_info_size)) != 0) {
3216 goto bad;
3217 }
3218 if ((error = spawn_validate_persona(px_persona)) != 0) {
3219 goto bad;
3220 }
3221 }
3222 #endif
3223 /* copy in the posix cred info */
3224 if (px_args.posix_cred_info_size != 0 && px_args.posix_cred_info != 0) {
3225 /* for now, we need the exact same struct in user space */
3226 if (px_args.posix_cred_info_size != sizeof(*px_pcred_info)) {
3227 error = ERANGE;
3228 goto bad;
3229 }
3230
3231 if (!kauth_cred_issuser(kauth_cred_get())) {
3232 error = EPERM;
3233 goto bad;
3234 }
3235
3236 MALLOC(px_pcred_info, struct _posix_spawn_posix_cred_info *,
3237 px_args.posix_cred_info_size, M_TEMP, M_WAITOK | M_ZERO);
3238 if (px_pcred_info == NULL) {
3239 error = ENOMEM;
3240 goto bad;
3241 }
3242 imgp->ip_px_pcred_info = px_pcred_info;
3243
3244 if ((error = copyin(px_args.posix_cred_info, px_pcred_info,
3245 px_args.posix_cred_info_size)) != 0) {
3246 goto bad;
3247 }
3248
3249 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) {
3250 if (px_pcred_info->pspci_ngroups > NGROUPS_MAX) {
3251 error = EINVAL;
3252 goto bad;
3253 }
3254 }
3255 }
3256 #if CONFIG_MACF
3257 if (px_args.mac_extensions_size != 0) {
3258 if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0) {
3259 goto bad;
3260 }
3261 }
3262 #endif /* CONFIG_MACF */
3263 if ((px_args.subsystem_root_path_size > 0) && (px_args.subsystem_root_path_size <= MAXPATHLEN)) {
3264 /*
3265 * If a valid-looking subsystem root has been
3266 * specified...
3267 */
3268 if (IOTaskHasEntitlement(old_task, SPAWN_SUBSYSTEM_ROOT_ENTITLEMENT)) {
3269 /*
3270 * ...AND the parent has the entitlement, copy
3271 * the subsystem root path in.
3272 */
3273 MALLOC(subsystem_root_path, char *, px_args.subsystem_root_path_size, M_SBUF, M_WAITOK | M_ZERO | M_NULL);
3274
3275 if (subsystem_root_path == NULL) {
3276 error = ENOMEM;
3277 goto bad;
3278 }
3279
3280 if ((error = copyin(px_args.subsystem_root_path, subsystem_root_path, px_args.subsystem_root_path_size))) {
3281 goto bad;
3282 }
3283
3284 /* Paranoia */
3285 subsystem_root_path[px_args.subsystem_root_path_size - 1] = 0;
3286 }
3287 }
3288 }
3289
3290 /* set uthread to parent */
3291 uthread = get_bsdthread_info(current_thread());
3292
3293 /*
3294 * <rdar://6640530>; this does not result in a behaviour change
3295 * relative to Leopard, so there should not be any existing code
3296 * which depends on it.
3297 */
3298 if (uthread->uu_flag & UT_VFORK) {
3299 error = EINVAL;
3300 goto bad;
3301 }
3302
3303 if (imgp->ip_px_sa != NULL) {
3304 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
3305 if ((psa->psa_options & PSA_OPTION_PLUGIN_HOST_DISABLE_A_KEYS) == PSA_OPTION_PLUGIN_HOST_DISABLE_A_KEYS) {
3306 imgp->ip_flags |= IMGPF_PLUGIN_HOST_DISABLE_A_KEYS;
3307 }
3308
3309 if ((error = exec_validate_spawnattr_policy(psa->psa_apptype)) != 0) {
3310 goto bad;
3311 }
3312 }
3313
3314 /*
3315 * If we don't have the extension flag that turns "posix_spawn()"
3316 * into "execve() with options", then we will be creating a new
3317 * process which does not inherit memory from the parent process,
3318 * which is one of the most expensive things about using fork()
3319 * and execve().
3320 */
3321 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) {
3322 /* Set the new task's coalition, if it is requested. */
3323 coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
3324 #if CONFIG_COALITIONS
3325 int i, ncoals;
3326 kern_return_t kr = KERN_SUCCESS;
3327 struct _posix_spawn_coalition_info coal_info;
3328 int coal_role[COALITION_NUM_TYPES];
3329
3330 if (imgp->ip_px_sa == NULL || !px_args.coal_info) {
3331 goto do_fork1;
3332 }
3333
3334 memset(&coal_info, 0, sizeof(coal_info));
3335
3336 if (px_args.coal_info_size > sizeof(coal_info)) {
3337 px_args.coal_info_size = sizeof(coal_info);
3338 }
3339 error = copyin(px_args.coal_info,
3340 &coal_info, px_args.coal_info_size);
3341 if (error != 0) {
3342 goto bad;
3343 }
3344
3345 ncoals = 0;
3346 for (i = 0; i < COALITION_NUM_TYPES; i++) {
3347 uint64_t cid = coal_info.psci_info[i].psci_id;
3348 if (cid != 0) {
3349 /*
3350 * don't allow tasks which are not in a
3351 * privileged coalition to spawn processes
3352 * into coalitions other than their own
3353 */
3354 if (!task_is_in_privileged_coalition(p->task, i) &&
3355 !IOTaskHasEntitlement(p->task, COALITION_SPAWN_ENTITLEMENT)) {
3356 coal_dbg("ERROR: %d not in privilegd "
3357 "coalition of type %d",
3358 p->p_pid, i);
3359 spawn_coalitions_release_all(coal);
3360 error = EPERM;
3361 goto bad;
3362 }
3363
3364 coal_dbg("searching for coalition id:%llu", cid);
3365 /*
3366 * take a reference and activation on the
3367 * coalition to guard against free-while-spawn
3368 * races
3369 */
3370 coal[i] = coalition_find_and_activate_by_id(cid);
3371 if (coal[i] == COALITION_NULL) {
3372 coal_dbg("could not find coalition id:%llu "
3373 "(perhaps it has been terminated or reaped)", cid);
3374 /*
3375 * release any other coalitions we
3376 * may have a reference to
3377 */
3378 spawn_coalitions_release_all(coal);
3379 error = ESRCH;
3380 goto bad;
3381 }
3382 if (coalition_type(coal[i]) != i) {
3383 coal_dbg("coalition with id:%lld is not of type:%d"
3384 " (it's type:%d)", cid, i, coalition_type(coal[i]));
3385 error = ESRCH;
3386 goto bad;
3387 }
3388 coal_role[i] = coal_info.psci_info[i].psci_role;
3389 ncoals++;
3390 }
3391 }
3392 if (ncoals < COALITION_NUM_TYPES) {
3393 /*
3394 * If the user is attempting to spawn into a subset of
3395 * the known coalition types, then make sure they have
3396 * _at_least_ specified a resource coalition. If not,
3397 * the following fork1() call will implicitly force an
3398 * inheritance from 'p' and won't actually spawn the
3399 * new task into the coalitions the user specified.
3400 * (also the call to coalitions_set_roles will panic)
3401 */
3402 if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
3403 spawn_coalitions_release_all(coal);
3404 error = EINVAL;
3405 goto bad;
3406 }
3407 }
3408 do_fork1:
3409 #endif /* CONFIG_COALITIONS */
3410
3411 /*
3412 * note that this will implicitly inherit the
3413 * caller's persona (if it exists)
3414 */
3415 error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
3416 /* returns a thread and task reference */
3417
3418 if (error == 0) {
3419 new_task = get_threadtask(imgp->ip_new_thread);
3420 }
3421 #if CONFIG_COALITIONS
3422 /* set the roles of this task within each given coalition */
3423 if (error == 0) {
3424 kr = coalitions_set_roles(coal, new_task, coal_role);
3425 if (kr != KERN_SUCCESS) {
3426 error = EINVAL;
3427 }
3428 if (kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_COALITION,
3429 MACH_COALITION_ADOPT))) {
3430 for (i = 0; i < COALITION_NUM_TYPES; i++) {
3431 if (coal[i] != COALITION_NULL) {
3432 /*
3433 * On 32-bit targets, uniqueid
3434 * will get truncated to 32 bits
3435 */
3436 KDBG_RELEASE(MACHDBG_CODE(
3437 DBG_MACH_COALITION,
3438 MACH_COALITION_ADOPT),
3439 coalition_id(coal[i]),
3440 get_task_uniqueid(new_task));
3441 }
3442 }
3443 }
3444 }
3445
3446 /* drop our references and activations - fork1() now holds them */
3447 spawn_coalitions_release_all(coal);
3448 #endif /* CONFIG_COALITIONS */
3449 if (error != 0) {
3450 goto bad;
3451 }
3452 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */
3453 spawn_no_exec = TRUE; /* used in later tests */
3454 } else {
3455 /*
3456 * For execve case, create a new task and thread
3457 * which points to current_proc. The current_proc will point
3458 * to the new task after image activation and proc ref drain.
3459 *
3460 * proc (current_proc) <----- old_task (current_task)
3461 * ^ | ^
3462 * | | |
3463 * | ----------------------------------
3464 * |
3465 * --------- new_task (task marked as TF_EXEC_COPY)
3466 *
3467 * After image activation, the proc will point to the new task
3468 * and would look like following.
3469 *
3470 * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
3471 * ^ |
3472 * | |
3473 * | ----------> new_task
3474 * | |
3475 * -----------------
3476 *
3477 * During exec any transition from new_task -> proc is fine, but don't allow
3478 * transition from proc->task, since it will modify old_task.
3479 */
3480 imgp->ip_new_thread = fork_create_child(old_task,
3481 NULL,
3482 p,
3483 FALSE,
3484 p->p_flag & P_LP64,
3485 task_get_64bit_data(old_task),
3486 TRUE);
3487 /* task and thread ref returned by fork_create_child */
3488 if (imgp->ip_new_thread == NULL) {
3489 error = ENOMEM;
3490 goto bad;
3491 }
3492
3493 new_task = get_threadtask(imgp->ip_new_thread);
3494 imgp->ip_flags |= IMGPF_EXEC;
3495 }
3496
3497 if (spawn_no_exec) {
3498 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
3499
3500 /*
3501 * We had to wait until this point before firing the
3502 * proc:::create probe, otherwise p would not point to the
3503 * child process.
3504 */
3505 DTRACE_PROC1(create, proc_t, p);
3506 }
3507 assert(p != NULL);
3508
3509 if (subsystem_root_path) {
3510 /* If a subsystem root was specified, swap it in */
3511 char * old_subsystem_root_path = p->p_subsystem_root_path;
3512 p->p_subsystem_root_path = subsystem_root_path;
3513 subsystem_root_path = old_subsystem_root_path;
3514 }
3515
3516 /* We'll need the subsystem root for setting up Apple strings */
3517 imgp->ip_subsystem_root_path = p->p_subsystem_root_path;
3518
3519 context.vc_thread = imgp->ip_new_thread;
3520 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
3521
3522 /*
3523 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
3524 * to handle the file_actions. Since vfork() also ends up setting
3525 * us into the parent process group and saving off the signal flags,
3526 * this is also where we want to handle the spawn flags.
3527 */
3528
3529 /* Has spawn file actions? */
3530 if (imgp->ip_px_sfa != NULL) {
3531 /*
3532 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
3533 * is handled in exec_handle_file_actions().
3534 */
3535 #if CONFIG_AUDIT
3536 /*
3537 * The file actions auditing can overwrite the upath of
3538 * AUE_POSIX_SPAWN audit record. Save the audit record.
3539 */
3540 struct kaudit_record *save_uu_ar = uthread->uu_ar;
3541 uthread->uu_ar = NULL;
3542 #endif
3543 error = exec_handle_file_actions(imgp,
3544 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0);
3545 #if CONFIG_AUDIT
3546 /* Restore the AUE_POSIX_SPAWN audit record. */
3547 uthread->uu_ar = save_uu_ar;
3548 #endif
3549 if (error != 0) {
3550 goto bad;
3551 }
3552 }
3553
3554 /* Has spawn port actions? */
3555 if (imgp->ip_px_spa != NULL) {
3556 #if CONFIG_AUDIT
3557 /*
3558 * Do the same for the port actions as we did for the file
3559 * actions. Save the AUE_POSIX_SPAWN audit record.
3560 */
3561 struct kaudit_record *save_uu_ar = uthread->uu_ar;
3562 uthread->uu_ar = NULL;
3563 #endif
3564 error = exec_handle_port_actions(imgp, &port_actions);
3565 #if CONFIG_AUDIT
3566 /* Restore the AUE_POSIX_SPAWN audit record. */
3567 uthread->uu_ar = save_uu_ar;
3568 #endif
3569 if (error != 0) {
3570 goto bad;
3571 }
3572 }
3573
3574 /* Has spawn attr? */
3575 if (imgp->ip_px_sa != NULL) {
3576 /*
3577 * Reset UID/GID to parent's RUID/RGID; this works only
3578 * because the operation occurs *after* the vfork() and
3579 * before the call to exec_handle_sugid() by the image
3580 * activator called from exec_activate_image(). POSIX
3581 * requires that any setuid/setgid bits on the process
3582 * image will take precedence over the spawn attributes
3583 * (re)setting them.
3584 *
3585 * Modifications to p_ucred must be guarded using the
3586 * proc's ucred lock. This prevents others from accessing
3587 * a garbage credential.
3588 */
3589 if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
3590 apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred){
3591 return kauth_cred_setuidgid(my_cred,
3592 kauth_cred_getruid(my_cred),
3593 kauth_cred_getrgid(my_cred));
3594 });
3595 }
3596
3597 if (imgp->ip_px_pcred_info) {
3598 if (!spawn_no_exec) {
3599 error = ENOTSUP;
3600 goto bad;
3601 }
3602
3603 error = spawn_posix_cred_adopt(p, imgp->ip_px_pcred_info);
3604 if (error != 0) {
3605 goto bad;
3606 }
3607 }
3608
3609 #if CONFIG_PERSONAS
3610 if (imgp->ip_px_persona != NULL) {
3611 if (!spawn_no_exec) {
3612 error = ENOTSUP;
3613 goto bad;
3614 }
3615
3616 /*
3617 * If we were asked to spawn a process into a new persona,
3618 * do the credential switch now (which may override the UID/GID
3619 * inherit done just above). It's important to do this switch
3620 * before image activation both for reasons stated above, and
3621 * to ensure that the new persona has access to the image/file
3622 * being executed.
3623 */
3624 error = spawn_persona_adopt(p, imgp->ip_px_persona);
3625 if (error != 0) {
3626 goto bad;
3627 }
3628 }
3629 #endif /* CONFIG_PERSONAS */
3630 #if !SECURE_KERNEL
3631 /*
3632 * Disable ASLR for the spawned process.
3633 *
3634 * But only do so if we are not embedded + RELEASE.
3635 * While embedded allows for a boot-arg (-disable_aslr)
3636 * to deal with this (which itself is only honored on
3637 * DEVELOPMENT or DEBUG builds of xnu), it is often
3638 * useful or necessary to disable ASLR on a per-process
3639 * basis for unit testing and debugging.
3640 */
3641 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR) {
3642 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
3643 }
3644 #endif /* !SECURE_KERNEL */
3645
3646 /* Randomize high bits of ASLR slide */
3647 if (px_sa.psa_flags & _POSIX_SPAWN_HIGH_BITS_ASLR) {
3648 imgp->ip_flags |= IMGPF_HIGH_BITS_ASLR;
3649 }
3650
3651 #if !SECURE_KERNEL
3652 /*
3653 * Forcibly allow execution from data pages for the spawned process
3654 * even if it would otherwise be disallowed by the architecture default.
3655 */
3656 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC) {
3657 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
3658 }
3659 #endif /* !SECURE_KERNEL */
3660
3661 #if __has_feature(ptrauth_calls)
3662 if (vm_shared_region_reslide_aslr && is_64 && (px_sa.psa_flags & _POSIX_SPAWN_RESLIDE)) {
3663 imgp->ip_flags |= IMGPF_RESLIDE;
3664 }
3665 #endif /* __has_feature(ptrauth_calls) */
3666
3667 if ((px_sa.psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) ==
3668 POSIX_SPAWN_PROC_TYPE_DRIVER) {
3669 imgp->ip_flags |= IMGPF_DRIVER;
3670 }
3671 }
3672
3673 /*
3674 * Disable ASLR during image activation. This occurs either if the
3675 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
3676 * P_DISABLE_ASLR was inherited from the parent process.
3677 */
3678 if (p->p_flag & P_DISABLE_ASLR) {
3679 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
3680 }
3681
3682 /*
3683 * Clear transition flag so we won't hang if exec_activate_image() causes
3684 * an automount (and launchd does a proc sysctl to service it).
3685 *
3686 * <rdar://problem/6848672>, <rdar://problem/5959568>.
3687 */
3688 if (spawn_no_exec) {
3689 proc_transend(p, 0);
3690 proc_transit_set = 0;
3691 }
3692
3693 #if MAC_SPAWN /* XXX */
3694 if (uap->mac_p != USER_ADDR_NULL) {
3695 error = mac_execve_enter(uap->mac_p, imgp);
3696 if (error) {
3697 goto bad;
3698 }
3699 }
3700 #endif
3701
3702 /*
3703 * Activate the image
3704 */
3705 error = exec_activate_image(imgp);
3706 #if defined(HAS_APPLE_PAC)
3707 ml_task_set_jop_pid_from_shared_region(new_task);
3708 ml_task_set_disable_user_jop(new_task, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
3709 ml_thread_set_disable_user_jop(imgp->ip_new_thread, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
3710 ml_thread_set_jop_pid(imgp->ip_new_thread, new_task);
3711 #endif
3712
3713 if (error == 0 && !spawn_no_exec) {
3714 p = proc_exec_switch_task(p, old_task, new_task, imgp->ip_new_thread, &inherit);
3715 /* proc ref returned */
3716 should_release_proc_ref = TRUE;
3717 }
3718
3719 if (error == 0) {
3720 /* process completed the exec */
3721 exec_done = TRUE;
3722 } else if (error == -1) {
3723 /* Image not claimed by any activator? */
3724 error = ENOEXEC;
3725 }
3726
3727 if (!error && imgp->ip_px_sa != NULL) {
3728 thread_t child_thread = imgp->ip_new_thread;
3729 uthread_t child_uthread = get_bsdthread_info(child_thread);
3730
3731 /*
3732 * Because of POSIX_SPAWN_SETEXEC, we need to handle this after image
3733 * activation; otherwise a failed image activation (before the point
3734 * of no return) would leave the parent process in a modified state.
3735 */
3736 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
3737 struct setpgid_args spga;
3738 spga.pid = p->p_pid;
3739 spga.pgid = px_sa.psa_pgroup;
3740 /*
3741 * Effectively, call the setpgid() system call; this works
3742 * because there are no pointer arguments.
3743 */
3744 if ((error = setpgid(p, &spga, ival)) != 0) {
3745 goto bad;
3746 }
3747 }
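/*
 * Illustrative userland counterpart (not part of this file), given an
 * initialized posix_spawnattr_t 'attr' as in the sketch further above:
 *
 *	posix_spawnattr_setpgroup(&attr, 0);	// 0: child joins a new group
 *						// whose pgid equals its pid
 *	posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETPGROUP);
 *
 * which arrives here as psa_pgroup == 0 and is applied by the setpgid()
 * call above.
 */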
3748
3749 if (px_sa.psa_flags & POSIX_SPAWN_SETSID) {
3750 error = setsid_internal(p);
3751 if (error != 0) {
3752 goto bad;
3753 }
3754 }
3755
3756 /*
3757 * If we have a spawn attr and it contains signal related flags,
3758 * then we need to process them in the "context" of the new child
3759 * process, so we have to process them following image activation,
3760 * prior to making the thread runnable in user space. This is
3761 * necessitated by some signal information being per-thread rather
3762 * than per-process, and by the fact that we don't have the new
3763 * allocation in hand until after the image is activated.
3764 */
3765
3766 /*
3767 * Mask a list of signals that would otherwise be unmasked (i.e.
3768 * signals that were unmasked in the parent); note that some
3769 * signals are not maskable.
3770 */
3771 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK) {
3772 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
3773 }
3774 /*
3775 * Default a list of signals instead of ignoring them, if
3776 * they were ignored in the parent. Note that we pass
3777 * spawn_no_exec to setsigvec() to indicate that we called
3778 * fork1() and therefore do not need to call proc_signalstart()
3779 * internally.
3780 */
3781 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
3782 vec.sa_handler = SIG_DFL;
3783 vec.sa_tramp = 0;
3784 vec.sa_mask = 0;
3785 vec.sa_flags = 0;
3786 for (sig = 1; sig < NSIG; sig++) {
3787 if (px_sa.psa_sigdefault & (1 << (sig - 1))) {
3788 error = setsigvec(p, child_thread, sig, &vec, spawn_no_exec);
3789 }
3790 }
3791 }
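/*
 * Illustrative userland counterpart (not part of this file): resetting
 * SIGINT and SIGTERM to their default dispositions in the child.
 *
 *	#include <signal.h>
 *	#include <spawn.h>
 *
 *	posix_spawnattr_t attr;
 *	sigset_t def;
 *
 *	posix_spawnattr_init(&attr);
 *	sigemptyset(&def);
 *	sigaddset(&def, SIGINT);
 *	sigaddset(&def, SIGTERM);
 *	posix_spawnattr_setsigdefault(&attr, &def);
 *	posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGDEF);
 *
 * The sigset_t bit layout is exactly what the (1 << (sig - 1)) test in
 * the loop above walks.
 */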
3792
3793 /*
3794 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
3795 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
3796 * limit.
3797 *
3798 * Userland gives us the interval in seconds; the kernel SPI expects nanoseconds.
3799 */
3800 if ((px_sa.psa_cpumonitor_percent != 0) && (px_sa.psa_cpumonitor_percent < UINT8_MAX)) {
3801 /*
3802 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
3803 * an entitlement to configure the monitor a certain way seems silly, since
3804 * whoever is turning it on could just as easily choose not to do so.
3805 */
3806 error = proc_set_task_ruse_cpu(p->task,
3807 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
3808 (uint8_t)px_sa.psa_cpumonitor_percent,
3809 px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
3810 0, TRUE);
3811 }
3812
3813
3814 if (px_pcred_info &&
3815 (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_LOGIN)) {
3816 /*
3817 * setlogin() must happen after setsid()
3818 */
3819 setlogin_internal(p, px_pcred_info->pspci_login);
3820 }
3821 }
3822
3823 bad:
3824
3825 if (error == 0) {
3826 /* reset delay idle sleep status if set */
3827 #if CONFIG_DELAY_IDLE_SLEEP
3828 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) {
3829 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
3830 }
3831 #endif /* CONFIG_DELAY_IDLE_SLEEP */
3832 /* upon successful spawn, re/set the proc control state */
3833 if (imgp->ip_px_sa != NULL) {
3834 switch (px_sa.psa_pcontrol) {
3835 case POSIX_SPAWN_PCONTROL_THROTTLE:
3836 p->p_pcaction = P_PCTHROTTLE;
3837 break;
3838 case POSIX_SPAWN_PCONTROL_SUSPEND:
3839 p->p_pcaction = P_PCSUSP;
3840 break;
3841 case POSIX_SPAWN_PCONTROL_KILL:
3842 p->p_pcaction = P_PCKILL;
3843 break;
3844 case POSIX_SPAWN_PCONTROL_NONE:
3845 default:
3846 p->p_pcaction = 0;
3847 break;
3848 }
3850 }
3851 exec_resettextvp(p, imgp);
3852
3853 #if CONFIG_MEMORYSTATUS
3854 /* Set jetsam priority for DriverKit processes */
3855 if (px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DRIVER) {
3856 px_sa.psa_priority = JETSAM_PRIORITY_DRIVER_APPLE;
3857 }
3858
3859 /* Has jetsam attributes? */
3860 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
3861 /*
3862 * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
3863 * longer relevant, as background limits are described via the inactive limit slots.
3864 *
3865 * That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
3866 * we attempt to mimic previous behavior by forcing the BG limit data into the
3867 * inactive/non-fatal mode and forcing the active slots to hold system_wide/fatal mode.
3868 */
3869
3870 if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
3871 memorystatus_update(p, px_sa.psa_priority, 0, FALSE, /* assertion priority */
3872 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
3873 TRUE,
3874 -1, TRUE,
3875 px_sa.psa_memlimit_inactive, FALSE);
3876 } else {
3877 memorystatus_update(p, px_sa.psa_priority, 0, FALSE, /* assertion priority */
3878 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
3879 TRUE,
3880 px_sa.psa_memlimit_active,
3881 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
3882 px_sa.psa_memlimit_inactive,
3883 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL));
3884 }
3885 }
3886
3887 /* Has jetsam relaunch behavior? */
3888 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK)) {
3889 /*
3890 * Launchd has passed in data indicating the behavior of this process in response to jetsam.
3891 * This data would be used by the jetsam subsystem to determine the position and protection
3892 * offered to this process on dirty -> clean transitions.
3893 */
3894 int relaunch_flags = P_MEMSTAT_RELAUNCH_UNKNOWN;
3895 switch (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK) {
3896 case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_LOW:
3897 relaunch_flags = P_MEMSTAT_RELAUNCH_LOW;
3898 break;
3899 case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MED:
3900 relaunch_flags = P_MEMSTAT_RELAUNCH_MED;
3901 break;
3902 case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_HIGH:
3903 relaunch_flags = P_MEMSTAT_RELAUNCH_HIGH;
3904 break;
3905 default:
3906 break;
3907 }
3908 memorystatus_relaunch_flags_update(p, relaunch_flags);
3909 }
3910
3911 #endif /* CONFIG_MEMORYSTATUS */
3912 if (imgp->ip_px_sa != NULL && px_sa.psa_thread_limit > 0) {
3913 task_set_thread_limit(new_task, (uint16_t)px_sa.psa_thread_limit);
3914 }
3915
3916 /* Disable wakeup monitoring for DriverKit processes */
3917 if (px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DRIVER) {
3918 uint32_t flags = WAKEMON_DISABLE;
3919 task_wakeups_monitor_ctl(new_task, &flags, NULL);
3920 }
3921 }
3922
3923 /*
3924 * If we successfully called fork1(), we always need to do this;
3925 * we identify this case by noting the IMGPF_SPAWN flag. This is
3926 * because we come back from that call with signals blocked in the
3927 * child, and we have to unblock them, but we want to wait until
3928 * after we've performed any spawn actions. This has to happen
3929 * before check_for_signature(), which uses psignal.
3930 */
3931 if (spawn_no_exec) {
3932 if (proc_transit_set) {
3933 proc_transend(p, 0);
3934 }
3935
3936 /*
3937 * Drop the signal lock on the child which was taken on our
3938 * behalf by forkproc()/cloneproc() to prevent signals being
3939 * received by the child in a partially constructed state.
3940 */
3941 proc_signalend(p, 0);
3942 }
3943
3944 if (error == 0) {
3945 /*
3946 * We need to initialize the bank context behind the protection of
3947 * the proc_trans lock to prevent a race with exit. We can't do this during
3948 * exec_activate_image because task_bank_init checks entitlements that
3949 * aren't loaded until subsequent calls (including exec_resettextvp).
3950 */
3951 error = proc_transstart(p, 0, 0);
3952
3953 if (error == 0) {
3954 task_bank_init(new_task);
3955 proc_transend(p, 0);
3956 }
3957
3958 #if __arm64__
3959 proc_footprint_entitlement_hacks(p, new_task);
3960 #endif /* __arm64__ */
3961
3962 #if __has_feature(ptrauth_calls)
3963 task_set_pac_exception_fatal_flag(new_task);
3964 #endif /* __has_feature(ptrauth_calls) */
3965 }
3966
3967 /* Inherit task role from old task to new task for exec */
3968 if (error == 0 && !spawn_no_exec) {
3969 proc_inherit_task_role(new_task, old_task);
3970 }
3971
3972 #if CONFIG_ARCADE
3973 if (error == 0) {
3974 /*
3975 * Check to see if we need to trigger an arcade upcall AST now
3976 * that the vnode has been reset on the task.
3977 */
3978 arcade_prepare(new_task, imgp->ip_new_thread);
3979 }
3980 #endif /* CONFIG_ARCADE */
3981
3982 /* Clear the initial wait on the thread before handling spawn policy */
3983 if (imgp && imgp->ip_new_thread) {
3984 task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT);
3985 }
3986
3987 /*
3988 * Apply the spawnattr policy, apptype (which primes the task for importance donation),
3989 * and bind any portwatch ports to the new task.
3990 * This must be done after the exec so that the child's thread is ready,
3991 * and after the in transit state has been released, because priority is
3992 * dropped here, so we need to be prepared for a potentially long preemption interval.
3993 *
3994 * TODO: Consider splitting this up into separate phases
3995 */
3996 if (error == 0 && imgp->ip_px_sa != NULL) {
3997 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
3998
3999 error = exec_handle_spawnattr_policy(p, imgp->ip_new_thread, psa->psa_apptype, psa->psa_qos_clamp,
4000 psa->psa_darwin_role, &port_actions);
4001 }
4002
4003 /* Transfer the turnstile watchport boost to new task if in exec */
4004 if (error == 0 && !spawn_no_exec) {
4005 task_transfer_turnstile_watchports(old_task, new_task, imgp->ip_new_thread);
4006 }
4007
4008 /*
4009 * Apply the requested maximum address.
4010 */
4011 if (error == 0 && imgp->ip_px_sa != NULL) {
4012 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
4013
4014 if (psa->psa_max_addr) {
4015 vm_map_set_max_addr(get_task_map(new_task), (vm_map_offset_t)psa->psa_max_addr);
4016 }
4017 }
4018
4019 if (error == 0 && imgp->ip_px_sa != NULL) {
4020 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
4021
4022 if (psa->psa_no_smt) {
4023 task_set_no_smt(new_task);
4024 }
4025 if (psa->psa_tecs) {
4026 task_set_tecs(new_task);
4027 }
4028 }
4029
4030 if (error == 0) {
4031 /* Apply the main thread qos */
4032 thread_t main_thread = imgp->ip_new_thread;
4033 task_set_main_thread_qos(new_task, main_thread);
4034
4035 #if CONFIG_MACF
4036 proc_apply_jit_and_jumbo_va_policies(p, new_task);
4037 #endif /* CONFIG_MACF */
4038 }
4039
4040 /*
4041 * Release any ports we kept around for binding to the new task
4042 * We need to release the rights even if the posix_spawn has failed.
4043 */
4044 if (imgp->ip_px_spa != NULL) {
4045 exec_port_actions_destroy(&port_actions);
4046 }
4047
4048 /*
4049 * We have to delay operations which might throw a signal until after
4050 * the signals have been unblocked; however, we want that to happen
4051 * after exec_resettextvp() so that the textvp is correct when they
4052 * fire.
4053 */
4054 if (error == 0) {
4055 error = check_for_signature(p, imgp);
4056
4057 /*
4058 * Pay for our earlier safety; deliver the delayed signals from
4059 * the incomplete spawn process now that it's complete.
4060 */
4061 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
4062 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
4063 }
4064
4065 if (error == 0 && !spawn_no_exec) {
4066 KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
4067 p->p_pid);
4068 }
4069 }
4070
4071 if (spawn_no_exec) {
4072 /* flag the 'fork' has occurred */
4073 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
4074 }
4075
4076 /* flag exec has occurred, notify only if it has not failed due to FP Key error */
4077 if (!error && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) {
4078 proc_knote(p, NOTE_EXEC);
4079 }
4080
4081 if (imgp != NULL) {
4082 if (imgp->ip_vp) {
4083 vnode_put(imgp->ip_vp);
4084 }
4085 if (imgp->ip_scriptvp) {
4086 vnode_put(imgp->ip_scriptvp);
4087 }
4088 if (imgp->ip_strings) {
4089 execargs_free(imgp);
4090 }
4091 if (imgp->ip_px_sfa != NULL) {
4092 FREE(imgp->ip_px_sfa, M_TEMP);
4093 }
4094 if (imgp->ip_px_spa != NULL) {
4095 FREE(imgp->ip_px_spa, M_TEMP);
4096 }
4097 #if CONFIG_PERSONAS
4098 if (imgp->ip_px_persona != NULL) {
4099 FREE(imgp->ip_px_persona, M_TEMP);
4100 }
4101 #endif
4102 if (imgp->ip_px_pcred_info != NULL) {
4103 FREE(imgp->ip_px_pcred_info, M_TEMP);
4104 }
4105
4106 if (subsystem_root_path != NULL) {
4107 FREE(subsystem_root_path, M_SBUF);
4108 }
4109 #if CONFIG_MACF
4110 if (imgp->ip_px_smpx != NULL) {
4111 spawn_free_macpolicyinfo(imgp->ip_px_smpx);
4112 }
4113 if (imgp->ip_execlabelp) {
4114 mac_cred_label_free(imgp->ip_execlabelp);
4115 }
4116 if (imgp->ip_scriptlabelp) {
4117 mac_vnode_label_free(imgp->ip_scriptlabelp);
4118 }
4119 if (imgp->ip_cs_error != OS_REASON_NULL) {
4120 os_reason_free(imgp->ip_cs_error);
4121 imgp->ip_cs_error = OS_REASON_NULL;
4122 }
4123 if (imgp->ip_inherited_shared_region_id != NULL) {
4124 kheap_free(KHEAP_DATA_BUFFERS, imgp->ip_inherited_shared_region_id,
4125 strlen(imgp->ip_inherited_shared_region_id) + 1);
4126 imgp->ip_inherited_shared_region_id = NULL;
4127 }
4128 #endif
4129 if (imgp->ip_sc_port != NULL) {
4130 ipc_port_release_send(imgp->ip_sc_port);
4131 imgp->ip_sc_port = NULL;
4132 }
4133 }
4134
4135 #if CONFIG_DTRACE
4136 if (spawn_no_exec) {
4137 /*
4138 * In the original DTrace reference implementation,
4139 * posix_spawn() was a libc routine that just
4140 * did vfork(2) then exec(2). Thus the proc::: probes
4141 * are very fork/exec oriented. The details of this
4142 * in-kernel implementation of posix_spawn() is different
4143 * (while producing the same process-observable effects)
4144 * particularly w.r.t. errors, and which thread/process
4145 * is constructing what on behalf of whom.
4146 */
4147 if (error) {
4148 DTRACE_PROC1(spawn__failure, int, error);
4149 } else {
4150 DTRACE_PROC(spawn__success);
4151 /*
4152 * Some DTrace scripts, e.g. newproc.d in
4153 * /usr/bin, rely on the 'exec-success'
4154 * probe being fired in the child after the
4155 * new process image has been constructed
4156 * in order to determine the associated pid.
4157 *
4158 * So, even though the parent built the image
4159 * here, for compatibility, mark the new thread
4160 * so 'exec-success' fires on it as it leaves
4161 * the kernel.
4162 */
4163 dtrace_thread_didexec(imgp->ip_new_thread);
4164 }
4165 } else {
4166 if (error) {
4167 DTRACE_PROC1(exec__failure, int, error);
4168 } else {
4169 dtrace_thread_didexec(imgp->ip_new_thread);
4170 }
4171 }
4172
4173 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
4174 (*dtrace_proc_waitfor_hook)(p);
4175 }
4176 #endif
4177
4178 #if CONFIG_AUDIT
4179 if (!error && AUDIT_ENABLED() && p) {
4180 /* Add the CDHash of the new process to the audit record */
4181 uint8_t *cdhash = cs_get_cdhash(p);
4182 if (cdhash) {
4183 AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
4184 }
4185 }
4186 #endif
4187
4188 /*
4189 * clear bsd_info from old task if it did exec.
4190 */
4191 if (task_did_exec(old_task)) {
4192 set_bsdtask_info(old_task, NULL);
4193 }
4194
4195 /* clear bsd_info from new task and terminate it if exec failed */
4196 if (new_task != NULL && task_is_exec_copy(new_task)) {
4197 set_bsdtask_info(new_task, NULL);
4198 task_terminate_internal(new_task);
4199 }
4200
4201 /* Return to both the parent and the child? */
4202 if (imgp != NULL && spawn_no_exec) {
4203 /*
4204 * If the parent wants the pid, copy it out
4205 */
4206 if (pid != USER_ADDR_NULL) {
4207 _Static_assert(sizeof(p->p_pid) == 4, "posix_spawn() assumes a 32-bit pid_t");
4208 bool aligned = (pid & 3) == 0;
4209 if (aligned) {
4210 (void)copyout_atomic32(p->p_pid, pid);
4211 } else {
4212 (void)suword(pid, p->p_pid);
4213 }
4214 }
4215 retval[0] = error;
4216
4217 /*
4218 * If we had an error, perform an internal reap; this is
4219 * entirely safe, as we have a real process backing us.
4220 */
4221 if (error) {
4222 proc_list_lock();
4223 p->p_listflag |= P_LIST_DEADPARENT;
4224 proc_list_unlock();
4225 proc_lock(p);
4226 /* make sure no one else has killed it off... */
4227 if (p->p_stat != SZOMB && p->exit_thread == NULL) {
4228 p->exit_thread = current_thread();
4229 proc_unlock(p);
4230 exit1(p, 1, (int *)NULL);
4231 } else {
4232 /* someone is doing it for us; just skip it */
4233 proc_unlock(p);
4234 }
4235 }
4236 }
4237
4238 /*
4239 * Do not terminate the current task if proc_exec_switch_task did not
4240 * switch the tasks; terminating the current task without the switch would
4241 * result in losing the SIGKILL status.
4242 */
4243 if (task_did_exec(old_task)) {
4244 /* Terminate the current task, since exec will start in new task */
4245 task_terminate_internal(old_task);
4246 }
4247
4248 /* Release the thread ref returned by fork_create_child/fork1 */
4249 if (imgp != NULL && imgp->ip_new_thread) {
4250 /* wake up the new thread */
4251 task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_FINAL_WAIT);
4252 thread_deallocate(imgp->ip_new_thread);
4253 imgp->ip_new_thread = NULL;
4254 }
4255
4256 /* Release the ref returned by fork_create_child/fork1 */
4257 if (new_task) {
4258 task_deallocate(new_task);
4259 new_task = NULL;
4260 }
4261
4262 if (should_release_proc_ref) {
4263 proc_rele(p);
4264 }
4265
4266 if (bufp != NULL) {
4267 FREE(bufp, M_TEMP);
4268 }
4269
4270 if (inherit != NULL) {
4271 ipc_importance_release(inherit);
4272 }
4273
4274 return error;
4275 }
4276
4277 /*
4278 * proc_exec_switch_task
4279 *
4280 * Parameters: p proc
4281 * old_task task before exec
4282 * new_task task after exec
4283 * new_thread thread in new task
4284 * inherit resulting importance linkage
4285 *
4286 * Returns: proc.
4287 *
4288 * Note: The function will switch the task pointer of proc
4289 * from old task to new task. The switch needs to happen
4290 * after draining all proc refs and inside a proc translock.
4291 * In the case of failure to switch the task, which might happen
4292 * if the process received a SIGKILL or jetsam killed it, it will make
4293 * sure that the new task terminates. User proc ref returned
4294 * to caller.
4295 *
4296 * This function is called after the point of no return; on failure
4297 * to switch, it will terminate the new task, swallow the error, and
4298 * let the terminated process complete exec and die.
4299 */
4300 proc_t
4301 proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread,
4302 void **inherit)
4303 {
4304 int error = 0;
4305 boolean_t task_active;
4306 boolean_t proc_active;
4307 boolean_t thread_active;
4308 thread_t old_thread = current_thread();
4309
4310 /*
4311 * Switch the task pointer of proc to new task.
4312 * Before switching the task, wait for proc_refdrain.
4313 * After the switch happens, the proc can disappear, so
4314 * take a ref before it does. Waiting for proc_refdrain
4315 * in exec will block all other threads trying to take a
4316 * proc ref, so boost the current thread to avoid
4317 * priority inversion.
4318 */
4319 thread_set_exec_promotion(old_thread);
4320 p = proc_refdrain_with_refwait(p, TRUE);
4321 /* extra proc ref returned to the caller */
4322
4323 assert(get_threadtask(new_thread) == new_task);
4324 task_active = task_is_active(new_task);
4325
4326 /* Take the proc_translock to change the task ptr */
4327 proc_lock(p);
4328 proc_active = !(p->p_lflag & P_LEXIT);
4329
4330 /* Check if the current thread is not aborted due to SIGKILL */
4331 thread_active = thread_is_active(old_thread);
4332
4333 /*
4334 * Do not switch the task if the new task or proc is already terminated
4335 * as a result of error in exec past point of no return
4336 */
4337 if (proc_active && task_active && thread_active) {
4338 error = proc_transstart(p, 1, 0);
4339 if (error == 0) {
4340 uthread_t new_uthread = get_bsdthread_info(new_thread);
4341 uthread_t old_uthread = get_bsdthread_info(current_thread());
4342
4343 /*
4344 * bsd_info of old_task will get cleared in execve and posix_spawn
4345 * after firing exec-success/error dtrace probe.
4346 */
4347 p->task = new_task;
4348
4349 /* Clear dispatchqueue and workloop ast offset */
4350 p->p_dispatchqueue_offset = 0;
4351 p->p_dispatchqueue_serialno_offset = 0;
4352 p->p_dispatchqueue_label_offset = 0;
4353 p->p_return_to_kernel_offset = 0;
4354
4355 /* Copy the signal state, dtrace state and set bsd ast on new thread */
4356 act_set_astbsd(new_thread);
4357 new_uthread->uu_siglist = old_uthread->uu_siglist;
4358 new_uthread->uu_sigwait = old_uthread->uu_sigwait;
4359 new_uthread->uu_sigmask = old_uthread->uu_sigmask;
4360 new_uthread->uu_oldmask = old_uthread->uu_oldmask;
4361 new_uthread->uu_vforkmask = old_uthread->uu_vforkmask;
4362 new_uthread->uu_exit_reason = old_uthread->uu_exit_reason;
4363 #if CONFIG_DTRACE
4364 new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig;
4365 new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop;
4366 new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid;
4367 assert(new_uthread->t_dtrace_scratch == NULL);
4368 new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch;
4369
4370 old_uthread->t_dtrace_sig = 0;
4371 old_uthread->t_dtrace_stop = 0;
4372 old_uthread->t_dtrace_resumepid = 0;
4373 old_uthread->t_dtrace_scratch = NULL;
4374 #endif
4375 /* Copy the resource accounting info */
4376 thread_copy_resource_info(new_thread, current_thread());
4377
4378 /* Clear the exit reason and signal state on old thread */
4379 old_uthread->uu_exit_reason = NULL;
4380 old_uthread->uu_siglist = 0;
4381
4382 /* Add the new uthread to proc uthlist and remove the old one */
4383 TAILQ_INSERT_TAIL(&p->p_uthlist, new_uthread, uu_list);
4384 TAILQ_REMOVE(&p->p_uthlist, old_uthread, uu_list);
4385
4386 task_set_did_exec_flag(old_task);
4387 task_clear_exec_copy_flag(new_task);
4388
4389 task_copy_fields_for_exec(new_task, old_task);
4390
4391 /* Transfer sandbox filter bits to new_task. */
4392 task_transfer_mach_filter_bits(new_task, old_task);
4393
4394 /*
4395 * Need to transfer pending watch port boosts to the new task
4396 * while still making sure that the old task remains in the
4397 * importance linkage. Create an importance linkage from old task
4398 * to new task, then switch the task importance base of old task
4399 * and new task. After the switch the port watch boost will be
4400 * boosting the new task and new task will be donating importance
4401 * to old task.
4402 */
4403 *inherit = ipc_importance_exec_switch_task(old_task, new_task);
4404
4405 proc_transend(p, 1);
4406 }
4407 }
4408
4409 proc_unlock(p);
4410 proc_refwake(p);
4411 thread_clear_exec_promotion(old_thread);
4412
4413 if (error != 0 || !task_active || !proc_active || !thread_active) {
4414 task_terminate_internal(new_task);
4415 }
4416
4417 return p;
4418 }
4419
4420 /*
4421 * execve
4422 *
4423 * Parameters: uap->fname File name to exec
4424 * uap->argp Argument list
4425 * uap->envp Environment list
4426 *
4427 * Returns: 0 Success
4428 * __mac_execve:EINVAL Invalid argument
4429 * __mac_execve:ENOTSUP Not supported
4430 * __mac_execve:EACCES Permission denied
4431 * __mac_execve:EINTR Interrupted function
4432 * __mac_execve:ENOMEM Not enough space
4433 * __mac_execve:EFAULT Bad address
4434 * __mac_execve:ENAMETOOLONG Filename too long
4435 * __mac_execve:ENOEXEC Executable file format error
4436 * __mac_execve:ETXTBSY Text file busy [misuse of error code]
4437 * __mac_execve:???
4438 *
4439 * TODO: Dynamic linker header address on stack is copied via suword()
4440 */
4441 /* ARGSUSED */
4442 int
4443 execve(proc_t p, struct execve_args *uap, int32_t *retval)
4444 {
4445 struct __mac_execve_args muap;
4446 int err;
4447
4448 memoryshot(VM_EXECVE, DBG_FUNC_NONE);
4449
4450 muap.fname = uap->fname;
4451 muap.argp = uap->argp;
4452 muap.envp = uap->envp;
4453 muap.mac_p = USER_ADDR_NULL;
4454 err = __mac_execve(p, &muap, retval);
4455
4456 return err;
4457 }
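/*
 * Illustrative userland usage (not part of this file): execve(2) does
 * not return on success, so code after the call only runs on failure.
 *
 *	#include <unistd.h>
 *	#include <err.h>
 *
 *	char *argv[] = { "ls", "-l", NULL };
 *	char *envp[] = { "PATH=/bin:/usr/bin", NULL };
 *
 *	execve("/bin/ls", argv, envp);
 *	err(1, "execve");		// reached only if the exec failed
 */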
4458
4459 /*
4460 * __mac_execve
4461 *
4462 * Parameters: uap->fname File name to exec
4463 * uap->argp Argument list
4464 * uap->envp Environment list
4465 * uap->mac_p MAC label supplied by caller
4466 *
4467 * Returns: 0 Success
4468 * EINVAL Invalid argument
4469 * ENOTSUP Not supported
4470 * ENOEXEC Executable file format error
4471 * exec_activate_image:EINVAL Invalid argument
4472 * exec_activate_image:EACCES Permission denied
4473 * exec_activate_image:EINTR Interrupted function
4474 * exec_activate_image:ENOMEM Not enough space
4475 * exec_activate_image:EFAULT Bad address
4476 * exec_activate_image:ENAMETOOLONG Filename too long
4477 * exec_activate_image:ENOEXEC Executable file format error
4478 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
4479 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
4480 * exec_activate_image:???
4481 * mac_execve_enter:???
4482 *
4483 * TODO: Dynamic linker header address on stack is copied via suword()
4484 */
4485 int
4486 __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
4487 {
4488 char *bufp = NULL;
4489 struct image_params *imgp;
4490 struct vnode_attr *vap;
4491 struct vnode_attr *origvap;
4492 int error;
4493 int is_64 = IS_64BIT_PROCESS(p);
4494 struct vfs_context context;
4495 struct uthread *uthread;
4496 task_t old_task = current_task();
4497 task_t new_task = NULL;
4498 boolean_t should_release_proc_ref = FALSE;
4499 boolean_t exec_done = FALSE;
4500 boolean_t in_vfexec = FALSE;
4501 void *inherit = NULL;
4502
4503 context.vc_thread = current_thread();
4504 context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
4505
4506 /* Allocate a big chunk for locals instead of using stack since these
4507 * structures are pretty big.
4508 */
4509 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
4510 imgp = (struct image_params *) bufp;
4511 if (bufp == NULL) {
4512 error = ENOMEM;
4513 goto exit_with_error;
4514 }
4515 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
4516 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
4517
4518 /* Initialize the common data in the image_params structure */
4519 imgp->ip_user_fname = uap->fname;
4520 imgp->ip_user_argv = uap->argp;
4521 imgp->ip_user_envv = uap->envp;
4522 imgp->ip_vattr = vap;
4523 imgp->ip_origvattr = origvap;
4524 imgp->ip_vfs_context = &context;
4525 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
4526 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
4527 imgp->ip_mac_return = 0;
4528 imgp->ip_cs_error = OS_REASON_NULL;
4529 imgp->ip_simulator_binary = IMGPF_SB_DEFAULT;
4530 imgp->ip_subsystem_root_path = NULL;
4531
4532 #if CONFIG_MACF
4533 if (uap->mac_p != USER_ADDR_NULL) {
4534 error = mac_execve_enter(uap->mac_p, imgp);
4535 if (error) {
4536 kauth_cred_unref(&context.vc_ucred);
4537 goto exit_with_error;
4538 }
4539 }
4540 #endif
4541 uthread = get_bsdthread_info(current_thread());
4542 if (uthread->uu_flag & UT_VFORK) {
4543 imgp->ip_flags |= IMGPF_VFORK_EXEC;
4544 in_vfexec = TRUE;
4545 } else {
4546 imgp->ip_flags |= IMGPF_EXEC;
4547
4548 /*
4549 * For execve case, create a new task and thread
4550 * which points to current_proc. The current_proc will point
4551 * to the new task after image activation and proc ref drain.
4552 *
4553 * proc (current_proc) <----- old_task (current_task)
4554 * ^ | ^
4555 * | | |
4556 * | ----------------------------------
4557 * |
4558 * --------- new_task (task marked as TF_EXEC_COPY)
4559 *
4560 * After image activation, the proc will point to the new task
4561 * and would look like following.
4562 *
4563 * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
4564 * ^ |
4565 * | |
4566 * | ----------> new_task
4567 * | |
4568 * -----------------
4569 *
4570 * During exec any transition from new_task -> proc is fine, but don't allow
4571 * transition from proc->task, since it will modify old_task.
4572 */
4573 imgp->ip_new_thread = fork_create_child(old_task,
4574 NULL,
4575 p,
4576 FALSE,
4577 p->p_flag & P_LP64,
4578 task_get_64bit_data(old_task),
4579 TRUE);
4580 /* task and thread ref returned by fork_create_child */
4581 if (imgp->ip_new_thread == NULL) {
4582 error = ENOMEM;
4583 goto exit_with_error;
4584 }
4585
4586 new_task = get_threadtask(imgp->ip_new_thread);
4587 context.vc_thread = imgp->ip_new_thread;
4588 }
4589
4590 imgp->ip_subsystem_root_path = p->p_subsystem_root_path;
4591
4592 error = exec_activate_image(imgp);
4593 /* thread and task ref returned for vfexec case */
4594
4595 if (imgp->ip_new_thread != NULL) {
4596 /*
4597 * task reference might be returned by exec_activate_image
4598 * for vfexec.
4599 */
4600 new_task = get_threadtask(imgp->ip_new_thread);
4601 #if defined(HAS_APPLE_PAC)
4602 ml_task_set_disable_user_jop(new_task, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
4603 ml_thread_set_disable_user_jop(imgp->ip_new_thread, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
4604 #endif
4605 }
4606
4607 if (!error && !in_vfexec) {
4608 p = proc_exec_switch_task(p, old_task, new_task, imgp->ip_new_thread, &inherit);
4609 /* proc ref returned */
4610 should_release_proc_ref = TRUE;
4611 }
4612
4613 kauth_cred_unref(&context.vc_ucred);
4614
4615 /* Image not claimed by any activator? */
4616 if (error == -1) {
4617 error = ENOEXEC;
4618 }
4619
4620 if (!error) {
4621 exec_done = TRUE;
4622 assert(imgp->ip_new_thread != NULL);
4623
4624 exec_resettextvp(p, imgp);
4625 error = check_for_signature(p, imgp);
4626 }
4627
4628 #if defined(HAS_APPLE_PAC)
4629 if (imgp->ip_new_thread && !error) {
4630 ml_task_set_jop_pid_from_shared_region(new_task);
4631 ml_thread_set_jop_pid(imgp->ip_new_thread, new_task);
4632 }
4633 #endif /* defined(HAS_APPLE_PAC) */
4634
4635 /* flag exec has occurred, notify only if it has not failed due to FP Key error */
4636 if (exec_done && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) {
4637 proc_knote(p, NOTE_EXEC);
4638 }
4639
4640 if (imgp->ip_vp != NULLVP) {
4641 vnode_put(imgp->ip_vp);
4642 }
4643 if (imgp->ip_scriptvp != NULLVP) {
4644 vnode_put(imgp->ip_scriptvp);
4645 }
4646 if (imgp->ip_strings) {
4647 execargs_free(imgp);
4648 }
4649 #if CONFIG_MACF
4650 if (imgp->ip_execlabelp) {
4651 mac_cred_label_free(imgp->ip_execlabelp);
4652 }
4653 if (imgp->ip_scriptlabelp) {
4654 mac_vnode_label_free(imgp->ip_scriptlabelp);
4655 }
4656 #endif
4657 if (imgp->ip_cs_error != OS_REASON_NULL) {
4658 os_reason_free(imgp->ip_cs_error);
4659 imgp->ip_cs_error = OS_REASON_NULL;
4660 }
4661
4662 if (!error) {
4663 /*
4664 * We need to initialize the bank context behind the protection of
4665 * the proc_trans lock to prevent a race with exit. We can't do this during
4666 * exec_activate_image because task_bank_init checks entitlements that
4667 * aren't loaded until subsequent calls (including exec_resettextvp).
4668 */
4669 error = proc_transstart(p, 0, 0);
4670 }
4671
4672 if (!error) {
4673 task_bank_init(new_task);
4674 proc_transend(p, 0);
4675
4676 #if __arm64__
4677 proc_footprint_entitlement_hacks(p, new_task);
4678 #endif /* __arm64__ */
4679
4680 /* Sever any extant thread affinity */
4681 thread_affinity_exec(current_thread());
4682
4683 /* Inherit task role from old task to new task for exec */
4684 if (!in_vfexec) {
4685 proc_inherit_task_role(new_task, old_task);
4686 }
4687
4688 thread_t main_thread = imgp->ip_new_thread;
4689
4690 task_set_main_thread_qos(new_task, main_thread);
4691
4692 #if __has_feature(ptrauth_calls)
4693 task_set_pac_exception_fatal_flag(new_task);
4694 #endif /* __has_feature(ptrauth_calls) */
4695
4696 #if CONFIG_ARCADE
4697 /*
4698 * Check to see if we need to trigger an arcade upcall AST now
4699 * that the vnode has been reset on the task.
4700 */
4701 arcade_prepare(new_task, imgp->ip_new_thread);
4702 #endif /* CONFIG_ARCADE */
4703
4704 #if CONFIG_MACF
4705 proc_apply_jit_and_jumbo_va_policies(p, new_task);
4706 #endif /* CONFIG_MACF */
4707
4708 if (vm_darkwake_mode == TRUE) {
4709 /*
4710 * This process is being launched when the system
4711 * is in darkwake, so mark it specially. This will
4712 * cause all its pages to be entered on the background queue.
4713 */
4714 task_set_darkwake_mode(new_task, vm_darkwake_mode);
4715 }
4716
4717 #if CONFIG_DTRACE
4718 dtrace_thread_didexec(imgp->ip_new_thread);
4719
4720 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
4721 (*dtrace_proc_waitfor_hook)(p);
4722 }
4723 #endif
4724
4725 #if CONFIG_AUDIT
4726 if (!error && AUDIT_ENABLED() && p) {
4727 /* Add the CDHash of the new process to the audit record */
4728 uint8_t *cdhash = cs_get_cdhash(p);
4729 if (cdhash) {
4730 AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
4731 }
4732 }
4733 #endif
4734
4735 if (in_vfexec) {
4736 vfork_return(p, retval, p->p_pid);
4737 }
4738 } else {
4739 DTRACE_PROC1(exec__failure, int, error);
4740 }
4741
4742 exit_with_error:
4743
4744 /*
4745 * clear bsd_info from old task if it did exec.
4746 */
4747 if (task_did_exec(old_task)) {
4748 set_bsdtask_info(old_task, NULL);
4749 }
4750
4751 /* clear bsd_info from new task and terminate it if exec failed */
4752 if (new_task != NULL && task_is_exec_copy(new_task)) {
4753 set_bsdtask_info(new_task, NULL);
4754 task_terminate_internal(new_task);
4755 }
4756
4757 if (imgp != NULL) {
4758 /* Clear the initial wait on the thread transferring watchports */
4759 if (imgp->ip_new_thread) {
4760 task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT);
4761 }
4762
4763 /* Transfer the watchport boost to new task */
4764 if (!error && !in_vfexec) {
4765 task_transfer_turnstile_watchports(old_task,
4766 new_task, imgp->ip_new_thread);
4767 }
4768 /*
4769 * Do not terminate the current task if proc_exec_switch_task did not
4770 * switch the tasks; terminating the current task without the switch would
4771 * result in losing the SIGKILL status.
4772 */
4773 if (task_did_exec(old_task)) {
4774 /* Terminate the current task, since exec will start in new task */
4775 task_terminate_internal(old_task);
4776 }
4777
4778 /* Release the thread ref returned by fork_create_child */
4779 if (imgp->ip_new_thread) {
4780 /* wake up the new exec thread */
4781 task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_FINAL_WAIT);
4782 thread_deallocate(imgp->ip_new_thread);
4783 imgp->ip_new_thread = NULL;
4784 }
4785 }
4786
4787 /* Release the ref returned by fork_create_child */
4788 if (new_task) {
4789 task_deallocate(new_task);
4790 new_task = NULL;
4791 }
4792
4793 if (should_release_proc_ref) {
4794 proc_rele(p);
4795 }
4796
4797 if (bufp != NULL) {
4798 FREE(bufp, M_TEMP);
4799 }
4800
4801 if (inherit != NULL) {
4802 ipc_importance_release(inherit);
4803 }
4804
4805 return error;
4806 }
4807
4808
4809 /*
4810 * copyinptr
4811 *
4812 * Description: Copy a pointer in from user space to a user_addr_t in kernel
4813 * space, based on 32/64 bitness of the user space
4814 *
4815 * Parameters: froma User space address
4816 * toptr Address of kernel space user_addr_t
4817 * ptr_size 4/8, based on 'froma' address space
4818 *
4819 * Returns: 0 Success
4820 * EFAULT Bad 'froma'
4821 *
4822 * Implicit returns:
4823 * *toptr Modified
4824 */
4825 static int
4826 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
4827 {
4828 int error;
4829
4830 if (ptr_size == 4) {
4831 /* 64 bit value containing 32 bit address */
4832 unsigned int i = 0;
4833
4834 error = copyin(froma, &i, 4);
4835 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
4836 } else {
4837 error = copyin(froma, toptr, 8);
4838 }
4839 return error;
4840 }
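/*
 * Worked example (illustrative): for a 32-bit caller (ptr_size == 4)
 * whose memory at 'froma' holds the little-endian bytes 45 23 01 00,
 * copyin() yields i == 0x00012345 and *toptr becomes the zero-extended
 * user_addr_t 0x0000000000012345. For a 64-bit caller all 8 bytes are
 * copied directly.
 */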
4841
4842
4843 /*
4844 * copyoutptr
4845 *
4846 * Description: Copy a pointer out from a user_addr_t in kernel space to
4847 * user space, based on 32/64 bitness of the user space
4848 *
4849 * Parameters: ua User space address to copy to
4850 * ptr Address of kernel space user_addr_t
4851 * ptr_size 4/8, based on 'ua' address space
4852 *
4853 * Returns: 0 Success
4854 * EFAULT Bad 'ua'
4855 *
4856 */
4857 static int
4858 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
4859 {
4860 int error;
4861
4862 if (ptr_size == 4) {
4863 /* 64 bit value containing 32 bit address */
4864 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int, ua); /* SAFE */
4865
4866 error = copyout(&i, ptr, 4);
4867 } else {
4868 error = copyout(&ua, ptr, 8);
4869 }
4870 return error;
4871 }
4872
4873
4874 /*
4875 * exec_copyout_strings
4876 *
4877 * Copy out the strings segment to user space. The strings segment is put
4878 * on a preinitialized stack frame.
4879 *
4880 * Parameters: struct image_params * the image parameter block
4881 * int * a pointer to the stack offset variable
4882 *
4883 * Returns: 0 Success
4884 * !0 Failure: errno
4885 *
4886 * Implicit returns:
4887 * (*stackp) The stack offset, modified
4888 *
4889 * Note: The strings segment is laid out backward, down from the
4890 * top of the stack, to consume the minimal amount of
4891 * space possible; the returned stack pointer points to the
4892 * end of the area consumed (stacks grow downward).
4893 *
4894 * argc is an int; arg[i] are pointers; env[i] are pointers;
4895 * the 0's are (void *)NULL's
4896 *
4897 * The stack frame layout is:
4898 *
4899 * +-------------+ <- p->user_stack
4900 * | 16b |
4901 * +-------------+
4902 * | STRING AREA |
4903 * | : |
4904 * | : |
4905 * | : |
4906 * +- -- -- -- --+
4907 * | PATH AREA |
4908 * +-------------+
4909 * | 0 |
4910 * +-------------+
4911 * | applev[n] |
4912 * +-------------+
4913 * :
4914 * :
4915 * +-------------+
4916 * | applev[1] |
4917 * +-------------+
4918 * | exec_path / |
4919 * | applev[0] |
4920 * +-------------+
4921 * | 0 |
4922 * +-------------+
4923 * | env[n] |
4924 * +-------------+
4925 * :
4926 * :
4927 * +-------------+
4928 * | env[0] |
4929 * +-------------+
4930 * | 0 |
4931 * +-------------+
4932 * | arg[argc-1] |
4933 * +-------------+
4934 * :
4935 * :
4936 * +-------------+
4937 * | arg[0] |
4938 * +-------------+
4939 * | argc |
4940 * sp-> +-------------+
4941 *
4942 * Although technically a part of the STRING AREA, we treat the PATH AREA as
4943 * a separate entity. This allows us to align the beginning of the PATH AREA
4944 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
4945 * which precede it on the stack are properly aligned.
4946 */
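/*
 * Illustrative userland probe (not part of this file) for the layout
 * above. The four-argument form of main() used to reach the apple
 * vector is a private Darwin convention, shown only for illustration.
 *
 *	#include <stdio.h>
 *
 *	int
 *	main(int argc, char **argv, char **envp, char **apple)
 *	{
 *		printf("argv  @ %p\n", (void *)argv);
 *		printf("envp  @ %p\n", (void *)envp);
 *		printf("apple @ %p (%s)\n", (void *)apple, apple[0]);
 *		return 0;
 *	}
 *
 * The three arrays print at ascending addresses just below the string
 * area, and apple[0] is the executable path stored by exec_save_path().
 */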
4947 __attribute__((noinline))
4948 static int
4949 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
4950 {
4951 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4952 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
4953 int ptr_area_size;
4954 void *ptr_buffer_start, *ptr_buffer;
4955 size_t string_size;
4956
4957 user_addr_t string_area; /* *argv[], *env[] */
4958 user_addr_t ptr_area; /* argv[], env[], applev[] */
4959 user_addr_t argc_area; /* argc */
4960 user_addr_t stack;
4961 int error;
4962
4963 unsigned i;
4964 struct copyout_desc {
4965 char *start_string;
4966 int count;
4967 #if CONFIG_DTRACE
4968 user_addr_t *dtrace_cookie;
4969 #endif
4970 boolean_t null_term;
4971 } descriptors[] = {
4972 {
4973 .start_string = imgp->ip_startargv,
4974 .count = imgp->ip_argc,
4975 #if CONFIG_DTRACE
4976 .dtrace_cookie = &p->p_dtrace_argv,
4977 #endif
4978 .null_term = TRUE
4979 },
4980 {
4981 .start_string = imgp->ip_endargv,
4982 .count = imgp->ip_envc,
4983 #if CONFIG_DTRACE
4984 .dtrace_cookie = &p->p_dtrace_envp,
4985 #endif
4986 .null_term = TRUE
4987 },
4988 {
4989 .start_string = imgp->ip_strings,
4990 .count = 1,
4991 #if CONFIG_DTRACE
4992 .dtrace_cookie = NULL,
4993 #endif
4994 .null_term = FALSE
4995 },
4996 {
4997 .start_string = imgp->ip_endenvv,
4998 .count = imgp->ip_applec - 1, /* exec_path handled above */
4999 #if CONFIG_DTRACE
5000 .dtrace_cookie = NULL,
5001 #endif
5002 .null_term = TRUE
5003 }
5004 };
5005
5006 stack = *stackp;
5007
5008 /*
5009 * All previous contributors to the string area
5010 * should have aligned their sub-area
5011 */
5012 if (imgp->ip_strspace % ptr_size != 0) {
5013 error = EINVAL;
5014 goto bad;
5015 }
5016
5017 /* Grow the stack down for the strings we've been building up */
5018 string_size = imgp->ip_strendp - imgp->ip_strings;
5019 stack -= string_size;
5020 string_area = stack;
5021
5022 /*
5023 * Need room for one pointer for each string, plus
5024 * one each for the NULLs terminating the argv, envv, and apple areas.
5025 */
5026 ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) * ptr_size;
5027 stack -= ptr_area_size;
5028 ptr_area = stack;
5029
5030 /* We'll construct all the pointer arrays in our string buffer,
5031 * which we already know is aligned properly, and ip_argspace
5032 * was used to verify we have enough space.
5033 */
5034 ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
5035
5036 /*
5037 * Need room for pointer-aligned argc slot.
5038 */
5039 stack -= ptr_size;
5040 argc_area = stack;
5041
5042 /*
5043 * Record the size of the arguments area so that sysctl_procargs()
5044 * can return the argument area without having to parse the arguments.
5045 */
5046 proc_lock(p);
5047 p->p_argc = imgp->ip_argc;
5048 p->p_argslen = (int)(*stackp - string_area);
5049 proc_unlock(p);
5050
5051 /* Return the initial stack address: the location of argc */
5052 *stackp = stack;
5053
5054 /*
5055 * Copy out the entire strings area.
5056 */
5057 error = copyout(imgp->ip_strings, string_area,
5058 string_size);
5059 if (error) {
5060 goto bad;
5061 }
5062
5063 for (i = 0; i < sizeof(descriptors) / sizeof(descriptors[0]); i++) {
5064 char *cur_string = descriptors[i].start_string;
5065 int j;
5066
5067 #if CONFIG_DTRACE
5068 if (descriptors[i].dtrace_cookie) {
5069 proc_lock(p);
5070 *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
5071 proc_unlock(p);
5072 }
5073 #endif /* CONFIG_DTRACE */
5074
5075 /*
5076 * For each segment (argv, envv, applev), copy as many pointers as requested
5077 * to our pointer buffer.
5078 */
5079 for (j = 0; j < descriptors[i].count; j++) {
5080 user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
5081
5082 /* Copy out the pointer to the current string. Alignment has been verified */
5083 if (ptr_size == 8) {
5084 *(uint64_t *)ptr_buffer = (uint64_t)cur_address;
5085 } else {
5086 *(uint32_t *)ptr_buffer = (uint32_t)cur_address;
5087 }
5088
5089 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
5090 cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
5091 }
5092
5093 if (descriptors[i].null_term) {
5094 if (ptr_size == 8) {
5095 *(uint64_t *)ptr_buffer = 0ULL;
5096 } else {
5097 *(uint32_t *)ptr_buffer = 0;
5098 }
5099
5100 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
5101 }
5102 }
5103
5104 /*
5105 * Copy out all our pointer arrays in bulk.
5106 */
5107 error = copyout(ptr_buffer_start, ptr_area,
5108 ptr_area_size);
5109 if (error) {
5110 goto bad;
5111 }
5112
5113 /* argc (int32, stored in a ptr_size area) */
5114 error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
5115 if (error) {
5116 goto bad;
5117 }
5118
5119 bad:
5120 return error;
5121 }
5122
5123
5124 /*
5125 * exec_extract_strings
5126 *
5127 * Copy arguments and environment from user space into work area; we may
5128 * have already copied some early arguments into the work area, and if
5129 * so, any arguments copied in are appended to those already there.
5130 * This function is the primary manipulator of ip_argspace, since
5131 * these are the arguments the client of execve(2) knows about. After
5132 * each argv[]/envv[] string is copied, we charge the string length
5133 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
5134 * fully preflight the arg list size.
5135 *
5136 * Parameters: struct image_params * the image parameter block
5137 *
5138 * Returns: 0 Success
5139 * !0 Failure: errno
5140 *
5141 * Implicit returns:
5142 * (imgp->ip_argc) Count of arguments, updated
5143 * (imgp->ip_envc) Count of environment strings, updated
5144 * (imgp->ip_argspace) Count of NCARGS space remaining
5145 * (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
5146 *
5147 *
5148 * Note: The argument and environment vectors are user space pointers
5149 * to arrays of user space pointers.
5150 */
5151 __attribute__((noinline))
5152 static int
5153 exec_extract_strings(struct image_params *imgp)
5154 {
5155 int error = 0;
5156 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT_ADDR) ? 8 : 4;
5157 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
5158 user_addr_t argv = imgp->ip_user_argv;
5159 user_addr_t envv = imgp->ip_user_envv;
5160
5161 /*
5162 * Adjust space reserved for the path name by however much padding it
5163 * needs. Doing this here since we didn't know if this would be a 32-
5164 * or 64-bit process back in exec_save_path.
5165 */
5166 while (imgp->ip_strspace % new_ptr_size != 0) {
5167 *imgp->ip_strendp++ = '\0';
5168 imgp->ip_strspace--;
5169 /* imgp->ip_argspace--; not counted towards exec args total */
5170 }
5171
5172 /*
5173 * From now on, we start attributing string space to ip_argspace
5174 */
5175 imgp->ip_startargv = imgp->ip_strendp;
5176 imgp->ip_argc = 0;
5177
5178 if ((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
5179 user_addr_t arg;
5180 char *argstart, *ch;
5181
5182 /* First, the arguments in the "#!" string are tokenized and extracted. */
5183 argstart = imgp->ip_interp_buffer;
5184 while (argstart) {
5185 ch = argstart;
5186 while (*ch && !IS_WHITESPACE(*ch)) {
5187 ch++;
5188 }
5189
5190 if (*ch == '\0') {
5191 /* last argument, no need to NUL-terminate */
5192 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
5193 argstart = NULL;
5194 } else {
5195 /* NUL-terminate */
5196 *ch = '\0';
5197 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
5198
5199 /*
5200 * Find the next string. We know spaces at the end of the string have already
5201 * been stripped.
5202 */
5203 argstart = ch + 1;
5204 while (IS_WHITESPACE(*argstart)) {
5205 argstart++;
5206 }
5207 }
5208
5209 /* Error-check, regardless of whether this is the last interpreter arg or not */
5210 if (error) {
5211 goto bad;
5212 }
5213 if (imgp->ip_argspace < new_ptr_size) {
5214 error = E2BIG;
5215 goto bad;
5216 }
5217 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
5218 imgp->ip_argc++;
5219 }
5220
5221 if (argv != 0LL) {
5222 /*
5223 * If we are running an interpreter, replace the av[0] that was
5224 * passed to execve() with the path name that was passed to
5225 * execve(), for the benefit of interpreters which do not use
5226 * PATH to locate their script arguments.
5227 */
5228 error = copyinptr(argv, &arg, ptr_size);
5229 if (error) {
5230 goto bad;
5231 }
5232 if (arg != 0LL) {
5233 argv += ptr_size; /* consume without using */
5234 }
5235 }
5236
5237 if (imgp->ip_interp_sugid_fd != -1) {
5238 char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
5239 snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
5240 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
5241 } else {
5242 error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
5243 }
5244
5245 if (error) {
5246 goto bad;
5247 }
5248 if (imgp->ip_argspace < new_ptr_size) {
5249 error = E2BIG;
5250 goto bad;
5251 }
5252 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
5253 imgp->ip_argc++;
5254 }
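/*
 * Worked example (illustrative): for a script that begins with
 *
 *	#!/usr/bin/interp -a -b
 *
 * and is invoked as "./script arg1", the tokenizer above extracts
 * "/usr/bin/interp", "-a", and "-b" from ip_interp_buffer, and the
 * script path then replaces the caller's av[0], so the child sees
 *
 *	argv[] = { "/usr/bin/interp", "-a", "-b", "./script", "arg1" }
 *
 * (the remaining caller arguments are appended by the loop below).
 */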
5255
5256 while (argv != 0LL) {
5257 user_addr_t arg;
5258
5259 error = copyinptr(argv, &arg, ptr_size);
5260 if (error) {
5261 goto bad;
5262 }
5263
5264 if (arg == 0LL) {
5265 break;
5266 }
5267
5268 argv += ptr_size;
5269
5270 /*
5271 * av[n...] = arg[n]
5272 */
5273 error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
5274 if (error) {
5275 goto bad;
5276 }
5277 if (imgp->ip_argspace < new_ptr_size) {
5278 error = E2BIG;
5279 goto bad;
5280 }
5281 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
5282 imgp->ip_argc++;
5283 }
5284
5285 /* Save space for argv[] NULL terminator */
5286 if (imgp->ip_argspace < new_ptr_size) {
5287 error = E2BIG;
5288 goto bad;
5289 }
5290 imgp->ip_argspace -= new_ptr_size;
5291
5292 /* Note where the args end and the env begins. */
5293 imgp->ip_endargv = imgp->ip_strendp;
5294 imgp->ip_envc = 0;
5295
5296 /* Now, get the environment */
5297 while (envv != 0LL) {
5298 user_addr_t env;
5299
5300 error = copyinptr(envv, &env, ptr_size);
5301 if (error) {
5302 goto bad;
5303 }
5304
5305 envv += ptr_size;
5306 if (env == 0LL) {
5307 break;
5308 }
5309 /*
5310 * av[n...] = env[n]
5311 */
5312 error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
5313 if (error) {
5314 goto bad;
5315 }
5316 if (imgp->ip_argspace < new_ptr_size) {
5317 error = E2BIG;
5318 goto bad;
5319 }
5320 imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
5321 imgp->ip_envc++;
5322 }
5323
5324 /* Save space for envv[] NULL terminator */
5325 if (imgp->ip_argspace < new_ptr_size) {
5326 error = E2BIG;
5327 goto bad;
5328 }
5329 imgp->ip_argspace -= new_ptr_size;
5330
5331 /* Align the tail of the combined argv+envv area */
5332 while (imgp->ip_strspace % new_ptr_size != 0) {
5333 if (imgp->ip_argspace < 1) {
5334 error = E2BIG;
5335 goto bad;
5336 }
5337 *imgp->ip_strendp++ = '\0';
5338 imgp->ip_strspace--;
5339 imgp->ip_argspace--;
5340 }
5341
5342 /* Note where the envv ends and applev begins. */
5343 imgp->ip_endenvv = imgp->ip_strendp;
5344
5345 /*
5346 * From now on, we are no longer charging argument
5347 * space to ip_argspace.
5348 */
5349
5350 bad:
5351 return error;
5352 }
5353
5354 /*
5355 * Libc has an 8-element array set up for stack guard values. It only fills
5356 * in one of those entries, and both gcc and llvm seem to use only a single
5357 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
5358 * do the work to construct them.
5359 */
5360 #define GUARD_VALUES 1
5361 #define GUARD_KEY "stack_guard="
5362
5363 /*
5364 * System malloc needs some entropy when it is initialized.
5365 */
5366 #define ENTROPY_VALUES 2
5367 #define ENTROPY_KEY "malloc_entropy="
5368
5369 /*
5370 * libplatform needs a random pointer-obfuscation value when it is initialized.
5371 */
5372 #define PTR_MUNGE_VALUES 1
5373 #define PTR_MUNGE_KEY "ptr_munge="
5374
5375 /*
5376 * System malloc engages nanozone for UIAPP.
5377 */
5378 #define NANO_ENGAGE_KEY "MallocNanoZone=1"
5379
5380 #define PFZ_KEY "pfz="
5381 extern user32_addr_t commpage_text32_location;
5382 extern user64_addr_t commpage_text64_location;
5383
5384 extern uuid_string_t bootsessionuuid_string;
5385
5386 #define MAIN_STACK_VALUES 4
5387 #define MAIN_STACK_KEY "main_stack="
5388
5389 #define FSID_KEY "executable_file="
5390 #define DYLD_FSID_KEY "dyld_file="
5391 #define CDHASH_KEY "executable_cdhash="
5392 #define DYLD_FLAGS_KEY "dyld_flags="
5393 #define SUBSYSTEM_ROOT_PATH_KEY "subsystem_root_path="
5394 #define APP_BOOT_SESSION_KEY "executable_boothash="
5395 #if __has_feature(ptrauth_calls)
5396 #define PTRAUTH_DISABLED_FLAG "ptrauth_disabled=1"
5397 #define DYLD_ARM64E_ABI_KEY "arm64e_abi="
5398 #endif /* __has_feature(ptrauth_calls) */
5399
5400 #define FSID_MAX_STRING "0x1234567890abcdef,0x1234567890abcdef"
5401
5402 #define HEX_STR_LEN 18 // 64-bit hex value "0x0123456701234567"
5403
5404 static int
5405 exec_add_entropy_key(struct image_params *imgp,
5406 const char *key,
5407 int values,
5408 boolean_t embedNUL)
5409 {
5410 const int limit = 8;
5411 uint64_t entropy[limit];
5412 char str[strlen(key) + (HEX_STR_LEN + 1) * limit + 1];
5413 if (values > limit) {
5414 values = limit;
5415 }
5416
5417 read_random(entropy, sizeof(entropy[0]) * values);
5418
5419 if (embedNUL) {
5420 entropy[0] &= ~(0xffull << 8);
5421 }
5422
5423 int len = scnprintf(str, sizeof(str), "%s0x%llx", key, entropy[0]);
5424 size_t remaining = sizeof(str) - len;
5425 for (int i = 1; i < values && remaining > 0; ++i) {
5426 size_t start = sizeof(str) - remaining;
5427 len = scnprintf(&str[start], remaining, ",0x%llx", entropy[i]);
5428 remaining -= len;
5429 }
5430
5431 return exec_add_user_string(imgp, CAST_USER_ADDR_T(str), UIO_SYSSPACE, FALSE);
5432 }
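/*
 * Worked example (illustrative; the values are made up): with
 * ENTROPY_KEY and two values, the string handed to the apple[] vector
 * looks like
 *
 *	malloc_entropy=0x9fd0a7c8e1b23344,0x17c3be9a55d20081
 *
 * For GUARD_KEY, the embedNUL path clears byte 1 of the first value, so
 * the stored guard always contains an embedded NUL.
 */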
5433
5434 /*
5435 * Build up the contents of the apple[] string vector
5436 */
5437 #if (DEVELOPMENT || DEBUG)
5438 extern uint64_t dyld_flags;
5439 #endif
5440
5441 #if __has_feature(ptrauth_calls)
5442 static inline bool
5443 is_arm64e_running_as_arm64(const struct image_params *imgp)
5444 {
5445 return (imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E &&
5446 (imgp->ip_flags & IMGPF_NOJOP);
5447 }
5448 #endif /* __has_feature(ptrauth_calls) */
5449
5450 static int
5451 exec_add_apple_strings(struct image_params *imgp,
5452 const load_result_t *load_result)
5453 {
5454 int error;
5455 int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
5456
5457 /* exec_save_path stored the first string */
5458 imgp->ip_applec = 1;
5459
5460 /* adding the pfz string */
5461 {
5462 char pfz_string[strlen(PFZ_KEY) + HEX_STR_LEN + 1];
5463
5464 if (img_ptr_size == 8) {
5465 __assert_only size_t ret = snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%llx", commpage_text64_location);
5466 assert(ret < sizeof(pfz_string));
5467 } else {
5468 snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%x", commpage_text32_location);
5469 }
5470 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), UIO_SYSSPACE, FALSE);
5471 if (error) {
5472 printf("Failed to add the pfz string with error %d\n", error);
5473 goto bad;
5474 }
5475 imgp->ip_applec++;
5476 }
5477
5478 /* adding the NANO_ENGAGE_KEY key */
5479 if (imgp->ip_px_sa) {
5480 int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
5481
5482 if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
5483 const char *nano_string = NANO_ENGAGE_KEY;
5484 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(nano_string), UIO_SYSSPACE, FALSE);
5485 if (error) {
5486 goto bad;
5487 }
5488 imgp->ip_applec++;
5489 }
5490 }
5491
5492 /*
5493 * Supply libc with a collection of random values to use when
5494 * implementing -fstack-protector.
5495 *
5496 * (The first random string always contains an embedded NUL so that
5497 * __stack_chk_guard also protects against C string vulnerabilities)
5498 */
5499 error = exec_add_entropy_key(imgp, GUARD_KEY, GUARD_VALUES, TRUE);
5500 if (error) {
5501 goto bad;
5502 }
5503 imgp->ip_applec++;
5504
5505 /*
5506 * Supply libc with entropy for system malloc.
5507 */
5508 error = exec_add_entropy_key(imgp, ENTROPY_KEY, ENTROPY_VALUES, FALSE);
5509 if (error) {
5510 goto bad;
5511 }
5512 imgp->ip_applec++;
5513
5514 /*
5515 * Supply libpthread & libplatform with a random value to use for pointer
5516 * obfuscation.
5517 */
5518 error = exec_add_entropy_key(imgp, PTR_MUNGE_KEY, PTR_MUNGE_VALUES, FALSE);
5519 if (error) {
5520 goto bad;
5521 }
5522 imgp->ip_applec++;
5523
5524 /*
5525 * Add MAIN_STACK_KEY: Supplies the address and size of the main thread's
5526 * stack if it was allocated by the kernel.
5527 *
5528 * The guard page is not included in this stack size as libpthread
5529 * expects to add it back in after receiving this value.
5530 */
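/*
 * Illustrative format of the resulting entry, each value 0x-prefixed hex:
 *
 *     main_stack=<user_stack>,<user_stack_size>,<user_stack_alloc>,<user_stack_alloc_size>
 */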
5531 if (load_result->unixproc) {
5532 char stack_string[strlen(MAIN_STACK_KEY) + (HEX_STR_LEN + 1) * MAIN_STACK_VALUES + 1];
5533 snprintf(stack_string, sizeof(stack_string),
5534 MAIN_STACK_KEY "0x%llx,0x%llx,0x%llx,0x%llx",
5535 (uint64_t)load_result->user_stack,
5536 (uint64_t)load_result->user_stack_size,
5537 (uint64_t)load_result->user_stack_alloc,
5538 (uint64_t)load_result->user_stack_alloc_size);
5539 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(stack_string), UIO_SYSSPACE, FALSE);
5540 if (error) {
5541 goto bad;
5542 }
5543 imgp->ip_applec++;
5544 }
5545
5546 if (imgp->ip_vattr) {
5547 uint64_t fsid = vnode_get_va_fsid(imgp->ip_vattr);
5548 uint64_t fsobjid = imgp->ip_vattr->va_fileid;
5549
5550 char fsid_string[strlen(FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
5551 snprintf(fsid_string, sizeof(fsid_string),
5552 FSID_KEY "0x%llx,0x%llx", fsid, fsobjid);
5553 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
5554 if (error) {
5555 goto bad;
5556 }
5557 imgp->ip_applec++;
5558 }
5559
5560 if (imgp->ip_dyld_fsid || imgp->ip_dyld_fsobjid) {
5561 char fsid_string[strlen(DYLD_FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
5562 snprintf(fsid_string, sizeof(fsid_string),
5563 DYLD_FSID_KEY "0x%llx,0x%llx", imgp->ip_dyld_fsid, imgp->ip_dyld_fsobjid);
5564 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
5565 if (error) {
5566 goto bad;
5567 }
5568 imgp->ip_applec++;
5569 }
5570
5571 uint8_t cdhash[SHA1_RESULTLEN];
5572 int cdhash_error = ubc_cs_getcdhash(imgp->ip_vp, imgp->ip_arch_offset, cdhash);
5573 if (cdhash_error == 0) {
5574 char hash_string[strlen(CDHASH_KEY) + 2 * SHA1_RESULTLEN + 1];
5575 strncpy(hash_string, CDHASH_KEY, sizeof(hash_string));
5576 char *p = hash_string + sizeof(CDHASH_KEY) - 1;
5577 for (int i = 0; i < SHA1_RESULTLEN; i++) {
5578 snprintf(p, 3, "%02x", (int) cdhash[i]);
5579 p += 2;
5580 }
5581 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hash_string), UIO_SYSSPACE, FALSE);
5582 if (error) {
5583 goto bad;
5584 }
5585 imgp->ip_applec++;
5586
5587 /* hash together cd-hash and boot-session-uuid */
5588 uint8_t sha_digest[SHA256_DIGEST_LENGTH];
5589 SHA256_CTX sha_ctx;
5590 SHA256_Init(&sha_ctx);
5591 SHA256_Update(&sha_ctx, bootsessionuuid_string, sizeof(bootsessionuuid_string));
5592 SHA256_Update(&sha_ctx, cdhash, sizeof(cdhash));
5593 SHA256_Final(sha_digest, &sha_ctx);
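/* Note: only the first SHA1_RESULTLEN (20) bytes of the 32-byte
 * SHA-256 digest are emitted into the string below. */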
5594 char app_boot_string[strlen(APP_BOOT_SESSION_KEY) + 2 * SHA1_RESULTLEN + 1];
5595 strncpy(app_boot_string, APP_BOOT_SESSION_KEY, sizeof(app_boot_string));
5596 char *s = app_boot_string + sizeof(APP_BOOT_SESSION_KEY) - 1;
5597 for (int i = 0; i < SHA1_RESULTLEN; i++) {
5598 snprintf(s, 3, "%02x", (int) sha_digest[i]);
5599 s += 2;
5600 }
5601 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(app_boot_string), UIO_SYSSPACE, FALSE);
5602 if (error) {
5603 goto bad;
5604 }
5605 imgp->ip_applec++;
5606 }
5607 #if (DEVELOPMENT || DEBUG)
5608 if (dyld_flags) {
5609 char dyld_flags_string[strlen(DYLD_FLAGS_KEY) + HEX_STR_LEN + 1];
5610 snprintf(dyld_flags_string, sizeof(dyld_flags_string), DYLD_FLAGS_KEY "0x%llx", dyld_flags);
5611 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_flags_string), UIO_SYSSPACE, FALSE);
5612 if (error) {
5613 goto bad;
5614 }
5615 imgp->ip_applec++;
5616 }
5617 #endif
5618 if (imgp->ip_subsystem_root_path) {
5619 size_t buffer_len = MAXPATHLEN + strlen(SUBSYSTEM_ROOT_PATH_KEY);
5620 char subsystem_root_path_string[buffer_len];
5621 int required_len = snprintf(subsystem_root_path_string, buffer_len, SUBSYSTEM_ROOT_PATH_KEY "%s", imgp->ip_subsystem_root_path);
5622
5623 if (((size_t)required_len >= buffer_len) || (required_len < 0)) {
5624 error = ENAMETOOLONG;
5625 goto bad;
5626 }
5627
5628 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(subsystem_root_path_string), UIO_SYSSPACE, FALSE);
5629 if (error) {
5630 goto bad;
5631 }
5632
5633 imgp->ip_applec++;
5634 }
5635 #if __has_feature(ptrauth_calls)
5636 if (is_arm64e_running_as_arm64(imgp)) {
5637 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(PTRAUTH_DISABLED_FLAG), UIO_SYSSPACE, FALSE);
5638 if (error) {
5639 goto bad;
5640 }
5641
5642 imgp->ip_applec++;
5643 }
5644 #endif /* __has_feature(ptrauth_calls) */
5645
5646
5647 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
5648 {
5649 char dyld_abi_string[strlen(DYLD_ARM64E_ABI_KEY) + 8];
5650 strlcpy(dyld_abi_string, DYLD_ARM64E_ABI_KEY, sizeof(dyld_abi_string));
5651 bool allowAll = bootarg_arm64e_preview_abi;
5652 strlcat(dyld_abi_string, (allowAll ? "all" : "os"), sizeof(dyld_abi_string));
5653 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_abi_string), UIO_SYSSPACE, FALSE);
5654 if (error) {
5655 goto bad;
5656 }
5657
5658 imgp->ip_applec++;
5659 }
5660 #endif
5661
5662 /* Align the tail of the combined applev area */
5663 while (imgp->ip_strspace % img_ptr_size != 0) {
5664 *imgp->ip_strendp++ = '\0';
5665 imgp->ip_strspace--;
5666 }
5667
5668 bad:
5669 return error;
5670 }
5671
5672 /*
5673 * exec_check_permissions
5674 *
5675 * Description: Verify that the file that is being attempted to be executed
5676 * is in fact allowed to be executed based on its POSIX file
5677 * permissions and other access control criteria
5678 *
5679 * Parameters: struct image_params * the image parameter block
5680 *
5681 * Returns: 0 Success
5682 * EACCES Permission denied
5683 * ENOEXEC Executable file format error
5684 * ETXTBSY Text file busy [misuse of error code]
5685 * vnode_getattr:???
5686 * vnode_authorize:???
5687 */
5688 static int
5689 exec_check_permissions(struct image_params *imgp)
5690 {
5691 struct vnode *vp = imgp->ip_vp;
5692 struct vnode_attr *vap = imgp->ip_vattr;
5693 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
5694 int error;
5695 kauth_action_t action;
5696
5697 /* Only allow execution of regular files */
5698 if (!vnode_isreg(vp)) {
5699 return EACCES;
5700 }
5701
5702 /* Get the file attributes that we will be using here and elsewhere */
5703 VATTR_INIT(vap);
5704 VATTR_WANTED(vap, va_uid);
5705 VATTR_WANTED(vap, va_gid);
5706 VATTR_WANTED(vap, va_mode);
5707 VATTR_WANTED(vap, va_fsid);
5708 VATTR_WANTED(vap, va_fsid64);
5709 VATTR_WANTED(vap, va_fileid);
5710 VATTR_WANTED(vap, va_data_size);
5711 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0) {
5712 return error;
5713 }
5714
5715 /*
5716 * Ensure that at least one execute bit is on - otherwise root
5717 * will always succeed, and we don't want that to happen unless the
5718 * file really is executable.
5719 */
5720 if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
5721 return EACCES;
5722 }
5723
5724 /* Disallow zero length files */
5725 if (vap->va_data_size == 0) {
5726 return ENOEXEC;
5727 }
5728
5729 imgp->ip_arch_offset = (user_size_t)0;
5730 #if __LP64__
5731 imgp->ip_arch_size = vap->va_data_size;
5732 #else
5733 if (vap->va_data_size > UINT32_MAX) {
5734 return ENOEXEC;
5735 }
5736 imgp->ip_arch_size = (user_size_t)vap->va_data_size;
5737 #endif
5738
5739 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
5740 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) {
5741 vap->va_mode &= ~(VSUID | VSGID);
5742 }
5743
5744 /*
5745 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
5746 * flags for setuid/setgid binaries.
5747 */
5748 if (vap->va_mode & (VSUID | VSGID)) {
5749 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
5750 }
5751
5752 #if CONFIG_MACF
5753 error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
5754 if (error) {
5755 return error;
5756 }
5757 #endif
5758
5759 /* Check for execute permission */
5760 action = KAUTH_VNODE_EXECUTE;
5761 /* Traced images must also be readable */
5762 if (p->p_lflag & P_LTRACED) {
5763 action |= KAUTH_VNODE_READ_DATA;
5764 }
5765 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0) {
5766 return error;
5767 }
5768
5769 #if 0
5770 /* Don't let it run if anyone had it open for writing */
5771 vnode_lock(vp);
5772 if (vp->v_writecount) {
5773 panic("going to return ETXTBSY %x", vp);
5774 vnode_unlock(vp);
5775 return ETXTBSY;
5776 }
5777 vnode_unlock(vp);
5778 #endif
5779
5780 /* XXX May want to indicate to underlying FS that vnode is open */
5781
5782 return error;
5783 }
5784
5785
5786 /*
5787 * exec_handle_sugid
5788 *
5789 * Initially clear the P_SUGID in the process flags; if an SUGID process is
5790 * exec'ing a non-SUGID image, then this is the point of no return.
5791 *
5792 * If the image being activated is SUGID, then replace the credential with a
5793 * copy, disable tracing (unless the tracing process is root), reset the
5794 * mach task port to revoke it, and set the P_SUGID bit.
5795 *
5796 * If the saved user and group ID will be changing, then make sure it happens
5797 * to a new credential, rather than a shared one.
5798 *
5799 * Set the security token (this is probably obsolete, given that the token
5800 * should not technically be separate from the credential itself).
5801 *
5802 * Parameters: struct image_params * the image parameter block
5803 *
5804 * Returns: int 0 on success; errno value on failure
5805 *
5806 * Implicit returns:
5807 * <process credential> Potentially modified/replaced
5808 * <task port> Potentially revoked
5809 * <process flags> P_SUGID bit potentially modified
5810 * <security token> Potentially modified
5811 */
5812 __attribute__((noinline))
5813 static int
5814 exec_handle_sugid(struct image_params *imgp)
5815 {
5816 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
5817 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
5818 int i;
5819 int leave_sugid_clear = 0;
5820 int mac_reset_ipc = 0;
5821 int error = 0;
5822 task_t task = NULL;
5823 #if CONFIG_MACF
5824 int mac_transition, disjoint_cred = 0;
5825 int label_update_return = 0;
5826
5827 /*
5828 * Determine whether a call to update the MAC label will result in the
5829 * credential changing.
5830 *
5831 * Note: MAC policies which do not actually end up modifying
5832 * the label subsequently are strongly encouraged to
5833 * return 0 for this check, since a non-zero answer will
5834 * slow down the exec fast path for normal binaries.
5835 */
5836 mac_transition = mac_cred_check_label_update_execve(
5837 imgp->ip_vfs_context,
5838 imgp->ip_vp,
5839 imgp->ip_arch_offset,
5840 imgp->ip_scriptvp,
5841 imgp->ip_scriptlabelp,
5842 imgp->ip_execlabelp,
5843 p,
5844 imgp->ip_px_smpx);
5845 #endif
5846
5847 OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
5848
5849 /*
5850 * Order of the following is important; group checks must go last,
5851 * as we use the success of the 'ismember' check combined with the
5852 * failure of the explicit match to indicate that we will be setting
5853 * the egid of the process even though the new process did not
5854 * require VSUID/VSGID bits in order for it to set the new group as
5855 * its egid.
5856 *
5857 * Note: Technically, by this we are implying a call to
5858 * setegid() in the new process, rather than implying
5859 * it used its VSGID bit to set the effective group,
5860 * even though there is no code in that process to make
5861 * such a call.
5862 */
5863 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
5864 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
5865 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
5866 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
5867 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid))) ||
5868 (imgp->ip_sc_port != NULL)) {
5869 #if CONFIG_MACF
5870 /* label for MAC transition and neither VSUID nor VSGID */
5871 handle_mac_transition:
5872 #endif
5873
5874 #if CONFIG_SETUID
5875 /*
5876 * Replace the credential with a copy of itself if euid or
5877 * egid change.
5878 *
5879 * Note: setuid binaries will automatically opt out of
5880 * group resolver participation as a side effect
5881 * of this operation. This is an intentional
5882 * part of the security model, which requires a
5883 * participating credential be established by
5884 * escalating privilege, setting up all other
5885 * aspects of the credential including whether
5886 * or not to participate in external group
5887 * membership resolution, then dropping their
5888 * effective privilege to that of the desired
5889 * final credential state.
5890 *
5891 * Modifications to p_ucred must be guarded using the
5892 * proc's ucred lock. This prevents others from accessing
5893 * a garbage credential.
5894 */
5895
5896 if (imgp->ip_sc_port != NULL) {
5897 extern int suid_cred_verify(ipc_port_t, vnode_t, uint32_t *);
5898 int ret = -1;
5899 uid_t uid = UINT32_MAX;
5900
5901 /*
5902 * Check that the vnodes match. If a script is being
5903 * executed check the script's vnode rather than the
5904 * interpreter's.
5905 */
5906 struct vnode *vp = imgp->ip_scriptvp != NULL ? imgp->ip_scriptvp : imgp->ip_vp;
5907
5908 ret = suid_cred_verify(imgp->ip_sc_port, vp, &uid);
5909 if (ret == 0) {
5910 apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
5911 return kauth_cred_setresuid(my_cred,
5912 KAUTH_UID_NONE,
5913 uid,
5914 uid,
5915 KAUTH_UID_NONE);
5916 });
5917 } else {
5918 error = EPERM;
5919 }
5920 }
5921
5922 if (imgp->ip_origvattr->va_mode & VSUID) {
5923 apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
5924 return kauth_cred_setresuid(my_cred,
5925 KAUTH_UID_NONE,
5926 imgp->ip_origvattr->va_uid,
5927 imgp->ip_origvattr->va_uid,
5928 KAUTH_UID_NONE);
5929 });
5930 }
5931
5932 if (imgp->ip_origvattr->va_mode & VSGID) {
5933 apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
5934 return kauth_cred_setresgid(my_cred,
5935 KAUTH_GID_NONE,
5936 imgp->ip_origvattr->va_gid,
5937 imgp->ip_origvattr->va_gid);
5938 });
5939 }
5940 #endif /* CONFIG_SETUID */
5941
5942 #if CONFIG_MACF
5943 /*
5944 * If a policy has indicated that it will transition the label,
5945 * before making the call into the MAC policies, get a new
5946 * duplicate credential, so they can modify it without
5947 * modifying any others sharing it.
5948 */
5949 if (mac_transition) {
5950 /*
5951 * This hook may generate upcalls that require
5952 * importance donation from the kernel.
5953 * (23925818)
5954 */
5955 thread_t thread = current_thread();
5956 thread_enable_send_importance(thread, TRUE);
5957 kauth_proc_label_update_execve(p,
5958 imgp->ip_vfs_context,
5959 imgp->ip_vp,
5960 imgp->ip_arch_offset,
5961 imgp->ip_scriptvp,
5962 imgp->ip_scriptlabelp,
5963 imgp->ip_execlabelp,
5964 &imgp->ip_csflags,
5965 imgp->ip_px_smpx,
5966 &disjoint_cred, /* will be non zero if disjoint */
5967 &label_update_return);
5968 thread_enable_send_importance(thread, FALSE);
5969
5970 if (disjoint_cred) {
5971 /*
5972 * If updating the MAC label resulted in a
5973 * disjoint credential, flag that we need to
5974 * set the P_SUGID bit. This protects
5975 * against debuggers being attached by an
5976 * insufficiently privileged process onto the
5977 * result of a transition to a more privileged
5978 * credential.
5979 */
5980 leave_sugid_clear = 0;
5981 }
5982
5983 imgp->ip_mac_return = label_update_return;
5984 }
5985
5986 mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
5987
5988 #endif /* CONFIG_MACF */
5989
5990 /*
5991 * If 'leave_sugid_clear' is non-zero, then we passed the
5992 * VSUID and MACF checks, and successfully determined that
5993 * the previous cred was a member of the VSGID group, but
5994 * that it was not the default at the time of the execve,
5995 * and that the post-labelling credential was not disjoint.
5996 * So we don't set the P_SUGID or reset mach ports and fds
5997 * on the basis of simply running this code.
5998 */
5999 if (mac_reset_ipc || !leave_sugid_clear) {
6000 /*
6001 * Have mach reset the task and thread ports.
6002 * We don't want anyone who had the ports before
6003 * a setuid exec to be able to access/control the
6004 * task/thread after.
6005 */
6006 ipc_task_reset((imgp->ip_new_thread != NULL) ?
6007 get_threadtask(imgp->ip_new_thread) : p->task);
6008 ipc_thread_reset((imgp->ip_new_thread != NULL) ?
6009 imgp->ip_new_thread : current_thread());
6010 }
6011
6012 if (!leave_sugid_clear) {
6013 /*
6014 * Flag the process as setuid.
6015 */
6016 OSBitOrAtomic(P_SUGID, &p->p_flag);
6017
6018 /*
6019 * Radar 2261856; setuid security hole fix
6020 * XXX For setuid processes, attempt to ensure that
6021 * stdin, stdout, and stderr are already allocated.
6022 * We do not want userland to accidentally allocate
6023 * descriptors in this range which has implied meaning
6024 * to libc.
6025 */
6026 for (i = 0; i < 3; i++) {
6027 if (fp_get_noref_locked(p, i) != NULL) {
6028 continue;
6029 }
6030
6031 /*
6032 * Do the kernel equivalent of
6033 *
6034 * if i == 0
6035 * (void) open("/dev/null", O_RDONLY);
6036 * else
6037 * (void) open("/dev/null", O_WRONLY);
6038 */
6039
6040 struct fileproc *fp;
6041 int indx;
6042 int flag;
6043 struct nameidata *ndp = NULL;
6044
6045 if (i == 0) {
6046 flag = FREAD;
6047 } else {
6048 flag = FWRITE;
6049 }
6050
6051 if ((error = falloc(p,
6052 &fp, &indx, imgp->ip_vfs_context)) != 0) {
6053 continue;
6054 }
6055
6056 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
6057 if (ndp == NULL) {
6058 fp_free(p, indx, fp);
6059 error = ENOMEM;
6060 break;
6061 }
6062
6063 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
6064 CAST_USER_ADDR_T("/dev/null"),
6065 imgp->ip_vfs_context);
6066
6067 if ((error = vn_open(ndp, flag, 0)) != 0) {
6068 fp_free(p, indx, fp);
6069 FREE(ndp, M_TEMP);
6070 break;
6071 }
6072
6073 struct fileglob *fg = fp->fp_glob;
6074
6075 fg->fg_flag = flag;
6076 fg->fg_ops = &vnops;
6077 fg->fg_data = ndp->ni_vp;
6078
6079 vnode_put(ndp->ni_vp);
6080
6081 proc_fdlock(p);
6082 procfdtbl_releasefd(p, indx, NULL);
6083 fp_drop(p, indx, fp, 1);
6084 proc_fdunlock(p);
6085
6086 FREE(ndp, M_TEMP);
6087 }
6088 }
6089 }
6090 #if CONFIG_MACF
6091 else {
6092 /*
6093 * We are here because we were told that the MAC label will
6094 * be transitioned, and the binary is not VSUID or VSGID; to
6095 * deal with this case, we could either duplicate a lot of
6096 * code, or we can indicate we want to default the P_SUGID
6097 * bit clear and jump back up.
6098 */
6099 if (mac_transition) {
6100 leave_sugid_clear = 1;
6101 goto handle_mac_transition;
6102 }
6103 }
6104
6105 #endif /* CONFIG_MACF */
6106
6107 /*
6108 * Implement the semantic where the effective user and group become
6109 * the saved user and group in exec'ed programs.
6110 *
6111 * Modifications to p_ucred must be guarded using the
6112 * proc's ucred lock. This prevents others from accessing
6113 * a garbage credential.
6114 */
6115 apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
6116 return kauth_cred_setsvuidgid(my_cred,
6117 kauth_cred_getuid(my_cred),
6118 kauth_cred_getgid(my_cred));
6119 });
6120
6121 /* Update the process' identity version and set the security token */
6122 p->p_idversion = OSIncrementAtomic(&nextpidversion);
6123
6124 if (imgp->ip_new_thread != NULL) {
6125 task = get_threadtask(imgp->ip_new_thread);
6126 } else {
6127 task = p->task;
6128 }
6129 set_security_token_task_internal(p, task);
6130
6131 return error;
6132 }
6133
6134
6135 /*
6136 * create_unix_stack
6137 *
6138 * Description: Set the user stack address for the process to the provided
6139 * address. If a custom stack was not set as a result of the
6140 * load process (i.e. as specified by the image file for the
6141 * executable), then allocate the stack in the provided map and
6142 * set up appropriate guard pages for enforcing administrative
6143 * limits on stack growth, if they end up being needed.
6144 *
6145 * Parameters: p Process to set stack on
6146 * load_result Information from mach-o load commands
6147 * map Address map in which to allocate the new stack
6148 *
6149 * Returns: KERN_SUCCESS Stack successfully created
6150 * !KERN_SUCCESS Mach failure code
6151 */
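/*
 * A sketch of the region allocated below when user_stack_alloc_size > 0
 * (addresses increase upward; the stack grows downward):
 *
 *     addr + size == user_stack        <- initial stack pointer
 *     addr + prot_size .. addr + size  usable stack
 *     addr .. addr + prot_size         VM_PROT_NONE guard region
 */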
6152 __attribute__((noinline))
6153 static kern_return_t
6154 create_unix_stack(vm_map_t map, load_result_t* load_result,
6155 proc_t p)
6156 {
6157 mach_vm_size_t size, prot_size;
6158 mach_vm_offset_t addr, prot_addr;
6159 kern_return_t kr;
6160
6161 mach_vm_address_t user_stack = load_result->user_stack;
6162
6163 proc_lock(p);
6164 p->user_stack = (uintptr_t)user_stack;
6165 if (load_result->custom_stack) {
6166 p->p_lflag |= P_LCUSTOM_STACK;
6167 }
6168 proc_unlock(p);
6169 if (vm_map_page_shift(map) < (int)PAGE_SHIFT) {
6170 DEBUG4K_LOAD("map %p user_stack 0x%llx custom %d user_stack_alloc_size 0x%llx\n", map, user_stack, load_result->custom_stack, load_result->user_stack_alloc_size);
6171 }
6172
6173 if (load_result->user_stack_alloc_size > 0) {
6174 /*
6175 * Allocate enough space for the maximum stack size we
6176 * will ever authorize and an extra page to act as
6177 * a guard page for stack overflows. For default stacks,
6178 * vm_initial_limit_stack takes care of the extra guard page.
6179 * Otherwise we must allocate it ourselves.
6180 */
6181 if (mach_vm_round_page_overflow(load_result->user_stack_alloc_size, &size)) {
6182 return KERN_INVALID_ARGUMENT;
6183 }
6184 addr = vm_map_trunc_page(load_result->user_stack - size,
6185 vm_map_page_mask(map));
6186 kr = mach_vm_allocate_kernel(map, &addr, size,
6187 VM_FLAGS_FIXED, VM_MEMORY_STACK);
6188 if (kr != KERN_SUCCESS) {
6189 // Can't allocate at default location, try anywhere
6190 addr = 0;
6191 kr = mach_vm_allocate_kernel(map, &addr, size,
6192 VM_FLAGS_ANYWHERE, VM_MEMORY_STACK);
6193 if (kr != KERN_SUCCESS) {
6194 return kr;
6195 }
6196
6197 user_stack = addr + size;
6198 load_result->user_stack = (user_addr_t)user_stack;
6199
6200 proc_lock(p);
6201 p->user_stack = (uintptr_t)user_stack;
6202 proc_unlock(p);
6203 }
6204
6205 load_result->user_stack_alloc = (user_addr_t)addr;
6206
6207 /*
6208 * And prevent access to what's above the current stack
6209 * size limit for this process.
6210 */
6211 if (load_result->user_stack_size == 0) {
6212 load_result->user_stack_size = proc_limitgetcur(p, RLIMIT_STACK, TRUE);
6213 prot_size = vm_map_trunc_page(size - load_result->user_stack_size, vm_map_page_mask(map));
6214 } else {
6215 prot_size = PAGE_SIZE;
6216 }
6217
6218 prot_addr = addr;
6219 kr = mach_vm_protect(map,
6220 prot_addr,
6221 prot_size,
6222 FALSE,
6223 VM_PROT_NONE);
6224 if (kr != KERN_SUCCESS) {
6225 (void)mach_vm_deallocate(map, addr, size);
6226 return kr;
6227 }
6228 }
6229
6230 return KERN_SUCCESS;
6231 }
6232
6233 #include <sys/reboot.h>
6234
6235 /*
6236 * load_init_program_at_path
6237 *
6238 * Description: Load the "init" program; in most cases, this will be "launchd"
6239 *
6240 * Parameters: p Process to call execve() to create
6241 * the "init" program
6242 * scratch_addr Page in p, scratch space
6243 * path NULL terminated path
6244 *
6245 * Returns: KERN_SUCCESS Success
6246 * !KERN_SUCCESS See execve/mac_execve for error codes
6247 *
6248 * Notes: The process that is passed in is the first manufactured
6249 * process on the system, and gets here via bsd_ast() firing
6250 * for the first time. This is done to ensure that bsd_init()
6251 * has run to completion.
6252 *
6253 * The address map of the first manufactured process matches the
6254 * word width of the kernel. Once the self-exec completes, the
6255 * initproc might be different.
6256 */
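/*
 * A sketch of the scratch page after the copyouts below (64-bit case,
 * with RB_SINGLE set):
 *
 *     argv0 -> "path\0"
 *     argv1 -> "-s\0" (pointer-aligned)
 *     argp  -> { argv0, argv1, NULL }
 */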
6257 static int
6258 load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
6259 {
6260 int retval[2];
6261 int error;
6262 struct execve_args init_exec_args;
6263 user_addr_t argv0 = USER_ADDR_NULL, argv1 = USER_ADDR_NULL;
6264
6265 /*
6266 * Validate inputs and pre-conditions
6267 */
6268 assert(p);
6269 assert(scratch_addr);
6270 assert(path);
6271
6272 /*
6273 * Copy out program name.
6274 */
6275 size_t path_length = strlen(path) + 1;
6276 argv0 = scratch_addr;
6277 error = copyout(path, argv0, path_length);
6278 if (error) {
6279 return error;
6280 }
6281
6282 scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, sizeof(user_addr_t));
6283
6284 /*
6285 * Put out first (and only) argument, similarly.
6286 * Assumes everything fits in a page as allocated above.
6287 */
6288 if (boothowto & RB_SINGLE) {
6289 const char *init_args = "-s";
6290 size_t init_args_length = strlen(init_args) + 1;
6291
6292 argv1 = scratch_addr;
6293 error = copyout(init_args, argv1, init_args_length);
6294 if (error) {
6295 return error;
6296 }
6297
6298 scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, sizeof(user_addr_t));
6299 }
6300
6301 if (proc_is64bit(p)) {
6302 user64_addr_t argv64bit[3] = {};
6303
6304 argv64bit[0] = argv0;
6305 argv64bit[1] = argv1;
6306 argv64bit[2] = USER_ADDR_NULL;
6307
6308 error = copyout(argv64bit, scratch_addr, sizeof(argv64bit));
6309 if (error) {
6310 return error;
6311 }
6312 } else {
6313 user32_addr_t argv32bit[3] = {};
6314
6315 argv32bit[0] = (user32_addr_t)argv0;
6316 argv32bit[1] = (user32_addr_t)argv1;
6317 argv32bit[2] = USER_ADDR_NULL;
6318
6319 error = copyout(argv32bit, scratch_addr, sizeof(argv32bit));
6320 if (error) {
6321 return error;
6322 }
6323 }
6324
6325 /*
6326 * Set up argument block for fake call to execve.
6327 */
6328 init_exec_args.fname = argv0;
6329 init_exec_args.argp = scratch_addr;
6330 init_exec_args.envp = USER_ADDR_NULL;
6331
6332 /*
6333 * So that init task is set with uid,gid 0 token
6334 */
6335 set_security_token(p);
6336
6337 return execve(p, &init_exec_args, retval);
6338 }
6339
6340 static const char * init_programs[] = {
6341 #if DEBUG
6342 "/usr/appleinternal/sbin/launchd.debug",
6343 #endif
6344 #if DEVELOPMENT || DEBUG
6345 "/usr/appleinternal/sbin/launchd.development",
6346 #endif
6347 "/sbin/launchd",
6348 };
6349
6350 /*
6351 * load_init_program
6352 *
6353 * Description: Load the "init" program; in most cases, this will be "launchd"
6354 *
6355 * Parameters: p Process to call execve() to create
6356 * the "init" program
6357 *
6358 * Returns: (void)
6359 *
6360 * Notes: The process that is passed in is the first manufactured
6361 * process on the system, and gets here via bsd_ast() firing
6362 * for the first time. This is done to ensure that bsd_init()
6363 * has run to completion.
6364 *
6365 * In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
6366 * may be used to select a specific launchd executable. As with
6367 * the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
6368 * will force /sbin/launchd to be selected.
6369 *
6370 * Search order by build:
6371 *
6372 * DEBUG DEVELOPMENT RELEASE PATH
6373 * ----------------------------------------------------------------------------------
6374 * 1 1 NA /usr/appleinternal/sbin/launchd.$LAUNCHDSUFFIX
6375 * 2 NA NA /usr/appleinternal/sbin/launchd.debug
6376 * 3 2 NA /usr/appleinternal/sbin/launchd.development
6377 * 4 3 1 /sbin/launchd
6378 */
6379 void
6380 load_init_program(proc_t p)
6381 {
6382 uint32_t i;
6383 int error;
6384 vm_map_t map = current_map();
6385 mach_vm_offset_t scratch_addr = 0;
6386 mach_vm_size_t map_page_size = vm_map_page_size(map);
6387
6388 (void) mach_vm_allocate_kernel(map, &scratch_addr, map_page_size, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE);
6389 #if CONFIG_MEMORYSTATUS
6390 (void) memorystatus_init_at_boot_snapshot();
6391 #endif /* CONFIG_MEMORYSTATUS */
6392
6393 #if __has_feature(ptrauth_calls)
6394 PE_parse_boot_argn("vm_shared_region_per_team_id", &vm_shared_region_per_team_id, sizeof(vm_shared_region_per_team_id));
6395 PE_parse_boot_argn("vm_shared_region_by_entitlement", &vm_shared_region_by_entitlement, sizeof(vm_shared_region_by_entitlement));
6396 PE_parse_boot_argn("vm_shared_region_reslide_aslr", &vm_shared_region_reslide_aslr, sizeof(vm_shared_region_reslide_aslr));
6397 PE_parse_boot_argn("vm_shared_region_reslide_restrict", &vm_shared_region_reslide_restrict, sizeof(vm_shared_region_reslide_restrict));
6398 #endif /* __has_feature(ptrauth_calls) */
6399
6400 #if DEBUG || DEVELOPMENT
6401 #if XNU_TARGET_OS_OSX
6402 PE_parse_boot_argn("unentitled_ios_sim_launch", &unentitled_ios_sim_launch, sizeof(unentitled_ios_sim_launch));
6403 #endif /* XNU_TARGET_OS_OSX */
6404
6405 /* Check for boot-arg suffix first */
6406 char launchd_suffix[64];
6407 if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
6408 char launchd_path[128];
6409 boolean_t is_release_suffix = ((launchd_suffix[0] == 0) ||
6410 (strcmp(launchd_suffix, "release") == 0));
6411
6412 if (is_release_suffix) {
6413 printf("load_init_program: attempting to load /sbin/launchd\n");
6414 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, "/sbin/launchd");
6415 if (!error) {
6416 return;
6417 }
6418
6419 panic("Process 1 exec of launchd.release failed, errno %d", error);
6420 } else {
6421 strlcpy(launchd_path, "/usr/appleinternal/sbin/launchd.", sizeof(launchd_path));
6422 strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
6423
6424 printf("load_init_program: attempting to load %s\n", launchd_path);
6425 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, launchd_path);
6426 if (!error) {
6427 return;
6428 } else if (error != ENOENT) {
6429 printf("load_init_program: failed loading %s: errno %d\n", launchd_path, error);
6430 }
6431 }
6432 }
6433 #endif
6434
6435 error = ENOENT;
6436 for (i = 0; i < sizeof(init_programs) / sizeof(init_programs[0]); i++) {
6437 printf("load_init_program: attempting to load %s\n", init_programs[i]);
6438 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, init_programs[i]);
6439 if (!error) {
6440 return;
6441 } else if (error != ENOENT) {
6442 printf("load_init_program: failed loading %s: errno %d\n", init_programs[i], error);
6443 }
6444 }
6445
6446 panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i - 1]), error);
6447 }
6448
6449 /*
6450 * load_return_to_errno
6451 *
6452 * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
6453 *
6454 * Parameters: lrtn Mach error number
6455 *
6456 * Returns: (int) BSD error number
6457 * 0 Success
6458 * EBADARCH Bad architecture
6459 * EBADMACHO Bad Mach object file
6460 * ESHLIBVERS Bad shared library version
6461 * ENOMEM Out of memory/resource shortage
6462 * EACCES Access denied
6463 * ENOENT Entry not found (usually "file does not exist")
6464 * EIO An I/O error occurred
6465 * EAUTH Image decryption failed
6466 * EBADEXEC The executable is corrupt/unknown
6467 */
6468 static int
6469 load_return_to_errno(load_return_t lrtn)
6470 {
6471 switch (lrtn) {
6472 case LOAD_SUCCESS:
6473 return 0;
6474 case LOAD_BADARCH:
6475 return EBADARCH;
6476 case LOAD_BADMACHO:
6477 case LOAD_BADMACHO_UPX:
6478 return EBADMACHO;
6479 case LOAD_SHLIB:
6480 return ESHLIBVERS;
6481 case LOAD_NOSPACE:
6482 case LOAD_RESOURCE:
6483 return ENOMEM;
6484 case LOAD_PROTECT:
6485 return EACCES;
6486 case LOAD_ENOENT:
6487 return ENOENT;
6488 case LOAD_IOERROR:
6489 return EIO;
6490 case LOAD_DECRYPTFAIL:
6491 return EAUTH;
6492 case LOAD_FAILURE:
6493 default:
6494 return EBADEXEC;
6495 }
6496 }
6497
6498 #include <mach/mach_types.h>
6499 #include <mach/vm_prot.h>
6500 #include <mach/semaphore.h>
6501 #include <mach/sync_policy.h>
6502 #include <kern/clock.h>
6503 #include <mach/kern_return.h>
6504
6505 /*
6506 * execargs_alloc
6507 *
6508 * Description: Allocate the block of memory used by the execve arguments.
6509 * At the same time, we allocate a page so that we can read in
6510 * the first page of the image.
6511 *
6512 * Parameters: struct image_params * the image parameter block
6513 *
6514 * Returns: 0 Success
6515 * EINVAL Invalid argument
6516 * EACCES Permission denied
6517 * EINTR Interrupted function
6518 * ENOMEM Not enough space
6519 *
6520 * Notes: This is a temporary allocation into the kernel address space
6521 * to enable us to copy arguments in from user space. This is
6522 * necessitated by not mapping the process calling execve() into
6523 * the kernel address space during the execve() system call.
6524 *
6525 * We assemble the argument and environment, etc., into this
6526 * region before copying it as a single block into the child
6527 * process address space (at the top or bottom of the stack,
6528 * depending on which way the stack grows; see the function
6529 * exec_copyout_strings() for details).
6530 *
6531 * This ends up with a second (possibly unnecessary) copy compared
6532 * with assembling the data directly into the child address space,
6533 * but since we cannot guarantee that the parent has not modified
6534 * its environment, we cannot assume the data is already laid out
6535 * there as a single contiguous block.
6536 */
6537
6538
6539 static int execargs_waiters = 0;
6540 lck_mtx_t *execargs_cache_lock;
6541
6542 static void
6543 execargs_lock_lock(void)
6544 {
6545 lck_mtx_lock_spin(execargs_cache_lock);
6546 }
6547
6548 static void
6549 execargs_lock_unlock(void)
6550 {
6551 lck_mtx_unlock(execargs_cache_lock);
6552 }
6553
6554 static wait_result_t
6555 execargs_lock_sleep(void)
6556 {
6557 return lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE);
6558 }
6559
6560 static kern_return_t
6561 execargs_purgeable_allocate(char **execarg_address)
6562 {
6563 kern_return_t kr = vm_allocate_kernel(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE, VM_KERN_MEMORY_NONE);
6564 assert(kr == KERN_SUCCESS);
6565 return kr;
6566 }
6567
6568 static kern_return_t
6569 execargs_purgeable_reference(void *execarg_address)
6570 {
6571 int state = VM_PURGABLE_NONVOLATILE;
6572 kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
6573
6574 assert(kr == KERN_SUCCESS);
6575 return kr;
6576 }
6577
6578 static kern_return_t
6579 execargs_purgeable_volatilize(void *execarg_address)
6580 {
6581 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
6582 kern_return_t kr;
6583 kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
6584
6585 assert(kr == KERN_SUCCESS);
6586
6587 return kr;
6588 }
6589
6590 static void
6591 execargs_wakeup_waiters(void)
6592 {
6593 thread_wakeup(&execargs_free_count);
6594 }
6595
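/*
 * Execargs buffers are cached and left purgeable while idle:
 * execargs_alloc() either reuses a cached buffer (re-pinning it
 * NONVOLATILE via execargs_purgeable_reference()) or allocates a fresh
 * purgeable region, and execargs_free() marks the buffer VOLATILE again
 * before returning it to the cache, so the VM system may reclaim the
 * pages under memory pressure.
 */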
6596 static int
6597 execargs_alloc(struct image_params *imgp)
6598 {
6599 kern_return_t kret;
6600 wait_result_t res;
6601 int i, cache_index = -1;
6602
6603 execargs_lock_lock();
6604
6605 while (execargs_free_count == 0) {
6606 execargs_waiters++;
6607 res = execargs_lock_sleep();
6608 execargs_waiters--;
6609 if (res != THREAD_AWAKENED) {
6610 execargs_lock_unlock();
6611 return EINTR;
6612 }
6613 }
6614
6615 execargs_free_count--;
6616
6617 for (i = 0; i < execargs_cache_size; i++) {
6618 vm_offset_t element = execargs_cache[i];
6619 if (element) {
6620 cache_index = i;
6621 imgp->ip_strings = (char *)(execargs_cache[i]);
6622 execargs_cache[i] = 0;
6623 break;
6624 }
6625 }
6626
6627 assert(execargs_free_count >= 0);
6628
6629 execargs_lock_unlock();
6630
6631 if (cache_index == -1) {
6632 kret = execargs_purgeable_allocate(&imgp->ip_strings);
6633 } else {
6634 kret = execargs_purgeable_reference(imgp->ip_strings);
6635 }
6636
6637 assert(kret == KERN_SUCCESS);
6638 if (kret != KERN_SUCCESS) {
6639 return ENOMEM;
6640 }
6641
6642 /* last page used to read in file headers */
6643 imgp->ip_vdata = imgp->ip_strings + (NCARGS + PAGE_SIZE);
6644 imgp->ip_strendp = imgp->ip_strings;
6645 imgp->ip_argspace = NCARGS;
6646 imgp->ip_strspace = (NCARGS + PAGE_SIZE);
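/*
 * Resulting layout (a sketch): NCARGS bytes charged to argv/envv,
 * roughly one further page of string space for the apple[] vector
 * (ip_strspace spans both), and the trailing page at ip_vdata for
 * reading in the image header.
 */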
6647
6648 return 0;
6649 }
6650
6651 /*
6652 * execargs_free
6653 *
6654 * Description: Free the block of memory used by the execve arguments and the
6655 * first page of the executable by a previous call to the function
6656 * execargs_alloc().
6657 *
6658 * Parameters: struct image_params * the image parameter block
6659 *
6660 * Returns: 0 Success
6661 * EINVAL Invalid argument
6662 * EINTR Operation interrupted
6663 */
6664 static int
6665 execargs_free(struct image_params *imgp)
6666 {
6667 kern_return_t kret;
6668 int i;
6669 boolean_t needs_wakeup = FALSE;
6670
6671 kret = execargs_purgeable_volatilize(imgp->ip_strings);
6672
6673 execargs_lock_lock();
6674 execargs_free_count++;
6675
6676 for (i = 0; i < execargs_cache_size; i++) {
6677 vm_offset_t element = execargs_cache[i];
6678 if (element == 0) {
6679 execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
6680 imgp->ip_strings = NULL;
6681 break;
6682 }
6683 }
6684
6685 assert(imgp->ip_strings == NULL);
6686
6687 if (execargs_waiters > 0) {
6688 needs_wakeup = TRUE;
6689 }
6690
6691 execargs_lock_unlock();
6692
6693 if (needs_wakeup == TRUE) {
6694 execargs_wakeup_waiters();
6695 }
6696
6697 return kret == KERN_SUCCESS ? 0 : EINVAL;
6698 }
6699
6700 static void
6701 exec_resettextvp(proc_t p, struct image_params *imgp)
6702 {
6703 vnode_t vp;
6704 off_t offset;
6705 vnode_t tvp = p->p_textvp;
6706 int ret;
6707
6708 vp = imgp->ip_vp;
6709 offset = imgp->ip_arch_offset;
6710
6711 if (vp == NULLVP) {
6712 panic("exec_resettextvp: expected valid vp");
6713 }
6714
6715 ret = vnode_ref(vp);
6716 proc_lock(p);
6717 if (ret == 0) {
6718 p->p_textvp = vp;
6719 p->p_textoff = offset;
6720 } else {
6721 p->p_textvp = NULLVP; /* this is paranoia */
6722 p->p_textoff = 0;
6723 }
6724 proc_unlock(p);
6725
6726 if (tvp != NULLVP) {
6727 if (vnode_getwithref(tvp) == 0) {
6728 vnode_rele(tvp);
6729 vnode_put(tvp);
6730 }
6731 }
6732 }
6733
6734 // Includes the 0-byte (therefore "SIZE" instead of "LEN").
6735 static const size_t CS_CDHASH_STRING_SIZE = CS_CDHASH_LEN * 2 + 1;
6736
6737 static void
6738 cdhash_to_string(char str[CS_CDHASH_STRING_SIZE], uint8_t const * const cdhash)
6739 {
6740 static char const nibble[] = "0123456789abcdef";
6741
6742 /* Apparently still the safest way to get a hex representation
6743 * of binary data.
6744 * xnu's printf routines have %*D/%20D in theory, but "not really", see:
6745 * <rdar://problem/33328859> confusion around %*D/%nD in printf
6746 */
6747 for (int i = 0; i < CS_CDHASH_LEN; ++i) {
6748 str[i * 2] = nibble[(cdhash[i] & 0xf0) >> 4];
6749 str[i * 2 + 1] = nibble[cdhash[i] & 0x0f];
6750 }
6751 str[CS_CDHASH_STRING_SIZE - 1] = 0;
6752 }
6753
6754 /*
6755 * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
6756 *
6757 * Description: Waits for the userspace daemon to respond to the request
6758 * we made. Function declared non inline to be visible in
6759 * stackshots and spindumps as well as debugging.
6760 */
6761 __attribute__((noinline)) int
6762 __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
6763 {
6764 return find_code_signature(task_access_port, new_pid);
6765 }
6766
6767 static int
6768 check_for_signature(proc_t p, struct image_params *imgp)
6769 {
6770 mach_port_t port = IPC_PORT_NULL;
6771 kern_return_t kr = KERN_FAILURE;
6772 int error = EACCES;
6773 boolean_t unexpected_failure = FALSE;
6774 struct cs_blob *csb;
6775 boolean_t require_success = FALSE;
6776 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
6777 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
6778 os_reason_t signature_failure_reason = OS_REASON_NULL;
6779
6780 /*
6781 * Override inherited code signing flags with the
6782 * ones for the process that is being successfully
6783 * loaded
6784 */
6785 proc_lock(p);
6786 p->p_csflags = imgp->ip_csflags;
6787 proc_unlock(p);
6788
6789 /* Set the switch_protect flag on the map */
6790 if (p->p_csflags & (CS_HARD | CS_KILL)) {
6791 vm_map_switch_protect(get_task_map(p->task), TRUE);
6792 }
6793 /* set the cs_enforced flags in the map */
6794 if (p->p_csflags & CS_ENFORCEMENT) {
6795 vm_map_cs_enforcement_set(get_task_map(p->task), TRUE);
6796 } else {
6797 vm_map_cs_enforcement_set(get_task_map(p->task), FALSE);
6798 }
6799
6800 /*
6801 * Image activation may fail due to a policy decision. This is
6802 * unexpected, but if the security framework does not approve
6803 * of the exec, kill the process and return immediately.
6804 */
6805 if (imgp->ip_mac_return != 0) {
6806 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
6807 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY, 0, 0);
6808 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
6809 error = imgp->ip_mac_return;
6810 unexpected_failure = TRUE;
6811 goto done;
6812 }
6813
6814 if (imgp->ip_cs_error != OS_REASON_NULL) {
6815 signature_failure_reason = imgp->ip_cs_error;
6816 imgp->ip_cs_error = OS_REASON_NULL;
6817 error = EACCES;
6818 goto done;
6819 }
6820
6821 #if XNU_TARGET_OS_OSX
6822 /* Check for platform passed in spawn attr if iOS binary is being spawned */
6823 if (proc_platform(p) == PLATFORM_IOS) {
6824 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
6825 if (psa == NULL || psa->psa_platform == 0) {
6826 boolean_t no_sandbox_entitled = FALSE;
6827 #if DEBUG || DEVELOPMENT
6828 /*
6829 * Allow iOS binaries to spawn on internal systems
6830 * if the no-sandbox entitlement is present or the
6831 * unentitled_ios_sim_launch boot-arg is set to true
6832 */
6833 if (unentitled_ios_sim_launch) {
6834 no_sandbox_entitled = TRUE;
6835 } else {
6836 no_sandbox_entitled = IOVnodeHasEntitlement(imgp->ip_vp,
6837 (int64_t)imgp->ip_arch_offset, "com.apple.private.security.no-sandbox");
6838 }
6839 #endif /* DEBUG || DEVELOPMENT */
6840 if (!no_sandbox_entitled) {
6841 signature_failure_reason = os_reason_create(OS_REASON_EXEC,
6842 EXEC_EXIT_REASON_WRONG_PLATFORM);
6843 error = EACCES;
6844 goto done;
6845 }
6846 printf("Allowing spawn of iOS binary %s since it has "
6847 "com.apple.private.security.no-sandbox entitlement or unentitled_ios_sim_launch "
6848 "boot-arg set to true\n", p->p_name);
6849 } else if (psa->psa_platform != PLATFORM_IOS) {
6850 /* Simulator binary spawned with wrong platform */
6851 signature_failure_reason = os_reason_create(OS_REASON_EXEC,
6852 EXEC_EXIT_REASON_WRONG_PLATFORM);
6853 error = EACCES;
6854 goto done;
6855 } else {
6856 printf("Allowing spawn of iOS binary %s since correct platform was passed in spawn\n",
6857 p->p_name);
6858 }
6859 }
6860 #endif /* XNU_TARGET_OS_OSX */
6861
6862 /* If the code signature came through the image activation path, we skip the
6863 * taskgated / externally attached path. */
6864 if (imgp->ip_csflags & CS_SIGNED) {
6865 error = 0;
6866 goto done;
6867 }
6868
6869 /* The rest of the code is for signatures that either already have been externally
6870 * attached (likely, but not necessarily by a previous run through the taskgated
6871 * path), or that will now be attached by taskgated. */
6872
6873 kr = task_get_task_access_port(p->task, &port);
6874 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
6875 error = 0;
6876 if (require_success) {
6877 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
6878 p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT, 0, 0);
6879 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT);
6880 error = EACCES;
6881 }
6882 goto done;
6883 }
6884
6885 /*
6886 * taskgated returns KERN_SUCCESS if it has completed its work
6887 * and the exec should continue, KERN_FAILURE if the exec should
6888 * fail, or it may error out with different error code in an
6889 * event of mig failure (e.g. process was signalled during the
6890 * rpc call, taskgated died, mig server died etc.).
6891 */
6892
6893 kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
6894 switch (kr) {
6895 case KERN_SUCCESS:
6896 error = 0;
6897 break;
6898 case KERN_FAILURE:
6899 error = EACCES;
6900
6901 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
6902 p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG, 0, 0);
6903 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG);
6904 goto done;
6905 default:
6906 error = EACCES;
6907
6908 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
6909 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER, 0, 0);
6910 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER);
6911 unexpected_failure = TRUE;
6912 goto done;
6913 }
6914
6915 /* Only do this if exec_resettextvp() did not fail */
6916 if (p->p_textvp != NULLVP) {
6917 csb = ubc_cs_blob_get(p->p_textvp, -1, -1, p->p_textoff);
6918
6919 if (csb != NULL) {
6920 /* As the enforcement we can do here is very limited, we only allow things that
6921 * are the only reason why this code path still exists:
6922 * Adhoc signed non-platform binaries without special cs_flags and without any
6923 * entitlements (unrestricted ones still pass AMFI). */
6924 if (
6925 /* Revalidate the blob if necessary through bumped generation count. */
6926 (ubc_cs_generation_check(p->p_textvp) == 0 ||
6927 ubc_cs_blob_revalidate(p->p_textvp, csb, imgp, 0, proc_platform(p)) == 0) &&
6928 /* Only CS_ADHOC, no CS_KILL, CS_HARD etc. */
6929 (csb->csb_flags & CS_ALLOWED_MACHO) == CS_ADHOC &&
6930 /* If it has a CMS blob, it's not adhoc. The CS_ADHOC flag can lie. */
6931 csblob_find_blob_bytes((const uint8_t *)csb->csb_mem_kaddr, csb->csb_mem_size,
6932 CSSLOT_SIGNATURESLOT,
6933 CSMAGIC_BLOBWRAPPER) == NULL &&
6934 /* It could still be in a trust cache (unlikely with CS_ADHOC), or a magic path. */
6935 csb->csb_platform_binary == 0 &&
6936 /* No entitlements, not even unrestricted ones. */
6937 csb->csb_entitlements_blob == NULL) {
6938 proc_lock(p);
6939 p->p_csflags |= CS_SIGNED | CS_VALID;
6940 proc_unlock(p);
6941 } else {
6942 uint8_t cdhash[CS_CDHASH_LEN];
6943 char cdhash_string[CS_CDHASH_STRING_SIZE];
6944 proc_getcdhash(p, cdhash);
6945 cdhash_to_string(cdhash_string, cdhash);
6946 printf("ignoring detached code signature on '%s' with cdhash '%s' "
6947 "because it is invalid, or not a simple adhoc signature.\n",
6948 p->p_name, cdhash_string);
6949 }
6950 }
6951 }
6952
6953 done:
6954 if (0 == error) {
6955 /* The process's code signature related properties are
6956 * fully set up, so this is an opportune moment to log
6957 * platform binary execution, if desired. */
6958 if (platform_exec_logging != 0 && csproc_get_platform_binary(p)) {
6959 uint8_t cdhash[CS_CDHASH_LEN];
6960 char cdhash_string[CS_CDHASH_STRING_SIZE];
6961 proc_getcdhash(p, cdhash);
6962 cdhash_to_string(cdhash_string, cdhash);
6963
6964 os_log(peLog, "CS Platform Exec Logging: Executing platform signed binary "
6965 "'%s' with cdhash %s\n", p->p_name, cdhash_string);
6966 }
6967 } else {
6968 if (!unexpected_failure) {
6969 p->p_csflags |= CS_KILLED;
6970 }
6971 /* make very sure execution fails */
6972 if (vfexec || spawn) {
6973 assert(signature_failure_reason != OS_REASON_NULL);
6974 psignal_vfork_with_reason(p, p->task, imgp->ip_new_thread,
6975 SIGKILL, signature_failure_reason);
6976 signature_failure_reason = OS_REASON_NULL;
6977 error = 0;
6978 } else {
6979 assert(signature_failure_reason != OS_REASON_NULL);
6980 psignal_with_reason(p, SIGKILL, signature_failure_reason);
6981 signature_failure_reason = OS_REASON_NULL;
6982 }
6983 }
6984
6985 if (port != IPC_PORT_NULL) {
6986 ipc_port_release_send(port);
6987 }
6988
6989 /* If we hit this, we likely would have leaked an exit reason */
6990 assert(signature_failure_reason == OS_REASON_NULL);
6991 return error;
6992 }
6993
6994 /*
6995 * Typically as soon as we start executing this process, the
6996 * first instruction will trigger a VM fault to bring the text
6997 * pages (as executable) into the address space, followed soon
6998 * thereafter by dyld data structures (for dynamic executable).
6999 * To optimize this, as well as improve support for hardware
7000 * debuggers that can only access resident pages present
7001 * in the process' page tables, we prefault some pages if
7002 * possible. Errors are non-fatal.
7003 */
7004 #ifndef PREVENT_CALLER_STACK_USE
7005 #define PREVENT_CALLER_STACK_USE __attribute__((noinline))
7006 #endif
7007 static void PREVENT_CALLER_STACK_USE
7008 exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
7009 {
7010 int ret;
7011 size_t expected_all_image_infos_size;
7012 kern_return_t kr;
7013
7014 /*
7015 * Prefault executable or dyld entry point.
7016 */
7017 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
7018 DEBUG4K_LOAD("entry_point 0x%llx\n", (uint64_t)load_result->entry_point);
7019 }
7020 kr = vm_fault(current_map(),
7021 vm_map_trunc_page(load_result->entry_point,
7022 vm_map_page_mask(current_map())),
7023 VM_PROT_READ | VM_PROT_EXECUTE,
7024 FALSE, VM_KERN_MEMORY_NONE,
7025 THREAD_UNINT, NULL, 0);
7026 if (kr != KERN_SUCCESS) {
7027 DEBUG4K_ERROR("map %p va 0x%llx -> 0x%x\n", current_map(), (uint64_t)vm_map_trunc_page(load_result->entry_point, vm_map_page_mask(current_map())), kr);
7028 }
7029
7030 if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
7031 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
7032 } else {
7033 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
7034 }
7035
7036 /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
7037 if (load_result->dynlinker &&
7038 load_result->all_image_info_addr &&
7039 load_result->all_image_info_size >= expected_all_image_infos_size) {
7040 union {
7041 struct user64_dyld_all_image_infos infos64;
7042 struct user32_dyld_all_image_infos infos32;
7043 } all_image_infos;
7044
7045 /*
7046 * Pre-fault to avoid copyin() going through the trap handler
7047 * and recovery path.
7048 */
7049 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
7050 DEBUG4K_LOAD("all_image_info_addr 0x%llx\n", load_result->all_image_info_addr);
7051 }
7052 kr = vm_fault(current_map(),
7053 vm_map_trunc_page(load_result->all_image_info_addr,
7054 vm_map_page_mask(current_map())),
7055 VM_PROT_READ | VM_PROT_WRITE,
7056 FALSE, VM_KERN_MEMORY_NONE,
7057 THREAD_UNINT, NULL, 0);
7058 if (kr != KERN_SUCCESS) {
7059 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(load_result->all_image_info_addr, vm_map_page_mask(current_map())), kr);
7060 }
7061 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
7062 /* all_image_infos straddles a page */
7063 kr = vm_fault(current_map(),
7064 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
7065 vm_map_page_mask(current_map())),
7066 VM_PROT_READ | VM_PROT_WRITE,
7067 FALSE, VM_KERN_MEMORY_NONE,
7068 THREAD_UNINT, NULL, 0);
7069 if (kr != KERN_SUCCESS) {
7070 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size -1, vm_map_page_mask(current_map())), kr);
7071 }
7072 }
7073
7074 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
7075 DEBUG4K_LOAD("copyin(0x%llx, 0x%lx)\n", load_result->all_image_info_addr, expected_all_image_infos_size);
7076 }
7077 ret = copyin((user_addr_t)load_result->all_image_info_addr,
7078 &all_image_infos,
7079 expected_all_image_infos_size);
7080 if (ret == 0 && all_image_infos.infos32.version >= DYLD_ALL_IMAGE_INFOS_ADDRESS_MINIMUM_VERSION) {
7081 user_addr_t notification_address;
7082 user_addr_t dyld_image_address;
7083 user_addr_t dyld_version_address;
7084 user_addr_t dyld_all_image_infos_address;
7085 user_addr_t dyld_slide_amount;
7086
7087 if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
7088 notification_address = (user_addr_t)all_image_infos.infos64.notification;
7089 dyld_image_address = (user_addr_t)all_image_infos.infos64.dyldImageLoadAddress;
7090 dyld_version_address = (user_addr_t)all_image_infos.infos64.dyldVersion;
7091 dyld_all_image_infos_address = (user_addr_t)all_image_infos.infos64.dyldAllImageInfosAddress;
7092 } else {
7093 notification_address = all_image_infos.infos32.notification;
7094 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
7095 dyld_version_address = all_image_infos.infos32.dyldVersion;
7096 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
7097 }
7098
7099 /*
7100 * dyld statically sets up the all_image_infos in its Mach-O
7101 * binary at static link time, with pointers relative to its default
7102 * load address. Since ASLR might slide dyld before its first
7103 * instruction is executed, "dyld_slide_amount" tells us how far
7104 * dyld was loaded compared to its default expected load address.
7105 * All other pointers into dyld's image should be adjusted by this
7106 * amount. At some point later, dyld will fix up pointers to take
7107 * into account the slide, at which point the all_image_infos_address
7108 * field in the structure will match the runtime load address, and
7109 * "dyld_slide_amount" will be 0, if we were to consult it again.
7110 */
7111
7112 dyld_slide_amount = (user_addr_t)load_result->all_image_info_addr - dyld_all_image_infos_address;
7113
7114 #if 0
7115 kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
7116 (uint64_t)load_result->all_image_info_addr,
7117 all_image_infos.infos32.version,
7118 (uint64_t)notification_address,
7119 (uint64_t)dyld_image_address,
7120 (uint64_t)dyld_version_address,
7121 (uint64_t)dyld_all_image_infos_address);
7122 #endif
7123
7124 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
7125 DEBUG4K_LOAD("notification_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)notification_address, (uint64_t)dyld_slide_amount);
7126 }
7127 kr = vm_fault(current_map(),
7128 vm_map_trunc_page(notification_address + dyld_slide_amount,
7129 vm_map_page_mask(current_map())),
7130 VM_PROT_READ | VM_PROT_EXECUTE,
7131 FALSE, VM_KERN_MEMORY_NONE,
7132 THREAD_UNINT, NULL, 0);
7133 if (kr != KERN_SUCCESS) {
7134 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(notification_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
7135 }
7136 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
7137 DEBUG4K_LOAD("dyld_image_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)dyld_image_address, (uint64_t)dyld_slide_amount);
7138 }
7139 kr = vm_fault(current_map(),
7140 vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
7141 vm_map_page_mask(current_map())),
7142 VM_PROT_READ | VM_PROT_EXECUTE,
7143 FALSE, VM_KERN_MEMORY_NONE,
7144 THREAD_UNINT, NULL, 0);
7145 if (kr != KERN_SUCCESS) {
7146 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_image_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
7147 }
7148 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
7149 DEBUG4K_LOAD("dyld_version_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)dyld_version_address, (uint64_t)dyld_slide_amount);
7150 }
7151 kr = vm_fault(current_map(),
7152 vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
7153 vm_map_page_mask(current_map())),
7154 VM_PROT_READ,
7155 FALSE, VM_KERN_MEMORY_NONE,
7156 THREAD_UNINT, NULL, 0);
7157 if (kr != KERN_SUCCESS) {
7158 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_version_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
7159 }
7160 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
7161 DEBUG4K_LOAD("dyld_all_image_infos_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)dyld_version_address, (uint64_t)dyld_slide_amount);
7162 }
7163 kr = vm_fault(current_map(),
7164 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
7165 vm_map_page_mask(current_map())),
7166 VM_PROT_READ | VM_PROT_WRITE,
7167 FALSE, VM_KERN_MEMORY_NONE,
7168 THREAD_UNINT, NULL, 0);
7169 if (kr != KERN_SUCCESS) {
7170 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
7171 }
7172 }
7173 }
7174 }