]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exec.c
xnu-4570.71.2.tar.gz
[apple/xnu.git] / bsd / kern / kern_exec.c
1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved. The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36 /*-
37 * Copyright (c) 1982, 1986, 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 * (c) UNIX System Laboratories, Inc.
40 * All or some portions of this file are derived from material licensed
41 * to the University of California by American Telephone and Telegraph
42 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
43 * the permission of UNIX System Laboratories, Inc.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 * notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 * notice, this list of conditions and the following disclaimer in the
52 * documentation and/or other materials provided with the distribution.
53 * 3. All advertising materials mentioning features or use of this software
54 * must display the following acknowledgement:
55 * This product includes software developed by the University of
56 * California, Berkeley and its contributors.
57 * 4. Neither the name of the University nor the names of its contributors
58 * may be used to endorse or promote products derived from this software
59 * without specific prior written permission.
60 *
61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71 * SUCH DAMAGE.
72 *
73 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
74 */
75 /*
76 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
79 * Version 2.0.
80 */
81 #include <machine/reg.h>
82 #include <machine/cpu_capabilities.h>
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/filedesc.h>
87 #include <sys/kernel.h>
88 #include <sys/proc_internal.h>
89 #include <sys/kauth.h>
90 #include <sys/user.h>
91 #include <sys/socketvar.h>
92 #include <sys/malloc.h>
93 #include <sys/namei.h>
94 #include <sys/mount_internal.h>
95 #include <sys/vnode_internal.h>
96 #include <sys/file_internal.h>
97 #include <sys/stat.h>
98 #include <sys/uio_internal.h>
99 #include <sys/acct.h>
100 #include <sys/exec.h>
101 #include <sys/kdebug.h>
102 #include <sys/signal.h>
103 #include <sys/aio_kern.h>
104 #include <sys/sysproto.h>
105 #include <sys/persona.h>
106 #include <sys/reason.h>
107 #if SYSV_SHM
108 #include <sys/shm_internal.h> /* shmexec() */
109 #endif
110 #include <sys/ubc_internal.h> /* ubc_map() */
111 #include <sys/spawn.h>
112 #include <sys/spawn_internal.h>
113 #include <sys/process_policy.h>
114 #include <sys/codesign.h>
115 #include <sys/random.h>
116 #include <crypto/sha1.h>
117
118 #include <libkern/libkern.h>
119
120 #include <security/audit/audit.h>
121
122 #include <ipc/ipc_types.h>
123
124 #include <mach/mach_types.h>
125 #include <mach/port.h>
126 #include <mach/task.h>
127 #include <mach/task_access.h>
128 #include <mach/thread_act.h>
129 #include <mach/vm_map.h>
130 #include <mach/mach_vm.h>
131 #include <mach/vm_param.h>
132
133 #include <kern/sched_prim.h> /* thread_wakeup() */
134 #include <kern/affinity.h>
135 #include <kern/assert.h>
136 #include <kern/task.h>
137 #include <kern/coalition.h>
138 #include <kern/policy_internal.h>
139 #include <kern/kalloc.h>
140
141 #include <os/log.h>
142
143 #if CONFIG_MACF
144 #include <security/mac_framework.h>
145 #include <security/mac_mach_internal.h>
146 #endif
147
148 #include <vm/vm_map.h>
149 #include <vm/vm_kern.h>
150 #include <vm/vm_protos.h>
151 #include <vm/vm_kern.h>
152 #include <vm/vm_fault.h>
153 #include <vm/vm_pageout.h>
154
155 #include <kdp/kdp_dyld.h>
156
157 #include <machine/pal_routines.h>
158
159 #include <pexpert/pexpert.h>
160
161 #if CONFIG_MEMORYSTATUS
162 #include <sys/kern_memorystatus.h>
163 #endif
164
165 #if CONFIG_DTRACE
166 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
167 extern void dtrace_proc_exec(proc_t);
168 extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);
169
170 /*
171 * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
172 * we will store its value before actually calling it.
173 */
174 static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;
175
176 #include <sys/dtrace_ptss.h>
177 #endif
178
179 /* support for child creation in exec after vfork */
180 thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit, int in_exec);
181 void vfork_exit(proc_t p, int rv);
182 extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
183 extern void task_set_did_exec_flag(task_t task);
184 extern void task_clear_exec_copy_flag(task_t task);
185 proc_t proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread);
186 boolean_t task_is_active(task_t);
187 boolean_t thread_is_active(thread_t thread);
188 void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread);
189 void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task);
190 extern void ipc_importance_release(void *elem);
191
192 /*
193 * Mach things for which prototypes are unavailable from Mach headers
194 */
195 void ipc_task_reset(
196 task_t task);
197 void ipc_thread_reset(
198 thread_t thread);
199 kern_return_t ipc_object_copyin(
200 ipc_space_t space,
201 mach_port_name_t name,
202 mach_msg_type_name_t msgt_name,
203 ipc_object_t *objectp);
204 void ipc_port_release_send(ipc_port_t);
205
206 #if DEVELOPMENT || DEBUG
207 void task_importance_update_owner_info(task_t);
208 #endif
209
210 extern struct savearea *get_user_regs(thread_t);
211
212 __attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);
213
214 #include <kern/thread.h>
215 #include <kern/task.h>
216 #include <kern/ast.h>
217 #include <kern/mach_loader.h>
218 #include <kern/mach_fat.h>
219 #include <mach-o/fat.h>
220 #include <mach-o/loader.h>
221 #include <machine/vmparam.h>
222 #include <sys/imgact.h>
223
224 #include <sys/sdt.h>
225
226
227 /*
228 * EAI_ITERLIMIT The maximum number of times to iterate an image
229 * activator in exec_activate_image() before treating
230 * it as malformed/corrupt.
231 */
232 #define EAI_ITERLIMIT 3
233
/*
 * For #! interpreter parsing
 *
 * IS_WHITESPACE(ch)	true for a blank or a tab
 * IS_EOL(ch)		true for a comment character ('#') or a newline
 *
 * The argument is fully parenthesized so that an expression argument
 * (for example a conditional expression) is compared as a whole.  It is
 * still evaluated twice, so do not pass an argument with side effects.
 */
#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
#define IS_EOL(ch) (((ch) == '#') || ((ch) == '\n'))
239
240 extern vm_map_t bsd_pageable_map;
241 extern const struct fileops vnops;
242
243 #define USER_ADDR_ALIGN(addr, val) \
244 ( ( (user_addr_t)(addr) + (val) - 1) \
245 & ~((val) - 1) )
246
247 /* Platform Code Exec Logging */
248 static int platform_exec_logging = 0;
249
250 SYSCTL_DECL(_security_mac);
251
252 SYSCTL_INT(_security_mac, OID_AUTO, platform_exec_logging, CTLFLAG_RW, &platform_exec_logging, 0,
253 "log cdhashes for all platform binary executions");
254
255 static os_log_t peLog = OS_LOG_DEFAULT;
256
257 struct image_params; /* Forward */
258 static int exec_activate_image(struct image_params *imgp);
259 static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
260 static int load_return_to_errno(load_return_t lrtn);
261 static int execargs_alloc(struct image_params *imgp);
262 static int execargs_free(struct image_params *imgp);
263 static int exec_check_permissions(struct image_params *imgp);
264 static int exec_extract_strings(struct image_params *imgp);
265 static int exec_add_apple_strings(struct image_params *imgp, const load_result_t *load_result);
266 static int exec_handle_sugid(struct image_params *imgp);
267 static int sugid_scripts = 0;
268 SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
269 static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
270 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
271 static void exec_resettextvp(proc_t, struct image_params *);
272 static int check_for_signature(proc_t, struct image_params *);
273 static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
274 static errno_t exec_handle_port_actions(struct image_params *imgp, boolean_t * portwatch_present, ipc_port_t * portwatch_ports);
275 static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
276 ipc_port_t * portwatch_ports, int portwatch_count);
277
278 /*
279 * exec_add_user_string
280 *
281 * Add the requested string to the string space area.
282 *
283 * Parameters; struct image_params * image parameter block
284 * user_addr_t string to add to strings area
285 * int segment from which string comes
286 * boolean_t TRUE if string contributes to NCARGS
287 *
288 * Returns: 0 Success
289 * !0 Failure errno from copyinstr()
290 *
291 * Implicit returns:
292 * (imgp->ip_strendp) updated location of next add, if any
293 * (imgp->ip_strspace) updated byte count of space remaining
294 * (imgp->ip_argspace) updated byte count of space in NCARGS
295 */
296 static int
297 exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
298 {
299 int error = 0;
300
301 do {
302 size_t len = 0;
303 int space;
304
305 if (is_ncargs)
306 space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
307 else
308 space = imgp->ip_strspace;
309
310 if (space <= 0) {
311 error = E2BIG;
312 break;
313 }
314
315 if (!UIO_SEG_IS_USER_SPACE(seg)) {
316 char *kstr = CAST_DOWN(char *,str); /* SAFE */
317 error = copystr(kstr, imgp->ip_strendp, space, &len);
318 } else {
319 error = copyinstr(str, imgp->ip_strendp, space, &len);
320 }
321
322 imgp->ip_strendp += len;
323 imgp->ip_strspace -= len;
324 if (is_ncargs)
325 imgp->ip_argspace -= len;
326
327 } while (error == ENAMETOOLONG);
328
329 return error;
330 }
331
332 /*
333 * dyld is now passed the executable path as a getenv-like variable
334 * in the same fashion as the stack_guard and malloc_entropy keys.
335 */
336 #define EXECUTABLE_KEY "executable_path="
337
338 /*
339 * exec_save_path
340 *
341 * To support new app package launching for Mac OS X, the dyld needs the
342 * first argument to execve() stored on the user stack.
343 *
344 * Save the executable path name at the bottom of the strings area and set
345 * the argument vector pointer to the location following that to indicate
346 * the start of the argument and environment tuples, setting the remaining
347 * string space count to the size of the string area minus the path length.
348 *
349 * Parameters; struct image_params * image parameter block
350 * char * path used to invoke program
351 * int segment from which path comes
352 *
353 * Returns: int 0 Success
354 * EFAULT Bad address
355 * copy[in]str:EFAULT Bad address
356 * copy[in]str:ENAMETOOLONG Filename too long
357 *
358 * Implicit returns:
359 * (imgp->ip_strings) saved path
360 * (imgp->ip_strspace) space remaining in ip_strings
361 * (imgp->ip_strendp) start of remaining copy area
362 * (imgp->ip_argspace) space remaining of NCARGS
363 * (imgp->ip_applec) Initial applev[0]
364 *
365 * Note: We have to do this before the initial namei() since in the
366 * path contains symbolic links, namei() will overwrite the
367 * original path buffer contents. If the last symbolic link
368 * resolved was a relative pathname, we would lose the original
369 * "path", which could be an absolute pathname. This might be
370 * unacceptable for dyld.
371 */
372 static int
373 exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath)
374 {
375 int error;
376 size_t len;
377 char *kpath;
378
379 // imgp->ip_strings can come out of a cache, so we need to obliterate the
380 // old path.
381 memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);
382
383 len = MIN(MAXPATHLEN, imgp->ip_strspace);
384
385 switch(seg) {
386 case UIO_USERSPACE32:
387 case UIO_USERSPACE64: /* Same for copyin()... */
388 error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
389 break;
390 case UIO_SYSSPACE:
391 kpath = CAST_DOWN(char *,path); /* SAFE */
392 error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
393 break;
394 default:
395 error = EFAULT;
396 break;
397 }
398
399 if (!error) {
400 bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
401 len += strlen(EXECUTABLE_KEY);
402
403 imgp->ip_strendp += len;
404 imgp->ip_strspace -= len;
405
406 if (excpath) {
407 *excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
408 }
409 }
410
411 return(error);
412 }
413
414 /*
415 * exec_reset_save_path
416 *
417 * If we detect a shell script, we need to reset the string area
418 * state so that the interpreter can be saved onto the stack.
419
420 * Parameters; struct image_params * image parameter block
421 *
422 * Returns: int 0 Success
423 *
424 * Implicit returns:
425 * (imgp->ip_strings) saved path
426 * (imgp->ip_strspace) space remaining in ip_strings
427 * (imgp->ip_strendp) start of remaining copy area
428 * (imgp->ip_argspace) space remaining of NCARGS
429 *
430 */
431 static int
432 exec_reset_save_path(struct image_params *imgp)
433 {
434 imgp->ip_strendp = imgp->ip_strings;
435 imgp->ip_argspace = NCARGS;
436 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
437
438 return (0);
439 }
440
441 /*
442 * exec_shell_imgact
443 *
444 * Image activator for interpreter scripts. If the image begins with
445 * the characters "#!", then it is an interpreter script. Verify the
446 * length of the script line indicating the interpreter is not in
447 * excess of the maximum allowed size. If this is the case, then
448 * break out the arguments, if any, which are separated by white
449 * space, and copy them into the argument save area as if they were
450 * provided on the command line before all other arguments. The line
451 * ends when we encounter a comment character ('#') or newline.
452 *
453 * Parameters; struct image_params * image parameter block
454 *
455 * Returns: -1 not an interpreter (keep looking)
456 * -3 Success: interpreter: relookup
457 * >0 Failure: interpreter: error number
458 *
459 * A return value other than -1 indicates subsequent image activators should
460 * not be given the opportunity to attempt to activate the image.
461 */
462 static int
463 exec_shell_imgact(struct image_params *imgp)
464 {
465 char *vdata = imgp->ip_vdata;
466 char *ihp;
467 char *line_startp, *line_endp;
468 char *interp;
469
470 /*
471 * Make sure it's a shell script. If we've already redirected
472 * from an interpreted file once, don't do it again.
473 */
474 if (vdata[0] != '#' ||
475 vdata[1] != '!' ||
476 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
477 return (-1);
478 }
479
480 if (imgp->ip_origcputype != 0) {
481 /* Fat header previously matched, don't allow shell script inside */
482 return (-1);
483 }
484
485 imgp->ip_flags |= IMGPF_INTERPRET;
486 imgp->ip_interp_sugid_fd = -1;
487 imgp->ip_interp_buffer[0] = '\0';
488
489 /* Check to see if SUGID scripts are permitted. If they aren't then
490 * clear the SUGID bits.
491 * imgp->ip_vattr is known to be valid.
492 */
493 if (sugid_scripts == 0) {
494 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
495 }
496
497 /* Try to find the first non-whitespace character */
498 for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
499 if (IS_EOL(*ihp)) {
500 /* Did not find interpreter, "#!\n" */
501 return (ENOEXEC);
502 } else if (IS_WHITESPACE(*ihp)) {
503 /* Whitespace, like "#! /bin/sh\n", keep going. */
504 } else {
505 /* Found start of interpreter */
506 break;
507 }
508 }
509
510 if (ihp == &vdata[IMG_SHSIZE]) {
511 /* All whitespace, like "#! " */
512 return (ENOEXEC);
513 }
514
515 line_startp = ihp;
516
517 /* Try to find the end of the interpreter+args string */
518 for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
519 if (IS_EOL(*ihp)) {
520 /* Got it */
521 break;
522 } else {
523 /* Still part of interpreter or args */
524 }
525 }
526
527 if (ihp == &vdata[IMG_SHSIZE]) {
528 /* A long line, like "#! blah blah blah" without end */
529 return (ENOEXEC);
530 }
531
532 /* Backtrack until we find the last non-whitespace */
533 while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
534 ihp--;
535 }
536
537 /* The character after the last non-whitespace is our logical end of line */
538 line_endp = ihp + 1;
539
540 /*
541 * Now we have pointers to the usable part of:
542 *
543 * "#! /usr/bin/int first second third \n"
544 * ^ line_startp ^ line_endp
545 */
546
547 /* copy the interpreter name */
548 interp = imgp->ip_interp_buffer;
549 for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
550 *interp++ = *ihp;
551 *interp = '\0';
552
553 exec_reset_save_path(imgp);
554 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
555 UIO_SYSSPACE, NULL);
556
557 /* Copy the entire interpreter + args for later processing into argv[] */
558 interp = imgp->ip_interp_buffer;
559 for ( ihp = line_startp; (ihp < line_endp); ihp++)
560 *interp++ = *ihp;
561 *interp = '\0';
562
563 #if !SECURE_KERNEL
564 /*
565 * If we have an SUID or SGID script, create a file descriptor
566 * from the vnode and pass /dev/fd/%d instead of the actual
567 * path name so that the script does not get opened twice
568 */
569 if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
570 proc_t p;
571 struct fileproc *fp;
572 int fd;
573 int error;
574
575 p = vfs_context_proc(imgp->ip_vfs_context);
576 error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
577 if (error)
578 return(error);
579
580 fp->f_fglob->fg_flag = FREAD;
581 fp->f_fglob->fg_ops = &vnops;
582 fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
583
584 proc_fdlock(p);
585 procfdtbl_releasefd(p, fd, NULL);
586 fp_drop(p, fd, fp, 1);
587 proc_fdunlock(p);
588 vnode_ref(imgp->ip_vp);
589
590 imgp->ip_interp_sugid_fd = fd;
591 }
592 #endif
593
594 return (-3);
595 }
596
597
598
599 /*
600 * exec_fat_imgact
601 *
602 * Image activator for fat 1.0 binaries. If the binary is fat, then we
603 * need to select an image from it internally, and make that the image
604 * we are going to attempt to execute. At present, this consists of
605 * reloading the first page for the image with a first page from the
606 * offset location indicated by the fat header.
607 *
608 * Parameters; struct image_params * image parameter block
609 *
610 * Returns: -1 not a fat binary (keep looking)
611 * -2 Success: encapsulated binary: reread
612 * >0 Failure: error number
613 *
614 * Important: This image activator is byte order neutral.
615 *
616 * Note: A return value other than -1 indicates subsequent image
617 * activators should not be given the opportunity to attempt
618 * to activate the image.
619 *
620 * If we find an encapsulated binary, we make no assertions
621 * about its validity; instead, we leave that up to a rescan
622 * for an activator to claim it, and, if it is claimed by one,
623 * that activator is responsible for determining validity.
624 */
625 static int
626 exec_fat_imgact(struct image_params *imgp)
627 {
628 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
629 kauth_cred_t cred = kauth_cred_proc_ref(p);
630 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
631 struct _posix_spawnattr *psa = NULL;
632 struct fat_arch fat_arch;
633 int resid, error;
634 load_return_t lret;
635
636 if (imgp->ip_origcputype != 0) {
637 /* Fat header previously matched, don't allow another fat file inside */
638 error = -1; /* not claimed */
639 goto bad;
640 }
641
642 /* Make sure it's a fat binary */
643 if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
644 error = -1; /* not claimed */
645 goto bad;
646 }
647
648 /* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */
649 lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
650 if (lret != LOAD_SUCCESS) {
651 error = load_return_to_errno(lret);
652 goto bad;
653 }
654
655 /* If posix_spawn binprefs exist, respect those prefs. */
656 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
657 if (psa != NULL && psa->psa_binprefs[0] != 0) {
658 uint32_t pr = 0;
659
660 /* Check each preference listed against all arches in header */
661 for (pr = 0; pr < NBINPREFS; pr++) {
662 cpu_type_t pref = psa->psa_binprefs[pr];
663 if (pref == 0) {
664 /* No suitable arch in the pref list */
665 error = EBADARCH;
666 goto bad;
667 }
668
669 if (pref == CPU_TYPE_ANY) {
670 /* Fall through to regular grading */
671 goto regular_grading;
672 }
673
674 lret = fatfile_getbestarch_for_cputype(pref,
675 (vm_offset_t)fat_header,
676 PAGE_SIZE,
677 &fat_arch);
678 if (lret == LOAD_SUCCESS) {
679 goto use_arch;
680 }
681 }
682
683 /* Requested binary preference was not honored */
684 error = EBADEXEC;
685 goto bad;
686 }
687
688 regular_grading:
689 /* Look up our preferred architecture in the fat file. */
690 lret = fatfile_getbestarch((vm_offset_t)fat_header,
691 PAGE_SIZE,
692 &fat_arch);
693 if (lret != LOAD_SUCCESS) {
694 error = load_return_to_errno(lret);
695 goto bad;
696 }
697
698 use_arch:
699 /* Read the Mach-O header out of fat_arch */
700 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
701 PAGE_SIZE, fat_arch.offset,
702 UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED),
703 cred, &resid, p);
704 if (error) {
705 goto bad;
706 }
707
708 if (resid) {
709 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
710 }
711
712 /* Success. Indicate we have identified an encapsulated binary */
713 error = -2;
714 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
715 imgp->ip_arch_size = (user_size_t)fat_arch.size;
716 imgp->ip_origcputype = fat_arch.cputype;
717 imgp->ip_origcpusubtype = fat_arch.cpusubtype;
718
719 bad:
720 kauth_cred_unref(&cred);
721 return (error);
722 }
723
724 static int
725 activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result)
726 {
727 int ret;
728
729 task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
730 if (result->is64bit) {
731 task_set_64bit(task, TRUE);
732 OSBitOrAtomic(P_LP64, &p->p_flag);
733 } else {
734 task_set_64bit(task, FALSE);
735 OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
736 }
737
738 ret = thread_state_initialize(thread);
739 if (ret != KERN_SUCCESS) {
740 return ret;
741 }
742
743 if (result->threadstate) {
744 uint32_t *ts = result->threadstate;
745 uint32_t total_size = result->threadstate_sz;
746
747 while (total_size > 0) {
748 uint32_t flavor = *ts++;
749 uint32_t size = *ts++;
750
751 ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
752 if (ret) {
753 return ret;
754 }
755 ts += size;
756 total_size -= (size + 2) * sizeof(uint32_t);
757 }
758 }
759
760 thread_setentrypoint(thread, result->entry_point);
761
762 return KERN_SUCCESS;
763 }
764
765
766 /*
767 * Set p->p_comm and p->p_name to the name passed to exec
768 */
769 static void
770 set_proc_name(struct image_params *imgp, proc_t p)
771 {
772 int p_name_len = sizeof(p->p_name) - 1;
773
774 if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len) {
775 imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
776 }
777
778 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
779 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
780 p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
781
782 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) {
783 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
784 }
785
786 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
787 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
788 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
789 }
790
791 static uint64_t get_va_fsid(struct vnode_attr *vap)
792 {
793 if (VATTR_IS_SUPPORTED(vap, va_fsid64)) {
794 return *(uint64_t *)&vap->va_fsid64;
795 } else {
796 return vap->va_fsid;
797 }
798 }
799
800 /*
801 * exec_mach_imgact
802 *
803 * Image activator for mach-o 1.0 binaries.
804 *
805 * Parameters; struct image_params * image parameter block
806 *
807 * Returns: -1 not a Mach-O executable (keep looking)
808 * -2 Success: encapsulated binary: reread
809 * >0 Failure: error number
810 * EBADARCH Mach-o binary, but with an unrecognized
811 * architecture
812 * ENOMEM No memory for child process after -
813 * can only happen after vfork()
814 *
815 * Important: This image activator is NOT byte order neutral.
816 *
817 * Note: A return value other than -1 indicates subsequent image
818 * activators should not be given the opportunity to attempt
819 * to activate the image.
820 *
821 * TODO: More gracefully handle failures after vfork
822 */
823 static int
824 exec_mach_imgact(struct image_params *imgp)
825 {
826 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
827 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
828 int error = 0;
829 task_t task;
830 task_t new_task = NULL; /* protected by vfexec */
831 thread_t thread;
832 struct uthread *uthread;
833 vm_map_t old_map = VM_MAP_NULL;
834 vm_map_t map = VM_MAP_NULL;
835 load_return_t lret;
836 load_result_t load_result;
837 struct _posix_spawnattr *psa = NULL;
838 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
839 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
840 int exec = (imgp->ip_flags & IMGPF_EXEC);
841 os_reason_t exec_failure_reason = OS_REASON_NULL;
842
843 /*
844 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
845 * is a reserved field on the end, so for the most part, we can
846 * treat them as if they were identical. Reverse-endian Mach-O
847 * binaries are recognized but not compatible.
848 */
849 if ((mach_header->magic == MH_CIGAM) ||
850 (mach_header->magic == MH_CIGAM_64)) {
851 error = EBADARCH;
852 goto bad;
853 }
854
855 if ((mach_header->magic != MH_MAGIC) &&
856 (mach_header->magic != MH_MAGIC_64)) {
857 error = -1;
858 goto bad;
859 }
860
861 if (mach_header->filetype != MH_EXECUTE) {
862 error = -1;
863 goto bad;
864 }
865
866 if (imgp->ip_origcputype != 0) {
867 /* Fat header previously had an idea about this thin file */
868 if (imgp->ip_origcputype != mach_header->cputype ||
869 imgp->ip_origcpusubtype != mach_header->cpusubtype) {
870 error = EBADARCH;
871 goto bad;
872 }
873 } else {
874 imgp->ip_origcputype = mach_header->cputype;
875 imgp->ip_origcpusubtype = mach_header->cpusubtype;
876 }
877
878 task = current_task();
879 thread = current_thread();
880 uthread = get_bsdthread_info(thread);
881
882 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
883 imgp->ip_flags |= IMGPF_IS_64BIT;
884
885 /* If posix_spawn binprefs exist, respect those prefs. */
886 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
887 if (psa != NULL && psa->psa_binprefs[0] != 0) {
888 int pr = 0;
889 for (pr = 0; pr < NBINPREFS; pr++) {
890 cpu_type_t pref = psa->psa_binprefs[pr];
891 if (pref == 0) {
892 /* No suitable arch in the pref list */
893 error = EBADARCH;
894 goto bad;
895 }
896
897 if (pref == CPU_TYPE_ANY) {
898 /* Jump to regular grading */
899 goto grade;
900 }
901
902 if (pref == imgp->ip_origcputype) {
903 /* We have a match! */
904 goto grade;
905 }
906 }
907 error = EBADARCH;
908 goto bad;
909 }
910 grade:
911 if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
912 error = EBADARCH;
913 goto bad;
914 }
915
916 /* Copy in arguments/environment from the old process */
917 error = exec_extract_strings(imgp);
918 if (error)
919 goto bad;
920
921 AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
922 imgp->ip_endargv - imgp->ip_startargv);
923 AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
924 imgp->ip_endenvv - imgp->ip_endargv);
925
926 /*
927 * We are being called to activate an image subsequent to a vfork()
928 * operation; in this case, we know that our task, thread, and
929 * uthread are actually those of our parent, and our proc, which we
930 * obtained indirectly from the image_params vfs_context_t, is the
931 * new child process.
932 */
933 if (vfexec) {
934 imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT), FALSE);
935 /* task and thread ref returned, will be released in __mac_execve */
936 if (imgp->ip_new_thread == NULL) {
937 error = ENOMEM;
938 goto bad;
939 }
940 }
941
942
943 /* reset local idea of thread, uthread, task */
944 thread = imgp->ip_new_thread;
945 uthread = get_bsdthread_info(thread);
946 task = new_task = get_threadtask(thread);
947
948 /*
949 * Load the Mach-O file.
950 *
951 * NOTE: An error after this point indicates we have potentially
952 * destroyed or overwritten some process state while attempting an
953 * execve() following a vfork(), which is an unrecoverable condition.
954 * We send the new process an immediate SIGKILL to avoid it executing
955 * any instructions in the mutated address space. For true spawns,
956 * this is not the case, and "too late" is still not too late to
957 * return an error code to the parent process.
958 */
959
960 /*
961 * Actually load the image file we previously decided to load.
962 */
963 lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
964 if (lret != LOAD_SUCCESS) {
965 error = load_return_to_errno(lret);
966
967 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
968 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO, 0, 0);
969 if (lret == LOAD_BADMACHO_UPX) {
970 /* set anything that might be useful in the crash report */
971 set_proc_name(imgp, p);
972
973 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_UPX);
974 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
975 exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
976 } else if (lret == LOAD_BADARCH_X86) {
977 /* set anything that might be useful in the crash report */
978 set_proc_name(imgp, p);
979
980 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_NO32EXEC);
981 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
982 exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
983 } else {
984 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
985 }
986
987 goto badtoolate;
988 }
989
990 proc_lock(p);
991 p->p_cputype = imgp->ip_origcputype;
992 p->p_cpusubtype = imgp->ip_origcpusubtype;
993 proc_unlock(p);
994
995 vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
996
997 /*
998 * Set code-signing flags if this binary is signed, or if parent has
999 * requested them on exec.
1000 */
1001 if (load_result.csflags & CS_VALID) {
1002 imgp->ip_csflags |= load_result.csflags &
1003 (CS_VALID|CS_SIGNED|CS_DEV_CODE|
1004 CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|
1005 CS_ENTITLEMENTS_VALIDATED|CS_DYLD_PLATFORM|
1006 CS_ENTITLEMENT_FLAGS|
1007 CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
1008 } else {
1009 imgp->ip_csflags &= ~CS_VALID;
1010 }
1011
1012 if (p->p_csflags & CS_EXEC_SET_HARD)
1013 imgp->ip_csflags |= CS_HARD;
1014 if (p->p_csflags & CS_EXEC_SET_KILL)
1015 imgp->ip_csflags |= CS_KILL;
1016 if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
1017 imgp->ip_csflags |= CS_ENFORCEMENT;
1018 if (p->p_csflags & CS_EXEC_INHERIT_SIP) {
1019 if (p->p_csflags & CS_INSTALLER)
1020 imgp->ip_csflags |= CS_INSTALLER;
1021 if (p->p_csflags & CS_DATAVAULT_CONTROLLER)
1022 imgp->ip_csflags |= CS_DATAVAULT_CONTROLLER;
1023 if (p->p_csflags & CS_NVRAM_UNRESTRICTED)
1024 imgp->ip_csflags |= CS_NVRAM_UNRESTRICTED;
1025 }
1026
1027 /*
1028 * Set up the system reserved areas in the new address space.
1029 */
1030 vm_map_exec(map, task, load_result.is64bit, (void *)p->p_fd->fd_rdir, cpu_type());
1031
1032 /*
1033 * Close file descriptors which specify close-on-exec.
1034 */
1035 fdexec(p, psa != NULL ? psa->psa_flags : 0, exec);
1036
1037 /*
1038 * deal with set[ug]id.
1039 */
1040 error = exec_handle_sugid(imgp);
1041 if (error) {
1042 vm_map_deallocate(map);
1043
1044 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
1045 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, 0, 0);
1046 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE);
1047 goto badtoolate;
1048 }
1049
1050 /*
1051 * Commit to new map.
1052 *
1053 * Swap the new map for the old for target task, which consumes
1054 * our new map reference but each leaves us responsible for the
1055 * old_map reference. That lets us get off the pmap associated
1056 * with it, and then we can release it.
1057 *
1058 * The map needs to be set on the target task which is different
1059 * than current task, thus swap_task_map is used instead of
1060 * vm_map_switch.
1061 */
1062 old_map = swap_task_map(task, thread, map);
1063 vm_map_deallocate(old_map);
1064 old_map = NULL;
1065
1066 lret = activate_exec_state(task, p, thread, &load_result);
1067 if (lret != KERN_SUCCESS) {
1068
1069 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
1070 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE, 0, 0);
1071 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE);
1072 goto badtoolate;
1073 }
1074
1075 /*
1076 * deal with voucher on exec-calling thread.
1077 */
1078 if (imgp->ip_new_thread == NULL)
1079 thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
1080
1081 /* Make sure we won't interrupt ourself signalling a partial process */
1082 if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
1083 psignal(p, SIGTRAP);
1084
1085 if (load_result.unixproc &&
1086 create_unix_stack(get_task_map(task),
1087 &load_result,
1088 p) != KERN_SUCCESS) {
1089 error = load_return_to_errno(LOAD_NOSPACE);
1090
1091 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
1092 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC, 0, 0);
1093 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC);
1094 goto badtoolate;
1095 }
1096
1097 error = exec_add_apple_strings(imgp, &load_result);
1098 if (error) {
1099
1100 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
1101 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT, 0, 0);
1102 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT);
1103 goto badtoolate;
1104 }
1105
1106 /* Switch to target task's map to copy out strings */
1107 old_map = vm_map_switch(get_task_map(task));
1108
1109 if (load_result.unixproc) {
1110 user_addr_t ap;
1111
1112 /*
1113 * Copy the strings area out into the new process address
1114 * space.
1115 */
1116 ap = p->user_stack;
1117 error = exec_copyout_strings(imgp, &ap);
1118 if (error) {
1119 vm_map_switch(old_map);
1120
1121 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
1122 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, 0, 0);
1123 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS);
1124 goto badtoolate;
1125 }
1126 /* Set the stack */
1127 thread_setuserstack(thread, ap);
1128 }
1129
1130 if (load_result.dynlinker) {
1131 uint64_t ap;
1132 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
1133
1134 /* Adjust the stack */
1135 ap = thread_adjuserstack(thread, -new_ptr_size);
1136 error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
1137
1138 if (error) {
1139 vm_map_switch(old_map);
1140
1141 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
1142 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, 0, 0);
1143 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER);
1144 goto badtoolate;
1145 }
1146 task_set_dyld_info(task, load_result.all_image_info_addr,
1147 load_result.all_image_info_size);
1148 }
1149
1150 /* Avoid immediate VM faults back into kernel */
1151 exec_prefault_data(p, imgp, &load_result);
1152
1153 vm_map_switch(old_map);
1154
1155 /* Stop profiling */
1156 stopprofclock(p);
1157
1158 /*
1159 * Reset signal state.
1160 */
1161 execsigs(p, thread);
1162
1163 /*
1164 * need to cancel async IO requests that can be cancelled and wait for those
1165 * already active. MAY BLOCK!
1166 */
1167 _aio_exec( p );
1168
1169 #if SYSV_SHM
1170 /* FIXME: Till vmspace inherit is fixed: */
1171 if (!vfexec && p->vm_shm)
1172 shmexec(p);
1173 #endif
1174 #if SYSV_SEM
1175 /* Clean up the semaphores */
1176 semexit(p);
1177 #endif
1178
1179 /*
1180 * Remember file name for accounting.
1181 */
1182 p->p_acflag &= ~AFORK;
1183
1184 set_proc_name(imgp, p);
1185
1186 #if CONFIG_SECLUDED_MEMORY
1187 if (secluded_for_apps &&
1188 load_result.platform_binary) {
1189 if (strncmp(p->p_name,
1190 "Camera",
1191 sizeof (p->p_name)) == 0) {
1192 task_set_could_use_secluded_mem(task, TRUE);
1193 } else {
1194 task_set_could_use_secluded_mem(task, FALSE);
1195 }
1196 if (strncmp(p->p_name,
1197 "mediaserverd",
1198 sizeof (p->p_name)) == 0) {
1199 task_set_could_also_use_secluded_mem(task, TRUE);
1200 }
1201 }
1202 #endif /* CONFIG_SECLUDED_MEMORY */
1203
1204 pal_dbg_set_task_name(task);
1205
1206 /*
1207 * The load result will have already been munged by AMFI to include the
1208 * platform binary flag if boot-args dictated it (AMFI will mark anything
1209 * that doesn't go through the upcall path as a platform binary if its
1210 * enforcement is disabled).
1211 */
1212 if (load_result.platform_binary) {
1213 if (cs_debug) {
1214 printf("setting platform binary on task: pid = %d\n", p->p_pid);
1215 }
1216
1217 /*
1218 * We must use 'task' here because the proc's task has not yet been
1219 * switched to the new one.
1220 */
1221 task_set_platform_binary(task, TRUE);
1222 } else {
1223 if (cs_debug) {
1224 printf("clearing platform binary on task: pid = %d\n", p->p_pid);
1225 }
1226
1227 task_set_platform_binary(task, FALSE);
1228 }
1229
1230 #if DEVELOPMENT || DEBUG
1231 /*
1232 * Update the pid and proc name for importance base if any
1233 */
1234 task_importance_update_owner_info(task);
1235 #endif
1236
1237 memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
1238
1239 #if CONFIG_DTRACE
1240 dtrace_proc_exec(p);
1241 #endif
1242
1243 if (kdebug_enable) {
1244 long args[4] = {};
1245
1246 uintptr_t fsid = 0, fileid = 0;
1247 if (imgp->ip_vattr) {
1248 uint64_t fsid64 = get_va_fsid(imgp->ip_vattr);
1249 fsid = fsid64;
1250 fileid = imgp->ip_vattr->va_fileid;
1251 // check for (unexpected) overflow and trace zero in that case
1252 if (fsid != fsid64 || fileid != imgp->ip_vattr->va_fileid) {
1253 fsid = fileid = 0;
1254 }
1255 }
1256 KERNEL_DEBUG_CONSTANT_IST1(TRACE_DATA_EXEC, p->p_pid, fsid, fileid, 0,
1257 (uintptr_t)thread_tid(thread));
1258
1259 /*
1260 * Collect the pathname for tracing
1261 */
1262 kdbg_trace_string(p, &args[0], &args[1], &args[2], &args[3]);
1263 KERNEL_DEBUG_CONSTANT_IST1(TRACE_STRING_EXEC, args[0], args[1],
1264 args[2], args[3], (uintptr_t)thread_tid(thread));
1265 }
1266
1267 /*
1268 * If posix_spawned with the START_SUSPENDED flag, stop the
1269 * process before it runs.
1270 */
1271 if (imgp->ip_px_sa != NULL) {
1272 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1273 if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1274 proc_lock(p);
1275 p->p_stat = SSTOP;
1276 proc_unlock(p);
1277 (void) task_suspend_internal(task);
1278 }
1279 }
1280
1281 /*
1282 * mark as execed, wakeup the process that vforked (if any) and tell
1283 * it that it now has its own resources back
1284 */
1285 OSBitOrAtomic(P_EXEC, &p->p_flag);
1286 proc_resetregister(p);
1287 if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1288 proc_lock(p);
1289 p->p_lflag &= ~P_LPPWAIT;
1290 proc_unlock(p);
1291 wakeup((caddr_t)p->p_pptr);
1292 }
1293
1294 /*
1295 * Pay for our earlier safety; deliver the delayed signals from
1296 * the incomplete vfexec process now that it's complete.
1297 */
1298 if (vfexec && (p->p_lflag & P_LTRACED)) {
1299 psignal_vfork(p, new_task, thread, SIGTRAP);
1300 }
1301
1302 goto done;
1303
1304 badtoolate:
1305 /* Don't allow child process to execute any instructions */
1306 if (!spawn) {
1307 if (vfexec) {
1308 assert(exec_failure_reason != OS_REASON_NULL);
1309 psignal_vfork_with_reason(p, new_task, thread, SIGKILL, exec_failure_reason);
1310 exec_failure_reason = OS_REASON_NULL;
1311 } else {
1312 assert(exec_failure_reason != OS_REASON_NULL);
1313 psignal_with_reason(p, SIGKILL, exec_failure_reason);
1314 exec_failure_reason = OS_REASON_NULL;
1315
1316 if (exec) {
1317 /* Terminate the exec copy task */
1318 task_terminate_internal(task);
1319 }
1320 }
1321
1322 /* We can't stop this system call at this point, so just pretend we succeeded */
1323 error = 0;
1324 } else {
1325 os_reason_free(exec_failure_reason);
1326 exec_failure_reason = OS_REASON_NULL;
1327 }
1328
1329 done:
1330 if (load_result.threadstate) {
1331 kfree(load_result.threadstate, load_result.threadstate_sz);
1332 load_result.threadstate = NULL;
1333 }
1334
1335 bad:
1336 /* If we hit this, we likely would have leaked an exit reason */
1337 assert(exec_failure_reason == OS_REASON_NULL);
1338 return(error);
1339 }
1340
1341
1342
1343
1344 /*
1345 * Our image activator table; this is the table of the image types we are
1346 * capable of loading. We list them in order of preference to ensure the
1347 * fastest image load speed.
1348 *
1349 * XXX hardcoded, for now; should use linker sets
1350 */
1351 struct execsw {
1352 int (*ex_imgact)(struct image_params *);
1353 const char *ex_name;
1354 } execsw[] = {
1355 { exec_mach_imgact, "Mach-o Binary" },
1356 { exec_fat_imgact, "Fat Binary" },
1357 { exec_shell_imgact, "Interpreter Script" },
1358 { NULL, NULL}
1359 };
1360
1361
1362 /*
1363 * exec_activate_image
1364 *
1365 * Description: Iterate through the available image activators, and activate
1366 * the image associated with the imgp structure. We start with
1367 * the activator for Mach-o binaries followed by that for Fat binaries
1368 * for Interpreter scripts.
1369 *
1370 * Parameters: struct image_params * Image parameter block
1371 *
1372 * Returns: 0 Success
1373 * EBADEXEC The executable is corrupt/unknown
1374 * execargs_alloc:EINVAL Invalid argument
1375 * execargs_alloc:EACCES Permission denied
1376 * execargs_alloc:EINTR Interrupted function
1377 * execargs_alloc:ENOMEM Not enough space
1378 * exec_save_path:EFAULT Bad address
1379 * exec_save_path:ENAMETOOLONG Filename too long
1380 * exec_check_permissions:EACCES Permission denied
1381 * exec_check_permissions:ENOEXEC Executable file format error
1382 * exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
1383 * exec_check_permissions:???
1384 * namei:???
1385 * vn_rdwr:??? [anything vn_rdwr can return]
1386 * <ex_imgact>:??? [anything an imgact can return]
1387 * EDEADLK Process is being terminated
1388 */
1389 static int
1390 exec_activate_image(struct image_params *imgp)
1391 {
1392 struct nameidata *ndp = NULL;
1393 const char *excpath;
1394 int error;
1395 int resid;
1396 int once = 1; /* save SGUID-ness for interpreted files */
1397 int i;
1398 int itercount = 0;
1399 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1400
1401 error = execargs_alloc(imgp);
1402 if (error)
1403 goto bad_notrans;
1404
1405 error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
1406 if (error) {
1407 goto bad_notrans;
1408 }
1409
1410 /* Use excpath, which contains the copyin-ed exec path */
1411 DTRACE_PROC1(exec, uintptr_t, excpath);
1412
1413 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
1414 if (ndp == NULL) {
1415 error = ENOMEM;
1416 goto bad_notrans;
1417 }
1418
1419 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1420 UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1421
1422 again:
1423 error = namei(ndp);
1424 if (error)
1425 goto bad_notrans;
1426 imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */
1427 imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */
1428
1429 /*
1430 * Before we start the transition from binary A to binary B, make
1431 * sure another thread hasn't started exiting the process. We grab
1432 * the proc lock to check p_lflag initially, and the transition
1433 * mechanism ensures that the value doesn't change after we release
1434 * the lock.
1435 */
1436 proc_lock(p);
1437 if (p->p_lflag & P_LEXIT) {
1438 error = EDEADLK;
1439 proc_unlock(p);
1440 goto bad_notrans;
1441 }
1442 error = proc_transstart(p, 1, 0);
1443 proc_unlock(p);
1444 if (error)
1445 goto bad_notrans;
1446
1447 error = exec_check_permissions(imgp);
1448 if (error)
1449 goto bad;
1450
1451 /* Copy; avoid invocation of an interpreter overwriting the original */
1452 if (once) {
1453 once = 0;
1454 *imgp->ip_origvattr = *imgp->ip_vattr;
1455 }
1456
1457 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1458 UIO_SYSSPACE, IO_NODELOCKED,
1459 vfs_context_ucred(imgp->ip_vfs_context),
1460 &resid, vfs_context_proc(imgp->ip_vfs_context));
1461 if (error)
1462 goto bad;
1463
1464 if (resid) {
1465 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
1466 }
1467
1468 encapsulated_binary:
1469 /* Limit the number of iterations we will attempt on each binary */
1470 if (++itercount > EAI_ITERLIMIT) {
1471 error = EBADEXEC;
1472 goto bad;
1473 }
1474 error = -1;
1475 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1476
1477 error = (*execsw[i].ex_imgact)(imgp);
1478
1479 switch (error) {
1480 /* case -1: not claimed: continue */
1481 case -2: /* Encapsulated binary, imgp->ip_XXX set for next iteration */
1482 goto encapsulated_binary;
1483
1484 case -3: /* Interpreter */
1485 #if CONFIG_MACF
1486 /*
1487 * Copy the script label for later use. Note that
1488 * the label can be different when the script is
1489 * actually read by the interpreter.
1490 */
1491 if (imgp->ip_scriptlabelp)
1492 mac_vnode_label_free(imgp->ip_scriptlabelp);
1493 imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1494 if (imgp->ip_scriptlabelp == NULL) {
1495 error = ENOMEM;
1496 break;
1497 }
1498 mac_vnode_label_copy(imgp->ip_vp->v_label,
1499 imgp->ip_scriptlabelp);
1500
1501 /*
1502 * Take a ref of the script vnode for later use.
1503 */
1504 if (imgp->ip_scriptvp)
1505 vnode_put(imgp->ip_scriptvp);
1506 if (vnode_getwithref(imgp->ip_vp) == 0)
1507 imgp->ip_scriptvp = imgp->ip_vp;
1508 #endif
1509
1510 nameidone(ndp);
1511
1512 vnode_put(imgp->ip_vp);
1513 imgp->ip_vp = NULL; /* already put */
1514 imgp->ip_ndp = NULL; /* already nameidone */
1515
1516 /* Use excpath, which exec_shell_imgact reset to the interpreter */
1517 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
1518 UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1519
1520 proc_transend(p, 0);
1521 goto again;
1522
1523 default:
1524 break;
1525 }
1526 }
1527
1528 /*
1529 * Call out to allow 3rd party notification of exec.
1530 * Ignore result of kauth_authorize_fileop call.
1531 */
1532 if (error == 0 && kauth_authorize_fileop_has_listeners()) {
1533 kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1534 KAUTH_FILEOP_EXEC,
1535 (uintptr_t)ndp->ni_vp, 0);
1536 }
1537 bad:
1538 proc_transend(p, 0);
1539
1540 bad_notrans:
1541 if (imgp->ip_strings)
1542 execargs_free(imgp);
1543 if (imgp->ip_ndp)
1544 nameidone(imgp->ip_ndp);
1545 if (ndp)
1546 FREE(ndp, M_TEMP);
1547
1548 return (error);
1549 }
1550
1551
1552 /*
1553 * exec_handle_spawnattr_policy
1554 *
1555 * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task.
1556 *
1557 * Parameters: proc_t p process to apply attributes to
1558 * int psa_apptype posix spawn attribute apptype
1559 *
1560 * Returns: 0 Success
1561 */
1562 static errno_t
1563 exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
1564 ipc_port_t * portwatch_ports, int portwatch_count)
1565 {
1566 int apptype = TASK_APPTYPE_NONE;
1567 int qos_clamp = THREAD_QOS_UNSPECIFIED;
1568 int role = TASK_UNSPECIFIED;
1569
1570 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1571 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1572
1573 switch(proctype) {
1574 case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1575 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1576 break;
1577 case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1578 apptype = TASK_APPTYPE_DAEMON_STANDARD;
1579 break;
1580 case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1581 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1582 break;
1583 case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1584 apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1585 break;
1586 case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1587 apptype = TASK_APPTYPE_APP_DEFAULT;
1588 break;
1589 #if !CONFIG_EMBEDDED
1590 case POSIX_SPAWN_PROC_TYPE_APP_TAL:
1591 apptype = TASK_APPTYPE_APP_TAL;
1592 break;
1593 #endif /* !CONFIG_EMBEDDED */
1594 default:
1595 apptype = TASK_APPTYPE_NONE;
1596 /* TODO: Should an invalid value here fail the spawn? */
1597 break;
1598 }
1599 }
1600
1601 if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
1602 switch (psa_qos_clamp) {
1603 case POSIX_SPAWN_PROC_CLAMP_UTILITY:
1604 qos_clamp = THREAD_QOS_UTILITY;
1605 break;
1606 case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
1607 qos_clamp = THREAD_QOS_BACKGROUND;
1608 break;
1609 case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
1610 qos_clamp = THREAD_QOS_MAINTENANCE;
1611 break;
1612 default:
1613 qos_clamp = THREAD_QOS_UNSPECIFIED;
1614 /* TODO: Should an invalid value here fail the spawn? */
1615 break;
1616 }
1617 }
1618
1619 if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
1620 proc_darwin_role_to_task_role(psa_darwin_role, &role);
1621 }
1622
1623 if (apptype != TASK_APPTYPE_NONE ||
1624 qos_clamp != THREAD_QOS_UNSPECIFIED ||
1625 role != TASK_UNSPECIFIED) {
1626 proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, role,
1627 portwatch_ports, portwatch_count);
1628 }
1629
1630 return (0);
1631 }
1632
1633
1634 /*
1635 * exec_handle_port_actions
1636 *
1637 * Description: Go through the _posix_port_actions_t contents,
1638 * calling task_set_special_port, task_set_exception_ports
1639 * and/or audit_session_spawnjoin for the current task.
1640 *
1641 * Parameters: struct image_params * Image parameter block
1642 *
1643 * Returns: 0 Success
1644 * EINVAL Failure
1645 * ENOTSUP Illegal posix_spawn attr flag was set
1646 */
1647 static errno_t
1648 exec_handle_port_actions(struct image_params *imgp, boolean_t * portwatch_present,
1649 ipc_port_t * portwatch_ports)
1650 {
1651 _posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1652 #if CONFIG_AUDIT
1653 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1654 #endif
1655 _ps_port_action_t *act = NULL;
1656 task_t task = get_threadtask(imgp->ip_new_thread);
1657 ipc_port_t port = NULL;
1658 errno_t ret = 0;
1659 int i;
1660 kern_return_t kr;
1661
1662 *portwatch_present = FALSE;
1663
1664 for (i = 0; i < pacts->pspa_count; i++) {
1665 act = &pacts->pspa_actions[i];
1666
1667 if (MACH_PORT_VALID(act->new_port)) {
1668 kr = ipc_object_copyin(get_task_ipcspace(current_task()),
1669 act->new_port, MACH_MSG_TYPE_COPY_SEND,
1670 (ipc_object_t *) &port);
1671
1672 if (kr != KERN_SUCCESS) {
1673 ret = EINVAL;
1674 goto done;
1675 }
1676 } else {
1677 /* it's NULL or DEAD */
1678 port = CAST_MACH_NAME_TO_PORT(act->new_port);
1679 }
1680
1681 switch (act->port_type) {
1682 case PSPA_SPECIAL:
1683 kr = task_set_special_port(task, act->which, port);
1684
1685 if (kr != KERN_SUCCESS)
1686 ret = EINVAL;
1687 break;
1688
1689 case PSPA_EXCEPTION:
1690 kr = task_set_exception_ports(task, act->mask, port,
1691 act->behavior, act->flavor);
1692 if (kr != KERN_SUCCESS)
1693 ret = EINVAL;
1694 break;
1695 #if CONFIG_AUDIT
1696 case PSPA_AU_SESSION:
1697 ret = audit_session_spawnjoin(p, task, port);
1698 if (ret) {
1699 /* audit_session_spawnjoin() has already dropped the reference in case of error. */
1700 goto done;
1701 }
1702
1703 break;
1704 #endif
1705 case PSPA_IMP_WATCHPORTS:
1706 if (portwatch_ports != NULL && IPC_PORT_VALID(port)) {
1707 *portwatch_present = TRUE;
1708 /* hold on to this till end of spawn */
1709 portwatch_ports[i] = port;
1710 } else {
1711 ipc_port_release_send(port);
1712 }
1713
1714 break;
1715 default:
1716 ret = EINVAL;
1717 break;
1718 }
1719
1720 if (ret) {
1721 /* action failed, so release port resources */
1722 ipc_port_release_send(port);
1723 break;
1724 }
1725 }
1726
1727 done:
1728 if (0 != ret)
1729 DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
1730 return (ret);
1731 }
1732
1733 /*
1734 * exec_handle_file_actions
1735 *
1736 * Description: Go through the _posix_file_actions_t contents applying the
1737 * open, close, and dup2 operations to the open file table for
1738 * the current process.
1739 *
1740 * Parameters: struct image_params * Image parameter block
1741 *
1742 * Returns: 0 Success
1743 * ???
1744 *
1745 * Note: Actions are applied in the order specified, with the credential
1746 * of the parent process. This is done to permit the parent
1747 * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1748 * the child following operations the child may in fact not be
1749 * normally permitted to perform.
1750 */
1751 static int
1752 exec_handle_file_actions(struct image_params *imgp, short psa_flags)
1753 {
1754 int error = 0;
1755 int action;
1756 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1757 _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
1758 int ival[2]; /* dummy retval for system calls) */
1759
1760 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1761 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1762
1763 switch(psfa->psfaa_type) {
1764 case PSFA_OPEN: {
1765 /*
1766 * Open is different, in that it requires the use of
1767 * a path argument, which is normally copied in from
1768 * user space; because of this, we have to support an
1769 * open from kernel space that passes an address space
1770 * context of UIO_SYSSPACE, and casts the address
1771 * argument to a user_addr_t.
1772 */
1773 char *bufp = NULL;
1774 struct vnode_attr *vap;
1775 struct nameidata *ndp;
1776 int mode = psfa->psfaa_openargs.psfao_mode;
1777 struct dup2_args dup2a;
1778 struct close_nocancel_args ca;
1779 int origfd;
1780
1781 MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
1782 if (bufp == NULL) {
1783 error = ENOMEM;
1784 break;
1785 }
1786
1787 vap = (struct vnode_attr *) bufp;
1788 ndp = (struct nameidata *) (bufp + sizeof(*vap));
1789
1790 VATTR_INIT(vap);
1791 /* Mask off all but regular access permissions */
1792 mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1793 VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
1794
1795 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
1796 CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1797 imgp->ip_vfs_context);
1798
1799 error = open1(imgp->ip_vfs_context,
1800 ndp,
1801 psfa->psfaa_openargs.psfao_oflag,
1802 vap,
1803 fileproc_alloc_init, NULL,
1804 ival);
1805
1806 FREE(bufp, M_TEMP);
1807
1808 /*
1809 * If there's an error, or we get the right fd by
1810 * accident, then drop out here. This is easier than
1811 * reworking all the open code to preallocate fd
1812 * slots, and internally taking one as an argument.
1813 */
1814 if (error || ival[0] == psfa->psfaa_filedes)
1815 break;
1816
1817 origfd = ival[0];
1818 /*
1819 * If we didn't fall out from an error, we ended up
1820 * with the wrong fd; so now we've got to try to dup2
1821 * it to the right one.
1822 */
1823 dup2a.from = origfd;
1824 dup2a.to = psfa->psfaa_filedes;
1825
1826 /*
1827 * The dup2() system call implementation sets
1828 * ival to newfd in the success case, but we
1829 * can ignore that, since if we didn't get the
1830 * fd we wanted, the error will stop us.
1831 */
1832 error = dup2(p, &dup2a, ival);
1833 if (error)
1834 break;
1835
1836 /*
1837 * Finally, close the original fd.
1838 */
1839 ca.fd = origfd;
1840
1841 error = close_nocancel(p, &ca, ival);
1842 }
1843 break;
1844
1845 case PSFA_DUP2: {
1846 struct dup2_args dup2a;
1847
1848 dup2a.from = psfa->psfaa_filedes;
1849 dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1850
1851 /*
1852 * The dup2() system call implementation sets
1853 * ival to newfd in the success case, but we
1854 * can ignore that, since if we didn't get the
1855 * fd we wanted, the error will stop us.
1856 */
1857 error = dup2(p, &dup2a, ival);
1858 }
1859 break;
1860
1861 case PSFA_CLOSE: {
1862 struct close_nocancel_args ca;
1863
1864 ca.fd = psfa->psfaa_filedes;
1865
1866 error = close_nocancel(p, &ca, ival);
1867 }
1868 break;
1869
1870 case PSFA_INHERIT: {
1871 struct fcntl_nocancel_args fcntla;
1872
1873 /*
1874 * Check to see if the descriptor exists, and
1875 * ensure it's -not- marked as close-on-exec.
1876 *
1877 * Attempting to "inherit" a guarded fd will
1878 * result in a error.
1879 */
1880 fcntla.fd = psfa->psfaa_filedes;
1881 fcntla.cmd = F_GETFD;
1882 if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0)
1883 break;
1884
1885 if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
1886 fcntla.fd = psfa->psfaa_filedes;
1887 fcntla.cmd = F_SETFD;
1888 fcntla.arg = ival[0] & ~FD_CLOEXEC;
1889 error = fcntl_nocancel(p, &fcntla, ival);
1890 }
1891
1892 }
1893 break;
1894
1895 default:
1896 error = EINVAL;
1897 break;
1898 }
1899
1900 /* All file actions failures are considered fatal, per POSIX */
1901
1902 if (error) {
1903 if (PSFA_OPEN == psfa->psfaa_type) {
1904 DTRACE_PROC1(spawn__open__failure, uintptr_t,
1905 psfa->psfaa_openargs.psfao_path);
1906 } else {
1907 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1908 }
1909 break;
1910 }
1911 }
1912
1913 if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
1914 return (error);
1915
1916 /*
1917 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1918 * this spawn only) as if "close on exec" is the default
1919 * disposition of all pre-existing file descriptors. In this case,
1920 * the list of file descriptors mentioned in the file actions
1921 * are the only ones that can be inherited, so mark them now.
1922 *
1923 * The actual closing part comes later, in fdexec().
1924 */
1925 proc_fdlock(p);
1926 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1927 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1928 int fd = psfa->psfaa_filedes;
1929
1930 switch (psfa->psfaa_type) {
1931 case PSFA_DUP2:
1932 fd = psfa->psfaa_openargs.psfao_oflag;
1933 /*FALLTHROUGH*/
1934 case PSFA_OPEN:
1935 case PSFA_INHERIT:
1936 *fdflags(p, fd) |= UF_INHERIT;
1937 break;
1938
1939 case PSFA_CLOSE:
1940 break;
1941 }
1942 }
1943 proc_fdunlock(p);
1944
1945 return (0);
1946 }
1947
1948 #if CONFIG_MACF
1949 /*
1950 * exec_spawnattr_getmacpolicyinfo
1951 */
1952 void *
1953 exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
1954 {
1955 const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1956 int i;
1957
1958 if (psmx == NULL)
1959 return NULL;
1960
1961 for (i = 0; i < psmx->psmx_count; i++) {
1962 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1963 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
1964 if (lenp != NULL)
1965 *lenp = extension->datalen;
1966 return extension->datap;
1967 }
1968 }
1969
1970 if (lenp != NULL)
1971 *lenp = 0;
1972 return NULL;
1973 }
1974
1975 static int
1976 spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
1977 {
1978 _posix_spawn_mac_policy_extensions_t psmx = NULL;
1979 int error = 0;
1980 int copycnt = 0;
1981 int i = 0;
1982
1983 *psmxp = NULL;
1984
1985 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
1986 px_args->mac_extensions_size > PAGE_SIZE) {
1987 error = EINVAL;
1988 goto bad;
1989 }
1990
1991 MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
1992 if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0)
1993 goto bad;
1994
1995 size_t extsize = PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count);
1996 if (extsize == 0 || extsize > px_args->mac_extensions_size) {
1997 error = EINVAL;
1998 goto bad;
1999 }
2000
2001 for (i = 0; i < psmx->psmx_count; i++) {
2002 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
2003 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
2004 error = EINVAL;
2005 goto bad;
2006 }
2007 }
2008
2009 for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
2010 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
2011 void *data = NULL;
2012
2013 MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
2014 if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
2015 FREE(data, M_TEMP);
2016 goto bad;
2017 }
2018 extension->datap = data;
2019 }
2020
2021 *psmxp = psmx;
2022 return 0;
2023
2024 bad:
2025 if (psmx != NULL) {
2026 for (i = 0; i < copycnt; i++)
2027 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
2028 FREE(psmx, M_TEMP);
2029 }
2030 return error;
2031 }
2032
2033 static void
2034 spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
2035 {
2036 int i;
2037
2038 if (psmx == NULL)
2039 return;
2040 for (i = 0; i < psmx->psmx_count; i++)
2041 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
2042 FREE(psmx, M_TEMP);
2043 }
2044 #endif /* CONFIG_MACF */
2045
2046 #if CONFIG_COALITIONS
2047 static inline void spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
2048 {
2049 for (int c = 0; c < COALITION_NUM_TYPES; c++) {
2050 if (coal[c]) {
2051 coalition_remove_active(coal[c]);
2052 coalition_release(coal[c]);
2053 }
2054 }
2055 }
2056 #endif
2057
2058 #if CONFIG_PERSONAS
2059 static int spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
2060 {
2061 int error = 0;
2062 struct persona *persona = NULL;
2063 int verify = px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_VERIFY;
2064
2065 /*
2066 * TODO: rdar://problem/19981151
2067 * Add entitlement check!
2068 */
2069 if (!kauth_cred_issuser(kauth_cred_get()))
2070 return EPERM;
2071
2072 persona = persona_lookup(px_persona->pspi_id);
2073 if (!persona) {
2074 error = ESRCH;
2075 goto out;
2076 }
2077
2078 if (verify) {
2079 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2080 if (px_persona->pspi_uid != persona_get_uid(persona)) {
2081 error = EINVAL;
2082 goto out;
2083 }
2084 }
2085 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2086 if (px_persona->pspi_gid != persona_get_gid(persona)) {
2087 error = EINVAL;
2088 goto out;
2089 }
2090 }
2091 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2092 unsigned ngroups = 0;
2093 gid_t groups[NGROUPS_MAX];
2094
2095 if (persona_get_groups(persona, &ngroups, groups,
2096 px_persona->pspi_ngroups) != 0) {
2097 error = EINVAL;
2098 goto out;
2099 }
2100 if (ngroups != px_persona->pspi_ngroups) {
2101 error = EINVAL;
2102 goto out;
2103 }
2104 while (ngroups--) {
2105 if (px_persona->pspi_groups[ngroups] != groups[ngroups]) {
2106 error = EINVAL;
2107 goto out;
2108 }
2109 }
2110 if (px_persona->pspi_gmuid != persona_get_gmuid(persona)) {
2111 error = EINVAL;
2112 goto out;
2113 }
2114 }
2115 }
2116
2117 out:
2118 if (persona)
2119 persona_put(persona);
2120
2121 return error;
2122 }
2123
2124 static int spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
2125 {
2126 int ret;
2127 kauth_cred_t cred;
2128 struct persona *persona = NULL;
2129 int override = !!(px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
2130
2131 if (!override)
2132 return persona_proc_adopt_id(p, px_persona->pspi_id, NULL);
2133
2134 /*
2135 * we want to spawn into the given persona, but we want to override
2136 * the kauth with a different UID/GID combo
2137 */
2138 persona = persona_lookup(px_persona->pspi_id);
2139 if (!persona)
2140 return ESRCH;
2141
2142 cred = persona_get_cred(persona);
2143 if (!cred) {
2144 ret = EINVAL;
2145 goto out;
2146 }
2147
2148 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2149 cred = kauth_cred_setresuid(cred,
2150 px_persona->pspi_uid,
2151 px_persona->pspi_uid,
2152 px_persona->pspi_uid,
2153 KAUTH_UID_NONE);
2154 }
2155
2156 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2157 cred = kauth_cred_setresgid(cred,
2158 px_persona->pspi_gid,
2159 px_persona->pspi_gid,
2160 px_persona->pspi_gid);
2161 }
2162
2163 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2164 cred = kauth_cred_setgroups(cred,
2165 px_persona->pspi_groups,
2166 px_persona->pspi_ngroups,
2167 px_persona->pspi_gmuid);
2168 }
2169
2170 ret = persona_proc_adopt(p, persona, cred);
2171
2172 out:
2173 persona_put(persona);
2174 return ret;
2175 }
2176 #endif
2177
2178 /*
2179 * posix_spawn
2180 *
2181 * Parameters: uap->pid Pointer to pid return area
2182 * uap->path File name to exec
2183 * uap->argv Argument list
2184 * uap->envp Environment list
2185 *
2186 * Returns: 0 Success
2187 * EINVAL Invalid argument
2188 * ENOTSUP Not supported
2189 * ENOEXEC Executable file format error
2190 * exec_activate_image:EINVAL Invalid argument
2191 * exec_activate_image:EACCES Permission denied
2192 * exec_activate_image:EINTR Interrupted function
2193 * exec_activate_image:ENOMEM Not enough space
2194 * exec_activate_image:EFAULT Bad address
2195 * exec_activate_image:ENAMETOOLONG Filename too long
2196 * exec_activate_image:ENOEXEC Executable file format error
2197 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
2198 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
2199 * exec_activate_image:???
2200 * mac_execve_enter:???
2201 *
2202 * TODO: Expect to need __mac_posix_spawn() at some point...
2203 * Handle posix_spawnattr_t
2204 * Handle posix_spawn_file_actions_t
2205 */
2206 int
2207 posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
2208 {
2209 proc_t p = ap; /* quiet bogus GCC vfork() warning */
2210 user_addr_t pid = uap->pid;
2211 int ival[2]; /* dummy retval for setpgid() */
2212 char *bufp = NULL;
2213 struct image_params *imgp;
2214 struct vnode_attr *vap;
2215 struct vnode_attr *origvap;
2216 struct uthread *uthread = 0; /* compiler complains if not set to 0*/
2217 int error, sig;
2218 int is_64 = IS_64BIT_PROCESS(p);
2219 struct vfs_context context;
2220 struct user__posix_spawn_args_desc px_args;
2221 struct _posix_spawnattr px_sa;
2222 _posix_spawn_file_actions_t px_sfap = NULL;
2223 _posix_spawn_port_actions_t px_spap = NULL;
2224 struct __kern_sigaction vec;
2225 boolean_t spawn_no_exec = FALSE;
2226 boolean_t proc_transit_set = TRUE;
2227 boolean_t exec_done = FALSE;
2228 int portwatch_count = 0;
2229 ipc_port_t * portwatch_ports = NULL;
2230 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
2231 task_t new_task = NULL;
2232 boolean_t should_release_proc_ref = FALSE;
2233 void *inherit = NULL;
2234 #if CONFIG_PERSONAS
2235 struct _posix_spawn_persona_info *px_persona = NULL;
2236 #endif
2237
2238 /*
2239 * Allocate a big chunk for locals instead of using stack since these
2240 * structures are pretty big.
2241 */
2242 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
2243 imgp = (struct image_params *) bufp;
2244 if (bufp == NULL) {
2245 error = ENOMEM;
2246 goto bad;
2247 }
2248 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
2249 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2250
2251 /* Initialize the common data in the image_params structure */
2252 imgp->ip_user_fname = uap->path;
2253 imgp->ip_user_argv = uap->argv;
2254 imgp->ip_user_envv = uap->envp;
2255 imgp->ip_vattr = vap;
2256 imgp->ip_origvattr = origvap;
2257 imgp->ip_vfs_context = &context;
2258 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
2259 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2260 imgp->ip_mac_return = 0;
2261 imgp->ip_px_persona = NULL;
2262 imgp->ip_cs_error = OS_REASON_NULL;
2263
2264 if (uap->adesc != USER_ADDR_NULL) {
2265 if(is_64) {
2266 error = copyin(uap->adesc, &px_args, sizeof(px_args));
2267 } else {
2268 struct user32__posix_spawn_args_desc px_args32;
2269
2270 error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
2271
2272 /*
2273 * Convert arguments descriptor from external 32 bit
2274 * representation to internal 64 bit representation
2275 */
2276 px_args.attr_size = px_args32.attr_size;
2277 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
2278 px_args.file_actions_size = px_args32.file_actions_size;
2279 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
2280 px_args.port_actions_size = px_args32.port_actions_size;
2281 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
2282 px_args.mac_extensions_size = px_args32.mac_extensions_size;
2283 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
2284 px_args.coal_info_size = px_args32.coal_info_size;
2285 px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
2286 px_args.persona_info_size = px_args32.persona_info_size;
2287 px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
2288 }
2289 if (error)
2290 goto bad;
2291
2292 if (px_args.attr_size != 0) {
2293 /*
2294 * We are not copying the port_actions pointer,
2295 * because we already have it from px_args.
2296 * This is a bit fragile: <rdar://problem/16427422>
2297 */
2298
2299 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0))
2300 goto bad;
2301
2302 bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
2303
2304 imgp->ip_px_sa = &px_sa;
2305 }
2306 if (px_args.file_actions_size != 0) {
2307 /* Limit file_actions to allowed number of open files */
2308 int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
2309 size_t maxfa_size = PSF_ACTIONS_SIZE(maxfa);
2310 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
2311 maxfa_size == 0 || px_args.file_actions_size > maxfa_size) {
2312 error = EINVAL;
2313 goto bad;
2314 }
2315 MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
2316 if (px_sfap == NULL) {
2317 error = ENOMEM;
2318 goto bad;
2319 }
2320 imgp->ip_px_sfa = px_sfap;
2321
2322 if ((error = copyin(px_args.file_actions, px_sfap,
2323 px_args.file_actions_size)) != 0)
2324 goto bad;
2325
2326 /* Verify that the action count matches the struct size */
2327 size_t psfsize = PSF_ACTIONS_SIZE(px_sfap->psfa_act_count);
2328 if (psfsize == 0 || psfsize != px_args.file_actions_size) {
2329 error = EINVAL;
2330 goto bad;
2331 }
2332 }
2333 if (px_args.port_actions_size != 0) {
2334 /* Limit port_actions to one page of data */
2335 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
2336 px_args.port_actions_size > PAGE_SIZE) {
2337 error = EINVAL;
2338 goto bad;
2339 }
2340
2341 MALLOC(px_spap, _posix_spawn_port_actions_t,
2342 px_args.port_actions_size, M_TEMP, M_WAITOK);
2343 if (px_spap == NULL) {
2344 error = ENOMEM;
2345 goto bad;
2346 }
2347 imgp->ip_px_spa = px_spap;
2348
2349 if ((error = copyin(px_args.port_actions, px_spap,
2350 px_args.port_actions_size)) != 0)
2351 goto bad;
2352
2353 /* Verify that the action count matches the struct size */
2354 size_t pasize = PS_PORT_ACTIONS_SIZE(px_spap->pspa_count);
2355 if (pasize == 0 || pasize != px_args.port_actions_size) {
2356 error = EINVAL;
2357 goto bad;
2358 }
2359 }
2360 #if CONFIG_PERSONAS
2361 /* copy in the persona info */
2362 if (px_args.persona_info_size != 0 && px_args.persona_info != 0) {
2363 /* for now, we need the exact same struct in user space */
2364 if (px_args.persona_info_size != sizeof(*px_persona)) {
2365 error = ERANGE;
2366 goto bad;
2367 }
2368
2369 MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK|M_ZERO);
2370 if (px_persona == NULL) {
2371 error = ENOMEM;
2372 goto bad;
2373 }
2374 imgp->ip_px_persona = px_persona;
2375
2376 if ((error = copyin(px_args.persona_info, px_persona,
2377 px_args.persona_info_size)) != 0)
2378 goto bad;
2379 if ((error = spawn_validate_persona(px_persona)) != 0)
2380 goto bad;
2381 }
2382 #endif
2383 #if CONFIG_MACF
2384 if (px_args.mac_extensions_size != 0) {
2385 if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
2386 goto bad;
2387 }
2388 #endif /* CONFIG_MACF */
2389 }
2390
2391 /* set uthread to parent */
2392 uthread = get_bsdthread_info(current_thread());
2393
2394 /*
2395 * <rdar://6640530>; this does not result in a behaviour change
2396 * relative to Leopard, so there should not be any existing code
2397 * which depends on it.
2398 */
2399 if (uthread->uu_flag & UT_VFORK) {
2400 error = EINVAL;
2401 goto bad;
2402 }
2403
2404 /*
2405 * If we don't have the extension flag that turns "posix_spawn()"
2406 * into "execve() with options", then we will be creating a new
2407 * process which does not inherit memory from the parent process,
2408 * which is one of the most expensive things about using fork()
2409 * and execve().
2410 */
2411 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
2412
2413 /* Set the new task's coalition, if it is requested. */
2414 coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
2415 #if CONFIG_COALITIONS
2416 int i, ncoals;
2417 kern_return_t kr = KERN_SUCCESS;
2418 struct _posix_spawn_coalition_info coal_info;
2419 int coal_role[COALITION_NUM_TYPES];
2420
2421 if (imgp->ip_px_sa == NULL || !px_args.coal_info)
2422 goto do_fork1;
2423
2424 memset(&coal_info, 0, sizeof(coal_info));
2425
2426 if (px_args.coal_info_size > sizeof(coal_info))
2427 px_args.coal_info_size = sizeof(coal_info);
2428 error = copyin(px_args.coal_info,
2429 &coal_info, px_args.coal_info_size);
2430 if (error != 0)
2431 goto bad;
2432
2433 ncoals = 0;
2434 for (i = 0; i < COALITION_NUM_TYPES; i++) {
2435 uint64_t cid = coal_info.psci_info[i].psci_id;
2436 if (cid != 0) {
2437 /*
2438 * don't allow tasks which are not in a
2439 * privileged coalition to spawn processes
2440 * into coalitions other than their own
2441 */
2442 if (!task_is_in_privileged_coalition(p->task, i)) {
2443 coal_dbg("ERROR: %d not in privilegd "
2444 "coalition of type %d",
2445 p->p_pid, i);
2446 spawn_coalitions_release_all(coal);
2447 error = EPERM;
2448 goto bad;
2449 }
2450
2451 coal_dbg("searching for coalition id:%llu", cid);
2452 /*
2453 * take a reference and activation on the
2454 * coalition to guard against free-while-spawn
2455 * races
2456 */
2457 coal[i] = coalition_find_and_activate_by_id(cid);
2458 if (coal[i] == COALITION_NULL) {
2459 coal_dbg("could not find coalition id:%llu "
2460 "(perhaps it has been terminated or reaped)", cid);
2461 /*
2462 * release any other coalitions we
2463 * may have a reference to
2464 */
2465 spawn_coalitions_release_all(coal);
2466 error = ESRCH;
2467 goto bad;
2468 }
2469 if (coalition_type(coal[i]) != i) {
2470 coal_dbg("coalition with id:%lld is not of type:%d"
2471 " (it's type:%d)", cid, i, coalition_type(coal[i]));
2472 error = ESRCH;
2473 goto bad;
2474 }
2475 coal_role[i] = coal_info.psci_info[i].psci_role;
2476 ncoals++;
2477 }
2478 }
2479 if (ncoals < COALITION_NUM_TYPES) {
2480 /*
2481 * If the user is attempting to spawn into a subset of
2482 * the known coalition types, then make sure they have
2483 * _at_least_ specified a resource coalition. If not,
2484 * the following fork1() call will implicitly force an
2485 * inheritance from 'p' and won't actually spawn the
2486 * new task into the coalitions the user specified.
2487 * (also the call to coalitions_set_roles will panic)
2488 */
2489 if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
2490 spawn_coalitions_release_all(coal);
2491 error = EINVAL;
2492 goto bad;
2493 }
2494 }
2495 do_fork1:
2496 #endif /* CONFIG_COALITIONS */
2497
2498 /*
2499 * note that this will implicitly inherit the
2500 * caller's persona (if it exists)
2501 */
2502 error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
2503 /* returns a thread and task reference */
2504
2505 if (error == 0) {
2506 new_task = get_threadtask(imgp->ip_new_thread);
2507 }
2508 #if CONFIG_COALITIONS
2509 /* set the roles of this task within each given coalition */
2510 if (error == 0) {
2511 kr = coalitions_set_roles(coal, new_task, coal_role);
2512 if (kr != KERN_SUCCESS)
2513 error = EINVAL;
2514 if (kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_COALITION,
2515 MACH_COALITION_ADOPT))) {
2516 for (i = 0; i < COALITION_NUM_TYPES; i++) {
2517 if (coal[i] != COALITION_NULL) {
2518 /*
2519 * On 32-bit targets, uniqueid
2520 * will get truncated to 32 bits
2521 */
2522 KDBG_RELEASE(MACHDBG_CODE(
2523 DBG_MACH_COALITION,
2524 MACH_COALITION_ADOPT),
2525 coalition_id(coal[i]),
2526 get_task_uniqueid(new_task));
2527 }
2528 }
2529 }
2530 }
2531
2532 /* drop our references and activations - fork1() now holds them */
2533 spawn_coalitions_release_all(coal);
2534 #endif /* CONFIG_COALITIONS */
2535 if (error != 0) {
2536 goto bad;
2537 }
2538 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */
2539 spawn_no_exec = TRUE; /* used in later tests */
2540
2541 #if CONFIG_PERSONAS
2542 /*
2543 * If the parent isn't in a persona (launchd), and
2544 * hasn't specified a new persona for the process,
2545 * then we'll put the process into the system persona
2546 *
2547 * TODO: this will have to be re-worked because as of
2548 * now, without any launchd adoption, the resulting
2549 * xpcproxy process will not have sufficient
2550 * privileges to setuid/gid.
2551 */
2552 #if 0
2553 if (!proc_has_persona(p) && imgp->ip_px_persona == NULL) {
2554 MALLOC(px_persona, struct _posix_spawn_persona_info *,
2555 sizeof(*px_persona), M_TEMP, M_WAITOK|M_ZERO);
2556 if (px_persona == NULL) {
2557 error = ENOMEM;
2558 goto bad;
2559 }
2560 px_persona->pspi_id = persona_get_id(g_system_persona);
2561 imgp->ip_px_persona = px_persona;
2562 }
2563 #endif /* 0 */
2564 #endif /* CONFIG_PERSONAS */
2565 } else {
2566 /*
2567 * For execve case, create a new task and thread
2568 * which points to current_proc. The current_proc will point
2569 * to the new task after image activation and proc ref drain.
2570 *
2571 * proc (current_proc) <----- old_task (current_task)
2572 * ^ | ^
2573 * | | |
2574 * | ----------------------------------
2575 * |
2576 * --------- new_task (task marked as TF_EXEC_COPY)
2577 *
2578 * After image activation, the proc will point to the new task
2579 * and would look like following.
2580 *
2581 * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
2582 * ^ |
2583 * | |
2584 * | ----------> new_task
2585 * | |
2586 * -----------------
2587 *
2588 * During exec any transition from new_task -> proc is fine, but don't allow
2589 * transition from proc->task, since it will modify old_task.
2590 */
2591 imgp->ip_new_thread = fork_create_child(current_task(),
2592 NULL, p, FALSE, p->p_flag & P_LP64, TRUE);
2593 /* task and thread ref returned by fork_create_child */
2594 if (imgp->ip_new_thread == NULL) {
2595 error = ENOMEM;
2596 goto bad;
2597 }
2598
2599 new_task = get_threadtask(imgp->ip_new_thread);
2600 imgp->ip_flags |= IMGPF_EXEC;
2601 }
2602
2603 if (spawn_no_exec) {
2604 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
2605
2606 /*
2607 * We had to wait until this point before firing the
2608 * proc:::create probe, otherwise p would not point to the
2609 * child process.
2610 */
2611 DTRACE_PROC1(create, proc_t, p);
2612 }
2613 assert(p != NULL);
2614
2615 context.vc_thread = imgp->ip_new_thread;
2616 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
2617
2618 /*
2619 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
2620 * to handle the file_actions. Since vfork() also ends up setting
2621 * us into the parent process group, and saved off the signal flags,
2622 * this is also where we want to handle the spawn flags.
2623 */
2624
2625 /* Has spawn file actions? */
2626 if (imgp->ip_px_sfa != NULL) {
2627 /*
2628 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2629 * is handled in exec_handle_file_actions().
2630 */
2631 if ((error = exec_handle_file_actions(imgp,
2632 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
2633 goto bad;
2634 }
2635
2636 /* Has spawn port actions? */
2637 if (imgp->ip_px_spa != NULL) {
2638 boolean_t is_adaptive = FALSE;
2639 boolean_t portwatch_present = FALSE;
2640
2641 /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
2642 if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2643 is_adaptive = TRUE;
2644
2645 /*
2646 * portwatch only:
2647 * Allocate a place to store the ports we want to bind to the new task
2648 * We can't bind them until after the apptype is set.
2649 */
2650 if (px_spap->pspa_count != 0 && is_adaptive) {
2651 portwatch_count = px_spap->pspa_count;
2652 MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO);
2653 } else {
2654 portwatch_ports = NULL;
2655 }
2656
2657 if ((error = exec_handle_port_actions(imgp, &portwatch_present, portwatch_ports)) != 0)
2658 goto bad;
2659
2660 if (portwatch_present == FALSE && portwatch_ports != NULL) {
2661 FREE(portwatch_ports, M_TEMP);
2662 portwatch_ports = NULL;
2663 portwatch_count = 0;
2664 }
2665 }
2666
2667 /* Has spawn attr? */
2668 if (imgp->ip_px_sa != NULL) {
2669 /*
2670 * Set the process group ID of the child process; this has
2671 * to happen before the image activation.
2672 */
2673 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2674 struct setpgid_args spga;
2675 spga.pid = p->p_pid;
2676 spga.pgid = px_sa.psa_pgroup;
2677 /*
2678 * Effectively, call setpgid() system call; works
2679 * because there are no pointer arguments.
2680 */
2681 if((error = setpgid(p, &spga, ival)) != 0)
2682 goto bad;
2683 }
2684
2685 /*
2686 * Reset UID/GID to parent's RUID/RGID; This works only
2687 * because the operation occurs *after* the vfork() and
2688 * before the call to exec_handle_sugid() by the image
2689 * activator called from exec_activate_image(). POSIX
2690 * requires that any setuid/setgid bits on the process
2691 * image will take precedence over the spawn attributes
2692 * (re)setting them.
2693 *
2694 * Modifications to p_ucred must be guarded using the
2695 * proc's ucred lock. This prevents others from accessing
2696 * a garbage credential.
2697 */
2698 while (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2699 kauth_cred_t my_cred = kauth_cred_proc_ref(p);
2700 kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2701
2702 if (my_cred == my_new_cred) {
2703 kauth_cred_unref(&my_cred);
2704 break;
2705 }
2706
2707 /* update cred on proc */
2708 proc_ucred_lock(p);
2709
2710 if (p->p_ucred != my_cred) {
2711 proc_ucred_unlock(p);
2712 kauth_cred_unref(&my_new_cred);
2713 continue;
2714 }
2715
2716 /* donate cred reference on my_new_cred to p->p_ucred */
2717 p->p_ucred = my_new_cred;
2718 PROC_UPDATE_CREDS_ONPROC(p);
2719 proc_ucred_unlock(p);
2720
2721 /* drop additional reference that was taken on the previous cred */
2722 kauth_cred_unref(&my_cred);
2723 }
2724
2725 #if CONFIG_PERSONAS
2726 if (spawn_no_exec && imgp->ip_px_persona != NULL) {
2727 /*
2728 * If we were asked to spawn a process into a new persona,
2729 * do the credential switch now (which may override the UID/GID
2730 * inherit done just above). It's important to do this switch
2731 * before image activation both for reasons stated above, and
2732 * to ensure that the new persona has access to the image/file
2733 * being executed.
2734 */
2735 error = spawn_persona_adopt(p, imgp->ip_px_persona);
2736 if (error != 0)
2737 goto bad;
2738 }
2739 #endif /* CONFIG_PERSONAS */
2740 #if !SECURE_KERNEL
2741 /*
2742 * Disable ASLR for the spawned process.
2743 *
2744 * But only do so if we are not embedded + RELEASE.
2745 * While embedded allows for a boot-arg (-disable_aslr)
2746 * to deal with this (which itself is only honored on
2747 * DEVELOPMENT or DEBUG builds of xnu), it is often
2748 * useful or necessary to disable ASLR on a per-process
2749 * basis for unit testing and debugging.
2750 */
2751 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2752 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2753 #endif /* !SECURE_KERNEL */
2754
2755 /* Randomize high bits of ASLR slide */
2756 if (px_sa.psa_flags & _POSIX_SPAWN_HIGH_BITS_ASLR)
2757 imgp->ip_flags |= IMGPF_HIGH_BITS_ASLR;
2758
2759 /*
2760 * Forcibly disallow execution from data pages for the spawned process
2761 * even if it would otherwise be permitted by the architecture default.
2762 */
2763 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2764 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
2765 }
2766
2767 /*
2768 * Disable ASLR during image activation. This occurs either if the
2769 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2770 * P_DISABLE_ASLR was inherited from the parent process.
2771 */
2772 if (p->p_flag & P_DISABLE_ASLR)
2773 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
2774
2775 /*
2776 * Clear transition flag so we won't hang if exec_activate_image() causes
2777 * an automount (and launchd does a proc sysctl to service it).
2778 *
2779 * <rdar://problem/6848672>, <rdar://problem/5959568>.
2780 */
2781 if (spawn_no_exec) {
2782 proc_transend(p, 0);
2783 proc_transit_set = 0;
2784 }
2785
2786 #if MAC_SPAWN /* XXX */
2787 if (uap->mac_p != USER_ADDR_NULL) {
2788 error = mac_execve_enter(uap->mac_p, imgp);
2789 if (error)
2790 goto bad;
2791 }
2792 #endif
2793
2794 /*
2795 * Activate the image
2796 */
2797 error = exec_activate_image(imgp);
2798
2799 if (error == 0 && !spawn_no_exec) {
2800 p = proc_exec_switch_task(p, current_task(), new_task, imgp->ip_new_thread);
2801 /* proc ref returned */
2802 should_release_proc_ref = TRUE;
2803 }
2804
2805 if (error == 0) {
2806 /* process completed the exec */
2807 exec_done = TRUE;
2808 } else if (error == -1) {
2809 /* Image not claimed by any activator? */
2810 error = ENOEXEC;
2811 }
2812
2813 /*
2814 * If we have a spawn attr, and it contains signal related flags,
2815 * then we need to process them in the "context" of the new child
2816 * process, so we have to process it following image activation,
2817 * prior to making the thread runnable in user space. This is
2818 * necessitated by some signal information being per-thread rather
2819 * than per-process, and we don't have the new allocation in hand
2820 * until after the image is activated.
2821 */
2822 if (!error && imgp->ip_px_sa != NULL) {
2823 thread_t child_thread = imgp->ip_new_thread;
2824 uthread_t child_uthread = get_bsdthread_info(child_thread);
2825
2826 /*
2827 * Mask a list of signals, instead of them being unmasked, if
2828 * they were unmasked in the parent; note that some signals
2829 * are not maskable.
2830 */
2831 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
2832 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2833 /*
2834 * Default a list of signals instead of ignoring them, if
2835 * they were ignored in the parent. Note that we pass
2836 * spawn_no_exec to setsigvec() to indicate that we called
2837 * fork1() and therefore do not need to call proc_signalstart()
2838 * internally.
2839 */
2840 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2841 vec.sa_handler = SIG_DFL;
2842 vec.sa_tramp = 0;
2843 vec.sa_mask = 0;
2844 vec.sa_flags = 0;
2845 for (sig = 1; sig < NSIG; sig++)
2846 if (px_sa.psa_sigdefault & (1 << (sig-1))) {
2847 error = setsigvec(p, child_thread, sig, &vec, spawn_no_exec);
2848 }
2849 }
2850
2851 /*
2852 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2853 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2854 * limit.
2855 *
2856 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2857 */
2858 if (px_sa.psa_cpumonitor_percent != 0) {
2859 /*
2860 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2861 * an entitlement to configure the monitor a certain way seems silly, since
2862 * whomever is turning it on could just as easily choose not to do so.
2863 */
2864 error = proc_set_task_ruse_cpu(p->task,
2865 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
2866 px_sa.psa_cpumonitor_percent,
2867 px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
2868 0, TRUE);
2869 }
2870 }
2871
2872 bad:
2873
2874 if (error == 0) {
2875 /* reset delay idle sleep status if set */
2876 #if !CONFIG_EMBEDDED
2877 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2878 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
2879 #endif /* !CONFIG_EMBEDDED */
2880 /* upon successful spawn, re/set the proc control state */
2881 if (imgp->ip_px_sa != NULL) {
2882 switch (px_sa.psa_pcontrol) {
2883 case POSIX_SPAWN_PCONTROL_THROTTLE:
2884 p->p_pcaction = P_PCTHROTTLE;
2885 break;
2886 case POSIX_SPAWN_PCONTROL_SUSPEND:
2887 p->p_pcaction = P_PCSUSP;
2888 break;
2889 case POSIX_SPAWN_PCONTROL_KILL:
2890 p->p_pcaction = P_PCKILL;
2891 break;
2892 case POSIX_SPAWN_PCONTROL_NONE:
2893 default:
2894 p->p_pcaction = 0;
2895 break;
2896 };
2897 }
2898 exec_resettextvp(p, imgp);
2899
2900 #if CONFIG_MEMORYSTATUS
2901 /* Has jetsam attributes? */
2902 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2903 /*
2904 * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
2905 * longer relevant, as background limits are described via the inactive limit slots.
2906 *
2907 * That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
2908 * we attempt to mimic previous behavior by forcing the BG limit data into the
2909 * inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode.
2910 */
2911 if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
2912 memorystatus_update(p, px_sa.psa_priority, 0,
2913 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2914 TRUE,
2915 -1, TRUE,
2916 px_sa.psa_memlimit_inactive, FALSE);
2917 } else {
2918 memorystatus_update(p, px_sa.psa_priority, 0,
2919 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2920 TRUE,
2921 px_sa.psa_memlimit_active,
2922 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
2923 px_sa.psa_memlimit_inactive,
2924 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL));
2925 }
2926
2927 }
2928 #endif /* CONFIG_MEMORYSTATUS */
2929 }
2930
2931 /*
2932 * If we successfully called fork1(), we always need to do this;
2933 * we identify this case by noting the IMGPF_SPAWN flag. This is
2934 * because we come back from that call with signals blocked in the
2935 * child, and we have to unblock them, but we want to wait until
2936 * after we've performed any spawn actions. This has to happen
2937 * before check_for_signature(), which uses psignal.
2938 */
2939 if (spawn_no_exec) {
2940 if (proc_transit_set)
2941 proc_transend(p, 0);
2942
2943 /*
2944 * Drop the signal lock on the child which was taken on our
2945 * behalf by forkproc()/cloneproc() to prevent signals being
2946 * received by the child in a partially constructed state.
2947 */
2948 proc_signalend(p, 0);
2949
2950 /* flag the 'fork' has occurred */
2951 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
2952 }
2953
2954 /* flag exec has occurred, notify only if it has not failed due to FP Key error */
2955 if (!error && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0))
2956 proc_knote(p, NOTE_EXEC);
2957
2958
2959 if (error == 0) {
2960 /*
2961 * We need to initialize the bank context behind the protection of
2962 * the proc_trans lock to prevent a race with exit. We can't do this during
2963 * exec_activate_image because task_bank_init checks entitlements that
2964 * aren't loaded until subsequent calls (including exec_resettextvp).
2965 */
2966 error = proc_transstart(p, 0, 0);
2967
2968 if (error == 0) {
2969 task_bank_init(get_threadtask(imgp->ip_new_thread));
2970 proc_transend(p, 0);
2971 }
2972 }
2973
2974 /* Inherit task role from old task to new task for exec */
2975 if (error == 0 && !spawn_no_exec) {
2976 proc_inherit_task_role(get_threadtask(imgp->ip_new_thread), current_task());
2977 }
2978
2979 /*
2980 * Apply the spawnattr policy, apptype (which primes the task for importance donation),
2981 * and bind any portwatch ports to the new task.
2982 * This must be done after the exec so that the child's thread is ready,
2983 * and after the in transit state has been released, because priority is
2984 * dropped here so we need to be prepared for a potentially long preemption interval
2985 *
2986 * TODO: Consider splitting this up into separate phases
2987 */
2988 if (error == 0 && imgp->ip_px_sa != NULL) {
2989 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
2990
2991 exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, psa->psa_darwin_role,
2992 portwatch_ports, portwatch_count);
2993 }
2994
2995 /*
2996 * Need to transfer pending watch port boosts to the new task while still making
2997 * sure that the old task remains in the importance linkage. Create an importance
2998 * linkage from old task to new task, then switch the task importance base
2999 * of old task and new task. After the switch the port watch boost will be
3000 * boosting the new task and new task will be donating importance to old task.
3001 */
3002 if (error == 0 && task_did_exec(current_task())) {
3003 inherit = ipc_importance_exec_switch_task(current_task(), get_threadtask(imgp->ip_new_thread));
3004 }
3005
3006 if (error == 0) {
3007 /* Apply the main thread qos */
3008 thread_t main_thread = imgp->ip_new_thread;
3009 task_set_main_thread_qos(get_threadtask(imgp->ip_new_thread), main_thread);
3010
3011 #if CONFIG_MACF
3012 /*
3013 * Processes with the MAP_JIT entitlement are permitted to have
3014 * a jumbo-size map.
3015 */
3016 if (mac_proc_check_map_anon(p, 0, 0, 0, MAP_JIT, NULL) == 0) {
3017 vm_map_set_jumbo(get_task_map(p->task));
3018 }
3019 #endif /* CONFIG_MACF */
3020 }
3021
3022 /*
3023 * Release any ports we kept around for binding to the new task
3024 * We need to release the rights even if the posix_spawn has failed.
3025 */
3026 if (portwatch_ports != NULL) {
3027 for (int i = 0; i < portwatch_count; i++) {
3028 ipc_port_t port = NULL;
3029 if ((port = portwatch_ports[i]) != NULL) {
3030 ipc_port_release_send(port);
3031 }
3032 }
3033 FREE(portwatch_ports, M_TEMP);
3034 portwatch_ports = NULL;
3035 portwatch_count = 0;
3036 }
3037
3038 /*
3039 * We have to delay operations which might throw a signal until after
3040 * the signals have been unblocked; however, we want that to happen
3041 * after exec_resettextvp() so that the textvp is correct when they
3042 * fire.
3043 */
3044 if (error == 0) {
3045 error = check_for_signature(p, imgp);
3046
3047 /*
3048 * Pay for our earlier safety; deliver the delayed signals from
3049 * the incomplete spawn process now that it's complete.
3050 */
3051 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
3052 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
3053 }
3054
3055 if (error == 0 && !spawn_no_exec)
3056 KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
3057 p->p_pid);
3058 }
3059
3060
3061 if (imgp != NULL) {
3062 if (imgp->ip_vp)
3063 vnode_put(imgp->ip_vp);
3064 if (imgp->ip_scriptvp)
3065 vnode_put(imgp->ip_scriptvp);
3066 if (imgp->ip_strings)
3067 execargs_free(imgp);
3068 if (imgp->ip_px_sfa != NULL)
3069 FREE(imgp->ip_px_sfa, M_TEMP);
3070 if (imgp->ip_px_spa != NULL)
3071 FREE(imgp->ip_px_spa, M_TEMP);
3072 #if CONFIG_PERSONAS
3073 if (imgp->ip_px_persona != NULL)
3074 FREE(imgp->ip_px_persona, M_TEMP);
3075 #endif
3076 #if CONFIG_MACF
3077 if (imgp->ip_px_smpx != NULL)
3078 spawn_free_macpolicyinfo(imgp->ip_px_smpx);
3079 if (imgp->ip_execlabelp)
3080 mac_cred_label_free(imgp->ip_execlabelp);
3081 if (imgp->ip_scriptlabelp)
3082 mac_vnode_label_free(imgp->ip_scriptlabelp);
3083 if (imgp->ip_cs_error != OS_REASON_NULL) {
3084 os_reason_free(imgp->ip_cs_error);
3085 imgp->ip_cs_error = OS_REASON_NULL;
3086 }
3087 #endif
3088 }
3089
3090 #if CONFIG_DTRACE
3091 if (spawn_no_exec) {
3092 /*
3093 * In the original DTrace reference implementation,
3094 * posix_spawn() was a libc routine that just
3095 * did vfork(2) then exec(2). Thus the proc::: probes
3096 * are very fork/exec oriented. The details of this
3097 * in-kernel implementation of posix_spawn() is different
3098 * (while producing the same process-observable effects)
3099 * particularly w.r.t. errors, and which thread/process
3100 * is constructing what on behalf of whom.
3101 */
3102 if (error) {
3103 DTRACE_PROC1(spawn__failure, int, error);
3104 } else {
3105 DTRACE_PROC(spawn__success);
3106 /*
3107 * Some DTrace scripts, e.g. newproc.d in
3108 * /usr/bin, rely on the 'exec-success'
3109 * probe being fired in the child after the
3110 * new process image has been constructed
3111 * in order to determine the associated pid.
3112 *
3113 * So, even though the parent built the image
3114 * here, for compatibility, mark the new thread
3115 * so 'exec-success' fires on it as it leaves
3116 * the kernel.
3117 */
3118 dtrace_thread_didexec(imgp->ip_new_thread);
3119 }
3120 } else {
3121 if (error) {
3122 DTRACE_PROC1(exec__failure, int, error);
3123 } else {
3124 dtrace_thread_didexec(imgp->ip_new_thread);
3125 }
3126 }
3127
3128 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
3129 (*dtrace_proc_waitfor_hook)(p);
3130 }
3131 #endif
3132 /*
3133 * clear bsd_info from old task if it did exec.
3134 */
3135 if (task_did_exec(current_task())) {
3136 set_bsdtask_info(current_task(), NULL);
3137 }
3138
3139 /* clear bsd_info from new task and terminate it if exec failed */
3140 if (new_task != NULL && task_is_exec_copy(new_task)) {
3141 set_bsdtask_info(new_task, NULL);
3142 task_terminate_internal(new_task);
3143 }
3144
3145 /* Return to both the parent and the child? */
3146 if (imgp != NULL && spawn_no_exec) {
3147 /*
3148 * If the parent wants the pid, copy it out
3149 */
3150 if (pid != USER_ADDR_NULL)
3151 (void)suword(pid, p->p_pid);
3152 retval[0] = error;
3153
3154 /*
3155 * If we had an error, perform an internal reap ; this is
3156 * entirely safe, as we have a real process backing us.
3157 */
3158 if (error) {
3159 proc_list_lock();
3160 p->p_listflag |= P_LIST_DEADPARENT;
3161 proc_list_unlock();
3162 proc_lock(p);
3163 /* make sure no one else has killed it off... */
3164 if (p->p_stat != SZOMB && p->exit_thread == NULL) {
3165 p->exit_thread = current_thread();
3166 proc_unlock(p);
3167 exit1(p, 1, (int *)NULL);
3168 } else {
3169 /* someone is doing it for us; just skip it */
3170 proc_unlock(p);
3171 }
3172 }
3173 }
3174
3175 /*
3176 * Do not terminate the current task, if proc_exec_switch_task did not
3177 * switch the tasks, terminating the current task without the switch would
3178 * result in losing the SIGKILL status.
3179 */
3180 if (task_did_exec(current_task())) {
3181 /* Terminate the current task, since exec will start in new task */
3182 task_terminate_internal(current_task());
3183 }
3184
3185 /* Release the thread ref returned by fork_create_child/fork1 */
3186 if (imgp != NULL && imgp->ip_new_thread) {
3187 /* wake up the new thread */
3188 task_clear_return_wait(get_threadtask(imgp->ip_new_thread));
3189 thread_deallocate(imgp->ip_new_thread);
3190 imgp->ip_new_thread = NULL;
3191 }
3192
3193 /* Release the ref returned by fork_create_child/fork1 */
3194 if (new_task) {
3195 task_deallocate(new_task);
3196 new_task = NULL;
3197 }
3198
3199 if (should_release_proc_ref) {
3200 proc_rele(p);
3201 }
3202
3203 if (bufp != NULL) {
3204 FREE(bufp, M_TEMP);
3205 }
3206
3207 if (inherit != NULL) {
3208 ipc_importance_release(inherit);
3209 }
3210
3211 return(error);
3212 }
3213
3214 /*
3215 * proc_exec_switch_task
3216 *
3217 * Parameters: p proc
3218 * old_task task before exec
3219 * new_task task after exec
3220 * new_thread thread in new task
3221 *
3222 * Returns: proc.
3223 *
3224 * Note: The function will switch the task pointer of proc
3225 * from old task to new task. The switch needs to happen
3226 * after draining all proc refs and inside a proc translock.
3227 * In the case of failure to switch the task, which might happen
3228 * if the process received a SIGKILL or jetsam killed it, it will make
3229 * sure that the new tasks terminates. User proc ref returned
3230 * to caller.
3231 *
3232 * This function is called after point of no return, in the case
3233 * failure to switch, it will terminate the new task and swallow the
3234 * error and let the terminated process complete exec and die.
3235 */
3236 proc_t
3237 proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread)
3238 {
3239 int error = 0;
3240 boolean_t task_active;
3241 boolean_t proc_active;
3242 boolean_t thread_active;
3243 thread_t old_thread = current_thread();
3244
3245 /*
3246 * Switch the task pointer of proc to new task.
3247 * Before switching the task, wait for proc_refdrain.
3248 * After the switch happens, the proc can disappear,
3249 * take a ref before it disappears. Waiting for
3250 * proc_refdrain in exec will block all other threads
3251 * trying to take a proc ref, boost the current thread
3252 * to avoid priority inversion.
3253 */
3254 thread_set_exec_promotion(old_thread);
3255 p = proc_refdrain_with_refwait(p, TRUE);
3256 /* extra proc ref returned to the caller */
3257
3258 assert(get_threadtask(new_thread) == new_task);
3259 task_active = task_is_active(new_task);
3260
3261 /* Take the proc_translock to change the task ptr */
3262 proc_lock(p);
3263 proc_active = !(p->p_lflag & P_LEXIT);
3264
3265 /* Check if the current thread is not aborted due to SIGKILL */
3266 thread_active = thread_is_active(old_thread);
3267
3268 /*
3269 * Do not switch the task if the new task or proc is already terminated
3270 * as a result of error in exec past point of no return
3271 */
3272 if (proc_active && task_active && thread_active) {
3273 error = proc_transstart(p, 1, 0);
3274 if (error == 0) {
3275 uthread_t new_uthread = get_bsdthread_info(new_thread);
3276 uthread_t old_uthread = get_bsdthread_info(current_thread());
3277
3278 /*
3279 * bsd_info of old_task will get cleared in execve and posix_spawn
3280 * after firing exec-success/error dtrace probe.
3281 */
3282 p->task = new_task;
3283
3284 /* Clear dispatchqueue and workloop ast offset */
3285 p->p_dispatchqueue_offset = 0;
3286 p->p_dispatchqueue_serialno_offset = 0;
3287 p->p_return_to_kernel_offset = 0;
3288
3289 /* Copy the signal state, dtrace state and set bsd ast on new thread */
3290 act_set_astbsd(new_thread);
3291 new_uthread->uu_siglist = old_uthread->uu_siglist;
3292 new_uthread->uu_sigwait = old_uthread->uu_sigwait;
3293 new_uthread->uu_sigmask = old_uthread->uu_sigmask;
3294 new_uthread->uu_oldmask = old_uthread->uu_oldmask;
3295 new_uthread->uu_vforkmask = old_uthread->uu_vforkmask;
3296 new_uthread->uu_exit_reason = old_uthread->uu_exit_reason;
3297 #if CONFIG_DTRACE
3298 new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig;
3299 new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop;
3300 new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid;
3301 assert(new_uthread->t_dtrace_scratch == NULL);
3302 new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch;
3303
3304 old_uthread->t_dtrace_sig = 0;
3305 old_uthread->t_dtrace_stop = 0;
3306 old_uthread->t_dtrace_resumepid = 0;
3307 old_uthread->t_dtrace_scratch = NULL;
3308 #endif
3309 /* Copy the resource accounting info */
3310 thread_copy_resource_info(new_thread, current_thread());
3311
3312 /* Clear the exit reason and signal state on old thread */
3313 old_uthread->uu_exit_reason = NULL;
3314 old_uthread->uu_siglist = 0;
3315
3316 /* Add the new uthread to proc uthlist and remove the old one */
3317 TAILQ_INSERT_TAIL(&p->p_uthlist, new_uthread, uu_list);
3318 TAILQ_REMOVE(&p->p_uthlist, old_uthread, uu_list);
3319
3320 task_set_did_exec_flag(old_task);
3321 task_clear_exec_copy_flag(new_task);
3322
3323 task_copy_fields_for_exec(new_task, old_task);
3324
3325 proc_transend(p, 1);
3326 }
3327 }
3328
3329 proc_unlock(p);
3330 proc_refwake(p);
3331 thread_clear_exec_promotion(old_thread);
3332
3333 if (error != 0 || !task_active || !proc_active || !thread_active) {
3334 task_terminate_internal(new_task);
3335 }
3336
3337 return p;
3338 }
3339
3340 /*
3341 * execve
3342 *
3343 * Parameters: uap->fname File name to exec
3344 * uap->argp Argument list
3345 * uap->envp Environment list
3346 *
3347 * Returns: 0 Success
3348 * __mac_execve:EINVAL Invalid argument
3349 * __mac_execve:ENOTSUP Invalid argument
3350 * __mac_execve:EACCES Permission denied
3351 * __mac_execve:EINTR Interrupted function
3352 * __mac_execve:ENOMEM Not enough space
3353 * __mac_execve:EFAULT Bad address
3354 * __mac_execve:ENAMETOOLONG Filename too long
3355 * __mac_execve:ENOEXEC Executable file format error
3356 * __mac_execve:ETXTBSY Text file busy [misuse of error code]
3357 * __mac_execve:???
3358 *
3359 * TODO: Dynamic linker header address on stack is copied via suword()
3360 */
3361 /* ARGSUSED */
3362 int
3363 execve(proc_t p, struct execve_args *uap, int32_t *retval)
3364 {
3365 struct __mac_execve_args muap;
3366 int err;
3367
3368 memoryshot(VM_EXECVE, DBG_FUNC_NONE);
3369
3370 muap.fname = uap->fname;
3371 muap.argp = uap->argp;
3372 muap.envp = uap->envp;
3373 muap.mac_p = USER_ADDR_NULL;
3374 err = __mac_execve(p, &muap, retval);
3375
3376 return(err);
3377 }
3378
3379 /*
3380 * __mac_execve
3381 *
3382 * Parameters: uap->fname File name to exec
3383 * uap->argp Argument list
3384 * uap->envp Environment list
3385 * uap->mac_p MAC label supplied by caller
3386 *
3387 * Returns: 0 Success
3388 * EINVAL Invalid argument
3389 * ENOTSUP Not supported
3390 * ENOEXEC Executable file format error
3391 * exec_activate_image:EINVAL Invalid argument
3392 * exec_activate_image:EACCES Permission denied
3393 * exec_activate_image:EINTR Interrupted function
3394 * exec_activate_image:ENOMEM Not enough space
3395 * exec_activate_image:EFAULT Bad address
3396 * exec_activate_image:ENAMETOOLONG Filename too long
3397 * exec_activate_image:ENOEXEC Executable file format error
3398 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
3399 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
3400 * exec_activate_image:???
3401 * mac_execve_enter:???
3402 *
3403 * TODO: Dynamic linker header address on stack is copied via suword()
3404 */
3405 int
3406 __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
3407 {
3408 char *bufp = NULL;
3409 struct image_params *imgp;
3410 struct vnode_attr *vap;
3411 struct vnode_attr *origvap;
3412 int error;
3413 int is_64 = IS_64BIT_PROCESS(p);
3414 struct vfs_context context;
3415 struct uthread *uthread;
3416 task_t new_task = NULL;
3417 boolean_t should_release_proc_ref = FALSE;
3418 boolean_t exec_done = FALSE;
3419 boolean_t in_vfexec = FALSE;
3420 void *inherit = NULL;
3421
3422 context.vc_thread = current_thread();
3423 context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
3424
3425 /* Allocate a big chunk for locals instead of using stack since these
3426 * structures a pretty big.
3427 */
3428 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
3429 imgp = (struct image_params *) bufp;
3430 if (bufp == NULL) {
3431 error = ENOMEM;
3432 goto exit_with_error;
3433 }
3434 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
3435 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
3436
3437 /* Initialize the common data in the image_params structure */
3438 imgp->ip_user_fname = uap->fname;
3439 imgp->ip_user_argv = uap->argp;
3440 imgp->ip_user_envv = uap->envp;
3441 imgp->ip_vattr = vap;
3442 imgp->ip_origvattr = origvap;
3443 imgp->ip_vfs_context = &context;
3444 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
3445 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
3446 imgp->ip_mac_return = 0;
3447 imgp->ip_cs_error = OS_REASON_NULL;
3448
3449 #if CONFIG_MACF
3450 if (uap->mac_p != USER_ADDR_NULL) {
3451 error = mac_execve_enter(uap->mac_p, imgp);
3452 if (error) {
3453 kauth_cred_unref(&context.vc_ucred);
3454 goto exit_with_error;
3455 }
3456 }
3457 #endif
3458 uthread = get_bsdthread_info(current_thread());
3459 if (uthread->uu_flag & UT_VFORK) {
3460 imgp->ip_flags |= IMGPF_VFORK_EXEC;
3461 in_vfexec = TRUE;
3462 } else {
3463 imgp->ip_flags |= IMGPF_EXEC;
3464
3465 /*
3466 * For execve case, create a new task and thread
3467 * which points to current_proc. The current_proc will point
3468 * to the new task after image activation and proc ref drain.
3469 *
3470 * proc (current_proc) <----- old_task (current_task)
3471 * ^ | ^
3472 * | | |
3473 * | ----------------------------------
3474 * |
3475 * --------- new_task (task marked as TF_EXEC_COPY)
3476 *
3477 * After image activation, the proc will point to the new task
3478 * and would look like following.
3479 *
3480 * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
3481 * ^ |
3482 * | |
3483 * | ----------> new_task
3484 * | |
3485 * -----------------
3486 *
3487 * During exec any transition from new_task -> proc is fine, but don't allow
3488 * transition from proc->task, since it will modify old_task.
3489 */
3490 imgp->ip_new_thread = fork_create_child(current_task(),
3491 NULL, p, FALSE, p->p_flag & P_LP64, TRUE);
3492 /* task and thread ref returned by fork_create_child */
3493 if (imgp->ip_new_thread == NULL) {
3494 error = ENOMEM;
3495 goto exit_with_error;
3496 }
3497
3498 new_task = get_threadtask(imgp->ip_new_thread);
3499 context.vc_thread = imgp->ip_new_thread;
3500 }
3501
3502 error = exec_activate_image(imgp);
3503 /* thread and task ref returned for vfexec case */
3504
3505 if (imgp->ip_new_thread != NULL) {
3506 /*
3507 * task reference might be returned by exec_activate_image
3508 * for vfexec.
3509 */
3510 new_task = get_threadtask(imgp->ip_new_thread);
3511 }
3512
3513 if (!error && !in_vfexec) {
3514 p = proc_exec_switch_task(p, current_task(), new_task, imgp->ip_new_thread);
3515 /* proc ref returned */
3516 should_release_proc_ref = TRUE;
3517 }
3518
3519 kauth_cred_unref(&context.vc_ucred);
3520
3521 /* Image not claimed by any activator? */
3522 if (error == -1)
3523 error = ENOEXEC;
3524
3525 if (!error) {
3526 exec_done = TRUE;
3527 assert(imgp->ip_new_thread != NULL);
3528
3529 exec_resettextvp(p, imgp);
3530 error = check_for_signature(p, imgp);
3531 }
3532
3533 /* flag exec has occurred, notify only if it has not failed due to FP Key error */
3534 if (exec_done && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0))
3535 proc_knote(p, NOTE_EXEC);
3536
3537 if (imgp->ip_vp != NULLVP)
3538 vnode_put(imgp->ip_vp);
3539 if (imgp->ip_scriptvp != NULLVP)
3540 vnode_put(imgp->ip_scriptvp);
3541 if (imgp->ip_strings)
3542 execargs_free(imgp);
3543 #if CONFIG_MACF
3544 if (imgp->ip_execlabelp)
3545 mac_cred_label_free(imgp->ip_execlabelp);
3546 if (imgp->ip_scriptlabelp)
3547 mac_vnode_label_free(imgp->ip_scriptlabelp);
3548 #endif
3549 if (imgp->ip_cs_error != OS_REASON_NULL) {
3550 os_reason_free(imgp->ip_cs_error);
3551 imgp->ip_cs_error = OS_REASON_NULL;
3552 }
3553
3554 if (!error) {
3555 /*
3556 * We need to initialize the bank context behind the protection of
3557 * the proc_trans lock to prevent a race with exit. We can't do this during
3558 * exec_activate_image because task_bank_init checks entitlements that
3559 * aren't loaded until subsequent calls (including exec_resettextvp).
3560 */
3561 error = proc_transstart(p, 0, 0);
3562 }
3563
3564 if (!error) {
3565 task_bank_init(get_threadtask(imgp->ip_new_thread));
3566 proc_transend(p, 0);
3567
3568 /* Sever any extant thread affinity */
3569 thread_affinity_exec(current_thread());
3570
3571 /* Inherit task role from old task to new task for exec */
3572 if (!in_vfexec) {
3573 proc_inherit_task_role(get_threadtask(imgp->ip_new_thread), current_task());
3574 }
3575
3576 thread_t main_thread = imgp->ip_new_thread;
3577
3578 task_set_main_thread_qos(new_task, main_thread);
3579
3580 #if CONFIG_MACF
3581 /*
3582 * Processes with the MAP_JIT entitlement are permitted to have
3583 * a jumbo-size map.
3584 */
3585 if (mac_proc_check_map_anon(p, 0, 0, 0, MAP_JIT, NULL) == 0) {
3586 vm_map_set_jumbo(get_task_map(new_task));
3587 }
3588 #endif /* CONFIG_MACF */
3589
3590
3591 #if CONFIG_DTRACE
3592 dtrace_thread_didexec(imgp->ip_new_thread);
3593
3594 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
3595 (*dtrace_proc_waitfor_hook)(p);
3596 #endif
3597
3598 if (in_vfexec) {
3599 vfork_return(p, retval, p->p_pid);
3600 }
3601 } else {
3602 DTRACE_PROC1(exec__failure, int, error);
3603 }
3604
3605 exit_with_error:
3606
3607 /*
3608 * clear bsd_info from old task if it did exec.
3609 */
3610 if (task_did_exec(current_task())) {
3611 set_bsdtask_info(current_task(), NULL);
3612 }
3613
3614 /* clear bsd_info from new task and terminate it if exec failed */
3615 if (new_task != NULL && task_is_exec_copy(new_task)) {
3616 set_bsdtask_info(new_task, NULL);
3617 task_terminate_internal(new_task);
3618 }
3619
3620 /*
3621 * Need to transfer pending watch port boosts to the new task while still making
3622 * sure that the old task remains in the importance linkage. Create an importance
3623 * linkage from old task to new task, then switch the task importance base
3624 * of old task and new task. After the switch the port watch boost will be
3625 * boosting the new task and new task will be donating importance to old task.
3626 */
3627 if (error == 0 && task_did_exec(current_task())) {
3628 inherit = ipc_importance_exec_switch_task(current_task(), get_threadtask(imgp->ip_new_thread));
3629 }
3630
3631 if (imgp != NULL) {
3632 /*
3633 * Do not terminate the current task, if proc_exec_switch_task did not
3634 * switch the tasks, terminating the current task without the switch would
3635 * result in loosing the SIGKILL status.
3636 */
3637 if (task_did_exec(current_task())) {
3638 /* Terminate the current task, since exec will start in new task */
3639 task_terminate_internal(current_task());
3640 }
3641
3642 /* Release the thread ref returned by fork_create_child */
3643 if (imgp->ip_new_thread) {
3644 /* wake up the new exec thread */
3645 task_clear_return_wait(get_threadtask(imgp->ip_new_thread));
3646 thread_deallocate(imgp->ip_new_thread);
3647 imgp->ip_new_thread = NULL;
3648 }
3649 }
3650
3651 /* Release the ref returned by fork_create_child */
3652 if (new_task) {
3653 task_deallocate(new_task);
3654 new_task = NULL;
3655 }
3656
3657 if (should_release_proc_ref) {
3658 proc_rele(p);
3659 }
3660
3661 if (bufp != NULL) {
3662 FREE(bufp, M_TEMP);
3663 }
3664
3665 if (inherit != NULL) {
3666 ipc_importance_release(inherit);
3667 }
3668
3669 return(error);
3670 }
3671
3672
3673 /*
3674 * copyinptr
3675 *
3676 * Description: Copy a pointer in from user space to a user_addr_t in kernel
3677 * space, based on 32/64 bitness of the user space
3678 *
3679 * Parameters: froma User space address
3680 * toptr Address of kernel space user_addr_t
3681 * ptr_size 4/8, based on 'froma' address space
3682 *
3683 * Returns: 0 Success
3684 * EFAULT Bad 'froma'
3685 *
3686 * Implicit returns:
3687 * *ptr_size Modified
3688 */
3689 static int
3690 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
3691 {
3692 int error;
3693
3694 if (ptr_size == 4) {
3695 /* 64 bit value containing 32 bit address */
3696 unsigned int i;
3697
3698 error = copyin(froma, &i, 4);
3699 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
3700 } else {
3701 error = copyin(froma, toptr, 8);
3702 }
3703 return (error);
3704 }
3705
3706
3707 /*
3708 * copyoutptr
3709 *
3710 * Description: Copy a pointer out from a user_addr_t in kernel space to
3711 * user space, based on 32/64 bitness of the user space
3712 *
3713 * Parameters: ua User space address to copy to
3714 * ptr Address of kernel space user_addr_t
3715 * ptr_size 4/8, based on 'ua' address space
3716 *
3717 * Returns: 0 Success
3718 * EFAULT Bad 'ua'
3719 *
3720 */
3721 static int
3722 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
3723 {
3724 int error;
3725
3726 if (ptr_size == 4) {
3727 /* 64 bit value containing 32 bit address */
3728 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); /* SAFE */
3729
3730 error = copyout(&i, ptr, 4);
3731 } else {
3732 error = copyout(&ua, ptr, 8);
3733 }
3734 return (error);
3735 }
3736
3737
3738 /*
3739 * exec_copyout_strings
3740 *
3741 * Copy out the strings segment to user space. The strings segment is put
3742 * on a preinitialized stack frame.
3743 *
3744 * Parameters: struct image_params * the image parameter block
3745 * int * a pointer to the stack offset variable
3746 *
3747 * Returns: 0 Success
3748 * !0 Failure: errno
3749 *
3750 * Implicit returns:
3751 * (*stackp) The stack offset, modified
3752 *
3753 * Note: The strings segment layout is backward, from the beginning
3754 * of the top of the stack to consume the minimal amount of
3755 * space possible; the returned stack pointer points to the
3756 * end of the area consumed (stacks grow downward).
3757 *
3758 * argc is an int; arg[i] are pointers; env[i] are pointers;
3759 * the 0's are (void *)NULL's
3760 *
3761 * The stack frame layout is:
3762 *
3763 * +-------------+ <- p->user_stack
3764 * | 16b |
3765 * +-------------+
3766 * | STRING AREA |
3767 * | : |
3768 * | : |
3769 * | : |
3770 * +- -- -- -- --+
3771 * | PATH AREA |
3772 * +-------------+
3773 * | 0 |
3774 * +-------------+
3775 * | applev[n] |
3776 * +-------------+
3777 * :
3778 * :
3779 * +-------------+
3780 * | applev[1] |
3781 * +-------------+
3782 * | exec_path / |
3783 * | applev[0] |
3784 * +-------------+
3785 * | 0 |
3786 * +-------------+
3787 * | env[n] |
3788 * +-------------+
3789 * :
3790 * :
3791 * +-------------+
3792 * | env[0] |
3793 * +-------------+
3794 * | 0 |
3795 * +-------------+
3796 * | arg[argc-1] |
3797 * +-------------+
3798 * :
3799 * :
3800 * +-------------+
3801 * | arg[0] |
3802 * +-------------+
3803 * | argc |
3804 * sp-> +-------------+
3805 *
3806 * Although technically a part of the STRING AREA, we treat the PATH AREA as
3807 * a separate entity. This allows us to align the beginning of the PATH AREA
3808 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
3809 * which precede it on the stack are properly aligned.
3810 */
3811
3812 static int
3813 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
3814 {
3815 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3816 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
3817 int ptr_area_size;
3818 void *ptr_buffer_start, *ptr_buffer;
3819 int string_size;
3820
3821 user_addr_t string_area; /* *argv[], *env[] */
3822 user_addr_t ptr_area; /* argv[], env[], applev[] */
3823 user_addr_t argc_area; /* argc */
3824 user_addr_t stack;
3825 int error;
3826
3827 unsigned i;
3828 struct copyout_desc {
3829 char *start_string;
3830 int count;
3831 #if CONFIG_DTRACE
3832 user_addr_t *dtrace_cookie;
3833 #endif
3834 boolean_t null_term;
3835 } descriptors[] = {
3836 {
3837 .start_string = imgp->ip_startargv,
3838 .count = imgp->ip_argc,
3839 #if CONFIG_DTRACE
3840 .dtrace_cookie = &p->p_dtrace_argv,
3841 #endif
3842 .null_term = TRUE
3843 },
3844 {
3845 .start_string = imgp->ip_endargv,
3846 .count = imgp->ip_envc,
3847 #if CONFIG_DTRACE
3848 .dtrace_cookie = &p->p_dtrace_envp,
3849 #endif
3850 .null_term = TRUE
3851 },
3852 {
3853 .start_string = imgp->ip_strings,
3854 .count = 1,
3855 #if CONFIG_DTRACE
3856 .dtrace_cookie = NULL,
3857 #endif
3858 .null_term = FALSE
3859 },
3860 {
3861 .start_string = imgp->ip_endenvv,
3862 .count = imgp->ip_applec - 1, /* exec_path handled above */
3863 #if CONFIG_DTRACE
3864 .dtrace_cookie = NULL,
3865 #endif
3866 .null_term = TRUE
3867 }
3868 };
3869
3870 stack = *stackp;
3871
3872 /*
3873 * All previous contributors to the string area
3874 * should have aligned their sub-area
3875 */
3876 if (imgp->ip_strspace % ptr_size != 0) {
3877 error = EINVAL;
3878 goto bad;
3879 }
3880
3881 /* Grow the stack down for the strings we've been building up */
3882 string_size = imgp->ip_strendp - imgp->ip_strings;
3883 stack -= string_size;
3884 string_area = stack;
3885
3886 /*
3887 * Need room for one pointer for each string, plus
3888 * one for the NULLs terminating the argv, envv, and apple areas.
3889 */
3890 ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
3891 ptr_size;
3892 stack -= ptr_area_size;
3893 ptr_area = stack;
3894
3895 /* We'll construct all the pointer arrays in our string buffer,
3896 * which we already know is aligned properly, and ip_argspace
3897 * was used to verify we have enough space.
3898 */
3899 ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
3900
3901 /*
3902 * Need room for pointer-aligned argc slot.
3903 */
3904 stack -= ptr_size;
3905 argc_area = stack;
3906
3907 /*
3908 * Record the size of the arguments area so that sysctl_procargs()
3909 * can return the argument area without having to parse the arguments.
3910 */
3911 proc_lock(p);
3912 p->p_argc = imgp->ip_argc;
3913 p->p_argslen = (int)(*stackp - string_area);
3914 proc_unlock(p);
3915
3916 /* Return the initial stack address: the location of argc */
3917 *stackp = stack;
3918
3919 /*
3920 * Copy out the entire strings area.
3921 */
3922 error = copyout(imgp->ip_strings, string_area,
3923 string_size);
3924 if (error)
3925 goto bad;
3926
3927 for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
3928 char *cur_string = descriptors[i].start_string;
3929 int j;
3930
3931 #if CONFIG_DTRACE
3932 if (descriptors[i].dtrace_cookie) {
3933 proc_lock(p);
3934 *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
3935 proc_unlock(p);
3936 }
3937 #endif /* CONFIG_DTRACE */
3938
3939 /*
3940 * For each segment (argv, envv, applev), copy as many pointers as requested
3941 * to our pointer buffer.
3942 */
3943 for (j = 0; j < descriptors[i].count; j++) {
3944 user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
3945
3946 /* Copy out the pointer to the current string. Alignment has been verified */
3947 if (ptr_size == 8) {
3948 *(uint64_t *)ptr_buffer = (uint64_t)cur_address;
3949 } else {
3950 *(uint32_t *)ptr_buffer = (uint32_t)cur_address;
3951 }
3952
3953 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3954 cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
3955 }
3956
3957 if (descriptors[i].null_term) {
3958 if (ptr_size == 8) {
3959 *(uint64_t *)ptr_buffer = 0ULL;
3960 } else {
3961 *(uint32_t *)ptr_buffer = 0;
3962 }
3963
3964 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3965 }
3966 }
3967
3968 /*
3969 * Copy out all our pointer arrays in bulk.
3970 */
3971 error = copyout(ptr_buffer_start, ptr_area,
3972 ptr_area_size);
3973 if (error)
3974 goto bad;
3975
3976 /* argc (int32, stored in a ptr_size area) */
3977 error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
3978 if (error)
3979 goto bad;
3980
3981 bad:
3982 return(error);
3983 }
3984
3985
3986 /*
3987 * exec_extract_strings
3988 *
3989 * Copy arguments and environment from user space into work area; we may
3990 * have already copied some early arguments into the work area, and if
3991 * so, any arguments copied in are appended to those already there.
3992 * This function is the primary manipulator of ip_argspace, since
3993 * these are the arguments the client of execve(2) knows about. After
3994 * each argv[]/envv[] string is copied, we charge the string length
3995 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
3996 * fully preflight the arg list size.
3997 *
3998 * Parameters: struct image_params * the image parameter block
3999 *
4000 * Returns: 0 Success
4001 * !0 Failure: errno
4002 *
4003 * Implicit returns;
4004 * (imgp->ip_argc) Count of arguments, updated
4005 * (imgp->ip_envc) Count of environment strings, updated
4006 * (imgp->ip_argspace) Count of remaining of NCARGS
4007 * (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
4008 *
4009 *
4010 * Note: The argument and environment vectors are user space pointers
4011 * to arrays of user space pointers.
4012 */
4013 static int
4014 exec_extract_strings(struct image_params *imgp)
4015 {
4016 int error = 0;
4017 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
4018 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
4019 user_addr_t argv = imgp->ip_user_argv;
4020 user_addr_t envv = imgp->ip_user_envv;
4021
4022 /*
4023 * Adjust space reserved for the path name by however much padding it
4024 * needs. Doing this here since we didn't know if this would be a 32-
4025 * or 64-bit process back in exec_save_path.
4026 */
4027 while (imgp->ip_strspace % new_ptr_size != 0) {
4028 *imgp->ip_strendp++ = '\0';
4029 imgp->ip_strspace--;
4030 /* imgp->ip_argspace--; not counted towards exec args total */
4031 }
4032
4033 /*
4034 * From now on, we start attributing string space to ip_argspace
4035 */
4036 imgp->ip_startargv = imgp->ip_strendp;
4037 imgp->ip_argc = 0;
4038
4039 if((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
4040 user_addr_t arg;
4041 char *argstart, *ch;
4042
4043 /* First, the arguments in the "#!" string are tokenized and extracted. */
4044 argstart = imgp->ip_interp_buffer;
4045 while (argstart) {
4046 ch = argstart;
4047 while (*ch && !IS_WHITESPACE(*ch)) {
4048 ch++;
4049 }
4050
4051 if (*ch == '\0') {
4052 /* last argument, no need to NUL-terminate */
4053 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
4054 argstart = NULL;
4055 } else {
4056 /* NUL-terminate */
4057 *ch = '\0';
4058 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
4059
4060 /*
4061 * Find the next string. We know spaces at the end of the string have already
4062 * been stripped.
4063 */
4064 argstart = ch + 1;
4065 while (IS_WHITESPACE(*argstart)) {
4066 argstart++;
4067 }
4068 }
4069
4070 /* Error-check, regardless of whether this is the last interpreter arg or not */
4071 if (error)
4072 goto bad;
4073 if (imgp->ip_argspace < new_ptr_size) {
4074 error = E2BIG;
4075 goto bad;
4076 }
4077 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
4078 imgp->ip_argc++;
4079 }
4080
4081 if (argv != 0LL) {
4082 /*
4083 * If we are running an interpreter, replace the av[0] that was
4084 * passed to execve() with the path name that was
4085 * passed to execve() for interpreters which do not use the PATH
4086 * to locate their script arguments.
4087 */
4088 error = copyinptr(argv, &arg, ptr_size);
4089 if (error)
4090 goto bad;
4091 if (arg != 0LL) {
4092 argv += ptr_size; /* consume without using */
4093 }
4094 }
4095
4096 if (imgp->ip_interp_sugid_fd != -1) {
4097 char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
4098 snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
4099 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
4100 } else {
4101 error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
4102 }
4103
4104 if (error)
4105 goto bad;
4106 if (imgp->ip_argspace < new_ptr_size) {
4107 error = E2BIG;
4108 goto bad;
4109 }
4110 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
4111 imgp->ip_argc++;
4112 }
4113
4114 while (argv != 0LL) {
4115 user_addr_t arg;
4116
4117 error = copyinptr(argv, &arg, ptr_size);
4118 if (error)
4119 goto bad;
4120
4121 if (arg == 0LL) {
4122 break;
4123 }
4124
4125 argv += ptr_size;
4126
4127 /*
4128 * av[n...] = arg[n]
4129 */
4130 error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
4131 if (error)
4132 goto bad;
4133 if (imgp->ip_argspace < new_ptr_size) {
4134 error = E2BIG;
4135 goto bad;
4136 }
4137 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
4138 imgp->ip_argc++;
4139 }
4140
4141 /* Save space for argv[] NULL terminator */
4142 if (imgp->ip_argspace < new_ptr_size) {
4143 error = E2BIG;
4144 goto bad;
4145 }
4146 imgp->ip_argspace -= new_ptr_size;
4147
4148 /* Note where the args ends and env begins. */
4149 imgp->ip_endargv = imgp->ip_strendp;
4150 imgp->ip_envc = 0;
4151
4152 /* Now, get the environment */
4153 while (envv != 0LL) {
4154 user_addr_t env;
4155
4156 error = copyinptr(envv, &env, ptr_size);
4157 if (error)
4158 goto bad;
4159
4160 envv += ptr_size;
4161 if (env == 0LL) {
4162 break;
4163 }
4164 /*
4165 * av[n...] = env[n]
4166 */
4167 error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
4168 if (error)
4169 goto bad;
4170 if (imgp->ip_argspace < new_ptr_size) {
4171 error = E2BIG;
4172 goto bad;
4173 }
4174 imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
4175 imgp->ip_envc++;
4176 }
4177
4178 /* Save space for envv[] NULL terminator */
4179 if (imgp->ip_argspace < new_ptr_size) {
4180 error = E2BIG;
4181 goto bad;
4182 }
4183 imgp->ip_argspace -= new_ptr_size;
4184
4185 /* Align the tail of the combined argv+envv area */
4186 while (imgp->ip_strspace % new_ptr_size != 0) {
4187 if (imgp->ip_argspace < 1) {
4188 error = E2BIG;
4189 goto bad;
4190 }
4191 *imgp->ip_strendp++ = '\0';
4192 imgp->ip_strspace--;
4193 imgp->ip_argspace--;
4194 }
4195
4196 /* Note where the envv ends and applev begins. */
4197 imgp->ip_endenvv = imgp->ip_strendp;
4198
4199 /*
4200 * From now on, we are no longer charging argument
4201 * space to ip_argspace.
4202 */
4203
4204 bad:
4205 return error;
4206 }
4207
4208 /*
4209 * Libc has an 8-element array set up for stack guard values. It only fills
4210 * in one of those entries, and both gcc and llvm seem to use only a single
4211 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
4212 * do the work to construct them.
4213 */
4214 #define GUARD_VALUES 1 /* number of random values emitted after GUARD_KEY */
4215 #define GUARD_KEY "stack_guard="
4216
4217 /*
4218 * System malloc needs some entropy when it is initialized.
4219 */
4220 #define ENTROPY_VALUES 2 /* number of random values emitted after ENTROPY_KEY */
4221 #define ENTROPY_KEY "malloc_entropy="
4222
4223 /*
4224 * System malloc engages nanozone for UIAPP.
 * The string is added only when the posix_spawn attributes carry
 * _POSIX_SPAWN_NANO_ALLOCATOR.
4225 */
4226 #define NANO_ENGAGE_KEY "MallocNanoZone=1"
4227
4228 #define PFZ_KEY "pfz=" /* value is commpage_text32_location or commpage_text64_location, chosen by image pointer size */
4229 extern user32_addr_t commpage_text32_location;
4230 extern user64_addr_t commpage_text64_location;
4231
4232 #define MAIN_STACK_VALUES 4 /* user_stack, user_stack_size, user_stack_alloc, user_stack_alloc_size */
4233 #define MAIN_STACK_KEY "main_stack="
4234
4235 #define FSID_KEY "executable_file=" /* "fsid,fileid" taken from the image's vnode attributes */
4236 #define DYLD_FSID_KEY "dyld_file=" /* "ip_dyld_fsid,ip_dyld_fsobjid" */
4237 #define CDHASH_KEY "executable_cdhash=" /* lower-case hex of the ubc_cs_getcdhash() result */
4238
4239 #define FSID_MAX_STRING "0x1234567890abcdef,0x1234567890abcdef" /* widest "0x%llx,0x%llx" rendering; only its length is used, to size buffers */
4240
4241 #define HEX_STR_LEN 18 // 64-bit hex value "0x0123456701234567" (length excludes the terminating NUL)
4242
4243 static int
4244 exec_add_entropy_key(struct image_params *imgp,
4245 const char *key,
4246 int values,
4247 boolean_t embedNUL)
4248 {
4249 const int limit = 8;
4250 uint64_t entropy[limit];
4251 char str[strlen(key) + (HEX_STR_LEN + 1) * limit + 1];
4252 if (values > limit) {
4253 values = limit;
4254 }
4255
4256 read_random(entropy, sizeof(entropy[0]) * values);
4257
4258 if (embedNUL) {
4259 entropy[0] &= ~(0xffull << 8);
4260 }
4261
4262 int len = snprintf(str, sizeof(str), "%s0x%llx", key, entropy[0]);
4263 int remaining = sizeof(str) - len;
4264 for (int i = 1; i < values && remaining > 0; ++i) {
4265 int start = sizeof(str) - remaining;
4266 len = snprintf(&str[start], remaining, ",0x%llx", entropy[i]);
4267 remaining -= len;
4268 }
4269
4270 return exec_add_user_string(imgp, CAST_USER_ADDR_T(str), UIO_SYSSPACE, FALSE);
4271 }
4272
4273 /*
4274 * Build up the contents of the apple[] string vector
4275 */
4276 static int
4277 exec_add_apple_strings(struct image_params *imgp,
4278 const load_result_t *load_result)
4279 {
4280 int error;
4281 int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
4282
4283 /* exec_save_path stored the first string */
4284 imgp->ip_applec = 1;
4285
4286 /* adding the pfz string */
4287 {
4288 char pfz_string[strlen(PFZ_KEY) + HEX_STR_LEN + 1];
4289
4290 if (img_ptr_size == 8) {
4291 snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%llx", commpage_text64_location);
4292 } else {
4293 snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%x", commpage_text32_location);
4294 }
4295 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), UIO_SYSSPACE, FALSE);
4296 if (error) {
4297 goto bad;
4298 }
4299 imgp->ip_applec++;
4300 }
4301
4302 /* adding the NANO_ENGAGE_KEY key */
4303 if (imgp->ip_px_sa) {
4304 int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
4305
4306 if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
4307 const char *nano_string = NANO_ENGAGE_KEY;
4308 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(nano_string), UIO_SYSSPACE, FALSE);
4309 if (error){
4310 goto bad;
4311 }
4312 imgp->ip_applec++;
4313 }
4314 }
4315
4316 /*
4317 * Supply libc with a collection of random values to use when
4318 * implementing -fstack-protector.
4319 *
4320 * (The first random string always contains an embedded NUL so that
4321 * __stack_chk_guard also protects against C string vulnerabilities)
4322 */
4323 error = exec_add_entropy_key(imgp, GUARD_KEY, GUARD_VALUES, TRUE);
4324 if (error) {
4325 goto bad;
4326 }
4327 imgp->ip_applec++;
4328
4329 /*
4330 * Supply libc with entropy for system malloc.
4331 */
4332 error = exec_add_entropy_key(imgp, ENTROPY_KEY, ENTROPY_VALUES, FALSE);
4333 if (error) {
4334 goto bad;
4335 }
4336 imgp->ip_applec++;
4337
4338 /*
4339 * Add MAIN_STACK_KEY: Supplies the address and size of the main thread's
4340 * stack if it was allocated by the kernel.
4341 *
4342 * The guard page is not included in this stack size as libpthread
4343 * expects to add it back in after receiving this value.
4344 */
4345 if (load_result->unixproc) {
4346 char stack_string[strlen(MAIN_STACK_KEY) + (HEX_STR_LEN + 1) * MAIN_STACK_VALUES + 1];
4347 snprintf(stack_string, sizeof(stack_string),
4348 MAIN_STACK_KEY "0x%llx,0x%llx,0x%llx,0x%llx",
4349 (uint64_t)load_result->user_stack,
4350 (uint64_t)load_result->user_stack_size,
4351 (uint64_t)load_result->user_stack_alloc,
4352 (uint64_t)load_result->user_stack_alloc_size);
4353 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(stack_string), UIO_SYSSPACE, FALSE);
4354 if (error) {
4355 goto bad;
4356 }
4357 imgp->ip_applec++;
4358 }
4359
4360 if (imgp->ip_vattr) {
4361 uint64_t fsid = get_va_fsid(imgp->ip_vattr);
4362 uint64_t fsobjid = imgp->ip_vattr->va_fileid;
4363
4364 char fsid_string[strlen(FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
4365 snprintf(fsid_string, sizeof(fsid_string),
4366 FSID_KEY "0x%llx,0x%llx", fsid, fsobjid);
4367 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
4368 if (error) {
4369 goto bad;
4370 }
4371 imgp->ip_applec++;
4372 }
4373
4374 if (imgp->ip_dyld_fsid || imgp->ip_dyld_fsobjid ) {
4375 char fsid_string[strlen(DYLD_FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
4376 snprintf(fsid_string, sizeof(fsid_string),
4377 DYLD_FSID_KEY "0x%llx,0x%llx", imgp->ip_dyld_fsid, imgp->ip_dyld_fsobjid);
4378 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
4379 if (error) {
4380 goto bad;
4381 }
4382 imgp->ip_applec++;
4383 }
4384
4385 uint8_t cdhash[SHA1_RESULTLEN];
4386 int cdhash_errror = ubc_cs_getcdhash(imgp->ip_vp, imgp->ip_arch_offset, cdhash);
4387 if (cdhash_errror == 0) {
4388 char hash_string[strlen(CDHASH_KEY) + 2*SHA1_RESULTLEN + 1];
4389 strncpy(hash_string, CDHASH_KEY, sizeof(hash_string));
4390 char *p = hash_string + sizeof(CDHASH_KEY) - 1;
4391 for (int i = 0; i < SHA1_RESULTLEN; i++) {
4392 snprintf(p, 3, "%02x", (int) cdhash[i]);
4393 p += 2;
4394 }
4395 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hash_string), UIO_SYSSPACE, FALSE);
4396 if (error) {
4397 goto bad;
4398 }
4399 imgp->ip_applec++;
4400 }
4401
4402 /* Align the tail of the combined applev area */
4403 while (imgp->ip_strspace % img_ptr_size != 0) {
4404 *imgp->ip_strendp++ = '\0';
4405 imgp->ip_strspace--;
4406 }
4407
4408 bad:
4409 return error;
4410 }
4411
/*
 * rlim_cur of the RLIMIT_STACK entry in p's p_rlimit[] table.  The argument
 * is parenthesized so that any pointer expression (e.g. "procs + 1" or
 * "&proc") may be passed, not just a plain identifier.
 */
#define unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
4413
4414 /*
4415 * exec_check_permissions
4416 *
4417 * Description: Verify that the file that is being attempted to be executed
4418 * is in fact allowed to be executed based on it POSIX file
4419 * permissions and other access control criteria
4420 *
4421 * Parameters: struct image_params * the image parameter block
4422 *
4423 * Returns: 0 Success
4424 * EACCES Permission denied
4425 * ENOEXEC Executable file format error
4426 * ETXTBSY Text file busy [misuse of error code]
4427 * vnode_getattr:???
4428 * vnode_authorize:???
4429 */
4430 static int
4431 exec_check_permissions(struct image_params *imgp)
4432 {
4433 struct vnode *vp = imgp->ip_vp;
4434 struct vnode_attr *vap = imgp->ip_vattr;
4435 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4436 int error;
4437 kauth_action_t action;
4438
4439 /* Only allow execution of regular files */
4440 if (!vnode_isreg(vp))
4441 return (EACCES);
4442
4443 /* Get the file attributes that we will be using here and elsewhere */
4444 VATTR_INIT(vap);
4445 VATTR_WANTED(vap, va_uid);
4446 VATTR_WANTED(vap, va_gid);
4447 VATTR_WANTED(vap, va_mode);
4448 VATTR_WANTED(vap, va_fsid);
4449 VATTR_WANTED(vap, va_fsid64);
4450 VATTR_WANTED(vap, va_fileid);
4451 VATTR_WANTED(vap, va_data_size);
4452 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
4453 return (error);
4454
4455 /*
4456 * Ensure that at least one execute bit is on - otherwise root
4457 * will always succeed, and we don't want to happen unless the
4458 * file really is executable.
4459 */
4460 if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
4461 return (EACCES);
4462
4463 /* Disallow zero length files */
4464 if (vap->va_data_size == 0)
4465 return (ENOEXEC);
4466
4467 imgp->ip_arch_offset = (user_size_t)0;
4468 imgp->ip_arch_size = vap->va_data_size;
4469
4470 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
4471 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED))
4472 vap->va_mode &= ~(VSUID | VSGID);
4473
4474 /*
4475 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
4476 * flags for setuid/setgid binaries.
4477 */
4478 if (vap->va_mode & (VSUID | VSGID))
4479 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
4480
4481 #if CONFIG_MACF
4482 error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
4483 if (error)
4484 return (error);
4485 #endif
4486
4487 /* Check for execute permission */
4488 action = KAUTH_VNODE_EXECUTE;
4489 /* Traced images must also be readable */
4490 if (p->p_lflag & P_LTRACED)
4491 action |= KAUTH_VNODE_READ_DATA;
4492 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
4493 return (error);
4494
4495 #if 0
4496 /* Don't let it run if anyone had it open for writing */
4497 vnode_lock(vp);
4498 if (vp->v_writecount) {
4499 panic("going to return ETXTBSY %x", vp);
4500 vnode_unlock(vp);
4501 return (ETXTBSY);
4502 }
4503 vnode_unlock(vp);
4504 #endif
4505
4506
4507 /* XXX May want to indicate to underlying FS that vnode is open */
4508
4509 return (error);
4510 }
4511
4512
4513 /*
4514 * exec_handle_sugid
4515 *
4516 * Initially clear the P_SUGID in the process flags; if an SUGID process is
4517 * exec'ing a non-SUGID image, then this is the point of no return.
4518 *
4519 * If the image being activated is SUGID, then replace the credential with a
4520 * copy, disable tracing (unless the tracing process is root), reset the
4521 * mach task port to revoke it, set the P_SUGID bit, and open /dev/null on any of file descriptors 0-2 that is not already open.
4522 *
4523 * If the saved user and group ID will be changing, then make sure it happens
4524 * to a new credential, rather than a shared one.
4525 *
4526 * Set the security token (this is probably obsolete, given that the token
4527 * should not technically be separate from the credential itself).
4528 *
4529 * Parameters: struct image_params * the image parameter block
4530 *
4531 * Returns: 0 on success; otherwise an errno left by falloc(), MALLOC (ENOMEM) or vn_open() while back-filling descriptors 0-2
4532 *
4533 * Implicit returns:
4534 * <process credential> Potentially modified/replaced
4535 * <task port> Potentially revoked
4536 * <process flags> P_SUGID bit potentially modified
4537 * <security token> Potentially modified
 * <process identity version> p_idversion incremented
 * (imgp->ip_mac_return) Set from the MAC label update, when one is performed
4538 */
4539 static int
4540 exec_handle_sugid(struct image_params *imgp)
4541 {
4542 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4543 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
4544 kauth_cred_t my_cred, my_new_cred;
4545 int i;
4546 int leave_sugid_clear = 0; /* non-zero: do not set P_SUGID below */
4547 int mac_reset_ipc = 0; /* non-zero: reset the task/thread ports even if P_SUGID stays clear */
4548 int error = 0;
4549 task_t task = NULL;
4550 #if CONFIG_MACF
4551 int mac_transition, disjoint_cred = 0;
4552 int label_update_return = 0;
4553
4554 /*
4555 * Determine whether a call to update the MAC label will result in the
4556 * credential changing.
4557 *
4558 * Note: MAC policies which do not actually end up modifying
4559 * the label subsequently are strongly encouraged to
4560 * return 0 for this check, since a non-zero answer will
4561 * slow down the exec fast path for normal binaries.
4562 */
4563 mac_transition = mac_cred_check_label_update_execve(
4564 imgp->ip_vfs_context,
4565 imgp->ip_vp,
4566 imgp->ip_arch_offset,
4567 imgp->ip_scriptvp,
4568 imgp->ip_scriptlabelp,
4569 imgp->ip_execlabelp,
4570 p,
4571 imgp->ip_px_smpx);
4572 #endif
4573
4574 OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
4575
4576 /*
4577 * Order of the following is important; group checks must go last,
4578 * as we use the success of the 'ismember' check combined with the
4579 * failure of the explicit match to indicate that we will be setting
4580 * the egid of the process even though the new process did not
4581 * require VSUID/VSGID bits in order for it to set the new group as
4582 * its egid.
4583 *
4584 * Note: Technically, by this we are implying a call to
4585 * setegid() in the new process, rather than implying
4586 * it used its VSGID bit to set the effective group,
4587 * even though there is no code in that process to make
4588 * such a call.
4589 */
4590 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
4591 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
4592 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
4593 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
4594 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
4595
4596 #if CONFIG_MACF
4597 /* label for MAC transition and neither VSUID nor VSGID */
 /* (reached by the goto in the else branch at the end of this if statement) */
4598 handle_mac_transition:
4599 #endif
4600
4601 #if !SECURE_KERNEL
4602 /*
4603 * Replace the credential with a copy of itself if euid or
4604 * egid change.
4605 *
4606 * Note: setuid binaries will automatically opt out of
4607 * group resolver participation as a side effect
4608 * of this operation. This is an intentional
4609 * part of the security model, which requires a
4610 * participating credential be established by
4611 * escalating privilege, setting up all other
4612 * aspects of the credential including whether
4613 * or not to participate in external group
4614 * membership resolution, then dropping their
4615 * effective privilege to that of the desired
4616 * final credential state.
4617 *
4618 * Modifications to p_ucred must be guarded using the
4619 * proc's ucred lock. This prevents others from accessing
4620 * a garbage credential.
 *
 * Each "while" below is a retry loop rather than an iteration:
 * it is left by "break" once the credential is unchanged or has
 * been installed, and repeats ("continue") only when p_ucred no
 * longer matches the credential that was referenced.
4621 */
4622 while (imgp->ip_origvattr->va_mode & VSUID) {
4623 my_cred = kauth_cred_proc_ref(p);
4624 my_new_cred = kauth_cred_setresuid(my_cred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
4625
4626 if (my_new_cred == my_cred) {
4627 kauth_cred_unref(&my_cred);
4628 break;
4629 }
4630
4631 /* update cred on proc */
4632 proc_ucred_lock(p);
4633
4634 if (p->p_ucred != my_cred) {
4635 proc_ucred_unlock(p);
4636 kauth_cred_unref(&my_new_cred);
4637 continue;
4638 }
4639
4640 /* donate cred reference on my_new_cred to p->p_ucred */
4641 p->p_ucred = my_new_cred;
4642 PROC_UPDATE_CREDS_ONPROC(p);
4643 proc_ucred_unlock(p);
4644
4645 /* drop additional reference that was taken on the previous cred */
4646 kauth_cred_unref(&my_cred);
4647
4648 break;
4649 }
4650
4651 while (imgp->ip_origvattr->va_mode & VSGID) {
4652 my_cred = kauth_cred_proc_ref(p);
4653 my_new_cred = kauth_cred_setresgid(my_cred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
4654
4655 if (my_new_cred == my_cred) {
4656 kauth_cred_unref(&my_cred);
4657 break;
4658 }
4659
4660 /* update cred on proc */
4661 proc_ucred_lock(p);
4662
4663 if (p->p_ucred != my_cred) {
4664 proc_ucred_unlock(p);
4665 kauth_cred_unref(&my_new_cred);
4666 continue;
4667 }
4668
4669 /* donate cred reference on my_new_cred to p->p_ucred */
4670 p->p_ucred = my_new_cred;
4671 PROC_UPDATE_CREDS_ONPROC(p);
4672 proc_ucred_unlock(p);
4673
4674 /* drop additional reference that was taken on the previous cred */
4675 kauth_cred_unref(&my_cred);
4676
4677 break;
4678 }
4679 #endif /* !SECURE_KERNEL */
4680
4681 #if CONFIG_MACF
4682 /*
4683 * If a policy has indicated that it will transition the label,
4684 * before making the call into the MAC policies, get a new
4685 * duplicate credential, so they can modify it without
4686 * modifying any others sharing it.
4687 */
4688 if (mac_transition) {
4689 /*
4690 * This hook may generate upcalls that require
4691 * importance donation from the kernel.
4692 * (23925818)
4693 */
4694 thread_t thread = current_thread();
4695 thread_enable_send_importance(thread, TRUE);
4696 kauth_proc_label_update_execve(p,
4697 imgp->ip_vfs_context,
4698 imgp->ip_vp,
4699 imgp->ip_arch_offset,
4700 imgp->ip_scriptvp,
4701 imgp->ip_scriptlabelp,
4702 imgp->ip_execlabelp,
4703 &imgp->ip_csflags,
4704 imgp->ip_px_smpx,
4705 &disjoint_cred, /* will be non zero if disjoint */
4706 &label_update_return);
4707 thread_enable_send_importance(thread, FALSE);
4708
4709 if (disjoint_cred) {
4710 /*
4711 * If updating the MAC label resulted in a
4712 * disjoint credential, flag that we need to
4713 * set the P_SUGID bit. This protects
4714 * against debuggers being attached by an
4715 * insufficiently privileged process onto the
4716 * result of a transition to a more privileged
4717 * credential.
4718 */
4719 leave_sugid_clear = 0;
4720 }
4721
4722 imgp->ip_mac_return = label_update_return;
4723 }
4724
4725 mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
4726
4727 #endif /* CONFIG_MACF */
4728
4729 /*
4730 * If 'leave_sugid_clear' is non-zero, then we passed the
4731 * VSUID and MACF checks, and successfully determined that
4732 * the previous cred was a member of the VSGID group, but
4733 * that it was not the default at the time of the execve,
4734 * and that the post-labelling credential was not disjoint.
4735 * So we don't set the P_SUGID or reset mach ports and fds
4736 * on the basis of simply running this code.
4737 */
4738 if (mac_reset_ipc || !leave_sugid_clear) {
4739 /*
4740 * Have mach reset the task and thread ports.
4741 * We don't want anyone who had the ports before
4742 * a setuid exec to be able to access/control the
4743 * task/thread after.
 * When imgp->ip_new_thread is set, its task and that
 * thread are reset instead of p->task and the
 * current thread.
4744 */
4745 ipc_task_reset((imgp->ip_new_thread != NULL) ?
4746 get_threadtask(imgp->ip_new_thread) : p->task);
4747 ipc_thread_reset((imgp->ip_new_thread != NULL) ?
4748 imgp->ip_new_thread : current_thread());
4749 }
4750
4751 if (!leave_sugid_clear) {
4752 /*
4753 * Flag the process as setuid.
4754 */
4755 OSBitOrAtomic(P_SUGID, &p->p_flag);
4756
4757 /*
4758 * Radar 2261856; setuid security hole fix
4759 * XXX For setuid processes, attempt to ensure that
4760 * stdin, stdout, and stderr are already allocated.
4761 * We do not want userland to accidentally allocate
4762 * descriptors in this range which has implied meaning
4763 * to libc.
4764 */
4765 for (i = 0; i < 3; i++) {
4766
4767 if (p->p_fd->fd_ofiles[i] != NULL)
4768 continue;
4769
4770 /*
4771 * Do the kernel equivalent of
4772 *
4773 * if i == 0
4774 * (void) open("/dev/null", O_RDONLY);
4775 * else
4776 * (void) open("/dev/null", O_WRONLY);
4777 */
4778
4779 struct fileproc *fp;
4780 int indx;
4781 int flag;
4782 struct nameidata *ndp = NULL;
4783
4784 if (i == 0)
4785 flag = FREAD;
4786 else
4787 flag = FWRITE;
4788
4789 if ((error = falloc(p,
4790 &fp, &indx, imgp->ip_vfs_context)) != 0)
4791 continue; /* NOTE(review): unlike the failures below this does not stop the loop; 'error' is returned unless a later falloc()/vn_open() overwrites it -- confirm intended */
4792
4793 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
4794 if (ndp == NULL) {
4795 fp_free(p, indx, fp);
4796 error = ENOMEM;
4797 break;
4798 }
4799
4800 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
4801 CAST_USER_ADDR_T("/dev/null"),
4802 imgp->ip_vfs_context);
4803
4804 if ((error = vn_open(ndp, flag, 0)) != 0) {
4805 fp_free(p, indx, fp);
4806 FREE(ndp, M_TEMP);
4807 break;
4808 }
4809
4810 struct fileglob *fg = fp->f_fglob;
4811
 /* point the new descriptor at the vnode that vn_open() returned */
4812 fg->fg_flag = flag;
4813 fg->fg_ops = &vnops;
4814 fg->fg_data = ndp->ni_vp;
4815
4816 vnode_put(ndp->ni_vp);
4817
4818 proc_fdlock(p);
4819 procfdtbl_releasefd(p, indx, NULL);
4820 fp_drop(p, indx, fp, 1);
4821 proc_fdunlock(p);
4822
4823 FREE(ndp, M_TEMP);
4824 }
4825 }
4826 }
4827 #if CONFIG_MACF
4828 else {
4829 /*
4830 * We are here because we were told that the MAC label will
4831 * be transitioned, and the binary is not VSUID or VSGID; to
4832 * deal with this case, we could either duplicate a lot of
4833 * code, or we can indicate we want to default the P_SUGID
4834 * bit clear and jump back up.
4835 */
4836 if (mac_transition) {
4837 leave_sugid_clear = 1;
4838 goto handle_mac_transition;
4839 }
4840 }
4841
4842 #endif /* CONFIG_MACF */
4843
4844 /*
4845 * Implement the semantic where the effective user and group become
4846 * the saved user and group in exec'ed programs.
4847 *
4848 * Modifications to p_ucred must be guarded using the
4849 * proc's ucred lock. This prevents others from accessing
4850 * a garbage credential.
4851 */
4852 for (;;) {
4853 my_cred = kauth_cred_proc_ref(p);
4854 my_new_cred = kauth_cred_setsvuidgid(my_cred, kauth_cred_getuid(my_cred), kauth_cred_getgid(my_cred));
4855
4856 if (my_new_cred == my_cred) {
4857 kauth_cred_unref(&my_cred);
4858 break;
4859 }
4860
4861 /* update cred on proc */
4862 proc_ucred_lock(p);
4863
4864 if (p->p_ucred != my_cred) {
4865 proc_ucred_unlock(p);
4866 kauth_cred_unref(&my_new_cred);
4867 continue;
4868 }
4869
4870 /* donate cred reference on my_new_cred to p->p_ucred */
4871 p->p_ucred = my_new_cred;
4872 PROC_UPDATE_CREDS_ONPROC(p);
4873 proc_ucred_unlock(p);
4874
4875 /* drop additional reference that was taken on the previous cred */
4876 kauth_cred_unref(&my_cred);
4877
4878 break;
4879 }
4880
4881
4882 /* Update the process' identity version and set the security token */
4883 p->p_idversion++;
4884
4885 if (imgp->ip_new_thread != NULL) {
4886 task = get_threadtask(imgp->ip_new_thread);
4887 } else {
4888 task = p->task;
4889 }
4890 set_security_token_task_internal(p, task);
4891
4892 return(error);
4893 }
4894
4895
4896 /*
4897 * create_unix_stack
4898 *
4899 * Description: Set the user stack address for the process to the provided
4900 * address. If a custom stack was not set as a result of the
4901 * load process (i.e. as specified by the image file for the
4902 * executable), then allocate the stack in the provided map and
4903 * set up appropriate guard pages for enforcing administrative
4904 * limits on stack growth, if they end up being needed.
4905 *
4906 * Parameters: p Process to set stack on
4907 * load_result Information from mach-o load commands
4908 * map Address map in which to allocate the new stack
4909 *
4910 * Returns: KERN_SUCCESS Stack successfully created
4911 * !KERN_SUCCESS Mach failure code
4912 */
4913 static kern_return_t
4914 create_unix_stack(vm_map_t map, load_result_t* load_result,
4915 proc_t p)
4916 {
4917 mach_vm_size_t size, prot_size;
4918 mach_vm_offset_t addr, prot_addr;
4919 kern_return_t kr;
4920
4921 mach_vm_address_t user_stack = load_result->user_stack;
4922
4923 proc_lock(p);
4924 p->user_stack = user_stack;
4925 proc_unlock(p);
4926
4927 if (load_result->user_stack_alloc_size > 0) {
4928 /*
4929 * Allocate enough space for the maximum stack size we
4930 * will ever authorize and an extra page to act as
4931 * a guard page for stack overflows. For default stacks,
4932 * vm_initial_limit_stack takes care of the extra guard page.
4933 * Otherwise we must allocate it ourselves.
4934 */
4935 if (mach_vm_round_page_overflow(load_result->user_stack_alloc_size, &size)) {
4936 return KERN_INVALID_ARGUMENT;
4937 }
4938 addr = mach_vm_trunc_page(load_result->user_stack - size);
4939 kr = mach_vm_allocate_kernel(map, &addr, size,
4940 VM_FLAGS_FIXED, VM_MEMORY_STACK);
4941 if (kr != KERN_SUCCESS) {
4942 // Can't allocate at default location, try anywhere
4943 addr = 0;
4944 kr = mach_vm_allocate_kernel(map, &addr, size,
4945 VM_FLAGS_ANYWHERE, VM_MEMORY_STACK);
4946 if (kr != KERN_SUCCESS) {
4947 return kr;
4948 }
4949
4950 user_stack = addr + size;
4951 load_result->user_stack = user_stack;
4952
4953 proc_lock(p);
4954 p->user_stack = user_stack;
4955 proc_unlock(p);
4956 }
4957
4958 load_result->user_stack_alloc = addr;
4959
4960 /*
4961 * And prevent access to what's above the current stack
4962 * size limit for this process.
4963 */
4964 if (load_result->user_stack_size == 0) {
4965 load_result->user_stack_size = unix_stack_size(p);
4966 prot_size = mach_vm_trunc_page(size - load_result->user_stack_size);
4967 } else {
4968 prot_size = PAGE_SIZE;
4969 }
4970
4971 prot_addr = addr;
4972 kr = mach_vm_protect(map,
4973 prot_addr,
4974 prot_size,
4975 FALSE,
4976 VM_PROT_NONE);
4977 if (kr != KERN_SUCCESS) {
4978 (void)mach_vm_deallocate(map, addr, size);
4979 return kr;
4980 }
4981 }
4982
4983 return KERN_SUCCESS;
4984 }
4985
4986 #include <sys/reboot.h>
4987
4988 /*
4989 * load_init_program_at_path
4990 *
4991 * Description: Load the "init" program; in most cases, this will be "launchd"
4992 *
4993 * Parameters: p Process to call execve() to create
4994 * the "init" program
4995 * scratch_addr Page in p, scratch space
4996 * path NULL terminated path
4997 *
4998 * Returns: KERN_SUCCESS Success
4999 * !KERN_SUCCESS See execve/mac_execve for error codes
5000 *
5001 * Notes: The process that is passed in is the first manufactured
5002 * process on the system, and gets here via bsd_ast() firing
5003 * for the first time. This is done to ensure that bsd_init()
5004 * has run to completion.
5005 *
5006 * The address map of the first manufactured process matches the
5007 * word width of the kernel. Once the self-exec completes, the
5008 * initproc might be different.
5009 */
5010 static int
5011 load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
5012 {
5013 int retval[2];
5014 int error;
5015 struct execve_args init_exec_args;
5016 user_addr_t argv0 = USER_ADDR_NULL, argv1 = USER_ADDR_NULL;
5017
5018 /*
5019 * Validate inputs and pre-conditions
5020 */
5021 assert(p);
5022 assert(scratch_addr);
5023 assert(path);
5024
5025 /*
5026 * Copy out program name.
5027 */
5028 size_t path_length = strlen(path) + 1;
5029 argv0 = scratch_addr;
5030 error = copyout(path, argv0, path_length);
5031 if (error)
5032 return error;
5033
5034 scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, sizeof(user_addr_t));
5035
5036 /*
5037 * Put out first (and only) argument, similarly.
5038 * Assumes everything fits in a page as allocated above.
5039 */
5040 if (boothowto & RB_SINGLE) {
5041 const char *init_args = "-s";
5042 size_t init_args_length = strlen(init_args)+1;
5043
5044 argv1 = scratch_addr;
5045 error = copyout(init_args, argv1, init_args_length);
5046 if (error)
5047 return error;
5048
5049 scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, sizeof(user_addr_t));
5050 }
5051
5052 if (proc_is64bit(p)) {
5053 user64_addr_t argv64bit[3];
5054
5055 argv64bit[0] = argv0;
5056 argv64bit[1] = argv1;
5057 argv64bit[2] = USER_ADDR_NULL;
5058
5059 error = copyout(argv64bit, scratch_addr, sizeof(argv64bit));
5060 if (error)
5061 return error;
5062 } else {
5063 user32_addr_t argv32bit[3];
5064
5065 argv32bit[0] = (user32_addr_t)argv0;
5066 argv32bit[1] = (user32_addr_t)argv1;
5067 argv32bit[2] = USER_ADDR_NULL;
5068
5069 error = copyout(argv32bit, scratch_addr, sizeof(argv32bit));
5070 if (error)
5071 return error;
5072 }
5073
5074 /*
5075 * Set up argument block for fake call to execve.
5076 */
5077 init_exec_args.fname = argv0;
5078 init_exec_args.argp = scratch_addr;
5079 init_exec_args.envp = USER_ADDR_NULL;
5080
5081 /*
5082 * So that init task is set with uid,gid 0 token
5083 */
5084 set_security_token(p);
5085
5086 return execve(p, &init_exec_args, retval);
5087 }
5088
/*
 * Candidate paths for the init program (launchd).  The launchd.debug entry
 * is compiled in only for DEBUG builds and the launchd.development entry
 * only for DEVELOPMENT or DEBUG builds; a build with neither contains just
 * "/sbin/launchd".
 * NOTE(review): presumably tried in array order by load_init_program(),
 * matching the "Search order by build" table in its header comment --
 * confirm against the loop that walks this array.
 */
5089 static const char * init_programs[] = {
5090 #if DEBUG
5091 "/usr/local/sbin/launchd.debug",
5092 #endif
5093 #if DEVELOPMENT || DEBUG
5094 "/usr/local/sbin/launchd.development",
5095 #endif
5096 "/sbin/launchd",
5097 };
5098
5099 /*
5100 * load_init_program
5101 *
5102 * Description: Load the "init" program; in most cases, this will be "launchd"
5103 *
5104 * Parameters: p Process to call execve() to create
5105 * the "init" program
5106 *
5107 * Returns: (void)
5108 *
5109 * Notes: The process that is passed in is the first manufactured
5110 * process on the system, and gets here via bsd_ast() firing
5111 * for the first time. This is done to ensure that bsd_init()
5112 * has run to completion.
5113 *
5114 * In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
5115 * may be used to select a specific launchd executable. As with
5116 * the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
5117 * will force /sbin/launchd to be selected.
5118 *
5119 * Search order by build:
5120 *
5121 * DEBUG DEVELOPMENT RELEASE PATH
5122 * ----------------------------------------------------------------------------------
5123 * 1 1 NA /usr/local/sbin/launchd.$LAUNCHDSUFFIX
5124 * 2 NA NA /usr/local/sbin/launchd.debug
5125 * 3 2 NA /usr/local/sbin/launchd.development
5126 * 4 3 1 /sbin/launchd
5127 */
5128 void
5129 load_init_program(proc_t p)
5130 {
5131 uint32_t i;
5132 int error;
5133 vm_map_t map = current_map();
5134 mach_vm_offset_t scratch_addr = 0;
5135 mach_vm_size_t map_page_size = vm_map_page_size(map);
5136
5137 (void) mach_vm_allocate_kernel(map, &scratch_addr, map_page_size, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE);
5138 #if CONFIG_MEMORYSTATUS
5139 (void) memorystatus_init_at_boot_snapshot();
5140 #endif /* CONFIG_MEMORYSTATUS */
5141
5142 #if DEBUG || DEVELOPMENT
5143 /* Check for boot-arg suffix first */
5144 char launchd_suffix[64];
5145 if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
5146 char launchd_path[128];
5147 boolean_t is_release_suffix = ((launchd_suffix[0] == 0) ||
5148 (strcmp(launchd_suffix, "release") == 0));
5149
5150 if (is_release_suffix) {
5151 printf("load_init_program: attempting to load /sbin/launchd\n");
5152 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, "/sbin/launchd");
5153 if (!error)
5154 return;
5155
5156 panic("Process 1 exec of launchd.release failed, errno %d", error);
5157 } else {
5158 strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path));
5159 strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
5160
5161 printf("load_init_program: attempting to load %s\n", launchd_path);
5162 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, launchd_path);
5163 if (!error) {
5164 return;
5165 } else {
5166 printf("load_init_program: failed loading %s: errno %d\n", launchd_path, error);
5167 }
5168 }
5169 }
5170 #endif
5171
5172 error = ENOENT;
5173 for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) {
5174 printf("load_init_program: attempting to load %s\n", init_programs[i]);
5175 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, init_programs[i]);
5176 if (!error) {
5177 return;
5178 } else {
5179 printf("load_init_program: failed loading %s: errno %d\n", init_programs[i], error);
5180 }
5181 }
5182
5183 panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i-1]), error);
5184 }
5185
5186 /*
5187 * load_return_to_errno
5188 *
5189 * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
5190 *
5191 * Parameters: lrtn Mach error number
5192 *
5193 * Returns: (int) BSD error number
5194 * 0 Success
5195 * EBADARCH Bad architecture
5196 * EBADMACHO Bad Mach object file
5197 * ESHLIBVERS Bad shared library version
5198 * ENOMEM Out of memory/resource shortage
5199 * EACCES Access denied
5200 * ENOENT Entry not found (usually "file does
5201 * does not exist")
5202 * EIO An I/O error occurred
5203 * EBADEXEC The executable is corrupt/unknown
5204 */
5205 static int
5206 load_return_to_errno(load_return_t lrtn)
5207 {
5208 switch (lrtn) {
5209 case LOAD_SUCCESS:
5210 return 0;
5211 case LOAD_BADARCH:
5212 case LOAD_BADARCH_X86:
5213 return EBADARCH;
5214 case LOAD_BADMACHO:
5215 case LOAD_BADMACHO_UPX:
5216 return EBADMACHO;
5217 case LOAD_SHLIB:
5218 return ESHLIBVERS;
5219 case LOAD_NOSPACE:
5220 case LOAD_RESOURCE:
5221 return ENOMEM;
5222 case LOAD_PROTECT:
5223 return EACCES;
5224 case LOAD_ENOENT:
5225 return ENOENT;
5226 case LOAD_IOERROR:
5227 return EIO;
5228 case LOAD_FAILURE:
5229 case LOAD_DECRYPTFAIL:
5230 default:
5231 return EBADEXEC;
5232 }
5233 }
5234
5235 #include <mach/mach_types.h>
5236 #include <mach/vm_prot.h>
5237 #include <mach/semaphore.h>
5238 #include <mach/sync_policy.h>
5239 #include <kern/clock.h>
5240 #include <mach/kern_return.h>
5241
5242 /*
5243 * execargs_alloc
5244 *
5245 * Description: Allocate the block of memory used by the execve arguments.
5246 * At the same time, we allocate a page so that we can read in
5247 * the first page of the image.
5248 *
5249 * Parameters: struct image_params * the image parameter block
5250 *
5251 * Returns: 0 Success
5252 * EINVAL Invalid argument
5253 * EACCES Permission denied
5254 * EINTR Interrupted function
5255 * ENOMEM Not enough space
5256 *
5257 * Notes: This is a temporary allocation into the kernel address space
5258 * to enable us to copy arguments in from user space. This is
5259 * necessitated by not mapping the process calling execve() into
5260 * the kernel address space during the execve() system call.
5261 *
5262 * We assemble the argument and environment, etc., into this
5263 * region before copying it as a single block into the child
5264 * process address space (at the top or bottom of the stack,
5265 * depending on which way the stack grows; see the function
5266 * exec_copyout_strings() for details).
5267 *
5268 * This ends up with a second (possibly unnecessary) copy compared
 *		with assembling the data directly into the child address space,
5270 * instead, but since we cannot be guaranteed that the parent has
5271 * not modified its environment, we can't really know that it's
5272 * really a block there as well.
5273 */
5274
5275
5276 static int execargs_waiters = 0;
5277 lck_mtx_t *execargs_cache_lock;
5278
5279 static void
5280 execargs_lock_lock(void) {
5281 lck_mtx_lock_spin(execargs_cache_lock);
5282 }
5283
5284 static void
5285 execargs_lock_unlock(void) {
5286 lck_mtx_unlock(execargs_cache_lock);
5287 }
5288
5289 static wait_result_t
5290 execargs_lock_sleep(void) {
5291 return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
5292 }
5293
5294 static kern_return_t
5295 execargs_purgeable_allocate(char **execarg_address) {
5296 kern_return_t kr = vm_allocate_kernel(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE, VM_KERN_MEMORY_NONE);
5297 assert(kr == KERN_SUCCESS);
5298 return kr;
5299 }
5300
5301 static kern_return_t
5302 execargs_purgeable_reference(void *execarg_address) {
5303 int state = VM_PURGABLE_NONVOLATILE;
5304 kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
5305
5306 assert(kr == KERN_SUCCESS);
5307 return kr;
5308 }
5309
5310 static kern_return_t
5311 execargs_purgeable_volatilize(void *execarg_address) {
5312 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
5313 kern_return_t kr;
5314 kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
5315
5316 assert(kr == KERN_SUCCESS);
5317
5318 return kr;
5319 }
5320
5321 static void
5322 execargs_wakeup_waiters(void) {
5323 thread_wakeup(&execargs_free_count);
5324 }
5325
5326 static int
5327 execargs_alloc(struct image_params *imgp)
5328 {
5329 kern_return_t kret;
5330 wait_result_t res;
5331 int i, cache_index = -1;
5332
5333 execargs_lock_lock();
5334
5335 while (execargs_free_count == 0) {
5336 execargs_waiters++;
5337 res = execargs_lock_sleep();
5338 execargs_waiters--;
5339 if (res != THREAD_AWAKENED) {
5340 execargs_lock_unlock();
5341 return (EINTR);
5342 }
5343 }
5344
5345 execargs_free_count--;
5346
5347 for (i = 0; i < execargs_cache_size; i++) {
5348 vm_offset_t element = execargs_cache[i];
5349 if (element) {
5350 cache_index = i;
5351 imgp->ip_strings = (char *)(execargs_cache[i]);
5352 execargs_cache[i] = 0;
5353 break;
5354 }
5355 }
5356
5357 assert(execargs_free_count >= 0);
5358
5359 execargs_lock_unlock();
5360
5361 if (cache_index == -1) {
5362 kret = execargs_purgeable_allocate(&imgp->ip_strings);
5363 }
5364 else
5365 kret = execargs_purgeable_reference(imgp->ip_strings);
5366
5367 assert(kret == KERN_SUCCESS);
5368 if (kret != KERN_SUCCESS) {
5369 return (ENOMEM);
5370 }
5371
5372 /* last page used to read in file headers */
5373 imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
5374 imgp->ip_strendp = imgp->ip_strings;
5375 imgp->ip_argspace = NCARGS;
5376 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
5377
5378 return (0);
5379 }
5380
5381 /*
5382 * execargs_free
5383 *
5384 * Description: Free the block of memory used by the execve arguments and the
5385 * first page of the executable by a previous call to the function
5386 * execargs_alloc().
5387 *
5388 * Parameters: struct image_params * the image parameter block
5389 *
5390 * Returns: 0 Success
5391 * EINVAL Invalid argument
5392 * EINTR Oeration interrupted
5393 */
5394 static int
5395 execargs_free(struct image_params *imgp)
5396 {
5397 kern_return_t kret;
5398 int i;
5399 boolean_t needs_wakeup = FALSE;
5400
5401 kret = execargs_purgeable_volatilize(imgp->ip_strings);
5402
5403 execargs_lock_lock();
5404 execargs_free_count++;
5405
5406 for (i = 0; i < execargs_cache_size; i++) {
5407 vm_offset_t element = execargs_cache[i];
5408 if (element == 0) {
5409 execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
5410 imgp->ip_strings = NULL;
5411 break;
5412 }
5413 }
5414
5415 assert(imgp->ip_strings == NULL);
5416
5417 if (execargs_waiters > 0)
5418 needs_wakeup = TRUE;
5419
5420 execargs_lock_unlock();
5421
5422 if (needs_wakeup == TRUE)
5423 execargs_wakeup_waiters();
5424
5425 return ((kret == KERN_SUCCESS ? 0 : EINVAL));
5426 }
5427
5428 static void
5429 exec_resettextvp(proc_t p, struct image_params *imgp)
5430 {
5431 vnode_t vp;
5432 off_t offset;
5433 vnode_t tvp = p->p_textvp;
5434 int ret;
5435
5436 vp = imgp->ip_vp;
5437 offset = imgp->ip_arch_offset;
5438
5439 if (vp == NULLVP)
5440 panic("exec_resettextvp: expected valid vp");
5441
5442 ret = vnode_ref(vp);
5443 proc_lock(p);
5444 if (ret == 0) {
5445 p->p_textvp = vp;
5446 p->p_textoff = offset;
5447 } else {
5448 p->p_textvp = NULLVP; /* this is paranoia */
5449 p->p_textoff = 0;
5450 }
5451 proc_unlock(p);
5452
5453 if ( tvp != NULLVP) {
5454 if (vnode_getwithref(tvp) == 0) {
5455 vnode_rele(tvp);
5456 vnode_put(tvp);
5457 }
5458 }
5459
5460 }
5461
5462 // Includes the 0-byte (therefore "SIZE" instead of "LEN").
5463 static const size_t CS_CDHASH_STRING_SIZE = CS_CDHASH_LEN * 2 + 1;
5464
5465 static void cdhash_to_string(char str[CS_CDHASH_STRING_SIZE], uint8_t const * const cdhash) {
5466 static char const nibble[] = "0123456789abcdef";
5467
5468 /* Apparently still the safest way to get a hex representation
5469 * of binary data.
5470 * xnu's printf routines have %*D/%20D in theory, but "not really", see:
5471 * <rdar://problem/33328859> confusion around %*D/%nD in printf
5472 */
5473 for (int i = 0; i < CS_CDHASH_LEN; ++i) {
5474 str[i*2] = nibble[(cdhash[i] & 0xf0) >> 4];
5475 str[i*2+1] = nibble[cdhash[i] & 0x0f];
5476 }
5477 str[CS_CDHASH_STRING_SIZE - 1] = 0;
5478 }
5479
5480 /*
5481 * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
5482 *
5483 * Description: Waits for the userspace daemon to respond to the request
5484 * we made. Function declared non inline to be visible in
5485 * stackshots and spindumps as well as debugging.
5486 */
5487 __attribute__((noinline)) int
5488 __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
5489 {
5490 return find_code_signature(task_access_port, new_pid);
5491 }
5492
5493 static int
5494 check_for_signature(proc_t p, struct image_params *imgp)
5495 {
5496 mach_port_t port = NULL;
5497 kern_return_t kr = KERN_FAILURE;
5498 int error = EACCES;
5499 boolean_t unexpected_failure = FALSE;
5500 struct cs_blob *csb;
5501 boolean_t require_success = FALSE;
5502 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
5503 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
5504 os_reason_t signature_failure_reason = OS_REASON_NULL;
5505
5506 /*
5507 * Override inherited code signing flags with the
5508 * ones for the process that is being successfully
5509 * loaded
5510 */
5511 proc_lock(p);
5512 p->p_csflags = imgp->ip_csflags;
5513 proc_unlock(p);
5514
5515 /* Set the switch_protect flag on the map */
5516 if(p->p_csflags & (CS_HARD|CS_KILL)) {
5517 vm_map_switch_protect(get_task_map(p->task), TRUE);
5518 }
5519
5520 /*
5521 * image activation may be failed due to policy
5522 * which is unexpected but security framework does not
5523 * approve of exec, kill and return immediately.
5524 */
5525 if (imgp->ip_mac_return != 0) {
5526
5527 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5528 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY, 0, 0);
5529 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
5530 error = imgp->ip_mac_return;
5531 unexpected_failure = TRUE;
5532 goto done;
5533 }
5534
5535 if (imgp->ip_cs_error != OS_REASON_NULL) {
5536 signature_failure_reason = imgp->ip_cs_error;
5537 imgp->ip_cs_error = OS_REASON_NULL;
5538 error = EACCES;
5539 goto done;
5540 }
5541
5542 /* If the code signature came through the image activation path, we skip the
5543 * taskgated / externally attached path. */
5544 if (imgp->ip_csflags & CS_SIGNED) {
5545 error = 0;
5546 goto done;
5547 }
5548
5549 /* The rest of the code is for signatures that either already have been externally
5550 * attached (likely, but not necessarily by a previous run through the taskgated
5551 * path), or that will now be attached by taskgated. */
5552
5553 kr = task_get_task_access_port(p->task, &port);
5554 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
5555 error = 0;
5556 if (require_success) {
5557 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5558 p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT, 0, 0);
5559 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT);
5560 error = EACCES;
5561 }
5562 goto done;
5563 }
5564
5565 /*
5566 * taskgated returns KERN_SUCCESS if it has completed its work
5567 * and the exec should continue, KERN_FAILURE if the exec should
5568 * fail, or it may error out with different error code in an
5569 * event of mig failure (e.g. process was signalled during the
5570 * rpc call, taskgated died, mig server died etc.).
5571 */
5572
5573 kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
5574 switch (kr) {
5575 case KERN_SUCCESS:
5576 error = 0;
5577 break;
5578 case KERN_FAILURE:
5579 error = EACCES;
5580
5581 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5582 p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG, 0, 0);
5583 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG);
5584 goto done;
5585 default:
5586 error = EACCES;
5587
5588 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5589 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER, 0, 0);
5590 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER);
5591 unexpected_failure = TRUE;
5592 goto done;
5593 }
5594
5595 /* Only do this if exec_resettextvp() did not fail */
5596 if (p->p_textvp != NULLVP) {
5597 csb = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff);
5598
5599 if (csb != NULL) {
5600 /* As the enforcement we can do here is very limited, we only allow things that
5601 * are the only reason why this code path still exists:
5602 * Adhoc signed non-platform binaries without special cs_flags and without any
5603 * entitlements (unrestricted ones still pass AMFI). */
5604 if (
5605 /* Revalidate the blob if necessary through bumped generation count. */
5606 (ubc_cs_generation_check(p->p_textvp) == 0 ||
5607 ubc_cs_blob_revalidate(p->p_textvp, csb, imgp, 0) == 0) &&
5608 /* Only CS_ADHOC, no CS_KILL, CS_HARD etc. */
5609 (csb->csb_flags & CS_ALLOWED_MACHO) == CS_ADHOC &&
5610 /* If it has a CMS blob, it's not adhoc. The CS_ADHOC flag can lie. */
5611 csblob_find_blob_bytes((const uint8_t *)csb->csb_mem_kaddr, csb->csb_mem_size,
5612 CSSLOT_SIGNATURESLOT,
5613 CSMAGIC_BLOBWRAPPER) == NULL &&
5614 /* It could still be in a trust cache (unlikely with CS_ADHOC), or a magic path. */
5615 csb->csb_platform_binary == 0 &&
5616 /* No entitlements, not even unrestricted ones. */
5617 csb->csb_entitlements_blob == NULL) {
5618
5619 proc_lock(p);
5620 p->p_csflags |= CS_SIGNED | CS_VALID;
5621 proc_unlock(p);
5622
5623 } else {
5624 uint8_t cdhash[CS_CDHASH_LEN];
5625 char cdhash_string[CS_CDHASH_STRING_SIZE];
5626 proc_getcdhash(p, cdhash);
5627 cdhash_to_string(cdhash_string, cdhash);
5628 printf("ignoring detached code signature on '%s' with cdhash '%s' "
5629 "because it is invalid, or not a simple adhoc signature.\n",
5630 p->p_name, cdhash_string);
5631 }
5632
5633 }
5634 }
5635
5636 done:
5637 if (0 == error) {
5638 /* The process's code signature related properties are
5639 * fully set up, so this is an opportune moment to log
5640 * platform binary execution, if desired. */
5641 if (platform_exec_logging != 0 && csproc_get_platform_binary(p)) {
5642 uint8_t cdhash[CS_CDHASH_LEN];
5643 char cdhash_string[CS_CDHASH_STRING_SIZE];
5644 proc_getcdhash(p, cdhash);
5645 cdhash_to_string(cdhash_string, cdhash);
5646
5647 os_log(peLog, "CS Platform Exec Logging: Executing platform signed binary "
5648 "'%s' with cdhash %s\n", p->p_name, cdhash_string);
5649 }
5650 } else {
5651 if (!unexpected_failure)
5652 p->p_csflags |= CS_KILLED;
5653 /* make very sure execution fails */
5654 if (vfexec || spawn) {
5655 assert(signature_failure_reason != OS_REASON_NULL);
5656 psignal_vfork_with_reason(p, p->task, imgp->ip_new_thread,
5657 SIGKILL, signature_failure_reason);
5658 signature_failure_reason = OS_REASON_NULL;
5659 error = 0;
5660 } else {
5661 assert(signature_failure_reason != OS_REASON_NULL);
5662 psignal_with_reason(p, SIGKILL, signature_failure_reason);
5663 signature_failure_reason = OS_REASON_NULL;
5664 }
5665 }
5666
5667 /* If we hit this, we likely would have leaked an exit reason */
5668 assert(signature_failure_reason == OS_REASON_NULL);
5669 return error;
5670 }
5671
5672 /*
5673 * Typically as soon as we start executing this process, the
5674 * first instruction will trigger a VM fault to bring the text
5675 * pages (as executable) into the address space, followed soon
5676 * thereafter by dyld data structures (for dynamic executable).
5677 * To optimize this, as well as improve support for hardware
5678 * debuggers that can only access resident pages present
5679 * in the process' page tables, we prefault some pages if
5680 * possible. Errors are non-fatal.
5681 */
5682 static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
5683 {
5684 int ret;
5685 size_t expected_all_image_infos_size;
5686
5687 /*
5688 * Prefault executable or dyld entry point.
5689 */
5690 vm_fault(current_map(),
5691 vm_map_trunc_page(load_result->entry_point,
5692 vm_map_page_mask(current_map())),
5693 VM_PROT_READ | VM_PROT_EXECUTE,
5694 FALSE, VM_KERN_MEMORY_NONE,
5695 THREAD_UNINT, NULL, 0);
5696
5697 if (imgp->ip_flags & IMGPF_IS_64BIT) {
5698 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
5699 } else {
5700 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
5701 }
5702
5703 /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
5704 if (load_result->dynlinker &&
5705 load_result->all_image_info_addr &&
5706 load_result->all_image_info_size >= expected_all_image_infos_size) {
5707 union {
5708 struct user64_dyld_all_image_infos infos64;
5709 struct user32_dyld_all_image_infos infos32;
5710 } all_image_infos;
5711
5712 /*
5713 * Pre-fault to avoid copyin() going through the trap handler
5714 * and recovery path.
5715 */
5716 vm_fault(current_map(),
5717 vm_map_trunc_page(load_result->all_image_info_addr,
5718 vm_map_page_mask(current_map())),
5719 VM_PROT_READ | VM_PROT_WRITE,
5720 FALSE, VM_KERN_MEMORY_NONE,
5721 THREAD_UNINT, NULL, 0);
5722 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
5723 /* all_image_infos straddles a page */
5724 vm_fault(current_map(),
5725 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
5726 vm_map_page_mask(current_map())),
5727 VM_PROT_READ | VM_PROT_WRITE,
5728 FALSE, VM_KERN_MEMORY_NONE,
5729 THREAD_UNINT, NULL, 0);
5730 }
5731
5732 ret = copyin(load_result->all_image_info_addr,
5733 &all_image_infos,
5734 expected_all_image_infos_size);
5735 if (ret == 0 && all_image_infos.infos32.version >= DYLD_ALL_IMAGE_INFOS_ADDRESS_MINIMUM_VERSION) {
5736
5737 user_addr_t notification_address;
5738 user_addr_t dyld_image_address;
5739 user_addr_t dyld_version_address;
5740 user_addr_t dyld_all_image_infos_address;
5741 user_addr_t dyld_slide_amount;
5742
5743 if (imgp->ip_flags & IMGPF_IS_64BIT) {
5744 notification_address = all_image_infos.infos64.notification;
5745 dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
5746 dyld_version_address = all_image_infos.infos64.dyldVersion;
5747 dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
5748 } else {
5749 notification_address = all_image_infos.infos32.notification;
5750 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
5751 dyld_version_address = all_image_infos.infos32.dyldVersion;
5752 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
5753 }
5754
5755 /*
5756 * dyld statically sets up the all_image_infos in its Mach-O
5757 * binary at static link time, with pointers relative to its default
5758 * load address. Since ASLR might slide dyld before its first
5759 * instruction is executed, "dyld_slide_amount" tells us how far
5760 * dyld was loaded compared to its default expected load address.
5761 * All other pointers into dyld's image should be adjusted by this
5762 * amount. At some point later, dyld will fix up pointers to take
5763 * into account the slide, at which point the all_image_infos_address
5764 * field in the structure will match the runtime load address, and
5765 * "dyld_slide_amount" will be 0, if we were to consult it again.
5766 */
5767
5768 dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
5769
5770 #if 0
5771 kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
5772 (uint64_t)load_result->all_image_info_addr,
5773 all_image_infos.infos32.version,
5774 (uint64_t)notification_address,
5775 (uint64_t)dyld_image_address,
5776 (uint64_t)dyld_version_address,
5777 (uint64_t)dyld_all_image_infos_address);
5778 #endif
5779
5780 vm_fault(current_map(),
5781 vm_map_trunc_page(notification_address + dyld_slide_amount,
5782 vm_map_page_mask(current_map())),
5783 VM_PROT_READ | VM_PROT_EXECUTE,
5784 FALSE, VM_KERN_MEMORY_NONE,
5785 THREAD_UNINT, NULL, 0);
5786 vm_fault(current_map(),
5787 vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
5788 vm_map_page_mask(current_map())),
5789 VM_PROT_READ | VM_PROT_EXECUTE,
5790 FALSE, VM_KERN_MEMORY_NONE,
5791 THREAD_UNINT, NULL, 0);
5792 vm_fault(current_map(),
5793 vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
5794 vm_map_page_mask(current_map())),
5795 VM_PROT_READ,
5796 FALSE, VM_KERN_MEMORY_NONE,
5797 THREAD_UNINT, NULL, 0);
5798 vm_fault(current_map(),
5799 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
5800 vm_map_page_mask(current_map())),
5801 VM_PROT_READ | VM_PROT_WRITE,
5802 FALSE, VM_KERN_MEMORY_NONE,
5803 THREAD_UNINT, NULL, 0);
5804 }
5805 }
5806 }