/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)kern_exec.c	8.1 (Berkeley) 6/10/93
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <machine/reg.h>
#include <machine/cpu_capabilities.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/user.h>
#include <sys/socketvar.h>
#include <sys/malloc.h>
#include <sys/namei.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/uio_internal.h>
#include <sys/acct.h>
#include <sys/exec.h>
#include <sys/kdebug.h>
#include <sys/signal.h>
#include <sys/aio_kern.h>
#include <sys/sysproto.h>
#include <sys/persona.h>
#include <sys/reason.h>
#if SYSV_SHM
#include <sys/shm_internal.h>		/* shmexec() */
#endif
#include <sys/ubc_internal.h>		/* ubc_map() */
#include <sys/spawn.h>
#include <sys/spawn_internal.h>
#include <sys/process_policy.h>
#include <sys/codesign.h>
#include <sys/random.h>
#include <crypto/sha1.h>

#include <libkern/libkern.h>

#include <security/audit/audit.h>

#include <ipc/ipc_types.h>

#include <mach/mach_types.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/thread_act.h>
#include <mach/vm_map.h>
#include <mach/mach_vm.h>
#include <mach/vm_param.h>

#include <kern/sched_prim.h>	/* thread_wakeup() */
#include <kern/affinity.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/coalition.h>
#include <kern/policy_internal.h>
#include <kern/kalloc.h>

#if CONFIG_MACF
#include <security/mac.h>
#include <security/mac_mach_internal.h>
#endif

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_pageout.h>

#include <kdp/kdp_dyld.h>

#include <machine/pal_routines.h>

#include <pexpert/pexpert.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif

#if CONFIG_DTRACE
/* Do not include dtrace.h, it redefines kmem_[alloc/free] */
extern void dtrace_proc_exec(proc_t);
extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);

/*
 * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
 * we will store its value before actually calling it.
 */
static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;

#include <sys/dtrace_ptss.h>
#endif

/* support for child creation in exec after vfork */
thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit, int in_exec);
void vfork_exit(proc_t p, int rv);
extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
extern void task_set_did_exec_flag(task_t task);
extern void task_clear_exec_copy_flag(task_t task);
proc_t proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread);
boolean_t task_is_active(task_t);
boolean_t thread_is_active(thread_t thread);
void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread);
void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task);
extern void ipc_importance_release(void *elem);

/*
 * Mach things for which prototypes are unavailable from Mach headers
 */
void		ipc_task_reset(
			task_t		task);
void		ipc_thread_reset(
			thread_t	thread);
kern_return_t ipc_object_copyin(
	ipc_space_t		space,
	mach_port_name_t	name,
	mach_msg_type_name_t	msgt_name,
	ipc_object_t		*objectp);
void ipc_port_release_send(ipc_port_t);

#if DEVELOPMENT || DEBUG
void task_importance_update_owner_info(task_t);
#endif

extern struct savearea *get_user_regs(thread_t);

__attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);

#include <kern/thread.h>
#include <kern/task.h>
#include <kern/ast.h>
#include <kern/mach_loader.h>
#include <kern/mach_fat.h>
#include <mach-o/fat.h>
#include <mach-o/loader.h>
#include <machine/vmparam.h>
#include <sys/imgact.h>

#include <sys/sdt.h>


/*
 * EAI_ITERLIMIT	The maximum number of times to iterate an image
 *			activator in exec_activate_image() before treating
 *			it as malformed/corrupt.
 */
#define EAI_ITERLIMIT		3

/*
 * For #! interpreter parsing
 */
#define IS_WHITESPACE(ch) ((ch == ' ') || (ch == '\t'))
#define IS_EOL(ch) ((ch == '#') || (ch == '\n'))

extern vm_map_t bsd_pageable_map;
extern const struct fileops vnops;

#define	USER_ADDR_ALIGN(addr, val) \
	( ( (user_addr_t)(addr) + (val) - 1) \
	& ~((val) - 1) )

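/*
 * Added editorial note: USER_ADDR_ALIGN() rounds a user address up to
 * the next multiple of 'val', which is assumed to be a power of two.
 * For example (hypothetical values):
 *
 *	USER_ADDR_ALIGN(0x1001, 8) == 0x1008
 *	USER_ADDR_ALIGN(0x1008, 8) == 0x1008	(already aligned)
 */
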
struct image_params;	/* Forward */
static int exec_activate_image(struct image_params *imgp);
static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
static int load_return_to_errno(load_return_t lrtn);
static int execargs_alloc(struct image_params *imgp);
static int execargs_free(struct image_params *imgp);
static int exec_check_permissions(struct image_params *imgp);
static int exec_extract_strings(struct image_params *imgp);
static int exec_add_apple_strings(struct image_params *imgp, const load_result_t *load_result);
static int exec_handle_sugid(struct image_params *imgp);
static int sugid_scripts = 0;
SYSCTL_INT(_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
static kern_return_t create_unix_stack(vm_map_t map, load_result_t *load_result, proc_t p);
static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
static void exec_resettextvp(proc_t, struct image_params *);
static int check_for_signature(proc_t, struct image_params *);
static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
static errno_t exec_handle_port_actions(struct image_params *imgp, boolean_t *portwatch_present, ipc_port_t *portwatch_ports);
static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
			ipc_port_t *portwatch_ports, int portwatch_count);

/*
 * exec_add_user_string
 *
 * Add the requested string to the string space area.
 *
 * Parameters:	struct image_params *	image parameter block
 *		user_addr_t		string to add to strings area
 *		int			segment from which string comes
 *		boolean_t		TRUE if string contributes to NCARGS
 *
 * Returns:	0			Success
 *		!0			Failure errno from copyinstr()
 *
 * Implicit returns:
 *		(imgp->ip_strendp)	updated location of next add, if any
 *		(imgp->ip_strspace)	updated byte count of space remaining
 *		(imgp->ip_argspace)	updated byte count of space in NCARGS
 */
static int
exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
{
	int error = 0;

	do {
		size_t len = 0;
		int space;

		if (is_ncargs)
			space = imgp->ip_argspace;	/* by definition smaller than ip_strspace */
		else
			space = imgp->ip_strspace;

		if (space <= 0) {
			error = E2BIG;
			break;
		}

		if (!UIO_SEG_IS_USER_SPACE(seg)) {
			char *kstr = CAST_DOWN(char *, str);	/* SAFE */
			error = copystr(kstr, imgp->ip_strendp, space, &len);
		} else {
			error = copyinstr(str, imgp->ip_strendp, space, &len);
		}

		imgp->ip_strendp += len;
		imgp->ip_strspace -= len;
		if (is_ncargs)
			imgp->ip_argspace -= len;

	} while (error == ENAMETOOLONG);

	return error;
}
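
/*
 * Added editorial note: when a string does not fit, copyinstr()/copystr()
 * return ENAMETOOLONG after filling the remaining 'space' bytes; the
 * do/while above then loops, finds the space exhausted, and reports
 * E2BIG, which is the error execve(2) is expected to return when the
 * argument area overflows.
 */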

/*
 * dyld is now passed the executable path as a getenv-like variable
 * in the same fashion as the stack_guard and malloc_entropy keys.
 */
#define	EXECUTABLE_KEY "executable_path="

/*
 * exec_save_path
 *
 * To support new app package launching for Mac OS X, the dyld needs the
 * first argument to execve() stored on the user stack.
 *
 * Save the executable path name at the bottom of the strings area and set
 * the argument vector pointer to the location following that to indicate
 * the start of the argument and environment tuples, setting the remaining
 * string space count to the size of the string area minus the path length.
 *
 * Parameters:	struct image_params *	image parameter block
 *		char *			path used to invoke program
 *		int			segment from which path comes
 *
 * Returns:	int			0	Success
 *		EFAULT				Bad address
 *	copy[in]str:EFAULT			Bad address
 *	copy[in]str:ENAMETOOLONG		Filename too long
 *
 * Implicit returns:
 *		(imgp->ip_strings)		saved path
 *		(imgp->ip_strspace)		space remaining in ip_strings
 *		(imgp->ip_strendp)		start of remaining copy area
 *		(imgp->ip_argspace)		space remaining of NCARGS
 *		(imgp->ip_applec)		Initial applev[0]
 *
 * Note:	We have to do this before the initial namei() since, if the
 *		path contains symbolic links, namei() will overwrite the
 *		original path buffer contents.  If the last symbolic link
 *		resolved was a relative pathname, we would lose the original
 *		"path", which could be an absolute pathname.  This might be
 *		unacceptable for dyld.
 */
static int
exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath)
{
	int error;
	size_t len;
	char *kpath;

	// imgp->ip_strings can come out of a cache, so we need to obliterate the
	// old path.
	memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);

	len = MIN(MAXPATHLEN, imgp->ip_strspace);

	switch (seg) {
	case UIO_USERSPACE32:
	case UIO_USERSPACE64:	/* Same for copyin()... */
		error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
		break;
	case UIO_SYSSPACE:
		kpath = CAST_DOWN(char *, path);	/* SAFE */
		error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
		break;
	default:
		error = EFAULT;
		break;
	}

	if (!error) {
		bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
		len += strlen(EXECUTABLE_KEY);

		imgp->ip_strendp += len;
		imgp->ip_strspace -= len;

		if (excpath) {
			*excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
		}
	}

	return(error);
}
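
/*
 * Added editorial sketch: after exec_save_path() the bottom of the
 * strings area holds the key/value pair consumed by dyld, e.g. (with a
 * hypothetical path):
 *
 *	ip_strings:	"executable_path=/bin/ls\0"
 *	ip_strendp:	points just past the trailing NUL
 *
 * argv/env strings are appended from ip_strendp onward by
 * exec_add_user_string().
 */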

/*
 * exec_reset_save_path
 *
 * If we detect a shell script, we need to reset the string area
 * state so that the interpreter can be saved onto the stack.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	int			0	Success
 *
 * Implicit returns:
 *		(imgp->ip_strings)		saved path
 *		(imgp->ip_strspace)		space remaining in ip_strings
 *		(imgp->ip_strendp)		start of remaining copy area
 *		(imgp->ip_argspace)		space remaining of NCARGS
 *
 */
static int
exec_reset_save_path(struct image_params *imgp)
{
	imgp->ip_strendp = imgp->ip_strings;
	imgp->ip_argspace = NCARGS;
	imgp->ip_strspace = ( NCARGS + PAGE_SIZE );

	return (0);
}
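
/*
 * Added editorial note: the string area is sized one page larger than
 * NCARGS (see ip_strspace above); the extra page absorbs the saved
 * executable path and the apple[] vector strings, so they are not
 * charged against the POSIX argument-space limit tracked separately
 * in ip_argspace.
 */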

/*
 * exec_shell_imgact
 *
 * Image activator for interpreter scripts.  If the image begins with
 * the characters "#!", then it is an interpreter script.  Verify the
 * length of the script line indicating the interpreter is not in
 * excess of the maximum allowed size.  If this is the case, then
 * break out the arguments, if any, which are separated by white
 * space, and copy them into the argument save area as if they were
 * provided on the command line before all other arguments.  The line
 * ends when we encounter a comment character ('#') or newline.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	-1			not an interpreter (keep looking)
 *		-3			Success: interpreter: relookup
 *		>0			Failure: interpreter: error number
 *
 * A return value other than -1 indicates subsequent image activators should
 * not be given the opportunity to attempt to activate the image.
 */
static int
exec_shell_imgact(struct image_params *imgp)
{
	char *vdata = imgp->ip_vdata;
	char *ihp;
	char *line_startp, *line_endp;
	char *interp;
	proc_t p;
	struct fileproc *fp;
	int fd;
	int error;

	/*
	 * Make sure it's a shell script.  If we've already redirected
	 * from an interpreted file once, don't do it again.
	 */
	if (vdata[0] != '#' ||
	    vdata[1] != '!' ||
	    (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
		return (-1);
	}

	if (imgp->ip_origcputype != 0) {
		/* Fat header previously matched, don't allow shell script inside */
		return (-1);
	}

	imgp->ip_flags |= IMGPF_INTERPRET;
	imgp->ip_interp_sugid_fd = -1;
	imgp->ip_interp_buffer[0] = '\0';

	/* Check to see if SUGID scripts are permitted.  If they aren't then
	 * clear the SUGID bits.
	 * imgp->ip_vattr is known to be valid.
	 */
	if (sugid_scripts == 0) {
		imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
	}

	/* Try to find the first non-whitespace character */
	for (ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++) {
		if (IS_EOL(*ihp)) {
			/* Did not find interpreter, "#!\n" */
			return (ENOEXEC);
		} else if (IS_WHITESPACE(*ihp)) {
			/* Whitespace, like "#! /bin/sh\n", keep going. */
		} else {
			/* Found start of interpreter */
			break;
		}
	}

	if (ihp == &vdata[IMG_SHSIZE]) {
		/* All whitespace, like "#! " */
		return (ENOEXEC);
	}

	line_startp = ihp;

	/* Try to find the end of the interpreter+args string */
	for (; ihp < &vdata[IMG_SHSIZE]; ihp++) {
		if (IS_EOL(*ihp)) {
			/* Got it */
			break;
		} else {
			/* Still part of interpreter or args */
		}
	}

	if (ihp == &vdata[IMG_SHSIZE]) {
		/* A long line, like "#! blah blah blah" without end */
		return (ENOEXEC);
	}

	/* Backtrack until we find the last non-whitespace */
	while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
		ihp--;
	}

	/* The character after the last non-whitespace is our logical end of line */
	line_endp = ihp + 1;

	/*
	 * Now we have pointers to the usable part of:
	 *
	 * "#!  /usr/bin/int first    second   third    \n"
	 *      ^ line_startp                       ^ line_endp
	 */

	/* copy the interpreter name */
	interp = imgp->ip_interp_buffer;
	for (ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
		*interp++ = *ihp;
	*interp = '\0';

	exec_reset_save_path(imgp);
	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
	    UIO_SYSSPACE, NULL);

	/* Copy the entire interpreter + args for later processing into argv[] */
	interp = imgp->ip_interp_buffer;
	for (ihp = line_startp; (ihp < line_endp); ihp++)
		*interp++ = *ihp;
	*interp = '\0';

	/*
	 * If we have a SUID or SGID script, create a file descriptor
	 * from the vnode and pass /dev/fd/%d instead of the actual
	 * path name so that the script does not get opened twice.
	 */
	if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
		p = vfs_context_proc(imgp->ip_vfs_context);
		error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
		if (error)
			return(error);

		fp->f_fglob->fg_flag = FREAD;
		fp->f_fglob->fg_ops = &vnops;
		fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;

		proc_fdlock(p);
		procfdtbl_releasefd(p, fd, NULL);
		fp_drop(p, fd, fp, 1);
		proc_fdunlock(p);
		vnode_ref(imgp->ip_vp);

		imgp->ip_interp_sugid_fd = fd;
	}

	return (-3);
}
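
/*
 * Added editorial example: for a script beginning "#! /bin/sh -x\n",
 * the code above leaves "/bin/sh" saved as the new executable path and
 * ip_interp_buffer holding "/bin/sh -x"; the full interpreter+args
 * string is later split into the leading argv entries before the
 * script's own path is appended, and the -3 return tells
 * exec_activate_image() to relookup and activate the interpreter.
 */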


/*
 * exec_fat_imgact
 *
 * Image activator for fat 1.0 binaries.  If the binary is fat, then we
 * need to select an image from it internally, and make that the image
 * we are going to attempt to execute.  At present, this consists of
 * reloading the first page for the image with a first page from the
 * offset location indicated by the fat header.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	-1			not a fat binary (keep looking)
 *		-2			Success: encapsulated binary: reread
 *		>0			Failure: error number
 *
 * Important:	This image activator is byte order neutral.
 *
 * Note:	A return value other than -1 indicates subsequent image
 *		activators should not be given the opportunity to attempt
 *		to activate the image.
 *
 *		If we find an encapsulated binary, we make no assertions
 *		about its validity; instead, we leave that up to a rescan
 *		for an activator to claim it, and, if it is claimed by one,
 *		that activator is responsible for determining validity.
 */
static int
exec_fat_imgact(struct image_params *imgp)
{
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	kauth_cred_t cred = kauth_cred_proc_ref(p);
	struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
	struct _posix_spawnattr *psa = NULL;
	struct fat_arch fat_arch;
	int resid, error;
	load_return_t lret;

	if (imgp->ip_origcputype != 0) {
		/* Fat header previously matched, don't allow another fat file inside */
		error = -1;	/* not claimed; must still release the cred ref taken above */
		goto bad;
	}

	/* Make sure it's a fat binary */
	if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
		error = -1; /* not claimed */
		goto bad;
	}

	/* imgp->ip_vdata holds PAGE_SIZE bytes, zero-filled if the file is smaller */
	lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
	if (lret != LOAD_SUCCESS) {
		error = load_return_to_errno(lret);
		goto bad;
	}

	/* If posix_spawn binprefs exist, respect those prefs. */
	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
	if (psa != NULL && psa->psa_binprefs[0] != 0) {
		uint32_t pr = 0;

		/* Check each preference listed against all arches in header */
		for (pr = 0; pr < NBINPREFS; pr++) {
			cpu_type_t pref = psa->psa_binprefs[pr];
			if (pref == 0) {
				/* No suitable arch in the pref list */
				error = EBADARCH;
				goto bad;
			}

			if (pref == CPU_TYPE_ANY) {
				/* Fall through to regular grading */
				goto regular_grading;
			}

			lret = fatfile_getbestarch_for_cputype(pref,
			    (vm_offset_t)fat_header,
			    PAGE_SIZE,
			    &fat_arch);
			if (lret == LOAD_SUCCESS) {
				goto use_arch;
			}
		}

		/* Requested binary preference was not honored */
		error = EBADEXEC;
		goto bad;
	}

regular_grading:
	/* Look up our preferred architecture in the fat file. */
	lret = fatfile_getbestarch((vm_offset_t)fat_header,
	    PAGE_SIZE,
	    &fat_arch);
	if (lret != LOAD_SUCCESS) {
		error = load_return_to_errno(lret);
		goto bad;
	}

use_arch:
	/* Read the Mach-O header out of fat_arch */
	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
	    PAGE_SIZE, fat_arch.offset,
	    UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED),
	    cred, &resid, p);
	if (error) {
		goto bad;
	}

	if (resid) {
		memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
	}

	/* Success.  Indicate we have identified an encapsulated binary */
	error = -2;
	imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
	imgp->ip_arch_size = (user_size_t)fat_arch.size;
	imgp->ip_origcputype = fat_arch.cputype;
	imgp->ip_origcpusubtype = fat_arch.cpusubtype;

bad:
	kauth_cred_unref(&cred);
	return (error);
}
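
/*
 * Added userspace sketch (not part of this file): the binprefs honored
 * above come from posix_spawnattr_setbinpref_np(3).  For example, to
 * prefer the 64-bit Intel slice of a fat executable:
 *
 *	posix_spawnattr_t attr;
 *	cpu_type_t pref = CPU_TYPE_X86_64;
 *	size_t ocount;
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_setbinpref_np(&attr, 1, &pref, &ocount);
 *	posix_spawn(&pid, "/path/to/fat_binary", NULL, &attr, argv, envp);
 */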

static int
activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result)
{
	int ret;

	task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
	if (result->is64bit) {
		task_set_64bit(task, TRUE);
		OSBitOrAtomic(P_LP64, &p->p_flag);
	} else {
		task_set_64bit(task, FALSE);
		OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
	}

	ret = thread_state_initialize(thread);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	if (result->threadstate) {
		uint32_t *ts = result->threadstate;
		uint32_t total_size = result->threadstate_sz;

		while (total_size > 0) {
			uint32_t flavor = *ts++;
			uint32_t size = *ts++;

			ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
			if (ret) {
				return ret;
			}
			ts += size;
			total_size -= (size + 2) * sizeof(uint32_t);
		}
	}

	thread_setentrypoint(thread, result->entry_point);

	return KERN_SUCCESS;
}
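
/*
 * Added editorial note: result->threadstate is a packed sequence of
 * (flavor, count, data[count]) tuples expressed in 32-bit words, as
 * consumed by the loop above; a single entry therefore occupies
 * (count + 2) * sizeof(uint32_t) bytes, which is exactly what is
 * subtracted from total_size on each pass.
 */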


/*
 * Set p->p_comm and p->p_name to the name passed to exec
 */
static void
set_proc_name(struct image_params *imgp, proc_t p)
{
	int p_name_len = sizeof(p->p_name) - 1;

	if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len) {
		imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
	}

	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
	    (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
	p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';

	if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) {
		imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
	}

	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
	    (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
	p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
}

/*
 * exec_mach_imgact
 *
 * Image activator for Mach-O 1.0 binaries.
 *
 * Parameters:	struct image_params *	image parameter block
 *
 * Returns:	-1			not a Mach-O binary (keep looking)
 *		-2			Success: encapsulated binary: reread
 *		>0			Failure: error number
 *		EBADARCH		Mach-O binary, but with an unrecognized
 *					architecture
 *		ENOMEM			No memory for child process; can
 *					only happen after vfork()
 *
 * Important:	This image activator is NOT byte order neutral.
 *
 * Note:	A return value other than -1 indicates subsequent image
 *		activators should not be given the opportunity to attempt
 *		to activate the image.
 *
 * TODO:	More gracefully handle failures after vfork
 */
static int
exec_mach_imgact(struct image_params *imgp)
{
	struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	int error = 0;
	task_t task;
	task_t new_task = NULL; /* protected by vfexec */
	thread_t thread;
	struct uthread *uthread;
	vm_map_t old_map = VM_MAP_NULL;
	vm_map_t map = VM_MAP_NULL;
	load_return_t lret;
	load_result_t load_result;
	struct _posix_spawnattr *psa = NULL;
	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
	int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
	int exec = (imgp->ip_flags & IMGPF_EXEC);
	os_reason_t exec_failure_reason = OS_REASON_NULL;

	/*
	 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
	 * is a reserved field on the end, so for the most part, we can
	 * treat them as if they were identical. Reverse-endian Mach-O
	 * binaries are recognized but not compatible.
	 */
	if ((mach_header->magic == MH_CIGAM) ||
	    (mach_header->magic == MH_CIGAM_64)) {
		error = EBADARCH;
		goto bad;
	}

	if ((mach_header->magic != MH_MAGIC) &&
	    (mach_header->magic != MH_MAGIC_64)) {
		error = -1;
		goto bad;
	}

	if (mach_header->filetype != MH_EXECUTE) {
		error = -1;
		goto bad;
	}

	if (imgp->ip_origcputype != 0) {
		/* Fat header previously had an idea about this thin file */
		if (imgp->ip_origcputype != mach_header->cputype ||
		    imgp->ip_origcpusubtype != mach_header->cpusubtype) {
			error = EBADARCH;
			goto bad;
		}
	} else {
		imgp->ip_origcputype = mach_header->cputype;
		imgp->ip_origcpusubtype = mach_header->cpusubtype;
	}

	task = current_task();
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
		imgp->ip_flags |= IMGPF_IS_64BIT;

	/* If posix_spawn binprefs exist, respect those prefs. */
	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
	if (psa != NULL && psa->psa_binprefs[0] != 0) {
		int pr = 0;
		for (pr = 0; pr < NBINPREFS; pr++) {
			cpu_type_t pref = psa->psa_binprefs[pr];
			if (pref == 0) {
				/* No suitable arch in the pref list */
				error = EBADARCH;
				goto bad;
			}

			if (pref == CPU_TYPE_ANY) {
				/* Jump to regular grading */
				goto grade;
			}

			if (pref == imgp->ip_origcputype) {
				/* We have a match! */
				goto grade;
			}
		}
		error = EBADARCH;
		goto bad;
	}
grade:
	if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
		error = EBADARCH;
		goto bad;
	}

	/* Copy in arguments/environment from the old process */
	error = exec_extract_strings(imgp);
	if (error)
		goto bad;

	AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
	    imgp->ip_endargv - imgp->ip_startargv);
	AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
	    imgp->ip_endenvv - imgp->ip_endargv);

	/*
	 * We are being called to activate an image subsequent to a vfork()
	 * operation; in this case, we know that our task, thread, and
	 * uthread are actually those of our parent, and our proc, which we
	 * obtained indirectly from the image_params vfs_context_t, is the
	 * new child process.
	 */
	if (vfexec) {
		imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT), FALSE);
		/* task and thread ref returned, will be released in __mac_execve */
		if (imgp->ip_new_thread == NULL) {
			error = ENOMEM;
			goto bad;
		}
	}


	/* reset local idea of thread, uthread, task */
	thread = imgp->ip_new_thread;
	uthread = get_bsdthread_info(thread);
	task = new_task = get_threadtask(thread);

	/*
	 * Load the Mach-O file.
	 *
	 * NOTE: An error after this point indicates we have potentially
	 * destroyed or overwritten some process state while attempting an
	 * execve() following a vfork(), which is an unrecoverable condition.
	 * We send the new process an immediate SIGKILL to avoid it executing
	 * any instructions in the mutated address space. For true spawns,
	 * this is not the case, and "too late" is still not too late to
	 * return an error code to the parent process.
	 */

	/*
	 * Actually load the image file we previously decided to load.
	 */
	lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
	if (lret != LOAD_SUCCESS) {
		error = load_return_to_errno(lret);

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO, 0, 0);
		if (lret == LOAD_BADMACHO_UPX) {
			/* set anything that might be useful in the crash report */
			set_proc_name(imgp, p);

			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_UPX);
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
			exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
		} else {
			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
		}

		goto badtoolate;
	}

	proc_lock(p);
	p->p_cputype = imgp->ip_origcputype;
	p->p_cpusubtype = imgp->ip_origcpusubtype;
	proc_unlock(p);

	vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/*
	 * Set code-signing flags if this binary is signed, or if parent has
	 * requested them on exec.
	 */
	if (load_result.csflags & CS_VALID) {
		imgp->ip_csflags |= load_result.csflags &
		    (CS_VALID|CS_SIGNED|CS_DEV_CODE|
		     CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|
		     CS_ENTITLEMENTS_VALIDATED|CS_DYLD_PLATFORM|
		     CS_ENTITLEMENT_FLAGS|
		     CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
	} else {
		imgp->ip_csflags &= ~CS_VALID;
	}

	if (p->p_csflags & CS_EXEC_SET_HARD)
		imgp->ip_csflags |= CS_HARD;
	if (p->p_csflags & CS_EXEC_SET_KILL)
		imgp->ip_csflags |= CS_KILL;
	if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
		imgp->ip_csflags |= CS_ENFORCEMENT;
	if (p->p_csflags & CS_EXEC_SET_INSTALLER)
		imgp->ip_csflags |= CS_INSTALLER;

	/*
	 * Set up the system reserved areas in the new address space.
	 */
	vm_map_exec(map, task, load_result.is64bit, (void *)p->p_fd->fd_rdir, cpu_type());

	/*
	 * Close file descriptors which specify close-on-exec.
	 */
	fdexec(p, psa != NULL ? psa->psa_flags : 0);

	/*
	 * deal with set[ug]id.
	 */
	error = exec_handle_sugid(imgp);
	if (error) {
		vm_map_deallocate(map);

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, 0, 0);
		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE);
		goto badtoolate;
	}

	/*
	 * Commit to new map.
	 *
	 * Swap the new map for the old, which consumes our new map reference
	 * and leaves us responsible for the old_map reference.  That lets us
	 * get off the pmap associated with it, and then we can release it.
	 */
	old_map = swap_task_map(task, thread, map, !spawn);
	vm_map_deallocate(old_map);

	lret = activate_exec_state(task, p, thread, &load_result);
	if (lret != KERN_SUCCESS) {

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE, 0, 0);
		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE);
		goto badtoolate;
	}

	/*
	 * deal with voucher on exec-calling thread.
	 */
	if (imgp->ip_new_thread == NULL)
		thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);

	/* Make sure we won't interrupt ourselves signalling a partial process */
	if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
		psignal(p, SIGTRAP);

	if (load_result.unixproc &&
	    create_unix_stack(get_task_map(task),
	        &load_result,
	        p) != KERN_SUCCESS) {
		error = load_return_to_errno(LOAD_NOSPACE);

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC, 0, 0);
		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC);
		goto badtoolate;
	}

	error = exec_add_apple_strings(imgp, &load_result);
	if (error) {

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
		    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT, 0, 0);
		exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT);
		goto badtoolate;
	}

	old_map = vm_map_switch(get_task_map(task));

	if (load_result.unixproc) {
		user_addr_t ap;

		/*
		 * Copy the strings area out into the new process address
		 * space.
		 */
		ap = p->user_stack;
		error = exec_copyout_strings(imgp, &ap);
		if (error) {
			vm_map_switch(old_map);

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
			    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, 0, 0);
			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS);
			goto badtoolate;
		}
		/* Set the stack */
		thread_setuserstack(thread, ap);
	}

	if (load_result.dynlinker) {
		uint64_t ap;
		int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;

		/* Adjust the stack */
		ap = thread_adjuserstack(thread, -new_ptr_size);
		error = copyoutptr(load_result.mach_header, ap, new_ptr_size);

		if (error) {
			vm_map_switch(old_map);

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
			    p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, 0, 0);
			exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER);
			goto badtoolate;
		}
		task_set_dyld_info(task, load_result.all_image_info_addr,
		    load_result.all_image_info_size);
	}

	/* Avoid immediate VM faults back into kernel */
	exec_prefault_data(p, imgp, &load_result);

	vm_map_switch(old_map);

	/* Stop profiling */
	stopprofclock(p);

	/*
	 * Reset signal state.
	 */
	execsigs(p, thread);

	/*
	 * need to cancel async IO requests that can be cancelled and wait for those
	 * already active.  MAY BLOCK!
	 */
	_aio_exec( p );

#if SYSV_SHM
	/* FIXME: Till vmspace inherit is fixed: */
	if (!vfexec && p->vm_shm)
		shmexec(p);
#endif
#if SYSV_SEM
	/* Clean up the semaphores */
	semexit(p);
#endif

	/*
	 * Remember file name for accounting.
	 */
	p->p_acflag &= ~AFORK;

	set_proc_name(imgp, p);

#if CONFIG_SECLUDED_MEMORY
	if (secluded_for_apps) {
		if (strncmp(p->p_name,
			    "Camera",
			    sizeof (p->p_name)) == 0 ||
#if 00
		    strncmp(p->p_name,
			    "camerad",
			    sizeof (p->p_name)) == 0 ||
#endif
		    strncmp(p->p_name,
			    "testCamera",
			    sizeof (p->p_name)) == 0) {
			task_set_could_use_secluded_mem(task, TRUE);
		} else {
			task_set_could_use_secluded_mem(task, FALSE);
		}
		if (strncmp(p->p_name,
			    "mediaserverd",
			    sizeof (p->p_name)) == 0) {
			task_set_could_also_use_secluded_mem(task, TRUE);
		}
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	pal_dbg_set_task_name( task );

#if DEVELOPMENT || DEBUG
	/*
	 * Update the pid and proc name for the importance base, if any
	 */
	task_importance_update_owner_info(task);
#endif

	memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));

#if CONFIG_DTRACE
	dtrace_proc_exec(p);
#endif

	if (kdebug_enable) {
		long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		/*
		 * Collect the pathname for tracing
		 */
		kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE,
		    p->p_pid, 0, 0, 0, (uintptr_t)thread_tid(thread));
		KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE,
		    dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
	}

	/*
	 * If posix_spawned with the START_SUSPENDED flag, stop the
	 * process before it runs.
	 */
	if (imgp->ip_px_sa != NULL) {
		psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
		if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
			proc_lock(p);
			p->p_stat = SSTOP;
			proc_unlock(p);
			(void) task_suspend_internal(task);
		}
	}

	/*
	 * mark as execed, wakeup the process that vforked (if any) and tell
	 * it that it now has its own resources back
	 */
	OSBitOrAtomic(P_EXEC, &p->p_flag);
	proc_resetregister(p);
	if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
		proc_lock(p);
		p->p_lflag &= ~P_LPPWAIT;
		proc_unlock(p);
		wakeup((caddr_t)p->p_pptr);
	}

	/*
	 * Pay for our earlier safety; deliver the delayed signals from
	 * the incomplete vfexec process now that it's complete.
	 */
	if (vfexec && (p->p_lflag & P_LTRACED)) {
		psignal_vfork(p, new_task, thread, SIGTRAP);
	}

	goto done;

badtoolate:
	/* Don't allow child process to execute any instructions */
	if (!spawn) {
		if (vfexec) {
			assert(exec_failure_reason != OS_REASON_NULL);
			psignal_vfork_with_reason(p, new_task, thread, SIGKILL, exec_failure_reason);
			exec_failure_reason = OS_REASON_NULL;
		} else {
			assert(exec_failure_reason != OS_REASON_NULL);
			psignal_with_reason(p, SIGKILL, exec_failure_reason);
			exec_failure_reason = OS_REASON_NULL;

			if (exec) {
				/* Terminate the exec copy task */
				task_terminate_internal(task);
			}
		}

		/* We can't stop this system call at this point, so just pretend we succeeded */
		error = 0;
	} else {
		os_reason_free(exec_failure_reason);
		exec_failure_reason = OS_REASON_NULL;
	}

done:
	if (!spawn) {
		/* notify only if it has not failed due to FP Key error */
		if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
			proc_knote(p, NOTE_EXEC);
	}

	if (load_result.threadstate) {
		kfree(load_result.threadstate, load_result.threadstate_sz);
		load_result.threadstate = NULL;
	}

bad:
	/* If we hit this, we likely would have leaked an exit reason */
	assert(exec_failure_reason == OS_REASON_NULL);
	return(error);
}


/*
 * Our image activator table; this is the table of the image types we are
 * capable of loading.  We list them in order of preference to ensure the
 * fastest image load speed.
 *
 * XXX hardcoded, for now; should use linker sets
 */
struct execsw {
	int (*ex_imgact)(struct image_params *);
	const char *ex_name;
} execsw[] = {
	{ exec_mach_imgact,		"Mach-o Binary" },
	{ exec_fat_imgact,		"Fat Binary" },
	{ exec_shell_imgact,		"Interpreter Script" },
	{ NULL, NULL}
};


/*
 * exec_activate_image
 *
 * Description:	Iterate through the available image activators, and activate
 *		the image associated with the imgp structure.  We start with
 *		the first entry in the execsw[] table above and try each
 *		activator in order until one claims the image.
 *
 * Parameters:	struct image_params *	Image parameter block
 *
 * Returns:	0			Success
 *		EBADEXEC		The executable is corrupt/unknown
 *	execargs_alloc:EINVAL		Invalid argument
 *	execargs_alloc:EACCES		Permission denied
 *	execargs_alloc:EINTR		Interrupted function
 *	execargs_alloc:ENOMEM		Not enough space
 *	exec_save_path:EFAULT		Bad address
 *	exec_save_path:ENAMETOOLONG	Filename too long
 *	exec_check_permissions:EACCES	Permission denied
 *	exec_check_permissions:ENOEXEC	Executable file format error
 *	exec_check_permissions:ETXTBSY	Text file busy [misuse of error code]
 *	exec_check_permissions:???
 *	namei:???
 *	vn_rdwr:???			[anything vn_rdwr can return]
 *	<ex_imgact>:???			[anything an imgact can return]
 *	EDEADLK				Process is being terminated
 */
static int
exec_activate_image(struct image_params *imgp)
{
	struct nameidata *ndp = NULL;
	const char *excpath;
	int error;
	int resid;
	int once = 1;	/* save SUGID-ness for interpreted files */
	int i;
	int itercount = 0;
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);

	error = execargs_alloc(imgp);
	if (error)
		goto bad_notrans;

	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
	if (error) {
		goto bad_notrans;
	}

	/* Use excpath, which contains the copyin-ed exec path */
	DTRACE_PROC1(exec, uintptr_t, excpath);

	MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
	if (ndp == NULL) {
		error = ENOMEM;
		goto bad_notrans;
	}

	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);

again:
	error = namei(ndp);
	if (error)
		goto bad_notrans;
	imgp->ip_ndp = ndp;	/* successful namei(); call nameidone() later */
	imgp->ip_vp = ndp->ni_vp;	/* if set, need to vnode_put() at some point */

	/*
	 * Before we start the transition from binary A to binary B, make
	 * sure another thread hasn't started exiting the process.  We grab
	 * the proc lock to check p_lflag initially, and the transition
	 * mechanism ensures that the value doesn't change after we release
	 * the lock.
	 */
	proc_lock(p);
	if (p->p_lflag & P_LEXIT) {
		error = EDEADLK;
		proc_unlock(p);
		goto bad_notrans;
	}
	error = proc_transstart(p, 1, 0);
	proc_unlock(p);
	if (error)
		goto bad_notrans;

	error = exec_check_permissions(imgp);
	if (error)
		goto bad;

	/* Copy; avoid invocation of an interpreter overwriting the original */
	if (once) {
		once = 0;
		*imgp->ip_origvattr = *imgp->ip_vattr;
	}

	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
	    UIO_SYSSPACE, IO_NODELOCKED,
	    vfs_context_ucred(imgp->ip_vfs_context),
	    &resid, vfs_context_proc(imgp->ip_vfs_context));
	if (error)
		goto bad;

	if (resid) {
		memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
	}

encapsulated_binary:
	/* Limit the number of iterations we will attempt on each binary */
	if (++itercount > EAI_ITERLIMIT) {
		error = EBADEXEC;
		goto bad;
	}
	error = -1;
	for (i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {

		error = (*execsw[i].ex_imgact)(imgp);

		switch (error) {
		/* case -1: not claimed: continue */
		case -2:	/* Encapsulated binary, imgp->ip_XXX set for next iteration */
			goto encapsulated_binary;

		case -3:	/* Interpreter */
#if CONFIG_MACF
			/*
			 * Copy the script label for later use. Note that
			 * the label can be different when the script is
			 * actually read by the interpreter.
			 */
			if (imgp->ip_scriptlabelp)
				mac_vnode_label_free(imgp->ip_scriptlabelp);
			imgp->ip_scriptlabelp = mac_vnode_label_alloc();
			if (imgp->ip_scriptlabelp == NULL) {
				error = ENOMEM;
				break;
			}
			mac_vnode_label_copy(imgp->ip_vp->v_label,
			    imgp->ip_scriptlabelp);

			/*
			 * Take a ref of the script vnode for later use.
			 */
			if (imgp->ip_scriptvp)
				vnode_put(imgp->ip_scriptvp);
			if (vnode_getwithref(imgp->ip_vp) == 0)
				imgp->ip_scriptvp = imgp->ip_vp;
#endif

			nameidone(ndp);

			vnode_put(imgp->ip_vp);
			imgp->ip_vp = NULL;	/* already put */
			imgp->ip_ndp = NULL;	/* already nameidone */

			/* Use excpath, which exec_shell_imgact reset to the interpreter */
			NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
			    UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);

			proc_transend(p, 0);
			goto again;

		default:
			break;
		}
	}

	/*
	 * Call out to allow 3rd party notification of exec.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	if (error == 0 && kauth_authorize_fileop_has_listeners()) {
		kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
		    KAUTH_FILEOP_EXEC,
		    (uintptr_t)ndp->ni_vp, 0);
	}
bad:
	proc_transend(p, 0);

bad_notrans:
	if (imgp->ip_strings)
		execargs_free(imgp);
	if (imgp->ip_ndp)
		nameidone(imgp->ip_ndp);
	if (ndp)
		FREE(ndp, M_TEMP);

	return (error);
}


/*
 * exec_handle_spawnattr_policy
 *
 * Description:	Decode and apply the posix_spawn apptype, qos clamp, and
 *		watchport ports to the task.
 *
 * Parameters:	proc_t p		process to apply attributes to
 *		int psa_apptype		posix spawn attribute apptype
 *
 * Returns:	0			Success
 */
static errno_t
exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
    ipc_port_t *portwatch_ports, int portwatch_count)
{
	int apptype = TASK_APPTYPE_NONE;
	int qos_clamp = THREAD_QOS_UNSPECIFIED;
	int role = TASK_UNSPECIFIED;

	if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
		int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;

		switch (proctype) {
		case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
			apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
			break;
		case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
			apptype = TASK_APPTYPE_DAEMON_STANDARD;
			break;
		case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
			apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
			break;
		case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
			apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
			break;
		case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
			apptype = TASK_APPTYPE_APP_DEFAULT;
			break;
		case POSIX_SPAWN_PROC_TYPE_APP_TAL:
			apptype = TASK_APPTYPE_APP_TAL;
			break;
		default:
			apptype = TASK_APPTYPE_NONE;
			/* TODO: Should an invalid value here fail the spawn? */
			break;
		}
	}

	if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
		switch (psa_qos_clamp) {
		case POSIX_SPAWN_PROC_CLAMP_UTILITY:
			qos_clamp = THREAD_QOS_UTILITY;
			break;
		case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
			qos_clamp = THREAD_QOS_BACKGROUND;
			break;
		case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
			qos_clamp = THREAD_QOS_MAINTENANCE;
			break;
		default:
			qos_clamp = THREAD_QOS_UNSPECIFIED;
			/* TODO: Should an invalid value here fail the spawn? */
			break;
		}
	}

	if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
		proc_darwin_role_to_task_role(psa_darwin_role, &role);
	}

	if (apptype != TASK_APPTYPE_NONE ||
	    qos_clamp != THREAD_QOS_UNSPECIFIED ||
	    role != TASK_UNSPECIFIED) {
		proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, role,
		    portwatch_ports, portwatch_count);
	}

	return (0);
}


/*
 * exec_handle_port_actions
 *
 * Description:	Go through the _posix_port_actions_t contents,
 *		calling task_set_special_port, task_set_exception_ports
 *		and/or audit_session_spawnjoin for the current task.
 *
 * Parameters:	struct image_params *	Image parameter block
 *
 * Returns:	0			Success
 *		EINVAL			Failure
 *		ENOTSUP			Illegal posix_spawn attr flag was set
 */
static errno_t
exec_handle_port_actions(struct image_params *imgp, boolean_t *portwatch_present,
    ipc_port_t *portwatch_ports)
{
	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
#if CONFIG_AUDIT
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
#endif
	_ps_port_action_t *act = NULL;
	task_t task = get_threadtask(imgp->ip_new_thread);
	ipc_port_t port = NULL;
	errno_t ret = 0;
	int i;
	kern_return_t kr;

	*portwatch_present = FALSE;

	for (i = 0; i < pacts->pspa_count; i++) {
		act = &pacts->pspa_actions[i];

		if (MACH_PORT_VALID(act->new_port)) {
			kr = ipc_object_copyin(get_task_ipcspace(current_task()),
			    act->new_port, MACH_MSG_TYPE_COPY_SEND,
			    (ipc_object_t *) &port);

			if (kr != KERN_SUCCESS) {
				ret = EINVAL;
				goto done;
			}
		} else {
			/* it's NULL or DEAD */
			port = CAST_MACH_NAME_TO_PORT(act->new_port);
		}

		switch (act->port_type) {
		case PSPA_SPECIAL:
			kr = task_set_special_port(task, act->which, port);

			if (kr != KERN_SUCCESS)
				ret = EINVAL;
			break;

		case PSPA_EXCEPTION:
			kr = task_set_exception_ports(task, act->mask, port,
			    act->behavior, act->flavor);
			if (kr != KERN_SUCCESS)
				ret = EINVAL;
			break;
#if CONFIG_AUDIT
		case PSPA_AU_SESSION:
			ret = audit_session_spawnjoin(p, task, port);
			break;
#endif
		case PSPA_IMP_WATCHPORTS:
			if (portwatch_ports != NULL && IPC_PORT_VALID(port)) {
				*portwatch_present = TRUE;
				/* hold on to this till end of spawn */
				portwatch_ports[i] = port;
			} else {
				ipc_port_release_send(port);
			}

			break;
		default:
			ret = EINVAL;
			break;
		}

		if (ret) {
			/* action failed, so release port resources */
			ipc_port_release_send(port);
			break;
		}
	}

done:
	if (0 != ret)
		DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
	return (ret);
}
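
/*
 * Added userspace sketch (not part of this file): the port actions
 * decoded above are queued with the posix_spawnattr_*_np() Mach
 * extensions, e.g.
 *
 *	posix_spawnattr_t attr;
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_setspecialport_np(&attr, bootstrap_port,
 *	    TASK_BOOTSTRAP_PORT);			// maps to PSPA_SPECIAL
 *	posix_spawnattr_setexceptionports_np(&attr, EXC_MASK_CRASH,
 *	    crash_port, EXCEPTION_DEFAULT,
 *	    THREAD_STATE_NONE);				// maps to PSPA_EXCEPTION
 */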

/*
 * exec_handle_file_actions
 *
 * Description:	Go through the _posix_file_actions_t contents applying the
 *		open, close, and dup2 operations to the open file table for
 *		the current process.
 *
 * Parameters:	struct image_params *	Image parameter block
 *
 * Returns:	0			Success
 *		???
 *
 * Note:	Actions are applied in the order specified, with the credential
 *		of the parent process.  This is done to permit the parent
 *		process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
 *		the child following operations that the child may in fact not
 *		normally be permitted to perform.
 */
static int
exec_handle_file_actions(struct image_params *imgp, short psa_flags)
{
	int error = 0;
	int action;
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	_posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
	int ival[2];		/* dummy retval for system calls */

	for (action = 0; action < px_sfap->psfa_act_count; action++) {
		_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];

		switch (psfa->psfaa_type) {
		case PSFA_OPEN: {
			/*
			 * Open is different, in that it requires the use of
			 * a path argument, which is normally copied in from
			 * user space; because of this, we have to support an
			 * open from kernel space that passes an address space
			 * context of UIO_SYSSPACE, and casts the address
			 * argument to a user_addr_t.
			 */
			char *bufp = NULL;
			struct vnode_attr *vap;
			struct nameidata *ndp;
			int mode = psfa->psfaa_openargs.psfao_mode;
			struct dup2_args dup2a;
			struct close_nocancel_args ca;
			int origfd;

			MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
			if (bufp == NULL) {
				error = ENOMEM;
				break;
			}

			vap = (struct vnode_attr *) bufp;
			ndp = (struct nameidata *) (bufp + sizeof(*vap));

			VATTR_INIT(vap);
			/* Mask off all but regular access permissions */
			mode = ((mode & ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
			VATTR_SET(vap, va_mode, mode & ACCESSPERMS);

			NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
			    CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
			    imgp->ip_vfs_context);

			error = open1(imgp->ip_vfs_context,
			    ndp,
			    psfa->psfaa_openargs.psfao_oflag,
			    vap,
			    fileproc_alloc_init, NULL,
			    ival);

			FREE(bufp, M_TEMP);

			/*
			 * If there's an error, or we get the right fd by
			 * accident, then drop out here.  This is easier than
			 * reworking all the open code to preallocate fd
			 * slots, and internally taking one as an argument.
			 */
			if (error || ival[0] == psfa->psfaa_filedes)
				break;

			origfd = ival[0];
			/*
			 * If we didn't fall out from an error, we ended up
			 * with the wrong fd; so now we've got to try to dup2
			 * it to the right one.
			 */
			dup2a.from = origfd;
			dup2a.to = psfa->psfaa_filedes;

			/*
			 * The dup2() system call implementation sets
			 * ival to newfd in the success case, but we
			 * can ignore that, since if we didn't get the
			 * fd we wanted, the error will stop us.
			 */
			error = dup2(p, &dup2a, ival);
			if (error)
				break;

			/*
			 * Finally, close the original fd.
			 */
			ca.fd = origfd;

			error = close_nocancel(p, &ca, ival);
		}
		break;

		case PSFA_DUP2: {
			struct dup2_args dup2a;

			dup2a.from = psfa->psfaa_filedes;
			dup2a.to = psfa->psfaa_openargs.psfao_oflag;

			/*
			 * The dup2() system call implementation sets
			 * ival to newfd in the success case, but we
			 * can ignore that, since if we didn't get the
			 * fd we wanted, the error will stop us.
			 */
			error = dup2(p, &dup2a, ival);
		}
		break;

		case PSFA_CLOSE: {
			struct close_nocancel_args ca;

			ca.fd = psfa->psfaa_filedes;

			error = close_nocancel(p, &ca, ival);
		}
		break;

		case PSFA_INHERIT: {
			struct fcntl_nocancel_args fcntla;

			/*
			 * Check to see if the descriptor exists, and
			 * ensure it's -not- marked as close-on-exec.
			 *
			 * Attempting to "inherit" a guarded fd will
			 * result in an error.
			 */
			fcntla.fd = psfa->psfaa_filedes;
			fcntla.cmd = F_GETFD;
			if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0)
				break;

			if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
				fcntla.fd = psfa->psfaa_filedes;
				fcntla.cmd = F_SETFD;
				fcntla.arg = ival[0] & ~FD_CLOEXEC;
				error = fcntl_nocancel(p, &fcntla, ival);
			}

		}
		break;
1820
1821 default:
1822 error = EINVAL;
1823 break;
1824 }
1825
1826 /* All file action failures are considered fatal, per POSIX */
1827
1828 if (error) {
1829 if (PSFA_OPEN == psfa->psfaa_type) {
1830 DTRACE_PROC1(spawn__open__failure, uintptr_t,
1831 psfa->psfaa_openargs.psfao_path);
1832 } else {
1833 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1834 }
1835 break;
1836 }
1837 }
1838
1839 if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
1840 return (error);
1841
1842 /*
1843 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1844 * this spawn only) as if "close on exec" is the default
1845 * disposition of all pre-existing file descriptors. In this case,
1846 * the list of file descriptors mentioned in the file actions
1847 * are the only ones that can be inherited, so mark them now.
1848 *
1849 * The actual closing part comes later, in fdexec().
1850 */
1851 proc_fdlock(p);
1852 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1853 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1854 int fd = psfa->psfaa_filedes;
1855
1856 switch (psfa->psfaa_type) {
1857 case PSFA_DUP2:
1858 fd = psfa->psfaa_openargs.psfao_oflag;
1859 /*FALLTHROUGH*/
1860 case PSFA_OPEN:
1861 case PSFA_INHERIT:
1862 *fdflags(p, fd) |= UF_INHERIT;
1863 break;
1864
1865 case PSFA_CLOSE:
1866 break;
1867 }
1868 }
1869 proc_fdunlock(p);
1870
1871 return (0);
1872 }
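/*
 * For reference, the userspace sequence that produces the file actions
 * handled above might look like the following sketch (error handling
 * omitted; "/tmp/out" and the fd numbers are illustrative only):
 *
 *	posix_spawn_file_actions_t fa;
 *	posix_spawn_file_actions_init(&fa);
 *	posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO,
 *	    "/tmp/out", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	posix_spawn_file_actions_adddup2(&fa, STDOUT_FILENO, STDERR_FILENO);
 *	posix_spawn_file_actions_addclose(&fa, 3);
 *	posix_spawn_file_actions_addinherit_np(&fa, 4);
 *
 * Each entry becomes one _psfa_action_t consumed by the loop above;
 * addinherit_np() matters when POSIX_SPAWN_CLOEXEC_DEFAULT is set.
 */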
1873
1874 #if CONFIG_MACF
1875 /*
1876 * exec_spawnattr_getmacpolicyinfo
1877 */
1878 void *
1879 exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
1880 {
1881 const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1882 int i;
1883
1884 if (psmx == NULL)
1885 return NULL;
1886
1887 for (i = 0; i < psmx->psmx_count; i++) {
1888 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1889 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
1890 if (lenp != NULL)
1891 *lenp = extension->datalen;
1892 return extension->datap;
1893 }
1894 }
1895
1896 if (lenp != NULL)
1897 *lenp = 0;
1898 return NULL;
1899 }
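/*
 * A MAC policy would typically call this from one of its hooks to fetch
 * the opaque payload registered for it at spawn time. A hedged sketch
 * (the policy name is illustrative):
 *
 *	size_t len;
 *	void *datap = exec_spawnattr_getmacpolicyinfo(macextensions,
 *	    "com.example.policy", &len);
 *	if (datap != NULL) {
 *		... consume len bytes of policy-specific data ...
 *	}
 */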
1900
1901 static int
1902 spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
1903 {
1904 _posix_spawn_mac_policy_extensions_t psmx = NULL;
1905 int error = 0;
1906 int copycnt = 0;
1907 int i = 0;
1908
1909 *psmxp = NULL;
1910
1911 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
1912 px_args->mac_extensions_size > PAGE_SIZE) {
1913 error = EINVAL;
1914 goto bad;
1915 }
1916
1917 MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
1918 if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0)
1919 goto bad;
1920
1921 if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) {
1922 error = EINVAL;
1923 goto bad;
1924 }
1925
1926 for (i = 0; i < psmx->psmx_count; i++) {
1927 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1928 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
1929 error = EINVAL;
1930 goto bad;
1931 }
1932 }
1933
1934 for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
1935 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
1936 void *data = NULL;
1937
1938 MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
1939 if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
1940 FREE(data, M_TEMP);
1941 goto bad;
1942 }
1943 extension->datap = data;
1944 }
1945
1946 *psmxp = psmx;
1947 return 0;
1948
1949 bad:
1950 if (psmx != NULL) {
1951 for (i = 0; i < copycnt; i++)
1952 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1953 FREE(psmx, M_TEMP);
1954 }
1955 return error;
1956 }
1957
1958 static void
1959 spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
1960 {
1961 int i;
1962
1963 if (psmx == NULL)
1964 return;
1965 for (i = 0; i < psmx->psmx_count; i++)
1966 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1967 FREE(psmx, M_TEMP);
1968 }
1969 #endif /* CONFIG_MACF */
1970
1971 #if CONFIG_COALITIONS
1972 static inline void spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
1973 {
1974 for (int c = 0; c < COALITION_NUM_TYPES; c++) {
1975 if (coal[c]) {
1976 coalition_remove_active(coal[c]);
1977 coalition_release(coal[c]);
1978 }
1979 }
1980 }
1981 #endif
1982
1983 #if CONFIG_PERSONAS
1984 static int spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
1985 {
1986 int error = 0;
1987 struct persona *persona = NULL;
1988 int verify = px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_VERIFY;
1989
1990 /*
1991 * TODO: rdar://problem/19981151
1992 * Add entitlement check!
1993 */
1994 if (!kauth_cred_issuser(kauth_cred_get()))
1995 return EPERM;
1996
1997 persona = persona_lookup(px_persona->pspi_id);
1998 if (!persona) {
1999 error = ESRCH;
2000 goto out;
2001 }
2002
2003 if (verify) {
2004 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2005 if (px_persona->pspi_uid != persona_get_uid(persona)) {
2006 error = EINVAL;
2007 goto out;
2008 }
2009 }
2010 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2011 if (px_persona->pspi_gid != persona_get_gid(persona)) {
2012 error = EINVAL;
2013 goto out;
2014 }
2015 }
2016 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2017 int ngroups = 0;
2018 gid_t groups[NGROUPS_MAX];
2019
2020 if (persona_get_groups(persona, &ngroups, groups,
2021 px_persona->pspi_ngroups) != 0) {
2022 error = EINVAL;
2023 goto out;
2024 }
2025 if (ngroups != (int)px_persona->pspi_ngroups) {
2026 error = EINVAL;
2027 goto out;
2028 }
2029 while (ngroups--) {
2030 if (px_persona->pspi_groups[ngroups] != groups[ngroups]) {
2031 error = EINVAL;
2032 goto out;
2033 }
2034 }
2035 if (px_persona->pspi_gmuid != persona_get_gmuid(persona)) {
2036 error = EINVAL;
2037 goto out;
2038 }
2039 }
2040 }
2041
2042 out:
2043 if (persona)
2044 persona_put(persona);
2045
2046 return error;
2047 }
2048
2049 static int spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
2050 {
2051 int ret;
2052 kauth_cred_t cred;
2053 struct persona *persona = NULL;
2054 int override = !!(px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
2055
2056 if (!override)
2057 return persona_proc_adopt_id(p, px_persona->pspi_id, NULL);
2058
2059 /*
2060 * we want to spawn into the given persona, but we want to override
2061 * the kauth with a different UID/GID combo
2062 */
2063 persona = persona_lookup(px_persona->pspi_id);
2064 if (!persona)
2065 return ESRCH;
2066
2067 cred = persona_get_cred(persona);
2068 if (!cred) {
2069 ret = EINVAL;
2070 goto out;
2071 }
2072
2073 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2074 cred = kauth_cred_setresuid(cred,
2075 px_persona->pspi_uid,
2076 px_persona->pspi_uid,
2077 px_persona->pspi_uid,
2078 KAUTH_UID_NONE);
2079 }
2080
2081 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2082 cred = kauth_cred_setresgid(cred,
2083 px_persona->pspi_gid,
2084 px_persona->pspi_gid,
2085 px_persona->pspi_gid);
2086 }
2087
2088 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2089 cred = kauth_cred_setgroups(cred,
2090 px_persona->pspi_groups,
2091 px_persona->pspi_ngroups,
2092 px_persona->pspi_gmuid);
2093 }
2094
2095 ret = persona_proc_adopt(p, persona, cred);
2096
2097 out:
2098 persona_put(persona);
2099 return ret;
2100 }
2101 #endif
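/*
 * The persona info validated/adopted above is populated from userspace
 * via the private spawn SPI. A hedged sketch, assuming the
 * posix_spawnattr_set_persona*_np() interfaces from spawn_private.h
 * (exact names/signatures are an assumption here):
 *
 *	posix_spawnattr_set_persona_np(&attr, persona_id,
 *	    POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
 *	posix_spawnattr_set_persona_uid_np(&attr, uid);
 *	posix_spawnattr_set_persona_gid_np(&attr, gid);
 */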
2102
2103 /*
2104 * posix_spawn
2105 *
2106 * Parameters: uap->pid Pointer to pid return area
2107 * uap->fname File name to exec
2108 * uap->argp Argument list
2109 * uap->envp Environment list
2110 *
2111 * Returns: 0 Success
2112 * EINVAL Invalid argument
2113 * ENOTSUP Not supported
2114 * ENOEXEC Executable file format error
2115 * exec_activate_image:EINVAL Invalid argument
2116 * exec_activate_image:EACCES Permission denied
2117 * exec_activate_image:EINTR Interrupted function
2118 * exec_activate_image:ENOMEM Not enough space
2119 * exec_activate_image:EFAULT Bad address
2120 * exec_activate_image:ENAMETOOLONG Filename too long
2121 * exec_activate_image:ENOEXEC Executable file format error
2122 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
2123 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
2124 * exec_activate_image:???
2125 * mac_execve_enter:???
2126 *
2127 * TODO: Expect to need __mac_posix_spawn() at some point...
2128 * Handle posix_spawnattr_t
2129 * Handle posix_spawn_file_actions_t
2130 */
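/*
 * Illustrative userspace invocation (a sketch; error handling omitted;
 * POSIX_SPAWN_CLOEXEC_DEFAULT is the Apple-specific flag handled below):
 *
 *	pid_t child;
 *	char *argv[] = { "/bin/echo", "hello", NULL };
 *	extern char **environ;
 *	posix_spawnattr_t attr;
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
 *	posix_spawn(&child, argv[0], NULL, &attr, argv, environ);
 *	posix_spawnattr_destroy(&attr);
 */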
2131 int
2132 posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
2133 {
2134 proc_t p = ap; /* quiet bogus GCC vfork() warning */
2135 user_addr_t pid = uap->pid;
2136 int ival[2]; /* dummy retval for setpgid() */
2137 char *bufp = NULL;
2138 struct image_params *imgp;
2139 struct vnode_attr *vap;
2140 struct vnode_attr *origvap;
2141 struct uthread *uthread = 0; /* compiler complains if not set to 0 */
2142 int error, sig;
2143 int is_64 = IS_64BIT_PROCESS(p);
2144 struct vfs_context context;
2145 struct user__posix_spawn_args_desc px_args;
2146 struct _posix_spawnattr px_sa;
2147 _posix_spawn_file_actions_t px_sfap = NULL;
2148 _posix_spawn_port_actions_t px_spap = NULL;
2149 struct __kern_sigaction vec;
2150 boolean_t spawn_no_exec = FALSE;
2151 boolean_t proc_transit_set = TRUE;
2152 boolean_t exec_done = FALSE;
2153 int portwatch_count = 0;
2154 ipc_port_t * portwatch_ports = NULL;
2155 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
2156 task_t new_task = NULL;
2157 boolean_t should_release_proc_ref = FALSE;
2158 void *inherit = NULL;
2159 #if CONFIG_PERSONAS
2160 struct _posix_spawn_persona_info *px_persona = NULL;
2161 #endif
2162
2163 /*
2164 * Allocate a big chunk for locals instead of using stack since these
2165 * structures are pretty big.
2166 */
2167 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
2168 imgp = (struct image_params *) bufp;
2169 if (bufp == NULL) {
2170 error = ENOMEM;
2171 goto bad;
2172 }
2173 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
2174 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2175
2176 /* Initialize the common data in the image_params structure */
2177 imgp->ip_user_fname = uap->path;
2178 imgp->ip_user_argv = uap->argv;
2179 imgp->ip_user_envv = uap->envp;
2180 imgp->ip_vattr = vap;
2181 imgp->ip_origvattr = origvap;
2182 imgp->ip_vfs_context = &context;
2183 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
2184 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2185 imgp->ip_mac_return = 0;
2186 imgp->ip_px_persona = NULL;
2187 imgp->ip_cs_error = OS_REASON_NULL;
2188
2189 if (uap->adesc != USER_ADDR_NULL) {
2190 if (is_64) {
2191 error = copyin(uap->adesc, &px_args, sizeof(px_args));
2192 } else {
2193 struct user32__posix_spawn_args_desc px_args32;
2194
2195 error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
2196
2197 /*
2198 * Convert arguments descriptor from external 32 bit
2199 * representation to internal 64 bit representation
2200 */
2201 px_args.attr_size = px_args32.attr_size;
2202 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
2203 px_args.file_actions_size = px_args32.file_actions_size;
2204 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
2205 px_args.port_actions_size = px_args32.port_actions_size;
2206 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
2207 px_args.mac_extensions_size = px_args32.mac_extensions_size;
2208 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
2209 px_args.coal_info_size = px_args32.coal_info_size;
2210 px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
2211 px_args.persona_info_size = px_args32.persona_info_size;
2212 px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
2213 }
2214 if (error)
2215 goto bad;
2216
2217 if (px_args.attr_size != 0) {
2218 /*
2219 * We are not copying the port_actions pointer,
2220 * because we already have it from px_args.
2221 * This is a bit fragile: <rdar://problem/16427422>
2222 */
2223
2224 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset)) != 0)
2225 goto bad;
2226
2227 bzero((void *)((unsigned long)&px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset);
2228
2229 imgp->ip_px_sa = &px_sa;
2230 }
2231 if (px_args.file_actions_size != 0) {
2232 /* Limit file_actions to allowed number of open files */
2233 int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
2234 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
2235 px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) {
2236 error = EINVAL;
2237 goto bad;
2238 }
2239 MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
2240 if (px_sfap == NULL) {
2241 error = ENOMEM;
2242 goto bad;
2243 }
2244 imgp->ip_px_sfa = px_sfap;
2245
2246 if ((error = copyin(px_args.file_actions, px_sfap,
2247 px_args.file_actions_size)) != 0)
2248 goto bad;
2249
2250 /* Verify that the action count matches the struct size */
2251 if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) {
2252 error = EINVAL;
2253 goto bad;
2254 }
2255 }
2256 if (px_args.port_actions_size != 0) {
2257 /* Limit port_actions to one page of data */
2258 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
2259 px_args.port_actions_size > PAGE_SIZE) {
2260 error = EINVAL;
2261 goto bad;
2262 }
2263
2264 MALLOC(px_spap, _posix_spawn_port_actions_t,
2265 px_args.port_actions_size, M_TEMP, M_WAITOK);
2266 if (px_spap == NULL) {
2267 error = ENOMEM;
2268 goto bad;
2269 }
2270 imgp->ip_px_spa = px_spap;
2271
2272 if ((error = copyin(px_args.port_actions, px_spap,
2273 px_args.port_actions_size)) != 0)
2274 goto bad;
2275
2276 /* Verify that the action count matches the struct size */
2277 if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) {
2278 error = EINVAL;
2279 goto bad;
2280 }
2281 }
2282 #if CONFIG_PERSONAS
2283 /* copy in the persona info */
2284 if (px_args.persona_info_size != 0 && px_args.persona_info != 0) {
2285 /* for now, we need the exact same struct in user space */
2286 if (px_args.persona_info_size != sizeof(*px_persona)) {
2287 error = ERANGE;
2288 goto bad;
2289 }
2290
2291 MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK|M_ZERO);
2292 if (px_persona == NULL) {
2293 error = ENOMEM;
2294 goto bad;
2295 }
2296 imgp->ip_px_persona = px_persona;
2297
2298 if ((error = copyin(px_args.persona_info, px_persona,
2299 px_args.persona_info_size)) != 0)
2300 goto bad;
2301 if ((error = spawn_validate_persona(px_persona)) != 0)
2302 goto bad;
2303 }
2304 #endif
2305 #if CONFIG_MACF
2306 if (px_args.mac_extensions_size != 0) {
2307 if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
2308 goto bad;
2309 }
2310 #endif /* CONFIG_MACF */
2311 }
2312
2313 /* set uthread to parent */
2314 uthread = get_bsdthread_info(current_thread());
2315
2316 /*
2317 * <rdar://6640530>; this does not result in a behaviour change
2318 * relative to Leopard, so there should not be any existing code
2319 * which depends on it.
2320 */
2321 if (uthread->uu_flag & UT_VFORK) {
2322 error = EINVAL;
2323 goto bad;
2324 }
2325
2326 /*
2327 * If we don't have the extension flag that turns "posix_spawn()"
2328 * into "execve() with options", then we will be creating a new
2329 * process which does not inherit memory from the parent process,
2330 * which is one of the most expensive things about using fork()
2331 * and execve().
2332 */
2333 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) {
2334
2335 /* Set the new task's coalition, if it is requested. */
2336 coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
2337 #if CONFIG_COALITIONS
2338 int i, ncoals;
2339 kern_return_t kr = KERN_SUCCESS;
2340 struct _posix_spawn_coalition_info coal_info;
2341 int coal_role[COALITION_NUM_TYPES];
2342
2343 if (imgp->ip_px_sa == NULL || !px_args.coal_info)
2344 goto do_fork1;
2345
2346 memset(&coal_info, 0, sizeof(coal_info));
2347
2348 if (px_args.coal_info_size > sizeof(coal_info))
2349 px_args.coal_info_size = sizeof(coal_info);
2350 error = copyin(px_args.coal_info,
2351 &coal_info, px_args.coal_info_size);
2352 if (error != 0)
2353 goto bad;
2354
2355 ncoals = 0;
2356 for (i = 0; i < COALITION_NUM_TYPES; i++) {
2357 uint64_t cid = coal_info.psci_info[i].psci_id;
2358 if (cid != 0) {
2359 /*
2360 * don't allow tasks which are not in a
2361 * privileged coalition to spawn processes
2362 * into coalitions other than their own
2363 */
2364 if (!task_is_in_privileged_coalition(p->task, i)) {
2365 coal_dbg("ERROR: %d not in privilegd "
2366 "coalition of type %d",
2367 p->p_pid, i);
2368 spawn_coalitions_release_all(coal);
2369 error = EPERM;
2370 goto bad;
2371 }
2372
2373 coal_dbg("searching for coalition id:%llu", cid);
2374 /*
2375 * take a reference and activation on the
2376 * coalition to guard against free-while-spawn
2377 * races
2378 */
2379 coal[i] = coalition_find_and_activate_by_id(cid);
2380 if (coal[i] == COALITION_NULL) {
2381 coal_dbg("could not find coalition id:%llu "
2382 "(perhaps it has been terminated or reaped)", cid);
2383 /*
2384 * release any other coalitions we
2385 * may have a reference to
2386 */
2387 spawn_coalitions_release_all(coal);
2388 error = ESRCH;
2389 goto bad;
2390 }
2391 if (coalition_type(coal[i]) != i) {
2392 coal_dbg("coalition with id:%lld is not of type:%d"
2393 " (it's type:%d)", cid, i, coalition_type(coal[i]));
2394 error = ESRCH;
2395 goto bad;
2396 }
2397 coal_role[i] = coal_info.psci_info[i].psci_role;
2398 ncoals++;
2399 }
2400 }
2401 if (ncoals < COALITION_NUM_TYPES) {
2402 /*
2403 * If the user is attempting to spawn into a subset of
2404 * the known coalition types, then make sure they have
2405 * _at_least_ specified a resource coalition. If not,
2406 * the following fork1() call will implicitly force an
2407 * inheritance from 'p' and won't actually spawn the
2408 * new task into the coalitions the user specified.
2409 * (also the call to coalitions_set_roles will panic)
2410 */
2411 if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
2412 spawn_coalitions_release_all(coal);
2413 error = EINVAL;
2414 goto bad;
2415 }
2416 }
2417 do_fork1:
2418 #endif /* CONFIG_COALITIONS */
2419
2420 /*
2421 * note that this will implicitly inherit the
2422 * caller's persona (if it exists)
2423 */
2424 error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
2425 /* returns a thread and task reference */
2426
2427 if (error == 0) {
2428 new_task = get_threadtask(imgp->ip_new_thread);
2429 }
2430 #if CONFIG_COALITIONS
2431 /* set the roles of this task within each given coalition */
2432 if (error == 0) {
2433 kr = coalitions_set_roles(coal, get_threadtask(imgp->ip_new_thread), coal_role);
2434 if (kr != KERN_SUCCESS)
2435 error = EINVAL;
2436 }
2437
2438 /* drop our references and activations - fork1() now holds them */
2439 spawn_coalitions_release_all(coal);
2440 #endif /* CONFIG_COALITIONS */
2441 if (error != 0) {
2442 goto bad;
2443 }
2444 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */
2445 spawn_no_exec = TRUE; /* used in later tests */
2446
2447 #if CONFIG_PERSONAS
2448 /*
2449 * If the parent isn't in a persona (launchd), and
2450 * hasn't specified a new persona for the process,
2451 * then we'll put the process into the system persona
2452 *
2453 * TODO: this will have to be re-worked because as of
2454 * now, without any launchd adoption, the resulting
2455 * xpcproxy process will not have sufficient
2456 * privileges to setuid/gid.
2457 */
2458 #if 0
2459 if (!proc_has_persona(p) && imgp->ip_px_persona == NULL) {
2460 MALLOC(px_persona, struct _posix_spawn_persona_info *,
2461 sizeof(*px_persona), M_TEMP, M_WAITOK|M_ZERO);
2462 if (px_persona == NULL) {
2463 error = ENOMEM;
2464 goto bad;
2465 }
2466 px_persona->pspi_id = persona_get_id(g_system_persona);
2467 imgp->ip_px_persona = px_persona;
2468 }
2469 #endif /* 0 */
2470 #endif /* CONFIG_PERSONAS */
2471 } else {
2472 /*
2473 * For execve case, create a new task and thread
2474 * which points to current_proc. The current_proc will point
2475 * to the new task after image activation and proc ref drain.
2476 *
2477 * proc (current_proc) <----- old_task (current_task)
2478 * ^ | ^
2479 * | | |
2480 * | ----------------------------------
2481 * |
2482 * --------- new_task (task marked as TF_EXEC_COPY)
2483 *
2484 * After image activation, the proc will point to the new task
2485 * and would look like following.
2486 *
2487 * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
2488 * ^ |
2489 * | |
2490 * | ----------> new_task
2491 * | |
2492 * -----------------
2493 *
2494 * During exec any transition from new_task -> proc is fine, but don't allow
2495 * transition from proc->task, since it will modify old_task.
2496 */
2497 imgp->ip_new_thread = fork_create_child(current_task(),
2498 NULL, p, FALSE, p->p_flag & P_LP64, TRUE);
2499 /* task and thread ref returned by fork_create_child */
2500 if (imgp->ip_new_thread == NULL) {
2501 error = ENOMEM;
2502 goto bad;
2503 }
2504
2505 new_task = get_threadtask(imgp->ip_new_thread);
2506 imgp->ip_flags |= IMGPF_EXEC;
2507 }
2508
2509 if (spawn_no_exec) {
2510 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
2511
2512 /*
2513 * We had to wait until this point before firing the
2514 * proc:::create probe, otherwise p would not point to the
2515 * child process.
2516 */
2517 DTRACE_PROC1(create, proc_t, p);
2518 }
2519 assert(p != NULL);
2520
2521 context.vc_thread = imgp->ip_new_thread;
2522 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
2523
2524 /*
2525 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
2526 * to handle the file_actions. Since vfork() also ends up setting
2527 * us into the parent process group, and saved off the signal flags,
2528 * this is also where we want to handle the spawn flags.
2529 */
2530
2531 /* Has spawn file actions? */
2532 if (imgp->ip_px_sfa != NULL) {
2533 /*
2534 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2535 * is handled in exec_handle_file_actions().
2536 */
2537 if ((error = exec_handle_file_actions(imgp,
2538 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
2539 goto bad;
2540 }
2541
2542 /* Has spawn port actions? */
2543 if (imgp->ip_px_spa != NULL) {
2544 boolean_t is_adaptive = FALSE;
2545 boolean_t portwatch_present = FALSE;
2546
2547 /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
2548 if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2549 is_adaptive = TRUE;
2550
2551 /*
2552 * portwatch only:
2553 * Allocate a place to store the ports we want to bind to the new task.
2554 * We can't bind them until after the apptype is set.
2555 */
2556 if (px_spap->pspa_count != 0 && is_adaptive) {
2557 portwatch_count = px_spap->pspa_count;
2558 MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO);
2559 } else {
2560 portwatch_ports = NULL;
2561 }
2562
2563 if ((error = exec_handle_port_actions(imgp, &portwatch_present, portwatch_ports)) != 0)
2564 goto bad;
2565
2566 if (portwatch_present == FALSE && portwatch_ports != NULL) {
2567 FREE(portwatch_ports, M_TEMP);
2568 portwatch_ports = NULL;
2569 portwatch_count = 0;
2570 }
2571 }
2572
2573 /* Has spawn attr? */
2574 if (imgp->ip_px_sa != NULL) {
2575 /*
2576 * Set the process group ID of the child process; this has
2577 * to happen before the image activation.
2578 */
2579 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2580 struct setpgid_args spga;
2581 spga.pid = p->p_pid;
2582 spga.pgid = px_sa.psa_pgroup;
2583 /*
2584 * Effectively, call setpgid() system call; works
2585 * because there are no pointer arguments.
2586 */
2587 if ((error = setpgid(p, &spga, ival)) != 0)
2588 goto bad;
2589 }
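/*
 * Userspace counterpart (sketch): posix_spawnattr_setpgroup(&attr, 0)
 * together with POSIX_SPAWN_SETPGROUP in the attr flags drives this path.
 */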
2590
2591 /*
2592 * Reset UID/GID to parent's RUID/RGID; This works only
2593 * because the operation occurs *after* the vfork() and
2594 * before the call to exec_handle_sugid() by the image
2595 * activator called from exec_activate_image(). POSIX
2596 * requires that any setuid/setgid bits on the process
2597 * image will take precedence over the spawn attributes
2598 * (re)setting them.
2599 *
2600 * Modifications to p_ucred must be guarded using the
2601 * proc's ucred lock. This prevents others from accessing
2602 * a garbage credential.
2603 */
2604 while (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2605 kauth_cred_t my_cred = kauth_cred_proc_ref(p);
2606 kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2607
2608 if (my_cred == my_new_cred) {
2609 kauth_cred_unref(&my_cred);
2610 break;
2611 }
2612
2613 /* update cred on proc */
2614 proc_ucred_lock(p);
2615
2616 if (p->p_ucred != my_cred) {
2617 proc_ucred_unlock(p);
2618 kauth_cred_unref(&my_new_cred);
2619 continue;
2620 }
2621
2622 /* donate cred reference on my_new_cred to p->p_ucred */
2623 p->p_ucred = my_new_cred;
2624 PROC_UPDATE_CREDS_ONPROC(p);
2625 proc_ucred_unlock(p);
2626
2627 /* drop additional reference that was taken on the previous cred */
2628 kauth_cred_unref(&my_cred);
2629 }
2630
2631 #if CONFIG_PERSONAS
2632 if (spawn_no_exec && imgp->ip_px_persona != NULL) {
2633 /*
2634 * If we were asked to spawn a process into a new persona,
2635 * do the credential switch now (which may override the UID/GID
2636 * inherit done just above). It's important to do this switch
2637 * before image activation both for reasons stated above, and
2638 * to ensure that the new persona has access to the image/file
2639 * being executed.
2640 */
2641 error = spawn_persona_adopt(p, imgp->ip_px_persona);
2642 if (error != 0)
2643 goto bad;
2644 }
2645 #endif /* CONFIG_PERSONAS */
2646 #if !SECURE_KERNEL
2647 /*
2648 * Disable ASLR for the spawned process.
2649 *
2650 * But only do so if we are not embedded + RELEASE.
2651 * While embedded allows for a boot-arg (-disable_aslr)
2652 * to deal with this (which itself is only honored on
2653 * DEVELOPMENT or DEBUG builds of xnu), it is often
2654 * useful or necessary to disable ASLR on a per-process
2655 * basis for unit testing and debugging.
2656 */
2657 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2658 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2659 #endif /* !SECURE_KERNEL */
2660
2661 /*
2662 * Forcibly allow execution from data pages for the spawned process,
2663 * even if it would otherwise be disallowed by the architecture default.
2664 */
2665 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2666 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
2667 }
2668
2669 /*
2670 * Disable ASLR during image activation. This occurs either if the
2671 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2672 * P_DISABLE_ASLR was inherited from the parent process.
2673 */
2674 if (p->p_flag & P_DISABLE_ASLR)
2675 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
2676
2677 /*
2678 * Clear transition flag so we won't hang if exec_activate_image() causes
2679 * an automount (and launchd does a proc sysctl to service it).
2680 *
2681 * <rdar://problem/6848672>, <rdar://problem/5959568>.
2682 */
2683 if (spawn_no_exec) {
2684 proc_transend(p, 0);
2685 proc_transit_set = FALSE;
2686 }
2687
2688 #if MAC_SPAWN /* XXX */
2689 if (uap->mac_p != USER_ADDR_NULL) {
2690 error = mac_execve_enter(uap->mac_p, imgp);
2691 if (error)
2692 goto bad;
2693 }
2694 #endif
2695
2696 /*
2697 * Activate the image
2698 */
2699 error = exec_activate_image(imgp);
2700
2701 if (error == 0 && !spawn_no_exec) {
2702 p = proc_exec_switch_task(p, current_task(), new_task, imgp->ip_new_thread);
2703 /* proc ref returned */
2704 should_release_proc_ref = TRUE;
2705 }
2706
2707 if (error == 0) {
2708 /* process completed the exec */
2709 exec_done = TRUE;
2710 } else if (error == -1) {
2711 /* Image not claimed by any activator? */
2712 error = ENOEXEC;
2713 }
2714
2715 /*
2716 * If we have a spawn attr, and it contains signal related flags,
2717 * then we need to process them in the "context" of the new child
2718 * process, so we have to process it following image activation,
2719 * prior to making the thread runnable in user space. This is
2720 * necessitated by some signal information being per-thread rather
2721 * than per-process, and we don't have the new allocation in hand
2722 * until after the image is activated.
2723 */
2724 if (!error && imgp->ip_px_sa != NULL) {
2725 thread_t child_thread = imgp->ip_new_thread;
2726 uthread_t child_uthread = get_bsdthread_info(child_thread);
2727
2728 /*
2729 * Mask a list of signals that would otherwise be
2730 * unmasked in the parent; note that some signals
2731 * are not maskable.
2732 */
2733 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
2734 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2735 /*
2736 * Default a list of signals instead of ignoring them, if
2737 * they were ignored in the parent. Note that we pass
2738 * spawn_no_exec to setsigvec() to indicate that we called
2739 * fork1() and therefore do not need to call proc_signalstart()
2740 * internally.
2741 */
2742 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2743 vec.sa_handler = SIG_DFL;
2744 vec.sa_tramp = 0;
2745 vec.sa_mask = 0;
2746 vec.sa_flags = 0;
2747 for (sig = 1; sig < NSIG; sig++)
2748 if (px_sa.psa_sigdefault & (1 << (sig-1))) {
2749 error = setsigvec(p, child_thread, sig, &vec, spawn_no_exec);
2750 }
2751 }
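/*
 * Userspace counterpart (sketch): both flags are driven by the
 * standard attr calls, together with POSIX_SPAWN_SETSIGDEF /
 * POSIX_SPAWN_SETSIGMASK in the attr flags:
 *
 *	sigset_t def;
 *	sigemptyset(&def);
 *	sigaddset(&def, SIGPIPE);
 *	posix_spawnattr_setsigdefault(&attr, &def);
 *	posix_spawnattr_setsigmask(&attr, &def);
 */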
2752
2753 /*
2754 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2755 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2756 * limit.
2757 *
2758 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2759 */
2760 if (px_sa.psa_cpumonitor_percent != 0) {
2761 /*
2762 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2763 * an entitlement to configure the monitor a certain way seems silly, since
2764 * whoever is turning it on could just as easily choose not to do so.
2765 */
2766 error = proc_set_task_ruse_cpu(p->task,
2767 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
2768 px_sa.psa_cpumonitor_percent,
2769 px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
2770 0, TRUE);
2771 }
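/*
 * A hedged sketch of the private SPI believed to drive this path
 * (posix_spawnattr_setcpumonitor() from spawn_private.h; the exact
 * signature and the 50%% / 180s values are assumptions here):
 *
 *	posix_spawnattr_setcpumonitor(&attr, 50, 180);
 */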
2772 }
2773
2774 bad:
2775
2776 if (error == 0) {
2777 /* reset delay idle sleep status if set */
2778 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2779 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
2780 /* upon successful spawn, re/set the proc control state */
2781 if (imgp->ip_px_sa != NULL) {
2782 switch (px_sa.psa_pcontrol) {
2783 case POSIX_SPAWN_PCONTROL_THROTTLE:
2784 p->p_pcaction = P_PCTHROTTLE;
2785 break;
2786 case POSIX_SPAWN_PCONTROL_SUSPEND:
2787 p->p_pcaction = P_PCSUSP;
2788 break;
2789 case POSIX_SPAWN_PCONTROL_KILL:
2790 p->p_pcaction = P_PCKILL;
2791 break;
2792 case POSIX_SPAWN_PCONTROL_NONE:
2793 default:
2794 p->p_pcaction = 0;
2795 break;
2796 }
2797 }
2798 exec_resettextvp(p, imgp);
2799
2800 #if CONFIG_MEMORYSTATUS
2801 /* Has jetsam attributes? */
2802 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2803 /*
2804 * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
2805 * longer relevant, as background limits are described via the inactive limit slots.
2806 * At the kernel layer, the flag is ignored.
2807 *
2808 * That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
2809 * we attempt to mimic previous behavior by forcing the BG limit data into the
2810 * inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode.
2811 * The kernel layer will flag this mapping.
2812 */
2813 if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
2814 memorystatus_update(p, px_sa.psa_priority, 0,
2815 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2816 TRUE,
2817 -1, TRUE,
2818 px_sa.psa_memlimit_inactive, FALSE,
2819 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
2820 } else {
2821 memorystatus_update(p, px_sa.psa_priority, 0,
2822 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2823 TRUE,
2824 px_sa.psa_memlimit_active,
2825 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
2826 px_sa.psa_memlimit_inactive,
2827 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL),
2828 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
2829 }
2830
2831 }
2832 #endif /* CONFIG_MEMORYSTATUS */
2833 }
2834
2835 /*
2836 * If we successfully called fork1(), we always need to do this;
2837 * we identify this case by noting the IMGPF_SPAWN flag. This is
2838 * because we come back from that call with signals blocked in the
2839 * child, and we have to unblock them, but we want to wait until
2840 * after we've performed any spawn actions. This has to happen
2841 * before check_for_signature(), which uses psignal.
2842 */
2843 if (spawn_no_exec) {
2844 if (proc_transit_set)
2845 proc_transend(p, 0);
2846
2847 /*
2848 * Drop the signal lock on the child which was taken on our
2849 * behalf by forkproc()/cloneproc() to prevent signals being
2850 * received by the child in a partially constructed state.
2851 */
2852 proc_signalend(p, 0);
2853
2854 /* flag the 'fork' has occurred */
2855 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
2856 /* then flag exec has occurred */
2857 /* notify only if it has not failed due to FP Key error */
2858 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
2859 proc_knote(p, NOTE_EXEC);
2860 }
2861
2862 if (error == 0) {
2863 /*
2864 * We need to initialize the bank context behind the protection of
2865 * the proc_trans lock to prevent a race with exit. We can't do this during
2866 * exec_activate_image because task_bank_init checks entitlements that
2867 * aren't loaded until subsequent calls (including exec_resettextvp).
2868 */
2869 error = proc_transstart(p, 0, 0);
2870
2871 if (error == 0) {
2872 task_bank_init(get_threadtask(imgp->ip_new_thread));
2873 proc_transend(p, 0);
2874 }
2875 }
2876
2877
2878 /*
2879 * Apply the spawnattr policy, apptype (which primes the task for importance donation),
2880 * and bind any portwatch ports to the new task.
2881 * This must be done after the exec so that the child's thread is ready,
2882 * and after the in transit state has been released, because priority is
2883 * dropped here so we need to be prepared for a potentially long preemption interval
2884 *
2885 * TODO: Consider splitting this up into separate phases
2886 */
2887 if (error == 0 && imgp->ip_px_sa != NULL) {
2888 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
2889
2890 exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, psa->psa_darwin_role,
2891 portwatch_ports, portwatch_count);
2892 }
2893
2894 /*
2895 * Need to transfer pending watch port boosts to the new task while still making
2896 * sure that the old task remains in the importance linkage. Create an importance
2897 * linkage from old task to new task, then switch the task importance base
2898 * of old task and new task. After the switch the port watch boost will be
2899 * boosting the new task and new task will be donating importance to old task.
2900 */
2901 if (error == 0 && task_did_exec(current_task())) {
2902 inherit = ipc_importance_exec_switch_task(current_task(), get_threadtask(imgp->ip_new_thread));
2903 }
2904
2905 /* Apply the main thread qos */
2906 if (error == 0) {
2907 thread_t main_thread = imgp->ip_new_thread;
2908
2909 task_set_main_thread_qos(get_threadtask(imgp->ip_new_thread), main_thread);
2910 }
2911
2912 /*
2913 * Release any ports we kept around for binding to the new task
2914 * We need to release the rights even if the posix_spawn has failed.
2915 */
2916 if (portwatch_ports != NULL) {
2917 for (int i = 0; i < portwatch_count; i++) {
2918 ipc_port_t port = NULL;
2919 if ((port = portwatch_ports[i]) != NULL) {
2920 ipc_port_release_send(port);
2921 }
2922 }
2923 FREE(portwatch_ports, M_TEMP);
2924 portwatch_ports = NULL;
2925 portwatch_count = 0;
2926 }
2927
2928 /*
2929 * We have to delay operations which might throw a signal until after
2930 * the signals have been unblocked; however, we want that to happen
2931 * after exec_resettextvp() so that the textvp is correct when they
2932 * fire.
2933 */
2934 if (error == 0) {
2935 error = check_for_signature(p, imgp);
2936
2937 /*
2938 * Pay for our earlier safety; deliver the delayed signals from
2939 * the incomplete spawn process now that it's complete.
2940 */
2941 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
2942 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
2943 }
2944
2945 if (error == 0 && !spawn_no_exec)
2946 KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
2947 p->p_pid);
2948 }
2949
2950
2951 if (imgp != NULL) {
2952 if (imgp->ip_vp)
2953 vnode_put(imgp->ip_vp);
2954 if (imgp->ip_scriptvp)
2955 vnode_put(imgp->ip_scriptvp);
2956 if (imgp->ip_strings)
2957 execargs_free(imgp);
2958 if (imgp->ip_px_sfa != NULL)
2959 FREE(imgp->ip_px_sfa, M_TEMP);
2960 if (imgp->ip_px_spa != NULL)
2961 FREE(imgp->ip_px_spa, M_TEMP);
2962 #if CONFIG_PERSONAS
2963 if (imgp->ip_px_persona != NULL)
2964 FREE(imgp->ip_px_persona, M_TEMP);
2965 #endif
2966 #if CONFIG_MACF
2967 if (imgp->ip_px_smpx != NULL)
2968 spawn_free_macpolicyinfo(imgp->ip_px_smpx);
2969 if (imgp->ip_execlabelp)
2970 mac_cred_label_free(imgp->ip_execlabelp);
2971 if (imgp->ip_scriptlabelp)
2972 mac_vnode_label_free(imgp->ip_scriptlabelp);
2973 if (imgp->ip_cs_error != OS_REASON_NULL) {
2974 os_reason_free(imgp->ip_cs_error);
2975 imgp->ip_cs_error = OS_REASON_NULL;
2976 }
2977 #endif
2978 }
2979
2980 #if CONFIG_DTRACE
2981 if (spawn_no_exec) {
2982 /*
2983 * In the original DTrace reference implementation,
2984 * posix_spawn() was a libc routine that just
2985 * did vfork(2) then exec(2). Thus the proc::: probes
2986 * are very fork/exec oriented. The details of this
2987 * in-kernel implementation of posix_spawn() is different
2988 * (while producing the same process-observable effects)
2989 * particularly w.r.t. errors, and which thread/process
2990 * is constructing what on behalf of whom.
2991 */
2992 if (error) {
2993 DTRACE_PROC1(spawn__failure, int, error);
2994 } else {
2995 DTRACE_PROC(spawn__success);
2996 /*
2997 * Some DTrace scripts, e.g. newproc.d in
2998 * /usr/bin, rely on the 'exec-success'
2999 * probe being fired in the child after the
3000 * new process image has been constructed
3001 * in order to determine the associated pid.
3002 *
3003 * So, even though the parent built the image
3004 * here, for compatibility, mark the new thread
3005 * so 'exec-success' fires on it as it leaves
3006 * the kernel.
3007 */
3008 dtrace_thread_didexec(imgp->ip_new_thread);
3009 }
3010 } else {
3011 if (error) {
3012 DTRACE_PROC1(exec__failure, int, error);
3013 } else {
3014 DTRACE_PROC(exec__success);
3015 }
3016 }
3017
3018 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
3019 (*dtrace_proc_waitfor_hook)(p);
3020 }
3021 #endif
3022 /*
3023 * exec-success dtrace probe fired, clear bsd_info from
3024 * old task if it did exec.
3025 */
3026 if (task_did_exec(current_task())) {
3027 set_bsdtask_info(current_task(), NULL);
3028 }
3029
3030 /* clear bsd_info from new task and terminate it if exec failed */
3031 if (new_task != NULL && task_is_exec_copy(new_task)) {
3032 set_bsdtask_info(new_task, NULL);
3033 task_terminate_internal(new_task);
3034 }
3035
3036 /* Return to both the parent and the child? */
3037 if (imgp != NULL && spawn_no_exec) {
3038 /*
3039 * If the parent wants the pid, copy it out
3040 */
3041 if (pid != USER_ADDR_NULL)
3042 (void)suword(pid, p->p_pid);
3043 retval[0] = error;
3044
3045 /*
3046 * If we had an error, perform an internal reap; this is
3047 * entirely safe, as we have a real process backing us.
3048 */
3049 if (error) {
3050 proc_list_lock();
3051 p->p_listflag |= P_LIST_DEADPARENT;
3052 proc_list_unlock();
3053 proc_lock(p);
3054 /* make sure no one else has killed it off... */
3055 if (p->p_stat != SZOMB && p->exit_thread == NULL) {
3056 p->exit_thread = current_thread();
3057 proc_unlock(p);
3058 exit1(p, 1, (int *)NULL);
3059 } else {
3060 /* someone is doing it for us; just skip it */
3061 proc_unlock(p);
3062 }
3063 }
3064 }
3065
3066 /*
3067 * Do not terminate the current task if proc_exec_switch_task did not
3068 * switch the tasks; terminating the current task without the switch
3069 * would result in losing the SIGKILL status.
3070 */
3071 if (task_did_exec(current_task())) {
3072 /* Terminate the current task, since exec will start in new task */
3073 task_terminate_internal(current_task());
3074 }
3075
3076 /* Release the thread ref returned by fork_create_child/fork1 */
3077 if (imgp != NULL && imgp->ip_new_thread) {
3078 /* wake up the new thread */
3079 task_clear_return_wait(get_threadtask(imgp->ip_new_thread));
3080 thread_deallocate(imgp->ip_new_thread);
3081 imgp->ip_new_thread = NULL;
3082 }
3083
3084 /* Release the ref returned by fork_create_child/fork1 */
3085 if (new_task) {
3086 task_deallocate(new_task);
3087 new_task = NULL;
3088 }
3089
3090 if (should_release_proc_ref) {
3091 proc_rele(p);
3092 }
3093
3094 if (bufp != NULL) {
3095 FREE(bufp, M_TEMP);
3096 }
3097
3098 if (inherit != NULL) {
3099 ipc_importance_release(inherit);
3100 }
3101
3102 return(error);
3103 }
3104
3105 /*
3106 * proc_exec_switch_task
3107 *
3108 * Parameters: p proc
3109 * old_task task before exec
3110 * new_task task after exec
3111 * new_thread thread in new task
3112 *
3113 * Returns: proc.
3114 *
3115 * Note: The function will switch the task pointer of proc
3116 * from old task to new task. The switch needs to happen
3117 * after draining all proc refs and inside a proc translock.
3118 * In the case of failure to switch the task, which might happen
3119 * if the process received a SIGKILL or jetsam killed it, it will make
3120 * sure that the new task terminates. A user proc ref is returned
3121 * to the caller.
3122 *
3123 * This function is called after the point of no return; in the case
3124 * of failure to switch, it will terminate the new task, swallow the
3125 * error, and let the terminated process complete exec and die.
3126 */
3127 proc_t
3128 proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread)
3129 {
3130 int error = 0;
3131 boolean_t task_active;
3132 boolean_t proc_active;
3133 boolean_t thread_active;
3134 thread_t old_thread = current_thread();
3135
3136 /*
3137 * Switch the task pointer of proc to new task.
3138 * Before switching the task, wait for proc_refdrain.
3139 * After the switch happens the proc can disappear, so
3140 * take a ref before it does.
3141 */
3142 p = proc_refdrain_with_refwait(p, TRUE);
3143 /* extra proc ref returned to the caller */
3144
3145 assert(get_threadtask(new_thread) == new_task);
3146 task_active = task_is_active(new_task);
3147
3148 /* Take the proc_translock to change the task ptr */
3149 proc_lock(p);
3150 proc_active = !(p->p_lflag & P_LEXIT);
3151
3152 /* Check if the current thread is not aborted due to SIGKILL */
3153 thread_active = thread_is_active(old_thread);
3154
3155 /*
3156 * Do not switch the task if the new task or proc is already terminated
3157 * as a result of error in exec past point of no return
3158 */
3159 if (proc_active && task_active && thread_active) {
3160 error = proc_transstart(p, 1, 0);
3161 if (error == 0) {
3162 uthread_t new_uthread = get_bsdthread_info(new_thread);
3163 uthread_t old_uthread = get_bsdthread_info(current_thread());
3164
3165 /*
3166 * bsd_info of old_task will get cleared in execve and posix_spawn
3167 * after firing exec-success/error dtrace probe.
3168 */
3169 p->task = new_task;
3170
3171 /* Copy the signal state, dtrace state and set bsd ast on new thread */
3172 act_set_astbsd(new_thread);
3173 new_uthread->uu_siglist = old_uthread->uu_siglist;
3174 new_uthread->uu_sigwait = old_uthread->uu_sigwait;
3175 new_uthread->uu_sigmask = old_uthread->uu_sigmask;
3176 new_uthread->uu_oldmask = old_uthread->uu_oldmask;
3177 new_uthread->uu_vforkmask = old_uthread->uu_vforkmask;
3178 new_uthread->uu_exit_reason = old_uthread->uu_exit_reason;
3179 #if CONFIG_DTRACE
3180 new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig;
3181 new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop;
3182 new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid;
3183 assert(new_uthread->t_dtrace_scratch == NULL);
3184 new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch;
3185
3186 old_uthread->t_dtrace_sig = 0;
3187 old_uthread->t_dtrace_stop = 0;
3188 old_uthread->t_dtrace_resumepid = 0;
3189 old_uthread->t_dtrace_scratch = NULL;
3190 #endif
3191 /* Copy the resource accounting info */
3192 thread_copy_resource_info(new_thread, current_thread());
3193
3194 /* Clear the exit reason and signal state on old thread */
3195 old_uthread->uu_exit_reason = NULL;
3196 old_uthread->uu_siglist = 0;
3197
3198 /* Add the new uthread to proc uthlist and remove the old one */
3199 TAILQ_INSERT_TAIL(&p->p_uthlist, new_uthread, uu_list);
3200 TAILQ_REMOVE(&p->p_uthlist, old_uthread, uu_list);
3201
3202 task_set_did_exec_flag(old_task);
3203 task_clear_exec_copy_flag(new_task);
3204
3205 proc_transend(p, 1);
3206 }
3207 }
3208
3209 proc_unlock(p);
3210 proc_refwake(p);
3211
3212 if (error != 0 || !task_active || !proc_active || !thread_active) {
3213 task_terminate_internal(new_task);
3214 }
3215
3216 return p;
3217 }
3218
3219 /*
3220 * execve
3221 *
3222 * Parameters: uap->fname File name to exec
3223 * uap->argp Argument list
3224 * uap->envp Environment list
3225 *
3226 * Returns: 0 Success
3227 * __mac_execve:EINVAL Invalid argument
3228 * __mac_execve:ENOTSUP Not supported
3229 * __mac_execve:EACCES Permission denied
3230 * __mac_execve:EINTR Interrupted function
3231 * __mac_execve:ENOMEM Not enough space
3232 * __mac_execve:EFAULT Bad address
3233 * __mac_execve:ENAMETOOLONG Filename too long
3234 * __mac_execve:ENOEXEC Executable file format error
3235 * __mac_execve:ETXTBSY Text file busy [misuse of error code]
3236 * __mac_execve:???
3237 *
3238 * TODO: Dynamic linker header address on stack is copied via suword()
3239 */
3240 /* ARGSUSED */
3241 int
3242 execve(proc_t p, struct execve_args *uap, int32_t *retval)
3243 {
3244 struct __mac_execve_args muap;
3245 int err;
3246
3247 memoryshot(VM_EXECVE, DBG_FUNC_NONE);
3248
3249 muap.fname = uap->fname;
3250 muap.argp = uap->argp;
3251 muap.envp = uap->envp;
3252 muap.mac_p = USER_ADDR_NULL;
3253 err = __mac_execve(p, &muap, retval);
3254
3255 return(err);
3256 }
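/*
 * Illustrative userspace call (sketch; argv/envp contents are
 * illustrative, and the call returns only on error):
 *
 *	char *argv[] = { "/bin/ls", "-l", NULL };
 *	char *envp[] = { "PATH=/usr/bin:/bin", NULL };
 *	execve(argv[0], argv, envp);
 */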
3257
3258 /*
3259 * __mac_execve
3260 *
3261 * Parameters: uap->fname File name to exec
3262 * uap->argp Argument list
3263 * uap->envp Environment list
3264 * uap->mac_p MAC label supplied by caller
3265 *
3266 * Returns: 0 Success
3267 * EINVAL Invalid argument
3268 * ENOTSUP Not supported
3269 * ENOEXEC Executable file format error
3270 * exec_activate_image:EINVAL Invalid argument
3271 * exec_activate_image:EACCES Permission denied
3272 * exec_activate_image:EINTR Interrupted function
3273 * exec_activate_image:ENOMEM Not enough space
3274 * exec_activate_image:EFAULT Bad address
3275 * exec_activate_image:ENAMETOOLONG Filename too long
3276 * exec_activate_image:ENOEXEC Executable file format error
3277 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
3278 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
3279 * exec_activate_image:???
3280 * mac_execve_enter:???
3281 *
3282 * TODO: Dynamic linker header address on stack is copied via suword()
3283 */
3284 int
3285 __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
3286 {
3287 char *bufp = NULL;
3288 struct image_params *imgp;
3289 struct vnode_attr *vap;
3290 struct vnode_attr *origvap;
3291 int error;
3292 int is_64 = IS_64BIT_PROCESS(p);
3293 struct vfs_context context;
3294 struct uthread *uthread;
3295 task_t new_task = NULL;
3296 boolean_t should_release_proc_ref = FALSE;
3297 boolean_t exec_done = FALSE;
3298 boolean_t in_vfexec = FALSE;
3299 void *inherit = NULL;
3300
3301 context.vc_thread = current_thread();
3302 context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
3303
3304 /* Allocate a big chunk for locals instead of using stack since these
3305 * structures are pretty big.
3306 */
3307 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
3308 imgp = (struct image_params *) bufp;
3309 if (bufp == NULL) {
3310 error = ENOMEM;
3311 goto exit_with_error;
3312 }
3313 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
3314 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
3315
3316 /* Initialize the common data in the image_params structure */
3317 imgp->ip_user_fname = uap->fname;
3318 imgp->ip_user_argv = uap->argp;
3319 imgp->ip_user_envv = uap->envp;
3320 imgp->ip_vattr = vap;
3321 imgp->ip_origvattr = origvap;
3322 imgp->ip_vfs_context = &context;
3323 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
3324 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
3325 imgp->ip_mac_return = 0;
3326 imgp->ip_cs_error = OS_REASON_NULL;
3327
3328 uthread = get_bsdthread_info(current_thread());
3329 if (uthread->uu_flag & UT_VFORK) {
3330 imgp->ip_flags |= IMGPF_VFORK_EXEC;
3331 in_vfexec = TRUE;
3332 } else {
3333 imgp->ip_flags |= IMGPF_EXEC;
3334
3335 /*
3336 * For execve case, create a new task and thread
3337 * which points to current_proc. The current_proc will point
3338 * to the new task after image activation and proc ref drain.
3339 *
3340 * proc (current_proc) <----- old_task (current_task)
3341 * ^ | ^
3342 * | | |
3343 * | ----------------------------------
3344 * |
3345 * --------- new_task (task marked as TF_EXEC_COPY)
3346 *
3347 * After image activation, the proc will point to the new task
3348 * and would look like following.
3349 *
3350 * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
3351 * ^ |
3352 * | |
3353 * | ----------> new_task
3354 * | |
3355 * -----------------
3356 *
3357 * During exec any transition from new_task -> proc is fine, but don't allow
3358 * transition from proc->task, since it will modify old_task.
3359 */
3360 imgp->ip_new_thread = fork_create_child(current_task(),
3361 NULL, p, FALSE, p->p_flag & P_LP64, TRUE);
3362 /* task and thread ref returned by fork_create_child */
3363 if (imgp->ip_new_thread == NULL) {
3364 error = ENOMEM;
3365 goto exit_with_error;
3366 }
3367
3368 new_task = get_threadtask(imgp->ip_new_thread);
3369 context.vc_thread = imgp->ip_new_thread;
3370 }
3371
3372 #if CONFIG_MACF
3373 if (uap->mac_p != USER_ADDR_NULL) {
3374 error = mac_execve_enter(uap->mac_p, imgp);
3375 if (error) {
3376 kauth_cred_unref(&context.vc_ucred);
3377 goto exit_with_error;
3378 }
3379 }
3380 #endif
3381
3382 error = exec_activate_image(imgp);
3383 /* thread and task ref returned for vfexec case */
3384
3385 if (imgp->ip_new_thread != NULL) {
3386 /*
3387 * task reference might be returned by exec_activate_image
3388 * for vfexec.
3389 */
3390 new_task = get_threadtask(imgp->ip_new_thread);
3391 }
3392
3393 if (!error && !in_vfexec) {
3394 p = proc_exec_switch_task(p, current_task(), new_task, imgp->ip_new_thread);
3395 /* proc ref returned */
3396 should_release_proc_ref = TRUE;
3397 }
3398
3399 kauth_cred_unref(&context.vc_ucred);
3400
3401 /* Image not claimed by any activator? */
3402 if (error == -1)
3403 error = ENOEXEC;
3404
3405 if (!error) {
3406 exec_done = TRUE;
3407 assert(imgp->ip_new_thread != NULL);
3408
3409 exec_resettextvp(p, imgp);
3410 error = check_for_signature(p, imgp);
3411 }
3412 if (imgp->ip_vp != NULLVP)
3413 vnode_put(imgp->ip_vp);
3414 if (imgp->ip_scriptvp != NULLVP)
3415 vnode_put(imgp->ip_scriptvp);
3416 if (imgp->ip_strings)
3417 execargs_free(imgp);
3418 #if CONFIG_MACF
3419 if (imgp->ip_execlabelp)
3420 mac_cred_label_free(imgp->ip_execlabelp);
3421 if (imgp->ip_scriptlabelp)
3422 mac_vnode_label_free(imgp->ip_scriptlabelp);
3423 #endif
3424 if (imgp->ip_cs_error != OS_REASON_NULL) {
3425 os_reason_free(imgp->ip_cs_error);
3426 imgp->ip_cs_error = OS_REASON_NULL;
3427 }
3428
3429 if (!error) {
3430 /*
3431 * We need to initialize the bank context behind the protection of
3432 * the proc_trans lock to prevent a race with exit. We can't do this during
3433 * exec_activate_image because task_bank_init checks entitlements that
3434 * aren't loaded until subsequent calls (including exec_resettextvp).
3435 */
3436 error = proc_transstart(p, 0, 0);
3437 }
3438
3439 if (!error) {
3440 task_bank_init(get_threadtask(imgp->ip_new_thread));
3441 proc_transend(p, 0);
3442
3443 /* Sever any extant thread affinity */
3444 thread_affinity_exec(current_thread());
3445
3446 thread_t main_thread = imgp->ip_new_thread;
3447
3448 task_set_main_thread_qos(new_task, main_thread);
3449
3450 DTRACE_PROC(exec__success);
3451
3452 #if CONFIG_DTRACE
3453 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
3454 (*dtrace_proc_waitfor_hook)(p);
3455 #endif
3456
3457 if (in_vfexec) {
3458 vfork_return(p, retval, p->p_pid);
3459 }
3460 } else {
3461 DTRACE_PROC1(exec__failure, int, error);
3462 }
3463
3464 exit_with_error:
3465
3466 /*
3467 * exec-success dtrace probe fired, clear bsd_info from
3468 * old task if it did exec.
3469 */
3470 if (task_did_exec(current_task())) {
3471 set_bsdtask_info(current_task(), NULL);
3472 }
3473
3474 /* clear bsd_info from new task and terminate it if exec failed */
3475 if (new_task != NULL && task_is_exec_copy(new_task)) {
3476 set_bsdtask_info(new_task, NULL);
3477 task_terminate_internal(new_task);
3478 }
3479
3480 /*
3481 * Need to transfer pending watch port boosts to the new task while still making
3482 * sure that the old task remains in the importance linkage. Create an importance
3483 * linkage from old task to new task, then switch the task importance base
3484 * of old task and new task. After the switch the port watch boost will be
3485 * boosting the new task and new task will be donating importance to old task.
3486 */
3487 if (error == 0 && task_did_exec(current_task())) {
3488 inherit = ipc_importance_exec_switch_task(current_task(), get_threadtask(imgp->ip_new_thread));
3489 }
3490
3491 if (imgp != NULL) {
3492 /*
3493 * Do not terminate the current task if proc_exec_switch_task did not
3494 * switch the tasks; terminating the current task without the switch
3495 * would result in losing the SIGKILL status.
3496 */
3497 if (task_did_exec(current_task())) {
3498 /* Terminate the current task, since exec will start in new task */
3499 task_terminate_internal(current_task());
3500 }
3501
3502 /* Release the thread ref returned by fork_create_child */
3503 if (imgp->ip_new_thread) {
3504 /* wake up the new exec thread */
3505 task_clear_return_wait(get_threadtask(imgp->ip_new_thread));
3506 thread_deallocate(imgp->ip_new_thread);
3507 imgp->ip_new_thread = NULL;
3508 }
3509 }
3510
3511 /* Release the ref returned by fork_create_child */
3512 if (new_task) {
3513 task_deallocate(new_task);
3514 new_task = NULL;
3515 }
3516
3517 if (should_release_proc_ref) {
3518 proc_rele(p);
3519 }
3520
3521 if (bufp != NULL) {
3522 FREE(bufp, M_TEMP);
3523 }
3524
3525 if (inherit != NULL) {
3526 ipc_importance_release(inherit);
3527 }
3528
3529 return(error);
3530 }
3531
3532
3533 /*
3534 * copyinptr
3535 *
3536 * Description: Copy a pointer in from user space to a user_addr_t in kernel
3537 * space, based on 32/64 bitness of the user space
3538 *
3539 * Parameters: froma User space address
3540 * toptr Address of kernel space user_addr_t
3541 * ptr_size 4/8, based on 'froma' address space
3542 *
3543 * Returns: 0 Success
3544 * EFAULT Bad 'froma'
3545 *
3546 * Implicit returns:
3547 * *toptr Modified
3548 */
3549 static int
3550 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
3551 {
3552 int error;
3553
3554 if (ptr_size == 4) {
3555 /* 32 bit address widened into a 64 bit user_addr_t */
3556 unsigned int i;
3557
3558 error = copyin(froma, &i, 4);
3559 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
3560 } else {
3561 error = copyin(froma, toptr, 8);
3562 }
3563 return (error);
3564 }
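/*
 * Usage sketch (illustrative only): fetching one argv[] slot from a
 * 32-bit caller reads a 4-byte pointer and zero-extends it into the
 * 64-bit-wide user_addr_t:
 *
 *	user_addr_t arg;
 *	error = copyinptr(argv, &arg, 4);
 *	if (error == 0)
 *		argv += 4;	(advance by the caller's pointer size)
 */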
3565
3566
3567 /*
3568 * copyoutptr
3569 *
3570 * Description: Copy a pointer out from a user_addr_t in kernel space to
3571 * user space, based on 32/64 bitness of the user space
3572 *
3573 * Parameters: ua User space address to copy to
3574 * ptr Address of kernel space user_addr_t
3575 * ptr_size 4/8, based on 'ua' address space
3576 *
3577 * Returns: 0 Success
3578 * EFAULT Bad 'ua'
3579 *
3580 */
3581 static int
3582 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
3583 {
3584 int error;
3585
3586 if (ptr_size == 4) {
3587 /* 64 bit value containing 32 bit address */
3588 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); /* SAFE */
3589
3590 error = copyout(&i, ptr, 4);
3591 } else {
3592 error = copyout(&ua, ptr, 8);
3593 }
3594 return (error);
3595 }
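/*
 * Conversely, copyoutptr() narrows on the way out. A sketch, mirroring
 * the argc copyout performed in exec_copyout_strings() below:
 *
 *	error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
 */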
3596
3597
3598 /*
3599 * exec_copyout_strings
3600 *
3601 * Copy out the strings segment to user space. The strings segment is put
3602 * on a preinitialized stack frame.
3603 *
3604 * Parameters: struct image_params * the image parameter block
3605 * int * a pointer to the stack offset variable
3606 *
3607 * Returns: 0 Success
3608 * !0 Failure: errno
3609 *
3610 * Implicit returns:
3611 * (*stackp) The stack offset, modified
3612 *
3613 * Note: The strings segment is laid out backward from the top
3614 * of the stack to consume the minimal amount of
3615 * space possible; the returned stack pointer points to the
3616 * end of the area consumed (stacks grow downward).
3617 *
3618 * argc is an int; arg[i] are pointers; env[i] are pointers;
3619 * the 0's are (void *)NULL's
3620 *
3621 * The stack frame layout is:
3622 *
3623 * +-------------+ <- p->user_stack
3624 * | 16b |
3625 * +-------------+
3626 * | STRING AREA |
3627 * | : |
3628 * | : |
3629 * | : |
3630 * +- -- -- -- --+
3631 * | PATH AREA |
3632 * +-------------+
3633 * | 0 |
3634 * +-------------+
3635 * | applev[n] |
3636 * +-------------+
3637 * :
3638 * :
3639 * +-------------+
3640 * | applev[1] |
3641 * +-------------+
3642 * | exec_path / |
3643 * | applev[0] |
3644 * +-------------+
3645 * | 0 |
3646 * +-------------+
3647 * | env[n] |
3648 * +-------------+
3649 * :
3650 * :
3651 * +-------------+
3652 * | env[0] |
3653 * +-------------+
3654 * | 0 |
3655 * +-------------+
3656 * | arg[argc-1] |
3657 * +-------------+
3658 * :
3659 * :
3660 * +-------------+
3661 * | arg[0] |
3662 * +-------------+
3663 * | argc |
3664 * sp-> +-------------+
3665 *
3666 * Although technically a part of the STRING AREA, we treat the PATH AREA as
3667 * a separate entity. This allows us to align the beginning of the PATH AREA
3668 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
3669 * which precede it on the stack are properly aligned.
3670 */
3671
3672 static int
3673 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
3674 {
3675 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3676 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
3677 int ptr_area_size;
3678 void *ptr_buffer_start, *ptr_buffer;
3679 int string_size;
3680
3681 user_addr_t string_area; /* *argv[], *env[] */
3682 user_addr_t ptr_area; /* argv[], env[], applev[] */
3683 user_addr_t argc_area; /* argc */
3684 user_addr_t stack;
3685 int error;
3686
3687 unsigned i;
3688 struct copyout_desc {
3689 char *start_string;
3690 int count;
3691 #if CONFIG_DTRACE
3692 user_addr_t *dtrace_cookie;
3693 #endif
3694 boolean_t null_term;
3695 } descriptors[] = {
3696 {
3697 .start_string = imgp->ip_startargv,
3698 .count = imgp->ip_argc,
3699 #if CONFIG_DTRACE
3700 .dtrace_cookie = &p->p_dtrace_argv,
3701 #endif
3702 .null_term = TRUE
3703 },
3704 {
3705 .start_string = imgp->ip_endargv,
3706 .count = imgp->ip_envc,
3707 #if CONFIG_DTRACE
3708 .dtrace_cookie = &p->p_dtrace_envp,
3709 #endif
3710 .null_term = TRUE
3711 },
3712 {
3713 .start_string = imgp->ip_strings,
3714 .count = 1,
3715 #if CONFIG_DTRACE
3716 .dtrace_cookie = NULL,
3717 #endif
3718 .null_term = FALSE
3719 },
3720 {
3721 .start_string = imgp->ip_endenvv,
3722 .count = imgp->ip_applec - 1, /* exec_path handled above */
3723 #if CONFIG_DTRACE
3724 .dtrace_cookie = NULL,
3725 #endif
3726 .null_term = TRUE
3727 }
3728 };
3729
3730 stack = *stackp;
3731
3732 /*
3733 * All previous contributors to the string area
3734 * should have aligned their sub-area
3735 */
3736 if (imgp->ip_strspace % ptr_size != 0) {
3737 error = EINVAL;
3738 goto bad;
3739 }
3740
3741 /* Grow the stack down for the strings we've been building up */
3742 string_size = imgp->ip_strendp - imgp->ip_strings;
3743 stack -= string_size;
3744 string_area = stack;
3745
3746 /*
3747 * Need room for one pointer for each string, plus
3748 * one for the NULLs terminating the argv, envv, and apple areas.
3749 */
3750 ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
3751 ptr_size;
3752 stack -= ptr_area_size;
3753 ptr_area = stack;
3754
3755 /* We'll construct all the pointer arrays in our string buffer,
3756 * which we already know is aligned properly, and ip_argspace
3757 * was used to verify we have enough space.
3758 */
3759 ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
3760
3761 /*
3762 * Need room for pointer-aligned argc slot.
3763 */
3764 stack -= ptr_size;
3765 argc_area = stack;
3766
3767 /*
3768 * Record the size of the arguments area so that sysctl_procargs()
3769 * can return the argument area without having to parse the arguments.
3770 */
3771 proc_lock(p);
3772 p->p_argc = imgp->ip_argc;
3773 p->p_argslen = (int)(*stackp - string_area);
3774 proc_unlock(p);
3775
3776 /* Return the initial stack address: the location of argc */
3777 *stackp = stack;
3778
3779 /*
3780 * Copy out the entire strings area.
3781 */
3782 error = copyout(imgp->ip_strings, string_area,
3783 string_size);
3784 if (error)
3785 goto bad;
3786
3787 for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
3788 char *cur_string = descriptors[i].start_string;
3789 int j;
3790
3791 #if CONFIG_DTRACE
3792 if (descriptors[i].dtrace_cookie) {
3793 proc_lock(p);
3794 *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
3795 proc_unlock(p);
3796 }
3797 #endif /* CONFIG_DTRACE */
3798
3799 /*
3800 * For each segment (argv, envv, applev), copy as many pointers as requested
3801 * to our pointer buffer.
3802 */
3803 for (j = 0; j < descriptors[i].count; j++) {
3804 user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
3805
3806 /* Copy out the pointer to the current string. Alignment has been verified */
3807 if (ptr_size == 8) {
3808 *(uint64_t *)ptr_buffer = (uint64_t)cur_address;
3809 } else {
3810 *(uint32_t *)ptr_buffer = (uint32_t)cur_address;
3811 }
3812
3813 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3814 cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
3815 }
3816
3817 if (descriptors[i].null_term) {
3818 if (ptr_size == 8) {
3819 *(uint64_t *)ptr_buffer = 0ULL;
3820 } else {
3821 *(uint32_t *)ptr_buffer = 0;
3822 }
3823
3824 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3825 }
3826 }
3827
3828 /*
3829 * Copy out all our pointer arrays in bulk.
3830 */
3831 error = copyout(ptr_buffer_start, ptr_area,
3832 ptr_area_size);
3833 if (error)
3834 goto bad;
3835
3836 /* argc (int32, stored in a ptr_size area) */
3837 error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
3838 if (error)
3839 goto bad;
3840
3841 bad:
3842 return(error);
3843 }
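/*
 * Worked example (hypothetical counts): for a 64-bit image with
 * argc == 2, envc == 3 and applec == 5, the pointer area is
 * (2 + 3 + 5 + 3) * 8 == 104 bytes: one slot per string plus the
 * three NULL terminators for argv[], envv[] and applev[]. The final
 * stack pointer then lands one further 8-byte slot down, on argc.
 */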
3844
3845
3846 /*
3847 * exec_extract_strings
3848 *
3849 * Copy arguments and environment from user space into work area; we may
3850 * have already copied some early arguments into the work area, and if
3851 * so, any arguments copied in are appended to those already there.
3852 * This function is the primary manipulator of ip_argspace, since
3853 * these are the arguments the client of execve(2) knows about. After
3854 * each argv[]/envv[] string is copied, we charge the string length
3855 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
3856 * fully preflight the arg list size.
3857 *
3858 * Parameters: struct image_params * the image parameter block
3859 *
3860 * Returns: 0 Success
3861 * !0 Failure: errno
3862 *
3863 * Implicit returns:
3864 * (imgp->ip_argc) Count of arguments, updated
3865 * (imgp->ip_envc) Count of environment strings, updated
3866 * (imgp->ip_argspace) Count of NCARGS space remaining
3867 * (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
3868 *
3869 *
3870 * Note: The argument and environment vectors are user space pointers
3871 * to arrays of user space pointers.
3872 */
3873 static int
3874 exec_extract_strings(struct image_params *imgp)
3875 {
3876 int error = 0;
3877 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
3878 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
3879 user_addr_t argv = imgp->ip_user_argv;
3880 user_addr_t envv = imgp->ip_user_envv;
3881
3882 /*
3883 * Adjust space reserved for the path name by however much padding it
3884 * needs. Doing this here since we didn't know if this would be a 32-
3885 * or 64-bit process back in exec_save_path.
3886 */
3887 while (imgp->ip_strspace % new_ptr_size != 0) {
3888 *imgp->ip_strendp++ = '\0';
3889 imgp->ip_strspace--;
3890 /* imgp->ip_argspace--; not counted towards exec args total */
3891 }
3892
3893 /*
3894 * From now on, we start attributing string space to ip_argspace
3895 */
3896 imgp->ip_startargv = imgp->ip_strendp;
3897 imgp->ip_argc = 0;
3898
3899 if ((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
3900 user_addr_t arg;
3901 char *argstart, *ch;
3902
3903 /* First, the arguments in the "#!" string are tokenized and extracted. */
3904 argstart = imgp->ip_interp_buffer;
3905 while (argstart) {
3906 ch = argstart;
3907 while (*ch && !IS_WHITESPACE(*ch)) {
3908 ch++;
3909 }
3910
3911 if (*ch == '\0') {
3912 /* last argument, no need to NUL-terminate */
3913 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
3914 argstart = NULL;
3915 } else {
3916 /* NUL-terminate */
3917 *ch = '\0';
3918 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
3919
3920 /*
3921 * Find the next string. We know spaces at the end of the string have already
3922 * been stripped.
3923 */
3924 argstart = ch + 1;
3925 while (IS_WHITESPACE(*argstart)) {
3926 argstart++;
3927 }
3928 }
3929
3930 /* Error-check, regardless of whether this is the last interpreter arg or not */
3931 if (error)
3932 goto bad;
3933 if (imgp->ip_argspace < new_ptr_size) {
3934 error = E2BIG;
3935 goto bad;
3936 }
3937 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3938 imgp->ip_argc++;
3939 }
3940
3941 if (argv != 0LL) {
3942 /*
3943 * If we are running an interpreter, replace the av[0] that was
3944 * passed to execve() with the path name that was also passed to
3945 * execve(), for the benefit of interpreters which do not use the
3946 * PATH to locate their script arguments.
3947 */
3948 error = copyinptr(argv, &arg, ptr_size);
3949 if (error)
3950 goto bad;
3951 if (arg != 0LL) {
3952 argv += ptr_size; /* consume without using */
3953 }
3954 }
3955
3956 if (imgp->ip_interp_sugid_fd != -1) {
3957 char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
3958 snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
3959 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
3960 } else {
3961 error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
3962 }
3963
3964 if (error)
3965 goto bad;
3966 if (imgp->ip_argspace < new_ptr_size) {
3967 error = E2BIG;
3968 goto bad;
3969 }
3970 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3971 imgp->ip_argc++;
3972 }
3973
3974 while (argv != 0LL) {
3975 user_addr_t arg;
3976
3977 error = copyinptr(argv, &arg, ptr_size);
3978 if (error)
3979 goto bad;
3980
3981 if (arg == 0LL) {
3982 break;
3983 }
3984
3985 argv += ptr_size;
3986
3987 /*
3988 * av[n...] = arg[n]
3989 */
3990 error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
3991 if (error)
3992 goto bad;
3993 if (imgp->ip_argspace < new_ptr_size) {
3994 error = E2BIG;
3995 goto bad;
3996 }
3997 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3998 imgp->ip_argc++;
3999 }
4000
4001 /* Save space for argv[] NULL terminator */
4002 if (imgp->ip_argspace < new_ptr_size) {
4003 error = E2BIG;
4004 goto bad;
4005 }
4006 imgp->ip_argspace -= new_ptr_size;
4007
4008 /* Note where the args ends and env begins. */
4009 imgp->ip_endargv = imgp->ip_strendp;
4010 imgp->ip_envc = 0;
4011
4012 /* Now, get the environment */
4013 while (envv != 0LL) {
4014 user_addr_t env;
4015
4016 error = copyinptr(envv, &env, ptr_size);
4017 if (error)
4018 goto bad;
4019
4020 envv += ptr_size;
4021 if (env == 0LL) {
4022 break;
4023 }
4024 /*
4025 * av[n...] = env[n]
4026 */
4027 error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
4028 if (error)
4029 goto bad;
4030 if (imgp->ip_argspace < new_ptr_size) {
4031 error = E2BIG;
4032 goto bad;
4033 }
4034 imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
4035 imgp->ip_envc++;
4036 }
4037
4038 /* Save space for envv[] NULL terminator */
4039 if (imgp->ip_argspace < new_ptr_size) {
4040 error = E2BIG;
4041 goto bad;
4042 }
4043 imgp->ip_argspace -= new_ptr_size;
4044
4045 /* Align the tail of the combined argv+envv area */
4046 while (imgp->ip_strspace % new_ptr_size != 0) {
4047 if (imgp->ip_argspace < 1) {
4048 error = E2BIG;
4049 goto bad;
4050 }
4051 *imgp->ip_strendp++ = '\0';
4052 imgp->ip_strspace--;
4053 imgp->ip_argspace--;
4054 }
4055
4056 /* Note where the envv ends and applev begins. */
4057 imgp->ip_endenvv = imgp->ip_strendp;
4058
4059 /*
4060 * From now on, we are no longer charging argument
4061 * space to ip_argspace.
4062 */
4063
4064 bad:
4065 return error;
4066 }
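/*
 * Accounting sketch (illustrative numbers): for a 64-bit exec of
 * "ls -l", exec_add_user_string() charges each string's bytes to
 * ip_argspace, and the loops above each charge one 8-byte pointer
 * slot per entry, so roughly (3 + 8) + (3 + 8) bytes of NCARGS are
 * consumed before the argv[] and envv[] NULL terminators are
 * reserved.
 */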
4067
4068 /*
4069 * Libc has an 8-element array set up for stack guard values. It only fills
4070 * in one of those entries, and both gcc and llvm seem to use only a single
4071 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
4072 * do the work to construct them.
4073 */
4074 #define GUARD_VALUES 1
4075 #define GUARD_KEY "stack_guard="
4076
4077 /*
4078 * System malloc needs some entropy when it is initialized.
4079 */
4080 #define ENTROPY_VALUES 2
4081 #define ENTROPY_KEY "malloc_entropy="
4082
4083 /*
4084 * System malloc engages nanozone for UIAPP.
4085 */
4086 #define NANO_ENGAGE_KEY "MallocNanoZone=1"
4087
4088 #define PFZ_KEY "pfz="
4089 extern user32_addr_t commpage_text32_location;
4090 extern user64_addr_t commpage_text64_location;
4091
4092 #define MAIN_STACK_VALUES 4
4093 #define MAIN_STACK_KEY "main_stack="
4094
4095 #define HEX_STR_LEN 18 // 64-bit hex value "0x0123456701234567"
4096
4097 static int
4098 exec_add_entropy_key(struct image_params *imgp,
4099 const char *key,
4100 int values,
4101 boolean_t embedNUL)
4102 {
4103 const int limit = 8;
4104 uint64_t entropy[limit];
4105 char str[strlen(key) + (HEX_STR_LEN + 1) * limit + 1];
4106 if (values > limit) {
4107 values = limit;
4108 }
4109
4110 read_random(entropy, sizeof(entropy[0]) * values);
4111
4112 if (embedNUL) {
4113 entropy[0] &= ~(0xffull << 8);
4114 }
4115
4116 int len = snprintf(str, sizeof(str), "%s0x%llx", key, entropy[0]);
4117 int remaining = sizeof(str) - len;
4118 for (int i = 1; i < values && remaining > 0; ++i) {
4119 int start = sizeof(str) - remaining;
4120 len = snprintf(&str[start], remaining, ",0x%llx", entropy[i]);
4121 remaining -= len;
4122 }
4123
4124 return exec_add_user_string(imgp, CAST_USER_ADDR_T(str), UIO_SYSSPACE, FALSE);
4125 }
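/*
 * Example result (hypothetical entropy): for ENTROPY_KEY with
 * values == 2, the string handed to libc takes the form
 *
 *	malloc_entropy=0x0123456789abcdef,0xfedcba9876543210
 *
 * With embedNUL set (the GUARD_KEY case), the second-lowest byte of
 * the first value is forced to zero so that the in-memory guard
 * contains a NUL, stopping C string operations from copying it.
 */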
4126
4127 /*
4128 * Build up the contents of the apple[] string vector
4129 */
4130 static int
4131 exec_add_apple_strings(struct image_params *imgp,
4132 const load_result_t *load_result)
4133 {
4134 int error;
4135 int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
4136
4137 /* exec_save_path stored the first string */
4138 imgp->ip_applec = 1;
4139
4140 /* adding the pfz string */
4141 {
4142 char pfz_string[strlen(PFZ_KEY) + HEX_STR_LEN + 1];
4143
4144 if (img_ptr_size == 8) {
4145 snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%llx", commpage_text64_location);
4146 } else {
4147 snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%x", commpage_text32_location);
4148 }
4149 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), UIO_SYSSPACE, FALSE);
4150 if (error) {
4151 goto bad;
4152 }
4153 imgp->ip_applec++;
4154 }
4155
4156 /* adding the NANO_ENGAGE_KEY key */
4157 if (imgp->ip_px_sa) {
4158 int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
4159
4160 if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
4161 const char *nano_string = NANO_ENGAGE_KEY;
4162 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(nano_string), UIO_SYSSPACE, FALSE);
4163 if (error) {
4164 goto bad;
4165 }
4166 imgp->ip_applec++;
4167 }
4168 }
4169
4170 /*
4171 * Supply libc with a collection of random values to use when
4172 * implementing -fstack-protector.
4173 *
4174 * (The first random string always contains an embedded NUL so that
4175 * __stack_chk_guard also protects against C string vulnerabilities)
4176 */
4177 error = exec_add_entropy_key(imgp, GUARD_KEY, GUARD_VALUES, TRUE);
4178 if (error) {
4179 goto bad;
4180 }
4181 imgp->ip_applec++;
4182
4183 /*
4184 * Supply libc with entropy for system malloc.
4185 */
4186 error = exec_add_entropy_key(imgp, ENTROPY_KEY, ENTROPY_VALUES, FALSE);
4187 if (error) {
4188 goto bad;
4189 }
4190 imgp->ip_applec++;
4191
4192 /*
4193 * Add MAIN_STACK_KEY: Supplies the address and size of the main thread's
4194 * stack if it was allocated by the kernel.
4195 *
4196 * The guard page is not included in this stack size as libpthread
4197 * expects to add it back in after receiving this value.
4198 */
4199 if (load_result->unixproc) {
4200 char stack_string[strlen(MAIN_STACK_KEY) + (HEX_STR_LEN + 1) * MAIN_STACK_VALUES + 1];
4201 snprintf(stack_string, sizeof(stack_string),
4202 MAIN_STACK_KEY "0x%llx,0x%llx,0x%llx,0x%llx",
4203 (uint64_t)load_result->user_stack,
4204 (uint64_t)load_result->user_stack_size,
4205 (uint64_t)load_result->user_stack_alloc,
4206 (uint64_t)load_result->user_stack_alloc_size);
4207 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(stack_string), UIO_SYSSPACE, FALSE);
4208 if (error) {
4209 goto bad;
4210 }
4211 imgp->ip_applec++;
4212 }
4213
4214 /* Align the tail of the combined applev area */
4215 while (imgp->ip_strspace % img_ptr_size != 0) {
4216 *imgp->ip_strendp++ = '\0';
4217 imgp->ip_strspace--;
4218 }
4219
4220 bad:
4221 return error;
4222 }
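/*
 * Illustrative apple[] vector for a 64-bit unixproc image (values
 * hypothetical):
 *
 *	apple[0]	exec path, stored earlier by exec_save_path
 *	apple[1]	"pfz=0x..."
 *	apple[2]	"MallocNanoZone=1"	(only with _POSIX_SPAWN_NANO_ALLOCATOR)
 *	apple[3]	"stack_guard=0x..."
 *	apple[4]	"malloc_entropy=0x...,0x..."
 *	apple[5]	"main_stack=0x...,0x...,0x...,0x..."
 */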
4223
4224 #define unix_stack_size(p) (p->p_rlimit[RLIMIT_STACK].rlim_cur)
4225
4226 /*
4227 * exec_check_permissions
4228 *
4229 * Description: Verify that the file we are attempting to execute
4230 * is in fact allowed to be executed based on its POSIX file
4231 * permissions and other access control criteria
4232 *
4233 * Parameters: struct image_params * the image parameter block
4234 *
4235 * Returns: 0 Success
4236 * EACCES Permission denied
4237 * ENOEXEC Executable file format error
4238 * ETXTBSY Text file busy [misuse of error code]
4239 * vnode_getattr:???
4240 * vnode_authorize:???
4241 */
4242 static int
4243 exec_check_permissions(struct image_params *imgp)
4244 {
4245 struct vnode *vp = imgp->ip_vp;
4246 struct vnode_attr *vap = imgp->ip_vattr;
4247 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4248 int error;
4249 kauth_action_t action;
4250
4251 /* Only allow execution of regular files */
4252 if (!vnode_isreg(vp))
4253 return (EACCES);
4254
4255 /* Get the file attributes that we will be using here and elsewhere */
4256 VATTR_INIT(vap);
4257 VATTR_WANTED(vap, va_uid);
4258 VATTR_WANTED(vap, va_gid);
4259 VATTR_WANTED(vap, va_mode);
4260 VATTR_WANTED(vap, va_fsid);
4261 VATTR_WANTED(vap, va_fileid);
4262 VATTR_WANTED(vap, va_data_size);
4263 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
4264 return (error);
4265
4266 /*
4267 * Ensure that at least one execute bit is on - otherwise root
4268 * would always succeed, and we don't want that to happen unless
4269 * the file really is executable.
4270 */
4271 if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
4272 return (EACCES);
4273
4274 /* Disallow zero length files */
4275 if (vap->va_data_size == 0)
4276 return (ENOEXEC);
4277
4278 imgp->ip_arch_offset = (user_size_t)0;
4279 imgp->ip_arch_size = vap->va_data_size;
4280
4281 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
4282 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED))
4283 vap->va_mode &= ~(VSUID | VSGID);
4284
4285 /*
4286 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
4287 * flags for setuid/setgid binaries.
4288 */
4289 if (vap->va_mode & (VSUID | VSGID))
4290 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
4291
4292 #if CONFIG_MACF
4293 error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
4294 if (error)
4295 return (error);
4296 #endif
4297
4298 /* Check for execute permission */
4299 action = KAUTH_VNODE_EXECUTE;
4300 /* Traced images must also be readable */
4301 if (p->p_lflag & P_LTRACED)
4302 action |= KAUTH_VNODE_READ_DATA;
4303 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
4304 return (error);
4305
4306 #if 0
4307 /* Don't let it run if anyone had it open for writing */
4308 vnode_lock(vp);
4309 if (vp->v_writecount) {
4310 panic("going to return ETXTBSY %x", vp);
4311 vnode_unlock(vp);
4312 return (ETXTBSY);
4313 }
4314 vnode_unlock(vp);
4315 #endif
4316
4317
4318 /* XXX May want to indicate to underlying FS that vnode is open */
4319
4320 return (error);
4321 }
4322
4323
4324 /*
4325 * exec_handle_sugid
4326 *
4327 * Initially clear the P_SUGID in the process flags; if an SUGID process is
4328 * exec'ing a non-SUGID image, then this is the point of no return.
4329 *
4330 * If the image being activated is SUGID, then replace the credential with a
4331 * copy, disable tracing (unless the tracing process is root), reset the
4332 * mach task port to revoke it, and set the P_SUGID bit.
4333 *
4334 * If the saved user and group ID will be changing, then make sure it happens
4335 * to a new credential, rather than a shared one.
4336 *
4337 * Set the security token (this is probably obsolete, given that the token
4338 * should not technically be separate from the credential itself).
4339 *
4340 * Parameters: struct image_params * the image parameter block
4341 *
4342 * Returns: void No failure indication
4343 *
4344 * Implicit returns:
4345 * <process credential> Potentially modified/replaced
4346 * <task port> Potentially revoked
4347 * <process flags> P_SUGID bit potentially modified
4348 * <security token> Potentially modified
4349 */
4350 static int
4351 exec_handle_sugid(struct image_params *imgp)
4352 {
4353 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4354 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
4355 kauth_cred_t my_cred, my_new_cred;
4356 int i;
4357 int leave_sugid_clear = 0;
4358 int mac_reset_ipc = 0;
4359 int error = 0;
4360 task_t task = NULL;
4361 #if CONFIG_MACF
4362 int mac_transition, disjoint_cred = 0;
4363 int label_update_return = 0;
4364
4365 /*
4366 * Determine whether a call to update the MAC label will result in the
4367 * credential changing.
4368 *
4369 * Note: MAC policies which do not actually end up modifying
4370 * the label subsequently are strongly encouraged to
4371 * return 0 for this check, since a non-zero answer will
4372 * slow down the exec fast path for normal binaries.
4373 */
4374 mac_transition = mac_cred_check_label_update_execve(
4375 imgp->ip_vfs_context,
4376 imgp->ip_vp,
4377 imgp->ip_arch_offset,
4378 imgp->ip_scriptvp,
4379 imgp->ip_scriptlabelp,
4380 imgp->ip_execlabelp,
4381 p,
4382 imgp->ip_px_smpx);
4383 #endif
4384
4385 OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
4386
4387 /*
4388 * Order of the following is important; group checks must go last,
4389 * as we use the success of the 'ismember' check combined with the
4390 * failure of the explicit match to indicate that we will be setting
4391 * the egid of the process even though the new process did not
4392 * require VSUID/VSGID bits in order for it to set the new group as
4393 * its egid.
4394 *
4395 * Note: Technically, by this we are implying a call to
4396 * setegid() in the new process, rather than implying
4397 * it used its VSGID bit to set the effective group,
4398 * even though there is no code in that process to make
4399 * such a call.
4400 */
4401 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
4402 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
4403 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
4404 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
4405 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
4406
4407 #if CONFIG_MACF
4408 /* label for MAC transition and neither VSUID nor VSGID */
4409 handle_mac_transition:
4410 #endif
4411
4412 /*
4413 * Replace the credential with a copy of itself if euid or
4414 * egid change.
4415 *
4416 * Note: setuid binaries will automatically opt out of
4417 * group resolver participation as a side effect
4418 * of this operation. This is an intentional
4419 * part of the security model, which requires a
4420 * participating credential be established by
4421 * escalating privilege, setting up all other
4422 * aspects of the credential including whether
4423 * or not to participate in external group
4424 * membership resolution, then dropping their
4425 * effective privilege to that of the desired
4426 * final credential state.
4427 *
4428 * Modifications to p_ucred must be guarded using the
4429 * proc's ucred lock. This prevents others from accessing
4430 * a garbage credential.
4431 */
4432 while (imgp->ip_origvattr->va_mode & VSUID) {
4433 my_cred = kauth_cred_proc_ref(p);
4434 my_new_cred = kauth_cred_setresuid(my_cred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
4435
4436 if (my_new_cred == my_cred) {
4437 kauth_cred_unref(&my_cred);
4438 break;
4439 }
4440
4441 /* update cred on proc */
4442 proc_ucred_lock(p);
4443
4444 if (p->p_ucred != my_cred) {
4445 proc_ucred_unlock(p);
4446 kauth_cred_unref(&my_new_cred);
4447 continue;
4448 }
4449
4450 /* donate cred reference on my_new_cred to p->p_ucred */
4451 p->p_ucred = my_new_cred;
4452 PROC_UPDATE_CREDS_ONPROC(p);
4453 proc_ucred_unlock(p);
4454
4455 /* drop additional reference that was taken on the previous cred */
4456 kauth_cred_unref(&my_cred);
4457
4458 break;
4459 }
4460
4461 while (imgp->ip_origvattr->va_mode & VSGID) {
4462 my_cred = kauth_cred_proc_ref(p);
4463 my_new_cred = kauth_cred_setresgid(my_cred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
4464
4465 if (my_new_cred == my_cred) {
4466 kauth_cred_unref(&my_cred);
4467 break;
4468 }
4469
4470 /* update cred on proc */
4471 proc_ucred_lock(p);
4472
4473 if (p->p_ucred != my_cred) {
4474 proc_ucred_unlock(p);
4475 kauth_cred_unref(&my_new_cred);
4476 continue;
4477 }
4478
4479 /* donate cred reference on my_new_cred to p->p_ucred */
4480 p->p_ucred = my_new_cred;
4481 PROC_UPDATE_CREDS_ONPROC(p);
4482 proc_ucred_unlock(p);
4483
4484 /* drop additional reference that was taken on the previous cred */
4485 kauth_cred_unref(&my_cred);
4486
4487 break;
4488 }
4489
4490 #if CONFIG_MACF
4491 /*
4492 * If a policy has indicated that it will transition the label,
4493 * before making the call into the MAC policies, get a new
4494 * duplicate credential, so they can modify it without
4495 * modifying any others sharing it.
4496 */
4497 if (mac_transition) {
4498 /*
4499 * This hook may generate upcalls that require
4500 * importance donation from the kernel.
4501 * (23925818)
4502 */
4503 thread_t thread = current_thread();
4504 thread_enable_send_importance(thread, TRUE);
4505 kauth_proc_label_update_execve(p,
4506 imgp->ip_vfs_context,
4507 imgp->ip_vp,
4508 imgp->ip_arch_offset,
4509 imgp->ip_scriptvp,
4510 imgp->ip_scriptlabelp,
4511 imgp->ip_execlabelp,
4512 &imgp->ip_csflags,
4513 imgp->ip_px_smpx,
4514 &disjoint_cred, /* will be non zero if disjoint */
4515 &label_update_return);
4516 thread_enable_send_importance(thread, FALSE);
4517
4518 if (disjoint_cred) {
4519 /*
4520 * If updating the MAC label resulted in a
4521 * disjoint credential, flag that we need to
4522 * set the P_SUGID bit. This protects
4523 * against debuggers being attached by an
4524 * insufficiently privileged process onto the
4525 * result of a transition to a more privileged
4526 * credential.
4527 */
4528 leave_sugid_clear = 0;
4529 }
4530
4531 imgp->ip_mac_return = label_update_return;
4532 }
4533
4534 mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
4535
4536 #endif /* CONFIG_MACF */
4537
4538 /*
4539 * If 'leave_sugid_clear' is non-zero, then we passed the
4540 * VSUID and MACF checks, and successfully determined that
4541 * the previous cred was a member of the VSGID group, but
4542 * that it was not the default at the time of the execve,
4543 * and that the post-labelling credential was not disjoint.
4544 * So we don't set the P_SUGID or reset mach ports and fds
4545 * on the basis of simply running this code.
4546 */
4547 if (mac_reset_ipc || !leave_sugid_clear) {
4548 /*
4549 * Have mach reset the task and thread ports.
4550 * We don't want anyone who had the ports before
4551 * a setuid exec to be able to access/control the
4552 * task/thread after.
4553 */
4554 ipc_task_reset((imgp->ip_new_thread != NULL) ?
4555 get_threadtask(imgp->ip_new_thread) : p->task);
4556 ipc_thread_reset((imgp->ip_new_thread != NULL) ?
4557 imgp->ip_new_thread : current_thread());
4558 }
4559
4560 if (!leave_sugid_clear) {
4561 /*
4562 * Flag the process as setuid.
4563 */
4564 OSBitOrAtomic(P_SUGID, &p->p_flag);
4565
4566 /*
4567 * Radar 2261856; setuid security hole fix
4568 * XXX For setuid processes, attempt to ensure that
4569 * stdin, stdout, and stderr are already allocated.
4570 * We do not want userland to accidentally allocate
4571 * descriptors in this range which has implied meaning
4572 * to libc.
4573 */
4574 for (i = 0; i < 3; i++) {
4575
4576 if (p->p_fd->fd_ofiles[i] != NULL)
4577 continue;
4578
4579 /*
4580 * Do the kernel equivalent of
4581 *
4582 * if i == 0
4583 * (void) open("/dev/null", O_RDONLY);
4584 * else
4585 * (void) open("/dev/null", O_WRONLY);
4586 */
4587
4588 struct fileproc *fp;
4589 int indx;
4590 int flag;
4591 struct nameidata *ndp = NULL;
4592
4593 if (i == 0)
4594 flag = FREAD;
4595 else
4596 flag = FWRITE;
4597
4598 if ((error = falloc(p,
4599 &fp, &indx, imgp->ip_vfs_context)) != 0)
4600 continue;
4601
4602 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
4603 if (ndp == NULL) {
4604 fp_free(p, indx, fp);
4605 error = ENOMEM;
4606 break;
4607 }
4608
4609 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
4610 CAST_USER_ADDR_T("/dev/null"),
4611 imgp->ip_vfs_context);
4612
4613 if ((error = vn_open(ndp, flag, 0)) != 0) {
4614 fp_free(p, indx, fp);
4615 FREE(ndp, M_TEMP);
4616 break;
4617 }
4618
4619 struct fileglob *fg = fp->f_fglob;
4620
4621 fg->fg_flag = flag;
4622 fg->fg_ops = &vnops;
4623 fg->fg_data = ndp->ni_vp;
4624
4625 vnode_put(ndp->ni_vp);
4626
4627 proc_fdlock(p);
4628 procfdtbl_releasefd(p, indx, NULL);
4629 fp_drop(p, indx, fp, 1);
4630 proc_fdunlock(p);
4631
4632 FREE(ndp, M_TEMP);
4633 }
4634 }
4635 }
4636 #if CONFIG_MACF
4637 else {
4638 /*
4639 * We are here because we were told that the MAC label will
4640 * be transitioned, and the binary is not VSUID or VSGID; to
4641 * deal with this case, we could either duplicate a lot of
4642 * code, or we can indicate we want to default the P_SUGID
4643 * bit clear and jump back up.
4644 */
4645 if (mac_transition) {
4646 leave_sugid_clear = 1;
4647 goto handle_mac_transition;
4648 }
4649 }
4650
4651 #endif /* CONFIG_MACF */
4652
4653 /*
4654 * Implement the semantic where the effective user and group become
4655 * the saved user and group in exec'ed programs.
4656 *
4657 * Modifications to p_ucred must be guarded using the
4658 * proc's ucred lock. This prevents others from accessing
4659 * a garbage credential.
4660 */
4661 for (;;) {
4662 my_cred = kauth_cred_proc_ref(p);
4663 my_new_cred = kauth_cred_setsvuidgid(my_cred, kauth_cred_getuid(my_cred), kauth_cred_getgid(my_cred));
4664
4665 if (my_new_cred == my_cred) {
4666 kauth_cred_unref(&my_cred);
4667 break;
4668 }
4669
4670 /* update cred on proc */
4671 proc_ucred_lock(p);
4672
4673 if (p->p_ucred != my_cred) {
4674 proc_ucred_unlock(p);
4675 kauth_cred_unref(&my_new_cred);
4676 continue;
4677 }
4678
4679 /* donate cred reference on my_new_cred to p->p_ucred */
4680 p->p_ucred = my_new_cred;
4681 PROC_UPDATE_CREDS_ONPROC(p);
4682 proc_ucred_unlock(p);
4683
4684 /* drop additional reference that was taken on the previous cred */
4685 kauth_cred_unref(&my_cred);
4686
4687 break;
4688 }
4689
4690
4691 /* Update the process' identity version and set the security token */
4692 p->p_idversion++;
4693
4694 if (imgp->ip_new_thread != NULL) {
4695 task = get_threadtask(imgp->ip_new_thread);
4696 } else {
4697 task = p->task;
4698 }
4699 set_security_token_task_internal(p, task);
4700
4701 return(error);
4702 }
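/*
 * The credential update loops above all share one compare-and-swap
 * shape; in outline (unref calls elided, and derive_new_cred standing
 * in for kauth_cred_setresuid() and friends):
 *
 *	for (;;) {
 *		my_cred = kauth_cred_proc_ref(p);
 *		my_new_cred = derive_new_cred(my_cred);
 *		if (my_new_cred == my_cred)
 *			break;			(nothing changed)
 *		proc_ucred_lock(p);
 *		if (p->p_ucred != my_cred) {
 *			proc_ucred_unlock(p);
 *			continue;		(lost a race; retry)
 *		}
 *		p->p_ucred = my_new_cred;	(donate the new reference)
 *		proc_ucred_unlock(p);
 *		break;
 *	}
 */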
4703
4704
4705 /*
4706 * create_unix_stack
4707 *
4708 * Description: Set the user stack address for the process to the provided
4709 * address. If a custom stack was not set as a result of the
4710 * load process (i.e. as specified by the image file for the
4711 * executable), then allocate the stack in the provided map and
4712 * set up appropriate guard pages for enforcing administrative
4713 * limits on stack growth, if they end up being needed.
4714 *
4715 * Parameters: p Process to set stack on
4716 * load_result Information from mach-o load commands
4717 * map Address map in which to allocate the new stack
4718 *
4719 * Returns: KERN_SUCCESS Stack successfully created
4720 * !KERN_SUCCESS Mach failure code
4721 */
4722 static kern_return_t
4723 create_unix_stack(vm_map_t map, load_result_t* load_result,
4724 proc_t p)
4725 {
4726 mach_vm_size_t size, prot_size;
4727 mach_vm_offset_t addr, prot_addr;
4728 kern_return_t kr;
4729
4730 mach_vm_address_t user_stack = load_result->user_stack;
4731
4732 proc_lock(p);
4733 p->user_stack = user_stack;
4734 proc_unlock(p);
4735
4736 if (load_result->user_stack_alloc_size > 0) {
4737 /*
4738 * Allocate enough space for the maximum stack size we
4739 * will ever authorize and an extra page to act as
4740 * a guard page for stack overflows. For default stacks,
4741 * vm_initial_limit_stack takes care of the extra guard page.
4742 * Otherwise we must allocate it ourselves.
4743 */
4744 if (mach_vm_round_page_overflow(load_result->user_stack_alloc_size, &size)) {
4745 return KERN_INVALID_ARGUMENT;
4746 }
4747 addr = mach_vm_trunc_page(load_result->user_stack - size);
4748 kr = mach_vm_allocate(map, &addr, size,
4749 VM_MAKE_TAG(VM_MEMORY_STACK) |
4750 VM_FLAGS_FIXED);
4751 if (kr != KERN_SUCCESS) {
4752 // Can't allocate at default location, try anywhere
4753 addr = 0;
4754 kr = mach_vm_allocate(map, &addr, size,
4755 VM_MAKE_TAG(VM_MEMORY_STACK) |
4756 VM_FLAGS_ANYWHERE);
4757 if (kr != KERN_SUCCESS) {
4758 return kr;
4759 }
4760
4761 user_stack = addr + size;
4762 load_result->user_stack = user_stack;
4763
4764 proc_lock(p);
4765 p->user_stack = user_stack;
4766 proc_unlock(p);
4767 }
4768
4769 load_result->user_stack_alloc = addr;
4770
4771 /*
4772 * And prevent access to what's above the current stack
4773 * size limit for this process.
4774 */
4775 if (load_result->user_stack_size == 0) {
4776 load_result->user_stack_size = unix_stack_size(p);
4777 prot_size = mach_vm_trunc_page(size - load_result->user_stack_size);
4778 } else {
4779 prot_size = PAGE_SIZE;
4780 }
4781
4782 prot_addr = addr;
4783 kr = mach_vm_protect(map,
4784 prot_addr,
4785 prot_size,
4786 FALSE,
4787 VM_PROT_NONE);
4788 if (kr != KERN_SUCCESS) {
4789 (void)mach_vm_deallocate(map, addr, size);
4790 return kr;
4791 }
4792 }
4793
4794 return KERN_SUCCESS;
4795 }
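/*
 * Illustrative numbers: given a 1 MB user_stack_alloc_size and a
 * 512 KB RLIMIT_STACK, roughly the bottom 512 KB of the allocation
 * ends up mach_vm_protect()ed to VM_PROT_NONE, so growth past the
 * administrative limit faults rather than silently scribbling on
 * whatever lies below the stack.
 */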
4796
4797 #include <sys/reboot.h>
4798
4799 /*
4800 * load_init_program_at_path
4801 *
4802 * Description: Load the "init" program; in most cases, this will be "launchd"
4803 *
4804 * Parameters: p Process to call execve() to create
4805 * the "init" program
4806 * scratch_addr Page in p, scratch space
4807 * path NULL terminated path
4808 *
4809 * Returns: KERN_SUCCESS Success
4810 * !KERN_SUCCESS See execve/mac_execve for error codes
4811 *
4812 * Notes: The process that is passed in is the first manufactured
4813 * process on the system, and gets here via bsd_ast() firing
4814 * for the first time. This is done to ensure that bsd_init()
4815 * has run to completion.
4816 *
4817 * The address map of the first manufactured process matches the
4818 * word width of the kernel. Once the self-exec completes, the
4819 * initproc might be different.
4820 */
4821 static int
4822 load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
4823 {
4824 int retval[2];
4825 int error;
4826 struct execve_args init_exec_args;
4827 user_addr_t argv0 = USER_ADDR_NULL, argv1 = USER_ADDR_NULL;
4828
4829 /*
4830 * Validate inputs and pre-conditions
4831 */
4832 assert(p);
4833 assert(scratch_addr);
4834 assert(path);
4835
4836 /*
4837 * Copy out program name.
4838 */
4839 size_t path_length = strlen(path) + 1;
4840 argv0 = scratch_addr;
4841 error = copyout(path, argv0, path_length);
4842 if (error)
4843 return error;
4844
4845 scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, sizeof(user_addr_t));
4846
4847 /*
4848 * Put out first (and only) argument, similarly.
4849 * Assumes everything fits in a page as allocated above.
4850 */
4851 if (boothowto & RB_SINGLE) {
4852 const char *init_args = "-s";
4853 size_t init_args_length = strlen(init_args)+1;
4854
4855 argv1 = scratch_addr;
4856 error = copyout(init_args, argv1, init_args_length);
4857 if (error)
4858 return error;
4859
4860 scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, sizeof(user_addr_t));
4861 }
4862
4863 if (proc_is64bit(p)) {
4864 user64_addr_t argv64bit[3];
4865
4866 argv64bit[0] = argv0;
4867 argv64bit[1] = argv1;
4868 argv64bit[2] = USER_ADDR_NULL;
4869
4870 error = copyout(argv64bit, scratch_addr, sizeof(argv64bit));
4871 if (error)
4872 return error;
4873 } else {
4874 user32_addr_t argv32bit[3];
4875
4876 argv32bit[0] = (user32_addr_t)argv0;
4877 argv32bit[1] = (user32_addr_t)argv1;
4878 argv32bit[2] = USER_ADDR_NULL;
4879
4880 error = copyout(argv32bit, scratch_addr, sizeof(argv32bit));
4881 if (error)
4882 return error;
4883 }
4884
4885 /*
4886 * Set up argument block for fake call to execve.
4887 */
4888 init_exec_args.fname = argv0;
4889 init_exec_args.argp = scratch_addr;
4890 init_exec_args.envp = USER_ADDR_NULL;
4891
4892 /*
4893 * So that init task is set with uid,gid 0 token
4894 */
4895 set_security_token(p);
4896
4897 return execve(p, &init_exec_args, retval);
4898 }
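/*
 * Scratch page sketch (single-user boot, layout illustrative): after
 * the copyouts above, the page holds
 *
 *	"/sbin/launchd\0" "-s\0" <align pad> { argv0, argv1, NULL }
 *
 * with init_exec_args.fname = argv0 and init_exec_args.argp pointing
 * at the pointer array, so the execve() call sees an ordinary
 * argument block.
 */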
4899
4900 static const char * init_programs[] = {
4901 #if DEBUG
4902 "/usr/local/sbin/launchd.debug",
4903 #endif
4904 #if DEVELOPMENT || DEBUG
4905 "/usr/local/sbin/launchd.development",
4906 #endif
4907 "/sbin/launchd",
4908 };
4909
4910 /*
4911 * load_init_program
4912 *
4913 * Description: Load the "init" program; in most cases, this will be "launchd"
4914 *
4915 * Parameters: p Process to call execve() to create
4916 * the "init" program
4917 *
4918 * Returns: (void)
4919 *
4920 * Notes: The process that is passed in is the first manufactured
4921 * process on the system, and gets here via bsd_ast() firing
4922 * for the first time. This is done to ensure that bsd_init()
4923 * has run to completion.
4924 *
4925 * In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
4926 * may be used to select a specific launchd executable. As with
4927 * the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
4928 * will force /sbin/launchd to be selected.
4929 *
4930 * Search order by build:
4931 *
4932 * DEBUG DEVELOPMENT RELEASE PATH
4933 * ----------------------------------------------------------------------------------
4934 * 1 1 NA /usr/local/sbin/launchd.$LAUNCHDSUFFIX
4935 * 2 NA NA /usr/local/sbin/launchd.debug
4936 * 3 2 NA /usr/local/sbin/launchd.development
4937 * 4 3 1 /sbin/launchd
4938 */
4939 void
4940 load_init_program(proc_t p)
4941 {
4942 uint32_t i;
4943 int error;
4944 vm_map_t map = current_map();
4945 mach_vm_offset_t scratch_addr = 0;
4946 mach_vm_size_t map_page_size = vm_map_page_size(map);
4947
4948 (void) mach_vm_allocate(map, &scratch_addr, map_page_size, VM_FLAGS_ANYWHERE);
4949 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
4950 (void) memorystatus_init_at_boot_snapshot();
4951 #endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
4952
4953 #if DEBUG || DEVELOPMENT
4954 /* Check for boot-arg suffix first */
4955 char launchd_suffix[64];
4956 if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
4957 char launchd_path[128];
4958 boolean_t is_release_suffix = ((launchd_suffix[0] == 0) ||
4959 (strcmp(launchd_suffix, "release") == 0));
4960
4961 if (is_release_suffix) {
4962 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, "/sbin/launchd");
4963 if (!error)
4964 return;
4965
4966 panic("Process 1 exec of launchd.release failed, errno %d", error);
4967 } else {
4968 strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path));
4969 strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
4970
4971 /* All the error data is lost in the loop below, don't
4972 * attempt to save it. */
4973 if (!load_init_program_at_path(p, (user_addr_t)scratch_addr, launchd_path)) {
4974 return;
4975 }
4976 }
4977 }
4978 #endif
4979
4980 error = ENOENT;
4981 for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) {
4982 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, init_programs[i]);
4983 if (!error)
4984 return;
4985 }
4986
4987 panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i-1]), error);
4988 }
4989
4990 /*
4991 * load_return_to_errno
4992 *
4993 * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
4994 *
4995 * Parameters: lrtn Mach error number
4996 *
4997 * Returns: (int) BSD error number
4998 * 0 Success
4999 * EBADARCH Bad architecture
5000 * EBADMACHO Bad Mach object file
5001 * ESHLIBVERS Bad shared library version
5002 * ENOMEM Out of memory/resource shortage
5003 * EACCES Access denied
5004 * ENOENT Entry not found (usually "file does
5005 * does not exist")
5006 * EIO An I/O error occurred
5007 * EBADEXEC The executable is corrupt/unknown
5008 */
5009 static int
5010 load_return_to_errno(load_return_t lrtn)
5011 {
5012 switch (lrtn) {
5013 case LOAD_SUCCESS:
5014 return 0;
5015 case LOAD_BADARCH:
5016 return EBADARCH;
5017 case LOAD_BADMACHO:
5018 case LOAD_BADMACHO_UPX:
5019 return EBADMACHO;
5020 case LOAD_SHLIB:
5021 return ESHLIBVERS;
5022 case LOAD_NOSPACE:
5023 case LOAD_RESOURCE:
5024 return ENOMEM;
5025 case LOAD_PROTECT:
5026 return EACCES;
5027 case LOAD_ENOENT:
5028 return ENOENT;
5029 case LOAD_IOERROR:
5030 return EIO;
5031 case LOAD_FAILURE:
5032 case LOAD_DECRYPTFAIL:
5033 default:
5034 return EBADEXEC;
5035 }
5036 }
5037
5038 #include <mach/mach_types.h>
5039 #include <mach/vm_prot.h>
5040 #include <mach/semaphore.h>
5041 #include <mach/sync_policy.h>
5042 #include <kern/clock.h>
5043 #include <mach/kern_return.h>
5044
5045 /*
5046 * execargs_alloc
5047 *
5048 * Description: Allocate the block of memory used by the execve arguments.
5049 * At the same time, we allocate a page so that we can read in
5050 * the first page of the image.
5051 *
5052 * Parameters: struct image_params * the image parameter block
5053 *
5054 * Returns: 0 Success
5055 * EINVAL Invalid argument
5056 * EACCES Permission denied
5057 * EINTR Interrupted function
5058 * ENOMEM Not enough space
5059 *
5060 * Notes: This is a temporary allocation into the kernel address space
5061 * to enable us to copy arguments in from user space. This is
5062 * necessitated by not mapping the process calling execve() into
5063 * the kernel address space during the execve() system call.
5064 *
5065 * We assemble the argument and environment, etc., into this
5066 * region before copying it as a single block into the child
5067 * process address space (at the top or bottom of the stack,
5068 * depending on which way the stack grows; see the function
5069 * exec_copyout_strings() for details).
5070 *
5071 * This ends up with a second (possibly unnecessary) copy compared
5072 * with assembling the data directly into the child address space;
5073 * but since we cannot guarantee that the parent has not modified
5074 * its environment, we cannot assume the data is still laid out as
5075 * a single contiguous block there.
5076 */
5077
5078
5079 static int execargs_waiters = 0;
5080 lck_mtx_t *execargs_cache_lock;
5081
5082 static void
5083 execargs_lock_lock(void) {
5084 lck_mtx_lock_spin(execargs_cache_lock);
5085 }
5086
5087 static void
5088 execargs_lock_unlock(void) {
5089 lck_mtx_unlock(execargs_cache_lock);
5090 }
5091
5092 static wait_result_t
5093 execargs_lock_sleep(void) {
5094 return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
5095 }
5096
5097 static kern_return_t
5098 execargs_purgeable_allocate(char **execarg_address) {
5099 kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
5100 assert(kr == KERN_SUCCESS);
5101 return kr;
5102 }
5103
5104 static kern_return_t
5105 execargs_purgeable_reference(void *execarg_address) {
5106 int state = VM_PURGABLE_NONVOLATILE;
5107 kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
5108
5109 assert(kr == KERN_SUCCESS);
5110 return kr;
5111 }
5112
5113 static kern_return_t
5114 execargs_purgeable_volatilize(void *execarg_address) {
5115 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
5116 kern_return_t kr;
5117 kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
5118
5119 assert(kr == KERN_SUCCESS);
5120
5121 return kr;
5122 }
5123
5124 static void
5125 execargs_wakeup_waiters(void) {
5126 thread_wakeup(&execargs_free_count);
5127 }
5128
5129 static int
5130 execargs_alloc(struct image_params *imgp)
5131 {
5132 kern_return_t kret;
5133 wait_result_t res;
5134 int i, cache_index = -1;
5135
5136 execargs_lock_lock();
5137
5138 while (execargs_free_count == 0) {
5139 execargs_waiters++;
5140 res = execargs_lock_sleep();
5141 execargs_waiters--;
5142 if (res != THREAD_AWAKENED) {
5143 execargs_lock_unlock();
5144 return (EINTR);
5145 }
5146 }
5147
5148 execargs_free_count--;
5149
5150 for (i = 0; i < execargs_cache_size; i++) {
5151 vm_offset_t element = execargs_cache[i];
5152 if (element) {
5153 cache_index = i;
5154 imgp->ip_strings = (char *)(execargs_cache[i]);
5155 execargs_cache[i] = 0;
5156 break;
5157 }
5158 }
5159
5160 assert(execargs_free_count >= 0);
5161
5162 execargs_lock_unlock();
5163
5164 if (cache_index == -1) {
5165 kret = execargs_purgeable_allocate(&imgp->ip_strings);
5166 } else {
5167 kret = execargs_purgeable_reference(imgp->ip_strings);
5168 }
5169
5170 assert(kret == KERN_SUCCESS);
5171 if (kret != KERN_SUCCESS) {
5172 return (ENOMEM);
5173 }
5174
5175 /* last page used to read in file headers */
5176 imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
5177 imgp->ip_strendp = imgp->ip_strings;
5178 imgp->ip_argspace = NCARGS;
5179 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
5180
5181 return (0);
5182 }
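/*
 * Resulting buffer layout, as set up above: argument strings
 * accumulate upward from ip_strings (with ip_strendp chasing them
 * and ip_strspace counting down), while ip_vdata names the page at
 * ip_strings + NCARGS + PAGE_SIZE that is reused to read in the
 * image's file headers.
 */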
5183
5184 /*
5185 * execargs_free
5186 *
5187 * Description: Free the block of memory used by the execve arguments and
5188 * the first page of the executable, as allocated by a previous
5189 * call to execargs_alloc().
5190 *
5191 * Parameters: struct image_params * the image parameter block
5192 *
5193 * Returns: 0 Success
5194 * EINVAL Invalid argument
5195 * EINTR Operation interrupted
5196 */
5197 static int
5198 execargs_free(struct image_params *imgp)
5199 {
5200 kern_return_t kret;
5201 int i;
5202 boolean_t needs_wakeup = FALSE;
5203
5204 kret = execargs_purgeable_volatilize(imgp->ip_strings);
5205
5206 execargs_lock_lock();
5207 execargs_free_count++;
5208
5209 for (i = 0; i < execargs_cache_size; i++) {
5210 vm_offset_t element = execargs_cache[i];
5211 if (element == 0) {
5212 execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
5213 imgp->ip_strings = NULL;
5214 break;
5215 }
5216 }
5217
5218 assert(imgp->ip_strings == NULL);
5219
5220 if (execargs_waiters > 0)
5221 needs_wakeup = TRUE;
5222
5223 execargs_lock_unlock();
5224
5225 if (needs_wakeup == TRUE)
5226 execargs_wakeup_waiters();
5227
5228 return ((kret == KERN_SUCCESS ? 0 : EINVAL));
5229 }
5230
5231 static void
5232 exec_resettextvp(proc_t p, struct image_params *imgp)
5233 {
5234 vnode_t vp;
5235 off_t offset;
5236 vnode_t tvp = p->p_textvp;
5237 int ret;
5238
5239 vp = imgp->ip_vp;
5240 offset = imgp->ip_arch_offset;
5241
5242 if (vp == NULLVP)
5243 panic("exec_resettextvp: expected valid vp");
5244
5245 ret = vnode_ref(vp);
5246 proc_lock(p);
5247 if (ret == 0) {
5248 p->p_textvp = vp;
5249 p->p_textoff = offset;
5250 } else {
5251 p->p_textvp = NULLVP; /* this is paranoia */
5252 p->p_textoff = 0;
5253 }
5254 proc_unlock(p);
5255
5256 if ( tvp != NULLVP) {
5257 if (vnode_getwithref(tvp) == 0) {
5258 vnode_rele(tvp);
5259 vnode_put(tvp);
5260 }
5261 }
5262
5263 }
5264
5265 /*
5266 * If the process is not signed or if it contains entitlements, we
5267 * need to communicate through the task_access_port to taskgated.
5268 *
5269 * taskgated will provide a detached code signature if present, and
5270 * will enforce any restrictions on entitlements.
5271 */
5272
5273 static boolean_t
5274 taskgated_required(proc_t p, boolean_t *require_success)
5275 {
5276 size_t length;
5277 void *blob;
5278 int error;
5279
5280 if (cs_debug > 2)
5281 csvnode_print_debug(p->p_textvp);
5282
5283 const int can_skip_taskgated = csproc_get_platform_binary(p) && !csproc_get_platform_path(p);
5284 if (can_skip_taskgated) {
5285 if (cs_debug) printf("taskgated not required for: %s\n", p->p_name);
5286 *require_success = FALSE;
5287 return FALSE;
5288 }
5289
5290 if ((p->p_csflags & CS_VALID) == 0) {
5291 *require_success = FALSE;
5292 return TRUE;
5293 }
5294
5295 error = cs_entitlements_blob_get(p, &blob, &length);
5296 if (error == 0 && blob != NULL) {
5297 /*
5298 * fatal on the desktop when entitlements are present,
5299 * unless we started in single-user mode
5300 */
5301 if ((boothowto & RB_SINGLE) == 0)
5302 *require_success = TRUE;
5303 /*
5304 * Allow initproc to run without causing taskgated to launch
5305 */
5306 if (p == initproc) {
5307 *require_success = FALSE;
5308 return FALSE;
5309 }
5310
5311 if (cs_debug) printf("taskgated required for: %s\n", p->p_name);
5312
5313 return TRUE;
5314 }
5315
5316 *require_success = FALSE;
5317 return FALSE;
5318 }
5319
5320 /*
5321 * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
5322 *
5323 * Description: Waits for the userspace daemon to respond to the request
5324 * we made. The function is declared non-inline so that it is
5325 * visible in stackshots and spindumps, as well as when debugging.
5326 */
5327 __attribute__((noinline)) int
5328 __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
5329 {
5330 return find_code_signature(task_access_port, new_pid);
5331 }
5332
5333 static int
5334 check_for_signature(proc_t p, struct image_params *imgp)
5335 {
5336 mach_port_t port = NULL;
5337 kern_return_t kr = KERN_FAILURE;
5338 int error = EACCES;
5339 boolean_t unexpected_failure = FALSE;
5340 unsigned char hash[SHA1_RESULTLEN];
5341 boolean_t require_success = FALSE;
5342 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
5343 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
5344 os_reason_t signature_failure_reason = OS_REASON_NULL;
5345
5346 /*
5347 * Override inherited code signing flags with the
5348 * ones for the process that is being successfully
5349 * loaded
5350 */
5351 proc_lock(p);
5352 p->p_csflags = imgp->ip_csflags;
5353 proc_unlock(p);
5354
5355 /* Set the switch_protect flag on the map */
5356 if (p->p_csflags & (CS_HARD|CS_KILL)) {
5357 vm_map_switch_protect(get_task_map(p->task), TRUE);
5358 }
5359
5360 /*
5361 * Image activation may fail due to a security policy. This is
5362 * unexpected, but if the security framework does not approve of
5363 * the exec, kill the process and return immediately.
5364 */
5365 if (imgp->ip_mac_return != 0) {
5366
5367 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5368 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY, 0, 0);
5369 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
5370 error = imgp->ip_mac_return;
5371 unexpected_failure = TRUE;
5372 goto done;
5373 }
5374
5375 if (imgp->ip_cs_error != OS_REASON_NULL) {
5376 signature_failure_reason = imgp->ip_cs_error;
5377 imgp->ip_cs_error = OS_REASON_NULL;
5378 error = EACCES;
5379 goto done;
5380 }
5381
5382 /* check if callout to taskgated is needed */
5383 if (!taskgated_required(p, &require_success)) {
5384 error = 0;
5385 goto done;
5386 }
5387
5388 kr = task_get_task_access_port(p->task, &port);
5389 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
5390 error = 0;
5391 if (require_success) {
5392 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5393 p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT, 0, 0);
5394 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT);
5395 error = EACCES;
5396 }
5397 goto done;
5398 }
5399
5400 /*
5401 * taskgated returns KERN_SUCCESS if it has completed its work
5402 * and the exec should continue, KERN_FAILURE if the exec should
5403 * fail, or it may error out with a different error code in the
5404 * event of a MIG failure (e.g. the process was signalled during
5405 * the RPC call, taskgated died, the MIG server died, etc.).
5406 */
5407
5408 kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
5409 switch (kr) {
5410 case KERN_SUCCESS:
5411 error = 0;
5412 break;
5413 case KERN_FAILURE:
5414 error = EACCES;
5415
5416 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5417 p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG, 0, 0);
5418 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG);
5419 goto done;
5420 default:
5421 error = EACCES;
5422
5423 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5424 p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER, 0, 0);
5425 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER);
5426 unexpected_failure = TRUE;
5427 goto done;
5428 }
5429
5430 /* Only do this if exec_resettextvp() did not fail */
5431 if (p->p_textvp != NULLVP) {
5432 /*
5433 * If there's a new code directory, mark this process
5434 * as signed.
5435 */
5436 if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) {
5437 proc_lock(p);
5438 p->p_csflags |= CS_VALID;
5439 proc_unlock(p);
5440 }
5441 }
5442
5443 done:
5444 if (0 != error) {
5445 if (!unexpected_failure)
5446 p->p_csflags |= CS_KILLED;
5447 /* make very sure execution fails */
5448 if (vfexec || spawn) {
5449 assert(signature_failure_reason != OS_REASON_NULL);
5450 psignal_vfork_with_reason(p, p->task, imgp->ip_new_thread,
5451 SIGKILL, signature_failure_reason);
5452 signature_failure_reason = OS_REASON_NULL;
5453 error = 0;
5454 } else {
5455 assert(signature_failure_reason != OS_REASON_NULL);
5456 psignal_with_reason(p, SIGKILL, signature_failure_reason);
5457 signature_failure_reason = OS_REASON_NULL;
5458 }
5459 }
5460
5461 /* If we hit this, we likely would have leaked an exit reason */
5462 assert(signature_failure_reason == OS_REASON_NULL);
5463 return error;
5464 }
5465
/*
 * Typically, as soon as we start executing this process, the
 * first instruction will trigger a VM fault to bring the text
 * pages (as executable) into the address space, followed soon
 * thereafter by dyld data structures (for a dynamic executable).
 * To optimize this, as well as to improve support for hardware
 * debuggers that can only access resident pages present
 * in the process' page tables, we prefault some pages if
 * possible. Errors are non-fatal.
 */
static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
{
	int ret;
	size_t expected_all_image_infos_size;

	/*
	 * Prefault executable or dyld entry point.
	 */
	vm_fault(current_map(),
			vm_map_trunc_page(load_result->entry_point,
				vm_map_page_mask(current_map())),
			VM_PROT_READ | VM_PROT_EXECUTE,
			FALSE,
			THREAD_UNINT, NULL, 0);

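	/* The layout (and size) of the dyld anchor depends on the process ABI. */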
	if (imgp->ip_flags & IMGPF_IS_64BIT) {
		expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
	} else {
		expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
	}

	/* Decode dyld anchor structure from <mach-o/dyld_images.h> */
	if (load_result->dynlinker &&
		load_result->all_image_info_addr &&
		load_result->all_image_info_size >= expected_all_image_infos_size) {
		union {
			struct user64_dyld_all_image_infos	infos64;
			struct user32_dyld_all_image_infos	infos32;
		} all_image_infos;

		/*
		 * Pre-fault to avoid copyin() going through the trap handler
		 * and recovery path.
		 */
		vm_fault(current_map(),
				vm_map_trunc_page(load_result->all_image_info_addr,
					vm_map_page_mask(current_map())),
				VM_PROT_READ | VM_PROT_WRITE,
				FALSE,
				THREAD_UNINT, NULL, 0);
		if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
			/* all_image_infos straddles a page */
			vm_fault(current_map(),
					vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
						vm_map_page_mask(current_map())),
					VM_PROT_READ | VM_PROT_WRITE,
					FALSE,
					THREAD_UNINT, NULL, 0);
		}

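		/*
		 * The "version" field is the first 32-bit word in both the
		 * 32- and 64-bit layouts, so the 32-bit view is safe to
		 * consult regardless of ABI; the fields consumed below are
		 * only present at version 9 and later of the structure.
		 */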
		ret = copyin(load_result->all_image_info_addr,
				&all_image_infos,
				expected_all_image_infos_size);
		if (ret == 0 && all_image_infos.infos32.version >= 9) {

			user_addr_t notification_address;
			user_addr_t dyld_image_address;
			user_addr_t dyld_version_address;
			user_addr_t dyld_all_image_infos_address;
			user_addr_t dyld_slide_amount;

			if (imgp->ip_flags & IMGPF_IS_64BIT) {
				notification_address = all_image_infos.infos64.notification;
				dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
				dyld_version_address = all_image_infos.infos64.dyldVersion;
				dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
			} else {
				notification_address = all_image_infos.infos32.notification;
				dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
				dyld_version_address = all_image_infos.infos32.dyldVersion;
				dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
			}

			/*
			 * dyld statically sets up the all_image_infos in its Mach-O
			 * binary at static link time, with pointers relative to its default
			 * load address. Since ASLR might slide dyld before its first
			 * instruction is executed, "dyld_slide_amount" tells us how far
			 * dyld was loaded compared to its default expected load address.
			 * All other pointers into dyld's image should be adjusted by this
			 * amount. At some point later, dyld will fix up the pointers to
			 * take the slide into account, at which point the
			 * all_image_infos_address field in the structure will match the
			 * runtime load address, and "dyld_slide_amount" would be 0 if we
			 * were to compute it again.
			 */

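			/*
			 * Illustrative example (hypothetical addresses): if dyld
			 * was linked to load at 0x1fe000000 but ASLR placed it at
			 * 0x1fe020000, then all_image_info_addr minus
			 * dyldAllImageInfosAddress yields a slide of 0x20000, and
			 * every pointer read from the copied-in structure must be
			 * adjusted by 0x20000 before it is used.
			 */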
			dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;

#if 0
			kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
				(uint64_t)load_result->all_image_info_addr,
				all_image_infos.infos32.version,
				(uint64_t)notification_address,
				(uint64_t)dyld_image_address,
				(uint64_t)dyld_version_address,
				(uint64_t)dyld_all_image_infos_address);
#endif

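			/*
			 * Prefault the pages dyld touches first: its image-change
			 * notification routine and Mach-O header (executable), its
			 * version string (read-only), and the all_image_infos
			 * structure itself (writable, since dyld updates it in
			 * place). Each address read from the structure must be slid.
			 */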
			vm_fault(current_map(),
					vm_map_trunc_page(notification_address + dyld_slide_amount,
						vm_map_page_mask(current_map())),
					VM_PROT_READ | VM_PROT_EXECUTE,
					FALSE,
					THREAD_UNINT, NULL, 0);
			vm_fault(current_map(),
					vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
						vm_map_page_mask(current_map())),
					VM_PROT_READ | VM_PROT_EXECUTE,
					FALSE,
					THREAD_UNINT, NULL, 0);
			vm_fault(current_map(),
					vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
						vm_map_page_mask(current_map())),
					VM_PROT_READ,
					FALSE,
					THREAD_UNINT, NULL, 0);
			vm_fault(current_map(),
					vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
						vm_map_page_mask(current_map())),
					VM_PROT_READ | VM_PROT_WRITE,
					FALSE,
					THREAD_UNINT, NULL, 0);
		}
	}
}