]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exec.c
20b1f0317a610aa49e332cbc21555defa5903312
[apple/xnu.git] / bsd / kern / kern_exec.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved. The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36 /*-
37 * Copyright (c) 1982, 1986, 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 * (c) UNIX System Laboratories, Inc.
40 * All or some portions of this file are derived from material licensed
41 * to the University of California by American Telephone and Telegraph
42 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
43 * the permission of UNIX System Laboratories, Inc.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 * notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 * notice, this list of conditions and the following disclaimer in the
52 * documentation and/or other materials provided with the distribution.
53 * 3. All advertising materials mentioning features or use of this software
54 * must display the following acknowledgement:
55 * This product includes software developed by the University of
56 * California, Berkeley and its contributors.
57 * 4. Neither the name of the University nor the names of its contributors
58 * may be used to endorse or promote products derived from this software
59 * without specific prior written permission.
60 *
61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71 * SUCH DAMAGE.
72 *
73 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
74 */
75 /*
76 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
79 * Version 2.0.
80 */
81 #include <machine/reg.h>
82 #include <machine/cpu_capabilities.h>
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/filedesc.h>
87 #include <sys/kernel.h>
88 #include <sys/proc_internal.h>
89 #include <sys/kauth.h>
90 #include <sys/user.h>
91 #include <sys/socketvar.h>
92 #include <sys/malloc.h>
93 #include <sys/namei.h>
94 #include <sys/mount_internal.h>
95 #include <sys/vnode_internal.h>
96 #include <sys/file_internal.h>
97 #include <sys/stat.h>
98 #include <sys/uio_internal.h>
99 #include <sys/acct.h>
100 #include <sys/exec.h>
101 #include <sys/kdebug.h>
102 #include <sys/signal.h>
103 #include <sys/aio_kern.h>
104 #include <sys/sysproto.h>
105 #include <sys/persona.h>
106 #if SYSV_SHM
107 #include <sys/shm_internal.h> /* shmexec() */
108 #endif
109 #include <sys/ubc_internal.h> /* ubc_map() */
110 #include <sys/spawn.h>
111 #include <sys/spawn_internal.h>
112 #include <sys/process_policy.h>
113 #include <sys/codesign.h>
114 #include <crypto/sha1.h>
115
116 #include <libkern/libkern.h>
117
118 #include <security/audit/audit.h>
119
120 #include <ipc/ipc_types.h>
121
122 #include <mach/mach_types.h>
123 #include <mach/port.h>
124 #include <mach/task.h>
125 #include <mach/task_access.h>
126 #include <mach/thread_act.h>
127 #include <mach/vm_map.h>
128 #include <mach/mach_vm.h>
129 #include <mach/vm_param.h>
130
131 #include <kern/sched_prim.h> /* thread_wakeup() */
132 #include <kern/affinity.h>
133 #include <kern/assert.h>
134 #include <kern/task.h>
135 #include <kern/coalition.h>
136 #include <kern/kalloc.h>
137
138 #if CONFIG_MACF
139 #include <security/mac.h>
140 #include <security/mac_mach_internal.h>
141 #endif
142
143 #include <vm/vm_map.h>
144 #include <vm/vm_kern.h>
145 #include <vm/vm_protos.h>
146 #include <vm/vm_kern.h>
147 #include <vm/vm_fault.h>
148 #include <vm/vm_pageout.h>
149
150 #include <kdp/kdp_dyld.h>
151
152 #include <machine/pal_routines.h>
153
154 #include <pexpert/pexpert.h>
155
156 #if CONFIG_MEMORYSTATUS
157 #include <sys/kern_memorystatus.h>
158 #endif
159
160 #if CONFIG_DTRACE
161 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
162 extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
163 extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);
164 extern void (*dtrace_helpers_cleanup)(proc_t);
165 extern void dtrace_lazy_dofs_destroy(proc_t);
166
167 /*
168 * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
169 * we will store its value before actually calling it.
170 */
171 static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;
172
173 #include <sys/dtrace_ptss.h>
174 #endif
175
176 /* support for child creation in exec after vfork */
177 thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit);
178 void vfork_exit(proc_t p, int rv);
179 extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
180
181 /*
182 * Mach things for which prototypes are unavailable from Mach headers
183 */
184 void ipc_task_reset(
185 task_t task);
186 void ipc_thread_reset(
187 thread_t thread);
188 kern_return_t ipc_object_copyin(
189 ipc_space_t space,
190 mach_port_name_t name,
191 mach_msg_type_name_t msgt_name,
192 ipc_object_t *objectp);
193 void ipc_port_release_send(ipc_port_t);
194
195 #if DEVELOPMENT || DEBUG
196 void task_importance_update_owner_info(task_t);
197 #endif
198
199 extern struct savearea *get_user_regs(thread_t);
200 extern kern_return_t machine_thread_neon_state_initialize(thread_t thread);
201
202 __attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);
203
204 #include <kern/thread.h>
205 #include <kern/task.h>
206 #include <kern/ast.h>
207 #include <kern/mach_loader.h>
208 #include <kern/mach_fat.h>
209 #include <mach-o/fat.h>
210 #include <mach-o/loader.h>
211 #include <machine/vmparam.h>
212 #include <sys/imgact.h>
213
214 #include <sys/sdt.h>
215
216
217 /*
218 * EAI_ITERLIMIT The maximum number of times to iterate an image
219 * activator in exec_activate_image() before treating
220 * it as malformed/corrupt.
221 */
222 #define EAI_ITERLIMIT 3
223
/*
 * For #! interpreter parsing
 *
 * IS_WHITESPACE(ch) is true for a space or a tab; IS_EOL(ch) is true for
 * the comment character '#' or a newline.
 *
 * The argument is fully parenthesized so that an expression argument
 * (for example "c & 0x7f") is compared as a whole.  Each macro evaluates
 * its argument twice, so do not pass an expression with side effects.
 */
#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
#define IS_EOL(ch) (((ch) == '#') || ((ch) == '\n'))
229
230 extern vm_map_t bsd_pageable_map;
231 extern const struct fileops vnops;
232
/*
 * Round addr up to the next multiple of val; val must be a power of two.
 *
 * The mask is computed at user_addr_t width.  Computing it at the width
 * of val would, for a narrower unsigned val, zero-extend the mask and
 * clear the upper bits of the address.
 */
#define USER_ADDR_ALIGN(addr, val) \
	( ( (user_addr_t)(addr) + (user_addr_t)(val) - 1) \
		& ~((user_addr_t)(val) - 1) )
236
237 struct image_params; /* Forward */
238 static int exec_activate_image(struct image_params *imgp);
239 static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
240 static int load_return_to_errno(load_return_t lrtn);
241 static int execargs_alloc(struct image_params *imgp);
242 static int execargs_free(struct image_params *imgp);
243 static int exec_check_permissions(struct image_params *imgp);
244 static int exec_extract_strings(struct image_params *imgp);
245 static int exec_add_apple_strings(struct image_params *imgp);
246 static int exec_handle_sugid(struct image_params *imgp);
247 static int sugid_scripts = 0;
248 SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
249 static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
250 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
251 static void exec_resettextvp(proc_t, struct image_params *);
252 static int check_for_signature(proc_t, struct image_params *);
253 static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
254 static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports);
255 static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
256 ipc_port_t * portwatch_ports, int portwatch_count);
257
258 /*
259 * exec_add_user_string
260 *
261 * Add the requested string to the string space area.
262 *
263 * Parameters; struct image_params * image parameter block
264 * user_addr_t string to add to strings area
265 * int segment from which string comes
266 * boolean_t TRUE if string contributes to NCARGS
267 *
268 * Returns: 0 Success
269 * !0 Failure errno from copyinstr()
270 *
271 * Implicit returns:
272 * (imgp->ip_strendp) updated location of next add, if any
273 * (imgp->ip_strspace) updated byte count of space remaining
274 * (imgp->ip_argspace) updated byte count of space in NCARGS
275 */
276 static int
277 exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
278 {
279 int error = 0;
280
281 do {
282 size_t len = 0;
283 int space;
284
285 if (is_ncargs)
286 space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
287 else
288 space = imgp->ip_strspace;
289
290 if (space <= 0) {
291 error = E2BIG;
292 break;
293 }
294
295 if (!UIO_SEG_IS_USER_SPACE(seg)) {
296 char *kstr = CAST_DOWN(char *,str); /* SAFE */
297 error = copystr(kstr, imgp->ip_strendp, space, &len);
298 } else {
299 error = copyinstr(str, imgp->ip_strendp, space, &len);
300 }
301
302 imgp->ip_strendp += len;
303 imgp->ip_strspace -= len;
304 if (is_ncargs)
305 imgp->ip_argspace -= len;
306
307 } while (error == ENAMETOOLONG);
308
309 return error;
310 }
311
312 /*
313 * dyld is now passed the executable path as a getenv-like variable
314 * in the same fashion as the stack_guard and malloc_entropy keys.
315 */
316 #define EXECUTABLE_KEY "executable_path="
317
318 /*
319 * exec_save_path
320 *
321 * To support new app package launching for Mac OS X, the dyld needs the
322 * first argument to execve() stored on the user stack.
323 *
324 * Save the executable path name at the bottom of the strings area and set
325 * the argument vector pointer to the location following that to indicate
326 * the start of the argument and environment tuples, setting the remaining
327 * string space count to the size of the string area minus the path length.
328 *
329 * Parameters; struct image_params * image parameter block
330 * char * path used to invoke program
331 * int segment from which path comes
332 *
333 * Returns: int 0 Success
334 * EFAULT Bad address
335 * copy[in]str:EFAULT Bad address
336 * copy[in]str:ENAMETOOLONG Filename too long
337 *
338 * Implicit returns:
339 * (imgp->ip_strings) saved path
340 * (imgp->ip_strspace) space remaining in ip_strings
341 * (imgp->ip_strendp) start of remaining copy area
342 * (imgp->ip_argspace) space remaining of NCARGS
343 * (imgp->ip_applec) Initial applev[0]
344 *
345 * Note: We have to do this before the initial namei() since in the
346 * path contains symbolic links, namei() will overwrite the
347 * original path buffer contents. If the last symbolic link
348 * resolved was a relative pathname, we would lose the original
349 * "path", which could be an absolute pathname. This might be
350 * unacceptable for dyld.
351 */
352 static int
353 exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath)
354 {
355 int error;
356 size_t len;
357 char *kpath;
358
359 // imgp->ip_strings can come out of a cache, so we need to obliterate the
360 // old path.
361 memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);
362
363 len = MIN(MAXPATHLEN, imgp->ip_strspace);
364
365 switch(seg) {
366 case UIO_USERSPACE32:
367 case UIO_USERSPACE64: /* Same for copyin()... */
368 error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
369 break;
370 case UIO_SYSSPACE:
371 kpath = CAST_DOWN(char *,path); /* SAFE */
372 error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
373 break;
374 default:
375 error = EFAULT;
376 break;
377 }
378
379 if (!error) {
380 bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
381 len += strlen(EXECUTABLE_KEY);
382
383 imgp->ip_strendp += len;
384 imgp->ip_strspace -= len;
385
386 if (excpath) {
387 *excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
388 }
389 }
390
391 return(error);
392 }
393
394 /*
395 * exec_reset_save_path
396 *
397 * If we detect a shell script, we need to reset the string area
398 * state so that the interpreter can be saved onto the stack.
399
400 * Parameters; struct image_params * image parameter block
401 *
402 * Returns: int 0 Success
403 *
404 * Implicit returns:
405 * (imgp->ip_strings) saved path
406 * (imgp->ip_strspace) space remaining in ip_strings
407 * (imgp->ip_strendp) start of remaining copy area
408 * (imgp->ip_argspace) space remaining of NCARGS
409 *
410 */
411 static int
412 exec_reset_save_path(struct image_params *imgp)
413 {
414 imgp->ip_strendp = imgp->ip_strings;
415 imgp->ip_argspace = NCARGS;
416 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
417
418 return (0);
419 }
420
421 /*
422 * exec_shell_imgact
423 *
424 * Image activator for interpreter scripts. If the image begins with
425 * the characters "#!", then it is an interpreter script. Verify the
426 * length of the script line indicating the interpreter is not in
427 * excess of the maximum allowed size. If this is the case, then
428 * break out the arguments, if any, which are separated by white
429 * space, and copy them into the argument save area as if they were
430 * provided on the command line before all other arguments. The line
431 * ends when we encounter a comment character ('#') or newline.
432 *
433 * Parameters; struct image_params * image parameter block
434 *
435 * Returns: -1 not an interpreter (keep looking)
436 * -3 Success: interpreter: relookup
437 * >0 Failure: interpreter: error number
438 *
439 * A return value other than -1 indicates subsequent image activators should
440 * not be given the opportunity to attempt to activate the image.
441 */
442 static int
443 exec_shell_imgact(struct image_params *imgp)
444 {
445 char *vdata = imgp->ip_vdata;
446 char *ihp;
447 char *line_startp, *line_endp;
448 char *interp;
449 proc_t p;
450 struct fileproc *fp;
451 int fd;
452 int error;
453
454 /*
455 * Make sure it's a shell script. If we've already redirected
456 * from an interpreted file once, don't do it again.
457 */
458 if (vdata[0] != '#' ||
459 vdata[1] != '!' ||
460 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
461 return (-1);
462 }
463
464 if (imgp->ip_origcputype != 0) {
465 /* Fat header previously matched, don't allow shell script inside */
466 return (-1);
467 }
468
469 imgp->ip_flags |= IMGPF_INTERPRET;
470 imgp->ip_interp_sugid_fd = -1;
471 imgp->ip_interp_buffer[0] = '\0';
472
473 /* Check to see if SUGID scripts are permitted. If they aren't then
474 * clear the SUGID bits.
475 * imgp->ip_vattr is known to be valid.
476 */
477 if (sugid_scripts == 0) {
478 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
479 }
480
481 /* Try to find the first non-whitespace character */
482 for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
483 if (IS_EOL(*ihp)) {
484 /* Did not find interpreter, "#!\n" */
485 return (ENOEXEC);
486 } else if (IS_WHITESPACE(*ihp)) {
487 /* Whitespace, like "#! /bin/sh\n", keep going. */
488 } else {
489 /* Found start of interpreter */
490 break;
491 }
492 }
493
494 if (ihp == &vdata[IMG_SHSIZE]) {
495 /* All whitespace, like "#! " */
496 return (ENOEXEC);
497 }
498
499 line_startp = ihp;
500
501 /* Try to find the end of the interpreter+args string */
502 for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
503 if (IS_EOL(*ihp)) {
504 /* Got it */
505 break;
506 } else {
507 /* Still part of interpreter or args */
508 }
509 }
510
511 if (ihp == &vdata[IMG_SHSIZE]) {
512 /* A long line, like "#! blah blah blah" without end */
513 return (ENOEXEC);
514 }
515
516 /* Backtrack until we find the last non-whitespace */
517 while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
518 ihp--;
519 }
520
521 /* The character after the last non-whitespace is our logical end of line */
522 line_endp = ihp + 1;
523
524 /*
525 * Now we have pointers to the usable part of:
526 *
527 * "#! /usr/bin/int first second third \n"
528 * ^ line_startp ^ line_endp
529 */
530
531 /* copy the interpreter name */
532 interp = imgp->ip_interp_buffer;
533 for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
534 *interp++ = *ihp;
535 *interp = '\0';
536
537 exec_reset_save_path(imgp);
538 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
539 UIO_SYSSPACE, NULL);
540
541 /* Copy the entire interpreter + args for later processing into argv[] */
542 interp = imgp->ip_interp_buffer;
543 for ( ihp = line_startp; (ihp < line_endp); ihp++)
544 *interp++ = *ihp;
545 *interp = '\0';
546
547 /*
548 * If we have a SUID oder SGID script, create a file descriptor
549 * from the vnode and pass /dev/fd/%d instead of the actual
550 * path name so that the script does not get opened twice
551 */
552 if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
553 p = vfs_context_proc(imgp->ip_vfs_context);
554 error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
555 if (error)
556 return(error);
557
558 fp->f_fglob->fg_flag = FREAD;
559 fp->f_fglob->fg_ops = &vnops;
560 fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
561
562 proc_fdlock(p);
563 procfdtbl_releasefd(p, fd, NULL);
564 fp_drop(p, fd, fp, 1);
565 proc_fdunlock(p);
566 vnode_ref(imgp->ip_vp);
567
568 imgp->ip_interp_sugid_fd = fd;
569 }
570
571 return (-3);
572 }
573
574
575
576 /*
577 * exec_fat_imgact
578 *
579 * Image activator for fat 1.0 binaries. If the binary is fat, then we
580 * need to select an image from it internally, and make that the image
581 * we are going to attempt to execute. At present, this consists of
582 * reloading the first page for the image with a first page from the
583 * offset location indicated by the fat header.
584 *
585 * Parameters; struct image_params * image parameter block
586 *
587 * Returns: -1 not a fat binary (keep looking)
588 * -2 Success: encapsulated binary: reread
589 * >0 Failure: error number
590 *
591 * Important: This image activator is byte order neutral.
592 *
593 * Note: A return value other than -1 indicates subsequent image
594 * activators should not be given the opportunity to attempt
595 * to activate the image.
596 *
597 * If we find an encapsulated binary, we make no assertions
598 * about its validity; instead, we leave that up to a rescan
599 * for an activator to claim it, and, if it is claimed by one,
600 * that activator is responsible for determining validity.
601 */
602 static int
603 exec_fat_imgact(struct image_params *imgp)
604 {
605 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
606 kauth_cred_t cred = kauth_cred_proc_ref(p);
607 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
608 struct _posix_spawnattr *psa = NULL;
609 struct fat_arch fat_arch;
610 int resid, error;
611 load_return_t lret;
612
613 if (imgp->ip_origcputype != 0) {
614 /* Fat header previously matched, don't allow another fat file inside */
615 return (-1);
616 }
617
618 /* Make sure it's a fat binary */
619 if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
620 error = -1; /* not claimed */
621 goto bad;
622 }
623
624 /* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */
625 lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
626 if (lret != LOAD_SUCCESS) {
627 error = load_return_to_errno(lret);
628 goto bad;
629 }
630
631 /* If posix_spawn binprefs exist, respect those prefs. */
632 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
633 if (psa != NULL && psa->psa_binprefs[0] != 0) {
634 uint32_t pr = 0;
635
636 /* Check each preference listed against all arches in header */
637 for (pr = 0; pr < NBINPREFS; pr++) {
638 cpu_type_t pref = psa->psa_binprefs[pr];
639 if (pref == 0) {
640 /* No suitable arch in the pref list */
641 error = EBADARCH;
642 goto bad;
643 }
644
645 if (pref == CPU_TYPE_ANY) {
646 /* Fall through to regular grading */
647 goto regular_grading;
648 }
649
650 lret = fatfile_getbestarch_for_cputype(pref,
651 (vm_offset_t)fat_header,
652 PAGE_SIZE,
653 &fat_arch);
654 if (lret == LOAD_SUCCESS) {
655 goto use_arch;
656 }
657 }
658
659 /* Requested binary preference was not honored */
660 error = EBADEXEC;
661 goto bad;
662 }
663
664 regular_grading:
665 /* Look up our preferred architecture in the fat file. */
666 lret = fatfile_getbestarch((vm_offset_t)fat_header,
667 PAGE_SIZE,
668 &fat_arch);
669 if (lret != LOAD_SUCCESS) {
670 error = load_return_to_errno(lret);
671 goto bad;
672 }
673
674 use_arch:
675 /* Read the Mach-O header out of fat_arch */
676 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
677 PAGE_SIZE, fat_arch.offset,
678 UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED),
679 cred, &resid, p);
680 if (error) {
681 goto bad;
682 }
683
684 if (resid) {
685 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
686 }
687
688 /* Success. Indicate we have identified an encapsulated binary */
689 error = -2;
690 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
691 imgp->ip_arch_size = (user_size_t)fat_arch.size;
692 imgp->ip_origcputype = fat_arch.cputype;
693 imgp->ip_origcpusubtype = fat_arch.cpusubtype;
694
695 bad:
696 kauth_cred_unref(&cred);
697 return (error);
698 }
699
700 static int
701 activate_thread_state(thread_t thread, load_result_t *result)
702 {
703 int ret;
704
705 ret = thread_state_initialize(thread);
706 if (ret != KERN_SUCCESS) {
707 return ret;
708 }
709
710
711 if (result->threadstate) {
712 uint32_t *ts = result->threadstate;
713 uint32_t total_size = result->threadstate_sz;
714
715 while (total_size > 0) {
716 uint32_t flavor = *ts++;
717 uint32_t size = *ts++;
718
719 ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
720 if (ret) {
721 return ret;
722 }
723 ts += size;
724 total_size -= (size + 2) * sizeof(uint32_t);
725 }
726 }
727
728 thread_setentrypoint(thread, result->entry_point);
729
730 return KERN_SUCCESS;
731 }
732
733
734 /*
735 * exec_mach_imgact
736 *
737 * Image activator for mach-o 1.0 binaries.
738 *
739 * Parameters; struct image_params * image parameter block
740 *
741 * Returns: -1 not a Mach-O executable (keep looking)
742 * -2 Success: encapsulated binary: reread
743 * >0 Failure: error number
744 * EBADARCH Mach-o binary, but with an unrecognized
745 * architecture
746 * ENOMEM No memory for child process after -
747 * can only happen after vfork()
748 *
749 * Important: This image activator is NOT byte order neutral.
750 *
751 * Note: A return value other than -1 indicates subsequent image
752 * activators should not be given the opportunity to attempt
753 * to activate the image.
754 *
755 * TODO: More gracefully handle failures after vfork
756 */
757 static int
758 exec_mach_imgact(struct image_params *imgp)
759 {
760 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
761 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
762 int error = 0;
763 task_t task;
764 task_t new_task = NULL; /* protected by vfexec */
765 thread_t thread;
766 struct uthread *uthread;
767 vm_map_t old_map = VM_MAP_NULL;
768 vm_map_t map;
769 load_return_t lret;
770 load_result_t load_result;
771 struct _posix_spawnattr *psa = NULL;
772 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
773 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
774 int p_name_len;
775
776 /*
777 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
778 * is a reserved field on the end, so for the most part, we can
779 * treat them as if they were identical. Reverse-endian Mach-O
780 * binaries are recognized but not compatible.
781 */
782 if ((mach_header->magic == MH_CIGAM) ||
783 (mach_header->magic == MH_CIGAM_64)) {
784 error = EBADARCH;
785 goto bad;
786 }
787
788 if ((mach_header->magic != MH_MAGIC) &&
789 (mach_header->magic != MH_MAGIC_64)) {
790 error = -1;
791 goto bad;
792 }
793
794 if (mach_header->filetype != MH_EXECUTE) {
795 error = -1;
796 goto bad;
797 }
798
799 if (imgp->ip_origcputype != 0) {
800 /* Fat header previously had an idea about this thin file */
801 if (imgp->ip_origcputype != mach_header->cputype ||
802 imgp->ip_origcpusubtype != mach_header->cpusubtype) {
803 error = EBADARCH;
804 goto bad;
805 }
806 } else {
807 imgp->ip_origcputype = mach_header->cputype;
808 imgp->ip_origcpusubtype = mach_header->cpusubtype;
809 }
810
811 task = current_task();
812 thread = current_thread();
813 uthread = get_bsdthread_info(thread);
814
815 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
816 imgp->ip_flags |= IMGPF_IS_64BIT;
817
818 /* If posix_spawn binprefs exist, respect those prefs. */
819 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
820 if (psa != NULL && psa->psa_binprefs[0] != 0) {
821 int pr = 0;
822 for (pr = 0; pr < NBINPREFS; pr++) {
823 cpu_type_t pref = psa->psa_binprefs[pr];
824 if (pref == 0) {
825 /* No suitable arch in the pref list */
826 error = EBADARCH;
827 goto bad;
828 }
829
830 if (pref == CPU_TYPE_ANY) {
831 /* Jump to regular grading */
832 goto grade;
833 }
834
835 if (pref == imgp->ip_origcputype) {
836 /* We have a match! */
837 goto grade;
838 }
839 }
840 error = EBADARCH;
841 goto bad;
842 }
843 grade:
844 if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
845 error = EBADARCH;
846 goto bad;
847 }
848
849 /* Copy in arguments/environment from the old process */
850 error = exec_extract_strings(imgp);
851 if (error)
852 goto bad;
853
854 error = exec_add_apple_strings(imgp);
855 if (error)
856 goto bad;
857
858 AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
859 imgp->ip_endargv - imgp->ip_startargv);
860 AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
861 imgp->ip_endenvv - imgp->ip_endargv);
862
863 /*
864 * We are being called to activate an image subsequent to a vfork()
865 * operation; in this case, we know that our task, thread, and
866 * uthread are actually those of our parent, and our proc, which we
867 * obtained indirectly from the image_params vfs_context_t, is the
868 * new child process.
869 */
870 if (vfexec || spawn) {
871 if (vfexec) {
872 imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
873 if (imgp->ip_new_thread == NULL) {
874 error = ENOMEM;
875 goto bad;
876 }
877 }
878
879 /* reset local idea of thread, uthread, task */
880 thread = imgp->ip_new_thread;
881 uthread = get_bsdthread_info(thread);
882 task = new_task = get_threadtask(thread);
883 map = get_task_map(task);
884 } else {
885 map = VM_MAP_NULL;
886 }
887
888 /*
889 * We set these flags here; this is OK, since if we fail after
890 * this point, we have already destroyed the parent process anyway.
891 */
892 task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
893 if (imgp->ip_flags & IMGPF_IS_64BIT) {
894 task_set_64bit(task, TRUE);
895 OSBitOrAtomic(P_LP64, &p->p_flag);
896 } else {
897 task_set_64bit(task, FALSE);
898 OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
899 }
900
901 /*
902 * Load the Mach-O file.
903 *
904 * NOTE: An error after this point indicates we have potentially
905 * destroyed or overwritten some process state while attempting an
906 * execve() following a vfork(), which is an unrecoverable condition.
907 * We send the new process an immediate SIGKILL to avoid it executing
908 * any instructions in the mutated address space. For true spawns,
909 * this is not the case, and "too late" is still not too late to
910 * return an error code to the parent process.
911 */
912
913 /*
914 * Actually load the image file we previously decided to load.
915 */
916 lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
917
918 if (lret != LOAD_SUCCESS) {
919 error = load_return_to_errno(lret);
920 goto badtoolate;
921 }
922
923 proc_lock(p);
924 p->p_cputype = imgp->ip_origcputype;
925 p->p_cpusubtype = imgp->ip_origcpusubtype;
926 proc_unlock(p);
927
928 vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
929
930 /*
931 * Set code-signing flags if this binary is signed, or if parent has
932 * requested them on exec.
933 */
934 if (load_result.csflags & CS_VALID) {
935 imgp->ip_csflags |= load_result.csflags &
936 (CS_VALID|
937 CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM|
938 CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
939 } else {
940 imgp->ip_csflags &= ~CS_VALID;
941 }
942
943 if (p->p_csflags & CS_EXEC_SET_HARD)
944 imgp->ip_csflags |= CS_HARD;
945 if (p->p_csflags & CS_EXEC_SET_KILL)
946 imgp->ip_csflags |= CS_KILL;
947 if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
948 imgp->ip_csflags |= CS_ENFORCEMENT;
949 if (p->p_csflags & CS_EXEC_SET_INSTALLER)
950 imgp->ip_csflags |= CS_INSTALLER;
951
952 /*
953 * Set up the system reserved areas in the new address space.
954 */
955 vm_map_exec(map, task, (void *)p->p_fd->fd_rdir, cpu_type());
956
957 /*
958 * Close file descriptors which specify close-on-exec.
959 */
960 fdexec(p, psa != NULL ? psa->psa_flags : 0);
961
962 /*
963 * deal with set[ug]id.
964 */
965 error = exec_handle_sugid(imgp);
966 if (error) {
967 if (spawn || !vfexec) {
968 vm_map_deallocate(map);
969 }
970 goto badtoolate;
971 }
972
973 /*
974 * Commit to new map.
975 *
976 * Swap the new map for the old, which consumes our new map reference but
977 * each leaves us responsible for the old_map reference. That lets us get
978 * off the pmap associated with it, and then we can release it.
979 */
980 if (!vfexec) {
981 old_map = swap_task_map(task, thread, map, !spawn);
982 vm_map_deallocate(old_map);
983 }
984
985 lret = activate_thread_state(thread, &load_result);
986 if (lret != KERN_SUCCESS) {
987 goto badtoolate;
988 }
989
990 /*
991 * deal with voucher on exec-calling thread.
992 */
993 if (imgp->ip_new_thread == NULL)
994 thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
995
996 /* Make sure we won't interrupt ourself signalling a partial process */
997 if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
998 psignal(p, SIGTRAP);
999
1000 if (load_result.unixproc &&
1001 create_unix_stack(get_task_map(task),
1002 &load_result,
1003 p) != KERN_SUCCESS) {
1004 error = load_return_to_errno(LOAD_NOSPACE);
1005 goto badtoolate;
1006 }
1007
1008 if (vfexec || spawn) {
1009 old_map = vm_map_switch(get_task_map(task));
1010 }
1011
1012 if (load_result.unixproc) {
1013 user_addr_t ap;
1014
1015 /*
1016 * Copy the strings area out into the new process address
1017 * space.
1018 */
1019 ap = p->user_stack;
1020 error = exec_copyout_strings(imgp, &ap);
1021 if (error) {
1022 if (vfexec || spawn)
1023 vm_map_switch(old_map);
1024 goto badtoolate;
1025 }
1026 /* Set the stack */
1027 thread_setuserstack(thread, ap);
1028 }
1029
1030 if (load_result.dynlinker) {
1031 uint64_t ap;
1032 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
1033
1034 /* Adjust the stack */
1035 ap = thread_adjuserstack(thread, -new_ptr_size);
1036 error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
1037
1038 if (error) {
1039 if (vfexec || spawn)
1040 vm_map_switch(old_map);
1041 goto badtoolate;
1042 }
1043 task_set_dyld_info(task, load_result.all_image_info_addr,
1044 load_result.all_image_info_size);
1045 }
1046
1047 /* Avoid immediate VM faults back into kernel */
1048 exec_prefault_data(p, imgp, &load_result);
1049
1050 if (vfexec || spawn) {
1051 vm_map_switch(old_map);
1052 }
1053
1054 /* Stop profiling */
1055 stopprofclock(p);
1056
1057 /*
1058 * Reset signal state.
1059 */
1060 execsigs(p, thread);
1061
1062 /*
1063 * need to cancel async IO requests that can be cancelled and wait for those
1064 * already active. MAY BLOCK!
1065 */
1066 _aio_exec( p );
1067
1068 #if SYSV_SHM
1069 /* FIXME: Till vmspace inherit is fixed: */
1070 if (!vfexec && p->vm_shm)
1071 shmexec(p);
1072 #endif
1073 #if SYSV_SEM
1074 /* Clean up the semaphores */
1075 semexit(p);
1076 #endif
1077
1078 /*
1079 * Remember file name for accounting.
1080 */
1081 p->p_acflag &= ~AFORK;
1082
1083 /*
1084 * Set p->p_comm and p->p_name to the name passed to exec
1085 */
1086 p_name_len = sizeof(p->p_name) - 1;
1087 if(imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len)
1088 imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
1089 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
1090 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
1091 p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
1092
1093 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
1094 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
1095 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
1096 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
1097 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
1098
1099 pal_dbg_set_task_name( p->task );
1100
1101 #if DEVELOPMENT || DEBUG
1102 /*
1103 * Update the pid an proc name for importance base if any
1104 */
1105 task_importance_update_owner_info(p->task);
1106 #endif
1107
1108 memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
1109
1110 // <rdar://6598155> dtrace code cleanup needed
1111 #if CONFIG_DTRACE
1112 /*
1113 * Invalidate any predicate evaluation already cached for this thread by DTrace.
1114 * That's because we've just stored to p_comm and DTrace refers to that when it
1115 * evaluates the "execname" special variable. uid and gid may have changed as well.
1116 */
1117 dtrace_set_thread_predcache(current_thread(), 0);
1118
1119 /*
1120 * Free any outstanding lazy dof entries. It is imperative we
1121 * always call dtrace_lazy_dofs_destroy, rather than null check
1122 * and call if !NULL. If we NULL test, during lazy dof faulting
1123 * we can race with the faulting code and proceed from here to
1124 * beyond the helpers cleanup. The lazy dof faulting will then
1125 * install new helpers which no longer belong to this process!
1126 */
1127 dtrace_lazy_dofs_destroy(p);
1128
1129
1130 /*
1131 * Clean up any DTrace helpers for the process.
1132 */
1133 if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
1134 (*dtrace_helpers_cleanup)(p);
1135 }
1136
1137 /*
1138 * Cleanup the DTrace provider associated with this process.
1139 */
1140 proc_lock(p);
1141 if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
1142 (*dtrace_fasttrap_exec_ptr)(p);
1143 }
1144 proc_unlock(p);
1145 #endif
1146
1147 if (kdebug_enable) {
1148 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
1149
1150 /*
1151 * Collect the pathname for tracing
1152 */
1153 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
1154
1155 if (vfexec || spawn) {
1156 KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE,
1157 p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread));
1158 KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE,
1159 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
1160 } else {
1161 KERNEL_DEBUG_CONSTANT(TRACE_DATA_EXEC | DBG_FUNC_NONE,
1162 p->p_pid ,0,0,0,0);
1163 KERNEL_DEBUG_CONSTANT(TRACE_STRING_EXEC | DBG_FUNC_NONE,
1164 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
1165 }
1166 }
1167
1168 /*
1169 * If posix_spawned with the START_SUSPENDED flag, stop the
1170 * process before it runs.
1171 */
1172 if (imgp->ip_px_sa != NULL) {
1173 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1174 if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1175 proc_lock(p);
1176 p->p_stat = SSTOP;
1177 proc_unlock(p);
1178 (void) task_suspend_internal(p->task);
1179 }
1180 }
1181
1182 /*
1183 * mark as execed, wakeup the process that vforked (if any) and tell
1184 * it that it now has its own resources back
1185 */
1186 OSBitOrAtomic(P_EXEC, &p->p_flag);
1187 proc_resetregister(p);
1188 if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1189 proc_lock(p);
1190 p->p_lflag &= ~P_LPPWAIT;
1191 proc_unlock(p);
1192 wakeup((caddr_t)p->p_pptr);
1193 }
1194
1195 /*
1196 * Pay for our earlier safety; deliver the delayed signals from
1197 * the incomplete vfexec process now that it's complete.
1198 */
1199 if (vfexec && (p->p_lflag & P_LTRACED)) {
1200 psignal_vfork(p, new_task, thread, SIGTRAP);
1201 }
1202
1203 goto done;
1204
1205 badtoolate:
1206 /* Don't allow child process to execute any instructions */
1207 if (!spawn) {
1208 if (vfexec) {
1209 psignal_vfork(p, new_task, thread, SIGKILL);
1210 } else {
1211 psignal(p, SIGKILL);
1212 }
1213
1214 /* We can't stop this system call at this point, so just pretend we succeeded */
1215 error = 0;
1216 }
1217
1218 done:
1219 if (!spawn) {
1220 /* notify only if it has not failed due to FP Key error */
1221 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
1222 proc_knote(p, NOTE_EXEC);
1223 }
1224
1225 /* Drop extra references for cases where we don't expect the caller to clean up */
1226 if (vfexec || (spawn && error == 0)) {
1227 task_deallocate(new_task);
1228 thread_deallocate(thread);
1229 }
1230
1231 if (load_result.threadstate) {
1232 kfree(load_result.threadstate, load_result.threadstate_sz);
1233 load_result.threadstate = NULL;
1234 }
1235
1236 bad:
1237 return(error);
1238 }
1239
1240
1241
1242
1243 /*
1244 * Our image activator table; this is the table of the image types we are
1245 * capable of loading. We list them in order of preference to ensure the
1246 * fastest image load speed.
1247 *
1248 * XXX hardcoded, for now; should use linker sets
1249 */
1250 struct execsw {
1251 int (*ex_imgact)(struct image_params *);
1252 const char *ex_name;
1253 } execsw[] = {
1254 { exec_mach_imgact, "Mach-o Binary" },
1255 { exec_fat_imgact, "Fat Binary" },
1256 { exec_shell_imgact, "Interpreter Script" },
1257 { NULL, NULL}
1258 };
1259
1260
1261 /*
1262 * exec_activate_image
1263 *
1264 * Description: Iterate through the available image activators, and activate
1265 * the image associated with the imgp structure. We start with
1266 * the
1267 *
1268 * Parameters: struct image_params * Image parameter block
1269 *
1270 * Returns: 0 Success
1271 * EBADEXEC The executable is corrupt/unknown
1272 * execargs_alloc:EINVAL Invalid argument
1273 * execargs_alloc:EACCES Permission denied
1274 * execargs_alloc:EINTR Interrupted function
1275 * execargs_alloc:ENOMEM Not enough space
1276 * exec_save_path:EFAULT Bad address
1277 * exec_save_path:ENAMETOOLONG Filename too long
1278 * exec_check_permissions:EACCES Permission denied
1279 * exec_check_permissions:ENOEXEC Executable file format error
1280 * exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
1281 * exec_check_permissions:???
1282 * namei:???
1283 * vn_rdwr:??? [anything vn_rdwr can return]
1284 * <ex_imgact>:??? [anything an imgact can return]
1285 * EDEADLK Process is being terminated
1286 */
1287 static int
1288 exec_activate_image(struct image_params *imgp)
1289 {
1290 struct nameidata *ndp = NULL;
1291 const char *excpath;
1292 int error;
1293 int resid;
1294 int once = 1; /* save SGUID-ness for interpreted files */
1295 int i;
1296 int itercount = 0;
1297 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1298
1299 error = execargs_alloc(imgp);
1300 if (error)
1301 goto bad_notrans;
1302
1303 error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
1304 if (error) {
1305 goto bad_notrans;
1306 }
1307
1308 /* Use excpath, which contains the copyin-ed exec path */
1309 DTRACE_PROC1(exec, uintptr_t, excpath);
1310
1311 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
1312 if (ndp == NULL) {
1313 error = ENOMEM;
1314 goto bad_notrans;
1315 }
1316
1317 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1318 UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1319
1320 again:
1321 error = namei(ndp);
1322 if (error)
1323 goto bad_notrans;
1324 imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */
1325 imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */
1326
1327 /*
1328 * Before we start the transition from binary A to binary B, make
1329 * sure another thread hasn't started exiting the process. We grab
1330 * the proc lock to check p_lflag initially, and the transition
1331 * mechanism ensures that the value doesn't change after we release
1332 * the lock.
1333 */
1334 proc_lock(p);
1335 if (p->p_lflag & P_LEXIT) {
1336 error = EDEADLK;
1337 proc_unlock(p);
1338 goto bad_notrans;
1339 }
1340 error = proc_transstart(p, 1, 0);
1341 proc_unlock(p);
1342 if (error)
1343 goto bad_notrans;
1344
1345 error = exec_check_permissions(imgp);
1346 if (error)
1347 goto bad;
1348
1349 /* Copy; avoid invocation of an interpreter overwriting the original */
1350 if (once) {
1351 once = 0;
1352 *imgp->ip_origvattr = *imgp->ip_vattr;
1353 }
1354
1355 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1356 UIO_SYSSPACE, IO_NODELOCKED,
1357 vfs_context_ucred(imgp->ip_vfs_context),
1358 &resid, vfs_context_proc(imgp->ip_vfs_context));
1359 if (error)
1360 goto bad;
1361
1362 if (resid) {
1363 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
1364 }
1365
1366 encapsulated_binary:
1367 /* Limit the number of iterations we will attempt on each binary */
1368 if (++itercount > EAI_ITERLIMIT) {
1369 error = EBADEXEC;
1370 goto bad;
1371 }
1372 error = -1;
1373 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1374
1375 error = (*execsw[i].ex_imgact)(imgp);
1376
1377 switch (error) {
1378 /* case -1: not claimed: continue */
1379 case -2: /* Encapsulated binary, imgp->ip_XXX set for next iteration */
1380 goto encapsulated_binary;
1381
1382 case -3: /* Interpreter */
1383 #if CONFIG_MACF
1384 /*
1385 * Copy the script label for later use. Note that
1386 * the label can be different when the script is
1387 * actually read by the interpreter.
1388 */
1389 if (imgp->ip_scriptlabelp)
1390 mac_vnode_label_free(imgp->ip_scriptlabelp);
1391 imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1392 if (imgp->ip_scriptlabelp == NULL) {
1393 error = ENOMEM;
1394 break;
1395 }
1396 mac_vnode_label_copy(imgp->ip_vp->v_label,
1397 imgp->ip_scriptlabelp);
1398
1399 /*
1400 * Take a ref of the script vnode for later use.
1401 */
1402 if (imgp->ip_scriptvp)
1403 vnode_put(imgp->ip_scriptvp);
1404 if (vnode_getwithref(imgp->ip_vp) == 0)
1405 imgp->ip_scriptvp = imgp->ip_vp;
1406 #endif
1407
1408 nameidone(ndp);
1409
1410 vnode_put(imgp->ip_vp);
1411 imgp->ip_vp = NULL; /* already put */
1412 imgp->ip_ndp = NULL; /* already nameidone */
1413
1414 /* Use excpath, which exec_shell_imgact reset to the interpreter */
1415 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
1416 UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1417
1418 proc_transend(p, 0);
1419 goto again;
1420
1421 default:
1422 break;
1423 }
1424 }
1425
1426 /*
1427 * Call out to allow 3rd party notification of exec.
1428 * Ignore result of kauth_authorize_fileop call.
1429 */
1430 if (error == 0 && kauth_authorize_fileop_has_listeners()) {
1431 kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1432 KAUTH_FILEOP_EXEC,
1433 (uintptr_t)ndp->ni_vp, 0);
1434 }
1435
1436 if (error == 0) {
1437 /*
1438 * Reset atm context from task
1439 */
1440 task_atm_reset(p->task);
1441
1442 /*
1443 * Reset old bank context from task
1444 */
1445 task_bank_reset(p->task);
1446 }
1447 bad:
1448 proc_transend(p, 0);
1449
1450 bad_notrans:
1451 if (imgp->ip_strings)
1452 execargs_free(imgp);
1453 if (imgp->ip_ndp)
1454 nameidone(imgp->ip_ndp);
1455 if (ndp)
1456 FREE(ndp, M_TEMP);
1457
1458 return (error);
1459 }
1460
1461
1462 /*
1463 * exec_handle_spawnattr_policy
1464 *
1465 * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task.
1466 *
1467 * Parameters: proc_t p process to apply attributes to
1468 * int psa_apptype posix spawn attribute apptype
1469 *
1470 * Returns: 0 Success
1471 */
1472 static errno_t
1473 exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
1474 ipc_port_t * portwatch_ports, int portwatch_count)
1475 {
1476 int apptype = TASK_APPTYPE_NONE;
1477 int qos_clamp = THREAD_QOS_UNSPECIFIED;
1478 int role = TASK_UNSPECIFIED;
1479
1480 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1481 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1482
1483 switch(proctype) {
1484 case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1485 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1486 break;
1487 case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1488 apptype = TASK_APPTYPE_DAEMON_STANDARD;
1489 break;
1490 case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1491 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1492 break;
1493 case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1494 apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1495 break;
1496 case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1497 apptype = TASK_APPTYPE_APP_DEFAULT;
1498 break;
1499 case POSIX_SPAWN_PROC_TYPE_APP_TAL:
1500 apptype = TASK_APPTYPE_APP_TAL;
1501 break;
1502 default:
1503 apptype = TASK_APPTYPE_NONE;
1504 /* TODO: Should an invalid value here fail the spawn? */
1505 break;
1506 }
1507 }
1508
1509 if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
1510 switch (psa_qos_clamp) {
1511 case POSIX_SPAWN_PROC_CLAMP_UTILITY:
1512 qos_clamp = THREAD_QOS_UTILITY;
1513 break;
1514 case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
1515 qos_clamp = THREAD_QOS_BACKGROUND;
1516 break;
1517 case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
1518 qos_clamp = THREAD_QOS_MAINTENANCE;
1519 break;
1520 default:
1521 qos_clamp = THREAD_QOS_UNSPECIFIED;
1522 /* TODO: Should an invalid value here fail the spawn? */
1523 break;
1524 }
1525 }
1526
1527 if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
1528 proc_darwin_role_to_task_role(psa_darwin_role, &role);
1529 }
1530
1531 if (apptype != TASK_APPTYPE_NONE ||
1532 qos_clamp != THREAD_QOS_UNSPECIFIED ||
1533 role != TASK_UNSPECIFIED) {
1534 proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, role,
1535 portwatch_ports, portwatch_count);
1536 }
1537
1538 return (0);
1539 }
1540
1541
1542 /*
1543 * exec_handle_port_actions
1544 *
1545 * Description: Go through the _posix_port_actions_t contents,
1546 * calling task_set_special_port, task_set_exception_ports
1547 * and/or audit_session_spawnjoin for the current task.
1548 *
1549 * Parameters: struct image_params * Image parameter block
1550 * short psa_flags posix spawn attribute flags
1551 *
1552 * Returns: 0 Success
1553 * EINVAL Failure
1554 * ENOTSUP Illegal posix_spawn attr flag was set
1555 */
1556 static errno_t
1557 exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports)
1558 {
1559 _posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1560 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1561 _ps_port_action_t *act = NULL;
1562 task_t task = p->task;
1563 ipc_port_t port = NULL;
1564 errno_t ret = 0;
1565 int i;
1566
1567 *portwatch_present = FALSE;
1568
1569 for (i = 0; i < pacts->pspa_count; i++) {
1570 act = &pacts->pspa_actions[i];
1571
1572 if (ipc_object_copyin(get_task_ipcspace(current_task()),
1573 act->new_port, MACH_MSG_TYPE_COPY_SEND,
1574 (ipc_object_t *) &port) != KERN_SUCCESS) {
1575 ret = EINVAL;
1576 goto done;
1577 }
1578
1579 switch (act->port_type) {
1580 case PSPA_SPECIAL:
1581 /* Only allowed when not under vfork */
1582 if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1583 ret = ENOTSUP;
1584 else if (task_set_special_port(task,
1585 act->which, port) != KERN_SUCCESS)
1586 ret = EINVAL;
1587 break;
1588
1589 case PSPA_EXCEPTION:
1590 /* Only allowed when not under vfork */
1591 if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1592 ret = ENOTSUP;
1593 else if (task_set_exception_ports(task,
1594 act->mask, port, act->behavior,
1595 act->flavor) != KERN_SUCCESS)
1596 ret = EINVAL;
1597 break;
1598 #if CONFIG_AUDIT
1599 case PSPA_AU_SESSION:
1600 ret = audit_session_spawnjoin(p, port);
1601 break;
1602 #endif
1603 case PSPA_IMP_WATCHPORTS:
1604 if (portwatch_ports != NULL) {
1605 *portwatch_present = TRUE;
1606 /* hold on to this till end of spawn */
1607 portwatch_ports[i] = port;
1608 ret = 0;
1609 } else
1610 ipc_port_release_send(port);
1611 break;
1612 default:
1613 ret = EINVAL;
1614 break;
1615 }
1616
1617 /* action failed, so release port resources */
1618
1619 if (ret) {
1620 ipc_port_release_send(port);
1621 break;
1622 }
1623 }
1624
1625 done:
1626 if (0 != ret)
1627 DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
1628 return (ret);
1629 }
1630
1631 /*
1632 * exec_handle_file_actions
1633 *
1634 * Description: Go through the _posix_file_actions_t contents applying the
1635 * open, close, and dup2 operations to the open file table for
1636 * the current process.
1637 *
1638 * Parameters: struct image_params * Image parameter block
1639 *
1640 * Returns: 0 Success
1641 * ???
1642 *
1643 * Note: Actions are applied in the order specified, with the credential
1644 * of the parent process. This is done to permit the parent
1645 * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1646 * the child following operations the child may in fact not be
1647 * normally permitted to perform.
1648 */
1649 static int
1650 exec_handle_file_actions(struct image_params *imgp, short psa_flags)
1651 {
1652 int error = 0;
1653 int action;
1654 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1655 _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
1656 int ival[2]; /* dummy retval for system calls) */
1657
1658 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1659 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1660
1661 switch(psfa->psfaa_type) {
1662 case PSFA_OPEN: {
1663 /*
1664 * Open is different, in that it requires the use of
1665 * a path argument, which is normally copied in from
1666 * user space; because of this, we have to support an
1667 * open from kernel space that passes an address space
1668 * context of UIO_SYSSPACE, and casts the address
1669 * argument to a user_addr_t.
1670 */
1671 char *bufp = NULL;
1672 struct vnode_attr *vap;
1673 struct nameidata *ndp;
1674 int mode = psfa->psfaa_openargs.psfao_mode;
1675 struct dup2_args dup2a;
1676 struct close_nocancel_args ca;
1677 int origfd;
1678
1679 MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
1680 if (bufp == NULL) {
1681 error = ENOMEM;
1682 break;
1683 }
1684
1685 vap = (struct vnode_attr *) bufp;
1686 ndp = (struct nameidata *) (bufp + sizeof(*vap));
1687
1688 VATTR_INIT(vap);
1689 /* Mask off all but regular access permissions */
1690 mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1691 VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
1692
1693 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
1694 CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1695 imgp->ip_vfs_context);
1696
1697 error = open1(imgp->ip_vfs_context,
1698 ndp,
1699 psfa->psfaa_openargs.psfao_oflag,
1700 vap,
1701 fileproc_alloc_init, NULL,
1702 ival);
1703
1704 FREE(bufp, M_TEMP);
1705
1706 /*
1707 * If there's an error, or we get the right fd by
1708 * accident, then drop out here. This is easier than
1709 * reworking all the open code to preallocate fd
1710 * slots, and internally taking one as an argument.
1711 */
1712 if (error || ival[0] == psfa->psfaa_filedes)
1713 break;
1714
1715 origfd = ival[0];
1716 /*
1717 * If we didn't fall out from an error, we ended up
1718 * with the wrong fd; so now we've got to try to dup2
1719 * it to the right one.
1720 */
1721 dup2a.from = origfd;
1722 dup2a.to = psfa->psfaa_filedes;
1723
1724 /*
1725 * The dup2() system call implementation sets
1726 * ival to newfd in the success case, but we
1727 * can ignore that, since if we didn't get the
1728 * fd we wanted, the error will stop us.
1729 */
1730 error = dup2(p, &dup2a, ival);
1731 if (error)
1732 break;
1733
1734 /*
1735 * Finally, close the original fd.
1736 */
1737 ca.fd = origfd;
1738
1739 error = close_nocancel(p, &ca, ival);
1740 }
1741 break;
1742
1743 case PSFA_DUP2: {
1744 struct dup2_args dup2a;
1745
1746 dup2a.from = psfa->psfaa_filedes;
1747 dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1748
1749 /*
1750 * The dup2() system call implementation sets
1751 * ival to newfd in the success case, but we
1752 * can ignore that, since if we didn't get the
1753 * fd we wanted, the error will stop us.
1754 */
1755 error = dup2(p, &dup2a, ival);
1756 }
1757 break;
1758
1759 case PSFA_CLOSE: {
1760 struct close_nocancel_args ca;
1761
1762 ca.fd = psfa->psfaa_filedes;
1763
1764 error = close_nocancel(p, &ca, ival);
1765 }
1766 break;
1767
1768 case PSFA_INHERIT: {
1769 struct fcntl_nocancel_args fcntla;
1770
1771 /*
1772 * Check to see if the descriptor exists, and
1773 * ensure it's -not- marked as close-on-exec.
1774 *
1775 * Attempting to "inherit" a guarded fd will
1776 * result in a error.
1777 */
1778 fcntla.fd = psfa->psfaa_filedes;
1779 fcntla.cmd = F_GETFD;
1780 if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0)
1781 break;
1782
1783 if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
1784 fcntla.fd = psfa->psfaa_filedes;
1785 fcntla.cmd = F_SETFD;
1786 fcntla.arg = ival[0] & ~FD_CLOEXEC;
1787 error = fcntl_nocancel(p, &fcntla, ival);
1788 }
1789
1790 }
1791 break;
1792
1793 default:
1794 error = EINVAL;
1795 break;
1796 }
1797
1798 /* All file actions failures are considered fatal, per POSIX */
1799
1800 if (error) {
1801 if (PSFA_OPEN == psfa->psfaa_type) {
1802 DTRACE_PROC1(spawn__open__failure, uintptr_t,
1803 psfa->psfaa_openargs.psfao_path);
1804 } else {
1805 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1806 }
1807 break;
1808 }
1809 }
1810
1811 if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
1812 return (error);
1813
1814 /*
1815 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1816 * this spawn only) as if "close on exec" is the default
1817 * disposition of all pre-existing file descriptors. In this case,
1818 * the list of file descriptors mentioned in the file actions
1819 * are the only ones that can be inherited, so mark them now.
1820 *
1821 * The actual closing part comes later, in fdexec().
1822 */
1823 proc_fdlock(p);
1824 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1825 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1826 int fd = psfa->psfaa_filedes;
1827
1828 switch (psfa->psfaa_type) {
1829 case PSFA_DUP2:
1830 fd = psfa->psfaa_openargs.psfao_oflag;
1831 /*FALLTHROUGH*/
1832 case PSFA_OPEN:
1833 case PSFA_INHERIT:
1834 *fdflags(p, fd) |= UF_INHERIT;
1835 break;
1836
1837 case PSFA_CLOSE:
1838 break;
1839 }
1840 }
1841 proc_fdunlock(p);
1842
1843 return (0);
1844 }
1845
1846 #if CONFIG_MACF
1847 /*
1848 * exec_spawnattr_getmacpolicyinfo
1849 */
1850 void *
1851 exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
1852 {
1853 const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1854 int i;
1855
1856 if (psmx == NULL)
1857 return NULL;
1858
1859 for (i = 0; i < psmx->psmx_count; i++) {
1860 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1861 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
1862 if (lenp != NULL)
1863 *lenp = extension->datalen;
1864 return extension->datap;
1865 }
1866 }
1867
1868 if (lenp != NULL)
1869 *lenp = 0;
1870 return NULL;
1871 }
1872
1873 static int
1874 spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
1875 {
1876 _posix_spawn_mac_policy_extensions_t psmx = NULL;
1877 int error = 0;
1878 int copycnt = 0;
1879 int i = 0;
1880
1881 *psmxp = NULL;
1882
1883 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
1884 px_args->mac_extensions_size > PAGE_SIZE) {
1885 error = EINVAL;
1886 goto bad;
1887 }
1888
1889 MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
1890 if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0)
1891 goto bad;
1892
1893 if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) {
1894 error = EINVAL;
1895 goto bad;
1896 }
1897
1898 for (i = 0; i < psmx->psmx_count; i++) {
1899 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1900 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
1901 error = EINVAL;
1902 goto bad;
1903 }
1904 }
1905
1906 for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
1907 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
1908 void *data = NULL;
1909
1910 MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
1911 if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
1912 FREE(data, M_TEMP);
1913 goto bad;
1914 }
1915 extension->datap = data;
1916 }
1917
1918 *psmxp = psmx;
1919 return 0;
1920
1921 bad:
1922 if (psmx != NULL) {
1923 for (i = 0; i < copycnt; i++)
1924 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1925 FREE(psmx, M_TEMP);
1926 }
1927 return error;
1928 }
1929
1930 static void
1931 spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
1932 {
1933 int i;
1934
1935 if (psmx == NULL)
1936 return;
1937 for (i = 0; i < psmx->psmx_count; i++)
1938 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1939 FREE(psmx, M_TEMP);
1940 }
1941 #endif /* CONFIG_MACF */
1942
1943 #if CONFIG_COALITIONS
1944 static inline void spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
1945 {
1946 for (int c = 0; c < COALITION_NUM_TYPES; c++) {
1947 if (coal[c]) {
1948 coalition_remove_active(coal[c]);
1949 coalition_release(coal[c]);
1950 }
1951 }
1952 }
1953 #endif
1954
1955 #if CONFIG_PERSONAS
1956 static int spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
1957 {
1958 int error = 0;
1959 struct persona *persona = NULL;
1960 int verify = px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_VERIFY;
1961
1962 /*
1963 * TODO: rdar://problem/19981151
1964 * Add entitlement check!
1965 */
1966 if (!kauth_cred_issuser(kauth_cred_get()))
1967 return EPERM;
1968
1969 persona = persona_lookup(px_persona->pspi_id);
1970 if (!persona) {
1971 error = ESRCH;
1972 goto out;
1973 }
1974
1975 if (verify) {
1976 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
1977 if (px_persona->pspi_uid != persona_get_uid(persona)) {
1978 error = EINVAL;
1979 goto out;
1980 }
1981 }
1982 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
1983 if (px_persona->pspi_gid != persona_get_gid(persona)) {
1984 error = EINVAL;
1985 goto out;
1986 }
1987 }
1988 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
1989 int ngroups = 0;
1990 gid_t groups[NGROUPS_MAX];
1991
1992 if (persona_get_groups(persona, &ngroups, groups,
1993 px_persona->pspi_ngroups) != 0) {
1994 error = EINVAL;
1995 goto out;
1996 }
1997 if (ngroups != (int)px_persona->pspi_ngroups) {
1998 error = EINVAL;
1999 goto out;
2000 }
2001 while (ngroups--) {
2002 if (px_persona->pspi_groups[ngroups] != groups[ngroups]) {
2003 error = EINVAL;
2004 goto out;
2005 }
2006 }
2007 if (px_persona->pspi_gmuid != persona_get_gmuid(persona)) {
2008 error = EINVAL;
2009 goto out;
2010 }
2011 }
2012 }
2013
2014 out:
2015 if (persona)
2016 persona_put(persona);
2017
2018 return error;
2019 }
2020
2021 static int spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
2022 {
2023 int ret;
2024 kauth_cred_t cred;
2025 struct persona *persona = NULL;
2026 int override = !!(px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
2027
2028 if (!override)
2029 return persona_proc_adopt_id(p, px_persona->pspi_id, NULL);
2030
2031 /*
2032 * we want to spawn into the given persona, but we want to override
2033 * the kauth with a different UID/GID combo
2034 */
2035 persona = persona_lookup(px_persona->pspi_id);
2036 if (!persona)
2037 return ESRCH;
2038
2039 cred = persona_get_cred(persona);
2040 if (!cred) {
2041 ret = EINVAL;
2042 goto out;
2043 }
2044
2045 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2046 cred = kauth_cred_setresuid(cred,
2047 px_persona->pspi_uid,
2048 px_persona->pspi_uid,
2049 px_persona->pspi_uid,
2050 KAUTH_UID_NONE);
2051 }
2052
2053 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2054 cred = kauth_cred_setresgid(cred,
2055 px_persona->pspi_gid,
2056 px_persona->pspi_gid,
2057 px_persona->pspi_gid);
2058 }
2059
2060 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2061 cred = kauth_cred_setgroups(cred,
2062 px_persona->pspi_groups,
2063 px_persona->pspi_ngroups,
2064 px_persona->pspi_gmuid);
2065 }
2066
2067 ret = persona_proc_adopt(p, persona, cred);
2068
2069 out:
2070 persona_put(persona);
2071 return ret;
2072 }
2073 #endif
2074
2075 void
2076 proc_set_return_wait(proc_t p)
2077 {
2078 proc_lock(p);
2079 p->p_lflag |= P_LRETURNWAIT;
2080 proc_unlock(p);
2081 }
2082
2083 void
2084 proc_clear_return_wait(proc_t p, thread_t child_thread)
2085 {
2086 proc_lock(p);
2087
2088 p->p_lflag &= ~P_LRETURNWAIT;
2089 if (p->p_lflag & P_LRETURNWAITER) {
2090 wakeup(&p->p_lflag);
2091 }
2092
2093 proc_unlock(p);
2094
2095 (void)thread_resume(child_thread);
2096 }
2097
2098 void
2099 proc_wait_to_return()
2100 {
2101 proc_t p;
2102
2103 p = current_proc();
2104 proc_lock(p);
2105
2106 if (p->p_lflag & P_LRETURNWAIT) {
2107 p->p_lflag |= P_LRETURNWAITER;
2108 do {
2109 msleep(&p->p_lflag, &p->p_mlock, 0,
2110 "thread_check_setup_complete", NULL);
2111 } while (p->p_lflag & P_LRETURNWAIT);
2112 p->p_lflag &= ~P_LRETURNWAITER;
2113 }
2114
2115 proc_unlock(p);
2116 thread_bootstrap_return();
2117 }
2118
2119 /*
2120 * posix_spawn
2121 *
2122 * Parameters: uap->pid Pointer to pid return area
2123 * uap->fname File name to exec
2124 * uap->argp Argument list
2125 * uap->envp Environment list
2126 *
2127 * Returns: 0 Success
2128 * EINVAL Invalid argument
2129 * ENOTSUP Not supported
2130 * ENOEXEC Executable file format error
2131 * exec_activate_image:EINVAL Invalid argument
2132 * exec_activate_image:EACCES Permission denied
2133 * exec_activate_image:EINTR Interrupted function
2134 * exec_activate_image:ENOMEM Not enough space
2135 * exec_activate_image:EFAULT Bad address
2136 * exec_activate_image:ENAMETOOLONG Filename too long
2137 * exec_activate_image:ENOEXEC Executable file format error
2138 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
2139 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
2140 * exec_activate_image:???
2141 * mac_execve_enter:???
2142 *
2143 * TODO: Expect to need __mac_posix_spawn() at some point...
2144 * Handle posix_spawnattr_t
2145 * Handle posix_spawn_file_actions_t
2146 */
2147 int
2148 posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
2149 {
2150 proc_t p = ap; /* quiet bogus GCC vfork() warning */
2151 user_addr_t pid = uap->pid;
2152 int ival[2]; /* dummy retval for setpgid() */
2153 char *bufp = NULL;
2154 struct image_params *imgp;
2155 struct vnode_attr *vap;
2156 struct vnode_attr *origvap;
2157 struct uthread *uthread = 0; /* compiler complains if not set to 0*/
2158 int error, sig;
2159 int is_64 = IS_64BIT_PROCESS(p);
2160 struct vfs_context context;
2161 struct user__posix_spawn_args_desc px_args;
2162 struct _posix_spawnattr px_sa;
2163 _posix_spawn_file_actions_t px_sfap = NULL;
2164 _posix_spawn_port_actions_t px_spap = NULL;
2165 struct __kern_sigaction vec;
2166 boolean_t spawn_no_exec = FALSE;
2167 boolean_t proc_transit_set = TRUE;
2168 boolean_t exec_done = FALSE;
2169 int portwatch_count = 0;
2170 ipc_port_t * portwatch_ports = NULL;
2171 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
2172 #if CONFIG_PERSONAS
2173 struct _posix_spawn_persona_info *px_persona = NULL;
2174 #endif
2175
2176 /*
2177 * Allocate a big chunk for locals instead of using stack since these
2178 * structures are pretty big.
2179 */
2180 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
2181 imgp = (struct image_params *) bufp;
2182 if (bufp == NULL) {
2183 error = ENOMEM;
2184 goto bad;
2185 }
2186 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
2187 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2188
2189 /* Initialize the common data in the image_params structure */
2190 imgp->ip_user_fname = uap->path;
2191 imgp->ip_user_argv = uap->argv;
2192 imgp->ip_user_envv = uap->envp;
2193 imgp->ip_vattr = vap;
2194 imgp->ip_origvattr = origvap;
2195 imgp->ip_vfs_context = &context;
2196 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
2197 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2198 imgp->ip_mac_return = 0;
2199 imgp->ip_px_persona = NULL;
2200
2201 if (uap->adesc != USER_ADDR_NULL) {
2202 if(is_64) {
2203 error = copyin(uap->adesc, &px_args, sizeof(px_args));
2204 } else {
2205 struct user32__posix_spawn_args_desc px_args32;
2206
2207 error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
2208
2209 /*
2210 * Convert arguments descriptor from external 32 bit
2211 * representation to internal 64 bit representation
2212 */
2213 px_args.attr_size = px_args32.attr_size;
2214 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
2215 px_args.file_actions_size = px_args32.file_actions_size;
2216 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
2217 px_args.port_actions_size = px_args32.port_actions_size;
2218 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
2219 px_args.mac_extensions_size = px_args32.mac_extensions_size;
2220 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
2221 px_args.coal_info_size = px_args32.coal_info_size;
2222 px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
2223 px_args.persona_info_size = px_args32.persona_info_size;
2224 px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
2225 }
2226 if (error)
2227 goto bad;
2228
2229 if (px_args.attr_size != 0) {
2230 /*
2231 * We are not copying the port_actions pointer,
2232 * because we already have it from px_args.
2233 * This is a bit fragile: <rdar://problem/16427422>
2234 */
2235
2236 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0))
2237 goto bad;
2238
2239 bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
2240
2241 imgp->ip_px_sa = &px_sa;
2242 }
2243 if (px_args.file_actions_size != 0) {
2244 /* Limit file_actions to allowed number of open files */
2245 int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
2246 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
2247 px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) {
2248 error = EINVAL;
2249 goto bad;
2250 }
2251 MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
2252 if (px_sfap == NULL) {
2253 error = ENOMEM;
2254 goto bad;
2255 }
2256 imgp->ip_px_sfa = px_sfap;
2257
2258 if ((error = copyin(px_args.file_actions, px_sfap,
2259 px_args.file_actions_size)) != 0)
2260 goto bad;
2261
2262 /* Verify that the action count matches the struct size */
2263 if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) {
2264 error = EINVAL;
2265 goto bad;
2266 }
2267 }
2268 if (px_args.port_actions_size != 0) {
2269 /* Limit port_actions to one page of data */
2270 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
2271 px_args.port_actions_size > PAGE_SIZE) {
2272 error = EINVAL;
2273 goto bad;
2274 }
2275
2276 MALLOC(px_spap, _posix_spawn_port_actions_t,
2277 px_args.port_actions_size, M_TEMP, M_WAITOK);
2278 if (px_spap == NULL) {
2279 error = ENOMEM;
2280 goto bad;
2281 }
2282 imgp->ip_px_spa = px_spap;
2283
2284 if ((error = copyin(px_args.port_actions, px_spap,
2285 px_args.port_actions_size)) != 0)
2286 goto bad;
2287
2288 /* Verify that the action count matches the struct size */
2289 if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) {
2290 error = EINVAL;
2291 goto bad;
2292 }
2293 }
2294 #if CONFIG_PERSONAS
2295 /* copy in the persona info */
2296 if (px_args.persona_info_size != 0 && px_args.persona_info != 0) {
2297 /* for now, we need the exact same struct in user space */
2298 if (px_args.persona_info_size != sizeof(*px_persona)) {
2299 error = ERANGE;
2300 goto bad;
2301 }
2302
2303 MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK|M_ZERO);
2304 if (px_persona == NULL) {
2305 error = ENOMEM;
2306 goto bad;
2307 }
2308 imgp->ip_px_persona = px_persona;
2309
2310 if ((error = copyin(px_args.persona_info, px_persona,
2311 px_args.persona_info_size)) != 0)
2312 goto bad;
2313 if ((error = spawn_validate_persona(px_persona)) != 0)
2314 goto bad;
2315 }
2316 #endif
2317 #if CONFIG_MACF
2318 if (px_args.mac_extensions_size != 0) {
2319 if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
2320 goto bad;
2321 }
2322 #endif /* CONFIG_MACF */
2323 }
2324
2325 /* set uthread to parent */
2326 uthread = get_bsdthread_info(current_thread());
2327
2328 /*
2329 * <rdar://6640530>; this does not result in a behaviour change
2330 * relative to Leopard, so there should not be any existing code
2331 * which depends on it.
2332 */
2333 if (uthread->uu_flag & UT_VFORK) {
2334 error = EINVAL;
2335 goto bad;
2336 }
2337
2338 /*
2339 * If we don't have the extension flag that turns "posix_spawn()"
2340 * into "execve() with options", then we will be creating a new
2341 * process which does not inherit memory from the parent process,
2342 * which is one of the most expensive things about using fork()
2343 * and execve().
2344 */
2345 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
2346
2347 /* Set the new task's coalition, if it is requested. */
2348 coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
2349 #if CONFIG_COALITIONS
2350 int i, ncoals;
2351 kern_return_t kr = KERN_SUCCESS;
2352 struct _posix_spawn_coalition_info coal_info;
2353 int coal_role[COALITION_NUM_TYPES];
2354
2355 if (imgp->ip_px_sa == NULL || !px_args.coal_info)
2356 goto do_fork1;
2357
2358 memset(&coal_info, 0, sizeof(coal_info));
2359
2360 if (px_args.coal_info_size > sizeof(coal_info))
2361 px_args.coal_info_size = sizeof(coal_info);
2362 error = copyin(px_args.coal_info,
2363 &coal_info, px_args.coal_info_size);
2364 if (error != 0)
2365 goto bad;
2366
2367 ncoals = 0;
2368 for (i = 0; i < COALITION_NUM_TYPES; i++) {
2369 uint64_t cid = coal_info.psci_info[i].psci_id;
2370 if (cid != 0) {
2371 /*
2372 * don't allow tasks which are not in a
2373 * privileged coalition to spawn processes
2374 * into coalitions other than their own
2375 */
2376 if (!task_is_in_privileged_coalition(p->task, i)) {
2377 coal_dbg("ERROR: %d not in privilegd "
2378 "coalition of type %d",
2379 p->p_pid, i);
2380 spawn_coalitions_release_all(coal);
2381 error = EPERM;
2382 goto bad;
2383 }
2384
2385 coal_dbg("searching for coalition id:%llu", cid);
2386 /*
2387 * take a reference and activation on the
2388 * coalition to guard against free-while-spawn
2389 * races
2390 */
2391 coal[i] = coalition_find_and_activate_by_id(cid);
2392 if (coal[i] == COALITION_NULL) {
2393 coal_dbg("could not find coalition id:%llu "
2394 "(perhaps it has been terminated or reaped)", cid);
2395 /*
2396 * release any other coalition's we
2397 * may have a reference to
2398 */
2399 spawn_coalitions_release_all(coal);
2400 error = ESRCH;
2401 goto bad;
2402 }
2403 if (coalition_type(coal[i]) != i) {
2404 coal_dbg("coalition with id:%lld is not of type:%d"
2405 " (it's type:%d)", cid, i, coalition_type(coal[i]));
2406 error = ESRCH;
2407 goto bad;
2408 }
2409 coal_role[i] = coal_info.psci_info[i].psci_role;
2410 ncoals++;
2411 }
2412 }
2413 if (ncoals < COALITION_NUM_TYPES) {
2414 /*
2415 * If the user is attempting to spawn into a subset of
2416 * the known coalition types, then make sure they have
2417 * _at_least_ specified a resource coalition. If not,
2418 * the following fork1() call will implicitly force an
2419 * inheritance from 'p' and won't actually spawn the
2420 * new task into the coalitions the user specified.
2421 * (also the call to coalitions_set_roles will panic)
2422 */
2423 if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
2424 spawn_coalitions_release_all(coal);
2425 error = EINVAL;
2426 goto bad;
2427 }
2428 }
2429 do_fork1:
2430 #endif /* CONFIG_COALITIONS */
2431
2432 /*
2433 * note that this will implicitly inherit the
2434 * caller's persona (if it exists)
2435 */
2436 error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
2437
2438 #if CONFIG_COALITIONS
2439 /* set the roles of this task within each given coalition */
2440 if (error == 0) {
2441 kr = coalitions_set_roles(coal, get_threadtask(imgp->ip_new_thread), coal_role);
2442 if (kr != KERN_SUCCESS)
2443 error = EINVAL;
2444 }
2445
2446 /* drop our references and activations - fork1() now holds them */
2447 spawn_coalitions_release_all(coal);
2448 #endif /* CONFIG_COALITIONS */
2449 if (error != 0) {
2450 goto bad;
2451 }
2452 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */
2453 spawn_no_exec = TRUE; /* used in later tests */
2454
2455 #if CONFIG_PERSONAS
2456 /*
2457 * If the parent isn't in a persona (launchd), and
2458 * hasn't specified a new persona for the process,
2459 * then we'll put the process into the system persona
2460 *
2461 * TODO: this will have to be re-worked because as of
2462 * now, without any launchd adoption, the resulting
2463 * xpcproxy process will not have sufficient
2464 * privileges to setuid/gid.
2465 */
2466 #if 0
2467 if (!proc_has_persona(p) && imgp->ip_px_persona == NULL) {
2468 MALLOC(px_persona, struct _posix_spawn_persona_info *,
2469 sizeof(*px_persona), M_TEMP, M_WAITOK|M_ZERO);
2470 if (px_persona == NULL) {
2471 error = ENOMEM;
2472 goto bad;
2473 }
2474 px_persona->pspi_id = persona_get_id(g_system_persona);
2475 imgp->ip_px_persona = px_persona;
2476 }
2477 #endif /* 0 */
2478 #endif /* CONFIG_PERSONAS */
2479 }
2480
2481 if (spawn_no_exec) {
2482 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
2483
2484 /*
2485 * We had to wait until this point before firing the
2486 * proc:::create probe, otherwise p would not point to the
2487 * child process.
2488 */
2489 DTRACE_PROC1(create, proc_t, p);
2490 }
2491 assert(p != NULL);
2492
2493 /* By default, the thread everyone plays with is the parent */
2494 context.vc_thread = current_thread();
2495 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
2496
2497 /*
2498 * However, if we're not in the setexec case, redirect the context
2499 * to the newly created process instead
2500 */
2501 if (spawn_no_exec)
2502 context.vc_thread = imgp->ip_new_thread;
2503
2504 /*
2505 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
2506 * to handle the file_actions. Since vfork() also ends up setting
2507 * us into the parent process group, and saved off the signal flags,
2508 * this is also where we want to handle the spawn flags.
2509 */
2510
2511 /* Has spawn file actions? */
2512 if (imgp->ip_px_sfa != NULL) {
2513 /*
2514 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2515 * is handled in exec_handle_file_actions().
2516 */
2517 if ((error = exec_handle_file_actions(imgp,
2518 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
2519 goto bad;
2520 }
2521
2522 /* Has spawn port actions? */
2523 if (imgp->ip_px_spa != NULL) {
2524 boolean_t is_adaptive = FALSE;
2525 boolean_t portwatch_present = FALSE;
2526
2527 /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
2528 if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2529 is_adaptive = TRUE;
2530
2531 /*
2532 * portwatch only:
2533 * Allocate a place to store the ports we want to bind to the new task
2534 * We can't bind them until after the apptype is set.
2535 */
2536 if (px_spap->pspa_count != 0 && is_adaptive) {
2537 portwatch_count = px_spap->pspa_count;
2538 MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO);
2539 } else {
2540 portwatch_ports = NULL;
2541 }
2542
2543 if ((error = exec_handle_port_actions(imgp,
2544 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &portwatch_present, portwatch_ports)) != 0)
2545 goto bad;
2546
2547 if (portwatch_present == FALSE && portwatch_ports != NULL) {
2548 FREE(portwatch_ports, M_TEMP);
2549 portwatch_ports = NULL;
2550 portwatch_count = 0;
2551 }
2552 }
2553
2554 /* Has spawn attr? */
2555 if (imgp->ip_px_sa != NULL) {
2556 /*
2557 * Set the process group ID of the child process; this has
2558 * to happen before the image activation.
2559 */
2560 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2561 struct setpgid_args spga;
2562 spga.pid = p->p_pid;
2563 spga.pgid = px_sa.psa_pgroup;
2564 /*
2565 * Effectively, call setpgid() system call; works
2566 * because there are no pointer arguments.
2567 */
2568 if((error = setpgid(p, &spga, ival)) != 0)
2569 goto bad;
2570 }
2571
2572 /*
2573 * Reset UID/GID to parent's RUID/RGID; This works only
2574 * because the operation occurs *after* the vfork() and
2575 * before the call to exec_handle_sugid() by the image
2576 * activator called from exec_activate_image(). POSIX
2577 * requires that any setuid/setgid bits on the process
2578 * image will take precedence over the spawn attributes
2579 * (re)setting them.
2580 *
2581 * The use of p_ucred is safe, since we are acting on the
2582 * new process, and it has no threads other than the one
2583 * we are creating for it.
2584 */
2585 if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2586 kauth_cred_t my_cred = p->p_ucred;
2587 kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2588 if (my_new_cred != my_cred) {
2589 p->p_ucred = my_new_cred;
2590 /* update cred on proc */
2591 PROC_UPDATE_CREDS_ONPROC(p);
2592 }
2593 }
2594
2595 #if CONFIG_PERSONAS
2596 if (spawn_no_exec && imgp->ip_px_persona != NULL) {
2597 /*
2598 * If we were asked to spawn a process into a new persona,
2599 * do the credential switch now (which may override the UID/GID
2600 * inherit done just above). It's important to do this switch
2601 * before image activation both for reasons stated above, and
2602 * to ensure that the new persona has access to the image/file
2603 * being executed.
2604 */
2605 error = spawn_persona_adopt(p, imgp->ip_px_persona);
2606 if (error != 0)
2607 goto bad;
2608 }
2609 #endif /* CONFIG_PERSONAS */
2610 #if !SECURE_KERNEL
2611 /*
2612 * Disable ASLR for the spawned process.
2613 *
2614 * But only do so if we are not embedded + RELEASE.
2615 * While embedded allows for a boot-arg (-disable_aslr)
2616 * to deal with this (which itself is only honored on
2617 * DEVELOPMENT or DEBUG builds of xnu), it is often
2618 * useful or necessary to disable ASLR on a per-process
2619 * basis for unit testing and debugging.
2620 */
2621 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2622 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2623 #endif /* !SECURE_KERNEL */
2624
2625 /*
2626 * Forcibly disallow execution from data pages for the spawned process
2627 * even if it would otherwise be permitted by the architecture default.
2628 */
2629 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2630 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
2631 }
2632
2633 /*
2634 * Disable ASLR during image activation. This occurs either if the
2635 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2636 * P_DISABLE_ASLR was inherited from the parent process.
2637 */
2638 if (p->p_flag & P_DISABLE_ASLR)
2639 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
2640
2641 /*
2642 * Clear transition flag so we won't hang if exec_activate_image() causes
2643 * an automount (and launchd does a proc sysctl to service it).
2644 *
2645 * <rdar://problem/6848672>, <rdar://problem/5959568>.
2646 */
2647 if (spawn_no_exec) {
2648 proc_transend(p, 0);
2649 proc_transit_set = 0;
2650 }
2651
2652 #if MAC_SPAWN /* XXX */
2653 if (uap->mac_p != USER_ADDR_NULL) {
2654 error = mac_execve_enter(uap->mac_p, imgp);
2655 if (error)
2656 goto bad;
2657 }
2658 #endif
2659
2660 /*
2661 * Activate the image
2662 */
2663 error = exec_activate_image(imgp);
2664
2665 if (error == 0) {
2666 /* process completed the exec */
2667 exec_done = TRUE;
2668 } else if (error == -1) {
2669 /* Image not claimed by any activator? */
2670 error = ENOEXEC;
2671 }
2672
2673 /*
2674 * If we have a spawn attr, and it contains signal related flags,
2675 * the we need to process them in the "context" of the new child
2676 * process, so we have to process it following image activation,
2677 * prior to making the thread runnable in user space. This is
2678 * necessitated by some signal information being per-thread rather
2679 * than per-process, and we don't have the new allocation in hand
2680 * until after the image is activated.
2681 */
2682 if (!error && imgp->ip_px_sa != NULL) {
2683 thread_t child_thread = current_thread();
2684 uthread_t child_uthread = uthread;
2685
2686 /*
2687 * If we created a new child thread, then the thread and
2688 * uthread are different than the current ones; otherwise,
2689 * we leave them, since we are in the exec case instead.
2690 */
2691 if (spawn_no_exec) {
2692 child_thread = imgp->ip_new_thread;
2693 child_uthread = get_bsdthread_info(child_thread);
2694 }
2695
2696 /*
2697 * Mask a list of signals, instead of them being unmasked, if
2698 * they were unmasked in the parent; note that some signals
2699 * are not maskable.
2700 */
2701 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
2702 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2703 /*
2704 * Default a list of signals instead of ignoring them, if
2705 * they were ignored in the parent. Note that we pass
2706 * spawn_no_exec to setsigvec() to indicate that we called
2707 * fork1() and therefore do not need to call proc_signalstart()
2708 * internally.
2709 */
2710 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2711 vec.sa_handler = SIG_DFL;
2712 vec.sa_tramp = 0;
2713 vec.sa_mask = 0;
2714 vec.sa_flags = 0;
2715 for (sig = 0; sig < NSIG; sig++)
2716 if (px_sa.psa_sigdefault & (1 << sig)) {
2717 error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec);
2718 }
2719 }
2720
2721 /*
2722 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2723 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2724 * limit.
2725 *
2726 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2727 */
2728 if (px_sa.psa_cpumonitor_percent != 0) {
2729 /*
2730 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2731 * an entitlement to configure the monitor a certain way seems silly, since
2732 * whomever is turning it on could just as easily choose not to do so.
2733 */
2734 error = proc_set_task_ruse_cpu(p->task,
2735 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
2736 px_sa.psa_cpumonitor_percent,
2737 px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
2738 0, TRUE);
2739 }
2740 }
2741
2742 bad:
2743
2744 if (error == 0) {
2745 /* reset delay idle sleep status if set */
2746 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2747 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
2748 /* upon successful spawn, re/set the proc control state */
2749 if (imgp->ip_px_sa != NULL) {
2750 switch (px_sa.psa_pcontrol) {
2751 case POSIX_SPAWN_PCONTROL_THROTTLE:
2752 p->p_pcaction = P_PCTHROTTLE;
2753 break;
2754 case POSIX_SPAWN_PCONTROL_SUSPEND:
2755 p->p_pcaction = P_PCSUSP;
2756 break;
2757 case POSIX_SPAWN_PCONTROL_KILL:
2758 p->p_pcaction = P_PCKILL;
2759 break;
2760 case POSIX_SPAWN_PCONTROL_NONE:
2761 default:
2762 p->p_pcaction = 0;
2763 break;
2764 };
2765 }
2766 exec_resettextvp(p, imgp);
2767
2768 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
2769 /* Has jetsam attributes? */
2770 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2771 /*
2772 * With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
2773 * longer relevant, as background limits are described via the inactive limit slots.
2774 * At the kernel layer, the flag is ignored.
2775 *
2776 * That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
2777 * we attempt to mimic previous behavior by forcing the BG limit data into the
2778 * inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode.
2779 * The kernel layer will flag this mapping.
2780 */
2781 if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
2782 memorystatus_update(p, px_sa.psa_priority, 0,
2783 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2784 TRUE,
2785 -1, TRUE,
2786 px_sa.psa_memlimit_inactive, FALSE,
2787 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
2788 } else {
2789 memorystatus_update(p, px_sa.psa_priority, 0,
2790 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2791 TRUE,
2792 px_sa.psa_memlimit_active,
2793 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
2794 px_sa.psa_memlimit_inactive,
2795 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL),
2796 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
2797 }
2798
2799 }
2800 #endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM*/
2801 }
2802
2803 /*
2804 * If we successfully called fork1(), we always need to do this;
2805 * we identify this case by noting the IMGPF_SPAWN flag. This is
2806 * because we come back from that call with signals blocked in the
2807 * child, and we have to unblock them, but we want to wait until
2808 * after we've performed any spawn actions. This has to happen
2809 * before check_for_signature(), which uses psignal.
2810 */
2811 if (spawn_no_exec) {
2812 if (proc_transit_set)
2813 proc_transend(p, 0);
2814
2815 /*
2816 * Drop the signal lock on the child which was taken on our
2817 * behalf by forkproc()/cloneproc() to prevent signals being
2818 * received by the child in a partially constructed state.
2819 */
2820 proc_signalend(p, 0);
2821
2822 /* flag the 'fork' has occurred */
2823 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
2824 /* then flag exec has occurred */
2825 /* notify only if it has not failed due to FP Key error */
2826 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
2827 proc_knote(p, NOTE_EXEC);
2828 } else if (error == 0) {
2829 /* reset the importance attribute from our previous life */
2830 task_importance_reset(p->task);
2831 }
2832
2833 if (error == 0) {
2834 /*
2835 * We need to initialize the bank context behind the protection of
2836 * the proc_trans lock to prevent a race with exit. We can't do this during
2837 * exec_activate_image because task_bank_init checks entitlements that
2838 * aren't loaded until subsequent calls (including exec_resettextvp).
2839 */
2840 error = proc_transstart(p, 0, 0);
2841
2842 if (error == 0) {
2843 task_bank_init(p->task);
2844 proc_transend(p, 0);
2845 }
2846 }
2847
2848
2849 /*
2850 * Apply the spawnattr policy, apptype (which primes the task for importance donation),
2851 * and bind any portwatch ports to the new task.
2852 * This must be done after the exec so that the child's thread is ready,
2853 * and after the in transit state has been released, because priority is
2854 * dropped here so we need to be prepared for a potentially long preemption interval
2855 *
2856 * TODO: Consider splitting this up into separate phases
2857 */
2858 if (error == 0 && imgp->ip_px_sa != NULL) {
2859 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
2860
2861 exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, psa->psa_darwin_role,
2862 portwatch_ports, portwatch_count);
2863 }
2864
2865 /* Apply the main thread qos */
2866 if (error == 0) {
2867 thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread();
2868
2869 task_set_main_thread_qos(p->task, main_thread);
2870 }
2871
2872 /*
2873 * Release any ports we kept around for binding to the new task
2874 * We need to release the rights even if the posix_spawn has failed.
2875 */
2876 if (portwatch_ports != NULL) {
2877 for (int i = 0; i < portwatch_count; i++) {
2878 ipc_port_t port = NULL;
2879 if ((port = portwatch_ports[i]) != NULL) {
2880 ipc_port_release_send(port);
2881 }
2882 }
2883 FREE(portwatch_ports, M_TEMP);
2884 portwatch_ports = NULL;
2885 portwatch_count = 0;
2886 }
2887
2888 /*
2889 * We have to delay operations which might throw a signal until after
2890 * the signals have been unblocked; however, we want that to happen
2891 * after exec_resettextvp() so that the textvp is correct when they
2892 * fire.
2893 */
2894 if (error == 0) {
2895 error = check_for_signature(p, imgp);
2896
2897 /*
2898 * Pay for our earlier safety; deliver the delayed signals from
2899 * the incomplete spawn process now that it's complete.
2900 */
2901 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
2902 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
2903 }
2904 }
2905
2906
2907 if (imgp != NULL) {
2908 if (imgp->ip_vp)
2909 vnode_put(imgp->ip_vp);
2910 if (imgp->ip_scriptvp)
2911 vnode_put(imgp->ip_scriptvp);
2912 if (imgp->ip_strings)
2913 execargs_free(imgp);
2914 if (imgp->ip_px_sfa != NULL)
2915 FREE(imgp->ip_px_sfa, M_TEMP);
2916 if (imgp->ip_px_spa != NULL)
2917 FREE(imgp->ip_px_spa, M_TEMP);
2918 #if CONFIG_PERSONAS
2919 if (imgp->ip_px_persona != NULL)
2920 FREE(imgp->ip_px_persona, M_TEMP);
2921 #endif
2922 #if CONFIG_MACF
2923 if (imgp->ip_px_smpx != NULL)
2924 spawn_free_macpolicyinfo(imgp->ip_px_smpx);
2925 if (imgp->ip_execlabelp)
2926 mac_cred_label_free(imgp->ip_execlabelp);
2927 if (imgp->ip_scriptlabelp)
2928 mac_vnode_label_free(imgp->ip_scriptlabelp);
2929 #endif
2930 }
2931
2932 #if CONFIG_DTRACE
2933 if (spawn_no_exec) {
2934 /*
2935 * In the original DTrace reference implementation,
2936 * posix_spawn() was a libc routine that just
2937 * did vfork(2) then exec(2). Thus the proc::: probes
2938 * are very fork/exec oriented. The details of this
2939 * in-kernel implementation of posix_spawn() is different
2940 * (while producing the same process-observable effects)
2941 * particularly w.r.t. errors, and which thread/process
2942 * is constructing what on behalf of whom.
2943 */
2944 if (error) {
2945 DTRACE_PROC1(spawn__failure, int, error);
2946 } else {
2947 DTRACE_PROC(spawn__success);
2948 /*
2949 * Some DTrace scripts, e.g. newproc.d in
2950 * /usr/bin, rely on the the 'exec-success'
2951 * probe being fired in the child after the
2952 * new process image has been constructed
2953 * in order to determine the associated pid.
2954 *
2955 * So, even though the parent built the image
2956 * here, for compatibility, mark the new thread
2957 * so 'exec-success' fires on it as it leaves
2958 * the kernel.
2959 */
2960 dtrace_thread_didexec(imgp->ip_new_thread);
2961 }
2962 } else {
2963 if (error) {
2964 DTRACE_PROC1(exec__failure, int, error);
2965 } else {
2966 DTRACE_PROC(exec__success);
2967 }
2968 }
2969
2970 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
2971 (*dtrace_proc_waitfor_hook)(p);
2972 #endif
2973
2974 /* Return to both the parent and the child? */
2975 if (imgp != NULL && spawn_no_exec) {
2976 /*
2977 * If the parent wants the pid, copy it out
2978 */
2979 if (pid != USER_ADDR_NULL)
2980 (void)suword(pid, p->p_pid);
2981 retval[0] = error;
2982
2983 /*
2984 * If we had an error, perform an internal reap ; this is
2985 * entirely safe, as we have a real process backing us.
2986 */
2987 if (error) {
2988 proc_list_lock();
2989 p->p_listflag |= P_LIST_DEADPARENT;
2990 proc_list_unlock();
2991 proc_lock(p);
2992 /* make sure no one else has killed it off... */
2993 if (p->p_stat != SZOMB && p->exit_thread == NULL) {
2994 p->exit_thread = current_thread();
2995 proc_unlock(p);
2996 exit1(p, 1, (int *)NULL);
2997 proc_clear_return_wait(p, imgp->ip_new_thread);
2998 if (exec_done == FALSE) {
2999 task_deallocate(get_threadtask(imgp->ip_new_thread));
3000 thread_deallocate(imgp->ip_new_thread);
3001 }
3002 } else {
3003 /* someone is doing it for us; just skip it */
3004 proc_unlock(p);
3005 proc_clear_return_wait(p, imgp->ip_new_thread);
3006 }
3007 } else {
3008
3009 /*
3010 * Return to the child
3011 *
3012 * Note: the image activator earlier dropped the
3013 * task/thread references to the newly spawned
3014 * process; this is OK, since we still have suspended
3015 * queue references on them, so we should be fine
3016 * with the delayed resume of the thread here.
3017 */
3018 proc_clear_return_wait(p, imgp->ip_new_thread);
3019 }
3020 }
3021 if (bufp != NULL) {
3022 FREE(bufp, M_TEMP);
3023 }
3024
3025 return(error);
3026 }
3027
3028
3029 /*
3030 * execve
3031 *
3032 * Parameters: uap->fname File name to exec
3033 * uap->argp Argument list
3034 * uap->envp Environment list
3035 *
3036 * Returns: 0 Success
3037 * __mac_execve:EINVAL Invalid argument
3038 * __mac_execve:ENOTSUP Invalid argument
3039 * __mac_execve:EACCES Permission denied
3040 * __mac_execve:EINTR Interrupted function
3041 * __mac_execve:ENOMEM Not enough space
3042 * __mac_execve:EFAULT Bad address
3043 * __mac_execve:ENAMETOOLONG Filename too long
3044 * __mac_execve:ENOEXEC Executable file format error
3045 * __mac_execve:ETXTBSY Text file busy [misuse of error code]
3046 * __mac_execve:???
3047 *
3048 * TODO: Dynamic linker header address on stack is copied via suword()
3049 */
3050 /* ARGSUSED */
3051 int
3052 execve(proc_t p, struct execve_args *uap, int32_t *retval)
3053 {
3054 struct __mac_execve_args muap;
3055 int err;
3056
3057 memoryshot(VM_EXECVE, DBG_FUNC_NONE);
3058
3059 muap.fname = uap->fname;
3060 muap.argp = uap->argp;
3061 muap.envp = uap->envp;
3062 muap.mac_p = USER_ADDR_NULL;
3063 err = __mac_execve(p, &muap, retval);
3064
3065 return(err);
3066 }
3067
3068 /*
3069 * __mac_execve
3070 *
3071 * Parameters: uap->fname File name to exec
3072 * uap->argp Argument list
3073 * uap->envp Environment list
3074 * uap->mac_p MAC label supplied by caller
3075 *
3076 * Returns: 0 Success
3077 * EINVAL Invalid argument
3078 * ENOTSUP Not supported
3079 * ENOEXEC Executable file format error
3080 * exec_activate_image:EINVAL Invalid argument
3081 * exec_activate_image:EACCES Permission denied
3082 * exec_activate_image:EINTR Interrupted function
3083 * exec_activate_image:ENOMEM Not enough space
3084 * exec_activate_image:EFAULT Bad address
3085 * exec_activate_image:ENAMETOOLONG Filename too long
3086 * exec_activate_image:ENOEXEC Executable file format error
3087 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
3088 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
3089 * exec_activate_image:???
3090 * mac_execve_enter:???
3091 *
3092 * TODO: Dynamic linker header address on stack is copied via suword()
3093 */
3094 int
3095 __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
3096 {
3097 char *bufp = NULL;
3098 struct image_params *imgp;
3099 struct vnode_attr *vap;
3100 struct vnode_attr *origvap;
3101 int error;
3102 int is_64 = IS_64BIT_PROCESS(p);
3103 struct vfs_context context;
3104 struct uthread *uthread;
3105
3106 context.vc_thread = current_thread();
3107 context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
3108
3109 /* Allocate a big chunk for locals instead of using stack since these
3110 * structures a pretty big.
3111 */
3112 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
3113 imgp = (struct image_params *) bufp;
3114 if (bufp == NULL) {
3115 error = ENOMEM;
3116 goto exit_with_error;
3117 }
3118 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
3119 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
3120
3121 /* Initialize the common data in the image_params structure */
3122 imgp->ip_user_fname = uap->fname;
3123 imgp->ip_user_argv = uap->argp;
3124 imgp->ip_user_envv = uap->envp;
3125 imgp->ip_vattr = vap;
3126 imgp->ip_origvattr = origvap;
3127 imgp->ip_vfs_context = &context;
3128 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
3129 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
3130 imgp->ip_mac_return = 0;
3131
3132 uthread = get_bsdthread_info(current_thread());
3133 if (uthread->uu_flag & UT_VFORK) {
3134 imgp->ip_flags |= IMGPF_VFORK_EXEC;
3135 }
3136
3137 #if CONFIG_MACF
3138 if (uap->mac_p != USER_ADDR_NULL) {
3139 error = mac_execve_enter(uap->mac_p, imgp);
3140 if (error) {
3141 kauth_cred_unref(&context.vc_ucred);
3142 goto exit_with_error;
3143 }
3144 }
3145 #endif
3146
3147 error = exec_activate_image(imgp);
3148
3149 kauth_cred_unref(&context.vc_ucred);
3150
3151 /* Image not claimed by any activator? */
3152 if (error == -1)
3153 error = ENOEXEC;
3154
3155 if (error == 0) {
3156 exec_resettextvp(p, imgp);
3157 error = check_for_signature(p, imgp);
3158 }
3159 if (imgp->ip_vp != NULLVP)
3160 vnode_put(imgp->ip_vp);
3161 if (imgp->ip_scriptvp != NULLVP)
3162 vnode_put(imgp->ip_scriptvp);
3163 if (imgp->ip_strings)
3164 execargs_free(imgp);
3165 #if CONFIG_MACF
3166 if (imgp->ip_execlabelp)
3167 mac_cred_label_free(imgp->ip_execlabelp);
3168 if (imgp->ip_scriptlabelp)
3169 mac_vnode_label_free(imgp->ip_scriptlabelp);
3170 #endif
3171
3172 if (!error) {
3173 /*
3174 * We need to initialize the bank context behind the protection of
3175 * the proc_trans lock to prevent a race with exit. We can't do this during
3176 * exec_activate_image because task_bank_init checks entitlements that
3177 * aren't loaded until subsequent calls (including exec_resettextvp).
3178 */
3179 error = proc_transstart(p, 0, 0);
3180
3181 if (!error) {
3182 task_bank_init(p->task);
3183 proc_transend(p, 0);
3184 }
3185 }
3186
3187 if (!error) {
3188 /* Sever any extant thread affinity */
3189 thread_affinity_exec(current_thread());
3190
3191 thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread();
3192
3193 task_set_main_thread_qos(p->task, main_thread);
3194
3195 /* reset task importance */
3196 task_importance_reset(p->task);
3197
3198 DTRACE_PROC(exec__success);
3199
3200 #if CONFIG_DTRACE
3201 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
3202 (*dtrace_proc_waitfor_hook)(p);
3203 #endif
3204
3205 if (imgp->ip_flags & IMGPF_VFORK_EXEC) {
3206 vfork_return(p, retval, p->p_pid);
3207 proc_clear_return_wait(p, imgp->ip_new_thread);
3208 }
3209 } else {
3210 DTRACE_PROC1(exec__failure, int, error);
3211 }
3212
3213 exit_with_error:
3214 if (bufp != NULL) {
3215 FREE(bufp, M_TEMP);
3216 }
3217
3218 return(error);
3219 }
3220
3221
3222 /*
3223 * copyinptr
3224 *
3225 * Description: Copy a pointer in from user space to a user_addr_t in kernel
3226 * space, based on 32/64 bitness of the user space
3227 *
3228 * Parameters: froma User space address
3229 * toptr Address of kernel space user_addr_t
3230 * ptr_size 4/8, based on 'froma' address space
3231 *
3232 * Returns: 0 Success
3233 * EFAULT Bad 'froma'
3234 *
3235 * Implicit returns:
3236 * *ptr_size Modified
3237 */
3238 static int
3239 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
3240 {
3241 int error;
3242
3243 if (ptr_size == 4) {
3244 /* 64 bit value containing 32 bit address */
3245 unsigned int i;
3246
3247 error = copyin(froma, &i, 4);
3248 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
3249 } else {
3250 error = copyin(froma, toptr, 8);
3251 }
3252 return (error);
3253 }
3254
3255
3256 /*
3257 * copyoutptr
3258 *
3259 * Description: Copy a pointer out from a user_addr_t in kernel space to
3260 * user space, based on 32/64 bitness of the user space
3261 *
3262 * Parameters: ua User space address to copy to
3263 * ptr Address of kernel space user_addr_t
3264 * ptr_size 4/8, based on 'ua' address space
3265 *
3266 * Returns: 0 Success
3267 * EFAULT Bad 'ua'
3268 *
3269 */
3270 static int
3271 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
3272 {
3273 int error;
3274
3275 if (ptr_size == 4) {
3276 /* 64 bit value containing 32 bit address */
3277 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); /* SAFE */
3278
3279 error = copyout(&i, ptr, 4);
3280 } else {
3281 error = copyout(&ua, ptr, 8);
3282 }
3283 return (error);
3284 }
3285
3286
3287 /*
3288 * exec_copyout_strings
3289 *
3290 * Copy out the strings segment to user space. The strings segment is put
3291 * on a preinitialized stack frame.
3292 *
3293 * Parameters: struct image_params * the image parameter block
3294 * int * a pointer to the stack offset variable
3295 *
3296 * Returns: 0 Success
3297 * !0 Faiure: errno
3298 *
3299 * Implicit returns:
3300 * (*stackp) The stack offset, modified
3301 *
3302 * Note: The strings segment layout is backward, from the beginning
3303 * of the top of the stack to consume the minimal amount of
3304 * space possible; the returned stack pointer points to the
3305 * end of the area consumed (stacks grow downward).
3306 *
3307 * argc is an int; arg[i] are pointers; env[i] are pointers;
3308 * the 0's are (void *)NULL's
3309 *
3310 * The stack frame layout is:
3311 *
3312 * +-------------+ <- p->user_stack
3313 * | 16b |
3314 * +-------------+
3315 * | STRING AREA |
3316 * | : |
3317 * | : |
3318 * | : |
3319 * +- -- -- -- --+
3320 * | PATH AREA |
3321 * +-------------+
3322 * | 0 |
3323 * +-------------+
3324 * | applev[n] |
3325 * +-------------+
3326 * :
3327 * :
3328 * +-------------+
3329 * | applev[1] |
3330 * +-------------+
3331 * | exec_path / |
3332 * | applev[0] |
3333 * +-------------+
3334 * | 0 |
3335 * +-------------+
3336 * | env[n] |
3337 * +-------------+
3338 * :
3339 * :
3340 * +-------------+
3341 * | env[0] |
3342 * +-------------+
3343 * | 0 |
3344 * +-------------+
3345 * | arg[argc-1] |
3346 * +-------------+
3347 * :
3348 * :
3349 * +-------------+
3350 * | arg[0] |
3351 * +-------------+
3352 * | argc |
3353 * sp-> +-------------+
3354 *
3355 * Although technically a part of the STRING AREA, we treat the PATH AREA as
3356 * a separate entity. This allows us to align the beginning of the PATH AREA
3357 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
3358 * which preceed it on the stack are properly aligned.
3359 */
3360
3361 static int
3362 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
3363 {
3364 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3365 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
3366 int ptr_area_size;
3367 void *ptr_buffer_start, *ptr_buffer;
3368 int string_size;
3369
3370 user_addr_t string_area; /* *argv[], *env[] */
3371 user_addr_t ptr_area; /* argv[], env[], applev[] */
3372 user_addr_t argc_area; /* argc */
3373 user_addr_t stack;
3374 int error;
3375
3376 unsigned i;
3377 struct copyout_desc {
3378 char *start_string;
3379 int count;
3380 #if CONFIG_DTRACE
3381 user_addr_t *dtrace_cookie;
3382 #endif
3383 boolean_t null_term;
3384 } descriptors[] = {
3385 {
3386 .start_string = imgp->ip_startargv,
3387 .count = imgp->ip_argc,
3388 #if CONFIG_DTRACE
3389 .dtrace_cookie = &p->p_dtrace_argv,
3390 #endif
3391 .null_term = TRUE
3392 },
3393 {
3394 .start_string = imgp->ip_endargv,
3395 .count = imgp->ip_envc,
3396 #if CONFIG_DTRACE
3397 .dtrace_cookie = &p->p_dtrace_envp,
3398 #endif
3399 .null_term = TRUE
3400 },
3401 {
3402 .start_string = imgp->ip_strings,
3403 .count = 1,
3404 #if CONFIG_DTRACE
3405 .dtrace_cookie = NULL,
3406 #endif
3407 .null_term = FALSE
3408 },
3409 {
3410 .start_string = imgp->ip_endenvv,
3411 .count = imgp->ip_applec - 1, /* exec_path handled above */
3412 #if CONFIG_DTRACE
3413 .dtrace_cookie = NULL,
3414 #endif
3415 .null_term = TRUE
3416 }
3417 };
3418
3419 stack = *stackp;
3420
3421 /*
3422 * All previous contributors to the string area
3423 * should have aligned their sub-area
3424 */
3425 if (imgp->ip_strspace % ptr_size != 0) {
3426 error = EINVAL;
3427 goto bad;
3428 }
3429
3430 /* Grow the stack down for the strings we've been building up */
3431 string_size = imgp->ip_strendp - imgp->ip_strings;
3432 stack -= string_size;
3433 string_area = stack;
3434
3435 /*
3436 * Need room for one pointer for each string, plus
3437 * one for the NULLs terminating the argv, envv, and apple areas.
3438 */
3439 ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
3440 ptr_size;
3441 stack -= ptr_area_size;
3442 ptr_area = stack;
3443
3444 /* We'll construct all the pointer arrays in our string buffer,
3445 * which we already know is aligned properly, and ip_argspace
3446 * was used to verify we have enough space.
3447 */
3448 ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
3449
3450 /*
3451 * Need room for pointer-aligned argc slot.
3452 */
3453 stack -= ptr_size;
3454 argc_area = stack;
3455
3456 /*
3457 * Record the size of the arguments area so that sysctl_procargs()
3458 * can return the argument area without having to parse the arguments.
3459 */
3460 proc_lock(p);
3461 p->p_argc = imgp->ip_argc;
3462 p->p_argslen = (int)(*stackp - string_area);
3463 proc_unlock(p);
3464
3465 /* Return the initial stack address: the location of argc */
3466 *stackp = stack;
3467
3468 /*
3469 * Copy out the entire strings area.
3470 */
3471 error = copyout(imgp->ip_strings, string_area,
3472 string_size);
3473 if (error)
3474 goto bad;
3475
3476 for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
3477 char *cur_string = descriptors[i].start_string;
3478 int j;
3479
3480 #if CONFIG_DTRACE
3481 if (descriptors[i].dtrace_cookie) {
3482 proc_lock(p);
3483 *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
3484 proc_unlock(p);
3485 }
3486 #endif /* CONFIG_DTRACE */
3487
3488 /*
3489 * For each segment (argv, envv, applev), copy as many pointers as requested
3490 * to our pointer buffer.
3491 */
3492 for (j = 0; j < descriptors[i].count; j++) {
3493 user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
3494
3495 /* Copy out the pointer to the current string. Alignment has been verified */
3496 if (ptr_size == 8) {
3497 *(uint64_t *)ptr_buffer = (uint64_t)cur_address;
3498 } else {
3499 *(uint32_t *)ptr_buffer = (uint32_t)cur_address;
3500 }
3501
3502 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3503 cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
3504 }
3505
3506 if (descriptors[i].null_term) {
3507 if (ptr_size == 8) {
3508 *(uint64_t *)ptr_buffer = 0ULL;
3509 } else {
3510 *(uint32_t *)ptr_buffer = 0;
3511 }
3512
3513 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3514 }
3515 }
3516
3517 /*
3518 * Copy out all our pointer arrays in bulk.
3519 */
3520 error = copyout(ptr_buffer_start, ptr_area,
3521 ptr_area_size);
3522 if (error)
3523 goto bad;
3524
3525 /* argc (int32, stored in a ptr_size area) */
3526 error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
3527 if (error)
3528 goto bad;
3529
3530 bad:
3531 return(error);
3532 }
3533
3534
3535 /*
3536 * exec_extract_strings
3537 *
3538 * Copy arguments and environment from user space into work area; we may
3539 * have already copied some early arguments into the work area, and if
3540 * so, any arguments opied in are appended to those already there.
3541 * This function is the primary manipulator of ip_argspace, since
3542 * these are the arguments the client of execve(2) knows about. After
3543 * each argv[]/envv[] string is copied, we charge the string length
3544 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
3545 * full preflight the arg list size.
3546 *
3547 * Parameters: struct image_params * the image parameter block
3548 *
3549 * Returns: 0 Success
3550 * !0 Failure: errno
3551 *
3552 * Implicit returns;
3553 * (imgp->ip_argc) Count of arguments, updated
3554 * (imgp->ip_envc) Count of environment strings, updated
3555 * (imgp->ip_argspace) Count of remaining of NCARGS
3556 * (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
3557 *
3558 *
3559 * Note: The argument and environment vectors are user space pointers
3560 * to arrays of user space pointers.
3561 */
3562 static int
3563 exec_extract_strings(struct image_params *imgp)
3564 {
3565 int error = 0;
3566 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
3567 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
3568 user_addr_t argv = imgp->ip_user_argv;
3569 user_addr_t envv = imgp->ip_user_envv;
3570
3571 /*
3572 * Adjust space reserved for the path name by however much padding it
3573 * needs. Doing this here since we didn't know if this would be a 32-
3574 * or 64-bit process back in exec_save_path.
3575 */
3576 while (imgp->ip_strspace % new_ptr_size != 0) {
3577 *imgp->ip_strendp++ = '\0';
3578 imgp->ip_strspace--;
3579 /* imgp->ip_argspace--; not counted towards exec args total */
3580 }
3581
3582 /*
3583 * From now on, we start attributing string space to ip_argspace
3584 */
3585 imgp->ip_startargv = imgp->ip_strendp;
3586 imgp->ip_argc = 0;
3587
3588 if((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
3589 user_addr_t arg;
3590 char *argstart, *ch;
3591
3592 /* First, the arguments in the "#!" string are tokenized and extracted. */
3593 argstart = imgp->ip_interp_buffer;
3594 while (argstart) {
3595 ch = argstart;
3596 while (*ch && !IS_WHITESPACE(*ch)) {
3597 ch++;
3598 }
3599
3600 if (*ch == '\0') {
3601 /* last argument, no need to NUL-terminate */
3602 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
3603 argstart = NULL;
3604 } else {
3605 /* NUL-terminate */
3606 *ch = '\0';
3607 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
3608
3609 /*
3610 * Find the next string. We know spaces at the end of the string have already
3611 * been stripped.
3612 */
3613 argstart = ch + 1;
3614 while (IS_WHITESPACE(*argstart)) {
3615 argstart++;
3616 }
3617 }
3618
3619 /* Error-check, regardless of whether this is the last interpreter arg or not */
3620 if (error)
3621 goto bad;
3622 if (imgp->ip_argspace < new_ptr_size) {
3623 error = E2BIG;
3624 goto bad;
3625 }
3626 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3627 imgp->ip_argc++;
3628 }
3629
3630 if (argv != 0LL) {
3631 /*
3632 * If we are running an interpreter, replace the av[0] that was
3633 * passed to execve() with the path name that was
3634 * passed to execve() for interpreters which do not use the PATH
3635 * to locate their script arguments.
3636 */
3637 error = copyinptr(argv, &arg, ptr_size);
3638 if (error)
3639 goto bad;
3640 if (arg != 0LL) {
3641 argv += ptr_size; /* consume without using */
3642 }
3643 }
3644
3645 if (imgp->ip_interp_sugid_fd != -1) {
3646 char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
3647 snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
3648 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
3649 } else {
3650 error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
3651 }
3652
3653 if (error)
3654 goto bad;
3655 if (imgp->ip_argspace < new_ptr_size) {
3656 error = E2BIG;
3657 goto bad;
3658 }
3659 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3660 imgp->ip_argc++;
3661 }
3662
3663 while (argv != 0LL) {
3664 user_addr_t arg;
3665
3666 error = copyinptr(argv, &arg, ptr_size);
3667 if (error)
3668 goto bad;
3669
3670 if (arg == 0LL) {
3671 break;
3672 }
3673
3674 argv += ptr_size;
3675
3676 /*
3677 * av[n...] = arg[n]
3678 */
3679 error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
3680 if (error)
3681 goto bad;
3682 if (imgp->ip_argspace < new_ptr_size) {
3683 error = E2BIG;
3684 goto bad;
3685 }
3686 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3687 imgp->ip_argc++;
3688 }
3689
3690 /* Save space for argv[] NULL terminator */
3691 if (imgp->ip_argspace < new_ptr_size) {
3692 error = E2BIG;
3693 goto bad;
3694 }
3695 imgp->ip_argspace -= new_ptr_size;
3696
3697 /* Note where the args ends and env begins. */
3698 imgp->ip_endargv = imgp->ip_strendp;
3699 imgp->ip_envc = 0;
3700
3701 /* Now, get the environment */
3702 while (envv != 0LL) {
3703 user_addr_t env;
3704
3705 error = copyinptr(envv, &env, ptr_size);
3706 if (error)
3707 goto bad;
3708
3709 envv += ptr_size;
3710 if (env == 0LL) {
3711 break;
3712 }
3713 /*
3714 * av[n...] = env[n]
3715 */
3716 error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
3717 if (error)
3718 goto bad;
3719 if (imgp->ip_argspace < new_ptr_size) {
3720 error = E2BIG;
3721 goto bad;
3722 }
3723 imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
3724 imgp->ip_envc++;
3725 }
3726
3727 /* Save space for envv[] NULL terminator */
3728 if (imgp->ip_argspace < new_ptr_size) {
3729 error = E2BIG;
3730 goto bad;
3731 }
3732 imgp->ip_argspace -= new_ptr_size;
3733
3734 /* Align the tail of the combined argv+envv area */
3735 while (imgp->ip_strspace % new_ptr_size != 0) {
3736 if (imgp->ip_argspace < 1) {
3737 error = E2BIG;
3738 goto bad;
3739 }
3740 *imgp->ip_strendp++ = '\0';
3741 imgp->ip_strspace--;
3742 imgp->ip_argspace--;
3743 }
3744
3745 /* Note where the envv ends and applev begins. */
3746 imgp->ip_endenvv = imgp->ip_strendp;
3747
3748 /*
3749 * From now on, we are no longer charging argument
3750 * space to ip_argspace.
3751 */
3752
3753 bad:
3754 return error;
3755 }
3756
3757 static char *
3758 random_hex_str(char *str, int len, boolean_t embedNUL)
3759 {
3760 uint64_t low, high, value;
3761 int idx;
3762 char digit;
3763
3764 /* A 64-bit value will only take 16 characters, plus '0x' and NULL. */
3765 if (len > 19)
3766 len = 19;
3767
3768 /* We need enough room for at least 1 digit */
3769 if (len < 4)
3770 return (NULL);
3771
3772 low = random();
3773 high = random();
3774 value = high << 32 | low;
3775
3776 if (embedNUL) {
3777 /*
3778 * Zero a byte to protect against C string vulnerabilities
3779 * e.g. for userland __stack_chk_guard.
3780 */
3781 value &= ~(0xffull << 8);
3782 }
3783
3784 str[0] = '0';
3785 str[1] = 'x';
3786 for (idx = 2; idx < len - 1; idx++) {
3787 digit = value & 0xf;
3788 value = value >> 4;
3789 if (digit < 10)
3790 str[idx] = '0' + digit;
3791 else
3792 str[idx] = 'a' + (digit - 10);
3793 }
3794 str[idx] = '\0';
3795 return (str);
3796 }
3797
3798 /*
3799 * Libc has an 8-element array set up for stack guard values. It only fills
3800 * in one of those entries, and both gcc and llvm seem to use only a single
3801 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
3802 * do the work to construct them.
3803 */
3804 #define GUARD_VALUES 1
3805 #define GUARD_KEY "stack_guard="
3806
3807 /*
3808 * System malloc needs some entropy when it is initialized.
3809 */
3810 #define ENTROPY_VALUES 2
3811 #define ENTROPY_KEY "malloc_entropy="
3812
3813 /*
3814 * System malloc engages nanozone for UIAPP.
3815 */
3816 #define NANO_ENGAGE_KEY "MallocNanoZone=1"
3817
3818 #define PFZ_KEY "pfz="
3819 extern user32_addr_t commpage_text32_location;
3820 extern user64_addr_t commpage_text64_location;
3821 /*
3822 * Build up the contents of the apple[] string vector
3823 */
3824 static int
3825 exec_add_apple_strings(struct image_params *imgp)
3826 {
3827 int i, error;
3828 int new_ptr_size=4;
3829 char guard[19];
3830 char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1];
3831
3832 char entropy[19];
3833 char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1];
3834
3835 char pfz_string[strlen(PFZ_KEY) + 16 + 4 +1];
3836
3837 if( imgp->ip_flags & IMGPF_IS_64BIT) {
3838 new_ptr_size = 8;
3839 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%llx",commpage_text64_location);
3840 } else {
3841 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%x",commpage_text32_location);
3842 }
3843
3844 /* exec_save_path stored the first string */
3845 imgp->ip_applec = 1;
3846
3847 /* adding the pfz string */
3848 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string),UIO_SYSSPACE,FALSE);
3849 if(error)
3850 goto bad;
3851 imgp->ip_applec++;
3852
3853 /* adding the NANO_ENGAGE_KEY key */
3854 if (imgp->ip_px_sa) {
3855 int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
3856
3857 if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
3858 char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1];
3859
3860 snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY);
3861 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE);
3862 if (error)
3863 goto bad;
3864 imgp->ip_applec++;
3865 }
3866 }
3867
3868 /*
3869 * Supply libc with a collection of random values to use when
3870 * implementing -fstack-protector.
3871 *
3872 * (The first random string always contains an embedded NUL so that
3873 * __stack_chk_guard also protects against C string vulnerabilities)
3874 */
3875 (void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec));
3876 for (i = 0; i < GUARD_VALUES; i++) {
3877 random_hex_str(guard, sizeof (guard), i == 0);
3878 if (i)
3879 (void)strlcat(guard_vec, ",", sizeof (guard_vec));
3880 (void)strlcat(guard_vec, guard, sizeof (guard_vec));
3881 }
3882
3883 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(guard_vec), UIO_SYSSPACE, FALSE);
3884 if (error)
3885 goto bad;
3886 imgp->ip_applec++;
3887
3888 /*
3889 * Supply libc with entropy for system malloc.
3890 */
3891 (void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec));
3892 for (i = 0; i < ENTROPY_VALUES; i++) {
3893 random_hex_str(entropy, sizeof (entropy), FALSE);
3894 if (i)
3895 (void)strlcat(entropy_vec, ",", sizeof (entropy_vec));
3896 (void)strlcat(entropy_vec, entropy, sizeof (entropy_vec));
3897 }
3898
3899 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(entropy_vec), UIO_SYSSPACE, FALSE);
3900 if (error)
3901 goto bad;
3902 imgp->ip_applec++;
3903
3904 /* Align the tail of the combined applev area */
3905 while (imgp->ip_strspace % new_ptr_size != 0) {
3906 *imgp->ip_strendp++ = '\0';
3907 imgp->ip_strspace--;
3908 }
3909
3910 bad:
3911 return error;
3912 }
3913
/*
 * rlim_cur of the RLIMIT_STACK entry in p's resource limit table.
 * The argument is parenthesized so that expressions such as `&proc' or
 * `base + 1' expand correctly.
 */
#define unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
3915
3916 /*
3917 * exec_check_permissions
3918 *
3919 * Description: Verify that the file that is being attempted to be executed
3920 * is in fact allowed to be executed based on it POSIX file
3921 * permissions and other access control criteria
3922 *
3923 * Parameters: struct image_params * the image parameter block
3924 *
3925 * Returns: 0 Success
3926 * EACCES Permission denied
3927 * ENOEXEC Executable file format error
3928 * ETXTBSY Text file busy [misuse of error code]
3929 * vnode_getattr:???
3930 * vnode_authorize:???
3931 */
3932 static int
3933 exec_check_permissions(struct image_params *imgp)
3934 {
3935 struct vnode *vp = imgp->ip_vp;
3936 struct vnode_attr *vap = imgp->ip_vattr;
3937 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3938 int error;
3939 kauth_action_t action;
3940
3941 /* Only allow execution of regular files */
3942 if (!vnode_isreg(vp))
3943 return (EACCES);
3944
3945 /* Get the file attributes that we will be using here and elsewhere */
3946 VATTR_INIT(vap);
3947 VATTR_WANTED(vap, va_uid);
3948 VATTR_WANTED(vap, va_gid);
3949 VATTR_WANTED(vap, va_mode);
3950 VATTR_WANTED(vap, va_fsid);
3951 VATTR_WANTED(vap, va_fileid);
3952 VATTR_WANTED(vap, va_data_size);
3953 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
3954 return (error);
3955
3956 /*
3957 * Ensure that at least one execute bit is on - otherwise root
3958 * will always succeed, and we don't want to happen unless the
3959 * file really is executable.
3960 */
3961 if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
3962 return (EACCES);
3963
3964 /* Disallow zero length files */
3965 if (vap->va_data_size == 0)
3966 return (ENOEXEC);
3967
3968 imgp->ip_arch_offset = (user_size_t)0;
3969 imgp->ip_arch_size = vap->va_data_size;
3970
3971 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
3972 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED))
3973 vap->va_mode &= ~(VSUID | VSGID);
3974
3975 /*
3976 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
3977 * flags for setuid/setgid binaries.
3978 */
3979 if (vap->va_mode & (VSUID | VSGID))
3980 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
3981
3982 #if CONFIG_MACF
3983 error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
3984 if (error)
3985 return (error);
3986 #endif
3987
3988 /* Check for execute permission */
3989 action = KAUTH_VNODE_EXECUTE;
3990 /* Traced images must also be readable */
3991 if (p->p_lflag & P_LTRACED)
3992 action |= KAUTH_VNODE_READ_DATA;
3993 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
3994 return (error);
3995
3996 #if 0
3997 /* Don't let it run if anyone had it open for writing */
3998 vnode_lock(vp);
3999 if (vp->v_writecount) {
4000 panic("going to return ETXTBSY %x", vp);
4001 vnode_unlock(vp);
4002 return (ETXTBSY);
4003 }
4004 vnode_unlock(vp);
4005 #endif
4006
4007
4008 /* XXX May want to indicate to underlying FS that vnode is open */
4009
4010 return (error);
4011 }
4012
4013
4014 /*
4015 * exec_handle_sugid
4016 *
4017 * Initially clear the P_SUGID in the process flags; if an SUGID process is
4018 * exec'ing a non-SUGID image, then this is the point of no return.
4019 *
4020 * If the image being activated is SUGID, then replace the credential with a
4021 * copy, disable tracing (unless the tracing process is root), reset the
4022 * mach task port to revoke it, set the P_SUGID bit,
4023 *
4024 * If the saved user and group ID will be changing, then make sure it happens
4025 * to a new credential, rather than a shared one.
4026 *
4027 * Set the security token (this is probably obsolete, given that the token
4028 * should not technically be separate from the credential itself).
4029 *
4030 * Parameters: struct image_params * the image parameter block
4031 *
4032 * Returns: void No failure indication
4033 *
4034 * Implicit returns:
4035 * <process credential> Potentially modified/replaced
4036 * <task port> Potentially revoked
4037 * <process flags> P_SUGID bit potentially modified
4038 * <security token> Potentially modified
4039 */
4040 static int
4041 exec_handle_sugid(struct image_params *imgp)
4042 {
4043 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
4044 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4045 int i;
4046 int leave_sugid_clear = 0;
4047 int mac_reset_ipc = 0;
4048 int error = 0;
4049 #if CONFIG_MACF
4050 int mac_transition, disjoint_cred = 0;
4051 int label_update_return = 0;
4052
4053 /*
4054 * Determine whether a call to update the MAC label will result in the
4055 * credential changing.
4056 *
4057 * Note: MAC policies which do not actually end up modifying
4058 * the label subsequently are strongly encouraged to
4059 * return 0 for this check, since a non-zero answer will
4060 * slow down the exec fast path for normal binaries.
4061 */
4062 mac_transition = mac_cred_check_label_update_execve(
4063 imgp->ip_vfs_context,
4064 imgp->ip_vp,
4065 imgp->ip_arch_offset,
4066 imgp->ip_scriptvp,
4067 imgp->ip_scriptlabelp,
4068 imgp->ip_execlabelp,
4069 p,
4070 imgp->ip_px_smpx);
4071 #endif
4072
4073 OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
4074
4075 /*
4076 * Order of the following is important; group checks must go last,
4077 * as we use the success of the 'ismember' check combined with the
4078 * failure of the explicit match to indicate that we will be setting
4079 * the egid of the process even though the new process did not
4080 * require VSUID/VSGID bits in order for it to set the new group as
4081 * its egid.
4082 *
4083 * Note: Technically, by this we are implying a call to
4084 * setegid() in the new process, rather than implying
4085 * it used its VSGID bit to set the effective group,
4086 * even though there is no code in that process to make
4087 * such a call.
4088 */
4089 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
4090 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
4091 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
4092 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
4093 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
4094
4095 #if CONFIG_MACF
4096 /* label for MAC transition and neither VSUID nor VSGID */
4097 handle_mac_transition:
4098 #endif
4099
4100 /*
4101 * Replace the credential with a copy of itself if euid or
4102 * egid change.
4103 *
4104 * Note: setuid binaries will automatically opt out of
4105 * group resolver participation as a side effect
4106 * of this operation. This is an intentional
4107 * part of the security model, which requires a
4108 * participating credential be established by
4109 * escalating privilege, setting up all other
4110 * aspects of the credential including whether
4111 * or not to participate in external group
4112 * membership resolution, then dropping their
4113 * effective privilege to that of the desired
4114 * final credential state.
4115 */
4116 if (imgp->ip_origvattr->va_mode & VSUID) {
4117 p->p_ucred = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
4118 /* update cred on proc */
4119 PROC_UPDATE_CREDS_ONPROC(p);
4120 }
4121 if (imgp->ip_origvattr->va_mode & VSGID) {
4122 p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
4123 /* update cred on proc */
4124 PROC_UPDATE_CREDS_ONPROC(p);
4125 }
4126
4127 #if CONFIG_MACF
4128 /*
4129 * If a policy has indicated that it will transition the label,
4130 * before making the call into the MAC policies, get a new
4131 * duplicate credential, so they can modify it without
4132 * modifying any others sharing it.
4133 */
4134 if (mac_transition) {
4135 /*
4136 * This hook may generate upcalls that require
4137 * importance donation from the kernel.
4138 * (23925818)
4139 */
4140 thread_t thread = current_thread();
4141 thread_enable_send_importance(thread, TRUE);
4142 kauth_proc_label_update_execve(p,
4143 imgp->ip_vfs_context,
4144 imgp->ip_vp,
4145 imgp->ip_arch_offset,
4146 imgp->ip_scriptvp,
4147 imgp->ip_scriptlabelp,
4148 imgp->ip_execlabelp,
4149 &imgp->ip_csflags,
4150 imgp->ip_px_smpx,
4151 &disjoint_cred, /* will be non zero if disjoint */
4152 &label_update_return);
4153 thread_enable_send_importance(thread, FALSE);
4154
4155 if (disjoint_cred) {
4156 /*
4157 * If updating the MAC label resulted in a
4158 * disjoint credential, flag that we need to
4159 * set the P_SUGID bit. This protects
4160 * against debuggers being attached by an
4161 * insufficiently privileged process onto the
4162 * result of a transition to a more privileged
4163 * credential.
4164 */
4165 leave_sugid_clear = 0;
4166 }
4167
4168 imgp->ip_mac_return = label_update_return;
4169 }
4170
4171 mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
4172
4173 #endif /* CONFIG_MACF */
4174
4175 /*
4176 * If 'leave_sugid_clear' is non-zero, then we passed the
4177 * VSUID and MACF checks, and successfully determined that
4178 * the previous cred was a member of the VSGID group, but
4179 * that it was not the default at the time of the execve,
4180 * and that the post-labelling credential was not disjoint.
4181 * So we don't set the P_SUGID or reset mach ports and fds
4182 * on the basis of simply running this code.
4183 */
4184 if (mac_reset_ipc || !leave_sugid_clear) {
4185 /*
4186 * Have mach reset the task and thread ports.
4187 * We don't want anyone who had the ports before
4188 * a setuid exec to be able to access/control the
4189 * task/thread after.
4190 */
4191 ipc_task_reset(p->task);
4192 ipc_thread_reset((imgp->ip_new_thread != NULL) ?
4193 imgp->ip_new_thread : current_thread());
4194 }
4195
4196 if (!leave_sugid_clear) {
4197 /*
4198 * Flag the process as setuid.
4199 */
4200 OSBitOrAtomic(P_SUGID, &p->p_flag);
4201
4202 /*
4203 * Radar 2261856; setuid security hole fix
4204 * XXX For setuid processes, attempt to ensure that
4205 * stdin, stdout, and stderr are already allocated.
4206 * We do not want userland to accidentally allocate
4207 * descriptors in this range which has implied meaning
4208 * to libc.
4209 */
4210 for (i = 0; i < 3; i++) {
4211
4212 if (p->p_fd->fd_ofiles[i] != NULL)
4213 continue;
4214
4215 /*
4216 * Do the kernel equivalent of
4217 *
4218 * if i == 0
4219 * (void) open("/dev/null", O_RDONLY);
4220 * else
4221 * (void) open("/dev/null", O_WRONLY);
4222 */
4223
4224 struct fileproc *fp;
4225 int indx;
4226 int flag;
4227 struct nameidata *ndp = NULL;
4228
4229 if (i == 0)
4230 flag = FREAD;
4231 else
4232 flag = FWRITE;
4233
4234 if ((error = falloc(p,
4235 &fp, &indx, imgp->ip_vfs_context)) != 0)
4236 continue;
4237
4238 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
4239 if (ndp == NULL) {
4240 error = ENOMEM;
4241 break;
4242 }
4243
4244 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
4245 CAST_USER_ADDR_T("/dev/null"),
4246 imgp->ip_vfs_context);
4247
4248 if ((error = vn_open(ndp, flag, 0)) != 0) {
4249 fp_free(p, indx, fp);
4250 break;
4251 }
4252
4253 struct fileglob *fg = fp->f_fglob;
4254
4255 fg->fg_flag = flag;
4256 fg->fg_ops = &vnops;
4257 fg->fg_data = ndp->ni_vp;
4258
4259 vnode_put(ndp->ni_vp);
4260
4261 proc_fdlock(p);
4262 procfdtbl_releasefd(p, indx, NULL);
4263 fp_drop(p, indx, fp, 1);
4264 proc_fdunlock(p);
4265
4266 FREE(ndp, M_TEMP);
4267 }
4268 }
4269 }
4270 #if CONFIG_MACF
4271 else {
4272 /*
4273 * We are here because we were told that the MAC label will
4274 * be transitioned, and the binary is not VSUID or VSGID; to
4275 * deal with this case, we could either duplicate a lot of
4276 * code, or we can indicate we want to default the P_SUGID
4277 * bit clear and jump back up.
4278 */
4279 if (mac_transition) {
4280 leave_sugid_clear = 1;
4281 goto handle_mac_transition;
4282 }
4283 }
4284
4285 #endif /* CONFIG_MACF */
4286
4287 /*
4288 * Implement the semantic where the effective user and group become
4289 * the saved user and group in exec'ed programs.
4290 */
4291 p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), kauth_cred_getgid(p->p_ucred));
4292 /* update cred on proc */
4293 PROC_UPDATE_CREDS_ONPROC(p);
4294
4295 /* Update the process' identity version and set the security token */
4296 p->p_idversion++;
4297 set_security_token(p);
4298
4299 return(error);
4300 }
4301
4302
4303 /*
4304 * create_unix_stack
4305 *
4306 * Description: Set the user stack address for the process to the provided
4307 * address. If a custom stack was not set as a result of the
4308 * load process (i.e. as specified by the image file for the
4309 * executable), then allocate the stack in the provided map and
4310 * set up appropriate guard pages for enforcing administrative
4311 * limits on stack growth, if they end up being needed.
4312 *
4313 * Parameters: p Process to set stack on
4314 * load_result Information from mach-o load commands
4315 * map Address map in which to allocate the new stack
4316 *
4317 * Returns: KERN_SUCCESS Stack successfully created
4318 * !KERN_SUCCESS Mach failure code
4319 */
4320 static kern_return_t
4321 create_unix_stack(vm_map_t map, load_result_t* load_result,
4322 proc_t p)
4323 {
4324 mach_vm_size_t size, prot_size;
4325 mach_vm_offset_t addr, prot_addr;
4326 kern_return_t kr;
4327
4328 mach_vm_address_t user_stack = load_result->user_stack;
4329
4330 proc_lock(p);
4331 p->user_stack = user_stack;
4332 proc_unlock(p);
4333
4334 if (!load_result->prog_allocated_stack) {
4335 /*
4336 * Allocate enough space for the maximum stack size we
4337 * will ever authorize and an extra page to act as
4338 * a guard page for stack overflows. For default stacks,
4339 * vm_initial_limit_stack takes care of the extra guard page.
4340 * Otherwise we must allocate it ourselves.
4341 */
4342
4343 size = mach_vm_round_page(load_result->user_stack_size);
4344 if (load_result->prog_stack_size)
4345 size += PAGE_SIZE;
4346 addr = mach_vm_trunc_page(load_result->user_stack - size);
4347 kr = mach_vm_allocate(map, &addr, size,
4348 VM_MAKE_TAG(VM_MEMORY_STACK) |
4349 VM_FLAGS_FIXED);
4350 if (kr != KERN_SUCCESS) {
4351 /* If can't allocate at default location, try anywhere */
4352 addr = 0;
4353 kr = mach_vm_allocate(map, &addr, size,
4354 VM_MAKE_TAG(VM_MEMORY_STACK) |
4355 VM_FLAGS_ANYWHERE);
4356 if (kr != KERN_SUCCESS)
4357 return kr;
4358
4359 user_stack = addr + size;
4360 load_result->user_stack = user_stack;
4361
4362 proc_lock(p);
4363 p->user_stack = user_stack;
4364 proc_unlock(p);
4365 }
4366
4367 /*
4368 * And prevent access to what's above the current stack
4369 * size limit for this process.
4370 */
4371 prot_addr = addr;
4372 if (load_result->prog_stack_size)
4373 prot_size = PAGE_SIZE;
4374 else
4375 prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
4376 kr = mach_vm_protect(map,
4377 prot_addr,
4378 prot_size,
4379 FALSE,
4380 VM_PROT_NONE);
4381 if (kr != KERN_SUCCESS) {
4382 (void) mach_vm_deallocate(map, addr, size);
4383 return kr;
4384 }
4385 }
4386
4387 return KERN_SUCCESS;
4388 }
4389
4390 #include <sys/reboot.h>
4391
4392 /*
4393 * load_init_program_at_path
4394 *
4395 * Description: Load the "init" program; in most cases, this will be "launchd"
4396 *
4397 * Parameters: p Process to call execve() to create
4398 * the "init" program
4399 * scratch_addr Page in p, scratch space
4400 * path NULL terminated path
4401 *
4402 * Returns: KERN_SUCCESS Success
4403 * !KERN_SUCCESS See execve/mac_execve for error codes
4404 *
4405 * Notes: The process that is passed in is the first manufactured
4406 * process on the system, and gets here via bsd_ast() firing
4407 * for the first time. This is done to ensure that bsd_init()
4408 * has run to completion.
4409 *
4410 * The address map of the first manufactured process is 32 bit.
4411 * WHEN this becomes 64b, this code will fail; it needs to be
4412 * made 64b capable.
4413 */
4414 static int
4415 load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
4416 {
4417 uint32_t argv[3];
4418 uint32_t argc = 0;
4419 int retval[2];
4420 struct execve_args init_exec_args;
4421
4422 /*
4423 * Validate inputs and pre-conditions
4424 */
4425 assert(p);
4426 assert(scratch_addr);
4427 assert(path);
4428
4429 if (IS_64BIT_PROCESS(p)) {
4430 panic("Init against 64b primordial proc not implemented");
4431 }
4432
4433 /*
4434 * Copy out program name.
4435 */
4436 size_t path_length = strlen(path) + 1;
4437 (void) copyout(path, scratch_addr, path_length);
4438
4439 argv[argc++] = (uint32_t)scratch_addr;
4440 scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, 16);
4441
4442 /*
4443 * Put out first (and only) argument, similarly.
4444 * Assumes everything fits in a page as allocated above.
4445 */
4446 if (boothowto & RB_SINGLE) {
4447 const char *init_args = "-s";
4448 size_t init_args_length = strlen(init_args)+1;
4449
4450 copyout(init_args, scratch_addr, init_args_length);
4451
4452 argv[argc++] = (uint32_t)scratch_addr;
4453 scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, 16);
4454 }
4455
4456 /*
4457 * Null-end the argument list
4458 */
4459 argv[argc] = 0;
4460
4461 /*
4462 * Copy out the argument list.
4463 */
4464 (void) copyout(argv, scratch_addr, sizeof(argv));
4465
4466 /*
4467 * Set up argument block for fake call to execve.
4468 */
4469 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
4470 init_exec_args.argp = scratch_addr;
4471 init_exec_args.envp = USER_ADDR_NULL;
4472
4473 /*
4474 * So that init task is set with uid,gid 0 token
4475 */
4476 set_security_token(p);
4477
4478 return execve(p, &init_exec_args, retval);
4479 }
4480
/*
 * Candidate paths for the init program, listed in the order in which
 * load_init_program() attempts them.  The /usr/local entries are only
 * compiled into DEBUG and/or DEVELOPMENT kernels.
 */
static const char *init_programs[] = {
#if DEBUG
	"/usr/local/sbin/launchd.debug",
#endif
#if DEBUG || DEVELOPMENT
	/* Remove DEBUG conditional when <rdar://problem/17931977> is fixed */
	"/usr/local/sbin/launchd.development",
#endif
	"/sbin/launchd",
};
4491
4492 /*
4493 * load_init_program
4494 *
4495 * Description: Load the "init" program; in most cases, this will be "launchd"
4496 *
4497 * Parameters: p Process to call execve() to create
4498 * the "init" program
4499 *
4500 * Returns: (void)
4501 *
4502 * Notes: The process that is passed in is the first manufactured
4503 * process on the system, and gets here via bsd_ast() firing
4504 * for the first time. This is done to ensure that bsd_init()
4505 * has run to completion.
4506 *
4507 * In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
4508 * may be used to select a specific launchd executable. As with
4509 * the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
4510 * will force /sbin/launchd to be selected.
4511 *
4512 * The DEBUG kernel will continue to check for a .development
4513 * version until <rdar://problem/17931977> is fixed.
4514 *
4515 * Search order by build:
4516 *
4517 * DEBUG DEVELOPMENT RELEASE PATH
4518 * ----------------------------------------------------------------------------------
4519 * 1 1 NA /usr/local/sbin/launchd.$LAUNCHDSUFFIX
4520 * 2 NA NA /usr/local/sbin/launchd.debug
4521 * 3 2 NA /usr/local/sbin/launchd.development
4522 * 4 3 1 /sbin/launchd
4523 */
4524 void
4525 load_init_program(proc_t p)
4526 {
4527 uint32_t i;
4528 int error;
4529 vm_offset_t scratch_addr = VM_MIN_ADDRESS;
4530
4531 (void) vm_allocate(current_map(), &scratch_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE);
4532 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
4533 (void) memorystatus_init_at_boot_snapshot();
4534 #endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
4535
4536 #if DEBUG || DEVELOPMENT
4537 /* Check for boot-arg suffix first */
4538 char launchd_suffix[64];
4539 if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
4540 char launchd_path[128];
4541 boolean_t is_release_suffix = ((launchd_suffix[0] == 0) ||
4542 (strcmp(launchd_suffix, "release") == 0));
4543
4544 if (is_release_suffix) {
4545 error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), "/sbin/launchd");
4546 if (!error)
4547 return;
4548
4549 panic("Process 1 exec of launchd.release failed, errno %d", error);
4550 } else {
4551 strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path));
4552 strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
4553
4554 /* All the error data is lost in the loop below, don't
4555 * attempt to save it. */
4556 if (!load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), launchd_path)) {
4557 return;
4558 }
4559 }
4560 }
4561 #endif
4562
4563 error = ENOENT;
4564 for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) {
4565 error = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), init_programs[i]);
4566 if (!error)
4567 return;
4568 }
4569
4570 panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i-1]), error);
4571 }
4572
4573 /*
4574 * load_return_to_errno
4575 *
4576 * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
4577 *
4578 * Parameters: lrtn Mach error number
4579 *
4580 * Returns: (int) BSD error number
4581 * 0 Success
4582 * EBADARCH Bad architecture
4583 * EBADMACHO Bad Mach object file
4584 * ESHLIBVERS Bad shared library version
4585 * ENOMEM Out of memory/resource shortage
4586 * EACCES Access denied
4587 * ENOENT Entry not found (usually "file does
4588 * does not exist")
4589 * EIO An I/O error occurred
4590 * EBADEXEC The executable is corrupt/unknown
4591 */
4592 static int
4593 load_return_to_errno(load_return_t lrtn)
4594 {
4595 switch (lrtn) {
4596 case LOAD_SUCCESS:
4597 return 0;
4598 case LOAD_BADARCH:
4599 return EBADARCH;
4600 case LOAD_BADMACHO:
4601 return EBADMACHO;
4602 case LOAD_SHLIB:
4603 return ESHLIBVERS;
4604 case LOAD_NOSPACE:
4605 case LOAD_RESOURCE:
4606 return ENOMEM;
4607 case LOAD_PROTECT:
4608 return EACCES;
4609 case LOAD_ENOENT:
4610 return ENOENT;
4611 case LOAD_IOERROR:
4612 return EIO;
4613 case LOAD_FAILURE:
4614 case LOAD_DECRYPTFAIL:
4615 default:
4616 return EBADEXEC;
4617 }
4618 }
4619
4620 #include <mach/mach_types.h>
4621 #include <mach/vm_prot.h>
4622 #include <mach/semaphore.h>
4623 #include <mach/sync_policy.h>
4624 #include <kern/clock.h>
4625 #include <mach/kern_return.h>
4626
4627 /*
4628 * execargs_alloc
4629 *
4630 * Description: Allocate the block of memory used by the execve arguments.
4631 * At the same time, we allocate a page so that we can read in
4632 * the first page of the image.
4633 *
4634 * Parameters: struct image_params * the image parameter block
4635 *
4636 * Returns: 0 Success
4637 * EINVAL Invalid argument
4638 * EACCES Permission denied
4639 * EINTR Interrupted function
4640 * ENOMEM Not enough space
4641 *
4642 * Notes: This is a temporary allocation into the kernel address space
4643 * to enable us to copy arguments in from user space. This is
4644 * necessitated by not mapping the process calling execve() into
4645 * the kernel address space during the execve() system call.
4646 *
4647 * We assemble the argument and environment, etc., into this
4648 * region before copying it as a single block into the child
4649 * process address space (at the top or bottom of the stack,
4650 * depending on which way the stack grows; see the function
4651 * exec_copyout_strings() for details).
4652 *
4653 * This ends up with a second (possibly unnecessary) copy compared
4654 * with assembling the data directly into the child address space,
4655 * instead, but since we cannot be guaranteed that the parent has
4656 * not modified its environment, we can't really know that it's
4657 * really a block there as well.
4658 */
4659
4660
4661 static int execargs_waiters = 0;
4662 lck_mtx_t *execargs_cache_lock;
4663
4664 static void
4665 execargs_lock_lock(void) {
4666 lck_mtx_lock_spin(execargs_cache_lock);
4667 }
4668
4669 static void
4670 execargs_lock_unlock(void) {
4671 lck_mtx_unlock(execargs_cache_lock);
4672 }
4673
4674 static wait_result_t
4675 execargs_lock_sleep(void) {
4676 return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
4677 }
4678
4679 static kern_return_t
4680 execargs_purgeable_allocate(char **execarg_address) {
4681 kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
4682 assert(kr == KERN_SUCCESS);
4683 return kr;
4684 }
4685
4686 static kern_return_t
4687 execargs_purgeable_reference(void *execarg_address) {
4688 int state = VM_PURGABLE_NONVOLATILE;
4689 kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
4690
4691 assert(kr == KERN_SUCCESS);
4692 return kr;
4693 }
4694
4695 static kern_return_t
4696 execargs_purgeable_volatilize(void *execarg_address) {
4697 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
4698 kern_return_t kr;
4699 kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
4700
4701 assert(kr == KERN_SUCCESS);
4702
4703 return kr;
4704 }
4705
4706 static void
4707 execargs_wakeup_waiters(void) {
4708 thread_wakeup(&execargs_free_count);
4709 }
4710
4711 static int
4712 execargs_alloc(struct image_params *imgp)
4713 {
4714 kern_return_t kret;
4715 wait_result_t res;
4716 int i, cache_index = -1;
4717
4718 execargs_lock_lock();
4719
4720 while (execargs_free_count == 0) {
4721 execargs_waiters++;
4722 res = execargs_lock_sleep();
4723 execargs_waiters--;
4724 if (res != THREAD_AWAKENED) {
4725 execargs_lock_unlock();
4726 return (EINTR);
4727 }
4728 }
4729
4730 execargs_free_count--;
4731
4732 for (i = 0; i < execargs_cache_size; i++) {
4733 vm_offset_t element = execargs_cache[i];
4734 if (element) {
4735 cache_index = i;
4736 imgp->ip_strings = (char *)(execargs_cache[i]);
4737 execargs_cache[i] = 0;
4738 break;
4739 }
4740 }
4741
4742 assert(execargs_free_count >= 0);
4743
4744 execargs_lock_unlock();
4745
4746 if (cache_index == -1) {
4747 kret = execargs_purgeable_allocate(&imgp->ip_strings);
4748 }
4749 else
4750 kret = execargs_purgeable_reference(imgp->ip_strings);
4751
4752 assert(kret == KERN_SUCCESS);
4753 if (kret != KERN_SUCCESS) {
4754 return (ENOMEM);
4755 }
4756
4757 /* last page used to read in file headers */
4758 imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
4759 imgp->ip_strendp = imgp->ip_strings;
4760 imgp->ip_argspace = NCARGS;
4761 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
4762
4763 return (0);
4764 }
4765
4766 /*
4767 * execargs_free
4768 *
4769 * Description: Free the block of memory used by the execve arguments and the
4770 * first page of the executable by a previous call to the function
4771 * execargs_alloc().
4772 *
4773 * Parameters: struct image_params * the image parameter block
4774 *
4775 * Returns: 0 Success
4776 * EINVAL Invalid argument
4777 * EINTR Oeration interrupted
4778 */
4779 static int
4780 execargs_free(struct image_params *imgp)
4781 {
4782 kern_return_t kret;
4783 int i;
4784 boolean_t needs_wakeup = FALSE;
4785
4786 kret = execargs_purgeable_volatilize(imgp->ip_strings);
4787
4788 execargs_lock_lock();
4789 execargs_free_count++;
4790
4791 for (i = 0; i < execargs_cache_size; i++) {
4792 vm_offset_t element = execargs_cache[i];
4793 if (element == 0) {
4794 execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
4795 imgp->ip_strings = NULL;
4796 break;
4797 }
4798 }
4799
4800 assert(imgp->ip_strings == NULL);
4801
4802 if (execargs_waiters > 0)
4803 needs_wakeup = TRUE;
4804
4805 execargs_lock_unlock();
4806
4807 if (needs_wakeup == TRUE)
4808 execargs_wakeup_waiters();
4809
4810 return ((kret == KERN_SUCCESS ? 0 : EINVAL));
4811 }
4812
4813 static void
4814 exec_resettextvp(proc_t p, struct image_params *imgp)
4815 {
4816 vnode_t vp;
4817 off_t offset;
4818 vnode_t tvp = p->p_textvp;
4819 int ret;
4820
4821 vp = imgp->ip_vp;
4822 offset = imgp->ip_arch_offset;
4823
4824 if (vp == NULLVP)
4825 panic("exec_resettextvp: expected valid vp");
4826
4827 ret = vnode_ref(vp);
4828 proc_lock(p);
4829 if (ret == 0) {
4830 p->p_textvp = vp;
4831 p->p_textoff = offset;
4832 } else {
4833 p->p_textvp = NULLVP; /* this is paranoia */
4834 p->p_textoff = 0;
4835 }
4836 proc_unlock(p);
4837
4838 if ( tvp != NULLVP) {
4839 if (vnode_getwithref(tvp) == 0) {
4840 vnode_rele(tvp);
4841 vnode_put(tvp);
4842 }
4843 }
4844
4845 }
4846
4847 /*
4848 * If the process is not signed or if it contains entitlements, we
4849 * need to communicate through the task_access_port to taskgated.
4850 *
4851 * taskgated will provide a detached code signature if present, and
4852 * will enforce any restrictions on entitlements.
4853 */
4854
4855 static boolean_t
4856 taskgated_required(proc_t p, boolean_t *require_success)
4857 {
4858 size_t length;
4859 void *blob;
4860 int error;
4861
4862 if (cs_debug > 2)
4863 csvnode_print_debug(p->p_textvp);
4864
4865 const int can_skip_taskgated = csproc_get_platform_binary(p) && !csproc_get_platform_path(p);
4866 if (can_skip_taskgated) {
4867 if (cs_debug) printf("taskgated not required for: %s\n", p->p_name);
4868 *require_success = FALSE;
4869 return FALSE;
4870 }
4871
4872 if ((p->p_csflags & CS_VALID) == 0) {
4873 *require_success = FALSE;
4874 return TRUE;
4875 }
4876
4877 error = cs_entitlements_blob_get(p, &blob, &length);
4878 if (error == 0 && blob != NULL) {
4879 /*
4880 * fatal on the desktop when entitlements are present,
4881 * unless we started in single-user mode
4882 */
4883 if ((boothowto & RB_SINGLE) == 0)
4884 *require_success = TRUE;
4885 /*
4886 * Allow initproc to run without causing taskgated to launch
4887 */
4888 if (p == initproc) {
4889 *require_success = FALSE;
4890 return FALSE;
4891 }
4892
4893 if (cs_debug) printf("taskgated required for: %s\n", p->p_name);
4894
4895 return TRUE;
4896 }
4897
4898 *require_success = FALSE;
4899 return FALSE;
4900 }
4901
4902 /*
4903 * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
4904 *
4905 * Description: Waits for the userspace daemon to respond to the request
4906 * we made. Function declared non inline to be visible in
4907 * stackshots and spindumps as well as debugging.
4908 */
4909 __attribute__((noinline)) int
4910 __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
4911 {
4912 return find_code_signature(task_access_port, new_pid);
4913 }
4914
4915 static int
4916 check_for_signature(proc_t p, struct image_params *imgp)
4917 {
4918 mach_port_t port = NULL;
4919 kern_return_t kr = KERN_FAILURE;
4920 int error = EACCES;
4921 boolean_t unexpected_failure = FALSE;
4922 unsigned char hash[SHA1_RESULTLEN];
4923 boolean_t require_success = FALSE;
4924 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
4925 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
4926
4927 /*
4928 * Override inherited code signing flags with the
4929 * ones for the process that is being successfully
4930 * loaded
4931 */
4932 proc_lock(p);
4933 p->p_csflags = imgp->ip_csflags;
4934 proc_unlock(p);
4935
4936 /* Set the switch_protect flag on the map */
4937 if(p->p_csflags & (CS_HARD|CS_KILL)) {
4938 vm_map_switch_protect(get_task_map(p->task), TRUE);
4939 }
4940
4941 /*
4942 * image activation may be failed due to policy
4943 * which is unexpected but security framework does not
4944 * approve of exec, kill and return immediately.
4945 */
4946 if (imgp->ip_mac_return != 0) {
4947 error = imgp->ip_mac_return;
4948 unexpected_failure = TRUE;
4949 goto done;
4950 }
4951
4952 /* check if callout to taskgated is needed */
4953 if (!taskgated_required(p, &require_success)) {
4954 error = 0;
4955 goto done;
4956 }
4957
4958 kr = task_get_task_access_port(p->task, &port);
4959 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
4960 error = 0;
4961 if (require_success)
4962 error = EACCES;
4963 goto done;
4964 }
4965
4966 /*
4967 * taskgated returns KERN_SUCCESS if it has completed its work
4968 * and the exec should continue, KERN_FAILURE if the exec should
4969 * fail, or it may error out with different error code in an
4970 * event of mig failure (e.g. process was signalled during the
4971 * rpc call, taskgated died, mig server died etc.).
4972 */
4973
4974 kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
4975 switch (kr) {
4976 case KERN_SUCCESS:
4977 error = 0;
4978 break;
4979 case KERN_FAILURE:
4980 error = EACCES;
4981 goto done;
4982 default:
4983 error = EACCES;
4984 unexpected_failure = TRUE;
4985 goto done;
4986 }
4987
4988 /* Only do this if exec_resettextvp() did not fail */
4989 if (p->p_textvp != NULLVP) {
4990 /*
4991 * If there's a new code directory, mark this process
4992 * as signed.
4993 */
4994 if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) {
4995 proc_lock(p);
4996 p->p_csflags |= CS_VALID;
4997 proc_unlock(p);
4998 }
4999 }
5000
5001 done:
5002 if (0 != error) {
5003 if (!unexpected_failure)
5004 p->p_csflags |= CS_KILLED;
5005 /* make very sure execution fails */
5006 if (vfexec || spawn) {
5007 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGKILL);
5008 error = 0;
5009 } else {
5010 psignal(p, SIGKILL);
5011 }
5012 }
5013 return error;
5014 }
5015
5016 /*
5017 * Typically as soon as we start executing this process, the
5018 * first instruction will trigger a VM fault to bring the text
5019 * pages (as executable) into the address space, followed soon
5020 * thereafter by dyld data structures (for dynamic executable).
5021 * To optimize this, as well as improve support for hardware
5022 * debuggers that can only access resident pages present
5023 * in the process' page tables, we prefault some pages if
5024 * possible. Errors are non-fatal.
5025 */
5026 static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
5027 {
5028 int ret;
5029 size_t expected_all_image_infos_size;
5030
5031 /*
5032 * Prefault executable or dyld entry point.
5033 */
5034 vm_fault(current_map(),
5035 vm_map_trunc_page(load_result->entry_point,
5036 vm_map_page_mask(current_map())),
5037 VM_PROT_READ | VM_PROT_EXECUTE,
5038 FALSE,
5039 THREAD_UNINT, NULL, 0);
5040
5041 if (imgp->ip_flags & IMGPF_IS_64BIT) {
5042 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
5043 } else {
5044 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
5045 }
5046
5047 /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
5048 if (load_result->dynlinker &&
5049 load_result->all_image_info_addr &&
5050 load_result->all_image_info_size >= expected_all_image_infos_size) {
5051 union {
5052 struct user64_dyld_all_image_infos infos64;
5053 struct user32_dyld_all_image_infos infos32;
5054 } all_image_infos;
5055
5056 /*
5057 * Pre-fault to avoid copyin() going through the trap handler
5058 * and recovery path.
5059 */
5060 vm_fault(current_map(),
5061 vm_map_trunc_page(load_result->all_image_info_addr,
5062 vm_map_page_mask(current_map())),
5063 VM_PROT_READ | VM_PROT_WRITE,
5064 FALSE,
5065 THREAD_UNINT, NULL, 0);
5066 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
5067 /* all_image_infos straddles a page */
5068 vm_fault(current_map(),
5069 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
5070 vm_map_page_mask(current_map())),
5071 VM_PROT_READ | VM_PROT_WRITE,
5072 FALSE,
5073 THREAD_UNINT, NULL, 0);
5074 }
5075
5076 ret = copyin(load_result->all_image_info_addr,
5077 &all_image_infos,
5078 expected_all_image_infos_size);
5079 if (ret == 0 && all_image_infos.infos32.version >= 9) {
5080
5081 user_addr_t notification_address;
5082 user_addr_t dyld_image_address;
5083 user_addr_t dyld_version_address;
5084 user_addr_t dyld_all_image_infos_address;
5085 user_addr_t dyld_slide_amount;
5086
5087 if (imgp->ip_flags & IMGPF_IS_64BIT) {
5088 notification_address = all_image_infos.infos64.notification;
5089 dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
5090 dyld_version_address = all_image_infos.infos64.dyldVersion;
5091 dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
5092 } else {
5093 notification_address = all_image_infos.infos32.notification;
5094 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
5095 dyld_version_address = all_image_infos.infos32.dyldVersion;
5096 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
5097 }
5098
5099 /*
5100 * dyld statically sets up the all_image_infos in its Mach-O
5101 * binary at static link time, with pointers relative to its default
5102 * load address. Since ASLR might slide dyld before its first
5103 * instruction is executed, "dyld_slide_amount" tells us how far
5104 * dyld was loaded compared to its default expected load address.
5105 * All other pointers into dyld's image should be adjusted by this
5106 * amount. At some point later, dyld will fix up pointers to take
5107 * into account the slide, at which point the all_image_infos_address
5108 * field in the structure will match the runtime load address, and
5109 * "dyld_slide_amount" will be 0, if we were to consult it again.
5110 */
5111
5112 dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
5113
5114 #if 0
5115 kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
5116 (uint64_t)load_result->all_image_info_addr,
5117 all_image_infos.infos32.version,
5118 (uint64_t)notification_address,
5119 (uint64_t)dyld_image_address,
5120 (uint64_t)dyld_version_address,
5121 (uint64_t)dyld_all_image_infos_address);
5122 #endif
5123
5124 vm_fault(current_map(),
5125 vm_map_trunc_page(notification_address + dyld_slide_amount,
5126 vm_map_page_mask(current_map())),
5127 VM_PROT_READ | VM_PROT_EXECUTE,
5128 FALSE,
5129 THREAD_UNINT, NULL, 0);
5130 vm_fault(current_map(),
5131 vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
5132 vm_map_page_mask(current_map())),
5133 VM_PROT_READ | VM_PROT_EXECUTE,
5134 FALSE,
5135 THREAD_UNINT, NULL, 0);
5136 vm_fault(current_map(),
5137 vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
5138 vm_map_page_mask(current_map())),
5139 VM_PROT_READ,
5140 FALSE,
5141 THREAD_UNINT, NULL, 0);
5142 vm_fault(current_map(),
5143 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
5144 vm_map_page_mask(current_map())),
5145 VM_PROT_READ | VM_PROT_WRITE,
5146 FALSE,
5147 THREAD_UNINT, NULL, 0);
5148 }
5149 }
5150 }