]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/kern_exec.c
xnu-2422.100.13.tar.gz
[apple/xnu.git] / bsd / kern / kern_exec.c
CommitLineData
1c79356b 1/*
6d2010ae 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved. The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36#include <cputypes.h>
37
38/*-
39 * Copyright (c) 1982, 1986, 1991, 1993
40 * The Regents of the University of California. All rights reserved.
41 * (c) UNIX System Laboratories, Inc.
42 * All or some portions of this file are derived from material licensed
43 * to the University of California by American Telephone and Telegraph
44 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
45 * the permission of UNIX System Laboratories, Inc.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
76 */
2d21ac55
A
77/*
78 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
79 * support for mandatory and extensible security protections. This notice
80 * is included in support of clause 2.2 (b) of the Apple Public License,
81 * Version 2.0.
82 */
1c79356b 83#include <machine/reg.h>
316670eb 84#include <machine/cpu_capabilities.h>
1c79356b
A
85
86#include <sys/param.h>
87#include <sys/systm.h>
88#include <sys/filedesc.h>
89#include <sys/kernel.h>
91447636
A
90#include <sys/proc_internal.h>
91#include <sys/kauth.h>
1c79356b 92#include <sys/user.h>
1c79356b
A
93#include <sys/socketvar.h>
94#include <sys/malloc.h>
95#include <sys/namei.h>
91447636
A
96#include <sys/mount_internal.h>
97#include <sys/vnode_internal.h>
98#include <sys/file_internal.h>
1c79356b 99#include <sys/stat.h>
91447636 100#include <sys/uio_internal.h>
1c79356b
A
101#include <sys/acct.h>
102#include <sys/exec.h>
103#include <sys/kdebug.h>
104#include <sys/signal.h>
55e303ae 105#include <sys/aio_kern.h>
91447636 106#include <sys/sysproto.h>
2d21ac55 107#if SYSV_SHM
91447636 108#include <sys/shm_internal.h> /* shmexec() */
2d21ac55 109#endif
91447636 110#include <sys/ubc_internal.h> /* ubc_map() */
2d21ac55
A
111#include <sys/spawn.h>
112#include <sys/spawn_internal.h>
39236c6e 113#include <sys/process_policy.h>
2d21ac55 114#include <sys/codesign.h>
b0d623f7 115#include <crypto/sha1.h>
1c79356b 116
39236c6e
A
117#include <libkern/libkern.h>
118
b0d623f7 119#include <security/audit/audit.h>
e5568f75 120
2d21ac55
A
121#include <ipc/ipc_types.h>
122
91447636 123#include <mach/mach_types.h>
b0d623f7 124#include <mach/port.h>
91447636 125#include <mach/task.h>
b0d623f7 126#include <mach/task_access.h>
91447636
A
127#include <mach/thread_act.h>
128#include <mach/vm_map.h>
129#include <mach/mach_vm.h>
1c79356b
A
130#include <mach/vm_param.h>
131
b0d623f7
A
132#include <kern/sched_prim.h> /* thread_wakeup() */
133#include <kern/affinity.h>
134#include <kern/assert.h>
316670eb 135#include <kern/task.h>
b0d623f7 136
2d21ac55
A
137#if CONFIG_MACF
138#include <security/mac.h>
139#include <security/mac_mach_internal.h>
140#endif
141
1c79356b
A
142#include <vm/vm_map.h>
143#include <vm/vm_kern.h>
2d21ac55 144#include <vm/vm_protos.h>
91447636 145#include <vm/vm_kern.h>
316670eb 146#include <vm/vm_fault.h>
39236c6e 147#include <vm/vm_pageout.h>
316670eb
A
148
149#include <kdp/kdp_dyld.h>
2d21ac55 150
6d2010ae 151#include <machine/pal_routines.h>
b0d623f7 152
316670eb
A
153#include <pexpert/pexpert.h>
154
155#if CONFIG_MEMORYSTATUS
156#include <sys/kern_memorystatus.h>
157#endif
158
2d21ac55
A
159#if CONFIG_DTRACE
160/* Do not include dtrace.h, it redefines kmem_[alloc/free] */
161extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
162extern void (*dtrace_helpers_cleanup)(proc_t);
163extern void dtrace_lazy_dofs_destroy(proc_t);
164
165#include <sys/dtrace_ptss.h>
166#endif
167
168/* support for child creation in exec after vfork */
169thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit);
170void vfork_exit(proc_t p, int rv);
b0d623f7 171int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
316670eb 172extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
1c79356b 173
91447636
A
174/*
175 * Mach things for which prototypes are unavailable from Mach headers
176 */
177void ipc_task_reset(
178 task_t task);
6601e61a
A
179void ipc_thread_reset(
180 thread_t thread);
2d21ac55
A
181kern_return_t ipc_object_copyin(
182 ipc_space_t space,
183 mach_port_name_t name,
184 mach_msg_type_name_t msgt_name,
185 ipc_object_t *objectp);
186void ipc_port_release_send(ipc_port_t);
91447636
A
187
188extern struct savearea *get_user_regs(thread_t);
189
190
1c79356b
A
191#include <kern/thread.h>
192#include <kern/task.h>
1c79356b
A
193#include <kern/ast.h>
194#include <kern/mach_loader.h>
b0d623f7 195#include <kern/mach_fat.h>
1c79356b
A
196#include <mach-o/fat.h>
197#include <mach-o/loader.h>
198#include <machine/vmparam.h>
91447636
A
199#include <sys/imgact.h>
200
2d21ac55
A
201#include <sys/sdt.h>
202
91447636 203
2d21ac55
A
204/*
205 * EAI_ITERLIMIT The maximum number of times to iterate an image
206 * activator in exec_activate_image() before treating
207 * it as malformed/corrupt.
208 */
209#define EAI_ITERLIMIT 10
1c79356b 210
6d2010ae
A
/*
 * For #! interpreter parsing
 *
 * IS_WHITESPACE(ch)	true for a blank or a tab
 * IS_EOL(ch)		true for a comment character ('#') or a newline
 *
 * The argument is parenthesized so that an expression argument is compared
 * as a whole.  Note that the argument is evaluated up to twice; do not pass
 * an expression with side effects.
 */
#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
#define IS_EOL(ch) (((ch) == '#') || ((ch) == '\n'))
216
1c79356b 217extern vm_map_t bsd_pageable_map;
39236c6e 218extern const struct fileops vnops;
1c79356b
A
219
/*
 * ROUND_PTR
 *
 * Round 'addr' up to the next 16-byte boundary (an already aligned address
 * is returned unchanged) and yield the result as a 'type *'.
 *
 * The whole expansion is parenthesized so that postfix operators applied to
 * the macro result ([], ->) act on the pointer, and the alignment mask is
 * built in uintptr_t rather than relying on sign extension of an int.
 */
#define ROUND_PTR(type, addr)	\
	((type *)(((uintptr_t)(addr) + 16 - 1) & ~(uintptr_t)(16 - 1)))
223
91447636 224struct image_params; /* Forward */
2d21ac55 225static int exec_activate_image(struct image_params *imgp);
91447636 226static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
1c79356b 227static int load_return_to_errno(load_return_t lrtn);
91447636
A
228static int execargs_alloc(struct image_params *imgp);
229static int execargs_free(struct image_params *imgp);
230static int exec_check_permissions(struct image_params *imgp);
231static int exec_extract_strings(struct image_params *imgp);
6d2010ae 232static int exec_add_apple_strings(struct image_params *imgp);
91447636 233static int exec_handle_sugid(struct image_params *imgp);
a3d08fcd 234static int sugid_scripts = 0;
6d2010ae 235SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
316670eb 236static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
91447636 237static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
2d21ac55 238static void exec_resettextvp(proc_t, struct image_params *);
b0d623f7 239static int check_for_signature(proc_t, struct image_params *);
316670eb 240static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
39236c6e
A
241static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch);
242static errno_t exec_handle_spawnattr_apptype(proc_t p, int psa_apptype);
1c79356b 243
55e303ae 244/*
6d2010ae 245 * exec_add_user_string
91447636
A
246 *
247 * Add the requested string to the string space area.
248 *
249 * Parameters; struct image_params * image parameter block
250 * user_addr_t string to add to strings area
6d2010ae
A
251 * int segment from which string comes
252 * boolean_t TRUE if string contributes to NCARGS
91447636
A
253 *
254 * Returns: 0 Success
255 * !0 Failure errno from copyinstr()
256 *
257 * Implicit returns:
258 * (imgp->ip_strendp) updated location of next add, if any
259 * (imgp->ip_strspace) updated byte count of space remaining
6d2010ae 260 * (imgp->ip_argspace) updated byte count of space in NCARGS
55e303ae 261 */
91447636 262static int
6d2010ae 263exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
91447636 264{
6d2010ae
A
265 int error = 0;
266
267 do {
268 size_t len = 0;
269 int space;
270
271 if (is_ncargs)
272 space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
273 else
274 space = imgp->ip_strspace;
275
276 if (space <= 0) {
55e303ae
A
277 error = E2BIG;
278 break;
279 }
6d2010ae
A
280
281 if (!UIO_SEG_IS_USER_SPACE(seg)) {
91447636 282 char *kstr = CAST_DOWN(char *,str); /* SAFE */
6d2010ae 283 error = copystr(kstr, imgp->ip_strendp, space, &len);
55e303ae 284 } else {
6d2010ae 285 error = copyinstr(str, imgp->ip_strendp, space, &len);
55e303ae 286 }
6d2010ae 287
91447636
A
288 imgp->ip_strendp += len;
289 imgp->ip_strspace -= len;
6d2010ae
A
290 if (is_ncargs)
291 imgp->ip_argspace -= len;
292
55e303ae 293 } while (error == ENAMETOOLONG);
6d2010ae 294
55e303ae
A
295 return error;
296}
297
91447636
A
298/*
299 * exec_save_path
300 *
301 * To support new app package launching for Mac OS X, the dyld needs the
302 * first argument to execve() stored on the user stack.
303 *
6d2010ae 304 * Save the executable path name at the bottom of the strings area and set
91447636
A
305 * the argument vector pointer to the location following that to indicate
306 * the start of the argument and environment tuples, setting the remaining
6d2010ae 307 * string space count to the size of the string area minus the path length.
91447636
A
308 *
309 * Parameters; struct image_params * image parameter block
310 * char * path used to invoke program
2d21ac55 311 * int segment from which path comes
91447636
A
312 *
313 * Returns: int 0 Success
2d21ac55
A
314 * EFAULT Bad address
315 * copy[in]str:EFAULT Bad address
316 * copy[in]str:ENAMETOOLONG Filename too long
317 *
91447636
A
318 * Implicit returns:
319 * (imgp->ip_strings) saved path
320 * (imgp->ip_strspace) space remaining in ip_strings
91447636 321 * (imgp->ip_strendp) start of remaining copy area
6d2010ae
A
322 * (imgp->ip_argspace) space remaining of NCARGS
323 * (imgp->ip_applec) Initial applev[0]
91447636
A
324 *
325 * Note: We have to do this before the initial namei() since in the
326 * path contains symbolic links, namei() will overwrite the
327 * original path buffer contents. If the last symbolic link
328 * resolved was a relative pathname, we would lose the original
329 * "path", which could be an absolute pathname. This might be
330 * unacceptable for dyld.
331 */
332static int
2d21ac55 333exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
1c79356b 334{
91447636
A
335 int error;
336 size_t len;
6d2010ae 337 char *kpath;
91447636
A
338
339 len = MIN(MAXPATHLEN, imgp->ip_strspace);
340
2d21ac55 341 switch(seg) {
91447636
A
342 case UIO_USERSPACE32:
343 case UIO_USERSPACE64: /* Same for copyin()... */
344 error = copyinstr(path, imgp->ip_strings, len, &len);
345 break;
b0d623f7 346 case UIO_SYSSPACE:
6d2010ae 347 kpath = CAST_DOWN(char *,path); /* SAFE */
91447636
A
348 error = copystr(kpath, imgp->ip_strings, len, &len);
349 break;
350 default:
351 error = EFAULT;
352 break;
353 }
354
355 if (!error) {
356 imgp->ip_strendp += len;
357 imgp->ip_strspace -= len;
91447636
A
358 }
359
360 return(error);
361}
362
6d2010ae
A
363/*
364 * exec_reset_save_path
365 *
366 * If we detect a shell script, we need to reset the string area
367 * state so that the interpreter can be saved onto the stack.
368
369 * Parameters; struct image_params * image parameter block
370 *
371 * Returns: int 0 Success
372 *
373 * Implicit returns:
374 * (imgp->ip_strings) saved path
375 * (imgp->ip_strspace) space remaining in ip_strings
376 * (imgp->ip_strendp) start of remaining copy area
377 * (imgp->ip_argspace) space remaining of NCARGS
378 *
379 */
380static int
381exec_reset_save_path(struct image_params *imgp)
382{
383 imgp->ip_strendp = imgp->ip_strings;
384 imgp->ip_argspace = NCARGS;
385 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
386
387 return (0);
388}
389
91447636
A
390/*
391 * exec_shell_imgact
392 *
393 * Image activator for interpreter scripts. If the image begins with the
394 * characters "#!", then it is an interpreter script. Verify that we are
0c530ab8 395 * not already executing in PowerPC mode, and that the length of the script
91447636
A
396 * line indicating the interpreter is not in excess of the maximum allowed
397 * size. If this is the case, then break out the arguments, if any, which
398 * are separated by white space, and copy them into the argument save area
399 * as if they were provided on the command line before all other arguments.
400 * The line ends when we encounter a comment character ('#') or newline.
401 *
402 * Parameters; struct image_params * image parameter block
403 *
404 * Returns: -1 not an interpreter (keep looking)
405 * -3 Success: interpreter: relookup
406 * >0 Failure: interpreter: error number
407 *
408 * A return value other than -1 indicates subsequent image activators should
409 * not be given the opportunity to attempt to activate the image.
410 */
411static int
412exec_shell_imgact(struct image_params *imgp)
413{
414 char *vdata = imgp->ip_vdata;
415 char *ihp;
6d2010ae 416 char *line_startp, *line_endp;
91447636 417 char *interp;
2d21ac55
A
418 proc_t p;
419 struct fileproc *fp;
420 int fd;
421 int error;
91447636
A
422
423 /*
424 * Make sure it's a shell script. If we've already redirected
425 * from an interpreted file once, don't do it again.
426 *
0c530ab8
A
427 * Note: We disallow PowerPC, since the expectation is that we
428 * may run a PowerPC interpreter, but not an interpret a PowerPC
91447636 429 * image. This is consistent with historical behaviour.
55e303ae 430 */
91447636
A
431 if (vdata[0] != '#' ||
432 vdata[1] != '!' ||
433 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
434 return (-1);
435 }
436
91447636 437 imgp->ip_flags |= IMGPF_INTERPRET;
6d2010ae
A
438 imgp->ip_interp_sugid_fd = -1;
439 imgp->ip_interp_buffer[0] = '\0';
91447636 440
6d2010ae 441 /* Check to see if SUGID scripts are permitted. If they aren't then
91447636
A
442 * clear the SUGID bits.
443 * imgp->ip_vattr is known to be valid.
6d2010ae
A
444 */
445 if (sugid_scripts == 0) {
446 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
91447636
A
447 }
448
6d2010ae
A
449 /* Try to find the first non-whitespace character */
450 for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
451 if (IS_EOL(*ihp)) {
452 /* Did not find interpreter, "#!\n" */
91447636 453 return (ENOEXEC);
6d2010ae
A
454 } else if (IS_WHITESPACE(*ihp)) {
455 /* Whitespace, like "#! /bin/sh\n", keep going. */
456 } else {
457 /* Found start of interpreter */
458 break;
459 }
91447636
A
460 }
461
6d2010ae
A
462 if (ihp == &vdata[IMG_SHSIZE]) {
463 /* All whitespace, like "#! " */
464 return (ENOEXEC);
465 }
91447636 466
6d2010ae
A
467 line_startp = ihp;
468
469 /* Try to find the end of the interpreter+args string */
470 for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
471 if (IS_EOL(*ihp)) {
472 /* Got it */
473 break;
474 } else {
475 /* Still part of interpreter or args */
476 }
477 }
91447636 478
6d2010ae
A
479 if (ihp == &vdata[IMG_SHSIZE]) {
480 /* A long line, like "#! blah blah blah" without end */
91447636 481 return (ENOEXEC);
6d2010ae
A
482 }
483
484 /* Backtrack until we find the last non-whitespace */
485 while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
486 ihp--;
487 }
488
489 /* The character after the last non-whitespace is our logical end of line */
490 line_endp = ihp + 1;
491
492 /*
493 * Now we have pointers to the usable part of:
494 *
495 * "#! /usr/bin/int first second third \n"
496 * ^ line_startp ^ line_endp
497 */
91447636
A
498
499 /* copy the interpreter name */
6d2010ae
A
500 interp = imgp->ip_interp_buffer;
501 for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
502 *interp++ = *ihp;
91447636
A
503 *interp = '\0';
504
6d2010ae
A
505 exec_reset_save_path(imgp);
506 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
b0d623f7 507 UIO_SYSSPACE);
91447636 508
6d2010ae
A
509 /* Copy the entire interpreter + args for later processing into argv[] */
510 interp = imgp->ip_interp_buffer;
511 for ( ihp = line_startp; (ihp < line_endp); ihp++)
512 *interp++ = *ihp;
513 *interp = '\0';
91447636 514
2d21ac55
A
515 /*
516 * If we have a SUID oder SGID script, create a file descriptor
517 * from the vnode and pass /dev/fd/%d instead of the actual
518 * path name so that the script does not get opened twice
519 */
520 if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
521 p = vfs_context_proc(imgp->ip_vfs_context);
522 error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
523 if (error)
524 return(error);
525
526 fp->f_fglob->fg_flag = FREAD;
2d21ac55
A
527 fp->f_fglob->fg_ops = &vnops;
528 fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
529
530 proc_fdlock(p);
531 procfdtbl_releasefd(p, fd, NULL);
532 fp_drop(p, fd, fp, 1);
533 proc_fdunlock(p);
534 vnode_ref(imgp->ip_vp);
535
6d2010ae 536 imgp->ip_interp_sugid_fd = fd;
2d21ac55
A
537 }
538
91447636
A
539 return (-3);
540}
541
542
543
544/*
545 * exec_fat_imgact
546 *
547 * Image activator for fat 1.0 binaries. If the binary is fat, then we
548 * need to select an image from it internally, and make that the image
549 * we are going to attempt to execute. At present, this consists of
550 * reloading the first page for the image with a first page from the
551 * offset location indicated by the fat header.
552 *
2d21ac55
A
553 * Parameters; struct image_params * image parameter block
554 *
555 * Returns: -1 not a fat binary (keep looking)
556 * -2 Success: encapsulated binary: reread
557 * >0 Failure: error number
558 *
91447636
A
559 * Important: This image activator is byte order neutral.
560 *
2d21ac55
A
561 * Note: A return value other than -1 indicates subsequent image
562 * activators should not be given the opportunity to attempt
563 * to activate the image.
564 *
565 * If we find an encapsulated binary, we make no assertions
91447636
A
566 * about its validity; instead, we leave that up to a rescan
567 * for an activator to claim it, and, if it is claimed by one,
568 * that activator is responsible for determining validity.
569 */
570static int
571exec_fat_imgact(struct image_params *imgp)
572{
2d21ac55 573 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
0c530ab8 574 kauth_cred_t cred = kauth_cred_proc_ref(p);
91447636 575 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
2d21ac55 576 struct _posix_spawnattr *psa = NULL;
91447636
A
577 struct fat_arch fat_arch;
578 int resid, error;
579 load_return_t lret;
580
581 /* Make sure it's a fat binary */
582 if ((fat_header->magic != FAT_MAGIC) &&
583 (fat_header->magic != FAT_CIGAM)) {
584 error = -1;
585 goto bad;
586 }
587
2d21ac55
A
588 /* If posix_spawn binprefs exist, respect those prefs. */
589 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
590 if (psa != NULL && psa->psa_binprefs[0] != 0) {
591 struct fat_arch *arches = (struct fat_arch *) (fat_header + 1);
592 int nfat_arch = 0, pr = 0, f = 0;
593
594 nfat_arch = OSSwapBigToHostInt32(fat_header->nfat_arch);
595 /* Check each preference listed against all arches in header */
596 for (pr = 0; pr < NBINPREFS; pr++) {
597 cpu_type_t pref = psa->psa_binprefs[pr];
598 if (pref == 0) {
599 /* No suitable arch in the pref list */
600 error = EBADARCH;
601 goto bad;
602 }
603
604 if (pref == CPU_TYPE_ANY) {
605 /* Fall through to regular grading */
606 break;
607 }
608
609 for (f = 0; f < nfat_arch; f++) {
610 cpu_type_t archtype = OSSwapBigToHostInt32(
611 arches[f].cputype);
612 cpu_type_t archsubtype = OSSwapBigToHostInt32(
613 arches[f].cpusubtype) & ~CPU_SUBTYPE_MASK;
614 if (pref == archtype &&
615 grade_binary(archtype, archsubtype)) {
616 /* We have a winner! */
617 fat_arch.cputype = archtype;
618 fat_arch.cpusubtype = archsubtype;
619 fat_arch.offset = OSSwapBigToHostInt32(
620 arches[f].offset);
621 fat_arch.size = OSSwapBigToHostInt32(
622 arches[f].size);
623 fat_arch.align = OSSwapBigToHostInt32(
624 arches[f].align);
625 goto use_arch;
626 }
627 }
628 }
629 }
630
91447636
A
631 /* Look up our preferred architecture in the fat file. */
632 lret = fatfile_getarch_affinity(imgp->ip_vp,
633 (vm_offset_t)fat_header,
634 &fat_arch,
635 (p->p_flag & P_AFFINITY));
636 if (lret != LOAD_SUCCESS) {
637 error = load_return_to_errno(lret);
638 goto bad;
639 }
640
2d21ac55
A
641use_arch:
642 /* Read the Mach-O header out of fat_arch */
91447636
A
643 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
644 PAGE_SIZE, fat_arch.offset,
b0d623f7 645 UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED),
91447636
A
646 cred, &resid, p);
647 if (error) {
648 goto bad;
649 }
650
651 /* Did we read a complete header? */
652 if (resid) {
653 error = EBADEXEC;
654 goto bad;
655 }
656
657 /* Success. Indicate we have identified an encapsulated binary */
658 error = -2;
659 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
660 imgp->ip_arch_size = (user_size_t)fat_arch.size;
661
662bad:
0c530ab8 663 kauth_cred_unref(&cred);
91447636
A
664 return (error);
665}
666
667/*
668 * exec_mach_imgact
669 *
670 * Image activator for mach-o 1.0 binaries.
671 *
2d21ac55
A
672 * Parameters; struct image_params * image parameter block
673 *
674 * Returns: -1 not an executable Mach-O image (keep looking)
675 * -2 Success: encapsulated binary: reread
676 * >0 Failure: error number
677 * EBADARCH Mach-o binary, but with an unrecognized
678 * architecture
679 * ENOMEM No memory for child process after -
680 * can only happen after vfork()
681 *
91447636 682 * Important: This image activator is NOT byte order neutral.
2d21ac55
A
683 *
684 * Note: A return value other than -1 indicates subsequent image
685 * activators should not be given the opportunity to attempt
686 * to activate the image.
687 *
688 * TODO: More gracefully handle failures after vfork
91447636
A
689 */
690static int
691exec_mach_imgact(struct image_params *imgp)
692{
693 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
2d21ac55 694 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
91447636
A
695 int error = 0;
696 int vfexec = 0;
697 task_t task;
2d21ac55 698 task_t new_task = NULL; /* protected by vfexec */
91447636 699 thread_t thread;
1c79356b 700 struct uthread *uthread;
91447636 701 vm_map_t old_map = VM_MAP_NULL;
0b4e3aa0 702 vm_map_t map;
91447636
A
703 load_return_t lret;
704 load_result_t load_result;
2d21ac55 705 struct _posix_spawnattr *psa = NULL;
b0d623f7 706 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
2d21ac55 707
91447636
A
708 /*
709 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
710 * is a reserved field on the end, so for the most part, we can
316670eb
A
711 * treat them as if they were identical. Reverse-endian Mach-O
712 * binaries are recognized but not compatible.
713 */
714 if ((mach_header->magic == MH_CIGAM) ||
715 (mach_header->magic == MH_CIGAM_64)) {
716 error = EBADARCH;
717 goto bad;
718 }
719
91447636
A
720 if ((mach_header->magic != MH_MAGIC) &&
721 (mach_header->magic != MH_MAGIC_64)) {
2d21ac55
A
722 error = -1;
723 goto bad;
724 }
725
726 switch (mach_header->filetype) {
727 case MH_DYLIB:
728 case MH_BUNDLE:
729 error = -1;
91447636
A
730 goto bad;
731 }
732
2d21ac55
A
733 if (!imgp->ip_origcputype) {
734 imgp->ip_origcputype = mach_header->cputype;
735 imgp->ip_origcpusubtype = mach_header->cpusubtype;
736 }
737
91447636
A
738 task = current_task();
739 thread = current_thread();
740 uthread = get_bsdthread_info(thread);
741
b0d623f7
A
742 /*
743 * Save off the vfexec state up front; we have to do this, because
6d2010ae 744 * we need to know if we were in this state initially subsequent to
b0d623f7
A
745 * creating the backing task, thread, and uthread for the child
746 * process (from the vfs_context_t from in img_parms).
747 */
91447636
A
748 if (uthread->uu_flag & UT_VFORK)
749 vfexec = 1; /* Mark in exec */
750
751 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
752 imgp->ip_flags |= IMGPF_IS_64BIT;
753
2d21ac55
A
754 /* If posix_spawn binprefs exist, respect those prefs. */
755 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
756 if (psa != NULL && psa->psa_binprefs[0] != 0) {
757 int pr = 0;
758 for (pr = 0; pr < NBINPREFS; pr++) {
759 cpu_type_t pref = psa->psa_binprefs[pr];
760 if (pref == 0) {
761 /* No suitable arch in the pref list */
762 error = EBADARCH;
763 goto bad;
764 }
765
766 if (pref == CPU_TYPE_ANY) {
767 /* Jump to regular grading */
768 goto grade;
769 }
770
771 if (pref == imgp->ip_origcputype) {
772 /* We have a match! */
773 goto grade;
774 }
775 }
91447636
A
776 error = EBADARCH;
777 goto bad;
778 }
2d21ac55 779grade:
39236c6e 780 if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
2d21ac55
A
781 error = EBADARCH;
782 goto bad;
91447636
A
783 }
784
2d21ac55
A
785 /* Copy in arguments/environment from the old process */
786 error = exec_extract_strings(imgp);
787 if (error)
788 goto bad;
789
6d2010ae
A
790 error = exec_add_apple_strings(imgp);
791 if (error)
792 goto bad;
b0d623f7 793
6d2010ae
A
794 AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
795 imgp->ip_endargv - imgp->ip_startargv);
796 AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
797 imgp->ip_endenvv - imgp->ip_endargv);
91447636 798
b0d623f7
A
799 /*
800 * We are being called to activate an image subsequent to a vfork()
801 * operation; in this case, we know that our task, thread, and
6d2010ae 802 * uthread are actually those of our parent, and our proc, which we
b0d623f7
A
803 * obtained indirectly from the image_params vfs_context_t, is the
804 * new child process.
805 */
806 if (vfexec || spawn) {
807 if (vfexec) {
808 imgp->ip_new_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
809 if (imgp->ip_new_thread == NULL) {
810 error = ENOMEM;
811 goto bad;
812 }
2d21ac55 813 }
b0d623f7 814
2d21ac55 815 /* reset local idea of thread, uthread, task */
b0d623f7 816 thread = imgp->ip_new_thread;
91447636 817 uthread = get_bsdthread_info(thread);
2d21ac55
A
818 task = new_task = get_threadtask(thread);
819 map = get_task_map(task);
91447636
A
820 } else {
821 map = VM_MAP_NULL;
822 }
823
824 /*
825 * We set these flags here; this is OK, since if we fail after
826 * this point, we have already destroyed the parent process anyway.
827 */
b0d623f7 828 task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
91447636
A
829 if (imgp->ip_flags & IMGPF_IS_64BIT) {
830 task_set_64bit(task, TRUE);
b0d623f7 831 OSBitOrAtomic(P_LP64, &p->p_flag);
91447636
A
832 } else {
833 task_set_64bit(task, FALSE);
b0d623f7 834 OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
91447636
A
835 }
836
837 /*
838 * Load the Mach-O file.
b0d623f7 839 *
91447636 840 * NOTE: An error after this point indicates we have potentially
6d2010ae 841 * destroyed or overwritten some process state while attempting an
91447636
A
842 * execve() following a vfork(), which is an unrecoverable condition.
843 */
844
91447636
A
845 /*
846 * Actually load the image file we previously decided to load.
847 */
2d21ac55 848 lret = load_machfile(imgp, mach_header, thread, map, &load_result);
91447636
A
849
850 if (lret != LOAD_SUCCESS) {
851 error = load_return_to_errno(lret);
852 goto badtoolate;
853 }
854
2d21ac55
A
855 vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
856
857 /*
858 * Set code-signing flags if this binary is signed, or if parent has
859 * requested them on exec.
860 */
861 if (load_result.csflags & CS_VALID) {
39236c6e 862 imgp->ip_csflags |= load_result.csflags &
2d21ac55 863 (CS_VALID|
39236c6e
A
864 CS_HARD|CS_KILL|CS_ENFORCEMENT|
865 CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
2d21ac55
A
866 } else {
867 imgp->ip_csflags &= ~CS_VALID;
868 }
869
870 if (p->p_csflags & CS_EXEC_SET_HARD)
871 imgp->ip_csflags |= CS_HARD;
872 if (p->p_csflags & CS_EXEC_SET_KILL)
873 imgp->ip_csflags |= CS_KILL;
39236c6e
A
874 if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
875 imgp->ip_csflags |= CS_ENFORCEMENT;
2d21ac55
A
876
877
2d21ac55
A
878 /*
879 * Set up the system reserved areas in the new address space.
880 */
881 vm_map_exec(get_task_map(task),
882 task,
883 (void *) p->p_fd->fd_rdir,
2d21ac55
A
884 cpu_type());
885
0c530ab8 886 /*
6d2010ae 887 * Close file descriptors which specify close-on-exec.
6601e61a 888 */
6d2010ae 889 fdexec(p, psa != NULL ? psa->psa_flags : 0);
91447636
A
890
891 /*
892 * deal with set[ug]id.
893 */
894 error = exec_handle_sugid(imgp);
895
b0d623f7
A
896 /* Make sure we won't interrupt ourself signalling a partial process */
897 if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
91447636
A
898 psignal(p, SIGTRAP);
899
900 if (error) {
901 goto badtoolate;
902 }
91447636
A
903
904 if (load_result.unixproc &&
905 create_unix_stack(get_task_map(task),
316670eb 906 &load_result,
2d21ac55 907 p) != KERN_SUCCESS) {
91447636
A
908 error = load_return_to_errno(LOAD_NOSPACE);
909 goto badtoolate;
910 }
911
b0d623f7 912 if (vfexec || spawn) {
91447636
A
913 old_map = vm_map_switch(get_task_map(task));
914 }
915
916 if (load_result.unixproc) {
917 user_addr_t ap;
918
919 /*
920 * Copy the strings area out into the new process address
921 * space.
922 */
923 ap = p->user_stack;
924 error = exec_copyout_strings(imgp, &ap);
925 if (error) {
b0d623f7 926 if (vfexec || spawn)
91447636
A
927 vm_map_switch(old_map);
928 goto badtoolate;
929 }
930 /* Set the stack */
931 thread_setuserstack(thread, ap);
932 }
933
934 if (load_result.dynlinker) {
935 uint64_t ap;
6d2010ae 936 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
91447636
A
937
938 /* Adjust the stack */
6d2010ae
A
939 ap = thread_adjuserstack(thread, -new_ptr_size);
940 error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
941
0c530ab8 942 if (error) {
b0d623f7 943 if (vfexec || spawn)
2d21ac55 944 vm_map_switch(old_map);
0c530ab8 945 goto badtoolate;
91447636 946 }
b0d623f7
A
947 task_set_dyld_info(task, load_result.all_image_info_addr,
948 load_result.all_image_info_size);
91447636
A
949 }
950
316670eb
A
951 /* Avoid immediate VM faults back into kernel */
952 exec_prefault_data(p, imgp, &load_result);
953
b0d623f7 954 if (vfexec || spawn) {
91447636
A
955 vm_map_switch(old_map);
956 }
957 /* Set the entry point */
958 thread_setentrypoint(thread, load_result.entry_point);
959
960 /* Stop profiling */
961 stopprofclock(p);
962
963 /*
964 * Reset signal state.
965 */
966 execsigs(p, thread);
967
91447636
A
968 /*
969 * need to cancel async IO requests that can be cancelled and wait for those
970 * already active. MAY BLOCK!
971 */
972 _aio_exec( p );
973
2d21ac55 974#if SYSV_SHM
91447636
A
975 /* FIXME: Till vmspace inherit is fixed: */
976 if (!vfexec && p->vm_shm)
977 shmexec(p);
2d21ac55
A
978#endif
979#if SYSV_SEM
91447636
A
980 /* Clean up the semaphores */
981 semexit(p);
2d21ac55 982#endif
91447636
A
983
984 /*
985 * Remember file name for accounting.
986 */
987 p->p_acflag &= ~AFORK;
988 /* If the translated name isn't NULL, then we want to use
989 * that translated name as the name we show as the "real" name.
990 * Otherwise, use the name passed into exec.
991 */
992 if (0 != imgp->ip_p_comm[0]) {
993 bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
994 sizeof(p->p_comm));
995 } else {
996 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
997 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
998 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
999 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
1000 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
1001 }
55e303ae 1002
6d2010ae
A
1003 pal_dbg_set_task_name( p->task );
1004
b0d623f7
A
1005 memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
1006
1007// <rdar://6598155> dtrace code cleanup needed
2d21ac55
A
1008#if CONFIG_DTRACE
1009 /*
1010 * Invalidate any predicate evaluation already cached for this thread by DTrace.
1011 * That's because we've just stored to p_comm and DTrace refers to that when it
1012 * evaluates the "execname" special variable. uid and gid may have changed as well.
1013 */
1014 dtrace_set_thread_predcache(current_thread(), 0);
1015
1016 /*
1017 * Free any outstanding lazy dof entries. It is imperative we
1018 * always call dtrace_lazy_dofs_destroy, rather than null check
1019 * and call if !NULL. If we NULL test, during lazy dof faulting
1020 * we can race with the faulting code and proceed from here to
1021 * beyond the helpers cleanup. The lazy dof faulting will then
1022 * install new helpers which no longer belong to this process!
1023 */
1024 dtrace_lazy_dofs_destroy(p);
1025
1026
1027 /*
1028 * Clean up any DTrace helpers for the process.
1029 */
1030 if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
1031 (*dtrace_helpers_cleanup)(p);
1032 }
1033
1034 /*
1035 * Cleanup the DTrace provider associated with this process.
1036 */
1037 proc_lock(p);
1038 if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
1039 (*dtrace_fasttrap_exec_ptr)(p);
1040 }
1041 proc_unlock(p);
1042#endif
1043
0c530ab8 1044 if (kdebug_enable) {
2d21ac55
A
1045 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
1046
1047 /*
1048 * Collect the pathname for tracing
1049 */
1050 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
1051
b0d623f7 1052 if (vfexec || spawn) {
2d21ac55 1053 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
b0d623f7 1054 p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread));
2d21ac55 1055 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
b0d623f7 1056 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
2d21ac55
A
1057 } else {
1058 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
1059 p->p_pid ,0,0,0,0);
1060 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
1061 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
1062 }
91447636
A
1063 }
1064
0c530ab8 1065 /*
316670eb
A
1066 * Ensure the 'translated' and 'affinity' flags are cleared, since we
1067 * no longer run PowerPC binaries.
0c530ab8 1068 */
316670eb 1069 OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag);
2d21ac55
A
1070
1071 /*
1072 * If posix_spawned with the START_SUSPENDED flag, stop the
1073 * process before it runs.
1074 */
1075 if (imgp->ip_px_sa != NULL) {
1076 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1077 if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1078 proc_lock(p);
1079 p->p_stat = SSTOP;
1080 proc_unlock(p);
1081 (void) task_suspend(p->task);
1082 }
39236c6e 1083 }
316670eb 1084
39236c6e
A
1085 /*
1086 * Apply the apptype state (which primes the task for importance donation)
1087 * This must be done after the exec so that the child's thread is ready
1088 */
1089 if (imgp->ip_px_sa != NULL) {
1090 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1091 exec_handle_spawnattr_apptype(p, psa->psa_apptype);
2d21ac55 1092 }
91447636
A
1093
1094 /*
1095 * mark as execed, wakeup the process that vforked (if any) and tell
b0d623f7 1096 * it that it now has its own resources back
91447636 1097 */
b0d623f7
A
1098 OSBitOrAtomic(P_EXEC, &p->p_flag);
1099 proc_resetregister(p);
2d21ac55
A
1100 if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1101 proc_lock(p);
1102 p->p_lflag &= ~P_LPPWAIT;
1103 proc_unlock(p);
91447636
A
1104 wakeup((caddr_t)p->p_pptr);
1105 }
1106
b0d623f7
A
1107 /*
1108 * Pay for our earlier safety; deliver the delayed signals from
1109 * the incomplete vfexec process now that it's complete.
1110 */
2d21ac55 1111 if (vfexec && (p->p_lflag & P_LTRACED)) {
91447636
A
1112 psignal_vfork(p, new_task, thread, SIGTRAP);
1113 }
1114
1115badtoolate:
b0d623f7 1116if (!spawn)
39236c6e
A
1117 /* notify only if it has not failed due to FP Key error */
1118 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
1119 proc_knote(p, NOTE_EXEC);
593a1d5f 1120
b0d623f7 1121 if (vfexec || spawn) {
91447636
A
1122 task_deallocate(new_task);
1123 thread_deallocate(thread);
1124 if (error)
1125 error = 0;
1126 }
1127
1128bad:
1129 return(error);
1130}
1131
1132
1133
1134
1135/*
1136 * Our image activator table; this is the table of the image types we are
1137 * capable of loading. We list them in order of preference to ensure the
1138 * fastest image load speed.
1139 *
1140 * XXX hardcoded, for now; should use linker sets
1141 */
1142struct execsw {
1143 int (*ex_imgact)(struct image_params *);
1144 const char *ex_name;
1145} execsw[] = {
1146 { exec_mach_imgact, "Mach-o Binary" },
1147 { exec_fat_imgact, "Fat Binary" },
1148 { exec_shell_imgact, "Interpreter Script" },
1149 { NULL, NULL}
1150};
1151
1152
1153/*
2d21ac55
A
1154 * exec_activate_image
1155 *
1156 * Description: Iterate through the available image activators, and activate
1157 * the image associated with the imgp structure. We start with
1158 * the
1159 *
1160 * Parameters: struct image_params * Image parameter block
1161 *
1162 * Returns: 0 Success
1163 * EBADEXEC The executable is corrupt/unknown
1164 * execargs_alloc:EINVAL Invalid argument
1165 * execargs_alloc:EACCES Permission denied
1166 * execargs_alloc:EINTR Interrupted function
1167 * execargs_alloc:ENOMEM Not enough space
1168 * exec_save_path:EFAULT Bad address
1169 * exec_save_path:ENAMETOOLONG Filename too long
1170 * exec_check_permissions:EACCES Permission denied
1171 * exec_check_permissions:ENOEXEC Executable file format error
1172 * exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
1173 * exec_check_permissions:???
1174 * namei:???
1175 * vn_rdwr:??? [anything vn_rdwr can return]
1176 * <ex_imgact>:??? [anything an imgact can return]
91447636 1177 */
2d21ac55
A
1178static int
1179exec_activate_image(struct image_params *imgp)
91447636 1180{
91447636 1181 struct nameidata nd;
2d21ac55
A
1182 int error;
1183 int resid;
91447636 1184 int once = 1; /* save SGUID-ness for interpreted files */
2d21ac55
A
1185 int i;
1186 int iterlimit = EAI_ITERLIMIT;
593a1d5f 1187 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1c79356b 1188
91447636 1189 error = execargs_alloc(imgp);
2d21ac55 1190 if (error)
39236c6e 1191 goto bad_notrans;
2d21ac55 1192
2d21ac55 1193 error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
55e303ae 1194 if (error) {
593a1d5f 1195 goto bad_notrans;
55e303ae 1196 }
91447636 1197
6d2010ae 1198 /* Use imgp->ip_strings, which contains the copyin-ed exec path */
2d21ac55
A
1199 DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
1200
6d2010ae
A
1201 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1202 UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
55e303ae 1203
91447636 1204again:
55e303ae
A
1205 error = namei(&nd);
1206 if (error)
593a1d5f 1207 goto bad_notrans;
91447636
A
1208 imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */
1209 imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */
1c79356b 1210
6d2010ae
A
1211 /*
1212 * Before we start the transition from binary A to binary B, make
1213 * sure another thread hasn't started exiting the process. We grab
1214 * the proc lock to check p_lflag initially, and the transition
1215 * mechanism ensures that the value doesn't change after we release
1216 * the lock.
1217 */
1218 proc_lock(p);
1219 if (p->p_lflag & P_LEXIT) {
1220 proc_unlock(p);
1221 goto bad_notrans;
1222 }
1223 error = proc_transstart(p, 1);
1224 proc_unlock(p);
b0d623f7
A
1225 if (error)
1226 goto bad_notrans;
593a1d5f 1227
91447636
A
1228 error = exec_check_permissions(imgp);
1229 if (error)
1c79356b 1230 goto bad;
1c79356b 1231
91447636
A
1232 /* Copy; avoid invocation of an interpreter overwriting the original */
1233 if (once) {
1234 once = 0;
2d21ac55 1235 *imgp->ip_origvattr = *imgp->ip_vattr;
91447636 1236 }
1c79356b 1237
91447636 1238 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
b0d623f7 1239 UIO_SYSSPACE, IO_NODELOCKED,
2d21ac55
A
1240 vfs_context_ucred(imgp->ip_vfs_context),
1241 &resid, vfs_context_proc(imgp->ip_vfs_context));
1c79356b
A
1242 if (error)
1243 goto bad;
91447636
A
1244
1245encapsulated_binary:
2d21ac55
A
1246 /* Limit the number of iterations we will attempt on each binary */
1247 if (--iterlimit == 0) {
1248 error = EBADEXEC;
1249 goto bad;
1250 }
91447636
A
1251 error = -1;
1252 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1c79356b 1253
91447636 1254 error = (*execsw[i].ex_imgact)(imgp);
1c79356b 1255
91447636
A
1256 switch (error) {
1257 /* case -1: not claimed: continue */
1258 case -2: /* Encapsulated binary */
1259 goto encapsulated_binary;
1c79356b 1260
91447636 1261 case -3: /* Interpreter */
2d21ac55
A
1262#if CONFIG_MACF
1263 /*
1264 * Copy the script label for later use. Note that
1265 * the label can be different when the script is
1266 * actually read by the interpreter.
1267 */
1268 if (imgp->ip_scriptlabelp)
1269 mac_vnode_label_free(imgp->ip_scriptlabelp);
1270 imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1271 if (imgp->ip_scriptlabelp == NULL) {
1272 error = ENOMEM;
1273 break;
1274 }
1275 mac_vnode_label_copy(imgp->ip_vp->v_label,
b0d623f7 1276 imgp->ip_scriptlabelp);
39236c6e
A
1277
1278 /*
1279 * Take a ref of the script vnode for later use.
1280 */
1281 if (imgp->ip_scriptvp)
1282 vnode_put(imgp->ip_scriptvp);
1283 if (vnode_getwithref(imgp->ip_vp) == 0)
1284 imgp->ip_scriptvp = imgp->ip_vp;
2d21ac55 1285#endif
6d2010ae
A
1286
1287 nameidone(&nd);
1288
91447636
A
1289 vnode_put(imgp->ip_vp);
1290 imgp->ip_vp = NULL; /* already put */
6d2010ae
A
1291 imgp->ip_ndp = NULL; /* already nameidone */
1292
1293 /* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */
1294 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
1295 UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
1c79356b 1296
593a1d5f 1297 proc_transend(p, 0);
91447636
A
1298 goto again;
1299
1300 default:
1301 break;
1302 }
1303 }
2d21ac55
A
1304
1305 /*
1306 * Call out to allow 3rd party notification of exec.
91447636
A
1307 * Ignore result of kauth_authorize_fileop call.
1308 */
1309 if (error == 0 && kauth_authorize_fileop_has_listeners()) {
2d21ac55
A
1310 kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1311 KAUTH_FILEOP_EXEC,
1312 (uintptr_t)nd.ni_vp, 0);
91447636 1313 }
91447636
A
1314
1315bad:
593a1d5f
A
1316 proc_transend(p, 0);
1317
1318bad_notrans:
91447636
A
1319 if (imgp->ip_strings)
1320 execargs_free(imgp);
2d21ac55
A
1321 if (imgp->ip_ndp)
1322 nameidone(imgp->ip_ndp);
91447636 1323
2d21ac55
A
1324 return (error);
1325}
91447636 1326
39236c6e
A
1327
1328/*
1329 * exec_handle_spawnattr_apptype
1330 *
1331 * Description: Decode and apply the posix_spawn apptype to the task.
1332 *
1333 * Parameters: proc_t p process to apply attributes to
1334 * int psa_apptype posix spawn attribute apptype
1335 *
1336 * Returns: 0 Success
1337 */
1338static errno_t
1339exec_handle_spawnattr_apptype(proc_t p, int psa_apptype)
1340{
1341 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1342 int apptype = TASK_APPTYPE_NONE;
1343 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1344
1345 switch(proctype) {
1346 case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1347 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1348 break;
1349 case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1350 apptype = TASK_APPTYPE_DAEMON_STANDARD;
1351 break;
1352 case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1353 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1354 break;
1355 case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1356 apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1357 break;
1358 case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1359 apptype = TASK_APPTYPE_APP_DEFAULT;
1360 break;
1361 case POSIX_SPAWN_PROC_TYPE_APP_TAL:
1362 apptype = TASK_APPTYPE_APP_TAL;
1363 break;
1364 default:
1365 apptype = TASK_APPTYPE_NONE;
1366 break;
1367 }
1368
1369 proc_set_task_apptype(p->task, apptype);
1370
1371 /* TODO: Should an invalid value here fail the spawn? */
1372 return (0);
1373 }
1374
1375 return (0);
1376}
1377
1378
2d21ac55
A
1379/*
1380 * exec_handle_port_actions
1381 *
1382 * Description: Go through the _posix_port_actions_t contents,
b0d623f7
A
1383 * calling task_set_special_port, task_set_exception_ports
1384 * and/or audit_session_spawnjoin for the current task.
2d21ac55
A
1385 *
1386 * Parameters: struct image_params * Image parameter block
b0d623f7 1387 * short psa_flags posix spawn attribute flags
2d21ac55
A
1388 *
1389 * Returns: 0 Success
6d2010ae 1390 * EINVAL Failure
b0d623f7 1391 * ENOTSUP Illegal posix_spawn attr flag was set
2d21ac55 1392 */
6d2010ae 1393static errno_t
39236c6e 1394exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch_ports)
91447636 1395{
2d21ac55
A
1396 _posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1397 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1398 _ps_port_action_t *act = NULL;
1399 task_t task = p->task;
1400 ipc_port_t port = NULL;
316670eb 1401 errno_t ret = 0;
2d21ac55 1402 int i;
91447636 1403
39236c6e
A
1404 if (need_portwatch != NULL)
1405 *need_portwatch = 0;
1406
2d21ac55
A
1407 for (i = 0; i < pacts->pspa_count; i++) {
1408 act = &pacts->pspa_actions[i];
91447636 1409
6d2010ae 1410 if (ipc_object_copyin(get_task_ipcspace(current_task()),
316670eb 1411 act->new_port, MACH_MSG_TYPE_COPY_SEND,
39236c6e
A
1412 (ipc_object_t *) &port) != KERN_SUCCESS) {
1413 ret = EINVAL;
1414 goto done;
1415 }
1c79356b 1416
2d21ac55 1417 switch (act->port_type) {
316670eb
A
1418 case PSPA_SPECIAL:
1419 /* Only allowed when not under vfork */
1420 if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1421 ret = ENOTSUP;
1422 else if (task_set_special_port(task,
39236c6e 1423 act->which, port) != KERN_SUCCESS)
316670eb
A
1424 ret = EINVAL;
1425 break;
1426
1427 case PSPA_EXCEPTION:
1428 /* Only allowed when not under vfork */
1429 if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1430 ret = ENOTSUP;
1431 else if (task_set_exception_ports(task,
39236c6e
A
1432 act->mask, port, act->behavior,
1433 act->flavor) != KERN_SUCCESS)
316670eb
A
1434 ret = EINVAL;
1435 break;
b0d623f7 1436#if CONFIG_AUDIT
316670eb
A
1437 case PSPA_AU_SESSION:
1438 ret = audit_session_spawnjoin(p, port);
1439 break;
b0d623f7 1440#endif
39236c6e
A
1441 case PSPA_IMP_WATCHPORTS:
1442 if (portwatch_ports != NULL) {
1443 if (need_portwatch != NULL)
1444 *need_portwatch = 1;
1445 /* hold on to this till end of spawn */
1446 portwatch_ports[i] = port;
1447 ret = 0;
1448 } else
1449 ipc_port_release_send(port);
1450 break;
316670eb
A
1451 default:
1452 ret = EINVAL;
1453 break;
2d21ac55 1454 }
316670eb 1455
2d21ac55 1456 /* action failed, so release port resources */
316670eb 1457
2d21ac55
A
1458 if (ret) {
1459 ipc_port_release_send(port);
316670eb 1460 break;
2d21ac55
A
1461 }
1462 }
1463
39236c6e
A
1464done:
1465 if (0 != ret)
1466 DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
316670eb 1467 return (ret);
2d21ac55
A
1468}
1469
1470/*
1471 * exec_handle_file_actions
1472 *
1473 * Description: Go through the _posix_file_actions_t contents applying the
1474 * open, close, and dup2 operations to the open file table for
1475 * the current process.
1476 *
1477 * Parameters: struct image_params * Image parameter block
1478 *
1479 * Returns: 0 Success
1480 * ???
1481 *
1482 * Note: Actions are applied in the order specified, with the credential
1483 * of the parent process. This is done to permit the parent
1484 * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1485 * the child following operations the child may in fact not be
1486 * normally permitted to perform.
1487 */
1488static int
6d2010ae 1489exec_handle_file_actions(struct image_params *imgp, short psa_flags)
2d21ac55
A
1490{
1491 int error = 0;
1492 int action;
1493 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1494 _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
b0d623f7 1495 int ival[2]; /* dummy retval for system calls) */
2d21ac55
A
1496
1497 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1498 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1499
1500 switch(psfa->psfaa_type) {
1501 case PSFA_OPEN: {
1502 /*
1503 * Open is different, in that it requires the use of
1504 * a path argument, which is normally copied in from
1505 * user space; because of this, we have to support an
1506 * open from kernel space that passes an address space
6d2010ae 1507 * context of UIO_SYSSPACE, and casts the address
2d21ac55
A
1508 * argument to a user_addr_t.
1509 */
1510 struct vnode_attr va;
1511 struct nameidata nd;
1512 int mode = psfa->psfaa_openargs.psfao_mode;
1513 struct dup2_args dup2a;
1514 struct close_nocancel_args ca;
1515 int origfd;
1516
1517 VATTR_INIT(&va);
1518 /* Mask off all but regular access permissions */
1519 mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1520 VATTR_SET(&va, va_mode, mode & ACCESSPERMS);
1521
6d2010ae 1522 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
2d21ac55
A
1523 CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1524 imgp->ip_vfs_context);
1525
1526 error = open1(imgp->ip_vfs_context,
1527 &nd,
1528 psfa->psfaa_openargs.psfao_oflag,
1529 &va,
39236c6e 1530 fileproc_alloc_init, NULL,
2d21ac55
A
1531 ival);
1532
1533 /*
1534 * If there's an error, or we get the right fd by
6d2010ae
A
1535 * accident, then drop out here. This is easier than
1536 * reworking all the open code to preallocate fd
2d21ac55
A
1537 * slots, and internally taking one as an argument.
1538 */
1539 if (error || ival[0] == psfa->psfaa_filedes)
1540 break;
1541
1542 origfd = ival[0];
1543 /*
1544 * If we didn't fall out from an error, we ended up
1545 * with the wrong fd; so now we've got to try to dup2
1546 * it to the right one.
1547 */
1548 dup2a.from = origfd;
1549 dup2a.to = psfa->psfaa_filedes;
1550
1551 /*
1552 * The dup2() system call implementation sets
1553 * ival to newfd in the success case, but we
1554 * can ignore that, since if we didn't get the
1555 * fd we wanted, the error will stop us.
1556 */
1557 error = dup2(p, &dup2a, ival);
1558 if (error)
1559 break;
1560
1561 /*
1562 * Finally, close the original fd.
1563 */
1564 ca.fd = origfd;
1565
1566 error = close_nocancel(p, &ca, ival);
1567 }
1568 break;
1569
1570 case PSFA_DUP2: {
1571 struct dup2_args dup2a;
1572
1573 dup2a.from = psfa->psfaa_filedes;
1574 dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1575
1576 /*
1577 * The dup2() system call implementation sets
1578 * ival to newfd in the success case, but we
1579 * can ignore that, since if we didn't get the
1580 * fd we wanted, the error will stop us.
1581 */
1582 error = dup2(p, &dup2a, ival);
1583 }
1584 break;
1585
1586 case PSFA_CLOSE: {
1587 struct close_nocancel_args ca;
1588
1589 ca.fd = psfa->psfaa_filedes;
1590
1591 error = close_nocancel(p, &ca, ival);
1592 }
1593 break;
1594
6d2010ae 1595 case PSFA_INHERIT: {
39236c6e 1596 struct fcntl_nocancel_args fcntla;
6d2010ae
A
1597
1598 /*
1599 * Check to see if the descriptor exists, and
1600 * ensure it's -not- marked as close-on-exec.
39236c6e
A
1601 *
1602 * Attempting to "inherit" a guarded fd will
1603 * result in a error.
6d2010ae 1604 */
39236c6e
A
1605 fcntla.fd = psfa->psfaa_filedes;
1606 fcntla.cmd = F_GETFD;
1607 if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0)
1608 break;
1609
1610 if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
1611 fcntla.fd = psfa->psfaa_filedes;
1612 fcntla.cmd = F_SETFD;
1613 fcntla.arg = ival[0] & ~FD_CLOEXEC;
1614 error = fcntl_nocancel(p, &fcntla, ival);
6d2010ae 1615 }
39236c6e 1616
6d2010ae
A
1617 }
1618 break;
1619
2d21ac55
A
1620 default:
1621 error = EINVAL;
1622 break;
1623 }
6d2010ae 1624
2d21ac55 1625 /* All file actions failures are considered fatal, per POSIX */
6d2010ae 1626
39236c6e
A
1627 if (error) {
1628 if (PSFA_OPEN == psfa->psfaa_type) {
1629 DTRACE_PROC1(spawn__open__failure, uintptr_t,
1630 psfa->psfaa_openargs.psfao_path);
1631 } else {
1632 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1633 }
2d21ac55 1634 break;
39236c6e 1635 }
2d21ac55
A
1636 }
1637
6d2010ae
A
1638 if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
1639 return (error);
1640
1641 /*
1642 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1643 * this spawn only) as if "close on exec" is the default
1644 * disposition of all pre-existing file descriptors. In this case,
1645 * the list of file descriptors mentioned in the file actions
1646 * are the only ones that can be inherited, so mark them now.
1647 *
1648 * The actual closing part comes later, in fdexec().
1649 */
1650 proc_fdlock(p);
1651 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1652 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1653 int fd = psfa->psfaa_filedes;
1654
1655 switch (psfa->psfaa_type) {
1656 case PSFA_DUP2:
1657 fd = psfa->psfaa_openargs.psfao_oflag;
1658 /*FALLTHROUGH*/
1659 case PSFA_OPEN:
1660 case PSFA_INHERIT:
1661 *fdflags(p, fd) |= UF_INHERIT;
1662 break;
1663
1664 case PSFA_CLOSE:
1665 break;
1666 }
1667 }
1668 proc_fdunlock(p);
1669
1670 return (0);
2d21ac55
A
1671}
1672
39236c6e
A
1673#if CONFIG_MACF
1674/*
1675 * exec_spawnattr_getmacpolicyinfo
1676 */
1677void *
1678exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
1679{
1680 const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1681 int i;
1682
1683 if (psmx == NULL)
1684 return NULL;
1685
1686 for (i = 0; i < psmx->psmx_count; i++) {
1687 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1688 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
1689 if (lenp != NULL)
1690 *lenp = extension->datalen;
1691 return extension->datap;
1692 }
1693 }
1694
1695 if (lenp != NULL)
1696 *lenp = 0;
1697 return NULL;
1698}
1699
1700static int
1701spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
1702{
1703 _posix_spawn_mac_policy_extensions_t psmx = NULL;
1704 int error = 0;
1705 int copycnt = 0;
1706 int i = 0;
1707
1708 *psmxp = NULL;
1709
1710 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
1711 px_args->mac_extensions_size > PAGE_SIZE) {
1712 error = EINVAL;
1713 goto bad;
1714 }
1715
1716 MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
1717 if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0)
1718 goto bad;
1719
1720 if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) {
1721 error = EINVAL;
1722 goto bad;
1723 }
1724
1725 for (i = 0; i < psmx->psmx_count; i++) {
1726 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1727 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
1728 error = EINVAL;
1729 goto bad;
1730 }
1731 }
1732
1733 for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
1734 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
1735 void *data = NULL;
1736
1737 MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
1738 if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
1739 FREE(data, M_TEMP);
1740 goto bad;
1741 }
1742 extension->datap = data;
1743 }
1744
1745 *psmxp = psmx;
1746 return 0;
1747
1748bad:
1749 if (psmx != NULL) {
1750 for (i = 0; i < copycnt; i++)
1751 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1752 FREE(psmx, M_TEMP);
1753 }
1754 return error;
1755}
1756
1757static void
1758spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
1759{
1760 int i;
1761
1762 if (psmx == NULL)
1763 return;
1764 for (i = 0; i < psmx->psmx_count; i++)
1765 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1766 FREE(psmx, M_TEMP);
1767}
1768#endif /* CONFIG_MACF */
2d21ac55
A
1769
1770/*
1771 * posix_spawn
1772 *
1773 * Parameters: uap->pid Pointer to pid return area
1774 * uap->fname File name to exec
1775 * uap->argp Argument list
1776 * uap->envp Environment list
1777 *
1778 * Returns: 0 Success
1779 * EINVAL Invalid argument
1780 * ENOTSUP Not supported
1781 * ENOEXEC Executable file format error
1782 * exec_activate_image:EINVAL Invalid argument
1783 * exec_activate_image:EACCES Permission denied
1784 * exec_activate_image:EINTR Interrupted function
1785 * exec_activate_image:ENOMEM Not enough space
1786 * exec_activate_image:EFAULT Bad address
1787 * exec_activate_image:ENAMETOOLONG Filename too long
1788 * exec_activate_image:ENOEXEC Executable file format error
1789 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
1790 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
1791 * exec_activate_image:???
1792 * mac_execve_enter:???
1793 *
b0d623f7 1794 * TODO: Expect to need __mac_posix_spawn() at some point...
2d21ac55
A
1795 * Handle posix_spawnattr_t
1796 * Handle posix_spawn_file_actions_t
1797 */
1798int
b0d623f7 1799posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
2d21ac55
A
1800{
1801 proc_t p = ap; /* quiet bogus GCC vfork() warning */
4a3eedf9 1802 user_addr_t pid = uap->pid;
b0d623f7
A
1803 int ival[2]; /* dummy retval for setpgid() */
1804 char *bufp = NULL;
1805 struct image_params *imgp;
1806 struct vnode_attr *vap;
1807 struct vnode_attr *origvap;
2d21ac55
A
1808 struct uthread *uthread = 0; /* compiler complains if not set to 0*/
1809 int error, sig;
2d21ac55
A
1810 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */
1811 int is_64 = IS_64BIT_PROCESS(p);
2d21ac55
A
1812 struct vfs_context context;
1813 struct user__posix_spawn_args_desc px_args;
1814 struct _posix_spawnattr px_sa;
1815 _posix_spawn_file_actions_t px_sfap = NULL;
1816 _posix_spawn_port_actions_t px_spap = NULL;
b0d623f7
A
1817 struct __kern_sigaction vec;
1818 boolean_t spawn_no_exec = FALSE;
6d2010ae
A
1819 boolean_t proc_transit_set = TRUE;
1820 boolean_t exec_done = FALSE;
39236c6e
A
1821 int need_portwatch = 0, portwatch_count = 0;
1822 ipc_port_t * portwatch_ports = NULL;
1823 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
2d21ac55 1824
b0d623f7
A
1825 /*
1826 * Allocate a big chunk for locals instead of using stack since these
6d2010ae 1827 * structures are pretty big.
b0d623f7
A
1828 */
1829 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
1830 imgp = (struct image_params *) bufp;
1831 if (bufp == NULL) {
1832 error = ENOMEM;
1833 goto bad;
1834 }
1835 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
1836 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2d21ac55
A
1837
1838 /* Initialize the common data in the image_params structure */
2d21ac55
A
1839 imgp->ip_user_fname = uap->path;
1840 imgp->ip_user_argv = uap->argv;
1841 imgp->ip_user_envv = uap->envp;
b0d623f7
A
1842 imgp->ip_vattr = vap;
1843 imgp->ip_origvattr = origvap;
2d21ac55
A
1844 imgp->ip_vfs_context = &context;
1845 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
1846 imgp->ip_p_comm = alt_p_comm; /* for PowerPC */
1847 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
1848
1849 if (uap->adesc != USER_ADDR_NULL) {
1850 if(is_64) {
1851 error = copyin(uap->adesc, &px_args, sizeof(px_args));
1852 } else {
b0d623f7 1853 struct user32__posix_spawn_args_desc px_args32;
2d21ac55
A
1854
1855 error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
1856
1857 /*
1858 * Convert arguments descriptor from external 32 bit
1859 * representation to internal 64 bit representation
1860 */
1861 px_args.attr_size = px_args32.attr_size;
1862 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
1863 px_args.file_actions_size = px_args32.file_actions_size;
1864 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
1865 px_args.port_actions_size = px_args32.port_actions_size;
1866 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
39236c6e
A
1867 px_args.mac_extensions_size = px_args32.mac_extensions_size;
1868 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
2d21ac55
A
1869 }
1870 if (error)
1871 goto bad;
1872
1873 if (px_args.attr_size != 0) {
1874 /*
39236c6e
A
1875 * We are not copying the port_actions pointer,
1876 * because we already have it from px_args.
2d21ac55 1877 */
39236c6e
A
1878
1879
1880 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0))
2d21ac55 1881 goto bad;
39236c6e
A
1882
1883 bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
2d21ac55
A
1884
1885 imgp->ip_px_sa = &px_sa;
1886 }
1887 if (px_args.file_actions_size != 0) {
1888 /* Limit file_actions to allowed number of open files */
1889 int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
1890 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
1891 px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) {
1892 error = EINVAL;
1893 goto bad;
1894 }
1895 MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
1896 if (px_sfap == NULL) {
1897 error = ENOMEM;
1898 goto bad;
1899 }
1900 imgp->ip_px_sfa = px_sfap;
1901
1902 if ((error = copyin(px_args.file_actions, px_sfap,
1903 px_args.file_actions_size)) != 0)
1904 goto bad;
39236c6e
A
1905
1906 /* Verify that the action count matches the struct size */
1907 if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) {
1908 error = EINVAL;
1909 goto bad;
1910 }
2d21ac55
A
1911 }
1912 if (px_args.port_actions_size != 0) {
1913 /* Limit port_actions to one page of data */
1914 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
1915 px_args.port_actions_size > PAGE_SIZE) {
1916 error = EINVAL;
1917 goto bad;
1918 }
1919
1920 MALLOC(px_spap, _posix_spawn_port_actions_t,
1921 px_args.port_actions_size, M_TEMP, M_WAITOK);
1922 if (px_spap == NULL) {
1923 error = ENOMEM;
1924 goto bad;
1925 }
1926 imgp->ip_px_spa = px_spap;
1927
1928 if ((error = copyin(px_args.port_actions, px_spap,
1929 px_args.port_actions_size)) != 0)
1930 goto bad;
39236c6e
A
1931
1932 /* Verify that the action count matches the struct size */
1933 if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) {
1934 error = EINVAL;
1935 goto bad;
1936 }
1937 }
1938#if CONFIG_MACF
1939 if (px_args.mac_extensions_size != 0) {
1940 if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
1941 goto bad;
2d21ac55 1942 }
39236c6e 1943#endif /* CONFIG_MACF */
2d21ac55
A
1944 }
1945
b0d623f7
A
1946 /* set uthread to parent */
1947 uthread = get_bsdthread_info(current_thread());
1948
1949 /*
1950 * <rdar://6640530>; this does not result in a behaviour change
1951 * relative to Leopard, so there should not be any existing code
1952 * which depends on it.
1953 */
1954 if (uthread->uu_flag & UT_VFORK) {
1955 error = EINVAL;
1956 goto bad;
1957 }
1958
1959 /*
6d2010ae 1960 * If we don't have the extension flag that turns "posix_spawn()"
b0d623f7
A
1961 * into "execve() with options", then we will be creating a new
1962 * process which does not inherit memory from the parent process,
1963 * which is one of the most expensive things about using fork()
1964 * and execve().
1965 */
2d21ac55 1966 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
b0d623f7 1967 if ((error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN)) != 0)
2d21ac55 1968 goto bad;
b0d623f7
A
1969 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */
1970 spawn_no_exec = TRUE; /* used in later tests */
39236c6e 1971 DTRACE_PROC1(create, proc_t, p);
2d21ac55
A
1972 }
1973
b0d623f7
A
1974 if (spawn_no_exec)
1975 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
6d2010ae 1976 assert(p != NULL);
2d21ac55 1977
b0d623f7 1978 /* By default, the thread everyone plays with is the parent */
2d21ac55
A
1979 context.vc_thread = current_thread();
1980 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
1981
b0d623f7
A
1982 /*
1983 * However, if we're not in the setexec case, redirect the context
1984 * to the newly created process instead
1985 */
1986 if (spawn_no_exec)
1987 context.vc_thread = imgp->ip_new_thread;
1988
2d21ac55
A
1989 /*
1990 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
1991 * to handle the file_actions. Since vfork() also ends up setting
1992 * us into the parent process group, and saved off the signal flags,
1993 * this is also where we want to handle the spawn flags.
1994 */
6d2010ae 1995
2d21ac55 1996 /* Has spawn file actions? */
6d2010ae
A
1997 if (imgp->ip_px_sfa != NULL) {
1998 /*
1999 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2000 * is handled in exec_handle_file_actions().
2001 */
2002 if ((error = exec_handle_file_actions(imgp,
2003 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
2004 goto bad;
2d21ac55
A
2005 }
2006
2007 /* Has spawn port actions? */
39236c6e
A
2008 if (imgp->ip_px_spa != NULL) {
2009 boolean_t is_adaptive = FALSE;
2010
2011 /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
2012 if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2013 is_adaptive = TRUE;
2014
2015 /*
2016 * portwatch only:
2017 * Allocate a place to store the ports we want to bind to the new task
2018 * We can't bind them until after the apptype is set.
b0d623f7 2019 */
39236c6e
A
2020 if (px_spap->pspa_count != 0 && is_adaptive) {
2021 portwatch_count = px_spap->pspa_count;
2022 MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO);
2023 } else {
2024 portwatch_ports = NULL;
2025 }
2026
6d2010ae 2027 if ((error = exec_handle_port_actions(imgp,
39236c6e 2028 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &need_portwatch, portwatch_ports)) != 0)
2d21ac55
A
2029 goto bad;
2030 }
2031
2032 /* Has spawn attr? */
2033 if (imgp->ip_px_sa != NULL) {
b0d623f7
A
2034 /*
2035 * Set the process group ID of the child process; this has
2036 * to happen before the image activation.
2037 */
2d21ac55
A
2038 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2039 struct setpgid_args spga;
2040 spga.pid = p->p_pid;
2041 spga.pgid = px_sa.psa_pgroup;
2042 /*
2043 * Effectively, call setpgid() system call; works
2044 * because there are no pointer arguments.
2045 */
2046 if((error = setpgid(p, &spga, ival)) != 0)
2047 goto bad;
2048 }
b0d623f7 2049
2d21ac55
A
2050 /*
2051 * Reset UID/GID to parent's RUID/RGID; This works only
2052 * because the operation occurs *after* the vfork() and
2053 * before the call to exec_handle_sugid() by the image
b0d623f7
A
2054 * activator called from exec_activate_image(). POSIX
2055 * requires that any setuid/setgid bits on the process
2056 * image will take precedence over the spawn attributes
2057 * (re)setting them.
2d21ac55
A
2058 *
2059 * The use of p_ucred is safe, since we are acting on the
2060 * new process, and it has no threads other than the one
2061 * we are creating for it.
2062 */
2063 if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2064 kauth_cred_t my_cred = p->p_ucred;
6d2010ae
A
2065 kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2066 if (my_new_cred != my_cred) {
2d21ac55 2067 p->p_ucred = my_new_cred;
6d2010ae
A
2068 /* update cred on proc */
2069 PROC_UPDATE_CREDS_ONPROC(p);
2070 }
2d21ac55 2071 }
6d2010ae
A
2072
2073 /*
2074 * Disable ASLR for the spawned process.
2075 */
39236c6e
A
2076 /*
2077 * But only do so if we are not embedded; embedded allows for a
2078 * boot-arg (-disable_aslr) to deal with this (which itself is
2079 * only honored on DEVELOPMENT or DEBUG builds of xnu).
2080 */
6d2010ae
A
2081 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2082 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2083
2084 /*
2085 * Forcibly disallow execution from data pages for the spawned process
2086 * even if it would otherwise be permitted by the architecture default.
2087 */
2088 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2089 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
b0d623f7
A
2090 }
2091
6d2010ae
A
2092 /*
2093 * Disable ASLR during image activation. This occurs either if the
2094 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2095 * P_DISABLE_ASLR was inherited from the parent process.
2096 */
2097 if (p->p_flag & P_DISABLE_ASLR)
2098 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
2099
b0d623f7
A
2100 /*
2101 * Clear transition flag so we won't hang if exec_activate_image() causes
2102 * an automount (and launchd does a proc sysctl to service it).
2103 *
2104 * <rdar://problem/6848672>, <rdar://problem/5959568>.
2105 */
2106 if (spawn_no_exec) {
2107 proc_transend(p, 0);
6d2010ae 2108 proc_transit_set = 0;
b0d623f7
A
2109 }
2110
2111#if MAC_SPAWN /* XXX */
2112 if (uap->mac_p != USER_ADDR_NULL) {
2113 error = mac_execve_enter(uap->mac_p, imgp);
2114 if (error)
2115 goto bad;
2116 }
2117#endif
2118
2119 /*
2120 * Activate the image
2121 */
2122 error = exec_activate_image(imgp);
2123
6d2010ae
A
2124 if (error == 0) {
2125 /* process completed the exec */
2126 exec_done = TRUE;
2127 } else if (error == -1) {
2128 /* Image not claimed by any activator? */
b0d623f7 2129 error = ENOEXEC;
6d2010ae 2130 }
b0d623f7
A
2131
2132 /*
2133 * If we have a spawn attr, and it contains signal related flags,
2134 * then we need to process them in the "context" of the new child
2135 * process, so we have to process it following image activation,
2136 * prior to making the thread runnable in user space. This is
2137 * necessitated by some signal information being per-thread rather
2138 * than per-process, and we don't have the new allocation in hand
2139 * until after the image is activated.
2140 */
2141 if (!error && imgp->ip_px_sa != NULL) {
2142 thread_t child_thread = current_thread();
2143 uthread_t child_uthread = uthread;
2144
2145 /*
2146 * If we created a new child thread, then the thread and
2147 * uthread are different than the current ones; otherwise,
2148 * we leave them, since we are in the exec case instead.
2149 */
2150 if (spawn_no_exec) {
2151 child_thread = imgp->ip_new_thread;
2152 child_uthread = get_bsdthread_info(child_thread);
2153 }
2154
2d21ac55
A
2155 /*
2156 * Mask a list of signals, instead of them being unmasked, if
2157 * they were unmasked in the parent; note that some signals
2158 * are not maskable.
2159 */
2160 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
b0d623f7 2161 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2d21ac55
A
2162 /*
2163 * Default a list of signals instead of ignoring them, if
b0d623f7
A
2164 * they were ignored in the parent. Note that we pass
2165 * spawn_no_exec to setsigvec() to indicate that we called
2166 * fork1() and therefore do not need to call proc_signalstart()
2167 * internally.
2d21ac55
A
2168 */
2169 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2170 vec.sa_handler = SIG_DFL;
2171 vec.sa_tramp = 0;
2172 vec.sa_mask = 0;
2173 vec.sa_flags = 0;
2174 for (sig = 0; sig < NSIG; sig++)
b0d623f7
A
2175 if (px_sa.psa_sigdefault & (1 << sig)) {
2176 error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec);
2d21ac55
A
2177 }
2178 }
316670eb
A
2179
2180 /*
2181 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2182 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2183 * limit.
2184 *
2185 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2186 */
2187 if (px_sa.psa_cpumonitor_percent != 0) {
39236c6e
A
2188 /*
2189 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2190 * an entitlement to configure the monitor a certain way seems silly, since
2191 * whomever is turning it on could just as easily choose not to do so.
2192 *
2193 * XXX - Ignore the parameters that we get from userland. The spawnattr method of
2194 * activating the monitor always gets the system default parameters. Once we have
2195 * an explicit spawn SPI for configuring the defaults, we can revert this to
2196 * respect the params passed in from userland.
2197 */
316670eb
A
2198 error = proc_set_task_ruse_cpu(p->task,
2199 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
39236c6e
A
2200 PROC_POLICY_CPUMON_DEFAULTS, 0,
2201 0, TRUE);
316670eb 2202 }
2d21ac55
A
2203 }
2204
b0d623f7 2205bad:
39236c6e
A
2206
2207 if (portwatch_ports != NULL) {
2208 int needboost = 0;
2209
2210 /*
2211 * Mark the ports as destined to be handed off to the new task, and
2212 * transfer any boosts to the new task.
2213 * We need to release the rights even if the posix_spawn has failed.
2214 */
2215 if (need_portwatch != 0) {
2216 for (int i = 0; i < portwatch_count; i++) {
2217 ipc_port_t port = NULL;
2218
2219 if ((port = portwatch_ports[i]) != NULL) {
2220 int boost = 0;
2221 if (error == 0)
2222 task_add_importance_watchport(p->task, p->p_pid, port, &boost);
2223 ipc_port_release_send(port);
2224 needboost += boost;
2225 }
2226 }
2227 }
2228
2229 if (needboost != 0) {
2230 /*
2231 * Apply the boost count found on the ports, which will keep the
2232 * newly created process out of background until it handles the incoming messages.
2233 */
2234 task_hold_multiple_assertion(p->task, needboost);
2235 }
2236
2237 FREE(portwatch_ports, M_TEMP);
2238 portwatch_ports = NULL;
2239 portwatch_count = 0;
2240 }
2241
b0d623f7 2242 if (error == 0) {
316670eb 2243 /* reset delay idle sleep status if set */
316670eb
A
2244 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2245 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
b0d623f7
A
2246 /* upon successful spawn, re/set the proc control state */
2247 if (imgp->ip_px_sa != NULL) {
2248 switch (px_sa.psa_pcontrol) {
2249 case POSIX_SPAWN_PCONTROL_THROTTLE:
2250 p->p_pcaction = P_PCTHROTTLE;
2251 break;
2252 case POSIX_SPAWN_PCONTROL_SUSPEND:
2253 p->p_pcaction = P_PCSUSP;
2254 break;
2255 case POSIX_SPAWN_PCONTROL_KILL:
2256 p->p_pcaction = P_PCKILL;
2257 break;
2258 case POSIX_SPAWN_PCONTROL_NONE:
2259 default:
2260 p->p_pcaction = 0;
2261 break;
2262 };
2d21ac55 2263 }
b0d623f7 2264 exec_resettextvp(p, imgp);
316670eb 2265
39236c6e 2266#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
316670eb 2267 /* Has jetsam attributes? */
39236c6e
A
2268 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2269 memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2270 TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
316670eb
A
2271 }
2272#endif
2d21ac55
A
2273 }
2274
b0d623f7
A
2275 /*
2276 * If we successfully called fork1(), we always need to do this;
2277 * we identify this case by noting the IMGPF_SPAWN flag. This is
2278 * because we come back from that call with signals blocked in the
2279 * child, and we have to unblock them, but we want to wait until
2280 * after we've performed any spawn actions. This has to happen
2281 * before check_for_signature(), which uses psignal.
2282 */
2283 if (spawn_no_exec) {
6d2010ae
A
2284 if (proc_transit_set)
2285 proc_transend(p, 0);
2286
b0d623f7
A
2287 /*
2288 * Drop the signal lock on the child which was taken on our
2289 * behalf by forkproc()/cloneproc() to prevent signals being
2290 * received by the child in a partially constructed state.
2291 */
2292 proc_signalend(p, 0);
2293
2294 /* flag the 'fork' has occurred */
2295 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
2296 /* then flag exec has occurred */
39236c6e
A
2297 /* notify only if it has not failed due to FP Key error */
2298 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
2299 proc_knote(p, NOTE_EXEC);
2d21ac55 2300 }
2d21ac55 2301
b0d623f7
A
2302 /*
2303 * We have to delay operations which might throw a signal until after
2304 * the signals have been unblocked; however, we want that to happen
2305 * after exec_resettextvp() so that the textvp is correct when they
2306 * fire.
2307 */
2d21ac55 2308 if (error == 0) {
b0d623f7
A
2309 error = check_for_signature(p, imgp);
2310
2311 /*
2312 * Pay for our earlier safety; deliver the delayed signals from
2313 * the incomplete spawn process now that it's complete.
2314 */
2315 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
2316 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
2317 }
2318 }
2319
2d21ac55 2320
b0d623f7
A
2321 if (imgp != NULL) {
2322 if (imgp->ip_vp)
2323 vnode_put(imgp->ip_vp);
39236c6e
A
2324 if (imgp->ip_scriptvp)
2325 vnode_put(imgp->ip_scriptvp);
b0d623f7
A
2326 if (imgp->ip_strings)
2327 execargs_free(imgp);
2328 if (imgp->ip_px_sfa != NULL)
2329 FREE(imgp->ip_px_sfa, M_TEMP);
2330 if (imgp->ip_px_spa != NULL)
2331 FREE(imgp->ip_px_spa, M_TEMP);
2332
2d21ac55 2333#if CONFIG_MACF
39236c6e
A
2334 if (imgp->ip_px_smpx != NULL)
2335 spawn_free_macpolicyinfo(imgp->ip_px_smpx);
b0d623f7
A
2336 if (imgp->ip_execlabelp)
2337 mac_cred_label_free(imgp->ip_execlabelp);
2338 if (imgp->ip_scriptlabelp)
2339 mac_vnode_label_free(imgp->ip_scriptlabelp);
2d21ac55 2340#endif
b0d623f7
A
2341 }
2342
39236c6e
A
2343#if CONFIG_DTRACE
2344 if (spawn_no_exec) {
2345 /*
2346 * In the original DTrace reference implementation,
2347 * posix_spawn() was a libc routine that just
2348 * did vfork(2) then exec(2). Thus the proc::: probes
2349 * are very fork/exec oriented. The details of this
2350 * in-kernel implementation of posix_spawn() is different
2351 * (while producing the same process-observable effects)
2352 * particularly w.r.t. errors, and which thread/process
2353 * is constructing what on behalf of whom.
2354 */
2355 if (error) {
2356 DTRACE_PROC1(spawn__failure, int, error);
2357 } else {
2358 DTRACE_PROC(spawn__success);
2359 /*
2360 * Some DTrace scripts, e.g. newproc.d in
2361 * /usr/bin, rely on the 'exec-success'
2362 * probe being fired in the child after the
2363 * new process image has been constructed
2364 * in order to determine the associated pid.
2365 *
2366 * So, even though the parent built the image
2367 * here, for compatibility, mark the new thread
2368 * so 'exec-success' fires on it as it leaves
2369 * the kernel.
2370 */
2371 dtrace_thread_didexec(imgp->ip_new_thread);
2372 }
b0d623f7 2373 } else {
39236c6e
A
2374 if (error) {
2375 DTRACE_PROC1(exec__failure, int, error);
2376 } else {
2377 DTRACE_PROC(exec__success);
2378 }
b0d623f7 2379 }
39236c6e 2380#endif
b0d623f7
A
2381
2382 /* Return to both the parent and the child? */
2383 if (imgp != NULL && spawn_no_exec) {
2d21ac55 2384 /*
2d21ac55
A
2385 * If the parent wants the pid, copy it out
2386 */
4a3eedf9
A
2387 if (pid != USER_ADDR_NULL)
2388 (void)suword(pid, p->p_pid);
2d21ac55 2389 retval[0] = error;
2d21ac55 2390
2d21ac55 2391 /*
b0d623f7
A
2392 * If we had an error, perform an internal reap ; this is
2393 * entirely safe, as we have a real process backing us.
2d21ac55 2394 */
b0d623f7
A
2395 if (error) {
2396 proc_list_lock();
2397 p->p_listflag |= P_LIST_DEADPARENT;
2398 proc_list_unlock();
2399 proc_lock(p);
2400 /* make sure no one else has killed it off... */
2401 if (p->p_stat != SZOMB && p->exit_thread == NULL) {
2402 p->exit_thread = current_thread();
2403 proc_unlock(p);
2404 exit1(p, 1, (int *)NULL);
6d2010ae
A
2405 if (exec_done == FALSE) {
2406 task_deallocate(get_threadtask(imgp->ip_new_thread));
2407 thread_deallocate(imgp->ip_new_thread);
2408 }
b0d623f7
A
2409 } else {
2410 /* someone is doing it for us; just skip it */
2411 proc_unlock(p);
2412 }
2413 } else {
2d21ac55 2414
b0d623f7 2415 /*
39236c6e 2416 * Return to the child
b0d623f7
A
2417 *
2418 * Note: the image activator earlier dropped the
2419 * task/thread references to the newly spawned
2420 * process; this is OK, since we still have suspended
2421 * queue references on them, so we should be fine
2422 * with the delayed resume of the thread here.
2423 */
2424 (void)thread_resume(imgp->ip_new_thread);
2425 }
2426 }
2427 if (bufp != NULL) {
2428 FREE(bufp, M_TEMP);
2429 }
2430
2d21ac55
A
2431 return(error);
2432}
2433
2434
2435/*
2436 * execve
2437 *
2438 * Parameters: uap->fname File name to exec
2439 * uap->argp Argument list
2440 * uap->envp Environment list
2441 *
2442 * Returns: 0 Success
2443 * __mac_execve:EINVAL Invalid argument
2444 * __mac_execve:ENOTSUP Not supported
2445 * __mac_execve:EACCES Permission denied
2446 * __mac_execve:EINTR Interrupted function
2447 * __mac_execve:ENOMEM Not enough space
2448 * __mac_execve:EFAULT Bad address
2449 * __mac_execve:ENAMETOOLONG Filename too long
2450 * __mac_execve:ENOEXEC Executable file format error
2451 * __mac_execve:ETXTBSY Text file busy [misuse of error code]
2452 * __mac_execve:???
2453 *
2454 * TODO: Dynamic linker header address on stack is copied via suword()
2455 */
2456/* ARGSUSED */
2457int
b0d623f7 2458execve(proc_t p, struct execve_args *uap, int32_t *retval)
2d21ac55
A
2459{
2460 struct __mac_execve_args muap;
2461 int err;
2462
39236c6e
A
2463 memoryshot(VM_EXECVE, DBG_FUNC_NONE);
2464
2d21ac55
A
2465 muap.fname = uap->fname;
2466 muap.argp = uap->argp;
2467 muap.envp = uap->envp;
2468 muap.mac_p = USER_ADDR_NULL;
2469 err = __mac_execve(p, &muap, retval);
2470
2471 return(err);
2472}
2473
2474/*
2475 * __mac_execve
2476 *
2477 * Parameters: uap->fname File name to exec
2478 * uap->argp Argument list
2479 * uap->envp Environment list
2480 * uap->mac_p MAC label supplied by caller
2481 *
2482 * Returns: 0 Success
2483 * EINVAL Invalid argument
2484 * ENOTSUP Not supported
2485 * ENOEXEC Executable file format error
2486 * exec_activate_image:EINVAL Invalid argument
2487 * exec_activate_image:EACCES Permission denied
2488 * exec_activate_image:EINTR Interrupted function
2489 * exec_activate_image:ENOMEM Not enough space
2490 * exec_activate_image:EFAULT Bad address
2491 * exec_activate_image:ENAMETOOLONG Filename too long
2492 * exec_activate_image:ENOEXEC Executable file format error
2493 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
2494 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
2495 * exec_activate_image:???
2496 * mac_execve_enter:???
2497 *
2498 * TODO: Dynamic linker header address on stack is copied via suword()
2499 */
2500int
b0d623f7 2501__mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
2d21ac55 2502{
b0d623f7
A
2503 char *bufp = NULL;
2504 struct image_params *imgp;
2505 struct vnode_attr *vap;
2506 struct vnode_attr *origvap;
2d21ac55 2507 int error;
2d21ac55
A
2508 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */
2509 int is_64 = IS_64BIT_PROCESS(p);
2510 struct vfs_context context;
2511
2512 context.vc_thread = current_thread();
2513 context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
2514
b0d623f7
A
2515 /* Allocate a big chunk for locals instead of using stack since these
2516 * structures a pretty big.
2517 */
2518 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
2519 imgp = (struct image_params *) bufp;
2520 if (bufp == NULL) {
2521 error = ENOMEM;
2522 goto exit_with_error;
2523 }
2524 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
2525 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2526
2d21ac55 2527 /* Initialize the common data in the image_params structure */
2d21ac55
A
2528 imgp->ip_user_fname = uap->fname;
2529 imgp->ip_user_argv = uap->argp;
2530 imgp->ip_user_envv = uap->envp;
b0d623f7
A
2531 imgp->ip_vattr = vap;
2532 imgp->ip_origvattr = origvap;
2d21ac55 2533 imgp->ip_vfs_context = &context;
6d2010ae 2534 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
2d21ac55
A
2535 imgp->ip_p_comm = alt_p_comm; /* for PowerPC */
2536 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2537
2d21ac55
A
2538#if CONFIG_MACF
2539 if (uap->mac_p != USER_ADDR_NULL) {
2540 error = mac_execve_enter(uap->mac_p, imgp);
2541 if (error) {
2542 kauth_cred_unref(&context.vc_ucred);
b0d623f7 2543 goto exit_with_error;
2d21ac55
A
2544 }
2545 }
2546#endif
2547
2d21ac55 2548 error = exec_activate_image(imgp);
2d21ac55
A
2549
2550 kauth_cred_unref(&context.vc_ucred);
2551
2552 /* Image not claimed by any activator? */
2553 if (error == -1)
2554 error = ENOEXEC;
2555
2556 if (error == 0) {
2557 exec_resettextvp(p, imgp);
b0d623f7 2558 error = check_for_signature(p, imgp);
2d21ac55
A
2559 }
2560 if (imgp->ip_vp != NULLVP)
2561 vnode_put(imgp->ip_vp);
39236c6e
A
2562 if (imgp->ip_scriptvp != NULLVP)
2563 vnode_put(imgp->ip_scriptvp);
2d21ac55
A
2564 if (imgp->ip_strings)
2565 execargs_free(imgp);
2566#if CONFIG_MACF
2567 if (imgp->ip_execlabelp)
2568 mac_cred_label_free(imgp->ip_execlabelp);
2569 if (imgp->ip_scriptlabelp)
2570 mac_vnode_label_free(imgp->ip_scriptlabelp);
2571#endif
2572 if (!error) {
b0d623f7 2573 struct uthread *uthread;
2d21ac55 2574
b0d623f7
A
2575 /* Sever any extant thread affinity */
2576 thread_affinity_exec(current_thread());
2577
2578 DTRACE_PROC(exec__success);
2579 uthread = get_bsdthread_info(current_thread());
2d21ac55
A
2580 if (uthread->uu_flag & UT_VFORK) {
2581 vfork_return(p, retval, p->p_pid);
b0d623f7 2582 (void)thread_resume(imgp->ip_new_thread);
2d21ac55
A
2583 }
2584 } else {
2585 DTRACE_PROC1(exec__failure, int, error);
2586 }
b0d623f7
A
2587
2588exit_with_error:
2589 if (bufp != NULL) {
2590 FREE(bufp, M_TEMP);
2591 }
2d21ac55
A
2592
2593 return(error);
2594}
2595
2596
2597/*
2598 * copyinptr
2599 *
2600 * Description: Copy a pointer in from user space to a user_addr_t in kernel
2601 * space, based on 32/64 bitness of the user space
2602 *
2603 * Parameters: froma User space address
2604 * toptr Address of kernel space user_addr_t
2605 * ptr_size 4/8, based on 'froma' address space
2606 *
2607 * Returns: 0 Success
2608 * EFAULT Bad 'froma'
2609 *
2610 * Implicit returns:
2611 * *ptr_size Modified
2612 */
2613static int
2614copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
2615{
2616 int error;
2617
2618 if (ptr_size == 4) {
2619 /* 64 bit value containing 32 bit address */
2620 unsigned int i;
2621
2622 error = copyin(froma, &i, 4);
2623 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
2624 } else {
2625 error = copyin(froma, toptr, 8);
2626 }
2627 return (error);
2628}
2629
2630
2631/*
2632 * copyoutptr
2633 *
2634 * Description: Copy a pointer out from a user_addr_t in kernel space to
2635 * user space, based on 32/64 bitness of the user space
2636 *
2637 * Parameters: ua User space address to copy to
2638 * ptr Address of kernel space user_addr_t
2639 * ptr_size 4/8, based on 'ua' address space
2640 *
2641 * Returns: 0 Success
2642 * EFAULT Bad 'ua'
2643 *
2d21ac55
A
2644 */
2645static int
2646copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
2647{
91447636 2648 int error;
1c79356b 2649
91447636
A
2650 if (ptr_size == 4) {
2651 /* 64 bit value containing 32 bit address */
b0d623f7 2652 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); /* SAFE */
91447636
A
2653
2654 error = copyout(&i, ptr, 4);
2655 } else {
2656 error = copyout(&ua, ptr, 8);
1c79356b 2657 }
91447636
A
2658 return (error);
2659}
2660
2661
2662/*
2663 * exec_copyout_strings
2664 *
2665 * Copy out the strings segment to user space. The strings segment is put
2666 * on a preinitialized stack frame.
2667 *
2668 * Parameters: struct image_params * the image parameter block
2669 * user_addr_t * a pointer to the stack offset variable
2670 *
2671 * Returns: 0 Success
2672 * !0 Failure: errno
2673 *
2674 * Implicit returns:
2675 * (*stackp) The stack offset, modified
2676 *
2677 * Note: The strings segment layout is backward, from the beginning
2678 * of the top of the stack to consume the minimal amount of
2679 * space possible; the returned stack pointer points to the
6d2010ae 2680 * end of the area consumed (stacks grow downward).
91447636
A
2681 *
2682 * argc is an int; arg[i] are pointers; env[i] are pointers;
6d2010ae 2683 * the 0's are (void *)NULL's
91447636
A
2684 *
2685 * The stack frame layout is:
2686 *
6d2010ae
A
2687 * +-------------+ <- p->user_stack
2688 * | 16b |
2689 * +-------------+
2690 * | STRING AREA |
2691 * | : |
2692 * | : |
2693 * | : |
2694 * +- -- -- -- --+
2695 * | PATH AREA |
2696 * +-------------+
2697 * | 0 |
2698 * +-------------+
2699 * | applev[n] |
2700 * +-------------+
2701 * :
2702 * :
2703 * +-------------+
2704 * | applev[1] |
2705 * +-------------+
2706 * | exec_path / |
2707 * | applev[0] |
2708 * +-------------+
2709 * | 0 |
2710 * +-------------+
2711 * | env[n] |
2712 * +-------------+
2713 * :
2714 * :
2715 * +-------------+
2716 * | env[0] |
2717 * +-------------+
2718 * | 0 |
2719 * +-------------+
2720 * | arg[argc-1] |
2721 * +-------------+
2722 * :
2723 * :
2724 * +-------------+
2725 * | arg[0] |
2726 * +-------------+
2727 * | argc |
2728 * sp-> +-------------+
91447636
A
2729 *
2730 * Although technically a part of the STRING AREA, we treat the PATH AREA as
2731 * a separate entity. This allows us to align the beginning of the PATH AREA
2732 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
2733 * which precede it on the stack are properly aligned.
91447636 2734 */
6d2010ae 2735
91447636
A
2736static int
2737exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
2738{
2d21ac55 2739 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
91447636 2740 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
6d2010ae
A
2741 int ptr_area_size;
2742 void *ptr_buffer_start, *ptr_buffer;
2743 int string_size;
2744
91447636 2745 user_addr_t string_area; /* *argv[], *env[] */
6d2010ae
A
2746 user_addr_t ptr_area; /* argv[], env[], applev[] */
2747 user_addr_t argc_area; /* argc */
91447636 2748 user_addr_t stack;
91447636 2749 int error;
6d2010ae
A
2750
2751 unsigned i;
2752 struct copyout_desc {
2753 char *start_string;
2754 int count;
2755#if CONFIG_DTRACE
2756 user_addr_t *dtrace_cookie;
2757#endif
2758 boolean_t null_term;
2759 } descriptors[] = {
2760 {
2761 .start_string = imgp->ip_startargv,
2762 .count = imgp->ip_argc,
2763#if CONFIG_DTRACE
2764 .dtrace_cookie = &p->p_dtrace_argv,
2765#endif
2766 .null_term = TRUE
2767 },
2768 {
2769 .start_string = imgp->ip_endargv,
2770 .count = imgp->ip_envc,
2771#if CONFIG_DTRACE
2772 .dtrace_cookie = &p->p_dtrace_envp,
2773#endif
2774 .null_term = TRUE
2775 },
2776 {
2777 .start_string = imgp->ip_strings,
2778 .count = 1,
2779#if CONFIG_DTRACE
2780 .dtrace_cookie = NULL,
2781#endif
2782 .null_term = FALSE
2783 },
2784 {
2785 .start_string = imgp->ip_endenvv,
2786 .count = imgp->ip_applec - 1, /* exec_path handled above */
2787#if CONFIG_DTRACE
2788 .dtrace_cookie = NULL,
2789#endif
2790 .null_term = TRUE
2791 }
2792 };
91447636
A
2793
2794 stack = *stackp;
2795
2796 /*
6d2010ae
A
2797 * All previous contributors to the string area
2798 * should have aligned their sub-area
91447636 2799 */
6d2010ae
A
2800 if (imgp->ip_strspace % ptr_size != 0) {
2801 error = EINVAL;
2802 goto bad;
2803 }
91447636 2804
6d2010ae
A
2805 /* Grow the stack down for the strings we've been building up */
2806 string_size = imgp->ip_strendp - imgp->ip_strings;
2807 stack -= string_size;
2808 string_area = stack;
2809
2810 /*
2811 * Need room for one pointer for each string, plus
2812 * one for the NULLs terminating the argv, envv, and apple areas.
2813 */
2814 ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
2815 ptr_size;
2816 stack -= ptr_area_size;
2817 ptr_area = stack;
2818
2819 /* We'll construct all the pointer arrays in our string buffer,
2820 * which we already know is aligned properly, and ip_argspace
2821 * was used to verify we have enough space.
2822 */
2823 ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
2824
2825 /*
2826 * Need room for pointer-aligned argc slot.
2827 */
2828 stack -= ptr_size;
2829 argc_area = stack;
1c79356b
A
2830
2831 /*
91447636
A
2832 * Record the size of the arguments area so that sysctl_procargs()
2833 * can return the argument area without having to parse the arguments.
1c79356b 2834 */
2d21ac55 2835 proc_lock(p);
91447636 2836 p->p_argc = imgp->ip_argc;
6d2010ae 2837 p->p_argslen = (int)(*stackp - string_area);
2d21ac55 2838 proc_unlock(p);
91447636 2839
6d2010ae
A
2840 /* Return the initial stack address: the location of argc */
2841 *stackp = stack;
91447636 2842
1c79356b 2843 /*
6d2010ae
A
2844 * Copy out the entire strings area.
2845 */
2846 error = copyout(imgp->ip_strings, string_area,
2847 string_size);
91447636
A
2848 if (error)
2849 goto bad;
2850
6d2010ae
A
2851 for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
2852 char *cur_string = descriptors[i].start_string;
2853 int j;
91447636 2854
2d21ac55 2855#if CONFIG_DTRACE
6d2010ae
A
2856 if (descriptors[i].dtrace_cookie) {
2857 proc_lock(p);
2858 *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
2859 proc_unlock(p);
2860 }
2d21ac55 2861#endif /* CONFIG_DTRACE */
91447636 2862
6d2010ae
A
2863 /*
2864 * For each segment (argv, envv, applev), copy as many pointers as requested
2865 * to our pointer buffer.
2866 */
2867 for (j = 0; j < descriptors[i].count; j++) {
2868 user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
2869
2870 /* Copy out the pointer to the current string. Alignment has been verified */
2871 if (ptr_size == 8) {
2872 *(uint64_t *)ptr_buffer = (uint64_t)cur_address;
2873 } else {
2874 *(uint32_t *)ptr_buffer = (uint32_t)cur_address;
2875 }
2876
2877 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
2878 cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
91447636 2879 }
91447636 2880
6d2010ae
A
2881 if (descriptors[i].null_term) {
2882 if (ptr_size == 8) {
2883 *(uint64_t *)ptr_buffer = 0ULL;
2884 } else {
2885 *(uint32_t *)ptr_buffer = 0;
91447636 2886 }
6d2010ae
A
2887
2888 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
2889 }
2890 }
2891
2892 /*
2893 * Copy out all our pointer arrays in bulk.
2894 */
2895 error = copyout(ptr_buffer_start, ptr_area,
2896 ptr_area_size);
2897 if (error)
2898 goto bad;
2899
2900 /* argc (int32, stored in a ptr_size area) */
2901 error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
2902 if (error)
2903 goto bad;
91447636
A
2904
2905bad:
2906 return(error);
2907}
2908
2909
2910/*
2911 * exec_extract_strings
2912 *
2913 * Copy arguments and environment from user space into work area; we may
2914 * have already copied some early arguments into the work area, and if
2915 * so, any arguments opied in are appended to those already there.
6d2010ae
A
2916 * This function is the primary manipulator of ip_argspace, since
2917 * these are the arguments the client of execve(2) knows about. After
2918 * each argv[]/envv[] string is copied, we charge the string length
2919 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
2920 * full preflight the arg list size.
91447636
A
2921 *
2922 * Parameters: struct image_params * the image parameter block
2923 *
2924 * Returns: 0 Success
2925 * !0 Failure: errno
2926 *
2927 * Implicit returns;
2928 * (imgp->ip_argc) Count of arguments, updated
2929 * (imgp->ip_envc) Count of environment strings, updated
6d2010ae
A
2930 * (imgp->ip_argspace) Count of remaining of NCARGS
2931 * (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
91447636
A
2932 *
2933 *
2d21ac55 2934 * Note: The argument and environment vectors are user space pointers
91447636
A
2935 * to arrays of user space pointers.
2936 */
2937static int
2938exec_extract_strings(struct image_params *imgp)
2939{
2940 int error = 0;
91447636 2941 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
6d2010ae 2942 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
91447636
A
2943 user_addr_t argv = imgp->ip_user_argv;
2944 user_addr_t envv = imgp->ip_user_envv;
2945
b0d623f7
A
2946 /*
2947 * Adjust space reserved for the path name by however much padding it
2948 * needs. Doing this here since we didn't know if this would be a 32-
2949 * or 64-bit process back in exec_save_path.
2950 */
6d2010ae
A
2951 while (imgp->ip_strspace % new_ptr_size != 0) {
2952 *imgp->ip_strendp++ = '\0';
2953 imgp->ip_strspace--;
2954 /* imgp->ip_argspace--; not counted towards exec args total */
2955 }
b0d623f7 2956
1c79356b 2957 /*
6d2010ae 2958 * From now on, we start attributing string space to ip_argspace
1c79356b 2959 */
6d2010ae
A
2960 imgp->ip_startargv = imgp->ip_strendp;
2961 imgp->ip_argc = 0;
2962
2963 if((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
91447636 2964 user_addr_t arg;
6d2010ae
A
2965 char *argstart, *ch;
2966
2967 /* First, the arguments in the "#!" string are tokenized and extracted. */
2968 argstart = imgp->ip_interp_buffer;
2969 while (argstart) {
2970 ch = argstart;
2971 while (*ch && !IS_WHITESPACE(*ch)) {
2972 ch++;
2973 }
91447636 2974
6d2010ae
A
2975 if (*ch == '\0') {
2976 /* last argument, no need to NUL-terminate */
2977 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
2978 argstart = NULL;
2979 } else {
2980 /* NUL-terminate */
2981 *ch = '\0';
2982 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
2983
2984 /*
2985 * Find the next string. We know spaces at the end of the string have already
2986 * been stripped.
2987 */
2988 argstart = ch + 1;
2989 while (IS_WHITESPACE(*argstart)) {
2990 argstart++;
2991 }
2992 }
2993
2994 /* Error-check, regardless of whether this is the last interpreter arg or not */
91447636
A
2995 if (error)
2996 goto bad;
6d2010ae
A
2997 if (imgp->ip_argspace < new_ptr_size) {
2998 error = E2BIG;
2999 goto bad;
3000 }
3001 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
91447636 3002 imgp->ip_argc++;
1c79356b 3003 }
6d2010ae
A
3004
3005 if (argv != 0LL) {
3006 /*
3007 * If we are running an interpreter, replace the av[0] that was
3008 * passed to execve() with the path name that was
3009 * passed to execve() for interpreters which do not use the PATH
3010 * to locate their script arguments.
3011 */
3012 error = copyinptr(argv, &arg, ptr_size);
3013 if (error)
3014 goto bad;
3015 if (arg != 0LL) {
3016 argv += ptr_size; /* consume without using */
3017 }
3018 }
3019
3020 if (imgp->ip_interp_sugid_fd != -1) {
3021 char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
3022 snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
3023 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
3024 } else {
3025 error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
3026 }
3027
3028 if (error)
3029 goto bad;
3030 if (imgp->ip_argspace < new_ptr_size) {
3031 error = E2BIG;
3032 goto bad;
3033 }
3034 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3035 imgp->ip_argc++;
91447636 3036 }
1c79356b 3037
91447636
A
3038 while (argv != 0LL) {
3039 user_addr_t arg;
3040
3041 error = copyinptr(argv, &arg, ptr_size);
3042 if (error)
1c79356b 3043 goto bad;
1c79356b 3044
91447636
A
3045 if (arg == 0LL) {
3046 break;
1c79356b 3047 }
6d2010ae
A
3048
3049 argv += ptr_size;
3050
91447636
A
3051 /*
3052 * av[n...] = arg[n]
3053 */
6d2010ae 3054 error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
91447636
A
3055 if (error)
3056 goto bad;
6d2010ae
A
3057 if (imgp->ip_argspace < new_ptr_size) {
3058 error = E2BIG;
3059 goto bad;
3060 }
3061 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
91447636
A
3062 imgp->ip_argc++;
3063 }
6d2010ae
A
3064
3065 /* Save space for argv[] NULL terminator */
3066 if (imgp->ip_argspace < new_ptr_size) {
3067 error = E2BIG;
3068 goto bad;
3069 }
3070 imgp->ip_argspace -= new_ptr_size;
b0d623f7 3071
6d2010ae
A
3072 /* Note where the args ends and env begins. */
3073 imgp->ip_endargv = imgp->ip_strendp;
3074 imgp->ip_envc = 0;
91447636
A
3075
3076 /* Now, get the environment */
3077 while (envv != 0LL) {
3078 user_addr_t env;
3079
3080 error = copyinptr(envv, &env, ptr_size);
3081 if (error)
3082 goto bad;
1c79356b 3083
91447636
A
3084 envv += ptr_size;
3085 if (env == 0LL) {
3086 break;
1c79356b 3087 }
1c79356b 3088 /*
91447636
A
3089 * av[n...] = env[n]
3090 */
6d2010ae 3091 error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
91447636 3092 if (error)
55e303ae 3093 goto bad;
6d2010ae
A
3094 if (imgp->ip_argspace < new_ptr_size) {
3095 error = E2BIG;
3096 goto bad;
3097 }
3098 imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
91447636 3099 imgp->ip_envc++;
55e303ae 3100 }
6d2010ae
A
3101
3102 /* Save space for envv[] NULL terminator */
3103 if (imgp->ip_argspace < new_ptr_size) {
3104 error = E2BIG;
3105 goto bad;
3106 }
3107 imgp->ip_argspace -= new_ptr_size;
3108
3109 /* Align the tail of the combined argv+envv area */
3110 while (imgp->ip_strspace % new_ptr_size != 0) {
3111 if (imgp->ip_argspace < 1) {
3112 error = E2BIG;
3113 goto bad;
3114 }
3115 *imgp->ip_strendp++ = '\0';
3116 imgp->ip_strspace--;
3117 imgp->ip_argspace--;
3118 }
3119
3120 /* Note where the envv ends and applev begins. */
3121 imgp->ip_endenvv = imgp->ip_strendp;
3122
3123 /*
3124 * From now on, we are no longer charging argument
3125 * space to ip_argspace.
3126 */
3127
91447636
A
3128bad:
3129 return error;
3130}
55e303ae 3131
6d2010ae 3132static char *
39236c6e 3133random_hex_str(char *str, int len, boolean_t embedNUL)
6d2010ae
A
3134{
3135 uint64_t low, high, value;
3136 int idx;
3137 char digit;
3138
3139 /* A 64-bit value will only take 16 characters, plus '0x' and NULL. */
3140 if (len > 19)
3141 len = 19;
3142
3143 /* We need enough room for at least 1 digit */
3144 if (len < 4)
3145 return (NULL);
3146
3147 low = random();
3148 high = random();
3149 value = high << 32 | low;
3150
39236c6e
A
3151 if (embedNUL) {
3152 /*
3153 * Zero a byte to protect against C string vulnerabilities
3154 * e.g. for userland __stack_chk_guard.
3155 */
3156 value &= ~(0xffull << 8);
3157 }
3158
6d2010ae
A
3159 str[0] = '0';
3160 str[1] = 'x';
3161 for (idx = 2; idx < len - 1; idx++) {
3162 digit = value & 0xf;
3163 value = value >> 4;
3164 if (digit < 10)
3165 str[idx] = '0' + digit;
3166 else
3167 str[idx] = 'a' + (digit - 10);
3168 }
3169 str[idx] = '\0';
3170 return (str);
3171}
3172
3173/*
3174 * Libc has an 8-element array set up for stack guard values. It only fills
3175 * in one of those entries, and both gcc and llvm seem to use only a single
3176 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
3177 * do the work to construct them.
3178 */
3179#define GUARD_VALUES 1
3180#define GUARD_KEY "stack_guard="
3181
3182/*
3183 * System malloc needs some entropy when it is initialized.
3184 */
3185#define ENTROPY_VALUES 2
3186#define ENTROPY_KEY "malloc_entropy="
3187
39236c6e
A
3188/*
3189 * System malloc engages nanozone for UIAPP.
3190 */
3191#define NANO_ENGAGE_KEY "MallocNanoZone=1"
3192
316670eb
A
3193#define PFZ_KEY "pfz="
3194extern user32_addr_t commpage_text32_location;
3195extern user64_addr_t commpage_text64_location;
6d2010ae
A
3196/*
3197 * Build up the contents of the apple[] string vector
3198 */
3199static int
3200exec_add_apple_strings(struct image_params *imgp)
3201{
3202 int i, error;
316670eb 3203 int new_ptr_size=4;
6d2010ae
A
3204 char guard[19];
3205 char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1];
3206
3207 char entropy[19];
3208 char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1];
3209
316670eb
A
3210 char pfz_string[strlen(PFZ_KEY) + 16 + 4 +1];
3211
3212 if( imgp->ip_flags & IMGPF_IS_64BIT) {
3213 new_ptr_size = 8;
3214 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%llx",commpage_text64_location);
39236c6e 3215 } else {
316670eb
A
3216 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%x",commpage_text32_location);
3217 }
3218
6d2010ae
A
3219 /* exec_save_path stored the first string */
3220 imgp->ip_applec = 1;
3221
316670eb
A
3222 /* adding the pfz string */
3223 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string),UIO_SYSSPACE,FALSE);
3224 if(error)
3225 goto bad;
3226 imgp->ip_applec++;
3227
39236c6e
A
3228 /* adding the NANO_ENGAGE_KEY key */
3229 if (imgp->ip_px_sa) {
3230 int proc_type = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_apptype) & POSIX_SPAWN_PROC_TYPE_MASK;
3231
3232 if (proc_type == POSIX_SPAWN_PROC_TYPE_APP_DEFAULT || proc_type == POSIX_SPAWN_PROC_TYPE_APP_TAL) {
3233 char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1];
3234
3235 snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY);
3236 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE);
3237 if(error)
3238 goto bad;
3239 imgp->ip_applec++;
3240 }
3241 }
3242
6d2010ae
A
3243 /*
3244 * Supply libc with a collection of random values to use when
3245 * implementing -fstack-protector.
39236c6e
A
3246 *
3247 * (The first random string always contains an embedded NUL so that
3248 * __stack_chk_guard also protects against C string vulnerabilities)
6d2010ae
A
3249 */
3250 (void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec));
3251 for (i = 0; i < GUARD_VALUES; i++) {
39236c6e 3252 random_hex_str(guard, sizeof (guard), i == 0);
6d2010ae
A
3253 if (i)
3254 (void)strlcat(guard_vec, ",", sizeof (guard_vec));
3255 (void)strlcat(guard_vec, guard, sizeof (guard_vec));
3256 }
3257
3258 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(guard_vec), UIO_SYSSPACE, FALSE);
3259 if (error)
3260 goto bad;
3261 imgp->ip_applec++;
3262
3263 /*
3264 * Supply libc with entropy for system malloc.
3265 */
3266 (void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec));
3267 for (i = 0; i < ENTROPY_VALUES; i++) {
39236c6e 3268 random_hex_str(entropy, sizeof (entropy), FALSE);
6d2010ae
A
3269 if (i)
3270 (void)strlcat(entropy_vec, ",", sizeof (entropy_vec));
3271 (void)strlcat(entropy_vec, entropy, sizeof (entropy_vec));
3272 }
3273
3274 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(entropy_vec), UIO_SYSSPACE, FALSE);
3275 if (error)
3276 goto bad;
3277 imgp->ip_applec++;
3278
3279 /* Align the tail of the combined applev area */
3280 while (imgp->ip_strspace % new_ptr_size != 0) {
3281 *imgp->ip_strendp++ = '\0';
3282 imgp->ip_strspace--;
3283 }
3284
3285bad:
3286 return error;
3287}
55e303ae 3288
/*
 * Current (soft) stack size limit of process 'p'.  The argument is
 * parenthesized so that any pointer-valued expression may be passed.
 */
#define	unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
55e303ae 3290
2d21ac55
A
3291/*
3292 * exec_check_permissions
3293 *
6d2010ae 3294 * Description: Verify that the file that is being attempted to be executed
2d21ac55
A
3295 * is in fact allowed to be executed based on it POSIX file
3296 * permissions and other access control criteria
3297 *
3298 * Parameters: struct image_params * the image parameter block
3299 *
3300 * Returns: 0 Success
3301 * EACCES Permission denied
3302 * ENOEXEC Executable file format error
3303 * ETXTBSY Text file busy [misuse of error code]
3304 * vnode_getattr:???
3305 * vnode_authorize:???
3306 */
91447636
A
3307static int
3308exec_check_permissions(struct image_params *imgp)
3309{
3310 struct vnode *vp = imgp->ip_vp;
3311 struct vnode_attr *vap = imgp->ip_vattr;
2d21ac55 3312 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
91447636
A
3313 int error;
3314 kauth_action_t action;
55e303ae 3315
91447636
A
3316 /* Only allow execution of regular files */
3317 if (!vnode_isreg(vp))
3318 return (EACCES);
3319
3320 /* Get the file attributes that we will be using here and elsewhere */
3321 VATTR_INIT(vap);
3322 VATTR_WANTED(vap, va_uid);
3323 VATTR_WANTED(vap, va_gid);
3324 VATTR_WANTED(vap, va_mode);
3325 VATTR_WANTED(vap, va_fsid);
3326 VATTR_WANTED(vap, va_fileid);
3327 VATTR_WANTED(vap, va_data_size);
3328 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
3329 return (error);
55e303ae 3330
91447636
A
3331 /*
3332 * Ensure that at least one execute bit is on - otherwise root
3333 * will always succeed, and we don't want to happen unless the
3334 * file really is executable.
3335 */
6d2010ae 3336 if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
91447636 3337 return (EACCES);
55e303ae 3338
91447636
A
3339 /* Disallow zero length files */
3340 if (vap->va_data_size == 0)
3341 return (ENOEXEC);
55e303ae 3342
91447636
A
3343 imgp->ip_arch_offset = (user_size_t)0;
3344 imgp->ip_arch_size = vap->va_data_size;
0b4e3aa0 3345
91447636 3346 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
b0d623f7 3347 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED))
91447636 3348 vap->va_mode &= ~(VSUID | VSGID);
2d21ac55 3349
39236c6e
A
3350 /*
3351 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
3352 * flags for setuid/setgid binaries.
3353 */
3354 if (vap->va_mode & (VSUID | VSGID))
3355 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
3356
2d21ac55
A
3357#if CONFIG_MACF
3358 error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
3359 if (error)
3360 return (error);
3361#endif
91447636
A
3362
3363 /* Check for execute permission */
3364 action = KAUTH_VNODE_EXECUTE;
3365 /* Traced images must also be readable */
2d21ac55 3366 if (p->p_lflag & P_LTRACED)
91447636
A
3367 action |= KAUTH_VNODE_READ_DATA;
3368 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
3369 return (error);
1c79356b 3370
2d21ac55 3371#if 0
91447636 3372 /* Don't let it run if anyone had it open for writing */
2d21ac55
A
3373 vnode_lock(vp);
3374 if (vp->v_writecount) {
3375 panic("going to return ETXTBSY %x", vp);
3376 vnode_unlock(vp);
91447636 3377 return (ETXTBSY);
2d21ac55
A
3378 }
3379 vnode_unlock(vp);
3380#endif
3381
de355530 3382
91447636 3383 /* XXX May want to indicate to underlying FS that vnode is open */
1c79356b 3384
91447636
A
3385 return (error);
3386}
3387
2d21ac55 3388
91447636
A
3389/*
3390 * exec_handle_sugid
3391 *
3392 * Initially clear the P_SUGID in the process flags; if an SUGID process is
3393 * exec'ing a non-SUGID image, then this is the point of no return.
3394 *
2d21ac55 3395 * If the image being activated is SUGID, then replace the credential with a
91447636
A
3396 * copy, disable tracing (unless the tracing process is root), reset the
3397 * mach task port to revoke it, set the P_SUGID bit,
3398 *
3399 * If the saved user and group ID will be changing, then make sure it happens
3400 * to a new credential, rather than a shared one.
3401 *
3402 * Set the security token (this is probably obsolete, given that the token
3403 * should not technically be separate from the credential itself).
3404 *
3405 * Parameters: struct image_params * the image parameter block
3406 *
3407 * Returns: void No failure indication
3408 *
3409 * Implicit returns:
3410 * <process credential> Potentially modified/replaced
3411 * <task port> Potentially revoked
3412 * <process flags> P_SUGID bit potentially modified
3413 * <security token> Potentially modified
3414 */
3415static int
3416exec_handle_sugid(struct image_params *imgp)
3417{
3418 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
2d21ac55 3419 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
91447636 3420 int i;
c910b4d9 3421 int leave_sugid_clear = 0;
91447636 3422 int error = 0;
2d21ac55
A
3423#if CONFIG_MACF
3424 int mac_transition;
c910b4d9
A
3425
3426 /*
3427 * Determine whether a call to update the MAC label will result in the
3428 * credential changing.
3429 *
3430 * Note: MAC policies which do not actually end up modifying
3431 * the label subsequently are strongly encouraged to
3432 * return 0 for this check, since a non-zero answer will
3433 * slow down the exec fast path for normal binaries.
3434 */
3435 mac_transition = mac_cred_check_label_update_execve(
3436 imgp->ip_vfs_context,
3437 imgp->ip_vp,
39236c6e 3438 imgp->ip_scriptvp,
c910b4d9 3439 imgp->ip_scriptlabelp,
39236c6e
A
3440 imgp->ip_execlabelp,
3441 p,
3442 imgp->ip_px_smpx);
2d21ac55 3443#endif
1c79356b 3444
b0d623f7 3445 OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
91447636 3446
2d21ac55
A
3447 /*
3448 * Order of the following is important; group checks must go last,
c910b4d9 3449 * as we use the success of the 'ismember' check combined with the
2d21ac55
A
3450 * failure of the explicit match to indicate that we will be setting
3451 * the egid of the process even though the new process did not
3452 * require VSUID/VSGID bits in order for it to set the new group as
3453 * its egid.
3454 *
3455 * Note: Technically, by this we are implying a call to
3456 * setegid() in the new process, rather than implying
3457 * it used its VSGID bit to set the effective group,
3458 * even though there is no code in that process to make
3459 * such a call.
3460 */
91447636
A
3461 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
3462 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
3463 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
c910b4d9 3464 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
6d2010ae 3465 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
2d21ac55 3466
c910b4d9
A
3467#if CONFIG_MACF
3468/* label for MAC transition and neither VSUID nor VSGID */
3469handle_mac_transition:
3470#endif
3471
1c79356b 3472 /*
2d21ac55
A
3473 * Replace the credential with a copy of itself if euid or
3474 * egid change.
3475 *
3476 * Note: setuid binaries will automatically opt out of
3477 * group resolver participation as a side effect
3478 * of this operation. This is an intentional
3479 * part of the security model, which requires a
3480 * participating credential be established by
3481 * escalating privilege, setting up all other
3482 * aspects of the credential including whether
3483 * or not to participate in external group
3484 * membership resolution, then dropping their
3485 * effective privilege to that of the desired
3486 * final credential state.
91447636
A
3487 */
3488 if (imgp->ip_origvattr->va_mode & VSUID) {
2d21ac55 3489 p->p_ucred = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
6d2010ae
A
3490 /* update cred on proc */
3491 PROC_UPDATE_CREDS_ONPROC(p);
91447636
A
3492 }
3493 if (imgp->ip_origvattr->va_mode & VSGID) {
2d21ac55 3494 p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
6d2010ae
A
3495 /* update cred on proc */
3496 PROC_UPDATE_CREDS_ONPROC(p);
91447636 3497 }
1c79356b 3498
2d21ac55
A
3499#if CONFIG_MACF
3500 /*
c910b4d9
A
3501 * If a policy has indicated that it will transition the label,
3502 * before making the call into the MAC policies, get a new
2d21ac55
A
3503 * duplicate credential, so they can modify it without
3504 * modifying any others sharing it.
3505 */
c910b4d9
A
3506 if (mac_transition) {
3507 kauth_cred_t my_cred;
3508 if (kauth_proc_label_update_execve(p,
3509 imgp->ip_vfs_context,
3510 imgp->ip_vp,
39236c6e 3511 imgp->ip_scriptvp,
c910b4d9 3512 imgp->ip_scriptlabelp,
39236c6e
A
3513 imgp->ip_execlabelp,
3514 imgp->ip_px_smpx)) {
c910b4d9
A
3515 /*
3516 * If updating the MAC label resulted in a
3517 * disjoint credential, flag that we need to
3518 * set the P_SUGID bit. This protects
3519 * against debuggers being attached by an
3520 * insufficiently privileged process onto the
3521 * result of a transition to a more privileged
3522 * credential.
3523 */
3524 leave_sugid_clear = 0;
3525 }
2d21ac55
A
3526
3527 my_cred = kauth_cred_proc_ref(p);
3528 mac_task_label_update_cred(my_cred, p->task);
3529 kauth_cred_unref(&my_cred);
3530 }
c910b4d9
A
3531#endif /* CONFIG_MACF */
3532
2d21ac55 3533 /*
c910b4d9
A
3534 * If 'leave_sugid_clear' is non-zero, then we passed the
3535 * VSUID and MACF checks, and successfully determined that
3536 * the previous cred was a member of the VSGID group, but
3537 * that it was not the default at the time of the execve,
3538 * and that the post-labelling credential was not disjoint.
39236c6e
A
3539 * So we don't set the P_SUGID or reset mach ports and fds
3540 * on the basis of simply running this code.
1c79356b 3541 */
39236c6e
A
3542 if (!leave_sugid_clear) {
3543 /*
3544 * Have mach reset the task and thread ports.
3545 * We don't want anyone who had the ports before
3546 * a setuid exec to be able to access/control the
3547 * task/thread after.
3548 */
3549 ipc_task_reset(p->task);
3550 ipc_thread_reset((imgp->ip_new_thread != NULL) ?
3551 imgp->ip_new_thread : current_thread());
91447636 3552
39236c6e
A
3553 /*
3554 * Flag the process as setuid.
3555 */
3556 OSBitOrAtomic(P_SUGID, &p->p_flag);
1c79356b 3557
ebb1b9f4 3558 /*
39236c6e
A
3559 * Radar 2261856; setuid security hole fix
3560 * XXX For setuid processes, attempt to ensure that
3561 * stdin, stdout, and stderr are already allocated.
3562 * We do not want userland to accidentally allocate
3563 * descriptors in this range which has implied meaning
3564 * to libc.
ebb1b9f4 3565 */
39236c6e
A
3566 for (i = 0; i < 3; i++) {
3567
3568 if (p->p_fd->fd_ofiles[i] != NULL)
3569 continue;
3570
3571 /*
3572 * Do the kernel equivalent of
3573 *
3574 * if i == 0
3575 * (void) open("/dev/null", O_RDONLY);
3576 * else
3577 * (void) open("/dev/null", O_WRONLY);
3578 */
91447636 3579
39236c6e
A
3580 struct fileproc *fp;
3581 int indx;
3582 int flag;
1c79356b 3583
39236c6e
A
3584 if (i == 0)
3585 flag = FREAD;
3586 else
3587 flag = FWRITE;
ebb1b9f4 3588
39236c6e
A
3589 if ((error = falloc(p,
3590 &fp, &indx, imgp->ip_vfs_context)) != 0)
3591 continue;
ebb1b9f4 3592
39236c6e 3593 struct nameidata nd1;
ebb1b9f4 3594
39236c6e
A
3595 NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
3596 CAST_USER_ADDR_T("/dev/null"),
3597 imgp->ip_vfs_context);
ebb1b9f4 3598
39236c6e
A
3599 if ((error = vn_open(&nd1, flag, 0)) != 0) {
3600 fp_free(p, indx, fp);
3601 break;
3602 }
ebb1b9f4 3603
39236c6e 3604 struct fileglob *fg = fp->f_fglob;
ebb1b9f4 3605
39236c6e
A
3606 fg->fg_flag = flag;
3607 fg->fg_ops = &vnops;
3608 fg->fg_data = nd1.ni_vp;
ebb1b9f4 3609
39236c6e
A
3610 vnode_put(nd1.ni_vp);
3611
3612 proc_fdlock(p);
3613 procfdtbl_releasefd(p, indx, NULL);
3614 fp_drop(p, indx, fp, 1);
3615 proc_fdunlock(p);
3616 }
1c79356b 3617 }
1c79356b 3618 }
c910b4d9
A
3619#if CONFIG_MACF
3620 else {
3621 /*
3622 * We are here because we were told that the MAC label will
3623 * be transitioned, and the binary is not VSUID or VSGID; to
3624 * deal with this case, we could either duplicate a lot of
3625 * code, or we can indicate we want to default the P_SUGID
3626 * bit clear and jump back up.
3627 */
3628 if (mac_transition) {
3629 leave_sugid_clear = 1;
3630 goto handle_mac_transition;
3631 }
3632 }
3633#endif /* CONFIG_MACF */
1c79356b
A
3634
3635 /*
91447636
A
3636 * Implement the semantic where the effective user and group become
3637 * the saved user and group in exec'ed programs.
1c79356b 3638 */
6d2010ae
A
3639 p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), kauth_cred_getgid(p->p_ucred));
3640 /* update cred on proc */
3641 PROC_UPDATE_CREDS_ONPROC(p);
91447636 3642
593a1d5f
A
3643 /* Update the process' identity version and set the security token */
3644 p->p_idversion++;
91447636 3645 set_security_token(p);
0b4e3aa0 3646
1c79356b
A
3647 return(error);
3648}
3649
2d21ac55
A
3650
3651/*
3652 * create_unix_stack
3653 *
3654 * Description: Set the user stack address for the process to the provided
3655 * address. If a custom stack was not set as a result of the
3656 * load process (i.e. as specified by the image file for the
3657 * executable), then allocate the stack in the provided map and
3658 * set up appropriate guard pages for enforcing administrative
3659 * limits on stack growth, if they end up being needed.
3660 *
3661 * Parameters: p Process to set stack on
316670eb
A
3662 * load_result Information from mach-o load commands
3663 * map Address map in which to allocate the new stack
2d21ac55
A
3664 *
3665 * Returns: KERN_SUCCESS Stack successfully created
3666 * !KERN_SUCCESS Mach failure code
3667 */
91447636 3668static kern_return_t
316670eb 3669create_unix_stack(vm_map_t map, load_result_t* load_result,
2d21ac55 3670 proc_t p)
1c79356b 3671{
2d21ac55
A
3672 mach_vm_size_t size, prot_size;
3673 mach_vm_offset_t addr, prot_addr;
3674 kern_return_t kr;
1c79356b 3675
316670eb
A
3676 mach_vm_address_t user_stack = load_result->user_stack;
3677
2d21ac55 3678 proc_lock(p);
91447636 3679 p->user_stack = user_stack;
2d21ac55
A
3680 proc_unlock(p);
3681
316670eb 3682 if (!load_result->prog_allocated_stack) {
2d21ac55
A
3683 /*
3684 * Allocate enough space for the maximum stack size we
3685 * will ever authorize and an extra page to act as
316670eb
A
3686 * a guard page for stack overflows. For default stacks,
3687 * vm_initial_limit_stack takes care of the extra guard page.
3688 * Otherwise we must allocate it ourselves.
2d21ac55 3689 */
316670eb
A
3690
3691 size = mach_vm_round_page(load_result->user_stack_size);
3692 if (load_result->prog_stack_size)
3693 size += PAGE_SIZE;
3694 addr = mach_vm_trunc_page(load_result->user_stack - size);
2d21ac55 3695 kr = mach_vm_allocate(map, &addr, size,
91447636 3696 VM_MAKE_TAG(VM_MEMORY_STACK) |
316670eb 3697 VM_FLAGS_FIXED);
2d21ac55 3698 if (kr != KERN_SUCCESS) {
316670eb
A
3699 /* If can't allocate at default location, try anywhere */
3700 addr = 0;
3701 kr = mach_vm_allocate(map, &addr, size,
3702 VM_MAKE_TAG(VM_MEMORY_STACK) |
3703 VM_FLAGS_ANYWHERE);
3704 if (kr != KERN_SUCCESS)
3705 return kr;
3706
3707 user_stack = addr + size;
3708 load_result->user_stack = user_stack;
3709
3710 proc_lock(p);
3711 p->user_stack = user_stack;
3712 proc_unlock(p);
2d21ac55 3713 }
316670eb 3714
2d21ac55
A
3715 /*
3716 * And prevent access to what's above the current stack
3717 * size limit for this process.
3718 */
3719 prot_addr = addr;
316670eb
A
3720 if (load_result->prog_stack_size)
3721 prot_size = PAGE_SIZE;
3722 else
3723 prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
2d21ac55 3724 kr = mach_vm_protect(map,
316670eb
A
3725 prot_addr,
3726 prot_size,
3727 FALSE,
3728 VM_PROT_NONE);
2d21ac55
A
3729 if (kr != KERN_SUCCESS) {
3730 (void) mach_vm_deallocate(map, addr, size);
3731 return kr;
3732 }
3733 }
316670eb 3734
2d21ac55 3735 return KERN_SUCCESS;
1c79356b
A
3736}
3737
3738#include <sys/reboot.h>
3739
91447636 3740static char init_program_name[128] = "/sbin/launchd";
1c79356b
A
3741
3742struct execve_args init_exec_args;
1c79356b 3743
2d21ac55
A
3744/*
3745 * load_init_program
3746 *
3747 * Description: Load the "init" program; in most cases, this will be "launchd"
3748 *
3749 * Parameters: p Process to call execve() to create
3750 * the "init" program
3751 *
3752 * Returns: (void)
3753 *
3754 * Notes: The process that is passed in is the first manufactured
3755 * process on the system, and gets here via bsd_ast() firing
3756 * for the first time. This is done to ensure that bsd_init()
3757 * has run to completion.
3758 */
1c79356b 3759void
2d21ac55 3760load_init_program(proc_t p)
1c79356b
A
3761{
3762 vm_offset_t init_addr;
2d21ac55 3763 int argc = 0;
b0d623f7 3764 uint32_t argv[3];
91447636 3765 int error;
b0d623f7 3766 int retval[2];
1c79356b 3767
2d21ac55
A
3768 /*
3769 * Copy out program name.
3770 */
1c79356b 3771
2d21ac55
A
3772 init_addr = VM_MIN_ADDRESS;
3773 (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE,
3774 VM_FLAGS_ANYWHERE);
3775 if (init_addr == 0)
3776 init_addr++;
91447636 3777
2d21ac55
A
3778 (void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr),
3779 (unsigned) sizeof(init_program_name)+1);
1c79356b 3780
b0d623f7 3781 argv[argc++] = (uint32_t)init_addr;
2d21ac55
A
3782 init_addr += sizeof(init_program_name);
3783 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
1c79356b 3784
2d21ac55
A
3785 /*
3786 * Put out first (and only) argument, similarly.
3787 * Assumes everything fits in a page as allocated
3788 * above.
3789 */
3790 if (boothowto & RB_SINGLE) {
3791 const char *init_args = "-s";
1c79356b 3792
2d21ac55
A
3793 copyout(init_args, CAST_USER_ADDR_T(init_addr),
3794 strlen(init_args));
1c79356b 3795
b0d623f7 3796 argv[argc++] = (uint32_t)init_addr;
2d21ac55 3797 init_addr += strlen(init_args);
1c79356b
A
3798 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
3799
2d21ac55 3800 }
1c79356b 3801
2d21ac55
A
3802 /*
3803 * Null-end the argument list
3804 */
b0d623f7 3805 argv[argc] = 0;
2d21ac55
A
3806
3807 /*
3808 * Copy out the argument list.
3809 */
3810
3811 (void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr),
3812 (unsigned) sizeof(argv));
1c79356b 3813
2d21ac55
A
3814 /*
3815 * Set up argument block for fake call to execve.
3816 */
1c79356b 3817
2d21ac55
A
3818 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
3819 init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
3820 init_exec_args.envp = CAST_USER_ADDR_T(0);
3821
3822 /*
3823 * So that mach_init task is set with uid,gid 0 token
3824 */
3825 set_security_token(p);
1c79356b 3826
2d21ac55
A
3827 error = execve(p,&init_exec_args,retval);
3828 if (error)
6d2010ae 3829 panic("Process 1 exec of %s failed, errno %d",
2d21ac55 3830 init_program_name, error);
1c79356b
A
3831}
3832
3833/*
2d21ac55
A
3834 * load_return_to_errno
3835 *
3836 * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
3837 *
3838 * Parameters: lrtn Mach error number
3839 *
3840 * Returns: (int) BSD error number
3841 * 0 Success
3842 * EBADARCH Bad architecture
3843 * EBADMACHO Bad Mach object file
3844 * ESHLIBVERS Bad shared library version
3845 * ENOMEM Out of memory/resource shortage
3846 * EACCES Access denied
3847 * ENOENT Entry not found (usually "file does
3848 * does not exist")
3849 * EIO An I/O error occurred
3850 * EBADEXEC The executable is corrupt/unknown
1c79356b
A
3851 */
3852static int
3853load_return_to_errno(load_return_t lrtn)
3854{
3855 switch (lrtn) {
2d21ac55
A
3856 case LOAD_SUCCESS:
3857 return 0;
3858 case LOAD_BADARCH:
3859 return EBADARCH;
3860 case LOAD_BADMACHO:
3861 return EBADMACHO;
3862 case LOAD_SHLIB:
3863 return ESHLIBVERS;
3864 case LOAD_NOSPACE:
3865 case LOAD_RESOURCE:
3866 return ENOMEM;
3867 case LOAD_PROTECT:
3868 return EACCES;
3869 case LOAD_ENOENT:
3870 return ENOENT;
3871 case LOAD_IOERROR:
3872 return EIO;
3873 case LOAD_FAILURE:
39236c6e 3874 case LOAD_DECRYPTFAIL:
2d21ac55
A
3875 default:
3876 return EBADEXEC;
1c79356b
A
3877 }
3878}
3879
765c9de3
A
3880#include <mach/mach_types.h>
3881#include <mach/vm_prot.h>
3882#include <mach/semaphore.h>
3883#include <mach/sync_policy.h>
3884#include <kern/clock.h>
3885#include <mach/kern_return.h>
3886
91447636 3887/*
2d21ac55
A
3888 * execargs_alloc
3889 *
3890 * Description: Allocate the block of memory used by the execve arguments.
3891 * At the same time, we allocate a page so that we can read in
3892 * the first page of the image.
3893 *
3894 * Parameters: struct image_params * the image parameter block
3895 *
3896 * Returns: 0 Success
3897 * EINVAL Invalid argument
3898 * EACCES Permission denied
3899 * EINTR Interrupted function
3900 * ENOMEM Not enough space
3901 *
3902 * Notes: This is a temporary allocation into the kernel address space
3903 * to enable us to copy arguments in from user space. This is
3904 * necessitated by not mapping the process calling execve() into
3905 * the kernel address space during the execve() system call.
3906 *
3907 * We assemble the argument and environment, etc., into this
3908 * region before copying it as a single block into the child
3909 * process address space (at the top or bottom of the stack,
3910 * depending on which way the stack grows; see the function
3911 * exec_copyout_strings() for details).
3912 *
3913 * This ends up with a second (possibly unnecessary) copy compared
3914 * with assembling the data directly into the child address space,
3915 * instead, but since we cannot be guaranteed that the parent has
3916 * not modified its environment, we can't really know that it's
3917 * really a block there as well.
91447636 3918 */
b0d623f7
A
3919
3920
/*
 * Count of threads currently sleeping in execargs_alloc() waiting for a
 * free argument buffer; incremented and decremented around
 * execargs_lock_sleep() and examined by execargs_free(), in both cases with
 * execargs_cache_lock held.
 */
static int execargs_waiters = 0;
/* Mutex taken by execargs_lock_lock() and dropped by execargs_lock_unlock(). */
lck_mtx_t *execargs_cache_lock;
3923
3924static void
3925execargs_lock_lock(void) {
3926 lck_mtx_lock_spin(execargs_cache_lock);
3927}
3928
3929static void
3930execargs_lock_unlock(void) {
3931 lck_mtx_unlock(execargs_cache_lock);
3932}
3933
39236c6e 3934static wait_result_t
b0d623f7 3935execargs_lock_sleep(void) {
39236c6e 3936 return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
b0d623f7
A
3937}
3938
3939static kern_return_t
3940execargs_purgeable_allocate(char **execarg_address) {
6d2010ae 3941 kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
b0d623f7
A
3942 assert(kr == KERN_SUCCESS);
3943 return kr;
3944}
3945
3946static kern_return_t
3947execargs_purgeable_reference(void *execarg_address) {
3948 int state = VM_PURGABLE_NONVOLATILE;
3949 kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
3950
3951 assert(kr == KERN_SUCCESS);
3952 return kr;
3953}
3954
3955static kern_return_t
3956execargs_purgeable_volatilize(void *execarg_address) {
3957 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
3958 kern_return_t kr;
3959 kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
3960
3961 assert(kr == KERN_SUCCESS);
3962
3963 return kr;
3964}
3965
3966static void
3967execargs_wakeup_waiters(void) {
3968 thread_wakeup(&execargs_free_count);
3969}
3970
765c9de3 3971static int
91447636 3972execargs_alloc(struct image_params *imgp)
765c9de3
A
3973{
3974 kern_return_t kret;
39236c6e 3975 wait_result_t res;
b0d623f7 3976 int i, cache_index = -1;
765c9de3 3977
b0d623f7
A
3978 execargs_lock_lock();
3979
3980 while (execargs_free_count == 0) {
3981 execargs_waiters++;
39236c6e 3982 res = execargs_lock_sleep();
b0d623f7 3983 execargs_waiters--;
39236c6e
A
3984 if (res != THREAD_AWAKENED) {
3985 execargs_lock_unlock();
3986 return (EINTR);
3987 }
b0d623f7
A
3988 }
3989
3990 execargs_free_count--;
3991
3992 for (i = 0; i < execargs_cache_size; i++) {
3993 vm_offset_t element = execargs_cache[i];
3994 if (element) {
3995 cache_index = i;
3996 imgp->ip_strings = (char *)(execargs_cache[i]);
3997 execargs_cache[i] = 0;
3998 break;
765c9de3 3999 }
b0d623f7 4000 }
765c9de3 4001
b0d623f7
A
4002 assert(execargs_free_count >= 0);
4003
4004 execargs_lock_unlock();
4005
4006 if (cache_index == -1) {
4007 kret = execargs_purgeable_allocate(&imgp->ip_strings);
4008 }
4009 else
4010 kret = execargs_purgeable_reference(imgp->ip_strings);
4011
4012 assert(kret == KERN_SUCCESS);
55e303ae 4013 if (kret != KERN_SUCCESS) {
765c9de3 4014 return (ENOMEM);
55e303ae 4015 }
b0d623f7 4016
6d2010ae
A
4017 /* last page used to read in file headers */
4018 imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
4019 imgp->ip_strendp = imgp->ip_strings;
4020 imgp->ip_argspace = NCARGS;
4021 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
b0d623f7 4022
765c9de3
A
4023 return (0);
4024}
4025
2d21ac55
A
4026/*
4027 * execargs_free
4028 *
4029 * Description: Free the block of memory used by the execve arguments and the
4030 * first page of the executable by a previous call to the function
4031 * execargs_alloc().
4032 *
4033 * Parameters: struct image_params * the image parameter block
4034 *
4035 * Returns: 0 Success
4036 * EINVAL Invalid argument
4037 * EINTR Oeration interrupted
4038 */
765c9de3 4039static int
91447636 4040execargs_free(struct image_params *imgp)
765c9de3
A
4041{
4042 kern_return_t kret;
b0d623f7
A
4043 int i;
4044 boolean_t needs_wakeup = FALSE;
4045
4046 kret = execargs_purgeable_volatilize(imgp->ip_strings);
765c9de3 4047
b0d623f7
A
4048 execargs_lock_lock();
4049 execargs_free_count++;
4050
4051 for (i = 0; i < execargs_cache_size; i++) {
4052 vm_offset_t element = execargs_cache[i];
4053 if (element == 0) {
4054 execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
4055 imgp->ip_strings = NULL;
4056 break;
4057 }
765c9de3 4058 }
b0d623f7
A
4059
4060 assert(imgp->ip_strings == NULL);
4061
4062 if (execargs_waiters > 0)
4063 needs_wakeup = TRUE;
4064
4065 execargs_lock_unlock();
4066
4067 if (needs_wakeup == TRUE)
4068 execargs_wakeup_waiters();
4069
4070 return ((kret == KERN_SUCCESS ? 0 : EINVAL));
765c9de3 4071}
2d21ac55
A
4072
4073static void
4074exec_resettextvp(proc_t p, struct image_params *imgp)
4075{
4076 vnode_t vp;
4077 off_t offset;
4078 vnode_t tvp = p->p_textvp;
4079 int ret;
4080
4081 vp = imgp->ip_vp;
4082 offset = imgp->ip_arch_offset;
4083
4084 if (vp == NULLVP)
4085 panic("exec_resettextvp: expected valid vp");
4086
4087 ret = vnode_ref(vp);
4088 proc_lock(p);
4089 if (ret == 0) {
4090 p->p_textvp = vp;
4091 p->p_textoff = offset;
4092 } else {
4093 p->p_textvp = NULLVP; /* this is paranoia */
4094 p->p_textoff = 0;
4095 }
4096 proc_unlock(p);
4097
4098 if ( tvp != NULLVP) {
4099 if (vnode_getwithref(tvp) == 0) {
4100 vnode_rele(tvp);
4101 vnode_put(tvp);
4102 }
4103 }
4104
4105}
4106
39236c6e
A
4107/*
4108 * If the process is not signed or if it contains entitlements, we
4109 * need to communicate through the task_access_port to taskgated.
4110 *
4111 * taskgated will provide a detached code signature if present, and
4112 * will enforce any restrictions on entitlements.
4113 */
4114
4115static boolean_t
4116taskgated_required(proc_t p, boolean_t *require_success)
4117{
4118 size_t length;
4119 void *blob;
4120 int error;
4121
4122 if ((p->p_csflags & CS_VALID) == 0) {
4123 *require_success = FALSE;
4124 return TRUE;
4125 }
4126
4127 error = cs_entitlements_blob_get(p, &blob, &length);
4128 if (error == 0 && blob != NULL) {
4129 *require_success = TRUE; /* fatal on the desktop when entitlements are present */
4130 return TRUE;
4131 }
4132
4133 *require_success = FALSE;
4134 return 0;
4135}
4136
4137
4138static int
b0d623f7
A
4139check_for_signature(proc_t p, struct image_params *imgp)
4140{
4141 mach_port_t port = NULL;
6d2010ae
A
4142 kern_return_t kr = KERN_FAILURE;
4143 int error = EACCES;
39236c6e 4144 boolean_t unexpected_failure = FALSE;
b0d623f7 4145 unsigned char hash[SHA1_RESULTLEN];
39236c6e 4146 boolean_t require_success = FALSE;
b0d623f7
A
4147
4148 /*
4149 * Override inherited code signing flags with the
4150 * ones for the process that is being successfully
4151 * loaded
4152 */
4153 proc_lock(p);
4154 p->p_csflags = imgp->ip_csflags;
4155 proc_unlock(p);
4156
4157 /* Set the switch_protect flag on the map */
4158 if(p->p_csflags & (CS_HARD|CS_KILL)) {
4159 vm_map_switch_protect(get_task_map(p->task), TRUE);
4160 }
4161
39236c6e
A
4162 /* check if callout to taskgated is needed */
4163 if (!taskgated_required(p, &require_success)) {
6d2010ae
A
4164 error = 0;
4165 goto done;
4166 }
4167
4168 kr = task_get_task_access_port(p->task, &port);
4169 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
4170 error = 0;
39236c6e 4171 if (require_success)
6d2010ae 4172 error = EACCES;
6d2010ae
A
4173 goto done;
4174 }
4175
39236c6e
A
4176 /*
4177 * taskgated returns KERN_SUCCESS if it has completed its work
4178 * and the exec should continue, KERN_FAILURE if the exec should
4179 * fail, or it may error out with different error code in an
4180 * event of mig failure (e.g. process was signalled during the
4181 * rpc call, taskgated died, mig server died etc.).
4182 */
4183
6d2010ae 4184 kr = find_code_signature(port, p->p_pid);
39236c6e
A
4185 switch (kr) {
4186 case KERN_SUCCESS:
4187 error = 0;
4188 break;
4189 case KERN_FAILURE:
6d2010ae
A
4190 error = EACCES;
4191 goto done;
39236c6e
A
4192 default:
4193 error = EACCES;
4194 unexpected_failure = TRUE;
4195 goto done;
6d2010ae
A
4196 }
4197
4198 /* Only do this if exec_resettextvp() did not fail */
4199 if (p->p_textvp != NULLVP) {
4200 /*
4201 * If there's a new code directory, mark this process
4202 * as signed.
4203 */
4204 if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) {
4205 proc_lock(p);
4206 p->p_csflags |= CS_VALID;
4207 proc_unlock(p);
b0d623f7
A
4208 }
4209 }
4210
6d2010ae 4211done:
39236c6e
A
4212 if (0 != error) {
4213 if (!unexpected_failure)
4214 p->p_csflags |= CS_KILLED;
6d2010ae
A
4215 /* make very sure execution fails */
4216 psignal(p, SIGKILL);
39236c6e 4217 }
6d2010ae 4218 return error;
b0d623f7
A
4219}
4220
316670eb
A
4221/*
4222 * Typically as soon as we start executing this process, the
4223 * first instruction will trigger a VM fault to bring the text
4224 * pages (as executable) into the address space, followed soon
4225 * thereafter by dyld data structures (for dynamic executable).
4226 * To optimize this, as well as improve support for hardware
4227 * debuggers that can only access resident pages present
4228 * in the process' page tables, we prefault some pages if
4229 * possible. Errors are non-fatal.
4230 */
4231static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
4232{
4233 int ret;
4234 size_t expected_all_image_infos_size;
4235
4236 /*
4237 * Prefault executable or dyld entry point.
4238 */
39236c6e
A
4239 vm_fault(current_map(),
4240 vm_map_trunc_page(load_result->entry_point,
4241 vm_map_page_mask(current_map())),
4242 VM_PROT_READ | VM_PROT_EXECUTE,
4243 FALSE,
4244 THREAD_UNINT, NULL, 0);
316670eb
A
4245
4246 if (imgp->ip_flags & IMGPF_IS_64BIT) {
4247 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
4248 } else {
4249 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
4250 }
4251
4252 /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
4253 if (load_result->dynlinker &&
4254 load_result->all_image_info_addr &&
4255 load_result->all_image_info_size >= expected_all_image_infos_size) {
4256 union {
4257 struct user64_dyld_all_image_infos infos64;
4258 struct user32_dyld_all_image_infos infos32;
4259 } all_image_infos;
4260
4261 /*
4262 * Pre-fault to avoid copyin() going through the trap handler
4263 * and recovery path.
4264 */
39236c6e
A
4265 vm_fault(current_map(),
4266 vm_map_trunc_page(load_result->all_image_info_addr,
4267 vm_map_page_mask(current_map())),
4268 VM_PROT_READ | VM_PROT_WRITE,
4269 FALSE,
4270 THREAD_UNINT, NULL, 0);
316670eb
A
4271 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
4272 /* all_image_infos straddles a page */
39236c6e
A
4273 vm_fault(current_map(),
4274 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
4275 vm_map_page_mask(current_map())),
4276 VM_PROT_READ | VM_PROT_WRITE,
4277 FALSE,
4278 THREAD_UNINT, NULL, 0);
316670eb
A
4279 }
4280
4281 ret = copyin(load_result->all_image_info_addr,
4282 &all_image_infos,
4283 expected_all_image_infos_size);
4284 if (ret == 0 && all_image_infos.infos32.version >= 9) {
4285
4286 user_addr_t notification_address;
4287 user_addr_t dyld_image_address;
4288 user_addr_t dyld_version_address;
4289 user_addr_t dyld_all_image_infos_address;
4290 user_addr_t dyld_slide_amount;
4291
4292 if (imgp->ip_flags & IMGPF_IS_64BIT) {
4293 notification_address = all_image_infos.infos64.notification;
4294 dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
4295 dyld_version_address = all_image_infos.infos64.dyldVersion;
4296 dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
4297 } else {
4298 notification_address = all_image_infos.infos32.notification;
4299 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
4300 dyld_version_address = all_image_infos.infos32.dyldVersion;
4301 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
4302 }
4303
4304 /*
4305 * dyld statically sets up the all_image_infos in its Mach-O
4306 * binary at static link time, with pointers relative to its default
4307 * load address. Since ASLR might slide dyld before its first
4308 * instruction is executed, "dyld_slide_amount" tells us how far
4309 * dyld was loaded compared to its default expected load address.
4310 * All other pointers into dyld's image should be adjusted by this
4311 * amount. At some point later, dyld will fix up pointers to take
4312 * into account the slide, at which point the all_image_infos_address
4313 * field in the structure will match the runtime load address, and
4314 * "dyld_slide_amount" will be 0, if we were to consult it again.
4315 */
4316
4317 dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
4318
4319#if 0
4320 kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
4321 (uint64_t)load_result->all_image_info_addr,
4322 all_image_infos.infos32.version,
4323 (uint64_t)notification_address,
4324 (uint64_t)dyld_image_address,
4325 (uint64_t)dyld_version_address,
4326 (uint64_t)dyld_all_image_infos_address);
4327#endif
4328
39236c6e
A
4329 vm_fault(current_map(),
4330 vm_map_trunc_page(notification_address + dyld_slide_amount,
4331 vm_map_page_mask(current_map())),
4332 VM_PROT_READ | VM_PROT_EXECUTE,
4333 FALSE,
4334 THREAD_UNINT, NULL, 0);
4335 vm_fault(current_map(),
4336 vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
4337 vm_map_page_mask(current_map())),
4338 VM_PROT_READ | VM_PROT_EXECUTE,
4339 FALSE,
4340 THREAD_UNINT, NULL, 0);
4341 vm_fault(current_map(),
4342 vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
4343 vm_map_page_mask(current_map())),
4344 VM_PROT_READ,
4345 FALSE,
4346 THREAD_UNINT, NULL, 0);
4347 vm_fault(current_map(),
4348 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
4349 vm_map_page_mask(current_map())),
4350 VM_PROT_READ | VM_PROT_WRITE,
4351 FALSE,
4352 THREAD_UNINT, NULL, 0);
316670eb
A
4353 }
4354 }
4355}