]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exec.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / kern / kern_exec.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved. The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36 #include <cputypes.h>
37
38 /*-
39 * Copyright (c) 1982, 1986, 1991, 1993
40 * The Regents of the University of California. All rights reserved.
41 * (c) UNIX System Laboratories, Inc.
42 * All or some portions of this file are derived from material licensed
43 * to the University of California by American Telephone and Telegraph
44 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
45 * the permission of UNIX System Laboratories, Inc.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
76 */
77 /*
78 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
79 * support for mandatory and extensible security protections. This notice
80 * is included in support of clause 2.2 (b) of the Apple Public License,
81 * Version 2.0.
82 */
83 #include <machine/reg.h>
84 #include <machine/cpu_capabilities.h>
85
86 #include <sys/param.h>
87 #include <sys/systm.h>
88 #include <sys/filedesc.h>
89 #include <sys/kernel.h>
90 #include <sys/proc_internal.h>
91 #include <sys/kauth.h>
92 #include <sys/user.h>
93 #include <sys/socketvar.h>
94 #include <sys/malloc.h>
95 #include <sys/namei.h>
96 #include <sys/mount_internal.h>
97 #include <sys/vnode_internal.h>
98 #include <sys/file_internal.h>
99 #include <sys/stat.h>
100 #include <sys/uio_internal.h>
101 #include <sys/acct.h>
102 #include <sys/exec.h>
103 #include <sys/kdebug.h>
104 #include <sys/signal.h>
105 #include <sys/aio_kern.h>
106 #include <sys/sysproto.h>
107 #if SYSV_SHM
108 #include <sys/shm_internal.h> /* shmexec() */
109 #endif
110 #include <sys/ubc_internal.h> /* ubc_map() */
111 #include <sys/spawn.h>
112 #include <sys/spawn_internal.h>
113 #include <sys/process_policy.h>
114 #include <sys/codesign.h>
115 #include <crypto/sha1.h>
116
117 #include <libkern/libkern.h>
118
119 #include <security/audit/audit.h>
120
121 #include <ipc/ipc_types.h>
122
123 #include <mach/mach_types.h>
124 #include <mach/port.h>
125 #include <mach/task.h>
126 #include <mach/task_access.h>
127 #include <mach/thread_act.h>
128 #include <mach/vm_map.h>
129 #include <mach/mach_vm.h>
130 #include <mach/vm_param.h>
131
132 #include <kern/sched_prim.h> /* thread_wakeup() */
133 #include <kern/affinity.h>
134 #include <kern/assert.h>
135 #include <kern/task.h>
136
137 #if CONFIG_MACF
138 #include <security/mac.h>
139 #include <security/mac_mach_internal.h>
140 #endif
141
142 #include <vm/vm_map.h>
143 #include <vm/vm_kern.h>
144 #include <vm/vm_protos.h>
145 #include <vm/vm_kern.h>
146 #include <vm/vm_fault.h>
147 #include <vm/vm_pageout.h>
148
149 #include <kdp/kdp_dyld.h>
150
151 #include <machine/pal_routines.h>
152
153 #include <pexpert/pexpert.h>
154
155 #if CONFIG_MEMORYSTATUS
156 #include <sys/kern_memorystatus.h>
157 #endif
158
159 #if CONFIG_DTRACE
160 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
161 extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
162 extern void (*dtrace_helpers_cleanup)(proc_t);
163 extern void dtrace_lazy_dofs_destroy(proc_t);
164
165 #include <sys/dtrace_ptss.h>
166 #endif
167
168 /* support for child creation in exec after vfork */
169 thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit);
170 void vfork_exit(proc_t p, int rv);
171 int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
172 extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
173
174 /*
175 * Mach things for which prototypes are unavailable from Mach headers
176 */
177 void ipc_task_reset(
178 task_t task);
179 void ipc_thread_reset(
180 thread_t thread);
181 kern_return_t ipc_object_copyin(
182 ipc_space_t space,
183 mach_port_name_t name,
184 mach_msg_type_name_t msgt_name,
185 ipc_object_t *objectp);
186 void ipc_port_release_send(ipc_port_t);
187
188 extern struct savearea *get_user_regs(thread_t);
189
190
191 #include <kern/thread.h>
192 #include <kern/task.h>
193 #include <kern/ast.h>
194 #include <kern/mach_loader.h>
195 #include <kern/mach_fat.h>
196 #include <mach-o/fat.h>
197 #include <mach-o/loader.h>
198 #include <machine/vmparam.h>
199 #include <sys/imgact.h>
200
201 #include <sys/sdt.h>
202
203
204 /*
205 * EAI_ITERLIMIT The maximum number of times to iterate an image
206 * activator in exec_activate_image() before treating
207 * it as malformed/corrupt.
208 */
209 #define EAI_ITERLIMIT 10
210
/*
 * For #! interpreter parsing.
 *
 * The argument is parenthesized so that an arbitrary expression may be
 * passed.  It is still evaluated more than once, so it must not have side
 * effects.
 */
#define IS_WHITESPACE(ch)	(((ch) == ' ') || ((ch) == '\t'))
#define IS_EOL(ch)		(((ch) == '#') || ((ch) == '\n'))
216
217 extern vm_map_t bsd_pageable_map;
218 extern const struct fileops vnops;
219
/*
 * Round addr up to the next 16-byte boundary and yield it as a (type *).
 * An address that is already 16-byte aligned is returned unchanged.
 *
 * The whole expansion is parenthesized so that the result can be used
 * directly as the operand of a postfix operator such as [] or ->.
 */
#define ROUND_PTR(type, addr)	\
	((type *)(((uintptr_t)(addr) + 16 - 1) \
		  & ~(uintptr_t)(16 - 1)))
223
224 struct image_params; /* Forward */
225 static int exec_activate_image(struct image_params *imgp);
226 static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
227 static int load_return_to_errno(load_return_t lrtn);
228 static int execargs_alloc(struct image_params *imgp);
229 static int execargs_free(struct image_params *imgp);
230 static int exec_check_permissions(struct image_params *imgp);
231 static int exec_extract_strings(struct image_params *imgp);
232 static int exec_add_apple_strings(struct image_params *imgp);
233 static int exec_handle_sugid(struct image_params *imgp);
234 static int sugid_scripts = 0;
235 SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
236 static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
237 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
238 static void exec_resettextvp(proc_t, struct image_params *);
239 static int check_for_signature(proc_t, struct image_params *);
240 static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
241 static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch);
242 static errno_t exec_handle_spawnattr_apptype(proc_t p, int psa_apptype);
243
244 /*
245 * exec_add_user_string
246 *
247 * Add the requested string to the string space area.
248 *
249 * Parameters; struct image_params * image parameter block
250 * user_addr_t string to add to strings area
251 * int segment from which string comes
252 * boolean_t TRUE if string contributes to NCARGS
253 *
254 * Returns: 0 Success
255 * !0 Failure errno from copyinstr()
256 *
257 * Implicit returns:
258 * (imgp->ip_strendp) updated location of next add, if any
259 * (imgp->ip_strspace) updated byte count of space remaining
260 * (imgp->ip_argspace) updated byte count of space in NCARGS
261 */
262 static int
263 exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
264 {
265 int error = 0;
266
267 do {
268 size_t len = 0;
269 int space;
270
271 if (is_ncargs)
272 space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
273 else
274 space = imgp->ip_strspace;
275
276 if (space <= 0) {
277 error = E2BIG;
278 break;
279 }
280
281 if (!UIO_SEG_IS_USER_SPACE(seg)) {
282 char *kstr = CAST_DOWN(char *,str); /* SAFE */
283 error = copystr(kstr, imgp->ip_strendp, space, &len);
284 } else {
285 error = copyinstr(str, imgp->ip_strendp, space, &len);
286 }
287
288 imgp->ip_strendp += len;
289 imgp->ip_strspace -= len;
290 if (is_ncargs)
291 imgp->ip_argspace -= len;
292
293 } while (error == ENAMETOOLONG);
294
295 return error;
296 }
297
298 /*
299 * exec_save_path
300 *
301 * To support new app package launching for Mac OS X, the dyld needs the
302 * first argument to execve() stored on the user stack.
303 *
304 * Save the executable path name at the bottom of the strings area and set
305 * the argument vector pointer to the location following that to indicate
306 * the start of the argument and environment tuples, setting the remaining
307 * string space count to the size of the string area minus the path length.
308 *
309 * Parameters; struct image_params * image parameter block
310 * char * path used to invoke program
311 * int segment from which path comes
312 *
313 * Returns: int 0 Success
314 * EFAULT Bad address
315 * copy[in]str:EFAULT Bad address
316 * copy[in]str:ENAMETOOLONG Filename too long
317 *
318 * Implicit returns:
319 * (imgp->ip_strings) saved path
320 * (imgp->ip_strspace) space remaining in ip_strings
321 * (imgp->ip_strendp) start of remaining copy area
322 * (imgp->ip_argspace) space remaining of NCARGS
323 * (imgp->ip_applec) Initial applev[0]
324 *
325 * Note: We have to do this before the initial namei() since in the
326 * path contains symbolic links, namei() will overwrite the
327 * original path buffer contents. If the last symbolic link
328 * resolved was a relative pathname, we would lose the original
329 * "path", which could be an absolute pathname. This might be
330 * unacceptable for dyld.
331 */
332 static int
333 exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
334 {
335 int error;
336 size_t len;
337 char *kpath;
338
339 len = MIN(MAXPATHLEN, imgp->ip_strspace);
340
341 switch(seg) {
342 case UIO_USERSPACE32:
343 case UIO_USERSPACE64: /* Same for copyin()... */
344 error = copyinstr(path, imgp->ip_strings, len, &len);
345 break;
346 case UIO_SYSSPACE:
347 kpath = CAST_DOWN(char *,path); /* SAFE */
348 error = copystr(kpath, imgp->ip_strings, len, &len);
349 break;
350 default:
351 error = EFAULT;
352 break;
353 }
354
355 if (!error) {
356 imgp->ip_strendp += len;
357 imgp->ip_strspace -= len;
358 }
359
360 return(error);
361 }
362
363 /*
364 * exec_reset_save_path
365 *
366 * If we detect a shell script, we need to reset the string area
367 * state so that the interpreter can be saved onto the stack.
368
369 * Parameters; struct image_params * image parameter block
370 *
371 * Returns: int 0 Success
372 *
373 * Implicit returns:
374 * (imgp->ip_strings) saved path
375 * (imgp->ip_strspace) space remaining in ip_strings
376 * (imgp->ip_strendp) start of remaining copy area
377 * (imgp->ip_argspace) space remaining of NCARGS
378 *
379 */
380 static int
381 exec_reset_save_path(struct image_params *imgp)
382 {
383 imgp->ip_strendp = imgp->ip_strings;
384 imgp->ip_argspace = NCARGS;
385 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
386
387 return (0);
388 }
389
390 /*
391 * exec_shell_imgact
392 *
393 * Image activator for interpreter scripts. If the image begins with the
394 * characters "#!", then it is an interpreter script. Verify that we are
395 * not already executing in PowerPC mode, and that the length of the script
396 * line indicating the interpreter is not in excess of the maximum allowed
397 * size. If this is the case, then break out the arguments, if any, which
398 * are separated by white space, and copy them into the argument save area
399 * as if they were provided on the command line before all other arguments.
400 * The line ends when we encounter a comment character ('#') or newline.
401 *
402 * Parameters; struct image_params * image parameter block
403 *
404 * Returns: -1 not an interpreter (keep looking)
405 * -3 Success: interpreter: relookup
406 * >0 Failure: interpreter: error number
407 *
408 * A return value other than -1 indicates subsequent image activators should
409 * not be given the opportunity to attempt to activate the image.
410 */
411 static int
412 exec_shell_imgact(struct image_params *imgp)
413 {
414 char *vdata = imgp->ip_vdata;
415 char *ihp;
416 char *line_startp, *line_endp;
417 char *interp;
418 proc_t p;
419 struct fileproc *fp;
420 int fd;
421 int error;
422
423 /*
424 * Make sure it's a shell script. If we've already redirected
425 * from an interpreted file once, don't do it again.
426 *
427 * Note: We disallow PowerPC, since the expectation is that we
428 * may run a PowerPC interpreter, but not an interpret a PowerPC
429 * image. This is consistent with historical behaviour.
430 */
431 if (vdata[0] != '#' ||
432 vdata[1] != '!' ||
433 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
434 return (-1);
435 }
436
437 imgp->ip_flags |= IMGPF_INTERPRET;
438 imgp->ip_interp_sugid_fd = -1;
439 imgp->ip_interp_buffer[0] = '\0';
440
441 /* Check to see if SUGID scripts are permitted. If they aren't then
442 * clear the SUGID bits.
443 * imgp->ip_vattr is known to be valid.
444 */
445 if (sugid_scripts == 0) {
446 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
447 }
448
449 /* Try to find the first non-whitespace character */
450 for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
451 if (IS_EOL(*ihp)) {
452 /* Did not find interpreter, "#!\n" */
453 return (ENOEXEC);
454 } else if (IS_WHITESPACE(*ihp)) {
455 /* Whitespace, like "#! /bin/sh\n", keep going. */
456 } else {
457 /* Found start of interpreter */
458 break;
459 }
460 }
461
462 if (ihp == &vdata[IMG_SHSIZE]) {
463 /* All whitespace, like "#! " */
464 return (ENOEXEC);
465 }
466
467 line_startp = ihp;
468
469 /* Try to find the end of the interpreter+args string */
470 for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
471 if (IS_EOL(*ihp)) {
472 /* Got it */
473 break;
474 } else {
475 /* Still part of interpreter or args */
476 }
477 }
478
479 if (ihp == &vdata[IMG_SHSIZE]) {
480 /* A long line, like "#! blah blah blah" without end */
481 return (ENOEXEC);
482 }
483
484 /* Backtrack until we find the last non-whitespace */
485 while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
486 ihp--;
487 }
488
489 /* The character after the last non-whitespace is our logical end of line */
490 line_endp = ihp + 1;
491
492 /*
493 * Now we have pointers to the usable part of:
494 *
495 * "#! /usr/bin/int first second third \n"
496 * ^ line_startp ^ line_endp
497 */
498
499 /* copy the interpreter name */
500 interp = imgp->ip_interp_buffer;
501 for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
502 *interp++ = *ihp;
503 *interp = '\0';
504
505 exec_reset_save_path(imgp);
506 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
507 UIO_SYSSPACE);
508
509 /* Copy the entire interpreter + args for later processing into argv[] */
510 interp = imgp->ip_interp_buffer;
511 for ( ihp = line_startp; (ihp < line_endp); ihp++)
512 *interp++ = *ihp;
513 *interp = '\0';
514
515 /*
516 * If we have a SUID oder SGID script, create a file descriptor
517 * from the vnode and pass /dev/fd/%d instead of the actual
518 * path name so that the script does not get opened twice
519 */
520 if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
521 p = vfs_context_proc(imgp->ip_vfs_context);
522 error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
523 if (error)
524 return(error);
525
526 fp->f_fglob->fg_flag = FREAD;
527 fp->f_fglob->fg_ops = &vnops;
528 fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
529
530 proc_fdlock(p);
531 procfdtbl_releasefd(p, fd, NULL);
532 fp_drop(p, fd, fp, 1);
533 proc_fdunlock(p);
534 vnode_ref(imgp->ip_vp);
535
536 imgp->ip_interp_sugid_fd = fd;
537 }
538
539 return (-3);
540 }
541
542
543
544 /*
545 * exec_fat_imgact
546 *
547 * Image activator for fat 1.0 binaries. If the binary is fat, then we
548 * need to select an image from it internally, and make that the image
549 * we are going to attempt to execute. At present, this consists of
550 * reloading the first page for the image with a first page from the
551 * offset location indicated by the fat header.
552 *
553 * Parameters; struct image_params * image parameter block
554 *
555 * Returns: -1 not a fat binary (keep looking)
556 * -2 Success: encapsulated binary: reread
557 * >0 Failure: error number
558 *
559 * Important: This image activator is byte order neutral.
560 *
561 * Note: A return value other than -1 indicates subsequent image
562 * activators should not be given the opportunity to attempt
563 * to activate the image.
564 *
565 * If we find an encapsulated binary, we make no assertions
566 * about its validity; instead, we leave that up to a rescan
567 * for an activator to claim it, and, if it is claimed by one,
568 * that activator is responsible for determining validity.
569 */
570 static int
571 exec_fat_imgact(struct image_params *imgp)
572 {
573 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
574 kauth_cred_t cred = kauth_cred_proc_ref(p);
575 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
576 struct _posix_spawnattr *psa = NULL;
577 struct fat_arch fat_arch;
578 int resid, error;
579 load_return_t lret;
580
581 /* Make sure it's a fat binary */
582 if ((fat_header->magic != FAT_MAGIC) &&
583 (fat_header->magic != FAT_CIGAM)) {
584 error = -1;
585 goto bad;
586 }
587
588 /* If posix_spawn binprefs exist, respect those prefs. */
589 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
590 if (psa != NULL && psa->psa_binprefs[0] != 0) {
591 struct fat_arch *arches = (struct fat_arch *) (fat_header + 1);
592 int nfat_arch = 0, pr = 0, f = 0;
593
594 nfat_arch = OSSwapBigToHostInt32(fat_header->nfat_arch);
595 /* Check each preference listed against all arches in header */
596 for (pr = 0; pr < NBINPREFS; pr++) {
597 cpu_type_t pref = psa->psa_binprefs[pr];
598 if (pref == 0) {
599 /* No suitable arch in the pref list */
600 error = EBADARCH;
601 goto bad;
602 }
603
604 if (pref == CPU_TYPE_ANY) {
605 /* Fall through to regular grading */
606 break;
607 }
608
609 for (f = 0; f < nfat_arch; f++) {
610 cpu_type_t archtype = OSSwapBigToHostInt32(
611 arches[f].cputype);
612 cpu_type_t archsubtype = OSSwapBigToHostInt32(
613 arches[f].cpusubtype) & ~CPU_SUBTYPE_MASK;
614 if (pref == archtype &&
615 grade_binary(archtype, archsubtype)) {
616 /* We have a winner! */
617 fat_arch.cputype = archtype;
618 fat_arch.cpusubtype = archsubtype;
619 fat_arch.offset = OSSwapBigToHostInt32(
620 arches[f].offset);
621 fat_arch.size = OSSwapBigToHostInt32(
622 arches[f].size);
623 fat_arch.align = OSSwapBigToHostInt32(
624 arches[f].align);
625 goto use_arch;
626 }
627 }
628 }
629 }
630
631 /* Look up our preferred architecture in the fat file. */
632 lret = fatfile_getarch_affinity(imgp->ip_vp,
633 (vm_offset_t)fat_header,
634 &fat_arch,
635 (p->p_flag & P_AFFINITY));
636 if (lret != LOAD_SUCCESS) {
637 error = load_return_to_errno(lret);
638 goto bad;
639 }
640
641 use_arch:
642 /* Read the Mach-O header out of fat_arch */
643 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
644 PAGE_SIZE, fat_arch.offset,
645 UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED),
646 cred, &resid, p);
647 if (error) {
648 goto bad;
649 }
650
651 /* Did we read a complete header? */
652 if (resid) {
653 error = EBADEXEC;
654 goto bad;
655 }
656
657 /* Success. Indicate we have identified an encapsulated binary */
658 error = -2;
659 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
660 imgp->ip_arch_size = (user_size_t)fat_arch.size;
661
662 bad:
663 kauth_cred_unref(&cred);
664 return (error);
665 }
666
667 /*
668 * exec_mach_imgact
669 *
670 * Image activator for mach-o 1.0 binaries.
671 *
672 * Parameters; struct image_params * image parameter block
673 *
674 * Returns: -1 not a Mach-O image this activator handles (keep looking)
675 * -2 Success: encapsulated binary: reread
676 * >0 Failure: error number
677 * EBADARCH Mach-o binary, but with an unrecognized
678 * architecture
679 * ENOMEM No memory for child process after -
680 * can only happen after vfork()
681 *
682 * Important: This image activator is NOT byte order neutral.
683 *
684 * Note: A return value other than -1 indicates subsequent image
685 * activators should not be given the opportunity to attempt
686 * to activate the image.
687 *
688 * TODO: More gracefully handle failures after vfork
689 */
690 static int
691 exec_mach_imgact(struct image_params *imgp)
692 {
693 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
694 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
695 int error = 0;
696 int vfexec = 0;
697 task_t task;
698 task_t new_task = NULL; /* protected by vfexec */
699 thread_t thread;
700 struct uthread *uthread;
701 vm_map_t old_map = VM_MAP_NULL;
702 vm_map_t map;
703 load_return_t lret;
704 load_result_t load_result;
705 struct _posix_spawnattr *psa = NULL;
706 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
707
708 /*
709 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
710 * is a reserved field on the end, so for the most part, we can
711 * treat them as if they were identical. Reverse-endian Mach-O
712 * binaries are recognized but not compatible.
713 */
714 if ((mach_header->magic == MH_CIGAM) ||
715 (mach_header->magic == MH_CIGAM_64)) {
716 error = EBADARCH;
717 goto bad;
718 }
719
720 if ((mach_header->magic != MH_MAGIC) &&
721 (mach_header->magic != MH_MAGIC_64)) {
722 error = -1;
723 goto bad;
724 }
725
726 switch (mach_header->filetype) {
727 case MH_DYLIB:
728 case MH_BUNDLE:
729 error = -1;
730 goto bad;
731 }
732
733 if (!imgp->ip_origcputype) {
734 imgp->ip_origcputype = mach_header->cputype;
735 imgp->ip_origcpusubtype = mach_header->cpusubtype;
736 }
737
738 task = current_task();
739 thread = current_thread();
740 uthread = get_bsdthread_info(thread);
741
742 /*
743 * Save off the vfexec state up front; we have to do this, because
744 * we need to know if we were in this state initially subsequent to
745 * creating the backing task, thread, and uthread for the child
746 * process (from the vfs_context_t in the image_params).
747 */
748 if (uthread->uu_flag & UT_VFORK)
749 vfexec = 1; /* Mark in exec */
750
751 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
752 imgp->ip_flags |= IMGPF_IS_64BIT;
753
754 /* If posix_spawn binprefs exist, respect those prefs. */
755 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
756 if (psa != NULL && psa->psa_binprefs[0] != 0) {
757 int pr = 0;
758 for (pr = 0; pr < NBINPREFS; pr++) {
759 cpu_type_t pref = psa->psa_binprefs[pr];
760 if (pref == 0) {
761 /* No suitable arch in the pref list */
762 error = EBADARCH;
763 goto bad;
764 }
765
766 if (pref == CPU_TYPE_ANY) {
767 /* Jump to regular grading */
768 goto grade;
769 }
770
771 if (pref == imgp->ip_origcputype) {
772 /* We have a match! */
773 goto grade;
774 }
775 }
776 error = EBADARCH;
777 goto bad;
778 }
779 grade:
780 if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
781 error = EBADARCH;
782 goto bad;
783 }
784
785 /* Copy in arguments/environment from the old process */
786 error = exec_extract_strings(imgp);
787 if (error)
788 goto bad;
789
790 error = exec_add_apple_strings(imgp);
791 if (error)
792 goto bad;
793
794 AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
795 imgp->ip_endargv - imgp->ip_startargv);
796 AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
797 imgp->ip_endenvv - imgp->ip_endargv);
798
799 /*
800 * We are being called to activate an image subsequent to a vfork()
801 * operation; in this case, we know that our task, thread, and
802 * uthread are actually those of our parent, and our proc, which we
803 * obtained indirectly from the image_params vfs_context_t, is the
804 * new child process.
805 */
806 if (vfexec || spawn) {
807 if (vfexec) {
808 imgp->ip_new_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
809 if (imgp->ip_new_thread == NULL) {
810 error = ENOMEM;
811 goto bad;
812 }
813 }
814
815 /* reset local idea of thread, uthread, task */
816 thread = imgp->ip_new_thread;
817 uthread = get_bsdthread_info(thread);
818 task = new_task = get_threadtask(thread);
819 map = get_task_map(task);
820 } else {
821 map = VM_MAP_NULL;
822 }
823
824 /*
825 * We set these flags here; this is OK, since if we fail after
826 * this point, we have already destroyed the parent process anyway.
827 */
828 task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
829 if (imgp->ip_flags & IMGPF_IS_64BIT) {
830 task_set_64bit(task, TRUE);
831 OSBitOrAtomic(P_LP64, &p->p_flag);
832 } else {
833 task_set_64bit(task, FALSE);
834 OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
835 }
836
837 /*
838 * Load the Mach-O file.
839 *
840 * NOTE: An error after this point indicates we have potentially
841 * destroyed or overwritten some process state while attempting an
842 * execve() following a vfork(), which is an unrecoverable condition.
843 */
844
845 /*
846 * Actually load the image file we previously decided to load.
847 */
848 lret = load_machfile(imgp, mach_header, thread, map, &load_result);
849
850 if (lret != LOAD_SUCCESS) {
851 error = load_return_to_errno(lret);
852 goto badtoolate;
853 }
854
855 vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
856
857 /*
858 * Set code-signing flags if this binary is signed, or if parent has
859 * requested them on exec.
860 */
861 if (load_result.csflags & CS_VALID) {
862 imgp->ip_csflags |= load_result.csflags &
863 (CS_VALID|
864 CS_HARD|CS_KILL|CS_ENFORCEMENT|
865 CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
866 } else {
867 imgp->ip_csflags &= ~CS_VALID;
868 }
869
870 if (p->p_csflags & CS_EXEC_SET_HARD)
871 imgp->ip_csflags |= CS_HARD;
872 if (p->p_csflags & CS_EXEC_SET_KILL)
873 imgp->ip_csflags |= CS_KILL;
874 if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
875 imgp->ip_csflags |= CS_ENFORCEMENT;
876
877
878 /*
879 * Set up the system reserved areas in the new address space.
880 */
881 vm_map_exec(get_task_map(task),
882 task,
883 (void *) p->p_fd->fd_rdir,
884 cpu_type());
885
886 /*
887 * Close file descriptors which specify close-on-exec.
888 */
889 fdexec(p, psa != NULL ? psa->psa_flags : 0);
890
891 /*
892 * deal with set[ug]id.
893 */
894 error = exec_handle_sugid(imgp);
895
896 /* Make sure we won't interrupt ourself signalling a partial process */
897 if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
898 psignal(p, SIGTRAP);
899
900 if (error) {
901 goto badtoolate;
902 }
903
904 if (load_result.unixproc &&
905 create_unix_stack(get_task_map(task),
906 &load_result,
907 p) != KERN_SUCCESS) {
908 error = load_return_to_errno(LOAD_NOSPACE);
909 goto badtoolate;
910 }
911
912 if (vfexec || spawn) {
913 old_map = vm_map_switch(get_task_map(task));
914 }
915
916 if (load_result.unixproc) {
917 user_addr_t ap;
918
919 /*
920 * Copy the strings area out into the new process address
921 * space.
922 */
923 ap = p->user_stack;
924 error = exec_copyout_strings(imgp, &ap);
925 if (error) {
926 if (vfexec || spawn)
927 vm_map_switch(old_map);
928 goto badtoolate;
929 }
930 /* Set the stack */
931 thread_setuserstack(thread, ap);
932 }
933
934 if (load_result.dynlinker) {
935 uint64_t ap;
936 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
937
938 /* Adjust the stack */
939 ap = thread_adjuserstack(thread, -new_ptr_size);
940 error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
941
942 if (error) {
943 if (vfexec || spawn)
944 vm_map_switch(old_map);
945 goto badtoolate;
946 }
947 task_set_dyld_info(task, load_result.all_image_info_addr,
948 load_result.all_image_info_size);
949 }
950
951 /* Avoid immediate VM faults back into kernel */
952 exec_prefault_data(p, imgp, &load_result);
953
954 if (vfexec || spawn) {
955 vm_map_switch(old_map);
956 }
957 /* Set the entry point */
958 thread_setentrypoint(thread, load_result.entry_point);
959
960 /* Stop profiling */
961 stopprofclock(p);
962
963 /*
964 * Reset signal state.
965 */
966 execsigs(p, thread);
967
968 /*
969 * need to cancel async IO requests that can be cancelled and wait for those
970 * already active. MAY BLOCK!
971 */
972 _aio_exec( p );
973
974 #if SYSV_SHM
975 /* FIXME: Till vmspace inherit is fixed: */
976 if (!vfexec && p->vm_shm)
977 shmexec(p);
978 #endif
979 #if SYSV_SEM
980 /* Clean up the semaphores */
981 semexit(p);
982 #endif
983
984 /*
985 * Remember file name for accounting.
986 */
987 p->p_acflag &= ~AFORK;
988 /* If the translated name isn't NULL, then we want to use
989 * that translated name as the name we show as the "real" name.
990 * Otherwise, use the name passed into exec.
991 */
992 if (0 != imgp->ip_p_comm[0]) {
993 bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
994 sizeof(p->p_comm));
995 } else {
996 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
997 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
998 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
999 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
1000 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
1001 }
1002
1003 pal_dbg_set_task_name( p->task );
1004
1005 memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
1006
1007 // <rdar://6598155> dtrace code cleanup needed
1008 #if CONFIG_DTRACE
1009 /*
1010 * Invalidate any predicate evaluation already cached for this thread by DTrace.
1011 * That's because we've just stored to p_comm and DTrace refers to that when it
1012 * evaluates the "execname" special variable. uid and gid may have changed as well.
1013 */
1014 dtrace_set_thread_predcache(current_thread(), 0);
1015
1016 /*
1017 * Free any outstanding lazy dof entries. It is imperative we
1018 * always call dtrace_lazy_dofs_destroy, rather than null check
1019 * and call if !NULL. If we NULL test, during lazy dof faulting
1020 * we can race with the faulting code and proceed from here to
1021 * beyond the helpers cleanup. The lazy dof faulting will then
1022 * install new helpers which no longer belong to this process!
1023 */
1024 dtrace_lazy_dofs_destroy(p);
1025
1026
1027 /*
1028 * Clean up any DTrace helpers for the process.
1029 */
1030 if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
1031 (*dtrace_helpers_cleanup)(p);
1032 }
1033
1034 /*
1035 * Cleanup the DTrace provider associated with this process.
1036 */
1037 proc_lock(p);
1038 if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
1039 (*dtrace_fasttrap_exec_ptr)(p);
1040 }
1041 proc_unlock(p);
1042 #endif
1043
1044 if (kdebug_enable) {
1045 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
1046
1047 /*
1048 * Collect the pathname for tracing
1049 */
1050 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
1051
1052 if (vfexec || spawn) {
1053 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
1054 p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread));
1055 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
1056 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
1057 } else {
1058 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
1059 p->p_pid ,0,0,0,0);
1060 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
1061 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
1062 }
1063 }
1064
1065 /*
1066 * Ensure the 'translated' and 'affinity' flags are cleared, since we
1067 * no longer run PowerPC binaries.
1068 */
1069 OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag);
1070
1071 /*
1072 * If posix_spawned with the START_SUSPENDED flag, stop the
1073 * process before it runs.
1074 */
1075 if (imgp->ip_px_sa != NULL) {
1076 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1077 if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1078 proc_lock(p);
1079 p->p_stat = SSTOP;
1080 proc_unlock(p);
1081 (void) task_suspend(p->task);
1082 }
1083 }
1084
1085 /*
1086 * Apply the apptype state (which primes the task for importance donation)
1087 * This must be done after the exec so that the child's thread is ready
1088 */
1089 if (imgp->ip_px_sa != NULL) {
1090 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1091 exec_handle_spawnattr_apptype(p, psa->psa_apptype);
1092 }
1093
1094 /*
1095 * mark as execed, wakeup the process that vforked (if any) and tell
1096 * it that it now has its own resources back
1097 */
1098 OSBitOrAtomic(P_EXEC, &p->p_flag);
1099 proc_resetregister(p);
1100 if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1101 proc_lock(p);
1102 p->p_lflag &= ~P_LPPWAIT;
1103 proc_unlock(p);
1104 wakeup((caddr_t)p->p_pptr);
1105 }
1106
1107 /*
1108 * Pay for our earlier safety; deliver the delayed signals from
1109 * the incomplete vfexec process now that it's complete.
1110 */
1111 if (vfexec && (p->p_lflag & P_LTRACED)) {
1112 psignal_vfork(p, new_task, thread, SIGTRAP);
1113 }
1114
1115 badtoolate:
1116 if (!spawn)
1117 /* notify only if it has not failed due to FP Key error */
1118 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
1119 proc_knote(p, NOTE_EXEC);
1120
1121 if (vfexec || spawn) {
1122 task_deallocate(new_task);
1123 thread_deallocate(thread);
1124 if (error)
1125 error = 0;
1126 }
1127
1128 bad:
1129 return(error);
1130 }
1131
1132
1133
1134
1135 /*
1136 * Our image activator table; this is the table of the image types we are
1137 * capable of loading. We list them in order of preference to ensure the
1138 * fastest image load speed.
1139 *
1140 * XXX hardcoded, for now; should use linker sets
1141 */
1142 struct execsw {
1143 int (*ex_imgact)(struct image_params *);
1144 const char *ex_name;
1145 } execsw[] = {
1146 { exec_mach_imgact, "Mach-o Binary" },
1147 { exec_fat_imgact, "Fat Binary" },
1148 { exec_shell_imgact, "Interpreter Script" },
1149 { NULL, NULL}
1150 };
1151
1152
1153 /*
1154 * exec_activate_image
1155 *
1156 * Description: Iterate through the available image activators, and activate
1157 * the image associated with the imgp structure. We start with
1158 * the
1159 *
1160 * Parameters: struct image_params * Image parameter block
1161 *
1162 * Returns: 0 Success
1163 * EBADEXEC The executable is corrupt/unknown
1164 * execargs_alloc:EINVAL Invalid argument
1165 * execargs_alloc:EACCES Permission denied
1166 * execargs_alloc:EINTR Interrupted function
1167 * execargs_alloc:ENOMEM Not enough space
1168 * exec_save_path:EFAULT Bad address
1169 * exec_save_path:ENAMETOOLONG Filename too long
1170 * exec_check_permissions:EACCES Permission denied
1171 * exec_check_permissions:ENOEXEC Executable file format error
1172 * exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
1173 * exec_check_permissions:???
1174 * namei:???
1175 * vn_rdwr:??? [anything vn_rdwr can return]
1176 * <ex_imgact>:??? [anything an imgact can return]
1177 */
1178 static int
1179 exec_activate_image(struct image_params *imgp)
1180 {
1181 struct nameidata nd;
1182 int error;
1183 int resid;
1184 int once = 1; /* save SGUID-ness for interpreted files */
1185 int i;
1186 int iterlimit = EAI_ITERLIMIT;
1187 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1188
1189 error = execargs_alloc(imgp);
1190 if (error)
1191 goto bad_notrans;
1192
1193 error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
1194 if (error) {
1195 goto bad_notrans;
1196 }
1197
1198 /* Use imgp->ip_strings, which contains the copyin-ed exec path */
1199 DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
1200
1201 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1202 UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
1203
1204 again:
1205 error = namei(&nd);
1206 if (error)
1207 goto bad_notrans;
1208 imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */
1209 imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */
1210
1211 /*
1212 * Before we start the transition from binary A to binary B, make
1213 * sure another thread hasn't started exiting the process. We grab
1214 * the proc lock to check p_lflag initially, and the transition
1215 * mechanism ensures that the value doesn't change after we release
1216 * the lock.
1217 */
1218 proc_lock(p);
1219 if (p->p_lflag & P_LEXIT) {
1220 proc_unlock(p);
1221 goto bad_notrans;
1222 }
1223 error = proc_transstart(p, 1);
1224 proc_unlock(p);
1225 if (error)
1226 goto bad_notrans;
1227
1228 error = exec_check_permissions(imgp);
1229 if (error)
1230 goto bad;
1231
1232 /* Copy; avoid invocation of an interpreter overwriting the original */
1233 if (once) {
1234 once = 0;
1235 *imgp->ip_origvattr = *imgp->ip_vattr;
1236 }
1237
1238 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1239 UIO_SYSSPACE, IO_NODELOCKED,
1240 vfs_context_ucred(imgp->ip_vfs_context),
1241 &resid, vfs_context_proc(imgp->ip_vfs_context));
1242 if (error)
1243 goto bad;
1244
1245 encapsulated_binary:
1246 /* Limit the number of iterations we will attempt on each binary */
1247 if (--iterlimit == 0) {
1248 error = EBADEXEC;
1249 goto bad;
1250 }
1251 error = -1;
1252 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1253
1254 error = (*execsw[i].ex_imgact)(imgp);
1255
1256 switch (error) {
1257 /* case -1: not claimed: continue */
1258 case -2: /* Encapsulated binary */
1259 goto encapsulated_binary;
1260
1261 case -3: /* Interpreter */
1262 #if CONFIG_MACF
1263 /*
1264 * Copy the script label for later use. Note that
1265 * the label can be different when the script is
1266 * actually read by the interpreter.
1267 */
1268 if (imgp->ip_scriptlabelp)
1269 mac_vnode_label_free(imgp->ip_scriptlabelp);
1270 imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1271 if (imgp->ip_scriptlabelp == NULL) {
1272 error = ENOMEM;
1273 break;
1274 }
1275 mac_vnode_label_copy(imgp->ip_vp->v_label,
1276 imgp->ip_scriptlabelp);
1277
1278 /*
1279 * Take a ref of the script vnode for later use.
1280 */
1281 if (imgp->ip_scriptvp)
1282 vnode_put(imgp->ip_scriptvp);
1283 if (vnode_getwithref(imgp->ip_vp) == 0)
1284 imgp->ip_scriptvp = imgp->ip_vp;
1285 #endif
1286
1287 nameidone(&nd);
1288
1289 vnode_put(imgp->ip_vp);
1290 imgp->ip_vp = NULL; /* already put */
1291 imgp->ip_ndp = NULL; /* already nameidone */
1292
1293 /* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */
1294 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
1295 UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
1296
1297 proc_transend(p, 0);
1298 goto again;
1299
1300 default:
1301 break;
1302 }
1303 }
1304
1305 /*
1306 * Call out to allow 3rd party notification of exec.
1307 * Ignore result of kauth_authorize_fileop call.
1308 */
1309 if (error == 0 && kauth_authorize_fileop_has_listeners()) {
1310 kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1311 KAUTH_FILEOP_EXEC,
1312 (uintptr_t)nd.ni_vp, 0);
1313 }
1314
1315 bad:
1316 proc_transend(p, 0);
1317
1318 bad_notrans:
1319 if (imgp->ip_strings)
1320 execargs_free(imgp);
1321 if (imgp->ip_ndp)
1322 nameidone(imgp->ip_ndp);
1323
1324 return (error);
1325 }
1326
1327
1328 /*
1329 * exec_handle_spawnattr_apptype
1330 *
1331 * Description: Decode and apply the posix_spawn apptype to the task.
1332 *
1333 * Parameters: proc_t p process to apply attributes to
1334 * int psa_apptype posix spawn attribute apptype
1335 *
1336 * Returns: 0 Success
1337 */
1338 static errno_t
1339 exec_handle_spawnattr_apptype(proc_t p, int psa_apptype)
1340 {
1341 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1342 int apptype = TASK_APPTYPE_NONE;
1343 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1344
1345 switch(proctype) {
1346 case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1347 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1348 break;
1349 case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1350 apptype = TASK_APPTYPE_DAEMON_STANDARD;
1351 break;
1352 case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1353 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1354 break;
1355 case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1356 apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1357 break;
1358 case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1359 apptype = TASK_APPTYPE_APP_DEFAULT;
1360 break;
1361 case POSIX_SPAWN_PROC_TYPE_APP_TAL:
1362 apptype = TASK_APPTYPE_APP_TAL;
1363 break;
1364 default:
1365 apptype = TASK_APPTYPE_NONE;
1366 break;
1367 }
1368
1369 proc_set_task_apptype(p->task, apptype);
1370
1371 /* TODO: Should an invalid value here fail the spawn? */
1372 return (0);
1373 }
1374
1375 return (0);
1376 }
1377
1378
1379 /*
1380 * exec_handle_port_actions
1381 *
1382 * Description: Go through the _posix_port_actions_t contents,
1383 * calling task_set_special_port, task_set_exception_ports
1384 * and/or audit_session_spawnjoin for the current task.
1385 *
1386 * Parameters: struct image_params * Image parameter block
1387 * short psa_flags posix spawn attribute flags
1388 *
1389 * Returns: 0 Success
1390 * EINVAL Failure
1391 * ENOTSUP Illegal posix_spawn attr flag was set
1392 */
1393 static errno_t
1394 exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch_ports)
1395 {
1396 _posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1397 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1398 _ps_port_action_t *act = NULL;
1399 task_t task = p->task;
1400 ipc_port_t port = NULL;
1401 errno_t ret = 0;
1402 int i;
1403
1404 if (need_portwatch != NULL)
1405 *need_portwatch = 0;
1406
1407 for (i = 0; i < pacts->pspa_count; i++) {
1408 act = &pacts->pspa_actions[i];
1409
1410 if (ipc_object_copyin(get_task_ipcspace(current_task()),
1411 act->new_port, MACH_MSG_TYPE_COPY_SEND,
1412 (ipc_object_t *) &port) != KERN_SUCCESS) {
1413 ret = EINVAL;
1414 goto done;
1415 }
1416
1417 switch (act->port_type) {
1418 case PSPA_SPECIAL:
1419 /* Only allowed when not under vfork */
1420 if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1421 ret = ENOTSUP;
1422 else if (task_set_special_port(task,
1423 act->which, port) != KERN_SUCCESS)
1424 ret = EINVAL;
1425 break;
1426
1427 case PSPA_EXCEPTION:
1428 /* Only allowed when not under vfork */
1429 if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1430 ret = ENOTSUP;
1431 else if (task_set_exception_ports(task,
1432 act->mask, port, act->behavior,
1433 act->flavor) != KERN_SUCCESS)
1434 ret = EINVAL;
1435 break;
1436 #if CONFIG_AUDIT
1437 case PSPA_AU_SESSION:
1438 ret = audit_session_spawnjoin(p, port);
1439 break;
1440 #endif
1441 case PSPA_IMP_WATCHPORTS:
1442 if (portwatch_ports != NULL) {
1443 if (need_portwatch != NULL)
1444 *need_portwatch = 1;
1445 /* hold on to this till end of spawn */
1446 portwatch_ports[i] = port;
1447 ret = 0;
1448 } else
1449 ipc_port_release_send(port);
1450 break;
1451 default:
1452 ret = EINVAL;
1453 break;
1454 }
1455
1456 /* action failed, so release port resources */
1457
1458 if (ret) {
1459 ipc_port_release_send(port);
1460 break;
1461 }
1462 }
1463
1464 done:
1465 if (0 != ret)
1466 DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
1467 return (ret);
1468 }
1469
1470 /*
1471 * exec_handle_file_actions
1472 *
1473 * Description: Go through the _posix_file_actions_t contents applying the
1474 * open, close, and dup2 operations to the open file table for
1475 * the current process.
1476 *
1477 * Parameters: struct image_params * Image parameter block
1478 *
1479 * Returns: 0 Success
1480 * ???
1481 *
1482 * Note: Actions are applied in the order specified, with the credential
1483 * of the parent process. This is done to permit the parent
1484 * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1485 * the child following operations the child may in fact not be
1486 * normally permitted to perform.
1487 */
1488 static int
1489 exec_handle_file_actions(struct image_params *imgp, short psa_flags)
1490 {
1491 int error = 0;
1492 int action;
1493 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1494 _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
1495 int ival[2]; /* dummy retval for system calls) */
1496
1497 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1498 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1499
1500 switch(psfa->psfaa_type) {
1501 case PSFA_OPEN: {
1502 /*
1503 * Open is different, in that it requires the use of
1504 * a path argument, which is normally copied in from
1505 * user space; because of this, we have to support an
1506 * open from kernel space that passes an address space
1507 * context of UIO_SYSSPACE, and casts the address
1508 * argument to a user_addr_t.
1509 */
1510 struct vnode_attr va;
1511 struct nameidata nd;
1512 int mode = psfa->psfaa_openargs.psfao_mode;
1513 struct dup2_args dup2a;
1514 struct close_nocancel_args ca;
1515 int origfd;
1516
1517 VATTR_INIT(&va);
1518 /* Mask off all but regular access permissions */
1519 mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1520 VATTR_SET(&va, va_mode, mode & ACCESSPERMS);
1521
1522 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
1523 CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1524 imgp->ip_vfs_context);
1525
1526 error = open1(imgp->ip_vfs_context,
1527 &nd,
1528 psfa->psfaa_openargs.psfao_oflag,
1529 &va,
1530 fileproc_alloc_init, NULL,
1531 ival);
1532
1533 /*
1534 * If there's an error, or we get the right fd by
1535 * accident, then drop out here. This is easier than
1536 * reworking all the open code to preallocate fd
1537 * slots, and internally taking one as an argument.
1538 */
1539 if (error || ival[0] == psfa->psfaa_filedes)
1540 break;
1541
1542 origfd = ival[0];
1543 /*
1544 * If we didn't fall out from an error, we ended up
1545 * with the wrong fd; so now we've got to try to dup2
1546 * it to the right one.
1547 */
1548 dup2a.from = origfd;
1549 dup2a.to = psfa->psfaa_filedes;
1550
1551 /*
1552 * The dup2() system call implementation sets
1553 * ival to newfd in the success case, but we
1554 * can ignore that, since if we didn't get the
1555 * fd we wanted, the error will stop us.
1556 */
1557 error = dup2(p, &dup2a, ival);
1558 if (error)
1559 break;
1560
1561 /*
1562 * Finally, close the original fd.
1563 */
1564 ca.fd = origfd;
1565
1566 error = close_nocancel(p, &ca, ival);
1567 }
1568 break;
1569
1570 case PSFA_DUP2: {
1571 struct dup2_args dup2a;
1572
1573 dup2a.from = psfa->psfaa_filedes;
1574 dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1575
1576 /*
1577 * The dup2() system call implementation sets
1578 * ival to newfd in the success case, but we
1579 * can ignore that, since if we didn't get the
1580 * fd we wanted, the error will stop us.
1581 */
1582 error = dup2(p, &dup2a, ival);
1583 }
1584 break;
1585
1586 case PSFA_CLOSE: {
1587 struct close_nocancel_args ca;
1588
1589 ca.fd = psfa->psfaa_filedes;
1590
1591 error = close_nocancel(p, &ca, ival);
1592 }
1593 break;
1594
1595 case PSFA_INHERIT: {
1596 struct fcntl_nocancel_args fcntla;
1597
1598 /*
1599 * Check to see if the descriptor exists, and
1600 * ensure it's -not- marked as close-on-exec.
1601 *
1602 * Attempting to "inherit" a guarded fd will
1603 * result in a error.
1604 */
1605 fcntla.fd = psfa->psfaa_filedes;
1606 fcntla.cmd = F_GETFD;
1607 if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0)
1608 break;
1609
1610 if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
1611 fcntla.fd = psfa->psfaa_filedes;
1612 fcntla.cmd = F_SETFD;
1613 fcntla.arg = ival[0] & ~FD_CLOEXEC;
1614 error = fcntl_nocancel(p, &fcntla, ival);
1615 }
1616
1617 }
1618 break;
1619
1620 default:
1621 error = EINVAL;
1622 break;
1623 }
1624
1625 /* All file actions failures are considered fatal, per POSIX */
1626
1627 if (error) {
1628 if (PSFA_OPEN == psfa->psfaa_type) {
1629 DTRACE_PROC1(spawn__open__failure, uintptr_t,
1630 psfa->psfaa_openargs.psfao_path);
1631 } else {
1632 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1633 }
1634 break;
1635 }
1636 }
1637
1638 if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
1639 return (error);
1640
1641 /*
1642 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1643 * this spawn only) as if "close on exec" is the default
1644 * disposition of all pre-existing file descriptors. In this case,
1645 * the list of file descriptors mentioned in the file actions
1646 * are the only ones that can be inherited, so mark them now.
1647 *
1648 * The actual closing part comes later, in fdexec().
1649 */
1650 proc_fdlock(p);
1651 for (action = 0; action < px_sfap->psfa_act_count; action++) {
1652 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1653 int fd = psfa->psfaa_filedes;
1654
1655 switch (psfa->psfaa_type) {
1656 case PSFA_DUP2:
1657 fd = psfa->psfaa_openargs.psfao_oflag;
1658 /*FALLTHROUGH*/
1659 case PSFA_OPEN:
1660 case PSFA_INHERIT:
1661 *fdflags(p, fd) |= UF_INHERIT;
1662 break;
1663
1664 case PSFA_CLOSE:
1665 break;
1666 }
1667 }
1668 proc_fdunlock(p);
1669
1670 return (0);
1671 }
1672
1673 #if CONFIG_MACF
1674 /*
1675 * exec_spawnattr_getmacpolicyinfo
1676 */
1677 void *
1678 exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
1679 {
1680 const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1681 int i;
1682
1683 if (psmx == NULL)
1684 return NULL;
1685
1686 for (i = 0; i < psmx->psmx_count; i++) {
1687 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1688 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
1689 if (lenp != NULL)
1690 *lenp = extension->datalen;
1691 return extension->datap;
1692 }
1693 }
1694
1695 if (lenp != NULL)
1696 *lenp = 0;
1697 return NULL;
1698 }
1699
1700 static int
1701 spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
1702 {
1703 _posix_spawn_mac_policy_extensions_t psmx = NULL;
1704 int error = 0;
1705 int copycnt = 0;
1706 int i = 0;
1707
1708 *psmxp = NULL;
1709
1710 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
1711 px_args->mac_extensions_size > PAGE_SIZE) {
1712 error = EINVAL;
1713 goto bad;
1714 }
1715
1716 MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
1717 if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0)
1718 goto bad;
1719
1720 if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) {
1721 error = EINVAL;
1722 goto bad;
1723 }
1724
1725 for (i = 0; i < psmx->psmx_count; i++) {
1726 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1727 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
1728 error = EINVAL;
1729 goto bad;
1730 }
1731 }
1732
1733 for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
1734 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
1735 void *data = NULL;
1736
1737 MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
1738 if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
1739 FREE(data, M_TEMP);
1740 goto bad;
1741 }
1742 extension->datap = data;
1743 }
1744
1745 *psmxp = psmx;
1746 return 0;
1747
1748 bad:
1749 if (psmx != NULL) {
1750 for (i = 0; i < copycnt; i++)
1751 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1752 FREE(psmx, M_TEMP);
1753 }
1754 return error;
1755 }
1756
1757 static void
1758 spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
1759 {
1760 int i;
1761
1762 if (psmx == NULL)
1763 return;
1764 for (i = 0; i < psmx->psmx_count; i++)
1765 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1766 FREE(psmx, M_TEMP);
1767 }
1768 #endif /* CONFIG_MACF */
1769
1770 /*
1771 * posix_spawn
1772 *
1773 * Parameters: uap->pid Pointer to pid return area
1774 * uap->fname File name to exec
1775 * uap->argp Argument list
1776 * uap->envp Environment list
1777 *
1778 * Returns: 0 Success
1779 * EINVAL Invalid argument
1780 * ENOTSUP Not supported
1781 * ENOEXEC Executable file format error
1782 * exec_activate_image:EINVAL Invalid argument
1783 * exec_activate_image:EACCES Permission denied
1784 * exec_activate_image:EINTR Interrupted function
1785 * exec_activate_image:ENOMEM Not enough space
1786 * exec_activate_image:EFAULT Bad address
1787 * exec_activate_image:ENAMETOOLONG Filename too long
1788 * exec_activate_image:ENOEXEC Executable file format error
1789 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
1790 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
1791 * exec_activate_image:???
1792 * mac_execve_enter:???
1793 *
1794 * TODO: Expect to need __mac_posix_spawn() at some point...
1795 * Handle posix_spawnattr_t
1796 * Handle posix_spawn_file_actions_t
1797 */
1798 int
1799 posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
1800 {
1801 proc_t p = ap; /* quiet bogus GCC vfork() warning */
1802 user_addr_t pid = uap->pid;
1803 int ival[2]; /* dummy retval for setpgid() */
1804 char *bufp = NULL;
1805 struct image_params *imgp;
1806 struct vnode_attr *vap;
1807 struct vnode_attr *origvap;
1808 struct uthread *uthread = 0; /* compiler complains if not set to 0*/
1809 int error, sig;
1810 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */
1811 int is_64 = IS_64BIT_PROCESS(p);
1812 struct vfs_context context;
1813 struct user__posix_spawn_args_desc px_args;
1814 struct _posix_spawnattr px_sa;
1815 _posix_spawn_file_actions_t px_sfap = NULL;
1816 _posix_spawn_port_actions_t px_spap = NULL;
1817 struct __kern_sigaction vec;
1818 boolean_t spawn_no_exec = FALSE;
1819 boolean_t proc_transit_set = TRUE;
1820 boolean_t exec_done = FALSE;
1821 int need_portwatch = 0, portwatch_count = 0;
1822 ipc_port_t * portwatch_ports = NULL;
1823 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
1824
1825 /*
1826 * Allocate a big chunk for locals instead of using stack since these
1827 * structures are pretty big.
1828 */
1829 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
1830 imgp = (struct image_params *) bufp;
1831 if (bufp == NULL) {
1832 error = ENOMEM;
1833 goto bad;
1834 }
1835 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
1836 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
1837
1838 /* Initialize the common data in the image_params structure */
1839 imgp->ip_user_fname = uap->path;
1840 imgp->ip_user_argv = uap->argv;
1841 imgp->ip_user_envv = uap->envp;
1842 imgp->ip_vattr = vap;
1843 imgp->ip_origvattr = origvap;
1844 imgp->ip_vfs_context = &context;
1845 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
1846 imgp->ip_p_comm = alt_p_comm; /* for PowerPC */
1847 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
1848
1849 if (uap->adesc != USER_ADDR_NULL) {
1850 if(is_64) {
1851 error = copyin(uap->adesc, &px_args, sizeof(px_args));
1852 } else {
1853 struct user32__posix_spawn_args_desc px_args32;
1854
1855 error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
1856
1857 /*
1858 * Convert arguments descriptor from external 32 bit
1859 * representation to internal 64 bit representation
1860 */
1861 px_args.attr_size = px_args32.attr_size;
1862 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
1863 px_args.file_actions_size = px_args32.file_actions_size;
1864 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
1865 px_args.port_actions_size = px_args32.port_actions_size;
1866 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
1867 px_args.mac_extensions_size = px_args32.mac_extensions_size;
1868 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
1869 }
1870 if (error)
1871 goto bad;
1872
1873 if (px_args.attr_size != 0) {
1874 /*
1875 * We are not copying the port_actions pointer,
1876 * because we already have it from px_args.
1877 */
1878
1879
1880 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0))
1881 goto bad;
1882
1883 bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
1884
1885 imgp->ip_px_sa = &px_sa;
1886 }
1887 if (px_args.file_actions_size != 0) {
1888 /* Limit file_actions to allowed number of open files */
1889 int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
1890 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
1891 px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) {
1892 error = EINVAL;
1893 goto bad;
1894 }
1895 MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
1896 if (px_sfap == NULL) {
1897 error = ENOMEM;
1898 goto bad;
1899 }
1900 imgp->ip_px_sfa = px_sfap;
1901
1902 if ((error = copyin(px_args.file_actions, px_sfap,
1903 px_args.file_actions_size)) != 0)
1904 goto bad;
1905
1906 /* Verify that the action count matches the struct size */
1907 if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) {
1908 error = EINVAL;
1909 goto bad;
1910 }
1911 }
1912 if (px_args.port_actions_size != 0) {
1913 /* Limit port_actions to one page of data */
1914 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
1915 px_args.port_actions_size > PAGE_SIZE) {
1916 error = EINVAL;
1917 goto bad;
1918 }
1919
1920 MALLOC(px_spap, _posix_spawn_port_actions_t,
1921 px_args.port_actions_size, M_TEMP, M_WAITOK);
1922 if (px_spap == NULL) {
1923 error = ENOMEM;
1924 goto bad;
1925 }
1926 imgp->ip_px_spa = px_spap;
1927
1928 if ((error = copyin(px_args.port_actions, px_spap,
1929 px_args.port_actions_size)) != 0)
1930 goto bad;
1931
1932 /* Verify that the action count matches the struct size */
1933 if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) {
1934 error = EINVAL;
1935 goto bad;
1936 }
1937 }
1938 #if CONFIG_MACF
1939 if (px_args.mac_extensions_size != 0) {
1940 if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
1941 goto bad;
1942 }
1943 #endif /* CONFIG_MACF */
1944 }
1945
1946 /* set uthread to parent */
1947 uthread = get_bsdthread_info(current_thread());
1948
1949 /*
1950 * <rdar://6640530>; this does not result in a behaviour change
1951 * relative to Leopard, so there should not be any existing code
1952 * which depends on it.
1953 */
1954 if (uthread->uu_flag & UT_VFORK) {
1955 error = EINVAL;
1956 goto bad;
1957 }
1958
1959 /*
1960 * If we don't have the extension flag that turns "posix_spawn()"
1961 * into "execve() with options", then we will be creating a new
1962 * process which does not inherit memory from the parent process,
1963 * which is one of the most expensive things about using fork()
1964 * and execve().
1965 */
1966 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
1967 if ((error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN)) != 0)
1968 goto bad;
1969 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */
1970 spawn_no_exec = TRUE; /* used in later tests */
1971 DTRACE_PROC1(create, proc_t, p);
1972 }
1973
1974 if (spawn_no_exec)
1975 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
1976 assert(p != NULL);
1977
1978 /* By default, the thread everyone plays with is the parent */
1979 context.vc_thread = current_thread();
1980 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
1981
1982 /*
1983 * However, if we're not in the setexec case, redirect the context
1984 * to the newly created process instead
1985 */
1986 if (spawn_no_exec)
1987 context.vc_thread = imgp->ip_new_thread;
1988
1989 /*
1990 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
1991 * to handle the file_actions. Since vfork() also ends up setting
1992 * us into the parent process group, and saved off the signal flags,
1993 * this is also where we want to handle the spawn flags.
1994 */
1995
1996 /* Has spawn file actions? */
1997 if (imgp->ip_px_sfa != NULL) {
1998 /*
1999 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2000 * is handled in exec_handle_file_actions().
2001 */
2002 if ((error = exec_handle_file_actions(imgp,
2003 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
2004 goto bad;
2005 }
2006
2007 /* Has spawn port actions? */
2008 if (imgp->ip_px_spa != NULL) {
2009 boolean_t is_adaptive = FALSE;
2010
2011 /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
2012 if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2013 is_adaptive = TRUE;
2014
2015 /*
2016 * portwatch only:
2017 * Allocate a place to store the ports we want to bind to the new task
2018 * We can't bind them until after the apptype is set.
2019 */
2020 if (px_spap->pspa_count != 0 && is_adaptive) {
2021 portwatch_count = px_spap->pspa_count;
2022 MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO);
2023 } else {
2024 portwatch_ports = NULL;
2025 }
2026
2027 if ((error = exec_handle_port_actions(imgp,
2028 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &need_portwatch, portwatch_ports)) != 0)
2029 goto bad;
2030 }
2031
2032 /* Has spawn attr? */
2033 if (imgp->ip_px_sa != NULL) {
2034 /*
2035 * Set the process group ID of the child process; this has
2036 * to happen before the image activation.
2037 */
2038 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2039 struct setpgid_args spga;
2040 spga.pid = p->p_pid;
2041 spga.pgid = px_sa.psa_pgroup;
2042 /*
2043 * Effectively, call setpgid() system call; works
2044 * because there are no pointer arguments.
2045 */
2046 if((error = setpgid(p, &spga, ival)) != 0)
2047 goto bad;
2048 }
2049
2050 /*
2051 * Reset UID/GID to parent's RUID/RGID; This works only
2052 * because the operation occurs *after* the vfork() and
2053 * before the call to exec_handle_sugid() by the image
2054 * activator called from exec_activate_image(). POSIX
2055 * requires that any setuid/setgid bits on the process
2056 * image will take precedence over the spawn attributes
2057 * (re)setting them.
2058 *
2059 * The use of p_ucred is safe, since we are acting on the
2060 * new process, and it has no threads other than the one
2061 * we are creating for it.
2062 */
2063 if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2064 kauth_cred_t my_cred = p->p_ucred;
2065 kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2066 if (my_new_cred != my_cred) {
2067 p->p_ucred = my_new_cred;
2068 /* update cred on proc */
2069 PROC_UPDATE_CREDS_ONPROC(p);
2070 }
2071 }
2072
2073 /*
2074 * Disable ASLR for the spawned process.
2075 */
2076 /*
2077 * But only do so if we are not embedded; embedded allows for a
2078 * boot-arg (-disable_aslr) to deal with this (which itself is
2079 * only honored on DEVELOPMENT or DEBUG builds of xnu).
2080 */
2081 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2082 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2083
2084 /*
2085 * Forcibly disallow execution from data pages for the spawned process
2086 * even if it would otherwise be permitted by the architecture default.
2087 */
2088 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2089 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
2090 }
2091
2092 /*
2093 * Disable ASLR during image activation. This occurs either if the
2094 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2095 * P_DISABLE_ASLR was inherited from the parent process.
2096 */
2097 if (p->p_flag & P_DISABLE_ASLR)
2098 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
2099
2100 /*
2101 * Clear transition flag so we won't hang if exec_activate_image() causes
2102 * an automount (and launchd does a proc sysctl to service it).
2103 *
2104 * <rdar://problem/6848672>, <rdar://problem/5959568>.
2105 */
2106 if (spawn_no_exec) {
2107 proc_transend(p, 0);
2108 proc_transit_set = 0;
2109 }
2110
2111 #if MAC_SPAWN /* XXX */
2112 if (uap->mac_p != USER_ADDR_NULL) {
2113 error = mac_execve_enter(uap->mac_p, imgp);
2114 if (error)
2115 goto bad;
2116 }
2117 #endif
2118
2119 /*
2120 * Activate the image
2121 */
2122 error = exec_activate_image(imgp);
2123
2124 if (error == 0) {
2125 /* process completed the exec */
2126 exec_done = TRUE;
2127 } else if (error == -1) {
2128 /* Image not claimed by any activator? */
2129 error = ENOEXEC;
2130 }
2131
2132 /*
2133 * If we have a spawn attr, and it contains signal related flags,
2134 * the we need to process them in the "context" of the new child
2135 * process, so we have to process it following image activation,
2136 * prior to making the thread runnable in user space. This is
2137 * necessitated by some signal information being per-thread rather
2138 * than per-process, and we don't have the new allocation in hand
2139 * until after the image is activated.
2140 */
2141 if (!error && imgp->ip_px_sa != NULL) {
2142 thread_t child_thread = current_thread();
2143 uthread_t child_uthread = uthread;
2144
2145 /*
2146 * If we created a new child thread, then the thread and
2147 * uthread are different than the current ones; otherwise,
2148 * we leave them, since we are in the exec case instead.
2149 */
2150 if (spawn_no_exec) {
2151 child_thread = imgp->ip_new_thread;
2152 child_uthread = get_bsdthread_info(child_thread);
2153 }
2154
2155 /*
2156 * Mask a list of signals, instead of them being unmasked, if
2157 * they were unmasked in the parent; note that some signals
2158 * are not maskable.
2159 */
2160 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
2161 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2162 /*
2163 * Default a list of signals instead of ignoring them, if
2164 * they were ignored in the parent. Note that we pass
2165 * spawn_no_exec to setsigvec() to indicate that we called
2166 * fork1() and therefore do not need to call proc_signalstart()
2167 * internally.
2168 */
2169 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2170 vec.sa_handler = SIG_DFL;
2171 vec.sa_tramp = 0;
2172 vec.sa_mask = 0;
2173 vec.sa_flags = 0;
2174 for (sig = 0; sig < NSIG; sig++)
2175 if (px_sa.psa_sigdefault & (1 << sig)) {
2176 error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec);
2177 }
2178 }
2179
2180 /*
2181 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2182 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2183 * limit.
2184 *
2185 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2186 */
2187 if (px_sa.psa_cpumonitor_percent != 0) {
2188 /*
2189 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2190 * an entitlement to configure the monitor a certain way seems silly, since
2191 * whomever is turning it on could just as easily choose not to do so.
2192 *
2193 * XXX - Ignore the parameters that we get from userland. The spawnattr method of
2194 * activating the monitor always gets the system default parameters. Once we have
2195 * an explicit spawn SPI for configuring the defaults, we can revert this to
2196 * respect the params passed in from userland.
2197 */
2198 error = proc_set_task_ruse_cpu(p->task,
2199 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
2200 PROC_POLICY_CPUMON_DEFAULTS, 0,
2201 0, TRUE);
2202 }
2203 }
2204
2205 bad:
2206
2207 if (portwatch_ports != NULL) {
2208 int needboost = 0;
2209
2210 /*
2211 * Mark the ports as destined to be handed off to the new task, and
2212 * transfer any boosts to the new task.
2213 * We need to release the rights even if the posix_spawn has failed.
2214 */
2215 if (need_portwatch != 0) {
2216 for (int i = 0; i < portwatch_count; i++) {
2217 ipc_port_t port = NULL;
2218
2219 if ((port = portwatch_ports[i]) != NULL) {
2220 int boost = 0;
2221 if (error == 0)
2222 task_add_importance_watchport(p->task, p->p_pid, port, &boost);
2223 ipc_port_release_send(port);
2224 needboost += boost;
2225 }
2226 }
2227 }
2228
2229 if (needboost != 0) {
2230 /*
2231 * Apply the boost count found on the ports, which will keep the
2232 * newly created process out of background until it handles the incoming messages.
2233 */
2234 task_hold_multiple_assertion(p->task, needboost);
2235 }
2236
2237 FREE(portwatch_ports, M_TEMP);
2238 portwatch_ports = NULL;
2239 portwatch_count = 0;
2240 }
2241
2242 if (error == 0) {
2243 /* reset delay idle sleep status if set */
2244 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2245 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
2246 /* upon successful spawn, re/set the proc control state */
2247 if (imgp->ip_px_sa != NULL) {
2248 switch (px_sa.psa_pcontrol) {
2249 case POSIX_SPAWN_PCONTROL_THROTTLE:
2250 p->p_pcaction = P_PCTHROTTLE;
2251 break;
2252 case POSIX_SPAWN_PCONTROL_SUSPEND:
2253 p->p_pcaction = P_PCSUSP;
2254 break;
2255 case POSIX_SPAWN_PCONTROL_KILL:
2256 p->p_pcaction = P_PCKILL;
2257 break;
2258 case POSIX_SPAWN_PCONTROL_NONE:
2259 default:
2260 p->p_pcaction = 0;
2261 break;
2262 };
2263 }
2264 exec_resettextvp(p, imgp);
2265
2266 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
2267 /* Has jetsam attributes? */
2268 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2269 memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2270 TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
2271 }
2272 #endif
2273 }
2274
2275 /*
2276 * If we successfully called fork1(), we always need to do this;
2277 * we identify this case by noting the IMGPF_SPAWN flag. This is
2278 * because we come back from that call with signals blocked in the
2279 * child, and we have to unblock them, but we want to wait until
2280 * after we've performed any spawn actions. This has to happen
2281 * before check_for_signature(), which uses psignal.
2282 */
2283 if (spawn_no_exec) {
2284 if (proc_transit_set)
2285 proc_transend(p, 0);
2286
2287 /*
2288 * Drop the signal lock on the child which was taken on our
2289 * behalf by forkproc()/cloneproc() to prevent signals being
2290 * received by the child in a partially constructed state.
2291 */
2292 proc_signalend(p, 0);
2293
2294 /* flag the 'fork' has occurred */
2295 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
2296 /* then flag exec has occurred */
2297 /* notify only if it has not failed due to FP Key error */
2298 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
2299 proc_knote(p, NOTE_EXEC);
2300 }
2301
2302 /*
2303 * We have to delay operations which might throw a signal until after
2304 * the signals have been unblocked; however, we want that to happen
2305 * after exec_resettextvp() so that the textvp is correct when they
2306 * fire.
2307 */
2308 if (error == 0) {
2309 error = check_for_signature(p, imgp);
2310
2311 /*
2312 * Pay for our earlier safety; deliver the delayed signals from
2313 * the incomplete spawn process now that it's complete.
2314 */
2315 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
2316 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
2317 }
2318 }
2319
2320
2321 if (imgp != NULL) {
2322 if (imgp->ip_vp)
2323 vnode_put(imgp->ip_vp);
2324 if (imgp->ip_scriptvp)
2325 vnode_put(imgp->ip_scriptvp);
2326 if (imgp->ip_strings)
2327 execargs_free(imgp);
2328 if (imgp->ip_px_sfa != NULL)
2329 FREE(imgp->ip_px_sfa, M_TEMP);
2330 if (imgp->ip_px_spa != NULL)
2331 FREE(imgp->ip_px_spa, M_TEMP);
2332
2333 #if CONFIG_MACF
2334 if (imgp->ip_px_smpx != NULL)
2335 spawn_free_macpolicyinfo(imgp->ip_px_smpx);
2336 if (imgp->ip_execlabelp)
2337 mac_cred_label_free(imgp->ip_execlabelp);
2338 if (imgp->ip_scriptlabelp)
2339 mac_vnode_label_free(imgp->ip_scriptlabelp);
2340 #endif
2341 }
2342
2343 #if CONFIG_DTRACE
2344 if (spawn_no_exec) {
2345 /*
2346 * In the original DTrace reference implementation,
2347 * posix_spawn() was a libc routine that just
2348 * did vfork(2) then exec(2). Thus the proc::: probes
2349 * are very fork/exec oriented. The details of this
2350 * in-kernel implementation of posix_spawn() is different
2351 * (while producing the same process-observable effects)
2352 * particularly w.r.t. errors, and which thread/process
2353 * is constructing what on behalf of whom.
2354 */
2355 if (error) {
2356 DTRACE_PROC1(spawn__failure, int, error);
2357 } else {
2358 DTRACE_PROC(spawn__success);
2359 /*
2360 * Some DTrace scripts, e.g. newproc.d in
2361 * /usr/bin, rely on the the 'exec-success'
2362 * probe being fired in the child after the
2363 * new process image has been constructed
2364 * in order to determine the associated pid.
2365 *
2366 * So, even though the parent built the image
2367 * here, for compatibility, mark the new thread
2368 * so 'exec-success' fires on it as it leaves
2369 * the kernel.
2370 */
2371 dtrace_thread_didexec(imgp->ip_new_thread);
2372 }
2373 } else {
2374 if (error) {
2375 DTRACE_PROC1(exec__failure, int, error);
2376 } else {
2377 DTRACE_PROC(exec__success);
2378 }
2379 }
2380 #endif
2381
2382 /* Return to both the parent and the child? */
2383 if (imgp != NULL && spawn_no_exec) {
2384 /*
2385 * If the parent wants the pid, copy it out
2386 */
2387 if (pid != USER_ADDR_NULL)
2388 (void)suword(pid, p->p_pid);
2389 retval[0] = error;
2390
2391 /*
2392 * If we had an error, perform an internal reap ; this is
2393 * entirely safe, as we have a real process backing us.
2394 */
2395 if (error) {
2396 proc_list_lock();
2397 p->p_listflag |= P_LIST_DEADPARENT;
2398 proc_list_unlock();
2399 proc_lock(p);
2400 /* make sure no one else has killed it off... */
2401 if (p->p_stat != SZOMB && p->exit_thread == NULL) {
2402 p->exit_thread = current_thread();
2403 proc_unlock(p);
2404 exit1(p, 1, (int *)NULL);
2405 if (exec_done == FALSE) {
2406 task_deallocate(get_threadtask(imgp->ip_new_thread));
2407 thread_deallocate(imgp->ip_new_thread);
2408 }
2409 } else {
2410 /* someone is doing it for us; just skip it */
2411 proc_unlock(p);
2412 }
2413 } else {
2414
2415 /*
2416 * Return to the child
2417 *
2418 * Note: the image activator earlier dropped the
2419 * task/thread references to the newly spawned
2420 * process; this is OK, since we still have suspended
2421 * queue references on them, so we should be fine
2422 * with the delayed resume of the thread here.
2423 */
2424 (void)thread_resume(imgp->ip_new_thread);
2425 }
2426 }
2427 if (bufp != NULL) {
2428 FREE(bufp, M_TEMP);
2429 }
2430
2431 return(error);
2432 }
2433
2434
2435 /*
2436 * execve
2437 *
2438 * Parameters: uap->fname File name to exec
2439 * uap->argp Argument list
2440 * uap->envp Environment list
2441 *
2442 * Returns: 0 Success
2443 * __mac_execve:EINVAL Invalid argument
2444 * __mac_execve:ENOTSUP Invalid argument
2445 * __mac_execve:EACCES Permission denied
2446 * __mac_execve:EINTR Interrupted function
2447 * __mac_execve:ENOMEM Not enough space
2448 * __mac_execve:EFAULT Bad address
2449 * __mac_execve:ENAMETOOLONG Filename too long
2450 * __mac_execve:ENOEXEC Executable file format error
2451 * __mac_execve:ETXTBSY Text file busy [misuse of error code]
2452 * __mac_execve:???
2453 *
2454 * TODO: Dynamic linker header address on stack is copied via suword()
2455 */
2456 /* ARGSUSED */
2457 int
2458 execve(proc_t p, struct execve_args *uap, int32_t *retval)
2459 {
2460 struct __mac_execve_args muap;
2461 int err;
2462
2463 memoryshot(VM_EXECVE, DBG_FUNC_NONE);
2464
2465 muap.fname = uap->fname;
2466 muap.argp = uap->argp;
2467 muap.envp = uap->envp;
2468 muap.mac_p = USER_ADDR_NULL;
2469 err = __mac_execve(p, &muap, retval);
2470
2471 return(err);
2472 }
2473
2474 /*
2475 * __mac_execve
2476 *
2477 * Parameters: uap->fname File name to exec
2478 * uap->argp Argument list
2479 * uap->envp Environment list
2480 * uap->mac_p MAC label supplied by caller
2481 *
2482 * Returns: 0 Success
2483 * EINVAL Invalid argument
2484 * ENOTSUP Not supported
2485 * ENOEXEC Executable file format error
2486 * exec_activate_image:EINVAL Invalid argument
2487 * exec_activate_image:EACCES Permission denied
2488 * exec_activate_image:EINTR Interrupted function
2489 * exec_activate_image:ENOMEM Not enough space
2490 * exec_activate_image:EFAULT Bad address
2491 * exec_activate_image:ENAMETOOLONG Filename too long
2492 * exec_activate_image:ENOEXEC Executable file format error
2493 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
2494 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
2495 * exec_activate_image:???
2496 * mac_execve_enter:???
2497 *
2498 * TODO: Dynamic linker header address on stack is copied via suword()
2499 */
2500 int
2501 __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
2502 {
2503 char *bufp = NULL;
2504 struct image_params *imgp;
2505 struct vnode_attr *vap;
2506 struct vnode_attr *origvap;
2507 int error;
2508 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */
2509 int is_64 = IS_64BIT_PROCESS(p);
2510 struct vfs_context context;
2511
2512 context.vc_thread = current_thread();
2513 context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
2514
2515 /* Allocate a big chunk for locals instead of using stack since these
2516 * structures a pretty big.
2517 */
2518 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
2519 imgp = (struct image_params *) bufp;
2520 if (bufp == NULL) {
2521 error = ENOMEM;
2522 goto exit_with_error;
2523 }
2524 vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
2525 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2526
2527 /* Initialize the common data in the image_params structure */
2528 imgp->ip_user_fname = uap->fname;
2529 imgp->ip_user_argv = uap->argp;
2530 imgp->ip_user_envv = uap->envp;
2531 imgp->ip_vattr = vap;
2532 imgp->ip_origvattr = origvap;
2533 imgp->ip_vfs_context = &context;
2534 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
2535 imgp->ip_p_comm = alt_p_comm; /* for PowerPC */
2536 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2537
2538 #if CONFIG_MACF
2539 if (uap->mac_p != USER_ADDR_NULL) {
2540 error = mac_execve_enter(uap->mac_p, imgp);
2541 if (error) {
2542 kauth_cred_unref(&context.vc_ucred);
2543 goto exit_with_error;
2544 }
2545 }
2546 #endif
2547
2548 error = exec_activate_image(imgp);
2549
2550 kauth_cred_unref(&context.vc_ucred);
2551
2552 /* Image not claimed by any activator? */
2553 if (error == -1)
2554 error = ENOEXEC;
2555
2556 if (error == 0) {
2557 exec_resettextvp(p, imgp);
2558 error = check_for_signature(p, imgp);
2559 }
2560 if (imgp->ip_vp != NULLVP)
2561 vnode_put(imgp->ip_vp);
2562 if (imgp->ip_scriptvp != NULLVP)
2563 vnode_put(imgp->ip_scriptvp);
2564 if (imgp->ip_strings)
2565 execargs_free(imgp);
2566 #if CONFIG_MACF
2567 if (imgp->ip_execlabelp)
2568 mac_cred_label_free(imgp->ip_execlabelp);
2569 if (imgp->ip_scriptlabelp)
2570 mac_vnode_label_free(imgp->ip_scriptlabelp);
2571 #endif
2572 if (!error) {
2573 struct uthread *uthread;
2574
2575 /* Sever any extant thread affinity */
2576 thread_affinity_exec(current_thread());
2577
2578 DTRACE_PROC(exec__success);
2579 uthread = get_bsdthread_info(current_thread());
2580 if (uthread->uu_flag & UT_VFORK) {
2581 vfork_return(p, retval, p->p_pid);
2582 (void)thread_resume(imgp->ip_new_thread);
2583 }
2584 } else {
2585 DTRACE_PROC1(exec__failure, int, error);
2586 }
2587
2588 exit_with_error:
2589 if (bufp != NULL) {
2590 FREE(bufp, M_TEMP);
2591 }
2592
2593 return(error);
2594 }
2595
2596
2597 /*
2598 * copyinptr
2599 *
2600 * Description: Copy a pointer in from user space to a user_addr_t in kernel
2601 * space, based on 32/64 bitness of the user space
2602 *
2603 * Parameters: froma User space address
2604 * toptr Address of kernel space user_addr_t
2605 * ptr_size 4/8, based on 'froma' address space
2606 *
2607 * Returns: 0 Success
2608 * EFAULT Bad 'froma'
2609 *
2610 * Implicit returns:
2611 * *ptr_size Modified
2612 */
2613 static int
2614 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
2615 {
2616 int error;
2617
2618 if (ptr_size == 4) {
2619 /* 64 bit value containing 32 bit address */
2620 unsigned int i;
2621
2622 error = copyin(froma, &i, 4);
2623 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
2624 } else {
2625 error = copyin(froma, toptr, 8);
2626 }
2627 return (error);
2628 }
2629
2630
2631 /*
2632 * copyoutptr
2633 *
2634 * Description: Copy a pointer out from a user_addr_t in kernel space to
2635 * user space, based on 32/64 bitness of the user space
2636 *
2637 * Parameters: ua User space address to copy to
2638 * ptr Address of kernel space user_addr_t
2639 * ptr_size 4/8, based on 'ua' address space
2640 *
2641 * Returns: 0 Success
2642 * EFAULT Bad 'ua'
2643 *
2644 */
2645 static int
2646 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
2647 {
2648 int error;
2649
2650 if (ptr_size == 4) {
2651 /* 64 bit value containing 32 bit address */
2652 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); /* SAFE */
2653
2654 error = copyout(&i, ptr, 4);
2655 } else {
2656 error = copyout(&ua, ptr, 8);
2657 }
2658 return (error);
2659 }
2660
2661
/*
 * exec_copyout_strings
 *
 * Copy out the strings segment to user space.  The strings segment is put
 * on a preinitialized stack frame.
 *
 * Parameters:	struct image_params *	the image parameter block
 *		user_addr_t *		a pointer to the stack offset variable
 *
 * Returns:	0			Success
 *		EINVAL			The string area is not pointer aligned
 *		!0			Failure: errno
 *
 * Implicit returns:
 *		(*stackp)		The stack offset, modified
 *		(p->p_argc)		Argument count, recorded
 *		(p->p_argslen)		Size of the string area, recorded
 *
 * Note:	The strings segment layout is backward, from the beginning
 *		of the top of the stack to consume the minimal amount of
 *		space possible; the returned stack pointer points to the
 *		end of the area consumed (stacks grow downward).
 *
 *		argc is an int; arg[i] are pointers; env[i] are pointers;
 *		the 0's are (void *)NULL's
 *
 * The stack frame layout is:
 *
 *	      +-------------+ <- p->user_stack
 *	      |     16b     |
 *	      +-------------+
 *	      | STRING AREA |
 *	      |      :      |
 *	      |      :      |
 *	      |      :      |
 *	      +- -- -- -- --+
 *	      |  PATH AREA  |
 *	      +-------------+
 *	      |      0      |
 *	      +-------------+
 *	      |  applev[n]  |
 *	      +-------------+
 *	             :
 *	             :
 *	      +-------------+
 *	      |  applev[1]  |
 *	      +-------------+
 *	      | exec_path / |
 *	      |  applev[0]  |
 *	      +-------------+
 *	      |      0      |
 *	      +-------------+
 *	      |    env[n]   |
 *	      +-------------+
 *	             :
 *	             :
 *	      +-------------+
 *	      |    env[0]   |
 *	      +-------------+
 *	      |      0      |
 *	      +-------------+
 *	      | arg[argc-1] |
 *	      +-------------+
 *	             :
 *	             :
 *	      +-------------+
 *	      |    arg[0]   |
 *	      +-------------+
 *	      |     argc    |
 *	sp->  +-------------+
 *
 * Although technically a part of the STRING AREA, we treat the PATH AREA as
 * a separate entity.  This allows us to align the beginning of the PATH AREA
 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
 * which precede it on the stack are properly aligned.
 */

static int
exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
{
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	/* Pointer width of the NEW image, which is what lands on its stack */
	int	ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
	int	ptr_area_size;
	void *ptr_buffer_start, *ptr_buffer;
	int string_size;

	user_addr_t	string_area;	/* *argv[], *env[] */
	user_addr_t	ptr_area;	/* argv[], env[], applev[] */
	user_addr_t argc_area;	/* argc */
	user_addr_t	stack;
	int error;

	unsigned i;
	/*
	 * One descriptor per run of pointers to emit, in the order they
	 * appear (low to high address) in the pointer area:
	 * argv[] + NULL, envv[] + NULL, exec_path, applev[1..] + NULL.
	 * Each run walks 'count' consecutive NUL-separated strings that
	 * begin at 'start_string' inside the kernel string buffer.
	 */
	struct copyout_desc {
		char	*start_string;
		int		count;
#if CONFIG_DTRACE
		user_addr_t	*dtrace_cookie;
#endif
		boolean_t	null_term;
	} descriptors[] = {
		{
			/* argv[]: strings from ip_startargv, NULL terminated */
			.start_string = imgp->ip_startargv,
			.count = imgp->ip_argc,
#if CONFIG_DTRACE
			.dtrace_cookie = &p->p_dtrace_argv,
#endif
			.null_term = TRUE
		},
		{
			/* envv[]: strings from ip_endargv, NULL terminated */
			.start_string = imgp->ip_endargv,
			.count = imgp->ip_envc,
#if CONFIG_DTRACE
			.dtrace_cookie = &p->p_dtrace_envp,
#endif
			.null_term = TRUE
		},
		{
			/*
			 * exec_path / applev[0]: the first string in the
			 * buffer; no terminator, the applev run follows it
			 */
			.start_string = imgp->ip_strings,
			.count = 1,
#if CONFIG_DTRACE
			.dtrace_cookie = NULL,
#endif
			.null_term = FALSE
		},
		{
			/* applev[1..]: strings from ip_endenvv, NULL terminated */
			.start_string = imgp->ip_endenvv,
			.count = imgp->ip_applec - 1, /* exec_path handled above */
#if CONFIG_DTRACE
			.dtrace_cookie = NULL,
#endif
			.null_term = TRUE
		}
	};

	stack = *stackp;

	/*
	 * All previous contributors to the string area
	 * should have aligned their sub-area
	 */
	if (imgp->ip_strspace % ptr_size != 0) {
		error = EINVAL;
		goto bad;
	}

	/* Grow the stack down for the strings we've been building up */
	string_size = imgp->ip_strendp - imgp->ip_strings;
	stack -= string_size;
	string_area = stack;

	/*
	 * Need room for one pointer for each string, plus
	 * one for the NULLs terminating the argv, envv, and apple areas.
	 * (This matches what the descriptors emit:
	 * argc + 1 + envc + 1 + 1 + (applec - 1) + 1 pointers.)
	 */
	ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
	    ptr_size;
	stack -= ptr_area_size;
	ptr_area = stack;

	/* We'll construct all the pointer arrays in our string buffer,
	 * which we already know is aligned properly, and ip_argspace
	 * was used to verify we have enough space.
	 */
	ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;

	/*
	 * Need room for pointer-aligned argc slot.
	 */
	stack -= ptr_size;
	argc_area = stack;

	/*
	 * Record the size of the arguments area so that sysctl_procargs()
	 * can return the argument area without having to parse the arguments.
	 * (*stackp still holds the incoming stack top here, so the length
	 * recorded is the size of the string area.)
	 */
	proc_lock(p);
	p->p_argc = imgp->ip_argc;
	p->p_argslen = (int)(*stackp - string_area);
	proc_unlock(p);

	/* Return the initial stack address: the location of argc */
	*stackp = stack;

	/*
	 * Copy out the entire strings area.
	 */
	error = copyout(imgp->ip_strings, string_area,
						   string_size);
	if (error)
		goto bad;

	for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
		char *cur_string = descriptors[i].start_string;
		int j;

#if CONFIG_DTRACE
		/*
		 * Remember the user address at which this run of pointers
		 * will start (its offset within the staging buffer applied
		 * to ptr_area).
		 */
		if (descriptors[i].dtrace_cookie) {
			proc_lock(p);
			*descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
			proc_unlock(p);
		}
#endif /* CONFIG_DTRACE */

		/*
		 * For each segment (argv, envv, applev), copy as many pointers as requested
		 * to our pointer buffer.
		 */
		for (j = 0; j < descriptors[i].count; j++) {
			/* Translate the kernel buffer offset into the user address of the copied-out string */
			user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);

			/* Copy out the pointer to the current string. Alignment has been verified  */
			if (ptr_size == 8) {
				*(uint64_t *)ptr_buffer = (uint64_t)cur_address;
			} else {
				*(uint32_t *)ptr_buffer = (uint32_t)cur_address;
			}

			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
			cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
		}

		/* Terminate the run with a NULL pointer where the layout calls for one */
		if (descriptors[i].null_term) {
			if (ptr_size == 8) {
				*(uint64_t *)ptr_buffer = 0ULL;
			} else {
				*(uint32_t *)ptr_buffer = 0;
			}

			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
		}
	}

	/*
	 * Copy out all our pointer arrays in bulk.
	 */
	error = copyout(ptr_buffer_start, ptr_area,
					ptr_area_size);
	if (error)
		goto bad;

	/* argc (int32, stored in a ptr_size area) */
	error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
	if (error)
		goto bad;

bad:
	return(error);
}
2908
2909
/*
 * exec_extract_strings
 *
 * Copy arguments and environment from user space into work area; we may
 * have already copied some early arguments into the work area, and if
 * so, any arguments copied in are appended to those already there.
 * This function is the primary manipulator of ip_argspace, since
 * these are the arguments the client of execve(2) knows about.  After
 * each argv[]/envv[] string is copied, we charge the string length
 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
 * fully preflight the arg list size.
 *
 * Parameters:	struct image_params *	the image parameter block
 *
 * Returns:	0			Success
 *		E2BIG			ip_argspace exhausted
 *		!0			Failure: errno
 *
 * Implicit returns;
 *		(imgp->ip_argc)		Count of arguments, updated
 *		(imgp->ip_envc)		Count of environment strings, updated
 *		(imgp->ip_argspace)	Count of remaining of NCARGS
 *		(imgp->ip_interp_buffer)	Interpreter and args (mutated in place)
 *		(imgp->ip_startargv)	Start of the argv strings, set
 *		(imgp->ip_endargv)	End of argv / start of envv strings, set
 *		(imgp->ip_endenvv)	End of envv / start of applev strings, set
 *
 *
 * Note:	The argument and environment vectors are user space pointers
 *		to arrays of user space pointers.
 */
static int
exec_extract_strings(struct image_params *imgp)
{
	int error = 0;
	/* ptr_size: width of the CALLER's pointers (used to read argv/envp) */
	int	ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
	/* new_ptr_size: width of the NEW image's pointers (used for alignment and slot accounting) */
	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
	user_addr_t	argv = imgp->ip_user_argv;
	user_addr_t	envv = imgp->ip_user_envv;

	/*
	 * Adjust space reserved for the path name by however much padding it
	 * needs. Doing this here since we didn't know if this would be a 32-
	 * or 64-bit process back in exec_save_path.
	 */
	while (imgp->ip_strspace % new_ptr_size != 0) {
		*imgp->ip_strendp++ = '\0';
		imgp->ip_strspace--;
		/* imgp->ip_argspace--; not counted towards exec args total */
	}

	/*
	 * From now on, we start attributing string space to ip_argspace
	 */
	imgp->ip_startargv = imgp->ip_strendp;
	imgp->ip_argc = 0;

	if((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
		user_addr_t	arg;
		char *argstart, *ch;

		/* First, the arguments in the "#!" string are tokenized and extracted. */
		argstart = imgp->ip_interp_buffer;
		while (argstart) {
			/* Scan to the end of the current whitespace-delimited token */
			ch = argstart;
			while (*ch && !IS_WHITESPACE(*ch)) {
				ch++;
			}

			if (*ch == '\0') {
				/* last argument, no need to NUL-terminate */
				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
				argstart = NULL;
			} else {
				/* NUL-terminate */
				*ch = '\0';
				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);

				/*
				 * Find the next string. We know spaces at the end of the string have already
				 * been stripped.
				 */
				argstart = ch + 1;
				while (IS_WHITESPACE(*argstart)) {
					argstart++;
				}
			}

			/* Error-check, regardless of whether this is the last interpreter arg or not */
			if (error)
				goto bad;
			if (imgp->ip_argspace < new_ptr_size) {
				error = E2BIG;
				goto bad;
			}
			imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
			imgp->ip_argc++;
		}

		if (argv != 0LL) {
			/*
			 * If we are running an interpreter, replace the av[0] that was
			 * passed to execve() with the path name that was
			 * passed to execve() for interpreters which do not use the PATH
			 * to locate their script arguments.
			 */
			error = copyinptr(argv, &arg, ptr_size);
			if (error)
				goto bad;
			if (arg != 0LL) {
				argv += ptr_size; /* consume without using */
			}
		}

		/*
		 * The script itself becomes the next argument: a /dev/fd/N
		 * path when ip_interp_sugid_fd is set, otherwise the file
		 * name originally handed to exec.
		 */
		if (imgp->ip_interp_sugid_fd != -1) {
			char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
			snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
			error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
		} else {
			error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
		}

		if (error)
			goto bad;
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
		imgp->ip_argc++;
	}

	/* Now the caller's argv[], up to (not including) its NULL terminator */
	while (argv != 0LL) {
		user_addr_t	arg;

		error = copyinptr(argv, &arg, ptr_size);
		if (error)
			goto bad;

		if (arg == 0LL) {
			break;
		}

		argv += ptr_size;

		/*
		 * av[n...] = arg[n]
		 */
		error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
		if (error)
			goto bad;
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
		imgp->ip_argc++;
	}

	/* Save space for argv[] NULL terminator */
	if (imgp->ip_argspace < new_ptr_size) {
		error = E2BIG;
		goto bad;
	}
	imgp->ip_argspace -= new_ptr_size;

	/* Note where the args ends and env begins. */
	imgp->ip_endargv = imgp->ip_strendp;
	imgp->ip_envc = 0;

	/* Now, get the environment */
	while (envv != 0LL) {
		user_addr_t	env;

		error = copyinptr(envv, &env, ptr_size);
		if (error)
			goto bad;

		envv += ptr_size;
		if (env == 0LL) {
			break;
		}
		/*
		 * ev[n...] = env[n]
		 */
		error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
		if (error)
			goto bad;
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
		imgp->ip_envc++;
	}

	/* Save space for envv[] NULL terminator */
	if (imgp->ip_argspace < new_ptr_size) {
		error = E2BIG;
		goto bad;
	}
	imgp->ip_argspace -= new_ptr_size;

	/*
	 * Align the tail of the combined argv+envv area; unlike the path
	 * padding at the top of this function, these pad bytes ARE charged
	 * to ip_argspace.
	 */
	while (imgp->ip_strspace % new_ptr_size != 0) {
		if (imgp->ip_argspace < 1) {
			error = E2BIG;
			goto bad;
		}
		*imgp->ip_strendp++ = '\0';
		imgp->ip_strspace--;
		imgp->ip_argspace--;
	}

	/* Note where the envv ends and applev begins. */
	imgp->ip_endenvv = imgp->ip_strendp;

	/*
	 * From now on, we are no longer charging argument
	 * space to ip_argspace.
	 */

bad:
	return error;
}
3131
3132 static char *
3133 random_hex_str(char *str, int len, boolean_t embedNUL)
3134 {
3135 uint64_t low, high, value;
3136 int idx;
3137 char digit;
3138
3139 /* A 64-bit value will only take 16 characters, plus '0x' and NULL. */
3140 if (len > 19)
3141 len = 19;
3142
3143 /* We need enough room for at least 1 digit */
3144 if (len < 4)
3145 return (NULL);
3146
3147 low = random();
3148 high = random();
3149 value = high << 32 | low;
3150
3151 if (embedNUL) {
3152 /*
3153 * Zero a byte to protect against C string vulnerabilities
3154 * e.g. for userland __stack_chk_guard.
3155 */
3156 value &= ~(0xffull << 8);
3157 }
3158
3159 str[0] = '0';
3160 str[1] = 'x';
3161 for (idx = 2; idx < len - 1; idx++) {
3162 digit = value & 0xf;
3163 value = value >> 4;
3164 if (digit < 10)
3165 str[idx] = '0' + digit;
3166 else
3167 str[idx] = 'a' + (digit - 10);
3168 }
3169 str[idx] = '\0';
3170 return (str);
3171 }
3172
3173 /*
3174 * Libc has an 8-element array set up for stack guard values. It only fills
3175 * in one of those entries, and both gcc and llvm seem to use only a single
3176 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
3177 * do the work to construct them.
 *
 * GUARD_VALUES is the number of random hex strings that
 * exec_add_apple_strings() appends after GUARD_KEY.
3178 */
3179 #define GUARD_VALUES 1
3180 #define GUARD_KEY "stack_guard="
3181
3182 /*
3183 * System malloc needs some entropy when it is initialized.
 *
 * ENTROPY_VALUES is the number of comma-separated random hex strings that
 * exec_add_apple_strings() appends after ENTROPY_KEY.
3184 */
3185 #define ENTROPY_VALUES 2
3186 #define ENTROPY_KEY "malloc_entropy="
3187
3188 /*
3189 * System malloc engages nanozone for UIAPP.
 *
 * exec_add_apple_strings() adds this string only when spawn attributes are
 * present and their process type is POSIX_SPAWN_PROC_TYPE_APP_DEFAULT or
 * POSIX_SPAWN_PROC_TYPE_APP_TAL.
3190 */
3191 #define NANO_ENGAGE_KEY "MallocNanoZone=1"
3192
/*
 * Key of the apple[] string that exec_add_apple_strings() formats from one
 * of the two commpage text locations declared below (the 64-bit one when
 * the image has IMGPF_IS_64BIT set, the 32-bit one otherwise).
 */
3193 #define PFZ_KEY "pfz="
3194 extern user32_addr_t commpage_text32_location;
3195 extern user64_addr_t commpage_text64_location;
3196 /*
3197 * Build up the contents of the apple[] string vector
3198 */
3199 static int
3200 exec_add_apple_strings(struct image_params *imgp)
3201 {
3202 int i, error;
3203 int new_ptr_size=4;
3204 char guard[19];
3205 char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1];
3206
3207 char entropy[19];
3208 char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1];
3209
3210 char pfz_string[strlen(PFZ_KEY) + 16 + 4 +1];
3211
3212 if( imgp->ip_flags & IMGPF_IS_64BIT) {
3213 new_ptr_size = 8;
3214 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%llx",commpage_text64_location);
3215 } else {
3216 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%x",commpage_text32_location);
3217 }
3218
3219 /* exec_save_path stored the first string */
3220 imgp->ip_applec = 1;
3221
3222 /* adding the pfz string */
3223 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string),UIO_SYSSPACE,FALSE);
3224 if(error)
3225 goto bad;
3226 imgp->ip_applec++;
3227
3228 /* adding the NANO_ENGAGE_KEY key */
3229 if (imgp->ip_px_sa) {
3230 int proc_type = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_apptype) & POSIX_SPAWN_PROC_TYPE_MASK;
3231
3232 if (proc_type == POSIX_SPAWN_PROC_TYPE_APP_DEFAULT || proc_type == POSIX_SPAWN_PROC_TYPE_APP_TAL) {
3233 char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1];
3234
3235 snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY);
3236 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE);
3237 if(error)
3238 goto bad;
3239 imgp->ip_applec++;
3240 }
3241 }
3242
3243 /*
3244 * Supply libc with a collection of random values to use when
3245 * implementing -fstack-protector.
3246 *
3247 * (The first random string always contains an embedded NUL so that
3248 * __stack_chk_guard also protects against C string vulnerabilities)
3249 */
3250 (void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec));
3251 for (i = 0; i < GUARD_VALUES; i++) {
3252 random_hex_str(guard, sizeof (guard), i == 0);
3253 if (i)
3254 (void)strlcat(guard_vec, ",", sizeof (guard_vec));
3255 (void)strlcat(guard_vec, guard, sizeof (guard_vec));
3256 }
3257
3258 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(guard_vec), UIO_SYSSPACE, FALSE);
3259 if (error)
3260 goto bad;
3261 imgp->ip_applec++;
3262
3263 /*
3264 * Supply libc with entropy for system malloc.
3265 */
3266 (void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec));
3267 for (i = 0; i < ENTROPY_VALUES; i++) {
3268 random_hex_str(entropy, sizeof (entropy), FALSE);
3269 if (i)
3270 (void)strlcat(entropy_vec, ",", sizeof (entropy_vec));
3271 (void)strlcat(entropy_vec, entropy, sizeof (entropy_vec));
3272 }
3273
3274 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(entropy_vec), UIO_SYSSPACE, FALSE);
3275 if (error)
3276 goto bad;
3277 imgp->ip_applec++;
3278
3279 /* Align the tail of the combined applev area */
3280 while (imgp->ip_strspace % new_ptr_size != 0) {
3281 *imgp->ip_strendp++ = '\0';
3282 imgp->ip_strspace--;
3283 }
3284
3285 bad:
3286 return error;
3287 }
3288
/*
 * unix_stack_size
 *
 * Evaluates to the rlim_cur member of the RLIMIT_STACK entry in the
 * p_rlimit[] array of the process 'p'.  The argument is parenthesized so
 * that any pointer-valued expression (not just a plain identifier) expands
 * correctly.
 */
#define unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
3290
3291 /*
3292 * exec_check_permissions
3293 *
3294 * Description: Verify that the file that is being attempted to be executed
3295 * is in fact allowed to be executed based on it POSIX file
3296 * permissions and other access control criteria
3297 *
3298 * Parameters: struct image_params * the image parameter block
3299 *
3300 * Returns: 0 Success
3301 * EACCES Permission denied
3302 * ENOEXEC Executable file format error
3303 * ETXTBSY Text file busy [misuse of error code]
3304 * vnode_getattr:???
3305 * vnode_authorize:???
3306 */
3307 static int
3308 exec_check_permissions(struct image_params *imgp)
3309 {
3310 struct vnode *vp = imgp->ip_vp;
3311 struct vnode_attr *vap = imgp->ip_vattr;
3312 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3313 int error;
3314 kauth_action_t action;
3315
3316 /* Only allow execution of regular files */
3317 if (!vnode_isreg(vp))
3318 return (EACCES);
3319
3320 /* Get the file attributes that we will be using here and elsewhere */
3321 VATTR_INIT(vap);
3322 VATTR_WANTED(vap, va_uid);
3323 VATTR_WANTED(vap, va_gid);
3324 VATTR_WANTED(vap, va_mode);
3325 VATTR_WANTED(vap, va_fsid);
3326 VATTR_WANTED(vap, va_fileid);
3327 VATTR_WANTED(vap, va_data_size);
3328 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
3329 return (error);
3330
3331 /*
3332 * Ensure that at least one execute bit is on - otherwise root
3333 * will always succeed, and we don't want to happen unless the
3334 * file really is executable.
3335 */
3336 if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
3337 return (EACCES);
3338
3339 /* Disallow zero length files */
3340 if (vap->va_data_size == 0)
3341 return (ENOEXEC);
3342
3343 imgp->ip_arch_offset = (user_size_t)0;
3344 imgp->ip_arch_size = vap->va_data_size;
3345
3346 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
3347 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED))
3348 vap->va_mode &= ~(VSUID | VSGID);
3349
3350 /*
3351 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
3352 * flags for setuid/setgid binaries.
3353 */
3354 if (vap->va_mode & (VSUID | VSGID))
3355 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
3356
3357 #if CONFIG_MACF
3358 error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
3359 if (error)
3360 return (error);
3361 #endif
3362
3363 /* Check for execute permission */
3364 action = KAUTH_VNODE_EXECUTE;
3365 /* Traced images must also be readable */
3366 if (p->p_lflag & P_LTRACED)
3367 action |= KAUTH_VNODE_READ_DATA;
3368 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
3369 return (error);
3370
3371 #if 0
3372 /* Don't let it run if anyone had it open for writing */
3373 vnode_lock(vp);
3374 if (vp->v_writecount) {
3375 panic("going to return ETXTBSY %x", vp);
3376 vnode_unlock(vp);
3377 return (ETXTBSY);
3378 }
3379 vnode_unlock(vp);
3380 #endif
3381
3382
3383 /* XXX May want to indicate to underlying FS that vnode is open */
3384
3385 return (error);
3386 }
3387
3388
3389 /*
3390 * exec_handle_sugid
3391 *
3392 * Initially clear the P_SUGID in the process flags; if an SUGID process is
3393 * exec'ing a non-SUGID image, then this is the point of no return.
3394 *
3395 * If the image being activated is SUGID, then replace the credential with a
3396 * copy, disable tracing (unless the tracing process is root), reset the
3397 * mach task port to revoke it, set the P_SUGID bit, and make sure that
 * file descriptors 0, 1 and 2 are allocated (opening /dev/null for any
 * that are not).
3398 *
3399 * If the saved user and group ID will be changing, then make sure it happens
3400 * to a new credential, rather than a shared one.
3401 *
3402 * Set the security token (this is probably obsolete, given that the token
3403 * should not technically be separate from the credential itself).
3404 *
3405 * Parameters: struct image_params * the image parameter block
3406 *
3407 * Returns: 0 Success
 * !0 The error most recently recorded from
 * falloc() or vn_open() while filling in
 * descriptors 0-2; everything else in this
 * function has still been carried out
3408 *
3409 * Implicit returns:
3410 * <process credential> Potentially modified/replaced
3411 * <task port> Potentially revoked
3412 * <process flags> P_SUGID bit potentially modified
3413 * <security token> Potentially modified
 * <process identity version> Incremented (p_idversion)
3414 */
3415 static int
3416 exec_handle_sugid(struct image_params *imgp)
3417 {
3418 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
3419 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3420 int i;
3421 int leave_sugid_clear = 0;
3422 int error = 0;
3423 #if CONFIG_MACF
3424 int mac_transition;
3425
3426 /*
3427 * Determine whether a call to update the MAC label will result in the
3428 * credential changing.
3429 *
3430 * Note: MAC policies which do not actually end up modifying
3431 * the label subsequently are strongly encouraged to
3432 * return 0 for this check, since a non-zero answer will
3433 * slow down the exec fast path for normal binaries.
3434 */
3435 mac_transition = mac_cred_check_label_update_execve(
3436 imgp->ip_vfs_context,
3437 imgp->ip_vp,
3438 imgp->ip_scriptvp,
3439 imgp->ip_scriptlabelp,
3440 imgp->ip_execlabelp,
3441 p,
3442 imgp->ip_px_smpx);
3443 #endif
3444
/* Start with P_SUGID clear; it is set again below if required. */
3445 OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
3446
3447 /*
3448 * Order of the following is important; group checks must go last,
3449 * as we use the success of the 'ismember' check combined with the
3450 * failure of the explicit match to indicate that we will be setting
3451 * the egid of the process even though the new process did not
3452 * require VSUID/VSGID bits in order for it to set the new group as
3453 * its egid.
3454 *
3455 * Note: Technically, by this we are implying a call to
3456 * setegid() in the new process, rather than implying
3457 * it used its VSGID bit to set the effective group,
3458 * even though there is no code in that process to make
3459 * such a call.
3460 */
3461 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
3462 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
3463 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
3464 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
3465 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
3466
3467 #if CONFIG_MACF
3468 /* label for MAC transition and neither VSUID nor VSGID */
3469 handle_mac_transition:
3470 #endif
3471
3472 /*
3473 * Replace the credential with a copy of itself if euid or
3474 * egid change.
3475 *
3476 * Note: setuid binaries will automatically opt out of
3477 * group resolver participation as a side effect
3478 * of this operation. This is an intentional
3479 * part of the security model, which requires a
3480 * participating credential be established by
3481 * escalating privilege, setting up all other
3482 * aspects of the credential including whether
3483 * or not to participate in external group
3484 * membership resolution, then dropping their
3485 * effective privilege to that of the desired
3486 * final credential state.
3487 */
3488 if (imgp->ip_origvattr->va_mode & VSUID) {
3489 p->p_ucred = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
3490 /* update cred on proc */
3491 PROC_UPDATE_CREDS_ONPROC(p);
3492 }
3493 if (imgp->ip_origvattr->va_mode & VSGID) {
3494 p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
3495 /* update cred on proc */
3496 PROC_UPDATE_CREDS_ONPROC(p);
3497 }
3498
3499 #if CONFIG_MACF
3500 /*
3501 * If a policy has indicated that it will transition the label,
3502 * before making the call into the MAC policies, get a new
3503 * duplicate credential, so they can modify it without
3504 * modifying any others sharing it.
3505 */
3506 if (mac_transition) {
3507 kauth_cred_t my_cred;
3508 if (kauth_proc_label_update_execve(p,
3509 imgp->ip_vfs_context,
3510 imgp->ip_vp,
3511 imgp->ip_scriptvp,
3512 imgp->ip_scriptlabelp,
3513 imgp->ip_execlabelp,
3514 imgp->ip_px_smpx)) {
3515 /*
3516 * If updating the MAC label resulted in a
3517 * disjoint credential, flag that we need to
3518 * set the P_SUGID bit. This protects
3519 * against debuggers being attached by an
3520 * insufficiently privileged process onto the
3521 * result of a transition to a more privileged
3522 * credential.
3523 */
3524 leave_sugid_clear = 0;
3525 }
3526
3527 my_cred = kauth_cred_proc_ref(p);
3528 mac_task_label_update_cred(my_cred, p->task);
3529 kauth_cred_unref(&my_cred);
3530 }
3531 #endif /* CONFIG_MACF */
3532
3533 /*
3534 * If 'leave_sugid_clear' is non-zero, then we passed the
3535 * VSUID and MACF checks, and successfully determined that
3536 * the previous cred was a member of the VSGID group, but
3537 * that it was not the default at the time of the execve,
3538 * and that the post-labelling credential was not disjoint.
3539 * So we don't set the P_SUGID or reset mach ports and fds
3540 * on the basis of simply running this code.
3541 */
3542 if (!leave_sugid_clear) {
3543 /*
3544 * Have mach reset the task and thread ports.
3545 * We don't want anyone who had the ports before
3546 * a setuid exec to be able to access/control the
3547 * task/thread after.
3548 */
3549 ipc_task_reset(p->task);
3550 ipc_thread_reset((imgp->ip_new_thread != NULL) ?
3551 imgp->ip_new_thread : current_thread());
3552
3553 /*
3554 * Flag the process as setuid.
3555 */
3556 OSBitOrAtomic(P_SUGID, &p->p_flag);
3557
3558 /*
3559 * Radar 2261856; setuid security hole fix
3560 * XXX For setuid processes, attempt to ensure that
3561 * stdin, stdout, and stderr are already allocated.
3562 * We do not want userland to accidentally allocate
3563 * descriptors in this range which has implied meaning
3564 * to libc.
3565 */
3566 for (i = 0; i < 3; i++) {
3567
/* Descriptor already in use: nothing to do for it. */
3568 if (p->p_fd->fd_ofiles[i] != NULL)
3569 continue;
3570
3571 /*
3572 * Do the kernel equivalent of
3573 *
3574 * if i == 0
3575 * (void) open("/dev/null", O_RDONLY);
3576 * else
3577 * (void) open("/dev/null", O_WRONLY);
3578 */
3579
3580 struct fileproc *fp;
3581 int indx;
3582 int flag;
3583
3584 if (i == 0)
3585 flag = FREAD;
3586 else
3587 flag = FWRITE;
3588
/*
 * On falloc() failure move on to the next descriptor; 'error'
 * stays set and is returned unless a later falloc()/vn_open()
 * call in this loop overwrites it.
 */
3589 if ((error = falloc(p,
3590 &fp, &indx, imgp->ip_vfs_context)) != 0)
3591 continue;
3592
3593 struct nameidata nd1;
3594
3595 NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
3596 CAST_USER_ADDR_T("/dev/null"),
3597 imgp->ip_vfs_context);
3598
/*
 * If /dev/null cannot be opened, release the descriptor just
 * allocated and give up on the remaining ones as well.
 */
3599 if ((error = vn_open(&nd1, flag, 0)) != 0) {
3600 fp_free(p, indx, fp);
3601 break;
3602 }
3603
3604 struct fileglob *fg = fp->f_fglob;
3605
/* Attach the opened vnode to the new file structure. */
3606 fg->fg_flag = flag;
3607 fg->fg_ops = &vnops;
3608 fg->fg_data = nd1.ni_vp;
3609
3610 vnode_put(nd1.ni_vp);
3611
3612 proc_fdlock(p);
3613 procfdtbl_releasefd(p, indx, NULL);
3614 fp_drop(p, indx, fp, 1);
3615 proc_fdunlock(p);
3616 }
3617 }
3618 }
3619 #if CONFIG_MACF
3620 else {
3621 /*
3622 * We are here because we were told that the MAC label will
3623 * be transitioned, and the binary is not VSUID or VSGID; to
3624 * deal with this case, we could either duplicate a lot of
3625 * code, or we can indicate we want to default the P_SUGID
3626 * bit clear and jump back up.
3627 */
3628 if (mac_transition) {
3629 leave_sugid_clear = 1;
3630 goto handle_mac_transition;
3631 }
3632 }
3633 #endif /* CONFIG_MACF */
3634
3635 /*
3636 * Implement the semantic where the effective user and group become
3637 * the saved user and group in exec'ed programs.
3638 */
3639 p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), kauth_cred_getgid(p->p_ucred));
3640 /* update cred on proc */
3641 PROC_UPDATE_CREDS_ONPROC(p);
3642
3643 /* Update the process' identity version and set the security token */
3644 p->p_idversion++;
3645 set_security_token(p);
3646
3647 return(error);
3648 }
3649
3650
3651 /*
3652 * create_unix_stack
3653 *
3654 * Description: Set the user stack address for the process to the provided
3655 * address. If a custom stack was not set as a result of the
3656 * load process (i.e. as specified by the image file for the
3657 * executable), then allocate the stack in the provided map and
3658 * set up appropriate guard pages for enforcing administrative
3659 * limits on stack growth, if they end up being needed.
3660 *
3661 * Parameters: p Process to set stack on
3662 * load_result Information from mach-o load commands
3663 * map Address map in which to allocate the new stack
3664 *
3665 * Returns: KERN_SUCCESS Stack successfully created
3666 * !KERN_SUCCESS Mach failure code
3667 */
3668 static kern_return_t
3669 create_unix_stack(vm_map_t map, load_result_t* load_result,
3670 proc_t p)
3671 {
3672 mach_vm_size_t size, prot_size;
3673 mach_vm_offset_t addr, prot_addr;
3674 kern_return_t kr;
3675
3676 mach_vm_address_t user_stack = load_result->user_stack;
3677
3678 proc_lock(p);
3679 p->user_stack = user_stack;
3680 proc_unlock(p);
3681
3682 if (!load_result->prog_allocated_stack) {
3683 /*
3684 * Allocate enough space for the maximum stack size we
3685 * will ever authorize and an extra page to act as
3686 * a guard page for stack overflows. For default stacks,
3687 * vm_initial_limit_stack takes care of the extra guard page.
3688 * Otherwise we must allocate it ourselves.
3689 */
3690
3691 size = mach_vm_round_page(load_result->user_stack_size);
3692 if (load_result->prog_stack_size)
3693 size += PAGE_SIZE;
3694 addr = mach_vm_trunc_page(load_result->user_stack - size);
3695 kr = mach_vm_allocate(map, &addr, size,
3696 VM_MAKE_TAG(VM_MEMORY_STACK) |
3697 VM_FLAGS_FIXED);
3698 if (kr != KERN_SUCCESS) {
3699 /* If can't allocate at default location, try anywhere */
3700 addr = 0;
3701 kr = mach_vm_allocate(map, &addr, size,
3702 VM_MAKE_TAG(VM_MEMORY_STACK) |
3703 VM_FLAGS_ANYWHERE);
3704 if (kr != KERN_SUCCESS)
3705 return kr;
3706
3707 user_stack = addr + size;
3708 load_result->user_stack = user_stack;
3709
3710 proc_lock(p);
3711 p->user_stack = user_stack;
3712 proc_unlock(p);
3713 }
3714
3715 /*
3716 * And prevent access to what's above the current stack
3717 * size limit for this process.
3718 */
3719 prot_addr = addr;
3720 if (load_result->prog_stack_size)
3721 prot_size = PAGE_SIZE;
3722 else
3723 prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
3724 kr = mach_vm_protect(map,
3725 prot_addr,
3726 prot_size,
3727 FALSE,
3728 VM_PROT_NONE);
3729 if (kr != KERN_SUCCESS) {
3730 (void) mach_vm_deallocate(map, addr, size);
3731 return kr;
3732 }
3733 }
3734
3735 return KERN_SUCCESS;
3736 }
3737
3738 #include <sys/reboot.h>
3739
/*
 * Path of the program that load_init_program() hands to execve(); the
 * whole 128-byte buffer is copied out to user space there.
 */
3740 static char init_program_name[128] = "/sbin/launchd";
3741
/* Argument block filled in by load_init_program() for its execve() call. */
3742 struct execve_args init_exec_args;
3743
3744 /*
3745 * load_init_program
3746 *
3747 * Description: Load the "init" program; in most cases, this will be "launchd"
3748 *
3749 * Parameters: p Process to call execve() to create
3750 * the "init" program
3751 *
3752 * Returns: (void)
3753 *
3754 * Notes: The process that is passed in is the first manufactured
3755 * process on the system, and gets here via bsd_ast() firing
3756 * for the first time. This is done to ensure that bsd_init()
3757 * has run to completion.
3758 */
3759 void
3760 load_init_program(proc_t p)
3761 {
3762 vm_offset_t init_addr;
3763 int argc = 0;
3764 uint32_t argv[3];
3765 int error;
3766 int retval[2];
3767
3768 /*
3769 * Copy out program name.
3770 */
3771
3772 init_addr = VM_MIN_ADDRESS;
3773 (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE,
3774 VM_FLAGS_ANYWHERE);
3775 if (init_addr == 0)
3776 init_addr++;
3777
3778 (void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr),
3779 (unsigned) sizeof(init_program_name)+1);
3780
3781 argv[argc++] = (uint32_t)init_addr;
3782 init_addr += sizeof(init_program_name);
3783 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
3784
3785 /*
3786 * Put out first (and only) argument, similarly.
3787 * Assumes everything fits in a page as allocated
3788 * above.
3789 */
3790 if (boothowto & RB_SINGLE) {
3791 const char *init_args = "-s";
3792
3793 copyout(init_args, CAST_USER_ADDR_T(init_addr),
3794 strlen(init_args));
3795
3796 argv[argc++] = (uint32_t)init_addr;
3797 init_addr += strlen(init_args);
3798 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
3799
3800 }
3801
3802 /*
3803 * Null-end the argument list
3804 */
3805 argv[argc] = 0;
3806
3807 /*
3808 * Copy out the argument list.
3809 */
3810
3811 (void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr),
3812 (unsigned) sizeof(argv));
3813
3814 /*
3815 * Set up argument block for fake call to execve.
3816 */
3817
3818 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
3819 init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
3820 init_exec_args.envp = CAST_USER_ADDR_T(0);
3821
3822 /*
3823 * So that mach_init task is set with uid,gid 0 token
3824 */
3825 set_security_token(p);
3826
3827 error = execve(p,&init_exec_args,retval);
3828 if (error)
3829 panic("Process 1 exec of %s failed, errno %d",
3830 init_program_name, error);
3831 }
3832
3833 /*
3834 * load_return_to_errno
3835 *
3836 * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
3837 *
3838 * Parameters: lrtn Mach error number
3839 *
3840 * Returns: (int) BSD error number
3841 * 0 Success
3842 * EBADARCH Bad architecture
3843 * EBADMACHO Bad Mach object file
3844 * ESHLIBVERS Bad shared library version
3845 * ENOMEM Out of memory/resource shortage
3846 * EACCES Access denied
3847 * ENOENT Entry not found (usually "file does
3848 * does not exist")
3849 * EIO An I/O error occurred
3850 * EBADEXEC The executable is corrupt/unknown
3851 */
3852 static int
3853 load_return_to_errno(load_return_t lrtn)
3854 {
3855 switch (lrtn) {
3856 case LOAD_SUCCESS:
3857 return 0;
3858 case LOAD_BADARCH:
3859 return EBADARCH;
3860 case LOAD_BADMACHO:
3861 return EBADMACHO;
3862 case LOAD_SHLIB:
3863 return ESHLIBVERS;
3864 case LOAD_NOSPACE:
3865 case LOAD_RESOURCE:
3866 return ENOMEM;
3867 case LOAD_PROTECT:
3868 return EACCES;
3869 case LOAD_ENOENT:
3870 return ENOENT;
3871 case LOAD_IOERROR:
3872 return EIO;
3873 case LOAD_FAILURE:
3874 case LOAD_DECRYPTFAIL:
3875 default:
3876 return EBADEXEC;
3877 }
3878 }
3879
3880 #include <mach/mach_types.h>
3881 #include <mach/vm_prot.h>
3882 #include <mach/semaphore.h>
3883 #include <mach/sync_policy.h>
3884 #include <kern/clock.h>
3885 #include <mach/kern_return.h>
3886
3887 /*
3888 * execargs_alloc
3889 *
3890 * Description: Allocate the block of memory used by the execve arguments.
3891 * At the same time, we allocate a page so that we can read in
3892 * the first page of the image.
3893 *
3894 * Parameters: struct image_params * the image parameter block
3895 *
3896 * Returns: 0 Success
3897 * EINVAL Invalid argument
3898 * EACCES Permission denied
3899 * EINTR Interrupted function
3900 * ENOMEM Not enough space
3901 *
3902 * Notes: This is a temporary allocation into the kernel address space
3903 * to enable us to copy arguments in from user space. This is
3904 * necessitated by not mapping the process calling execve() into
3905 * the kernel address space during the execve() system call.
3906 *
3907 * We assemble the argument and environment, etc., into this
3908 * region before copying it as a single block into the child
3909 * process address space (at the top or bottom of the stack,
3910 * depending on which way the stack grows; see the function
3911 * exec_copyout_strings() for details).
3912 *
3913 * This ends up with a second (possibly unnecessary) copy compared
3914 * with assembling the data directly into the child address space,
3915 * instead, but since we cannot be guaranteed that the parent has
3916 * not modified its environment, we can't really know that it's
3917 * really a block there as well.
3918 */
3919
3920
/*
 * Number of threads currently sleeping in execargs_alloc() for a free
 * argument buffer; execargs_alloc() and execargs_free() access it with
 * execargs_cache_lock held.
 */
3921 static int execargs_waiters = 0;
/* Mutex taken and released by the execargs_lock_*() helpers below. */
3922 lck_mtx_t *execargs_cache_lock;
3923
3924 static void
3925 execargs_lock_lock(void) {
3926 lck_mtx_lock_spin(execargs_cache_lock);
3927 }
3928
3929 static void
3930 execargs_lock_unlock(void) {
3931 lck_mtx_unlock(execargs_cache_lock);
3932 }
3933
3934 static wait_result_t
3935 execargs_lock_sleep(void) {
3936 return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
3937 }
3938
3939 static kern_return_t
3940 execargs_purgeable_allocate(char **execarg_address) {
3941 kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
3942 assert(kr == KERN_SUCCESS);
3943 return kr;
3944 }
3945
3946 static kern_return_t
3947 execargs_purgeable_reference(void *execarg_address) {
3948 int state = VM_PURGABLE_NONVOLATILE;
3949 kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
3950
3951 assert(kr == KERN_SUCCESS);
3952 return kr;
3953 }
3954
3955 static kern_return_t
3956 execargs_purgeable_volatilize(void *execarg_address) {
3957 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
3958 kern_return_t kr;
3959 kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
3960
3961 assert(kr == KERN_SUCCESS);
3962
3963 return kr;
3964 }
3965
3966 static void
3967 execargs_wakeup_waiters(void) {
3968 thread_wakeup(&execargs_free_count);
3969 }
3970
3971 static int
3972 execargs_alloc(struct image_params *imgp)
3973 {
3974 kern_return_t kret;
3975 wait_result_t res;
3976 int i, cache_index = -1;
3977
3978 execargs_lock_lock();
3979
3980 while (execargs_free_count == 0) {
3981 execargs_waiters++;
3982 res = execargs_lock_sleep();
3983 execargs_waiters--;
3984 if (res != THREAD_AWAKENED) {
3985 execargs_lock_unlock();
3986 return (EINTR);
3987 }
3988 }
3989
3990 execargs_free_count--;
3991
3992 for (i = 0; i < execargs_cache_size; i++) {
3993 vm_offset_t element = execargs_cache[i];
3994 if (element) {
3995 cache_index = i;
3996 imgp->ip_strings = (char *)(execargs_cache[i]);
3997 execargs_cache[i] = 0;
3998 break;
3999 }
4000 }
4001
4002 assert(execargs_free_count >= 0);
4003
4004 execargs_lock_unlock();
4005
4006 if (cache_index == -1) {
4007 kret = execargs_purgeable_allocate(&imgp->ip_strings);
4008 }
4009 else
4010 kret = execargs_purgeable_reference(imgp->ip_strings);
4011
4012 assert(kret == KERN_SUCCESS);
4013 if (kret != KERN_SUCCESS) {
4014 return (ENOMEM);
4015 }
4016
4017 /* last page used to read in file headers */
4018 imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
4019 imgp->ip_strendp = imgp->ip_strings;
4020 imgp->ip_argspace = NCARGS;
4021 imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
4022
4023 return (0);
4024 }
4025
4026 /*
4027 * execargs_free
4028 *
4029 * Description: Free the block of memory used by the execve arguments and the
4030 * first page of the executable by a previous call to the function
4031 * execargs_alloc().
4032 *
4033 * Parameters: struct image_params * the image parameter block
4034 *
4035 * Returns: 0 Success
4036 * EINVAL Invalid argument
4037 * EINTR Oeration interrupted
4038 */
4039 static int
4040 execargs_free(struct image_params *imgp)
4041 {
4042 kern_return_t kret;
4043 int i;
4044 boolean_t needs_wakeup = FALSE;
4045
4046 kret = execargs_purgeable_volatilize(imgp->ip_strings);
4047
4048 execargs_lock_lock();
4049 execargs_free_count++;
4050
4051 for (i = 0; i < execargs_cache_size; i++) {
4052 vm_offset_t element = execargs_cache[i];
4053 if (element == 0) {
4054 execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
4055 imgp->ip_strings = NULL;
4056 break;
4057 }
4058 }
4059
4060 assert(imgp->ip_strings == NULL);
4061
4062 if (execargs_waiters > 0)
4063 needs_wakeup = TRUE;
4064
4065 execargs_lock_unlock();
4066
4067 if (needs_wakeup == TRUE)
4068 execargs_wakeup_waiters();
4069
4070 return ((kret == KERN_SUCCESS ? 0 : EINVAL));
4071 }
4072
4073 static void
4074 exec_resettextvp(proc_t p, struct image_params *imgp)
4075 {
4076 vnode_t vp;
4077 off_t offset;
4078 vnode_t tvp = p->p_textvp;
4079 int ret;
4080
4081 vp = imgp->ip_vp;
4082 offset = imgp->ip_arch_offset;
4083
4084 if (vp == NULLVP)
4085 panic("exec_resettextvp: expected valid vp");
4086
4087 ret = vnode_ref(vp);
4088 proc_lock(p);
4089 if (ret == 0) {
4090 p->p_textvp = vp;
4091 p->p_textoff = offset;
4092 } else {
4093 p->p_textvp = NULLVP; /* this is paranoia */
4094 p->p_textoff = 0;
4095 }
4096 proc_unlock(p);
4097
4098 if ( tvp != NULLVP) {
4099 if (vnode_getwithref(tvp) == 0) {
4100 vnode_rele(tvp);
4101 vnode_put(tvp);
4102 }
4103 }
4104
4105 }
4106
4107 /*
4108 * If the process is not signed or if it contains entitlements, we
4109 * need to communicate through the task_access_port to taskgated.
4110 *
4111 * taskgated will provide a detached code signature if present, and
4112 * will enforce any restrictions on entitlements.
4113 */
4114
4115 static boolean_t
4116 taskgated_required(proc_t p, boolean_t *require_success)
4117 {
4118 size_t length;
4119 void *blob;
4120 int error;
4121
4122 if ((p->p_csflags & CS_VALID) == 0) {
4123 *require_success = FALSE;
4124 return TRUE;
4125 }
4126
4127 error = cs_entitlements_blob_get(p, &blob, &length);
4128 if (error == 0 && blob != NULL) {
4129 *require_success = TRUE; /* fatal on the desktop when entitlements are present */
4130 return TRUE;
4131 }
4132
4133 *require_success = FALSE;
4134 return 0;
4135 }
4136
4137
4138 static int
4139 check_for_signature(proc_t p, struct image_params *imgp)
4140 {
4141 mach_port_t port = NULL;
4142 kern_return_t kr = KERN_FAILURE;
4143 int error = EACCES;
4144 boolean_t unexpected_failure = FALSE;
4145 unsigned char hash[SHA1_RESULTLEN];
4146 boolean_t require_success = FALSE;
4147
4148 /*
4149 * Override inherited code signing flags with the
4150 * ones for the process that is being successfully
4151 * loaded
4152 */
4153 proc_lock(p);
4154 p->p_csflags = imgp->ip_csflags;
4155 proc_unlock(p);
4156
4157 /* Set the switch_protect flag on the map */
4158 if(p->p_csflags & (CS_HARD|CS_KILL)) {
4159 vm_map_switch_protect(get_task_map(p->task), TRUE);
4160 }
4161
4162 /* check if callout to taskgated is needed */
4163 if (!taskgated_required(p, &require_success)) {
4164 error = 0;
4165 goto done;
4166 }
4167
4168 kr = task_get_task_access_port(p->task, &port);
4169 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
4170 error = 0;
4171 if (require_success)
4172 error = EACCES;
4173 goto done;
4174 }
4175
4176 /*
4177 * taskgated returns KERN_SUCCESS if it has completed its work
4178 * and the exec should continue, KERN_FAILURE if the exec should
4179 * fail, or it may error out with different error code in an
4180 * event of mig failure (e.g. process was signalled during the
4181 * rpc call, taskgated died, mig server died etc.).
4182 */
4183
4184 kr = find_code_signature(port, p->p_pid);
4185 switch (kr) {
4186 case KERN_SUCCESS:
4187 error = 0;
4188 break;
4189 case KERN_FAILURE:
4190 error = EACCES;
4191 goto done;
4192 default:
4193 error = EACCES;
4194 unexpected_failure = TRUE;
4195 goto done;
4196 }
4197
4198 /* Only do this if exec_resettextvp() did not fail */
4199 if (p->p_textvp != NULLVP) {
4200 /*
4201 * If there's a new code directory, mark this process
4202 * as signed.
4203 */
4204 if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) {
4205 proc_lock(p);
4206 p->p_csflags |= CS_VALID;
4207 proc_unlock(p);
4208 }
4209 }
4210
4211 done:
4212 if (0 != error) {
4213 if (!unexpected_failure)
4214 p->p_csflags |= CS_KILLED;
4215 /* make very sure execution fails */
4216 psignal(p, SIGKILL);
4217 }
4218 return error;
4219 }
4220
4221 /*
4222 * Typically as soon as we start executing this process, the
4223 * first instruction will trigger a VM fault to bring the text
4224 * pages (as executable) into the address space, followed soon
4225 * thereafter by dyld data structures (for dynamic executable).
4226 * To optimize this, as well as improve support for hardware
4227 * debuggers that can only access resident pages present
4228 * in the process' page tables, we prefault some pages if
4229 * possible. Errors are non-fatal.
4230 */
4231 static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
4232 {
4233 int ret;
4234 size_t expected_all_image_infos_size;
4235
4236 /*
4237 * Prefault executable or dyld entry point.
4238 */
4239 vm_fault(current_map(),
4240 vm_map_trunc_page(load_result->entry_point,
4241 vm_map_page_mask(current_map())),
4242 VM_PROT_READ | VM_PROT_EXECUTE,
4243 FALSE,
4244 THREAD_UNINT, NULL, 0);
4245
4246 if (imgp->ip_flags & IMGPF_IS_64BIT) {
4247 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
4248 } else {
4249 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
4250 }
4251
4252 /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
4253 if (load_result->dynlinker &&
4254 load_result->all_image_info_addr &&
4255 load_result->all_image_info_size >= expected_all_image_infos_size) {
4256 union {
4257 struct user64_dyld_all_image_infos infos64;
4258 struct user32_dyld_all_image_infos infos32;
4259 } all_image_infos;
4260
4261 /*
4262 * Pre-fault to avoid copyin() going through the trap handler
4263 * and recovery path.
4264 */
4265 vm_fault(current_map(),
4266 vm_map_trunc_page(load_result->all_image_info_addr,
4267 vm_map_page_mask(current_map())),
4268 VM_PROT_READ | VM_PROT_WRITE,
4269 FALSE,
4270 THREAD_UNINT, NULL, 0);
4271 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
4272 /* all_image_infos straddles a page */
4273 vm_fault(current_map(),
4274 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
4275 vm_map_page_mask(current_map())),
4276 VM_PROT_READ | VM_PROT_WRITE,
4277 FALSE,
4278 THREAD_UNINT, NULL, 0);
4279 }
4280
4281 ret = copyin(load_result->all_image_info_addr,
4282 &all_image_infos,
4283 expected_all_image_infos_size);
4284 if (ret == 0 && all_image_infos.infos32.version >= 9) {
4285
4286 user_addr_t notification_address;
4287 user_addr_t dyld_image_address;
4288 user_addr_t dyld_version_address;
4289 user_addr_t dyld_all_image_infos_address;
4290 user_addr_t dyld_slide_amount;
4291
4292 if (imgp->ip_flags & IMGPF_IS_64BIT) {
4293 notification_address = all_image_infos.infos64.notification;
4294 dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
4295 dyld_version_address = all_image_infos.infos64.dyldVersion;
4296 dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
4297 } else {
4298 notification_address = all_image_infos.infos32.notification;
4299 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
4300 dyld_version_address = all_image_infos.infos32.dyldVersion;
4301 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
4302 }
4303
4304 /*
4305 * dyld statically sets up the all_image_infos in its Mach-O
4306 * binary at static link time, with pointers relative to its default
4307 * load address. Since ASLR might slide dyld before its first
4308 * instruction is executed, "dyld_slide_amount" tells us how far
4309 * dyld was loaded compared to its default expected load address.
4310 * All other pointers into dyld's image should be adjusted by this
4311 * amount. At some point later, dyld will fix up pointers to take
4312 * into account the slide, at which point the all_image_infos_address
4313 * field in the structure will match the runtime load address, and
4314 * "dyld_slide_amount" will be 0, if we were to consult it again.
4315 */
4316
4317 dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
4318
4319 #if 0
4320 kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
4321 (uint64_t)load_result->all_image_info_addr,
4322 all_image_infos.infos32.version,
4323 (uint64_t)notification_address,
4324 (uint64_t)dyld_image_address,
4325 (uint64_t)dyld_version_address,
4326 (uint64_t)dyld_all_image_infos_address);
4327 #endif
4328
4329 vm_fault(current_map(),
4330 vm_map_trunc_page(notification_address + dyld_slide_amount,
4331 vm_map_page_mask(current_map())),
4332 VM_PROT_READ | VM_PROT_EXECUTE,
4333 FALSE,
4334 THREAD_UNINT, NULL, 0);
4335 vm_fault(current_map(),
4336 vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
4337 vm_map_page_mask(current_map())),
4338 VM_PROT_READ | VM_PROT_EXECUTE,
4339 FALSE,
4340 THREAD_UNINT, NULL, 0);
4341 vm_fault(current_map(),
4342 vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
4343 vm_map_page_mask(current_map())),
4344 VM_PROT_READ,
4345 FALSE,
4346 THREAD_UNINT, NULL, 0);
4347 vm_fault(current_map(),
4348 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
4349 vm_map_page_mask(current_map())),
4350 VM_PROT_READ | VM_PROT_WRITE,
4351 FALSE,
4352 THREAD_UNINT, NULL, 0);
4353 }
4354 }
4355 }