bsd/kern/kern_exec.c

   1 /*
   2  * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  29 /*
  30  * Mach Operating System
  31  * Copyright (c) 1987 Carnegie-Mellon University
  32  * All rights reserved.  The CMU software License Agreement specifies
  33  * the terms and conditions for use and redistribution.
  34  */
  35
  36 /*-
  37  * Copyright (c) 1982, 1986, 1991, 1993
  38  *      The Regents of the University of California.  All rights reserved.
  39  * (c) UNIX System Laboratories, Inc.
  40  * All or some portions of this file are derived from material licensed
  41  * to the University of California by American Telephone and Telegraph
  42  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  43  * the permission of UNIX System Laboratories, Inc.
  44  *
  45  * Redistribution and use in source and binary forms, with or without
  46  * modification, are permitted provided that the following conditions
  47  * are met:
  48  * 1. Redistributions of source code must retain the above copyright
  49  *    notice, this list of conditions and the following disclaimer.
  50  * 2. Redistributions in binary form must reproduce the above copyright
  51  *    notice, this list of conditions and the following disclaimer in the
  52  *    documentation and/or other materials provided with the distribution.
  53  * 3. All advertising materials mentioning features or use of this software
  54  *    must display the following acknowledgement:
  55  *      This product includes software developed by the University of
  56  *      California, Berkeley and its contributors.
  57  * 4. Neither the name of the University nor the names of its contributors
  58  *    may be used to endorse or promote products derived from this software
  59  *    without specific prior written permission.
  60  *
  61  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  62  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  63  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  64  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  65  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  66  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  67  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  68  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  69  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  70  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  71  * SUCH DAMAGE.
  72  *
  73  *      from: @(#)kern_exec.c   8.1 (Berkeley) 6/10/93
  74  */
  75 /*
  76  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  77  * support for mandatory and extensible security protections.  This notice
  78  * is included in support of clause 2.2 (b) of the Apple Public License,
  79  * Version 2.0.
  80  */
  81 #include <machine/reg.h>
  82 #include <machine/cpu_capabilities.h>
  83
  84 #include <sys/param.h>
  85 #include <sys/systm.h>
  86 #include <sys/filedesc.h>
  87 #include <sys/kernel.h>
  88 #include <sys/proc_internal.h>
  89 #include <sys/kauth.h>
  90 #include <sys/user.h>
  91 #include <sys/socketvar.h>
  92 #include <sys/malloc.h>
  93 #include <sys/namei.h>
  94 #include <sys/mount_internal.h>
  95 #include <sys/vnode_internal.h>
  96 #include <sys/file_internal.h>
  97 #include <sys/stat.h>
  98 #include <sys/uio_internal.h>
  99 #include <sys/acct.h>
 100 #include <sys/exec.h>
 101 #include <sys/kdebug.h>
 102 #include <sys/signal.h>
 103 #include <sys/aio_kern.h>
 104 #include <sys/sysproto.h>
 105 #if SYSV_SHM
 106 #include <sys/shm_internal.h>           /* shmexec() */
 107 #endif
 108 #include <sys/ubc_internal.h>           /* ubc_map() */
 109 #include <sys/spawn.h>
 110 #include <sys/spawn_internal.h>
 111 #include <sys/process_policy.h>
 112 #include <sys/codesign.h>
 113 #include <crypto/sha1.h>
 114
 115 #include <libkern/libkern.h>
 116
 117 #include <security/audit/audit.h>
 118
 119 #include <ipc/ipc_types.h>
 120
 121 #include <mach/mach_types.h>
 122 #include <mach/port.h>
 123 #include <mach/task.h>
 124 #include <mach/task_access.h>
 125 #include <mach/thread_act.h>
 126 #include <mach/vm_map.h>
 127 #include <mach/mach_vm.h>
 128 #include <mach/vm_param.h>
 129
 130 #include <kern/sched_prim.h> /* thread_wakeup() */
 131 #include <kern/affinity.h>
 132 #include <kern/assert.h>
 133 #include <kern/task.h>
 134 #include <kern/coalition.h>
 135
 136 #if CONFIG_MACF
 137 #include <security/mac.h>
 138 #include <security/mac_mach_internal.h>
 139 #endif
 140
 141 #include <vm/vm_map.h>
 142 #include <vm/vm_kern.h>
 143 #include <vm/vm_protos.h>
 144 #include <vm/vm_kern.h>
 145 #include <vm/vm_fault.h>
 146 #include <vm/vm_pageout.h>
 147
 148 #include <kdp/kdp_dyld.h>
 149
 150 #include <machine/pal_routines.h>
 151
 152 #include <pexpert/pexpert.h>
 153
 154 #if CONFIG_MEMORYSTATUS
 155 #include <sys/kern_memorystatus.h>
 156 #endif
 157
#if CONFIG_DTRACE
/*
 * DTrace entry points used by exec.  They are declared by hand, and only
 * when CONFIG_DTRACE is set.
 *
 * Do not include dtrace.h, it redefines kmem_[alloc/free]
 */
extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);
extern void (*dtrace_helpers_cleanup)(proc_t);
extern void dtrace_lazy_dofs_destroy(proc_t);

/*
 * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
 * we will store its value before actually calling it.
 *
 * File-local copy of that pointer; NULL until a value has been stored.
 */
static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;

#include <sys/dtrace_ptss.h>
#endif
 173
 174 /* support for child creation in exec after vfork */
 175 thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit);
 176 void vfork_exit(proc_t p, int rv);
 177 int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
 178 extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
 179
 180 /*
 181  * Mach things for which prototypes are unavailable from Mach headers
 182  */
 183 void            ipc_task_reset(
 184                         task_t          task);
 185 void            ipc_thread_reset(
 186                         thread_t        thread);
 187 kern_return_t ipc_object_copyin(
 188         ipc_space_t             space,
 189         mach_port_name_t        name,
 190         mach_msg_type_name_t    msgt_name,
 191         ipc_object_t            *objectp);
 192 void ipc_port_release_send(ipc_port_t);
 193
 194 #if DEVELOPMENT || DEBUG
 195 void task_importance_update_owner_info(task_t);
 196 #endif
 197
 198 extern struct savearea *get_user_regs(thread_t);
 199
 200 __attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);
 201
 202 #include <kern/thread.h>
 203 #include <kern/task.h>
 204 #include <kern/ast.h>
 205 #include <kern/mach_loader.h>
 206 #include <kern/mach_fat.h>
 207 #include <mach-o/fat.h>
 208 #include <mach-o/loader.h>
 209 #include <machine/vmparam.h>
 210 #include <sys/imgact.h>
 211
 212 #include <sys/sdt.h>
 213
 214
 215 /*
 216  * EAI_ITERLIMIT        The maximum number of times to iterate an image
 217  *                      activator in exec_activate_image() before treating
 218  *                      it as malformed/corrupt.
 219  */
 220 #define EAI_ITERLIMIT           3
 221
 222 /*
 223  * For #! interpreter parsing
 224  */
 225 #define IS_WHITESPACE(ch) ((ch == ' ') || (ch == '\t'))
 226 #define IS_EOL(ch) ((ch == '#') || (ch == '\n'))
 227
/*
 * Kernel VM map defined outside this file.
 * NOTE(review): not referenced in the part of the file reviewed here;
 * confirm its user before relying on this declaration.
 */
extern vm_map_t bsd_pageable_map;
/*
 * File operations table defined outside this file; exec_shell_imgact()
 * installs it as fg_ops on the descriptor it creates for a SUID/SGID script.
 */
extern const struct fileops vnops;
 230
 231 #define ROUND_PTR(type, addr)   \
 232         (type *)( ( (uintptr_t)(addr) + 16 - 1) \
 233                   & ~(16 - 1) )
 234
 235 struct image_params;    /* Forward */
 236 static int exec_activate_image(struct image_params *imgp);
 237 static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
 238 static int load_return_to_errno(load_return_t lrtn);
 239 static int execargs_alloc(struct image_params *imgp);
 240 static int execargs_free(struct image_params *imgp);
 241 static int exec_check_permissions(struct image_params *imgp);
 242 static int exec_extract_strings(struct image_params *imgp);
 243 static int exec_add_apple_strings(struct image_params *imgp);
 244 static int exec_handle_sugid(struct image_params *imgp);
 245 static int sugid_scripts = 0;
 246 SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
 247 static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
 248 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
 249 static void exec_resettextvp(proc_t, struct image_params *);
 250 static int check_for_signature(proc_t, struct image_params *);
 251 static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
 252 static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports);
 253 static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp,
 254                              ipc_port_t * portwatch_ports, int portwatch_count);
 255
 256 /*
 257  * exec_add_user_string
 258  *
 259  * Add the requested string to the string space area.
 260  *
 261  * Parameters;  struct image_params *           image parameter block
 262  *              user_addr_t                     string to add to strings area
 263  *              int                             segment from which string comes
 264  *              boolean_t                       TRUE if string contributes to NCARGS
 265  *
 266  * Returns:     0                       Success
 267  *              !0                      Failure errno from copyinstr()
 268  *
 269  * Implicit returns:
 270  *              (imgp->ip_strendp)      updated location of next add, if any
 271  *              (imgp->ip_strspace)     updated byte count of space remaining
 272  *              (imgp->ip_argspace) updated byte count of space in NCARGS
 273  */
 274 static int
 275 exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
 276 {
 277         int error = 0;
 278
 279         do {
 280                 size_t len = 0;
 281                 int space;
 282
 283                 if (is_ncargs)
 284                         space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
 285                 else
 286                         space = imgp->ip_strspace;
 287
 288                 if (space <= 0) {
 289                         error = E2BIG;
 290                         break;
 291                 }
 292
 293                 if (!UIO_SEG_IS_USER_SPACE(seg)) {
 294                         char *kstr = CAST_DOWN(char *,str);     /* SAFE */
 295                         error = copystr(kstr, imgp->ip_strendp, space, &len);
 296                 } else  {
 297                         error = copyinstr(str, imgp->ip_strendp, space, &len);
 298                 }
 299
 300                 imgp->ip_strendp += len;
 301                 imgp->ip_strspace -= len;
 302                 if (is_ncargs)
 303                         imgp->ip_argspace -= len;
 304
 305         } while (error == ENAMETOOLONG);
 306
 307         return error;
 308 }
 309
 310 /*
 311  * exec_save_path
 312  *
 313  * To support new app package launching for Mac OS X, the dyld needs the
 314  * first argument to execve() stored on the user stack.
 315  *
 316  * Save the executable path name at the bottom of the strings area and set
 317  * the argument vector pointer to the location following that to indicate
 318  * the start of the argument and environment tuples, setting the remaining
 319  * string space count to the size of the string area minus the path length.
 320  *
 321  * Parameters;  struct image_params *           image parameter block
 322  *              char *                          path used to invoke program
 323  *              int                             segment from which path comes
 324  *
 325  * Returns:     int                     0       Success
 326  *              EFAULT                          Bad address
 327  *      copy[in]str:EFAULT                      Bad address
 328  *      copy[in]str:ENAMETOOLONG                Filename too long
 329  *
 330  * Implicit returns:
 331  *              (imgp->ip_strings)              saved path
 332  *              (imgp->ip_strspace)             space remaining in ip_strings
 333  *              (imgp->ip_strendp)              start of remaining copy area
 334  *              (imgp->ip_argspace)             space remaining of NCARGS
 335  *              (imgp->ip_applec)               Initial applev[0]
 336  *
 337  * Note:        We have to do this before the initial namei() since in the
 338  *              path contains symbolic links, namei() will overwrite the
 339  *              original path buffer contents.  If the last symbolic link
 340  *              resolved was a relative pathname, we would lose the original
 341  *              "path", which could be an absolute pathname. This might be
 342  *              unacceptable for dyld.
 343  */
 344 static int
 345 exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
 346 {
 347         int error;
 348         size_t  len;
 349         char *kpath;
 350
 351         len = MIN(MAXPATHLEN, imgp->ip_strspace);
 352
 353         switch(seg) {
 354         case UIO_USERSPACE32:
 355         case UIO_USERSPACE64:   /* Same for copyin()... */
 356                 error = copyinstr(path, imgp->ip_strings, len, &len);
 357                 break;
 358         case UIO_SYSSPACE:
 359                 kpath = CAST_DOWN(char *,path); /* SAFE */
 360                 error = copystr(kpath, imgp->ip_strings, len, &len);
 361                 break;
 362         default:
 363                 error = EFAULT;
 364                 break;
 365         }
 366
 367         if (!error) {
 368                 imgp->ip_strendp += len;
 369                 imgp->ip_strspace -= len;
 370         }
 371
 372         return(error);
 373 }
 374
 375 /*
 376  * exec_reset_save_path
 377  *
 378  * If we detect a shell script, we need to reset the string area
 379  * state so that the interpreter can be saved onto the stack.
 380
 381  * Parameters;  struct image_params *           image parameter block
 382  *
 383  * Returns:     int                     0       Success
 384  *
 385  * Implicit returns:
 386  *              (imgp->ip_strings)              saved path
 387  *              (imgp->ip_strspace)             space remaining in ip_strings
 388  *              (imgp->ip_strendp)              start of remaining copy area
 389  *              (imgp->ip_argspace)             space remaining of NCARGS
 390  *
 391  */
 392 static int
 393 exec_reset_save_path(struct image_params *imgp)
 394 {
 395         imgp->ip_strendp = imgp->ip_strings;
 396         imgp->ip_argspace = NCARGS;
 397         imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
 398
 399         return (0);
 400 }
 401
 402 /*
 403  * exec_shell_imgact
 404  *
 405  * Image activator for interpreter scripts.  If the image begins with
 406  * the characters "#!", then it is an interpreter script.  Verify the
 407  * length of the script line indicating the interpreter is not in
 408  * excess of the maximum allowed size.  If this is the case, then
 409  * break out the arguments, if any, which are separated by white
 410  * space, and copy them into the argument save area as if they were
 411  * provided on the command line before all other arguments.  The line
 412  * ends when we encounter a comment character ('#') or newline.
 413  *
 414  * Parameters;  struct image_params *   image parameter block
 415  *
 416  * Returns:     -1                      not an interpreter (keep looking)
 417  *              -3                      Success: interpreter: relookup
 418  *              >0                      Failure: interpreter: error number
 419  *
 420  * A return value other than -1 indicates subsequent image activators should
 421  * not be given the opportunity to attempt to activate the image.
 422  */
 423 static int
 424 exec_shell_imgact(struct image_params *imgp)
 425 {
 426         char *vdata = imgp->ip_vdata;
 427         char *ihp;
 428         char *line_startp, *line_endp;
 429         char *interp;
 430         proc_t p;
 431         struct fileproc *fp;
 432         int fd;
 433         int error;
 434
 435         /*
 436          * Make sure it's a shell script.  If we've already redirected
 437          * from an interpreted file once, don't do it again.
 438          */
 439         if (vdata[0] != '#' ||
 440             vdata[1] != '!' ||
 441             (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
 442                 return (-1);
 443         }
 444
 445         if (imgp->ip_origcputype != 0) {
 446                 /* Fat header previously matched, don't allow shell script inside */
 447                 return (-1);
 448         }
 449
 450         imgp->ip_flags |= IMGPF_INTERPRET;
 451         imgp->ip_interp_sugid_fd = -1;
 452         imgp->ip_interp_buffer[0] = '\0';
 453
 454         /* Check to see if SUGID scripts are permitted.  If they aren't then
 455          * clear the SUGID bits.
 456          * imgp->ip_vattr is known to be valid.
 457          */
 458         if (sugid_scripts == 0) {
 459                 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
 460         }
 461
 462         /* Try to find the first non-whitespace character */
 463         for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
 464                 if (IS_EOL(*ihp)) {
 465                         /* Did not find interpreter, "#!\n" */
 466                         return (ENOEXEC);
 467                 } else if (IS_WHITESPACE(*ihp)) {
 468                         /* Whitespace, like "#!    /bin/sh\n", keep going. */
 469                 } else {
 470                         /* Found start of interpreter */
 471                         break;
 472                 }
 473         }
 474
 475         if (ihp == &vdata[IMG_SHSIZE]) {
 476                 /* All whitespace, like "#!           " */
 477                 return (ENOEXEC);
 478         }
 479
 480         line_startp = ihp;
 481
 482         /* Try to find the end of the interpreter+args string */
 483         for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
 484                 if (IS_EOL(*ihp)) {
 485                         /* Got it */
 486                         break;
 487                 } else {
 488                         /* Still part of interpreter or args */
 489                 }
 490         }
 491
 492         if (ihp == &vdata[IMG_SHSIZE]) {
 493                 /* A long line, like "#! blah blah blah" without end */
 494                 return (ENOEXEC);
 495         }
 496
 497         /* Backtrack until we find the last non-whitespace */
 498         while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
 499                 ihp--;
 500         }
 501
 502         /* The character after the last non-whitespace is our logical end of line */
 503         line_endp = ihp + 1;
 504
 505         /*
 506          * Now we have pointers to the usable part of:
 507          *
 508          * "#!  /usr/bin/int first    second   third    \n"
 509          *      ^ line_startp                       ^ line_endp
 510          */
 511
 512         /* copy the interpreter name */
 513         interp = imgp->ip_interp_buffer;
 514         for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
 515                 *interp++ = *ihp;
 516         *interp = '\0';
 517
 518         exec_reset_save_path(imgp);
 519         exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
 520                                                         UIO_SYSSPACE);
 521
 522         /* Copy the entire interpreter + args for later processing into argv[] */
 523         interp = imgp->ip_interp_buffer;
 524         for ( ihp = line_startp; (ihp < line_endp); ihp++)
 525                 *interp++ = *ihp;
 526         *interp = '\0';
 527
 528         /*
 529          * If we have a SUID oder SGID script, create a file descriptor
 530          * from the vnode and pass /dev/fd/%d instead of the actual
 531          * path name so that the script does not get opened twice
 532          */
 533         if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
 534                 p = vfs_context_proc(imgp->ip_vfs_context);
 535                 error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
 536                 if (error)
 537                         return(error);
 538
 539                 fp->f_fglob->fg_flag = FREAD;
 540                 fp->f_fglob->fg_ops = &vnops;
 541                 fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
 542
 543                 proc_fdlock(p);
 544                 procfdtbl_releasefd(p, fd, NULL);
 545                 fp_drop(p, fd, fp, 1);
 546                 proc_fdunlock(p);
 547                 vnode_ref(imgp->ip_vp);
 548
 549                 imgp->ip_interp_sugid_fd = fd;
 550         }
 551
 552         return (-3);
 553 }
 554
 555
 556
 557 /*
 558  * exec_fat_imgact
 559  *
 560  * Image activator for fat 1.0 binaries.  If the binary is fat, then we
 561  * need to select an image from it internally, and make that the image
 562  * we are going to attempt to execute.  At present, this consists of
 563  * reloading the first page for the image with a first page from the
 564  * offset location indicated by the fat header.
 565  *
 566  * Parameters;  struct image_params *   image parameter block
 567  *
 568  * Returns:     -1                      not a fat binary (keep looking)
 569  *              -2                      Success: encapsulated binary: reread
 570  *              >0                      Failure: error number
 571  *
 572  * Important:   This image activator is byte order neutral.
 573  *
 574  * Note:        A return value other than -1 indicates subsequent image
 575  *              activators should not be given the opportunity to attempt
 576  *              to activate the image.
 577  *
 578  *              If we find an encapsulated binary, we make no assertions
 579  *              about its  validity; instead, we leave that up to a rescan
 580  *              for an activator to claim it, and, if it is claimed by one,
 581  *              that activator is responsible for determining validity.
 582  */
 583 static int
 584 exec_fat_imgact(struct image_params *imgp)
 585 {
 586         proc_t p = vfs_context_proc(imgp->ip_vfs_context);
 587         kauth_cred_t cred = kauth_cred_proc_ref(p);
 588         struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
 589         struct _posix_spawnattr *psa = NULL;
 590         struct fat_arch fat_arch;
 591         int resid, error;
 592         load_return_t lret;
 593
 594         if (imgp->ip_origcputype != 0) {
 595                 /* Fat header previously matched, don't allow another fat file inside */
 596                 return (-1);
 597         }
 598
 599         /* Make sure it's a fat binary */
 600         if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
 601                 error = -1; /* not claimed */
 602                 goto bad;
 603         }
 604
 605         /* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */
 606         lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
 607         if (lret != LOAD_SUCCESS) {
 608                 error = load_return_to_errno(lret);
 609                 goto bad;
 610         }
 611
 612         /* If posix_spawn binprefs exist, respect those prefs. */
 613         psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
 614         if (psa != NULL && psa->psa_binprefs[0] != 0) {
 615                 uint32_t pr = 0;
 616
 617                 /* Check each preference listed against all arches in header */
 618                 for (pr = 0; pr < NBINPREFS; pr++) {
 619                         cpu_type_t pref = psa->psa_binprefs[pr];
 620                         if (pref == 0) {
 621                                 /* No suitable arch in the pref list */
 622                                 error = EBADARCH;
 623                                 goto bad;
 624                         }
 625
 626                         if (pref == CPU_TYPE_ANY) {
 627                                 /* Fall through to regular grading */
 628                                 goto regular_grading;
 629                         }
 630
 631                         lret = fatfile_getbestarch_for_cputype(pref,
 632                                                         (vm_offset_t)fat_header,
 633                                                         PAGE_SIZE,
 634                                                         &fat_arch);
 635                         if (lret == LOAD_SUCCESS) {
 636                                 goto use_arch;
 637                         }
 638                 }
 639
 640                 /* Requested binary preference was not honored */
 641                 error = EBADEXEC;
 642                 goto bad;
 643         }
 644
 645 regular_grading:
 646         /* Look up our preferred architecture in the fat file. */
 647         lret = fatfile_getbestarch((vm_offset_t)fat_header,
 648                                 PAGE_SIZE,
 649                                 &fat_arch);
 650         if (lret != LOAD_SUCCESS) {
 651                 error = load_return_to_errno(lret);
 652                 goto bad;
 653         }
 654
 655 use_arch:
 656         /* Read the Mach-O header out of fat_arch */
 657         error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
 658                         PAGE_SIZE, fat_arch.offset,
 659                         UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED),
 660                         cred, &resid, p);
 661         if (error) {
 662                 goto bad;
 663         }
 664
 665         if (resid) {
 666                 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
 667         }
 668
 669         /* Success.  Indicate we have identified an encapsulated binary */
 670         error = -2;
 671         imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
 672         imgp->ip_arch_size = (user_size_t)fat_arch.size;
 673         imgp->ip_origcputype = fat_arch.cputype;
 674         imgp->ip_origcpusubtype = fat_arch.cpusubtype;
 675
 676 bad:
 677         kauth_cred_unref(&cred);
 678         return (error);
 679 }
 680
 681 /*
 682  * exec_mach_imgact
 683  *
 684  * Image activator for mach-o 1.0 binaries.
 685  *
 686  * Parameters;  struct image_params *   image parameter block
 687  *
  688  * Returns:     -1                      not a Mach-O executable (keep looking)
 689  *              -2                      Success: encapsulated binary: reread
 690  *              >0                      Failure: error number
 691  *              EBADARCH                Mach-o binary, but with an unrecognized
 692  *                                      architecture
 693  *              ENOMEM                  No memory for child process after -
 694  *                                      can only happen after vfork()
 695  *
 696  * Important:   This image activator is NOT byte order neutral.
 697  *
 698  * Note:        A return value other than -1 indicates subsequent image
 699  *              activators should not be given the opportunity to attempt
 700  *              to activate the image.
 701  *
 702  * TODO:        More gracefully handle failures after vfork
 703  */
 704 static int
 705 exec_mach_imgact(struct image_params *imgp)
 706 {
 707         struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
 708         proc_t                  p = vfs_context_proc(imgp->ip_vfs_context);
 709         int                     error = 0;
 710         task_t                  task;
 711         task_t                  new_task = NULL; /* protected by vfexec */
 712         thread_t                thread;
 713         struct uthread          *uthread;
 714         vm_map_t old_map = VM_MAP_NULL;
 715         vm_map_t map;
 716         load_return_t           lret;
 717         load_result_t           load_result;
 718         struct _posix_spawnattr *psa = NULL;
 719         int                     spawn = (imgp->ip_flags & IMGPF_SPAWN);
 720         int                     vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
 721
 722         /*
 723          * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
 724          * is a reserved field on the end, so for the most part, we can
 725          * treat them as if they were identical. Reverse-endian Mach-O
 726          * binaries are recognized but not compatible.
 727          */
 728         if ((mach_header->magic == MH_CIGAM) ||
 729             (mach_header->magic == MH_CIGAM_64)) {
 730                 error = EBADARCH;
 731                 goto bad;
 732         }
 733
 734         if ((mach_header->magic != MH_MAGIC) &&
 735             (mach_header->magic != MH_MAGIC_64)) {
 736                 error = -1;
 737                 goto bad;
 738         }
 739
 740         if (mach_header->filetype != MH_EXECUTE) {
 741                 error = -1;
 742                 goto bad;
 743         }
 744
 745         if (imgp->ip_origcputype != 0) {
 746                 /* Fat header previously had an idea about this thin file */
 747                 if (imgp->ip_origcputype != mach_header->cputype ||
 748                         imgp->ip_origcpusubtype != mach_header->cpusubtype) {
 749                         error = EBADARCH;
 750                         goto bad;
 751                 }
 752         } else {
 753                 imgp->ip_origcputype = mach_header->cputype;
 754                 imgp->ip_origcpusubtype = mach_header->cpusubtype;
 755         }
 756
 757         task = current_task();
 758         thread = current_thread();
 759         uthread = get_bsdthread_info(thread);
 760
 761         if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
 762                 imgp->ip_flags |= IMGPF_IS_64BIT;
 763
 764         /* If posix_spawn binprefs exist, respect those prefs. */
 765         psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
 766         if (psa != NULL && psa->psa_binprefs[0] != 0) {
 767                 int pr = 0;
 768                 for (pr = 0; pr < NBINPREFS; pr++) {
 769                         cpu_type_t pref = psa->psa_binprefs[pr];
 770                         if (pref == 0) {
 771                                 /* No suitable arch in the pref list */
 772                                 error = EBADARCH;
 773                                 goto bad;
 774                         }
 775
 776                         if (pref == CPU_TYPE_ANY) {
 777                                 /* Jump to regular grading */
 778                                 goto grade;
 779                         }
 780
 781                         if (pref == imgp->ip_origcputype) {
 782                                 /* We have a match! */
 783                                 goto grade;
 784                         }
 785                 }
 786                 error = EBADARCH;
 787                 goto bad;
 788         }
 789 grade:
 790         if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
 791                 error = EBADARCH;
 792                 goto bad;
 793         }
 794
 795         /* Copy in arguments/environment from the old process */
 796         error = exec_extract_strings(imgp);
 797         if (error)
 798                 goto bad;
 799
 800         error = exec_add_apple_strings(imgp);
 801         if (error)
 802                 goto bad;
 803
 804         AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
 805             imgp->ip_endargv - imgp->ip_startargv);
 806         AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
 807             imgp->ip_endenvv - imgp->ip_endargv);
 808
 809         /*
 810          * We are being called to activate an image subsequent to a vfork()
 811          * operation; in this case, we know that our task, thread, and
 812          * uthread are actually those of our parent, and our proc, which we
 813          * obtained indirectly from the image_params vfs_context_t, is the
 814          * new child process.
 815          */
 816         if (vfexec || spawn) {
 817                 if (vfexec) {
 818                         imgp->ip_new_thread = fork_create_child(task, COALITION_NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
 819                         if (imgp->ip_new_thread == NULL) {
 820                                 error = ENOMEM;
 821                                 goto bad;
 822                         }
 823                 }
 824
 825                 /* reset local idea of thread, uthread, task */
 826                 thread = imgp->ip_new_thread;
 827                 uthread = get_bsdthread_info(thread);
 828                 task = new_task = get_threadtask(thread);
 829                 map = get_task_map(task);
 830         } else {
 831                 map = VM_MAP_NULL;
 832         }
 833
 834         /*
 835          * We set these flags here; this is OK, since if we fail after
 836          * this point, we have already destroyed the parent process anyway.
 837          */
 838         task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
 839         if (imgp->ip_flags & IMGPF_IS_64BIT) {
 840                 task_set_64bit(task, TRUE);
 841                 OSBitOrAtomic(P_LP64, &p->p_flag);
 842         } else {
 843                 task_set_64bit(task, FALSE);
 844                 OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
 845         }
 846
 847         /*
 848          *      Load the Mach-O file.
 849          *
 850          * NOTE: An error after this point  indicates we have potentially
 851          * destroyed or overwritten some process state while attempting an
 852          * execve() following a vfork(), which is an unrecoverable condition.
 853          * We send the new process an immediate SIGKILL to avoid it executing
 854          * any instructions in the mutated address space. For true spawns,
 855          * this is not the case, and "too late" is still not too late to
 856          * return an error code to the parent process.
 857          */
 858
 859         /*
 860          * Actually load the image file we previously decided to load.
 861          */
 862         lret = load_machfile(imgp, mach_header, thread, map, &load_result);
 863
 864         if (lret != LOAD_SUCCESS) {
 865                 error = load_return_to_errno(lret);
 866                 goto badtoolate;
 867         }
 868
 869         proc_lock(p);
 870         p->p_cputype = imgp->ip_origcputype;
 871         p->p_cpusubtype = imgp->ip_origcpusubtype;
 872         proc_unlock(p);
 873
 874         vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 875
 876         /*
 877          * Set code-signing flags if this binary is signed, or if parent has
 878          * requested them on exec.
 879          */
 880         if (load_result.csflags & CS_VALID) {
 881                 imgp->ip_csflags |= load_result.csflags &
 882                         (CS_VALID|
 883                          CS_HARD|CS_KILL|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM|
 884                          CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
 885         } else {
 886                 imgp->ip_csflags &= ~CS_VALID;
 887         }
 888
 889         if (p->p_csflags & CS_EXEC_SET_HARD)
 890                 imgp->ip_csflags |= CS_HARD;
 891         if (p->p_csflags & CS_EXEC_SET_KILL)
 892                 imgp->ip_csflags |= CS_KILL;
 893         if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
 894                 imgp->ip_csflags |= CS_ENFORCEMENT;
 895         if (p->p_csflags & CS_EXEC_SET_INSTALLER)
 896                 imgp->ip_csflags |= CS_INSTALLER;
 897
 898
 899         /*
 900          * Set up the system reserved areas in the new address space.
 901          */
 902         vm_map_exec(get_task_map(task),
 903                     task,
 904                     (void *) p->p_fd->fd_rdir,
 905                     cpu_type());
 906
 907         /*
 908          * Close file descriptors which specify close-on-exec.
 909          */
 910         fdexec(p, psa != NULL ? psa->psa_flags : 0);
 911
 912         /*
 913          * deal with set[ug]id.
 914          */
 915         error = exec_handle_sugid(imgp);
 916         if (error) {
 917                 goto badtoolate;
 918         }
 919
 920         /*
 921          * deal with voucher on exec-calling thread.
 922          */
 923         if (imgp->ip_new_thread == NULL)
 924                 thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
 925
 926         /* Make sure we won't interrupt ourself signalling a partial process */
 927         if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
 928                 psignal(p, SIGTRAP);
 929
 930         if (load_result.unixproc &&
 931                 create_unix_stack(get_task_map(task),
 932                                   &load_result,
 933                                   p) != KERN_SUCCESS) {
 934                 error = load_return_to_errno(LOAD_NOSPACE);
 935                 goto badtoolate;
 936         }
 937
 938         if (vfexec || spawn) {
 939                 old_map = vm_map_switch(get_task_map(task));
 940         }
 941
 942         if (load_result.unixproc) {
 943                 user_addr_t     ap;
 944
 945                 /*
 946                  * Copy the strings area out into the new process address
 947                  * space.
 948                  */
 949                 ap = p->user_stack;
 950                 error = exec_copyout_strings(imgp, &ap);
 951                 if (error) {
 952                         if (vfexec || spawn)
 953                                 vm_map_switch(old_map);
 954                         goto badtoolate;
 955                 }
 956                 /* Set the stack */
 957                 thread_setuserstack(thread, ap);
 958         }
 959
 960         if (load_result.dynlinker) {
 961                 uint64_t        ap;
 962                 int                     new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
 963
 964                 /* Adjust the stack */
 965                 ap = thread_adjuserstack(thread, -new_ptr_size);
 966                 error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
 967
 968                 if (error) {
 969                         if (vfexec || spawn)
 970                                 vm_map_switch(old_map);
 971                         goto badtoolate;
 972                 }
 973                 task_set_dyld_info(task, load_result.all_image_info_addr,
 974                     load_result.all_image_info_size);
 975         }
 976
 977         /* Avoid immediate VM faults back into kernel */
 978         exec_prefault_data(p, imgp, &load_result);
 979
 980         if (vfexec || spawn) {
 981                 vm_map_switch(old_map);
 982         }
 983         /* Set the entry point */
 984         thread_setentrypoint(thread, load_result.entry_point);
 985
 986         /* Stop profiling */
 987         stopprofclock(p);
 988
 989         /*
 990          * Reset signal state.
 991          */
 992         execsigs(p, thread);
 993
 994         /*
 995          * need to cancel async IO requests that can be cancelled and wait for those
 996          * already active.  MAY BLOCK!
 997          */
 998         _aio_exec( p );
 999
1000 #if SYSV_SHM
1001         /* FIXME: Till vmspace inherit is fixed: */
1002         if (!vfexec && p->vm_shm)
1003                 shmexec(p);
1004 #endif
1005 #if SYSV_SEM
1006         /* Clean up the semaphores */
1007         semexit(p);
1008 #endif
1009
1010         /*
1011          * Remember file name for accounting.
1012          */
1013         p->p_acflag &= ~AFORK;
1014         /* If the translated name isn't NULL, then we want to use
1015          * that translated name as the name we show as the "real" name.
1016          * Otherwise, use the name passed into exec.
1017          */
1018         if (0 != imgp->ip_p_comm[0]) {
1019                 bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
1020                         sizeof(p->p_comm));
1021         } else {
1022                 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
1023                         imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
1024                 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
1025                         (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
1026                 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
1027         }
1028
1029         pal_dbg_set_task_name( p->task );
1030
1031 #if DEVELOPMENT || DEBUG
1032         /*
1033          * Update the pid an proc name for importance base if any
1034          */
1035         task_importance_update_owner_info(p->task);
1036 #endif
1037
1038         memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
1039
1040 // <rdar://6598155> dtrace code cleanup needed
1041 #if CONFIG_DTRACE
1042         /*
1043          * Invalidate any predicate evaluation already cached for this thread by DTrace.
1044          * That's because we've just stored to p_comm and DTrace refers to that when it
1045          * evaluates the "execname" special variable. uid and gid may have changed as well.
1046          */
1047         dtrace_set_thread_predcache(current_thread(), 0);
1048
1049         /*
1050          * Free any outstanding lazy dof entries. It is imperative we
1051          * always call dtrace_lazy_dofs_destroy, rather than null check
1052          * and call if !NULL. If we NULL test, during lazy dof faulting
1053          * we can race with the faulting code and proceed from here to
1054          * beyond the helpers cleanup. The lazy dof faulting will then
1055          * install new helpers which no longer belong to this process!
1056          */
1057         dtrace_lazy_dofs_destroy(p);
1058
1059
1060         /*
1061          * Clean up any DTrace helpers for the process.
1062          */
1063         if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
1064                 (*dtrace_helpers_cleanup)(p);
1065         }
1066
1067         /*
1068          * Cleanup the DTrace provider associated with this process.
1069          */
1070         proc_lock(p);
1071         if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
1072                 (*dtrace_fasttrap_exec_ptr)(p);
1073         }
1074         proc_unlock(p);
1075 #endif
1076
1077         if (kdebug_enable) {
1078                 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
1079
1080                 /*
1081                  * Collect the pathname for tracing
1082                  */
1083                 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
1084
1085                 if (vfexec || spawn) {
1086                         KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE,
1087                                         p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread));
1088                         KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE,
1089                                         dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
1090                 } else {
1091                         KERNEL_DEBUG_CONSTANT(TRACE_DATA_EXEC | DBG_FUNC_NONE,
1092                                         p->p_pid ,0,0,0,0);
1093                         KERNEL_DEBUG_CONSTANT(TRACE_STRING_EXEC | DBG_FUNC_NONE,
1094                                         dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
1095                 }
1096         }
1097
1098         /*
1099          * Ensure the 'translated' and 'affinity' flags are cleared, since we
1100          * no longer run PowerPC binaries.
1101          */
1102         OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag);
1103
1104         /*
1105          * If posix_spawned with the START_SUSPENDED flag, stop the
1106          * process before it runs.
1107          */
1108         if (imgp->ip_px_sa != NULL) {
1109                 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1110                 if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1111                         proc_lock(p);
1112                         p->p_stat = SSTOP;
1113                         proc_unlock(p);
1114                         (void) task_suspend(p->task);
1115                 }
1116         }
1117
1118         /*
1119          * mark as execed, wakeup the process that vforked (if any) and tell
1120          * it that it now has its own resources back
1121          */
1122         OSBitOrAtomic(P_EXEC, &p->p_flag);
1123         proc_resetregister(p);
1124         if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1125                 proc_lock(p);
1126                 p->p_lflag &= ~P_LPPWAIT;
1127                 proc_unlock(p);
1128                 wakeup((caddr_t)p->p_pptr);
1129         }
1130
1131         /*
1132          * Pay for our earlier safety; deliver the delayed signals from
1133          * the incomplete vfexec process now that it's complete.
1134          */
1135         if (vfexec && (p->p_lflag & P_LTRACED)) {
1136                 psignal_vfork(p, new_task, thread, SIGTRAP);
1137         }
1138
1139         goto done;
1140
1141 badtoolate:
1142         /* Don't allow child process to execute any instructions */
1143         if (!spawn) {
1144                 if (vfexec) {
1145                         psignal_vfork(p, new_task, thread, SIGKILL);
1146                 } else {
1147                         psignal(p, SIGKILL);
1148                 }
1149
1150                 /* We can't stop this system call at this point, so just pretend we succeeded */
1151                 error = 0;
1152         }
1153
1154 done:
1155         if (!spawn) {
1156                 /* notify only if it has not failed due to FP Key error */
1157                 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
1158                         proc_knote(p, NOTE_EXEC);
1159         }
1160
1161         /* Drop extra references for cases where we don't expect the caller to clean up */
1162         if (vfexec || (spawn && error == 0)) {
1163                 task_deallocate(new_task);
1164                 thread_deallocate(thread);
1165         }
1166
1167 bad:
1168         return(error);
1169 }
1170
1171
1172
1173
1174 /*
1175  * Our image activator table; this is the table of the image types we are
1176  * capable of loading.  We list them in order of preference to ensure the
1177  * fastest image load speed.
1178  *
1179  * XXX hardcoded, for now; should use linker sets
1180  */
1181 struct execsw {
1182         int (*ex_imgact)(struct image_params *);
1183         const char *ex_name;
1184 } execsw[] = {
1185         { exec_mach_imgact,             "Mach-o Binary" },
1186         { exec_fat_imgact,              "Fat Binary" },
1187         { exec_shell_imgact,            "Interpreter Script" },
1188         { NULL, NULL}
1189 };
1190
1191
1192 /*
1193  * exec_activate_image
1194  *
1195  * Description: Iterate through the available image activators, and activate
1196  *              the image associated with the imgp structure.  We start with
1197  *              the
1198  *
1199  * Parameters:  struct image_params *   Image parameter block
1200  *
1201  * Returns:     0                       Success
1202  *              EBADEXEC                The executable is corrupt/unknown
1203  *      execargs_alloc:EINVAL           Invalid argument
1204  *      execargs_alloc:EACCES           Permission denied
1205  *      execargs_alloc:EINTR            Interrupted function
1206  *      execargs_alloc:ENOMEM           Not enough space
1207  *      exec_save_path:EFAULT           Bad address
1208  *      exec_save_path:ENAMETOOLONG     Filename too long
1209  *      exec_check_permissions:EACCES   Permission denied
1210  *      exec_check_permissions:ENOEXEC  Executable file format error
1211  *      exec_check_permissions:ETXTBSY  Text file busy [misuse of error code]
1212  *      exec_check_permissions:???
1213  *      namei:???
1214  *      vn_rdwr:???                     [anything vn_rdwr can return]
1215  *      <ex_imgact>:???                 [anything an imgact can return]
1216  */
1217 static int
1218 exec_activate_image(struct image_params *imgp)
1219 {
1220         struct nameidata *ndp = NULL;
1221         int error;
1222         int resid;
1223         int once = 1;   /* save SGUID-ness for interpreted files */
1224         int i;
1225         int itercount = 0;
1226         proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1227
1228         error = execargs_alloc(imgp);
1229         if (error)
1230                 goto bad_notrans;
1231
1232         error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
1233         if (error) {
1234                 goto bad_notrans;
1235         }
1236
1237         /* Use imgp->ip_strings, which contains the copyin-ed exec path */
1238         DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
1239
1240         MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
1241         if (ndp == NULL) {
1242                 error = ENOMEM;
1243                 goto bad_notrans;
1244         }
1245
1246         NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1247                    UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
1248
1249 again:
1250         error = namei(ndp);
1251         if (error)
1252                 goto bad_notrans;
1253         imgp->ip_ndp = ndp;     /* successful namei(); call nameidone() later */
1254         imgp->ip_vp = ndp->ni_vp;       /* if set, need to vnode_put() at some point */
1255
1256         /*
1257          * Before we start the transition from binary A to binary B, make
1258          * sure another thread hasn't started exiting the process.  We grab
1259          * the proc lock to check p_lflag initially, and the transition
1260          * mechanism ensures that the value doesn't change after we release
1261          * the lock.
1262          */
1263         proc_lock(p);
1264         if (p->p_lflag & P_LEXIT) {
1265                 proc_unlock(p);
1266                 goto bad_notrans;
1267         }
1268         error = proc_transstart(p, 1, 0);
1269         proc_unlock(p);
1270         if (error)
1271                 goto bad_notrans;
1272
1273         error = exec_check_permissions(imgp);
1274         if (error)
1275                 goto bad;
1276
1277         /* Copy; avoid invocation of an interpreter overwriting the original */
1278         if (once) {
1279                 once = 0;
1280                 *imgp->ip_origvattr = *imgp->ip_vattr;
1281         }
1282
1283         error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1284                         UIO_SYSSPACE, IO_NODELOCKED,
1285                         vfs_context_ucred(imgp->ip_vfs_context),
1286                         &resid, vfs_context_proc(imgp->ip_vfs_context));
1287         if (error)
1288                 goto bad;
1289
1290         if (resid) {
1291                 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
1292         }
1293
1294 encapsulated_binary:
1295         /* Limit the number of iterations we will attempt on each binary */
1296         if (++itercount > EAI_ITERLIMIT) {
1297                 error = EBADEXEC;
1298                 goto bad;
1299         }
1300         error = -1;
1301         for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1302
1303                 error = (*execsw[i].ex_imgact)(imgp);
1304
1305                 switch (error) {
1306                 /* case -1: not claimed: continue */
1307                 case -2:                /* Encapsulated binary, imgp->ip_XXX set for next iteration */
1308                         goto encapsulated_binary;
1309
1310                 case -3:                /* Interpreter */
1311 #if CONFIG_MACF
1312                         /*
1313                          * Copy the script label for later use. Note that
1314                          * the label can be different when the script is
1315                          * actually read by the interpreter.
1316                          */
1317                         if (imgp->ip_scriptlabelp)
1318                                 mac_vnode_label_free(imgp->ip_scriptlabelp);
1319                         imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1320                         if (imgp->ip_scriptlabelp == NULL) {
1321                                 error = ENOMEM;
1322                                 break;
1323                         }
1324                         mac_vnode_label_copy(imgp->ip_vp->v_label,
1325                                              imgp->ip_scriptlabelp);
1326
1327                         /*
1328                          * Take a ref of the script vnode for later use.
1329                          */
1330                         if (imgp->ip_scriptvp)
1331                                 vnode_put(imgp->ip_scriptvp);
1332                         if (vnode_getwithref(imgp->ip_vp) == 0)
1333                                 imgp->ip_scriptvp = imgp->ip_vp;
1334 #endif
1335
1336                         nameidone(ndp);
1337
1338                         vnode_put(imgp->ip_vp);
1339                         imgp->ip_vp = NULL;     /* already put */
1340                         imgp->ip_ndp = NULL; /* already nameidone */
1341
1342                         /* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */
1343                         NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
1344                                    UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
1345
1346                         proc_transend(p, 0);
1347                         goto again;
1348
1349                 default:
1350                         break;
1351                 }
1352         }
1353
1354         /*
1355          * Call out to allow 3rd party notification of exec.
1356          * Ignore result of kauth_authorize_fileop call.
1357          */
1358         if (error == 0 && kauth_authorize_fileop_has_listeners()) {
1359                 kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1360                                         KAUTH_FILEOP_EXEC,
1361                                         (uintptr_t)ndp->ni_vp, 0);
1362         }
1363
1364 bad:
1365         proc_transend(p, 0);
1366
1367 bad_notrans:
1368         if (imgp->ip_strings)
1369                 execargs_free(imgp);
1370         if (imgp->ip_ndp)
1371                 nameidone(imgp->ip_ndp);
1372         if (ndp)
1373                 FREE(ndp, M_TEMP);
1374
1375         return (error);
1376 }
1377
1378
1379 /*
1380  * exec_handle_spawnattr_policy
1381  *
1382  * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task.
1383  *
1384  * Parameters:  proc_t p                process to apply attributes to
1385  *              int psa_apptype         posix spawn attribute apptype
1386  *
1387  * Returns:     0                       Success
1388  */
1389 static errno_t
1390 exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp,
1391                              ipc_port_t * portwatch_ports, int portwatch_count)
1392 {
1393         int apptype     = TASK_APPTYPE_NONE;
1394         int qos_clamp   = THREAD_QOS_UNSPECIFIED;
1395
1396         if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1397                 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1398
1399                 switch(proctype) {
1400                         case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1401                                 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1402                                 break;
1403                         case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1404                                 apptype = TASK_APPTYPE_DAEMON_STANDARD;
1405                                 break;
1406                         case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1407                                 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1408                                 break;
1409                         case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1410                                 apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1411                                 break;
1412                         case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1413                                 apptype = TASK_APPTYPE_APP_DEFAULT;
1414                                 break;
1415                         case POSIX_SPAWN_PROC_TYPE_APP_TAL:
1416                                 apptype = TASK_APPTYPE_APP_TAL;
1417                                 break;
1418                         default:
1419                                 apptype = TASK_APPTYPE_NONE;
1420                                 /* TODO: Should an invalid value here fail the spawn? */
1421                                 break;
1422                 }
1423         }
1424
1425         if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
1426                 switch (psa_qos_clamp) {
1427                         case POSIX_SPAWN_PROC_CLAMP_UTILITY:
1428                                 qos_clamp = THREAD_QOS_UTILITY;
1429                                 break;
1430                         case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
1431                                 qos_clamp = THREAD_QOS_BACKGROUND;
1432                                 break;
1433                         case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
1434                                 qos_clamp = THREAD_QOS_MAINTENANCE;
1435                                 break;
1436                         default:
1437                                 qos_clamp = THREAD_QOS_UNSPECIFIED;
1438                                 /* TODO: Should an invalid value here fail the spawn? */
1439                                 break;
1440                 }
1441         }
1442
1443         if (psa_apptype != TASK_APPTYPE_NONE || qos_clamp != THREAD_QOS_UNSPECIFIED) {
1444                 proc_set_task_spawnpolicy(p->task, apptype, qos_clamp,
1445                                           portwatch_ports, portwatch_count);
1446         }
1447
1448         return (0);
1449 }
1450
1451
1452 /*
1453  * exec_handle_port_actions
1454  *
1455  * Description: Go through the _posix_port_actions_t contents,
1456  *              calling task_set_special_port, task_set_exception_ports
1457  *              and/or audit_session_spawnjoin for the current task.
1458  *
1459  * Parameters:  struct image_params *   Image parameter block
1460  *              short psa_flags         posix spawn attribute flags
1461  *
1462  * Returns:     0                       Success
1463  *              EINVAL                  Failure
1464  *              ENOTSUP                 Illegal posix_spawn attr flag was set
1465  */
1466 static errno_t
1467 exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports)
1468 {
1469         _posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1470         proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1471         _ps_port_action_t *act = NULL;
1472         task_t task = p->task;
1473         ipc_port_t port = NULL;
1474         errno_t ret = 0;
1475         int i;
1476
1477         *portwatch_present = FALSE;
1478
1479         for (i = 0; i < pacts->pspa_count; i++) {
1480                 act = &pacts->pspa_actions[i];
1481
1482                 if (ipc_object_copyin(get_task_ipcspace(current_task()),
1483                     act->new_port, MACH_MSG_TYPE_COPY_SEND,
1484                     (ipc_object_t *) &port) != KERN_SUCCESS) {
1485                         ret = EINVAL;
1486                         goto done;
1487                 }
1488
1489                 switch (act->port_type) {
1490                 case PSPA_SPECIAL:
1491                         /* Only allowed when not under vfork */
1492                         if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1493                                 ret = ENOTSUP;
1494                         else if (task_set_special_port(task,
1495                         act->which, port) != KERN_SUCCESS)
1496                                 ret = EINVAL;
1497                         break;
1498
1499                 case PSPA_EXCEPTION:
1500                         /* Only allowed when not under vfork */
1501                         if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1502                                 ret = ENOTSUP;
1503                         else if (task_set_exception_ports(task,
1504                         act->mask, port, act->behavior,
1505                         act->flavor) != KERN_SUCCESS)
1506                                 ret = EINVAL;
1507                         break;
1508 #if CONFIG_AUDIT
1509                 case PSPA_AU_SESSION:
1510                         ret = audit_session_spawnjoin(p, port);
1511                         break;
1512 #endif
1513                 case PSPA_IMP_WATCHPORTS:
1514                         if (portwatch_ports != NULL) {
1515                                 *portwatch_present = TRUE;
1516                                 /* hold on to this till end of spawn */
1517                                 portwatch_ports[i] = port;
1518                                 ret = 0;
1519                         } else
1520                                 ipc_port_release_send(port);
1521                         break;
1522                 default:
1523                         ret = EINVAL;
1524                         break;
1525                 }
1526
1527                 /* action failed, so release port resources */
1528
1529                 if (ret) {
1530                         ipc_port_release_send(port);
1531                         break;
1532                 }
1533         }
1534
1535 done:
1536         if (0 != ret)
1537                 DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
1538         return (ret);
1539 }
1540
1541 /*
1542  * exec_handle_file_actions
1543  *
1544  * Description: Go through the _posix_file_actions_t contents applying the
1545  *              open, close, and dup2 operations to the open file table for
1546  *              the current process.
1547  *
1548  * Parameters:  struct image_params *   Image parameter block
1549  *
1550  * Returns:     0                       Success
1551  *              ???
1552  *
1553  * Note:        Actions are applied in the order specified, with the credential
1554  *              of the parent process.  This is done to permit the parent
1555  *              process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1556  *              the child following operations the child may in fact not be
1557  *              normally permitted to perform.
1558  */
1559 static int
1560 exec_handle_file_actions(struct image_params *imgp, short psa_flags)
1561 {
1562         int error = 0;
1563         int action;
1564         proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1565         _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
1566         int ival[2];            /* dummy retval for system calls) */
1567
1568         for (action = 0; action < px_sfap->psfa_act_count; action++) {
1569                 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1570
1571                 switch(psfa->psfaa_type) {
1572                 case PSFA_OPEN: {
1573                         /*
1574                          * Open is different, in that it requires the use of
1575                          * a path argument, which is normally copied in from
1576                          * user space; because of this, we have to support an
1577                          * open from kernel space that passes an address space
1578                          * context of UIO_SYSSPACE, and casts the address
1579                          * argument to a user_addr_t.
1580                          */
1581                         char *bufp = NULL;
1582                         struct vnode_attr *vap;
1583                         struct nameidata *ndp;
1584                         int mode = psfa->psfaa_openargs.psfao_mode;
1585                         struct dup2_args dup2a;
1586                         struct close_nocancel_args ca;
1587                         int origfd;
1588
1589                         MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
1590                         if (bufp == NULL) {
1591                                 error = ENOMEM;
1592                                 break;
1593                         }
1594
1595                         vap = (struct vnode_attr *) bufp;
1596                         ndp = (struct nameidata *) (bufp + sizeof(*vap));
1597
1598                         VATTR_INIT(vap);
1599                         /* Mask off all but regular access permissions */
1600                         mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1601                         VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
1602
1603                         NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
1604                                CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1605                                imgp->ip_vfs_context);
1606
1607                         error = open1(imgp->ip_vfs_context,
1608                                         ndp,
1609                                         psfa->psfaa_openargs.psfao_oflag,
1610                                         vap,
1611                                         fileproc_alloc_init, NULL,
1612                                         ival);
1613
1614                         FREE(bufp, M_TEMP);
1615
1616                         /*
1617                          * If there's an error, or we get the right fd by
1618                          * accident, then drop out here.  This is easier than
1619                          * reworking all the open code to preallocate fd
1620                          * slots, and internally taking one as an argument.
1621                          */
1622                         if (error || ival[0] == psfa->psfaa_filedes)
1623                                 break;
1624
1625                         origfd = ival[0];
1626                         /*
1627                          * If we didn't fall out from an error, we ended up
1628                          * with the wrong fd; so now we've got to try to dup2
1629                          * it to the right one.
1630                          */
1631                         dup2a.from = origfd;
1632                         dup2a.to = psfa->psfaa_filedes;
1633
1634                         /*
1635                          * The dup2() system call implementation sets
1636                          * ival to newfd in the success case, but we
1637                          * can ignore that, since if we didn't get the
1638                          * fd we wanted, the error will stop us.
1639                          */
1640                         error = dup2(p, &dup2a, ival);
1641                         if (error)
1642                                 break;
1643
1644                         /*
1645                          * Finally, close the original fd.
1646                          */
1647                         ca.fd = origfd;
1648
1649                         error = close_nocancel(p, &ca, ival);
1650                         }
1651                         break;
1652
1653                 case PSFA_DUP2: {
1654                         struct dup2_args dup2a;
1655
1656                         dup2a.from = psfa->psfaa_filedes;
1657                         dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1658
1659                         /*
1660                          * The dup2() system call implementation sets
1661                          * ival to newfd in the success case, but we
1662                          * can ignore that, since if we didn't get the
1663                          * fd we wanted, the error will stop us.
1664                          */
1665                         error = dup2(p, &dup2a, ival);
1666                         }
1667                         break;
1668
1669                 case PSFA_CLOSE: {
1670                         struct close_nocancel_args ca;
1671
1672                         ca.fd = psfa->psfaa_filedes;
1673
1674                         error = close_nocancel(p, &ca, ival);
1675                         }
1676                         break;
1677
1678                 case PSFA_INHERIT: {
1679                         struct fcntl_nocancel_args fcntla;
1680
1681                         /*
1682                          * Check to see if the descriptor exists, and
1683                          * ensure it's -not- marked as close-on-exec.
1684                          *
1685                          * Attempting to "inherit" a guarded fd will
1686                          * result in a error.
1687                          */
1688                         fcntla.fd = psfa->psfaa_filedes;
1689                         fcntla.cmd = F_GETFD;
1690                         if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0)
1691                                 break;
1692
1693                         if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
1694                                 fcntla.fd = psfa->psfaa_filedes;
1695                                 fcntla.cmd = F_SETFD;
1696                                 fcntla.arg = ival[0] & ~FD_CLOEXEC;
1697                                 error = fcntl_nocancel(p, &fcntla, ival);
1698                         }
1699
1700                         }
1701                         break;
1702
1703                 default:
1704                         error = EINVAL;
1705                         break;
1706                 }
1707
1708                 /* All file actions failures are considered fatal, per POSIX */
1709
1710                 if (error) {
1711                         if (PSFA_OPEN == psfa->psfaa_type) {
1712                                 DTRACE_PROC1(spawn__open__failure, uintptr_t,
1713                                     psfa->psfaa_openargs.psfao_path);
1714                         } else {
1715                                 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1716                         }
1717                         break;
1718                 }
1719         }
1720
1721         if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
1722                 return (error);
1723
1724         /*
1725          * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1726          * this spawn only) as if "close on exec" is the default
1727          * disposition of all pre-existing file descriptors.  In this case,
1728          * the list of file descriptors mentioned in the file actions
1729          * are the only ones that can be inherited, so mark them now.
1730          *
1731          * The actual closing part comes later, in fdexec().
1732          */
1733         proc_fdlock(p);
1734         for (action = 0; action < px_sfap->psfa_act_count; action++) {
1735                 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1736                 int fd = psfa->psfaa_filedes;
1737
1738                 switch (psfa->psfaa_type) {
1739                 case PSFA_DUP2:
1740                         fd = psfa->psfaa_openargs.psfao_oflag;
1741                         /*FALLTHROUGH*/
1742                 case PSFA_OPEN:
1743                 case PSFA_INHERIT:
1744                         *fdflags(p, fd) |= UF_INHERIT;
1745                         break;
1746
1747                 case PSFA_CLOSE:
1748                         break;
1749                 }
1750         }
1751         proc_fdunlock(p);
1752
1753         return (0);
1754 }
1755
1756 #if CONFIG_MACF
1757 /*
1758  * exec_spawnattr_getmacpolicyinfo
1759  */
1760 void *
1761 exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
1762 {
1763         const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1764         int i;
1765
1766         if (psmx == NULL)
1767                 return NULL;
1768
1769         for (i = 0; i < psmx->psmx_count; i++) {
1770                 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1771                 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
1772                         if (lenp != NULL)
1773                                 *lenp = extension->datalen;
1774                         return extension->datap;
1775                 }
1776         }
1777
1778         if (lenp != NULL)
1779                 *lenp = 0;
1780         return NULL;
1781 }
1782
1783 static int
1784 spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
1785 {
1786         _posix_spawn_mac_policy_extensions_t psmx = NULL;
1787         int error = 0;
1788         int copycnt = 0;
1789         int i = 0;
1790
1791         *psmxp = NULL;
1792
1793         if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
1794             px_args->mac_extensions_size > PAGE_SIZE) {
1795                 error = EINVAL;
1796                 goto bad;
1797         }
1798
1799         MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
1800         if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0)
1801                 goto bad;
1802
1803         if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) {
1804                 error = EINVAL;
1805                 goto bad;
1806         }
1807
1808         for (i = 0; i < psmx->psmx_count; i++) {
1809                 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1810                 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
1811                         error = EINVAL;
1812                         goto bad;
1813                 }
1814         }
1815
1816         for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
1817                 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
1818                 void *data = NULL;
1819
1820                 MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
1821                 if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
1822                         FREE(data, M_TEMP);
1823                         goto bad;
1824                 }
1825                 extension->datap = data;
1826         }
1827
1828         *psmxp = psmx;
1829         return 0;
1830
1831 bad:
1832         if (psmx != NULL) {
1833                 for (i = 0; i < copycnt; i++)
1834                         FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1835                 FREE(psmx, M_TEMP);
1836         }
1837         return error;
1838 }
1839
1840 static void
1841 spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
1842 {
1843         int i;
1844
1845         if (psmx == NULL)
1846                 return;
1847         for (i = 0; i < psmx->psmx_count; i++)
1848                 FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1849         FREE(psmx, M_TEMP);
1850 }
1851 #endif /* CONFIG_MACF */
1852
1853 /*
1854  * posix_spawn
1855  *
1856  * Parameters:  uap->pid                Pointer to pid return area
1857  *              uap->fname              File name to exec
1858  *              uap->argp               Argument list
1859  *              uap->envp               Environment list
1860  *
1861  * Returns:     0                       Success
1862  *              EINVAL                  Invalid argument
1863  *              ENOTSUP                 Not supported
1864  *              ENOEXEC                 Executable file format error
1865  *      exec_activate_image:EINVAL      Invalid argument
1866  *      exec_activate_image:EACCES      Permission denied
1867  *      exec_activate_image:EINTR       Interrupted function
1868  *      exec_activate_image:ENOMEM      Not enough space
1869  *      exec_activate_image:EFAULT      Bad address
1870  *      exec_activate_image:ENAMETOOLONG        Filename too long
1871  *      exec_activate_image:ENOEXEC     Executable file format error
1872  *      exec_activate_image:ETXTBSY     Text file busy [misuse of error code]
1873  *      exec_activate_image:EBADEXEC    The executable is corrupt/unknown
1874  *      exec_activate_image:???
1875  *      mac_execve_enter:???
1876  *
1877  * TODO:        Expect to need __mac_posix_spawn() at some point...
1878  *              Handle posix_spawnattr_t
1879  *              Handle posix_spawn_file_actions_t
1880  */
1881 int
1882 posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
1883 {
1884         proc_t p = ap;          /* quiet bogus GCC vfork() warning */
1885         user_addr_t pid = uap->pid;
1886         int ival[2];            /* dummy retval for setpgid() */
1887         char *bufp = NULL;
1888         struct image_params *imgp;
1889         struct vnode_attr *vap;
1890         struct vnode_attr *origvap;
1891         struct uthread  *uthread = 0;   /* compiler complains if not set to 0*/
1892         int error, sig;
1893         char alt_p_comm[sizeof(p->p_comm)] = {0};       /* for PowerPC */
1894         int is_64 = IS_64BIT_PROCESS(p);
1895         struct vfs_context context;
1896         struct user__posix_spawn_args_desc px_args;
1897         struct _posix_spawnattr px_sa;
1898         _posix_spawn_file_actions_t px_sfap = NULL;
1899         _posix_spawn_port_actions_t px_spap = NULL;
1900         struct __kern_sigaction vec;
1901         boolean_t spawn_no_exec = FALSE;
1902         boolean_t proc_transit_set = TRUE;
1903         boolean_t exec_done = FALSE;
1904         int portwatch_count = 0;
1905         ipc_port_t * portwatch_ports = NULL;
1906         vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
1907
1908         /*
1909          * Allocate a big chunk for locals instead of using stack since these
1910          * structures are pretty big.
1911          */
1912         MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
1913         imgp = (struct image_params *) bufp;
1914         if (bufp == NULL) {
1915                 error = ENOMEM;
1916                 goto bad;
1917         }
1918         vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
1919         origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
1920
1921         /* Initialize the common data in the image_params structure */
1922         imgp->ip_user_fname = uap->path;
1923         imgp->ip_user_argv = uap->argv;
1924         imgp->ip_user_envv = uap->envp;
1925         imgp->ip_vattr = vap;
1926         imgp->ip_origvattr = origvap;
1927         imgp->ip_vfs_context = &context;
1928         imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
1929         imgp->ip_p_comm = alt_p_comm;           /* for PowerPC */
1930         imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
1931         imgp->ip_mac_return = 0;
1932
1933         if (uap->adesc != USER_ADDR_NULL) {
1934                 if(is_64) {
1935                         error = copyin(uap->adesc, &px_args, sizeof(px_args));
1936                 } else {
1937                         struct user32__posix_spawn_args_desc px_args32;
1938
1939                         error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
1940
1941                         /*
1942                          * Convert arguments descriptor from external 32 bit
1943                          * representation to internal 64 bit representation
1944                          */
1945                         px_args.attr_size = px_args32.attr_size;
1946                         px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
1947                         px_args.file_actions_size = px_args32.file_actions_size;
1948                         px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
1949                         px_args.port_actions_size = px_args32.port_actions_size;
1950                         px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
1951                         px_args.mac_extensions_size = px_args32.mac_extensions_size;
1952                         px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
1953                 }
1954                 if (error)
1955                         goto bad;
1956
1957                 if (px_args.attr_size != 0) {
1958                         /*
1959                          * We are not copying the port_actions pointer,
1960                          * because we already have it from px_args.
1961                          * This is a bit fragile: <rdar://problem/16427422>
1962                          */
1963
1964                         if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0))
1965                         goto bad;
1966
1967                         bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
1968
1969                         imgp->ip_px_sa = &px_sa;
1970                 }
1971                 if (px_args.file_actions_size != 0) {
1972                         /* Limit file_actions to allowed number of open files */
1973                         int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
1974                         if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
1975                                 px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) {
1976                                 error = EINVAL;
1977                                 goto bad;
1978                         }
1979                         MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
1980                         if (px_sfap == NULL) {
1981                                 error = ENOMEM;
1982                                 goto bad;
1983                         }
1984                         imgp->ip_px_sfa = px_sfap;
1985
1986                         if ((error = copyin(px_args.file_actions, px_sfap,
1987                                                         px_args.file_actions_size)) != 0)
1988                                 goto bad;
1989
1990                         /* Verify that the action count matches the struct size */
1991                         if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) {
1992                                 error = EINVAL;
1993                                 goto bad;
1994                         }
1995                 }
1996                 if (px_args.port_actions_size != 0) {
1997                         /* Limit port_actions to one page of data */
1998                         if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
1999                                 px_args.port_actions_size > PAGE_SIZE) {
2000                                 error = EINVAL;
2001                                 goto bad;
2002                         }
2003
2004                         MALLOC(px_spap, _posix_spawn_port_actions_t,
2005                                         px_args.port_actions_size, M_TEMP, M_WAITOK);
2006                         if (px_spap == NULL) {
2007                                 error = ENOMEM;
2008                                 goto bad;
2009                         }
2010                         imgp->ip_px_spa = px_spap;
2011
2012                         if ((error = copyin(px_args.port_actions, px_spap,
2013                                                         px_args.port_actions_size)) != 0)
2014                                 goto bad;
2015
2016                         /* Verify that the action count matches the struct size */
2017                         if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) {
2018                                 error = EINVAL;
2019                                 goto bad;
2020                         }
2021                 }
2022 #if CONFIG_MACF
2023                 if (px_args.mac_extensions_size != 0) {
2024                         if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
2025                                 goto bad;
2026                 }
2027 #endif /* CONFIG_MACF */
2028         }
2029
2030         /* set uthread to parent */
2031         uthread = get_bsdthread_info(current_thread());
2032
2033         /*
2034          * <rdar://6640530>; this does not result in a behaviour change
2035          * relative to Leopard, so there should not be any existing code
2036          * which depends on it.
2037          */
2038         if (uthread->uu_flag & UT_VFORK) {
2039             error = EINVAL;
2040             goto bad;
2041         }
2042
2043         /*
2044          * If we don't have the extension flag that turns "posix_spawn()"
2045          * into "execve() with options", then we will be creating a new
2046          * process which does not inherit memory from the parent process,
2047          * which is one of the most expensive things about using fork()
2048          * and execve().
2049          */
2050         if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
2051
2052                 /*
2053                  * Set the new task's coalition, if it is requested.
2054                  * TODO: privilege check - 15365900
2055                  */
2056                 coalition_t coal = COALITION_NULL;
2057 #if CONFIG_COALITIONS
2058                 if (imgp->ip_px_sa) {
2059                         uint64_t cid = px_sa.psa_coalitionid;
2060                         if (cid != 0) {
2061 #if COALITION_DEBUG
2062                                 printf("%s: searching for coalition ID %llu\n", __func__, cid);
2063 #endif
2064                                 coal = coalition_find_and_activate_by_id(cid);
2065                                 if (coal == COALITION_NULL) {
2066 #if COALITION_DEBUG
2067                                         printf("%s: could not find coalition ID %llu (perhaps it has been terminated or reaped)\n", __func__, cid);
2068 #endif
2069                                         error = ESRCH;
2070                                         goto bad;
2071                                 }
2072                         }
2073                 }
2074 #endif /* CONFIG_COALITIONS */
2075
2076                 error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
2077
2078                 if (error != 0) {
2079                         if (coal != COALITION_NULL) {
2080 #if CONFIG_COALITIONS
2081                                 coalition_remove_active(coal);
2082                                 coalition_release(coal);
2083 #endif /* CONFIG_COALITIONS */
2084                         }
2085                         goto bad;
2086                 }
2087                 imgp->ip_flags |= IMGPF_SPAWN;  /* spawn w/o exec */
2088                 spawn_no_exec = TRUE;           /* used in later tests */
2089
2090                 if (coal != COALITION_NULL) {
2091 #if CONFIG_COALITIONS
2092                         coalition_remove_active(coal);
2093                         coalition_release(coal);
2094 #endif /* CONFIG_COALITIONS */
2095                 }
2096         }
2097
2098         if (spawn_no_exec) {
2099                 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
2100
2101                 /*
2102                  * We had to wait until this point before firing the
2103                  * proc:::create probe, otherwise p would not point to the
2104                  * child process.
2105                  */
2106                 DTRACE_PROC1(create, proc_t, p);
2107         }
2108         assert(p != NULL);
2109
2110         /* By default, the thread everyone plays with is the parent */
2111         context.vc_thread = current_thread();
2112         context.vc_ucred = p->p_ucred;  /* XXX must NOT be kauth_cred_get() */
2113
2114         /*
2115          * However, if we're not in the setexec case, redirect the context
2116          * to the newly created process instead
2117          */
2118         if (spawn_no_exec)
2119                 context.vc_thread = imgp->ip_new_thread;
2120
2121         /*
2122          * Post fdcopy(), pre exec_handle_sugid() - this is where we want
2123          * to handle the file_actions.  Since vfork() also ends up setting
2124          * us into the parent process group, and saved off the signal flags,
2125          * this is also where we want to handle the spawn flags.
2126          */
2127
2128         /* Has spawn file actions? */
2129         if (imgp->ip_px_sfa != NULL) {
2130                 /*
2131                  * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2132                  * is handled in exec_handle_file_actions().
2133                  */
2134                 if ((error = exec_handle_file_actions(imgp,
2135                     imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
2136                         goto bad;
2137         }
2138
2139         /* Has spawn port actions? */
2140         if (imgp->ip_px_spa != NULL) {
2141                 boolean_t is_adaptive = FALSE;
2142                 boolean_t portwatch_present = FALSE;
2143
2144                 /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
2145                 if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2146                         is_adaptive = TRUE;
2147
2148                 /*
2149                  * portwatch only:
2150                  * Allocate a place to store the ports we want to bind to the new task
2151                  * We can't bind them until after the apptype is set.
2152                  */
2153                 if (px_spap->pspa_count != 0 && is_adaptive) {
2154                         portwatch_count = px_spap->pspa_count;
2155                         MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO);
2156                 } else {
2157                         portwatch_ports = NULL;
2158                 }
2159
2160                 if ((error = exec_handle_port_actions(imgp,
2161                     imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &portwatch_present, portwatch_ports)) != 0)
2162                         goto bad;
2163
2164                 if (portwatch_present == FALSE && portwatch_ports != NULL) {
2165                         FREE(portwatch_ports, M_TEMP);
2166                         portwatch_ports = NULL;
2167                         portwatch_count = 0;
2168                 }
2169         }
2170
2171         /* Has spawn attr? */
2172         if (imgp->ip_px_sa != NULL) {
2173                 /*
2174                  * Set the process group ID of the child process; this has
2175                  * to happen before the image activation.
2176                  */
2177                 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2178                         struct setpgid_args spga;
2179                         spga.pid = p->p_pid;
2180                         spga.pgid = px_sa.psa_pgroup;
2181                         /*
2182                          * Effectively, call setpgid() system call; works
2183                          * because there are no pointer arguments.
2184                          */
2185                         if((error = setpgid(p, &spga, ival)) != 0)
2186                                 goto bad;
2187                 }
2188
2189                 /*
2190                  * Reset UID/GID to parent's RUID/RGID; This works only
2191                  * because the operation occurs *after* the vfork() and
2192                  * before the call to exec_handle_sugid() by the image
2193                  * activator called from exec_activate_image().  POSIX
2194                  * requires that any setuid/setgid bits on the process
2195                  * image will take precedence over the spawn attributes
2196                  * (re)setting them.
2197                  *
2198                  * The use of p_ucred is safe, since we are acting on the
2199                  * new process, and it has no threads other than the one
2200                  * we are creating for it.
2201                  */
2202                 if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2203                         kauth_cred_t my_cred = p->p_ucred;
2204                         kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2205                         if (my_new_cred != my_cred) {
2206                                 p->p_ucred = my_new_cred;
2207                                 /* update cred on proc */
2208                                 PROC_UPDATE_CREDS_ONPROC(p);
2209                         }
2210                 }
2211
2212                 /*
2213                  * Disable ASLR for the spawned process.
2214                  */
2215                 /*
2216                  * But only do so if we are not embedded; embedded allows for a
2217                  * boot-arg (-disable_aslr) to deal with this (which itself is
2218                  * only honored on DEVELOPMENT or DEBUG builds of xnu).
2219                  */
2220                 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2221                         OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2222
2223                 /*
2224                  * Forcibly disallow execution from data pages for the spawned process
2225                  * even if it would otherwise be permitted by the architecture default.
2226                  */
2227                 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2228                         imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
2229         }
2230
2231         /*
2232          * Disable ASLR during image activation.  This occurs either if the
2233          * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2234          * P_DISABLE_ASLR was inherited from the parent process.
2235          */
2236         if (p->p_flag & P_DISABLE_ASLR)
2237                 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
2238
2239         /*
2240          * Clear transition flag so we won't hang if exec_activate_image() causes
2241          * an automount (and launchd does a proc sysctl to service it).
2242          *
2243          * <rdar://problem/6848672>, <rdar://problem/5959568>.
2244          */
2245         if (spawn_no_exec) {
2246                 proc_transend(p, 0);
2247                 proc_transit_set = 0;
2248         }
2249
2250 #if MAC_SPAWN   /* XXX */
2251         if (uap->mac_p != USER_ADDR_NULL) {
2252                 error = mac_execve_enter(uap->mac_p, imgp);
2253                 if (error)
2254                         goto bad;
2255         }
2256 #endif
2257
2258         /*
2259          * Activate the image
2260          */
2261         error = exec_activate_image(imgp);
2262
2263         if (error == 0) {
2264                 /* process completed the exec */
2265                 exec_done = TRUE;
2266         } else if (error == -1) {
2267                 /* Image not claimed by any activator? */
2268                 error = ENOEXEC;
2269         }
2270
2271         /*
2272          * If we have a spawn attr, and it contains signal related flags,
2273          * then we need to process them in the "context" of the new child
2274          * process, so we have to process it following image activation,
2275          * prior to making the thread runnable in user space.  This is
2276          * necessitated by some signal information being per-thread rather
2277          * than per-process, and we don't have the new allocation in hand
2278          * until after the image is activated.
2279          */
2280         if (!error && imgp->ip_px_sa != NULL) {
2281                 thread_t child_thread = current_thread();
2282                 uthread_t child_uthread = uthread;
2283
2284                 /*
2285                  * If we created a new child thread, then the thread and
2286                  * uthread are different than the current ones; otherwise,
2287                  * we leave them, since we are in the exec case instead.
2288                  */
2289                 if (spawn_no_exec) {
2290                         child_thread = imgp->ip_new_thread;
2291                         child_uthread = get_bsdthread_info(child_thread);
2292                 }
2293
2294                 /*
2295                  * Mask a list of signals, instead of them being unmasked, if
2296                  * they were unmasked in the parent; note that some signals
2297                  * are not maskable.
2298                  */
2299                 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
2300                         child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2301                 /*
2302                  * Default a list of signals instead of ignoring them, if
2303                  * they were ignored in the parent.  Note that we pass
2304                  * spawn_no_exec to setsigvec() to indicate that we called
2305                  * fork1() and therefore do not need to call proc_signalstart()
2306                  * internally.
2307                  */
2308                 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2309                         vec.sa_handler = SIG_DFL;
2310                         vec.sa_tramp = 0;
2311                         vec.sa_mask = 0;
2312                         vec.sa_flags = 0;
2313                         for (sig = 0; sig < NSIG; sig++)
2314                                 if (px_sa.psa_sigdefault & (1 << sig)) {
2315                                         error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec);
2316                         }
2317                 }
2318
2319                 /*
2320                  * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2321                  * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2322                  * limit.
2323                  *
2324                  * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2325                  */
2326                 if (px_sa.psa_cpumonitor_percent != 0) {
2327                         /*
2328                          * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2329                          * an entitlement to configure the monitor a certain way seems silly, since
2330                          * whomever is turning it on could just as easily choose not to do so.
2331                          *
2332                          * XXX - Ignore the parameters that we get from userland. The spawnattr method of
2333                          * activating the monitor always gets the system default parameters. Once we have
2334                          * an explicit spawn SPI for configuring the defaults, we can revert this to
2335                          * respect the params passed in from userland.
2336                          */
2337                         error = proc_set_task_ruse_cpu(p->task,
2338                                         TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
2339                                         PROC_POLICY_CPUMON_DEFAULTS, 0,
2340                                         0, TRUE);
2341                 }
2342         }
2343
2344 bad:
2345
2346         if (error == 0) {
2347                 /* reset delay idle sleep status if set */
2348                 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2349                         OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
2350                 /* upon  successful spawn, re/set the proc control state */
2351                 if (imgp->ip_px_sa != NULL) {
2352                         switch (px_sa.psa_pcontrol) {
2353                                 case POSIX_SPAWN_PCONTROL_THROTTLE:
2354                                         p->p_pcaction = P_PCTHROTTLE;
2355                                         break;
2356                                 case POSIX_SPAWN_PCONTROL_SUSPEND:
2357                                         p->p_pcaction = P_PCSUSP;
2358                                         break;
2359                                 case POSIX_SPAWN_PCONTROL_KILL:
2360                                         p->p_pcaction = P_PCKILL;
2361                                         break;
2362                                 case POSIX_SPAWN_PCONTROL_NONE:
2363                                 default:
2364                                         p->p_pcaction = 0;
2365                                         break;
2366                         };
2367                 }
2368                 exec_resettextvp(p, imgp);
2369
2370 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
2371                 /* Has jetsam attributes? */
2372                 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2373                         memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2374                             TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND),
2375                                             (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL));
2376                 }
2377 #endif
2378         }
2379
2380         /*
2381          * If we successfully called fork1(), we always need to do this;
2382          * we identify this case by noting the IMGPF_SPAWN flag.  This is
2383          * because we come back from that call with signals blocked in the
2384          * child, and we have to unblock them, but we want to wait until
2385          * after we've performed any spawn actions.  This has to happen
2386          * before check_for_signature(), which uses psignal.
2387          */
2388         if (spawn_no_exec) {
2389                 if (proc_transit_set)
2390                         proc_transend(p, 0);
2391
2392                 /*
2393                  * Drop the signal lock on the child which was taken on our
2394                  * behalf by forkproc()/cloneproc() to prevent signals being
2395                  * received by the child in a partially constructed state.
2396                  */
2397                 proc_signalend(p, 0);
2398
2399                 /* flag the 'fork' has occurred */
2400                 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
2401                 /* then flag exec has occurred */
2402                 /* notify only if it has not failed due to FP Key error */
2403                 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
2404                         proc_knote(p, NOTE_EXEC);
2405         } else if (error == 0) {
2406                 /* reset the importance attribute from our previous life */
2407                 task_importance_reset(p->task);
2408
2409                 /* reset atm context from task */
2410                 task_atm_reset(p->task);
2411         }
2412
2413         /*
2414          * Apply the spawnattr policy, apptype (which primes the task for importance donation),
2415          * and bind any portwatch ports to the new task.
2416          * This must be done after the exec so that the child's thread is ready,
2417          * and after the in transit state has been released, because priority is
2418          * dropped here so we need to be prepared for a potentially long preemption interval
2419          *
2420          * TODO: Consider splitting this up into separate phases
2421          */
2422         if (error == 0 && imgp->ip_px_sa != NULL) {
2423                 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
2424
2425                 exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp,
2426                                               portwatch_ports, portwatch_count);
2427         }
2428
2429         /* Apply the main thread qos */
2430         if (error == 0) {
2431                 thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread();
2432
2433                 task_set_main_thread_qos(p->task, main_thread);
2434         }
2435
2436         /*
2437          * Release any ports we kept around for binding to the new task
2438          * We need to release the rights even if the posix_spawn has failed.
2439          */
2440         if (portwatch_ports != NULL) {
2441                 for (int i = 0; i < portwatch_count; i++) {
2442                         ipc_port_t port = NULL;
2443                         if ((port = portwatch_ports[i]) != NULL) {
2444                                 ipc_port_release_send(port);
2445                         }
2446                 }
2447                 FREE(portwatch_ports, M_TEMP);
2448                 portwatch_ports = NULL;
2449                 portwatch_count = 0;
2450         }
2451
2452         /*
2453          * We have to delay operations which might throw a signal until after
2454          * the signals have been unblocked; however, we want that to happen
2455          * after exec_resettextvp() so that the textvp is correct when they
2456          * fire.
2457          */
2458         if (error == 0) {
2459                 error = check_for_signature(p, imgp);
2460
2461                 /*
2462                  * Pay for our earlier safety; deliver the delayed signals from
2463                  * the incomplete spawn process now that it's complete.
2464                  */
2465                 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
2466                         psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
2467                 }
2468         }
2469
2470
2471         if (imgp != NULL) {
2472                 if (imgp->ip_vp)
2473                         vnode_put(imgp->ip_vp);
2474                 if (imgp->ip_scriptvp)
2475                         vnode_put(imgp->ip_scriptvp);
2476                 if (imgp->ip_strings)
2477                         execargs_free(imgp);
2478                 if (imgp->ip_px_sfa != NULL)
2479                         FREE(imgp->ip_px_sfa, M_TEMP);
2480                 if (imgp->ip_px_spa != NULL)
2481                         FREE(imgp->ip_px_spa, M_TEMP);
2482
2483 #if CONFIG_MACF
2484                 if (imgp->ip_px_smpx != NULL)
2485                         spawn_free_macpolicyinfo(imgp->ip_px_smpx);
2486                 if (imgp->ip_execlabelp)
2487                         mac_cred_label_free(imgp->ip_execlabelp);
2488                 if (imgp->ip_scriptlabelp)
2489                         mac_vnode_label_free(imgp->ip_scriptlabelp);
2490 #endif
2491         }
2492
2493 #if CONFIG_DTRACE
2494         if (spawn_no_exec) {
2495                 /*
2496                  * In the original DTrace reference implementation,
2497                  * posix_spawn() was a libc routine that just
2498                  * did vfork(2) then exec(2).  Thus the proc::: probes
2499                  * are very fork/exec oriented.  The details of this
2500                  * in-kernel implementation of posix_spawn() is different
2501                  * (while producing the same process-observable effects)
2502                  * particularly w.r.t. errors, and which thread/process
2503                  * is constructing what on behalf of whom.
2504                  */
2505                 if (error) {
2506                         DTRACE_PROC1(spawn__failure, int, error);
2507                 } else {
2508                         DTRACE_PROC(spawn__success);
2509                         /*
2510                          * Some DTrace scripts, e.g. newproc.d in
2511                          * /usr/bin, rely on the 'exec-success'
2512                          * probe being fired in the child after the
2513                          * new process image has been constructed
2514                          * in order to determine the associated pid.
2515                          *
2516                          * So, even though the parent built the image
2517                          * here, for compatibility, mark the new thread
2518                          * so 'exec-success' fires on it as it leaves
2519                          * the kernel.
2520                          */
2521                         dtrace_thread_didexec(imgp->ip_new_thread);
2522                 }
2523         } else {
2524                 if (error) {
2525                         DTRACE_PROC1(exec__failure, int, error);
2526                 } else {
2527                         DTRACE_PROC(exec__success);
2528                 }
2529         }
2530
2531         if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
2532                 (*dtrace_proc_waitfor_hook)(p);
2533 #endif
2534
2535         /* Return to both the parent and the child? */
2536         if (imgp != NULL && spawn_no_exec) {
2537                 /*
2538                  * If the parent wants the pid, copy it out
2539                  */
2540                 if (pid != USER_ADDR_NULL)
2541                         (void)suword(pid, p->p_pid);
2542                 retval[0] = error;
2543
2544                 /*
2545                  * If we had an error, perform an internal reap ; this is
2546                  * entirely safe, as we have a real process backing us.
2547                  */
2548                 if (error) {
2549                         proc_list_lock();
2550                         p->p_listflag |= P_LIST_DEADPARENT;
2551                         proc_list_unlock();
2552                         proc_lock(p);
2553                         /* make sure no one else has killed it off... */
2554                         if (p->p_stat != SZOMB && p->exit_thread == NULL) {
2555                                 p->exit_thread = current_thread();
2556                                 proc_unlock(p);
2557                                 exit1(p, 1, (int *)NULL);
2558                                 if (exec_done == FALSE) {
2559                                         task_deallocate(get_threadtask(imgp->ip_new_thread));
2560                                         thread_deallocate(imgp->ip_new_thread);
2561                                 }
2562                         } else {
2563                                 /* someone is doing it for us; just skip it */
2564                                 proc_unlock(p);
2565                         }
2566                 } else {
2567
2568                         /*
2569                          * Return to the child
2570                          *
2571                          * Note: the image activator earlier dropped the
2572                          * task/thread references to the newly spawned
2573                          * process; this is OK, since we still have suspended
2574                          * queue references on them, so we should be fine
2575                          * with the delayed resume of the thread here.
2576                          */
2577                         (void)thread_resume(imgp->ip_new_thread);
2578                 }
2579         }
2580         if (bufp != NULL) {
2581                 FREE(bufp, M_TEMP);
2582         }
2583
2584         return(error);
2585 }
2586
2587
2588 /*
2589  * execve
2590  *
2591  * Description: Exec a file; wrapper for __mac_execve() with a null MAC label
2592  *
2593  * Parameters:  uap->fname              File name to exec
2594  *              uap->argp               Argument list
2595  *              uap->envp               Environment list
2596  *
2597  * Returns:     0                       Success
2598  *      __mac_execve:EINVAL             Invalid argument
2599  *      __mac_execve:ENOTSUP            Not supported
2600  *      __mac_execve:EACCES             Permission denied
2601  *      __mac_execve:EINTR              Interrupted function
2602  *      __mac_execve:ENOMEM             Not enough space
2603  *      __mac_execve:EFAULT             Bad address
2604  *      __mac_execve:ENAMETOOLONG       Filename too long
2605  *      __mac_execve:ENOEXEC            Executable file format error
2606  *      __mac_execve:ETXTBSY            Text file busy [misuse of error code]
2607  *      __mac_execve:???
2608  *
2609  * TODO:        Dynamic linker header address on stack is copied via suword()
2610  */
2611 /* ARGSUSED */
2612 int
2613 execve(proc_t p, struct execve_args *uap, int32_t *retval)
2614 {
2615         struct __mac_execve_args mac_args;
2616
2617         memoryshot(VM_EXECVE, DBG_FUNC_NONE);
2618
2619         mac_args.mac_p = USER_ADDR_NULL;        /* plain execve: no MAC label */
2620         mac_args.envp = uap->envp;
2621         mac_args.argp = uap->argp;
2622         mac_args.fname = uap->fname;
2623
2624         return (__mac_execve(p, &mac_args, retval));
2625 }
2626
2627 /*
2628  * __mac_execve
2629  *
2630  * Description: Exec a file, optionally applying a caller-supplied MAC label
2631  *
2632  * Parameters:  uap->fname              File name to exec
2633  *              uap->argp               Argument list
2634  *              uap->envp               Environment list
2635  *              uap->mac_p              MAC label supplied by caller
2636  *
2637  * Returns:     0                       Success
2638  *              EINVAL                  Invalid argument
2639  *              ENOTSUP                 Not supported
2640  *              ENOEXEC                 Executable file format error
2641  *              ENOMEM                  Not enough space
2642  *      exec_activate_image:EINVAL      Invalid argument
2643  *      exec_activate_image:EACCES      Permission denied
2644  *      exec_activate_image:EINTR       Interrupted function
2645  *      exec_activate_image:ENOMEM      Not enough space
2646  *      exec_activate_image:EFAULT      Bad address
2647  *      exec_activate_image:ENAMETOOLONG        Filename too long
2648  *      exec_activate_image:ENOEXEC     Executable file format error
2649  *      exec_activate_image:ETXTBSY     Text file busy [misuse of error code]
2650  *      exec_activate_image:EBADEXEC    The executable is corrupt/unknown
2651  *      exec_activate_image:???
2652  *      mac_execve_enter:???
2653  *      check_for_signature:???
2654  *
2655  * TODO:        Dynamic linker header address on stack is copied via suword()
2656  */
2657 int
2658 __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
2659 {
2660         char *bufp = NULL;
2661         struct image_params *imgp;
2662         struct vnode_attr *vap;
2663         struct vnode_attr *origvap;
2664         int error;
2665         char alt_p_comm[sizeof(p->p_comm)] = {0};       /* for PowerPC */
2666         int is_64 = IS_64BIT_PROCESS(p);
2667         struct vfs_context context;
2668         struct uthread  *uthread;
2669
2670         context.vc_thread = current_thread();
2671         context.vc_ucred = kauth_cred_proc_ref(p);      /* XXX must NOT be kauth_cred_get() */
2672
2673         /* One heap chunk holds imgp, vap and origvap; they are too big for the stack */
2674         MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
2675         imgp = (struct image_params *) bufp;
2676         if (bufp == NULL) {
2677                 kauth_cred_unref(&context.vc_ucred);    /* don't leak the ref taken above */
2678                 error = ENOMEM;
2679                 goto exit_with_error;
2680         }
2681         vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
2682         origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2683
2684         /* Initialize the common data in the image_params structure */
2685         imgp->ip_user_fname = uap->fname;
2686         imgp->ip_user_argv = uap->argp;
2687         imgp->ip_user_envv = uap->envp;
2688         imgp->ip_vattr = vap;
2689         imgp->ip_origvattr = origvap;
2690         imgp->ip_vfs_context = &context;
2691         imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
2692         imgp->ip_p_comm = alt_p_comm;           /* for PowerPC */
2693         imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2694         imgp->ip_mac_return = 0;
2695
2696         uthread = get_bsdthread_info(current_thread());
2697         if (uthread->uu_flag & UT_VFORK) {
2698                 imgp->ip_flags |= IMGPF_VFORK_EXEC;
2699         }
2700
2701 #if CONFIG_MACF
2702         if (uap->mac_p != USER_ADDR_NULL) {
2703                 error = mac_execve_enter(uap->mac_p, imgp);
2704                 if (error) {
2705                         kauth_cred_unref(&context.vc_ucred);
2706                         goto exit_with_error;
2707                 }
2708         }
2709 #endif
2710
2711         error = exec_activate_image(imgp);
2712         kauth_cred_unref(&context.vc_ucred);    /* balances kauth_cred_proc_ref() above */
2713
2714         /* Image not claimed by any activator? */
2715         if (error == -1)
2716                 error = ENOEXEC;
2717
2718         if (error == 0) {
2719                 exec_resettextvp(p, imgp);
2720                 error = check_for_signature(p, imgp);
2721         }
2722         if (imgp->ip_vp != NULLVP)
2723                 vnode_put(imgp->ip_vp);
2724         if (imgp->ip_scriptvp != NULLVP)
2725                 vnode_put(imgp->ip_scriptvp);
2726         if (imgp->ip_strings)
2727                 execargs_free(imgp);
2728 #if CONFIG_MACF
2729         if (imgp->ip_execlabelp)
2730                 mac_cred_label_free(imgp->ip_execlabelp);
2731         if (imgp->ip_scriptlabelp)
2732                 mac_vnode_label_free(imgp->ip_scriptlabelp);
2733 #endif
2734         if (!error) {
2735                 /* Sever any extant thread affinity */
2736                 thread_affinity_exec(current_thread());
2737
2738                 thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread();
2739                 task_set_main_thread_qos(p->task, main_thread);
2740
2741                 /* reset task importance */
2742                 task_importance_reset(p->task);
2743
2744                 /* reset atm context from task */
2745                 task_atm_reset(p->task);
2746
2747                 DTRACE_PROC(exec__success);
2748 #if CONFIG_DTRACE
2749                 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
2750                         (*dtrace_proc_waitfor_hook)(p);
2751 #endif
2752
2753                 if (imgp->ip_flags & IMGPF_VFORK_EXEC) {
2754                         vfork_return(p, retval, p->p_pid);
2755                         (void)thread_resume(imgp->ip_new_thread);
2756                 }
2757         } else {
2758                 DTRACE_PROC1(exec__failure, int, error);
2759         }
2760
2761 exit_with_error:
2762         if (bufp != NULL) {
2763                 FREE(bufp, M_TEMP);
2764         }
2765
2766         return(error);
2767 }
2768
2769
2770 /*
2771  * copyinptr
2772  *
2773  * Description: Copy a pointer in from user space to a user_addr_t in kernel
2774  *              space, based on 32/64 bitness of the user space
2775  *
2776  * Parameters:  froma                   User space address
2777  *              toptr                   Address of kernel space user_addr_t
2778  *              ptr_size                4/8, based on 'froma' address space
2779  *
2780  * Returns:     0                       Success
2781  *              EFAULT                  Bad 'froma'
2782  *
2783  * Implicit returns:
2784  *              *toptr                  Pointer read; 0 if a 4 byte copyin fails
2785  */
2786 static int
2787 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
2788 {
2789         int error;
2790
2791         if (ptr_size == 4) {
2792                 /* 64 bit value containing 32 bit address */
2793                 unsigned int i = 0;     /* defined even if copyin() fails */
2794
2795                 error = copyin(froma, &i, 4);
2796                 *toptr = CAST_USER_ADDR_T(i);   /* SAFE */
2797         } else {
2798                 error = copyin(froma, toptr, 8);
2799         }
2800         return (error);
2801 }
2802
2803
2804 /*
2805  * copyoutptr
2806  *
2807  * Description: Copy a pointer value held in a kernel space user_addr_t out
2808  *              to user space, based on 32/64 bitness of the user space
2809  *
2810  * Parameters:  ua                      Pointer value to copy out
2811  *              ptr                     User space address to copy to
2812  *              ptr_size                4/8, size of a user space pointer
2813  *
2814  * Returns:     0                       Success
2815  *              EFAULT                  Bad 'ptr'
2816  *
2817  */
2818 static int
2819 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
2820 {
2821         unsigned int low32;
2822
2823         /* Anything but 4 byte pointers: store the full 8 byte value */
2824         if (ptr_size != 4)
2825                 return (copyout(&ua, ptr, 8));
2826
2827         /*
2828          * 64 bit value containing 32 bit address; narrow it and store
2829          * only the 4 byte form
2830          */
2831         low32 = CAST_DOWN_EXPLICIT(unsigned int,ua);    /* SAFE */
2832         return (copyout(&low32, ptr, 4));
2833 }
2833
2834
2835 /*
2836  * exec_copyout_strings
2837  *
2838  * Copy out the strings segment to user space.  The strings segment is put
2839  * on a preinitialized stack frame.
2840  *
2841  * Parameters:  struct image_params *   the image parameter block
2842  *              int *                   a pointer to the stack offset variable
2843  *
2844  * Returns:     0                       Success
2845  *              !0                      Failure: errno
2846  *
2847  * Implicit returns:
2848  *              (*stackp)               The stack offset, modified
2849  *
2850  * Note:        The strings segment layout is backward, from the beginning
2851  *              of the top of the stack to consume the minimal amount of
2852  *              space possible; the returned stack pointer points to the
2853  *              end of the area consumed (stacks grow downward).
2854  *
2855  *              argc is an int; arg[i] are pointers; env[i] are pointers;
2856  *              the 0's are (void *)NULL's
2857  *
2858  * The stack frame layout is:
2859  *
2860  *      +-------------+ <- p->user_stack
2861  *      |     16b     |
2862  *      +-------------+
2863  *      | STRING AREA |
2864  *      |      :      |
2865  *      |      :      |
2866  *      |      :      |
2867  *      +- -- -- -- --+
2868  *      |  PATH AREA  |
2869  *      +-------------+
2870  *      |      0      |
2871  *      +-------------+
2872  *      |  applev[n]  |
2873  *      +-------------+
2874  *             :
2875  *             :
2876  *      +-------------+
2877  *      |  applev[1]  |
2878  *      +-------------+
2879  *      | exec_path / |
2880  *      |  applev[0]  |
2881  *      +-------------+
2882  *      |      0      |
2883  *      +-------------+
2884  *      |    env[n]   |
2885  *      +-------------+
2886  *             :
2887  *             :
2888  *      +-------------+
2889  *      |    env[0]   |
2890  *      +-------------+
2891  *      |      0      |
2892  *      +-------------+
2893  *      | arg[argc-1] |
2894  *      +-------------+
2895  *             :
2896  *             :
2897  *      +-------------+
2898  *      |    arg[0]   |
2899  *      +-------------+
2900  *      |     argc    |
2901  * sp-> +-------------+
2902  *
2903  * Although technically a part of the STRING AREA, we treat the PATH AREA as
2904  * a separate entity.  This allows us to align the beginning of the PATH AREA
2905  * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
2906  * which precede it on the stack are properly aligned.
2907  */
2908
2909 static int
2910 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
2911 {
2912         proc_t p = vfs_context_proc(imgp->ip_vfs_context);
2913         int     ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
2914         int     ptr_area_size;
2915         void *ptr_buffer_start, *ptr_buffer;
2916         int string_size;
2917
2918         user_addr_t     string_area;    /* *argv[], *env[] */
2919         user_addr_t     ptr_area;       /* argv[], env[], applev[] */
2920         user_addr_t argc_area;  /* argc */
2921         user_addr_t     stack;
2922         int error;
2923
2924         unsigned i;
2925         struct copyout_desc {
2926                 char    *start_string;
2927                 int             count;
2928 #if CONFIG_DTRACE
2929                 user_addr_t     *dtrace_cookie;
2930 #endif
2931                 boolean_t       null_term;
2932         } descriptors[] = {
2933                 {
2934                         .start_string = imgp->ip_startargv,
2935                         .count = imgp->ip_argc,
2936 #if CONFIG_DTRACE
2937                         .dtrace_cookie = &p->p_dtrace_argv,
2938 #endif
2939                         .null_term = TRUE
2940                 },
2941                 {
2942                         .start_string = imgp->ip_endargv,
2943                         .count = imgp->ip_envc,
2944 #if CONFIG_DTRACE
2945                         .dtrace_cookie = &p->p_dtrace_envp,
2946 #endif
2947                         .null_term = TRUE
2948                 },
2949                 {
2950                         .start_string = imgp->ip_strings,
2951                         .count = 1,
2952 #if CONFIG_DTRACE
2953                         .dtrace_cookie = NULL,
2954 #endif
2955                         .null_term = FALSE
2956                 },
2957                 {
2958                         .start_string = imgp->ip_endenvv,
2959                         .count = imgp->ip_applec - 1, /* exec_path handled above */
2960 #if CONFIG_DTRACE
2961                         .dtrace_cookie = NULL,
2962 #endif
2963                         .null_term = TRUE
2964                 }
2965         };
2966
2967         stack = *stackp;
2968
2969         /*
2970          * All previous contributors to the string area
2971          * should have aligned their sub-area
2972          */
2973         if (imgp->ip_strspace % ptr_size != 0) {
2974                 error = EINVAL;
2975                 goto bad;
2976         }
2977
2978         /* Grow the stack down for the strings we've been building up */
2979         string_size = imgp->ip_strendp - imgp->ip_strings;
2980         stack -= string_size;
2981         string_area = stack;
2982
2983         /*
2984          * Need room for one pointer for each string, plus
2985          * one for the NULLs terminating the argv, envv, and apple areas.
2986          */
2987         ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
2988             ptr_size;
2989         stack -= ptr_area_size;
2990         ptr_area = stack;
2991
2992         /* We'll construct all the pointer arrays in our string buffer,
2993          * which we already know is aligned properly, and ip_argspace
2994          * was used to verify we have enough space.
2995          */
2996         ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
2997
2998         /*
2999          * Need room for pointer-aligned argc slot.
3000          */
3001         stack -= ptr_size;
3002         argc_area = stack;
3003
3004         /*
3005          * Record the size of the arguments area so that sysctl_procargs()
3006          * can return the argument area without having to parse the arguments.
3007          */
3008         proc_lock(p);
3009         p->p_argc = imgp->ip_argc;
3010         p->p_argslen = (int)(*stackp - string_area);
3011         proc_unlock(p);
3012
3013         /* Return the initial stack address: the location of argc */
3014         *stackp = stack;
3015
3016         /*
3017          * Copy out the entire strings area.
3018          */
3019         error = copyout(imgp->ip_strings, string_area,
3020                                                    string_size);
3021         if (error)
3022                 goto bad;
3023
3024         for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
3025                 char *cur_string = descriptors[i].start_string;
3026                 int j;
3027
3028 #if CONFIG_DTRACE
3029                 if (descriptors[i].dtrace_cookie) {
3030                         proc_lock(p);
3031                         *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
3032                         proc_unlock(p);
3033                 }
3034 #endif /* CONFIG_DTRACE */
3035
3036                 /*
3037                  * For each segment (argv, envv, applev), copy as many pointers as requested
3038                  * to our pointer buffer.
3039                  */
3040                 for (j = 0; j < descriptors[i].count; j++) {
3041                         user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
3042
3043                         /* Copy out the pointer to the current string. Alignment has been verified  */
3044                         if (ptr_size == 8) {
3045                                 *(uint64_t *)ptr_buffer = (uint64_t)cur_address;
3046                         } else {
3047                                 *(uint32_t *)ptr_buffer = (uint32_t)cur_address;
3048                         }
3049
3050                         ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3051                         cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
3052                 }
3053
3054                 if (descriptors[i].null_term) {
3055                         if (ptr_size == 8) {
3056                                 *(uint64_t *)ptr_buffer = 0ULL;
3057                         } else {
3058                                 *(uint32_t *)ptr_buffer = 0;
3059                         }
3060
3061                         ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
3062                 }
3063         }
3064
3065         /*
3066          * Copy out all our pointer arrays in bulk.
3067          */
3068         error = copyout(ptr_buffer_start, ptr_area,
3069                                         ptr_area_size);
3070         if (error)
3071                 goto bad;
3072
3073         /* argc (int32, stored in a ptr_size area) */
3074         error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
3075         if (error)
3076                 goto bad;
3077
3078 bad:
3079         return(error);
3080 }
3081
3082
3083 /*
3084  * exec_extract_strings
3085  *
3086  * Copy arguments and environment from user space into work area; we may
3087  * have already copied some early arguments into the work area, and if
3088  * so, any arguments opied in are appended to those already there.
3089  * This function is the primary manipulator of ip_argspace, since
3090  * these are the arguments the client of execve(2) knows about. After
3091  * each argv[]/envv[] string is copied, we charge the string length
3092  * and argv[]/envv[] pointer slot to ip_argspace, so that we can
3093  * full preflight the arg list size.
3094  *
3095  * Parameters:  struct image_params *   the image parameter block
3096  *
3097  * Returns:     0                       Success
3098  *              !0                      Failure: errno
3099  *
3100  * Implicit returns;
3101  *              (imgp->ip_argc)         Count of arguments, updated
3102  *              (imgp->ip_envc)         Count of environment strings, updated
3103  *              (imgp->ip_argspace)     Count of remaining of NCARGS
3104  *              (imgp->ip_interp_buffer)        Interpreter and args (mutated in place)
3105  *
3106  *
3107  * Note:        The argument and environment vectors are user space pointers
3108  *              to arrays of user space pointers.
3109  */
3110 static int
3111 exec_extract_strings(struct image_params *imgp)
3112 {
3113         int error = 0;
3114         int     ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
3115         int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
3116         user_addr_t     argv = imgp->ip_user_argv;
3117         user_addr_t     envv = imgp->ip_user_envv;
3118
3119         /*
3120          * Adjust space reserved for the path name by however much padding it
3121          * needs. Doing this here since we didn't know if this would be a 32-
3122          * or 64-bit process back in exec_save_path.
3123          */
3124         while (imgp->ip_strspace % new_ptr_size != 0) {
3125                 *imgp->ip_strendp++ = '\0';
3126                 imgp->ip_strspace--;
3127                 /* imgp->ip_argspace--; not counted towards exec args total */
3128         }
3129
3130         /*
3131          * From now on, we start attributing string space to ip_argspace
3132          */
3133         imgp->ip_startargv = imgp->ip_strendp;
3134         imgp->ip_argc = 0;
3135
3136         if((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
3137                 user_addr_t     arg;
3138                 char *argstart, *ch;
3139
3140                 /* First, the arguments in the "#!" string are tokenized and extracted. */
3141                 argstart = imgp->ip_interp_buffer;
3142                 while (argstart) {
3143                         ch = argstart;
3144                         while (*ch && !IS_WHITESPACE(*ch)) {
3145                                 ch++;
3146                         }
3147
3148                         if (*ch == '\0') {
3149                                 /* last argument, no need to NUL-terminate */
3150                                 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
3151                                 argstart = NULL;
3152                         } else {
3153                                 /* NUL-terminate */
3154                                 *ch = '\0';
3155                                 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
3156
3157                                 /*
3158                                  * Find the next string. We know spaces at the end of the string have already
3159                                  * been stripped.
3160                                  */
3161                                 argstart = ch + 1;
3162                                 while (IS_WHITESPACE(*argstart)) {
3163                                         argstart++;
3164                                 }
3165                         }
3166
3167                         /* Error-check, regardless of whether this is the last interpreter arg or not */
3168                         if (error)
3169                                 goto bad;
3170                         if (imgp->ip_argspace < new_ptr_size) {
3171                                 error = E2BIG;
3172                                 goto bad;
3173                         }
3174                         imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3175                         imgp->ip_argc++;
3176                 }
3177
3178                 if (argv != 0LL) {
3179                         /*
3180                          * If we are running an interpreter, replace the av[0] that was
3181                          * passed to execve() with the path name that was
3182                          * passed to execve() for interpreters which do not use the PATH
3183                          * to locate their script arguments.
3184                          */
3185                         error = copyinptr(argv, &arg, ptr_size);
3186                         if (error)
3187                                 goto bad;
3188                         if (arg != 0LL) {
3189                                 argv += ptr_size; /* consume without using */
3190                         }
3191                 }
3192
3193                 if (imgp->ip_interp_sugid_fd != -1) {
3194                         char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
3195                         snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
3196                         error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
3197                 } else {
3198                         error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
3199                 }
3200
3201                 if (error)
3202                         goto bad;
3203                 if (imgp->ip_argspace < new_ptr_size) {
3204                         error = E2BIG;
3205                         goto bad;
3206                 }
3207                 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3208                 imgp->ip_argc++;
3209         }
3210
3211         while (argv != 0LL) {
3212                 user_addr_t     arg;
3213
3214                 error = copyinptr(argv, &arg, ptr_size);
3215                 if (error)
3216                         goto bad;
3217
3218                 if (arg == 0LL) {
3219                         break;
3220                 }
3221
3222                 argv += ptr_size;
3223
3224                 /*
3225                 * av[n...] = arg[n]
3226                 */
3227                 error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
3228                 if (error)
3229                         goto bad;
3230                 if (imgp->ip_argspace < new_ptr_size) {
3231                         error = E2BIG;
3232                         goto bad;
3233                 }
3234                 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
3235                 imgp->ip_argc++;
3236         }
3237
3238         /* Save space for argv[] NULL terminator */
3239         if (imgp->ip_argspace < new_ptr_size) {
3240                 error = E2BIG;
3241                 goto bad;
3242         }
3243         imgp->ip_argspace -= new_ptr_size;
3244
3245         /* Note where the args ends and env begins. */
3246         imgp->ip_endargv = imgp->ip_strendp;
3247         imgp->ip_envc = 0;
3248
3249         /* Now, get the environment */
3250         while (envv != 0LL) {
3251                 user_addr_t     env;
3252
3253                 error = copyinptr(envv, &env, ptr_size);
3254                 if (error)
3255                         goto bad;
3256
3257                 envv += ptr_size;
3258                 if (env == 0LL) {
3259                         break;
3260                 }
3261                 /*
3262                 * av[n...] = env[n]
3263                 */
3264                 error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
3265                 if (error)
3266                         goto bad;
3267                 if (imgp->ip_argspace < new_ptr_size) {
3268                         error = E2BIG;
3269                         goto bad;
3270                 }
3271                 imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
3272                 imgp->ip_envc++;
3273         }
3274
3275         /* Save space for envv[] NULL terminator */
3276         if (imgp->ip_argspace < new_ptr_size) {
3277                 error = E2BIG;
3278                 goto bad;
3279         }
3280         imgp->ip_argspace -= new_ptr_size;
3281
3282         /* Align the tail of the combined argv+envv area */
3283         while (imgp->ip_strspace % new_ptr_size != 0) {
3284                 if (imgp->ip_argspace < 1) {
3285                         error = E2BIG;
3286                         goto bad;
3287                 }
3288                 *imgp->ip_strendp++ = '\0';
3289                 imgp->ip_strspace--;
3290                 imgp->ip_argspace--;
3291         }
3292
3293         /* Note where the envv ends and applev begins. */
3294         imgp->ip_endenvv = imgp->ip_strendp;
3295
3296         /*
3297          * From now on, we are no longer charging argument
3298          * space to ip_argspace.
3299          */
3300
3301 bad:
3302         return error;
3303 }
3304
/*
 * random_hex_str
 *
 * Fill 'str' with "0x" followed by hexadecimal digits taken from a 64-bit
 * value assembled from two random() results, and NUL-terminate it.  The
 * digits are emitted starting with the least significant nibble.
 *
 * Parameters:	str		Output buffer, at least 'len' bytes
 *		len		Usable size of 'str'; values above 19 are
 *				treated as 19 ("0x" + 16 digits + NUL)
 *		embedNUL	If set, bits 8..15 of the value are cleared
 *				before formatting, so that it contains a
 *				zero byte (e.g. for userland
 *				__stack_chk_guard, to protect against C
 *				string vulnerabilities)
 *
 * Returns:	str		Success
 *		NULL		'len' is below 4, i.e. there is no room for
 *				at least one digit; 'str' is left untouched
 */
static char *
random_hex_str(char *str, int len, boolean_t embedNUL)
{
	static const char hex_digits[] = "0123456789abcdef";
	uint64_t lo_half, hi_half, bits;
	char *out, *terminator;

	/* A 64-bit value only takes 16 characters, plus '0x' and the NUL. */
	if (len > 19)
		len = 19;

	/* We need enough room for at least 1 digit. */
	if (len < 4)
		return (NULL);

	lo_half = random();
	hi_half = random();
	bits = (hi_half << 32) | lo_half;

	if (embedNUL)
		bits &= ~(0xffull << 8);

	str[0] = '0';
	str[1] = 'x';

	/* Peel off one nibble per output character, lowest nibble first. */
	terminator = str + (len - 1);
	for (out = str + 2; out < terminator; out++) {
		*out = hex_digits[bits & 0xf];
		bits >>= 4;
	}
	*out = '\0';

	return (str);
}
3345
3346 /*
3347  * Libc has an 8-element array set up for stack guard values.  It only fills
3348  * in one of those entries, and both gcc and llvm seem to use only a single
3349  * 8-byte guard.  Until somebody needs more than an 8-byte guard value, don't
3350  * do the work to construct them.
3351  */
3352 #define GUARD_VALUES 1
3353 #define GUARD_KEY "stack_guard="
3354
3355 /*
3356  * System malloc needs some entropy when it is initialized.
3357  */
3358 #define ENTROPY_VALUES 2
3359 #define ENTROPY_KEY "malloc_entropy="
3360
3361 /*
3362  * System malloc engages nanozone for UIAPP.
3363  */
3364 #define NANO_ENGAGE_KEY "MallocNanoZone=1"
3365
3366 #define PFZ_KEY "pfz="
3367 extern user32_addr_t commpage_text32_location;
3368 extern user64_addr_t commpage_text64_location;
3369 /*
3370  * Build up the contents of the apple[] string vector
3371  */
3372 static int
3373 exec_add_apple_strings(struct image_params *imgp)
3374 {
3375         int i, error;
3376         int new_ptr_size=4;
3377         char guard[19];
3378         char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1];
3379
3380         char entropy[19];
3381         char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1];
3382
3383         char pfz_string[strlen(PFZ_KEY) + 16 + 4 +1];
3384
3385         if( imgp->ip_flags & IMGPF_IS_64BIT) {
3386                 new_ptr_size = 8;
3387                 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%llx",commpage_text64_location);
3388         } else {
3389                 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%x",commpage_text32_location);
3390         }
3391
3392         /* exec_save_path stored the first string */
3393         imgp->ip_applec = 1;
3394
3395         /* adding the pfz string */
3396         error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string),UIO_SYSSPACE,FALSE);
3397         if(error)
3398                 goto bad;
3399         imgp->ip_applec++;
3400
3401         /* adding the NANO_ENGAGE_KEY key */
3402         if (imgp->ip_px_sa) {
3403                 int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
3404
3405                 if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
3406                         char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1];
3407
3408                         snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY);
3409                         error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE);
3410                         if (error)
3411                                 goto bad;
3412                         imgp->ip_applec++;
3413                 }
3414         }
3415
3416         /*
3417          * Supply libc with a collection of random values to use when
3418          * implementing -fstack-protector.
3419          *
3420          * (The first random string always contains an embedded NUL so that
3421          * __stack_chk_guard also protects against C string vulnerabilities)
3422          */
3423         (void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec));
3424         for (i = 0; i < GUARD_VALUES; i++) {
3425                 random_hex_str(guard, sizeof (guard), i == 0);
3426                 if (i)
3427                         (void)strlcat(guard_vec, ",", sizeof (guard_vec));
3428                 (void)strlcat(guard_vec, guard, sizeof (guard_vec));
3429         }
3430
3431         error = exec_add_user_string(imgp, CAST_USER_ADDR_T(guard_vec), UIO_SYSSPACE, FALSE);
3432         if (error)
3433                 goto bad;
3434         imgp->ip_applec++;
3435
3436         /*
3437          * Supply libc with entropy for system malloc.
3438          */
3439         (void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec));
3440         for (i = 0; i < ENTROPY_VALUES; i++) {
3441                 random_hex_str(entropy, sizeof (entropy), FALSE);
3442                 if (i)
3443                         (void)strlcat(entropy_vec, ",", sizeof (entropy_vec));
3444                 (void)strlcat(entropy_vec, entropy, sizeof (entropy_vec));
3445         }
3446
3447         error = exec_add_user_string(imgp, CAST_USER_ADDR_T(entropy_vec), UIO_SYSSPACE, FALSE);
3448         if (error)
3449                 goto bad;
3450         imgp->ip_applec++;
3451
3452         /* Align the tail of the combined applev area */
3453         while (imgp->ip_strspace % new_ptr_size != 0) {
3454                 *imgp->ip_strendp++ = '\0';
3455                 imgp->ip_strspace--;
3456         }
3457
3458 bad:
3459         return error;
3460 }
3461
/* rlim_cur of the RLIMIT_STACK entry of process 'p'; 'p' may be any pointer expression. */
#define unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
3463
3464 /*
3465  * exec_check_permissions
3466  *
3467  * Description: Verify that the file that is being attempted to be executed
3468  *              is in fact allowed to be executed based on it POSIX file
3469  *              permissions and other access control criteria
3470  *
3471  * Parameters:  struct image_params *   the image parameter block
3472  *
3473  * Returns:     0                       Success
3474  *              EACCES                  Permission denied
3475  *              ENOEXEC                 Executable file format error
3476  *              ETXTBSY                 Text file busy [misuse of error code]
3477  *      vnode_getattr:???
3478  *      vnode_authorize:???
3479  */
3480 static int
3481 exec_check_permissions(struct image_params *imgp)
3482 {
3483         struct vnode *vp = imgp->ip_vp;
3484         struct vnode_attr *vap = imgp->ip_vattr;
3485         proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3486         int error;
3487         kauth_action_t action;
3488
3489         /* Only allow execution of regular files */
3490         if (!vnode_isreg(vp))
3491                 return (EACCES);
3492
3493         /* Get the file attributes that we will be using here and elsewhere */
3494         VATTR_INIT(vap);
3495         VATTR_WANTED(vap, va_uid);
3496         VATTR_WANTED(vap, va_gid);
3497         VATTR_WANTED(vap, va_mode);
3498         VATTR_WANTED(vap, va_fsid);
3499         VATTR_WANTED(vap, va_fileid);
3500         VATTR_WANTED(vap, va_data_size);
3501         if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
3502                 return (error);
3503
3504         /*
3505          * Ensure that at least one execute bit is on - otherwise root
3506          * will always succeed, and we don't want to happen unless the
3507          * file really is executable.
3508          */
3509         if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
3510                 return (EACCES);
3511
3512         /* Disallow zero length files */
3513         if (vap->va_data_size == 0)
3514                 return (ENOEXEC);
3515
3516         imgp->ip_arch_offset = (user_size_t)0;
3517         imgp->ip_arch_size = vap->va_data_size;
3518
3519         /* Disable setuid-ness for traced programs or if MNT_NOSUID */
3520         if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED))
3521                 vap->va_mode &= ~(VSUID | VSGID);
3522
3523         /*
3524          * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
3525          * flags for setuid/setgid binaries.
3526          */
3527         if (vap->va_mode & (VSUID | VSGID))
3528                 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
3529
3530 #if CONFIG_MACF
3531         error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
3532         if (error)
3533                 return (error);
3534 #endif
3535
3536         /* Check for execute permission */
3537         action = KAUTH_VNODE_EXECUTE;
3538         /* Traced images must also be readable */
3539         if (p->p_lflag & P_LTRACED)
3540                 action |= KAUTH_VNODE_READ_DATA;
3541         if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
3542                 return (error);
3543
3544 #if 0
3545         /* Don't let it run if anyone had it open for writing */
3546         vnode_lock(vp);
3547         if (vp->v_writecount) {
3548                 panic("going to return ETXTBSY %x", vp);
3549                 vnode_unlock(vp);
3550                 return (ETXTBSY);
3551         }
3552         vnode_unlock(vp);
3553 #endif
3554
3555
3556         /* XXX May want to indicate to underlying FS that vnode is open */
3557
3558         return (error);
3559 }
3560
3561
3562 /*
3563  * exec_handle_sugid
3564  *
3565  * Initially clear the P_SUGID in the process flags; if an SUGID process is
3566  * exec'ing a non-SUGID image, then  this is the point of no return.
3567  *
3568  * If the image being activated is SUGID, then replace the credential with a
3569  * copy, disable tracing (unless the tracing process is root), reset the
3570  * mach task port to revoke it, and set the P_SUGID bit.
3571  *
3572  * If the saved user and group ID will be changing, then make sure it happens
3573  * to a new credential, rather than a shared one.
3574  *
3575  * Set the security token (this is probably obsolete, given that the token
3576  * should not technically be separate from the credential itself).
3577  *
3578  * Parameters:  struct image_params *   the image parameter block
3579  *
3580  * Returns:     void                    No failure indication
3581  *
3582  * Implicit returns:
3583  *              <process credential>    Potentially modified/replaced
3584  *              <task port>             Potentially revoked
3585  *              <process flags>         P_SUGID bit potentially modified
3586  *              <security token>        Potentially modified
3587  */
3588 static int
3589 exec_handle_sugid(struct image_params *imgp)
3590 {
3591         kauth_cred_t            cred = vfs_context_ucred(imgp->ip_vfs_context);
3592         proc_t                  p = vfs_context_proc(imgp->ip_vfs_context);
3593         int                     i;
3594         int                     leave_sugid_clear = 0;
3595         int                     mac_reset_ipc = 0;
3596         int                     error = 0;
3597 #if CONFIG_MACF
3598         int                     mac_transition, disjoint_cred = 0;
3599         int             label_update_return = 0;
3600
3601         /*
3602          * Determine whether a call to update the MAC label will result in the
3603          * credential changing.
3604          *
3605          * Note:        MAC policies which do not actually end up modifying
3606          *              the label subsequently are strongly encouraged to
3607          *              return 0 for this check, since a non-zero answer will
3608          *              slow down the exec fast path for normal binaries.
3609          */
3610         mac_transition = mac_cred_check_label_update_execve(
3611                                                         imgp->ip_vfs_context,
3612                                                         imgp->ip_vp,
3613                                                         imgp->ip_arch_offset,
3614                                                         imgp->ip_scriptvp,
3615                                                         imgp->ip_scriptlabelp,
3616                                                         imgp->ip_execlabelp,
3617                                                         p,
3618                                                         imgp->ip_px_smpx);
3619 #endif
3620
3621         OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
3622
3623         /*
3624          * Order of the following is important; group checks must go last,
3625          * as we use the success of the 'ismember' check combined with the
3626          * failure of the explicit match to indicate that we will be setting
3627          * the egid of the process even though the new process did not
3628          * require VSUID/VSGID bits in order for it to set the new group as
3629          * its egid.
3630          *
3631          * Note:        Technically, by this we are implying a call to
3632          *              setegid() in the new process, rather than implying
3633          *              it used its VSGID bit to set the effective group,
3634          *              even though there is no code in that process to make
3635          *              such a call.
3636          */
3637         if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
3638              kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
3639             ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
3640                  ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
3641                  (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
3642
3643 #if CONFIG_MACF
3644 /* label for MAC transition and neither VSUID nor VSGID */
3645 handle_mac_transition:
3646 #endif
3647
3648                 /*
3649                  * Replace the credential with a copy of itself if euid or
3650                  * egid change.
3651                  *
3652                  * Note:        setuid binaries will automatically opt out of
3653                  *              group resolver participation as a side effect
3654                  *              of this operation.  This is an intentional
3655                  *              part of the security model, which requires a
3656                  *              participating credential be established by
3657                  *              escalating privilege, setting up all other
3658                  *              aspects of the credential including whether
3659                  *              or not to participate in external group
3660                  *              membership resolution, then dropping their
3661                  *              effective privilege to that of the desired
3662                  *              final credential state.
3663                  */
3664                 if (imgp->ip_origvattr->va_mode & VSUID) {
3665                         p->p_ucred  = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
3666                         /* update cred on proc */
3667                         PROC_UPDATE_CREDS_ONPROC(p);
3668                 }
3669                 if (imgp->ip_origvattr->va_mode & VSGID) {
3670                         p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
3671                         /* update cred on proc */
3672                         PROC_UPDATE_CREDS_ONPROC(p);
3673                 }
3674
3675 #if CONFIG_MACF
3676                 /*
3677                  * If a policy has indicated that it will transition the label,
3678                  * before making the call into the MAC policies, get a new
3679                  * duplicate credential, so they can modify it without
3680                  * modifying any others sharing it.
3681                  */
3682                 if (mac_transition) {
3683                         kauth_proc_label_update_execve(p,
3684                                                 imgp->ip_vfs_context,
3685                                                 imgp->ip_vp,
3686                                                 imgp->ip_arch_offset,
3687                                                 imgp->ip_scriptvp,
3688                                                 imgp->ip_scriptlabelp,
3689                                                 imgp->ip_execlabelp,
3690                                                 &imgp->ip_csflags,
3691                                                 imgp->ip_px_smpx,
3692                                                 &disjoint_cred, /* will be non zero if disjoint */
3693                                                 &label_update_return);
3694
3695                         if (disjoint_cred) {
3696                                 /*
3697                                  * If updating the MAC label resulted in a
3698                                  * disjoint credential, flag that we need to
3699                                  * set the P_SUGID bit.  This protects
3700                                  * against debuggers being attached by an
3701                                  * insufficiently privileged process onto the
3702                                  * result of a transition to a more privileged
3703                                  * credential.
3704                                  */
3705                                 leave_sugid_clear = 0;
3706                         }
3707
3708                         imgp->ip_mac_return = label_update_return;
3709                 }
3710
3711                 mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
3712
3713 #endif  /* CONFIG_MACF */
3714
3715                 /*
3716                  * If 'leave_sugid_clear' is non-zero, then we passed the
3717                  * VSUID and MACF checks, and successfully determined that
3718                  * the previous cred was a member of the VSGID group, but
3719                  * that it was not the default at the time of the execve,
3720                  * and that the post-labelling credential was not disjoint.
3721                  * So we don't set the P_SUGID or reset mach ports and fds
3722                  * on the basis of simply running this code.
3723                  */
3724                 if (mac_reset_ipc || !leave_sugid_clear) {
3725                         /*
3726                          * Have mach reset the task and thread ports.
3727                          * We don't want anyone who had the ports before
3728                          * a setuid exec to be able to access/control the
3729                          * task/thread after.
3730                          */
3731                         ipc_task_reset(p->task);
3732                         ipc_thread_reset((imgp->ip_new_thread != NULL) ?
3733                                          imgp->ip_new_thread : current_thread());
3734                 }
3735
3736                 if (!leave_sugid_clear) {
3737                         /*
3738                          * Flag the process as setuid.
3739                          */
3740                         OSBitOrAtomic(P_SUGID, &p->p_flag);
3741
3742                         /*
3743                          * Radar 2261856; setuid security hole fix
3744                          * XXX For setuid processes, attempt to ensure that
3745                          * stdin, stdout, and stderr are already allocated.
3746                          * We do not want userland to accidentally allocate
3747                          * descriptors in this range which has implied meaning
3748                          * to libc.
3749                          */
3750                         for (i = 0; i < 3; i++) {
3751
3752                                 if (p->p_fd->fd_ofiles[i] != NULL)
3753                                         continue;
3754
3755                                 /*
3756                                  * Do the kernel equivalent of
3757                                  *
3758                                  *      if i == 0
3759                                  *              (void) open("/dev/null", O_RDONLY);
3760                                  *      else
3761                                  *              (void) open("/dev/null", O_WRONLY);
3762                                  */
3763
3764                                 struct fileproc *fp;
3765                                 int indx;
3766                                 int flag;
3767                                 struct nameidata *ndp = NULL;
3768
3769                                 if (i == 0)
3770                                         flag = FREAD;
3771                                 else
3772                                         flag = FWRITE;
3773
3774                                 if ((error = falloc(p,
3775                                     &fp, &indx, imgp->ip_vfs_context)) != 0)
3776                                         continue;
3777
3778                                 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
3779                                 if (ndp == NULL) {
3780                                         error = ENOMEM;
3781                                         break;
3782                                 }
3783
3784                                 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
3785                                     CAST_USER_ADDR_T("/dev/null"),
3786                                     imgp->ip_vfs_context);
3787
3788                                 if ((error = vn_open(ndp, flag, 0)) != 0) {
3789                                         fp_free(p, indx, fp);
3790                                         break;
3791                                 }
3792
3793                                 struct fileglob *fg = fp->f_fglob;
3794
3795                                 fg->fg_flag = flag;
3796                                 fg->fg_ops = &vnops;
3797                                 fg->fg_data = ndp->ni_vp;
3798
3799                                 vnode_put(ndp->ni_vp);
3800
3801                                 proc_fdlock(p);
3802                                 procfdtbl_releasefd(p, indx, NULL);
3803                                 fp_drop(p, indx, fp, 1);
3804                                 proc_fdunlock(p);
3805
3806                                 FREE(ndp, M_TEMP);
3807                         }
3808                 }
3809         }
3810 #if CONFIG_MACF
3811         else {
3812                 /*
3813                  * We are here because we were told that the MAC label will
3814                  * be transitioned, and the binary is not VSUID or VSGID; to
3815                  * deal with this case, we could either duplicate a lot of
3816                  * code, or we can indicate we want to default the P_SUGID
3817                  * bit clear and jump back up.
3818                  */
3819                 if (mac_transition) {
3820                         leave_sugid_clear = 1;
3821                         goto handle_mac_transition;
3822                 }
3823         }
3824
3825 #endif  /* CONFIG_MACF */
3826
3827         /*
3828          * Implement the semantic where the effective user and group become
3829          * the saved user and group in exec'ed programs.
3830          */
3831         p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred),  kauth_cred_getgid(p->p_ucred));
3832         /* update cred on proc */
3833         PROC_UPDATE_CREDS_ONPROC(p);
3834
3835         /* Update the process' identity version and set the security token */
3836         p->p_idversion++;
3837         set_security_token(p);
3838
3839         return(error);
3840 }
3841
3842
3843 /*
3844  * create_unix_stack
3845  *
3846  * Description: Set the user stack address for the process to the provided
3847  *              address.  If a custom stack was not set as a result of the
3848  *              load process (i.e. as specified by the image file for the
3849  *              executable), then allocate the stack in the provided map and
3850  *              set up appropriate guard pages for enforcing administrative
3851  *              limits on stack growth, if they end up being needed.
3852  *
3853  * Parameters:  p                       Process to set stack on
3854  *              load_result             Information from mach-o load commands
3855  *              map                     Address map in which to allocate the new stack
3856  *
3857  * Returns:     KERN_SUCCESS            Stack successfully created
3858  *              !KERN_SUCCESS           Mach failure code
3859  */
3860 static kern_return_t
3861 create_unix_stack(vm_map_t map, load_result_t* load_result,
3862                         proc_t p)
3863 {
3864         mach_vm_size_t          size, prot_size;
3865         mach_vm_offset_t        addr, prot_addr;
3866         kern_return_t           kr;
3867
3868         mach_vm_address_t       user_stack = load_result->user_stack;
3869
3870         proc_lock(p);
3871         p->user_stack = user_stack;
3872         proc_unlock(p);
3873
3874         if (!load_result->prog_allocated_stack) {
3875                 /*
3876                  * Allocate enough space for the maximum stack size we
3877                  * will ever authorize and an extra page to act as
3878                  * a guard page for stack overflows. For default stacks,
3879                  * vm_initial_limit_stack takes care of the extra guard page.
3880                  * Otherwise we must allocate it ourselves.
3881                  */
3882
3883                 size = mach_vm_round_page(load_result->user_stack_size);
3884                 if (load_result->prog_stack_size)
3885                         size += PAGE_SIZE;
3886                 addr = mach_vm_trunc_page(load_result->user_stack - size);
3887                 kr = mach_vm_allocate(map, &addr, size,
3888                                         VM_MAKE_TAG(VM_MEMORY_STACK) |
3889                                         VM_FLAGS_FIXED);
3890                 if (kr != KERN_SUCCESS) {
3891                         /* If can't allocate at default location, try anywhere */
3892                         addr = 0;
3893                         kr = mach_vm_allocate(map, &addr, size,
3894                                                                   VM_MAKE_TAG(VM_MEMORY_STACK) |
3895                                                                   VM_FLAGS_ANYWHERE);
3896                         if (kr != KERN_SUCCESS)
3897                                 return kr;
3898
3899                         user_stack = addr + size;
3900                         load_result->user_stack = user_stack;
3901
3902                         proc_lock(p);
3903                         p->user_stack = user_stack;
3904                         proc_unlock(p);
3905                 }
3906
3907                 /*
3908                  * And prevent access to what's above the current stack
3909                  * size limit for this process.
3910                  */
3911                 prot_addr = addr;
3912                 if (load_result->prog_stack_size)
3913                         prot_size = PAGE_SIZE;
3914                 else
3915                         prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
3916                 kr = mach_vm_protect(map,
3917                                                          prot_addr,
3918                                                          prot_size,
3919                                                          FALSE,
3920                                                          VM_PROT_NONE);
3921                 if (kr != KERN_SUCCESS) {
3922                         (void) mach_vm_deallocate(map, addr, size);
3923                         return kr;
3924                 }
3925         }
3926
3927         return KERN_SUCCESS;
3928 }
3929
3930 #include <sys/reboot.h>
3931
/*
 * Candidate paths for the "init" program.  load_init_program() tries
 * them in array order and stops at the first one that execs
 * successfully; the development launchd is only listed (and tried
 * first) when DEVELOPMENT or DEBUG is set.
 */
static const char * init_programs[] = {
#if DEVELOPMENT || DEBUG
        "/usr/local/sbin/launchd.development",
#endif
        "/sbin/launchd",
};
3938
3939 /*
3940  * load_init_program
3941  *
3942  * Description: Load the "init" program; in most cases, this will be "launchd"
3943  *
3944  * Parameters:  p                       Process to call execve() to create
3945  *                                      the "init" program
3946  *
3947  * Returns:     (void)
3948  *
3949  * Notes:       The process that is passed in is the first manufactured
3950  *              process on the system, and gets here via bsd_ast() firing
3951  *              for the first time.  This is done to ensure that bsd_init()
3952  *              has run to completion.
3953  */
3954 void
3955 load_init_program(proc_t p)
3956 {
3957         vm_offset_t     init_addr, addr;
3958         int             argc;
3959         uint32_t argv[3];
3960         unsigned int i;
3961         int                     error;
3962         int             retval[2];
3963         const char *init_program_name;
3964         struct execve_args init_exec_args;
3965
3966         init_addr = VM_MIN_ADDRESS;
3967         (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE);
3968         if (init_addr == 0)
3969                 init_addr++;
3970
3971         for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) {
3972
3973                 init_program_name = init_programs[i];
3974                 addr = init_addr;
3975                 argc = 0;
3976
3977                 /*
3978                  * Copy out program name.
3979                  */
3980                 (void) copyout(init_program_name, CAST_USER_ADDR_T(addr), strlen(init_program_name)+1);
3981
3982                 argv[argc++] = (uint32_t)addr;
3983                 addr += strlen(init_program_name)+1;
3984                 addr = (vm_offset_t)ROUND_PTR(char, addr);
3985
3986                 /*
3987                  * Put out first (and only) argument, similarly.
3988                  * Assumes everything fits in a page as allocated above.
3989                  */
3990                 if (boothowto & RB_SINGLE) {
3991                         const char *init_args = "-s";
3992
3993                         copyout(init_args, CAST_USER_ADDR_T(addr), strlen(init_args)+1);
3994
3995                         argv[argc++] = (uint32_t)addr;
3996                         addr += strlen(init_args)+1;
3997                         addr = (vm_offset_t)ROUND_PTR(char, addr);
3998                 }
3999
4000                 /*
4001                  * Null-end the argument list
4002                  */
4003                 argv[argc] = 0;
4004
4005                 /*
4006                  * Copy out the argument list.
4007                  */
4008                 (void) copyout(argv, CAST_USER_ADDR_T(addr), sizeof(argv));
4009
4010                 /*
4011                  * Set up argument block for fake call to execve.
4012                  */
4013                 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
4014                 init_exec_args.argp = CAST_USER_ADDR_T((char **)addr);
4015                 init_exec_args.envp = CAST_USER_ADDR_T(0);
4016
4017                 /*
4018                  * So that init task is set with uid,gid 0 token
4019                  */
4020                 set_security_token(p);
4021
4022                 error = execve(p, &init_exec_args, retval);
4023                 if (!error)
4024                         return;
4025         }
4026
4027         panic("Process 1 exec of %s failed, errno %d", init_program_name, error);
4028 }
4029
4030 /*
4031  * load_return_to_errno
4032  *
4033  * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
4034  *
4035  * Parameters:  lrtn                    Mach error number
4036  *
4037  * Returns:     (int)                   BSD error number
4038  *              0                       Success
4039  *              EBADARCH                Bad architecture
4040  *              EBADMACHO               Bad Mach object file
4041  *              ESHLIBVERS              Bad shared library version
4042  *              ENOMEM                  Out of memory/resource shortage
4043  *              EACCES                  Access denied
 *              ENOENT                  Entry not found (usually "file
 *                                      does not exist")
4046  *              EIO                     An I/O error occurred
4047  *              EBADEXEC                The executable is corrupt/unknown
4048  */
4049 static int
4050 load_return_to_errno(load_return_t lrtn)
4051 {
4052         switch (lrtn) {
4053         case LOAD_SUCCESS:
4054                 return 0;
4055         case LOAD_BADARCH:
4056                 return EBADARCH;
4057         case LOAD_BADMACHO:
4058                 return EBADMACHO;
4059         case LOAD_SHLIB:
4060                 return ESHLIBVERS;
4061         case LOAD_NOSPACE:
4062         case LOAD_RESOURCE:
4063                 return ENOMEM;
4064         case LOAD_PROTECT:
4065                 return EACCES;
4066         case LOAD_ENOENT:
4067                 return ENOENT;
4068         case LOAD_IOERROR:
4069                 return EIO;
4070         case LOAD_FAILURE:
4071         case LOAD_DECRYPTFAIL:
4072         default:
4073                 return EBADEXEC;
4074         }
4075 }
4076
4077 #include <mach/mach_types.h>
4078 #include <mach/vm_prot.h>
4079 #include <mach/semaphore.h>
4080 #include <mach/sync_policy.h>
4081 #include <kern/clock.h>
4082 #include <mach/kern_return.h>
4083
4084 /*
4085  * execargs_alloc
4086  *
4087  * Description: Allocate the block of memory used by the execve arguments.
4088  *              At the same time, we allocate a page so that we can read in
4089  *              the first page of the image.
4090  *
4091  * Parameters:  struct image_params *   the image parameter block
4092  *
4093  * Returns:     0                       Success
4094  *              EINVAL                  Invalid argument
4095  *              EACCES                  Permission denied
4096  *              EINTR                   Interrupted function
4097  *              ENOMEM                  Not enough space
4098  *
4099  * Notes:       This is a temporary allocation into the kernel address space
4100  *              to enable us to copy arguments in from user space.  This is
4101  *              necessitated by not mapping the process calling execve() into
4102  *              the kernel address space during the execve() system call.
4103  *
4104  *              We assemble the argument and environment, etc., into this
4105  *              region before copying it as a single block into the child
4106  *              process address space (at the top or bottom of the stack,
4107  *              depending on which way the stack grows; see the function
4108  *              exec_copyout_strings() for details).
4109  *
 *              This ends up with a second (possibly unnecessary) copy compared
 *              with assembling the data directly into the child address space,
4112  *              instead, but since we cannot be guaranteed that the parent has
4113  *              not modified its environment, we can't really know that it's
4114  *              really a block there as well.
4115  */
4116
4117
/*
 * Count of threads currently sleeping in execargs_alloc() waiting for a
 * free argument buffer; execargs_free() only issues a wakeup when this
 * is non-zero.
 */
static int execargs_waiters = 0;
/* Mutex taken by execargs_lock_lock() around the execargs cache state. */
lck_mtx_t *execargs_cache_lock;
4120
4121 static void
4122 execargs_lock_lock(void) {
4123         lck_mtx_lock_spin(execargs_cache_lock);
4124 }
4125
4126 static void
4127 execargs_lock_unlock(void) {
4128         lck_mtx_unlock(execargs_cache_lock);
4129 }
4130
4131 static wait_result_t
4132 execargs_lock_sleep(void) {
4133         return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
4134 }
4135
4136 static kern_return_t
4137 execargs_purgeable_allocate(char **execarg_address) {
4138         kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
4139         assert(kr == KERN_SUCCESS);
4140         return kr;
4141 }
4142
4143 static kern_return_t
4144 execargs_purgeable_reference(void *execarg_address) {
4145         int state = VM_PURGABLE_NONVOLATILE;
4146         kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
4147
4148         assert(kr == KERN_SUCCESS);
4149         return kr;
4150 }
4151
4152 static kern_return_t
4153 execargs_purgeable_volatilize(void *execarg_address) {
4154         int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
4155         kern_return_t kr;
4156         kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
4157
4158         assert(kr == KERN_SUCCESS);
4159
4160         return kr;
4161 }
4162
4163 static void
4164 execargs_wakeup_waiters(void) {
4165         thread_wakeup(&execargs_free_count);
4166 }
4167
4168 static int
4169 execargs_alloc(struct image_params *imgp)
4170 {
4171         kern_return_t kret;
4172         wait_result_t res;
4173         int i, cache_index = -1;
4174
4175         execargs_lock_lock();
4176
4177         while (execargs_free_count == 0) {
4178                 execargs_waiters++;
4179                 res = execargs_lock_sleep();
4180                 execargs_waiters--;
4181                 if (res != THREAD_AWAKENED) {
4182                         execargs_lock_unlock();
4183                         return (EINTR);
4184                 }
4185         }
4186
4187         execargs_free_count--;
4188
4189         for (i = 0; i < execargs_cache_size; i++) {
4190                 vm_offset_t element = execargs_cache[i];
4191                 if (element) {
4192                         cache_index = i;
4193                         imgp->ip_strings = (char *)(execargs_cache[i]);
4194                         execargs_cache[i] = 0;
4195                         break;
4196                 }
4197         }
4198
4199         assert(execargs_free_count >= 0);
4200
4201         execargs_lock_unlock();
4202
4203         if (cache_index == -1) {
4204                 kret = execargs_purgeable_allocate(&imgp->ip_strings);
4205         }
4206         else
4207                 kret = execargs_purgeable_reference(imgp->ip_strings);
4208
4209         assert(kret == KERN_SUCCESS);
4210         if (kret != KERN_SUCCESS) {
4211                 return (ENOMEM);
4212         }
4213
4214         /* last page used to read in file headers */
4215         imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
4216         imgp->ip_strendp = imgp->ip_strings;
4217         imgp->ip_argspace = NCARGS;
4218         imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
4219
4220         return (0);
4221 }
4222
4223 /*
4224  * execargs_free
4225  *
4226  * Description: Free the block of memory used by the execve arguments and the
4227  *              first page of the executable by a previous call to the function
4228  *              execargs_alloc().
4229  *
4230  * Parameters:  struct image_params *   the image parameter block
4231  *
4232  * Returns:     0                       Success
4233  *              EINVAL                  Invalid argument
 *              EINTR                   Operation interrupted
4235  */
4236 static int
4237 execargs_free(struct image_params *imgp)
4238 {
4239         kern_return_t kret;
4240         int i;
4241         boolean_t needs_wakeup = FALSE;
4242
4243         kret = execargs_purgeable_volatilize(imgp->ip_strings);
4244
4245         execargs_lock_lock();
4246         execargs_free_count++;
4247
4248         for (i = 0; i < execargs_cache_size; i++) {
4249                 vm_offset_t element = execargs_cache[i];
4250                 if (element == 0) {
4251                         execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
4252                         imgp->ip_strings = NULL;
4253                         break;
4254                 }
4255         }
4256
4257         assert(imgp->ip_strings == NULL);
4258
4259         if (execargs_waiters > 0)
4260                 needs_wakeup = TRUE;
4261
4262         execargs_lock_unlock();
4263
4264         if (needs_wakeup == TRUE)
4265                 execargs_wakeup_waiters();
4266
4267         return ((kret == KERN_SUCCESS ? 0 : EINVAL));
4268 }
4269
4270 static void
4271 exec_resettextvp(proc_t p, struct image_params *imgp)
4272 {
4273         vnode_t vp;
4274         off_t offset;
4275         vnode_t tvp  = p->p_textvp;
4276         int ret;
4277
4278         vp = imgp->ip_vp;
4279         offset = imgp->ip_arch_offset;
4280
4281         if (vp == NULLVP)
4282                 panic("exec_resettextvp: expected valid vp");
4283
4284         ret = vnode_ref(vp);
4285         proc_lock(p);
4286         if (ret == 0) {
4287                 p->p_textvp = vp;
4288                 p->p_textoff = offset;
4289         } else {
4290                 p->p_textvp = NULLVP;   /* this is paranoia */
4291                 p->p_textoff = 0;
4292         }
4293         proc_unlock(p);
4294
4295         if ( tvp != NULLVP) {
4296                 if (vnode_getwithref(tvp) == 0) {
4297                         vnode_rele(tvp);
4298                         vnode_put(tvp);
4299                 }
4300         }
4301
4302 }
4303
4304 /*
4305  * If the process is not signed or if it contains entitlements, we
4306  * need to communicate through the task_access_port to taskgated.
4307  *
4308  * taskgated will provide a detached code signature if present, and
4309  * will enforce any restrictions on entitlements.
4310  */
4311
4312 static boolean_t
4313 taskgated_required(proc_t p, boolean_t *require_success)
4314 {
4315         size_t length;
4316         void *blob;
4317         int error;
4318
4319         if ((p->p_csflags & CS_VALID) == 0) {
4320                 *require_success = FALSE;
4321                 return TRUE;
4322         }
4323
4324         error = cs_entitlements_blob_get(p, &blob, &length);
4325         if (error == 0 && blob != NULL) {
4326                 /*
4327                  * fatal on the desktop when entitlements are present,
4328                  * unless we started in single-user mode
4329                  */
4330                 if ((boothowto & RB_SINGLE) == 0)
4331                         *require_success = TRUE;
4332                 /*
4333                  * Allow initproc to run without causing taskgated to launch
4334                  */
4335                 if (p == initproc) {
4336                         *require_success = FALSE;
4337                         return FALSE;
4338                 }
4339
4340                 return TRUE;
4341         }
4342
4343         *require_success = FALSE;
4344         return 0;
4345 }
4346
4347 /*
4348  * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
4349  *
4350  * Description: Waits for the userspace daemon to respond to the request
4351  *              we made. Function declared non inline to be visible in
4352  *              stackshots and spindumps as well as debugging.
4353  */
4354 __attribute__((noinline)) int
4355 __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
4356 {
4357         return find_code_signature(task_access_port, new_pid);
4358 }
4359
4360 static int
4361 check_for_signature(proc_t p, struct image_params *imgp)
4362 {
4363         mach_port_t port = NULL;
4364         kern_return_t kr = KERN_FAILURE;
4365         int error = EACCES;
4366         boolean_t unexpected_failure = FALSE;
4367         unsigned char hash[SHA1_RESULTLEN];
4368         boolean_t require_success = FALSE;
4369         int spawn = (imgp->ip_flags & IMGPF_SPAWN);
4370         int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
4371
4372         /*
4373          * Override inherited code signing flags with the
4374          * ones for the process that is being successfully
4375          * loaded
4376          */
4377         proc_lock(p);
4378         p->p_csflags = imgp->ip_csflags;
4379         proc_unlock(p);
4380
4381         /* Set the switch_protect flag on the map */
4382         if(p->p_csflags & (CS_HARD|CS_KILL)) {
4383                 vm_map_switch_protect(get_task_map(p->task), TRUE);
4384         }
4385
4386         /*
4387          * image activation may be failed due to policy
4388          * which is unexpected but security framework does not
4389          * approve of exec, kill and return immediately.
4390          */
4391         if (imgp->ip_mac_return != 0) {
4392                 error = imgp->ip_mac_return;
4393                 unexpected_failure = TRUE;
4394                 goto done;
4395         }
4396
4397         /* check if callout to taskgated is needed */
4398         if (!taskgated_required(p, &require_success)) {
4399                 error = 0;
4400                 goto done;
4401         }
4402
4403         kr = task_get_task_access_port(p->task, &port);
4404         if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
4405                 error = 0;
4406                 if (require_success)
4407                         error = EACCES;
4408                 goto done;
4409         }
4410
4411         /*
4412          * taskgated returns KERN_SUCCESS if it has completed its work
4413          * and the exec should continue, KERN_FAILURE if the exec should
4414          * fail, or it may error out with different error code in an
4415          * event of mig failure (e.g. process was signalled during the
4416          * rpc call, taskgated died, mig server died etc.).
4417          */
4418
4419         kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
4420         switch (kr) {
4421         case KERN_SUCCESS:
4422                 error = 0;
4423                 break;
4424         case KERN_FAILURE:
4425                 error = EACCES;
4426                 goto done;
4427         default:
4428                 error = EACCES;
4429                 unexpected_failure = TRUE;
4430                 goto done;
4431         }
4432
4433         /* Only do this if exec_resettextvp() did not fail */
4434         if (p->p_textvp != NULLVP) {
4435                 /*
4436                  * If there's a new code directory, mark this process
4437                  * as signed.
4438                  */
4439                 if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) {
4440                         proc_lock(p);
4441                         p->p_csflags |= CS_VALID;
4442                         proc_unlock(p);
4443                 }
4444         }
4445
4446 done:
4447         if (0 != error) {
4448                 if (!unexpected_failure)
4449                         p->p_csflags |= CS_KILLED;
4450                 /* make very sure execution fails */
4451                 if (vfexec || spawn) {
4452                         psignal_vfork(p, p->task, imgp->ip_new_thread, SIGKILL);
4453                         error = 0;
4454                 } else {
4455                         psignal(p, SIGKILL);
4456                 }
4457         }
4458         return error;
4459 }
4460
4461 /*
4462  * Typically as soon as we start executing this process, the
4463  * first instruction will trigger a VM fault to bring the text
4464  * pages (as executable) into the address space, followed soon
4465  * thereafter by dyld data structures (for dynamic executable).
4466  * To optimize this, as well as improve support for hardware
4467  * debuggers that can only access resident pages present
4468  * in the process' page tables, we prefault some pages if
4469  * possible. Errors are non-fatal.
4470  */
4471 static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
4472 {
4473         int ret;
4474         size_t expected_all_image_infos_size;
4475
4476         /*
4477          * Prefault executable or dyld entry point.
4478          */
4479         vm_fault(current_map(),
4480                  vm_map_trunc_page(load_result->entry_point,
4481                                    vm_map_page_mask(current_map())),
4482                  VM_PROT_READ | VM_PROT_EXECUTE,
4483                  FALSE,
4484                  THREAD_UNINT, NULL, 0);
4485
4486         if (imgp->ip_flags & IMGPF_IS_64BIT) {
4487                 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
4488         } else {
4489                 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
4490         }
4491
4492         /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
4493         if (load_result->dynlinker &&
4494                 load_result->all_image_info_addr &&
4495                 load_result->all_image_info_size >= expected_all_image_infos_size) {
4496                 union {
4497                         struct user64_dyld_all_image_infos      infos64;
4498                         struct user32_dyld_all_image_infos      infos32;
4499                 } all_image_infos;
4500
4501                 /*
4502                  * Pre-fault to avoid copyin() going through the trap handler
4503                  * and recovery path.
4504                  */
4505                 vm_fault(current_map(),
4506                          vm_map_trunc_page(load_result->all_image_info_addr,
4507                                            vm_map_page_mask(current_map())),
4508                          VM_PROT_READ | VM_PROT_WRITE,
4509                          FALSE,
4510                          THREAD_UNINT, NULL, 0);
4511                 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
4512                         /* all_image_infos straddles a page */
4513                         vm_fault(current_map(),
4514                                  vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
4515                                                    vm_map_page_mask(current_map())),
4516                                  VM_PROT_READ | VM_PROT_WRITE,
4517                                  FALSE,
4518                                  THREAD_UNINT, NULL, 0);
4519                 }
4520
4521                 ret = copyin(load_result->all_image_info_addr,
4522                                          &all_image_infos,
4523                                          expected_all_image_infos_size);
4524                 if (ret == 0 && all_image_infos.infos32.version >= 9) {
4525
4526                         user_addr_t notification_address;
4527                         user_addr_t dyld_image_address;
4528                         user_addr_t dyld_version_address;
4529                         user_addr_t dyld_all_image_infos_address;
4530                         user_addr_t dyld_slide_amount;
4531
4532                         if (imgp->ip_flags & IMGPF_IS_64BIT) {
4533                                 notification_address = all_image_infos.infos64.notification;
4534                                 dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
4535                                 dyld_version_address = all_image_infos.infos64.dyldVersion;
4536                                 dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
4537                         } else {
4538                                 notification_address = all_image_infos.infos32.notification;
4539                                 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
4540                                 dyld_version_address = all_image_infos.infos32.dyldVersion;
4541                                 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
4542                         }
4543
4544                         /*
4545                          * dyld statically sets up the all_image_infos in its Mach-O
4546                          * binary at static link time, with pointers relative to its default
4547                          * load address. Since ASLR might slide dyld before its first
4548                          * instruction is executed, "dyld_slide_amount" tells us how far
4549                          * dyld was loaded compared to its default expected load address.
4550                          * All other pointers into dyld's image should be adjusted by this
4551                          * amount. At some point later, dyld will fix up pointers to take
4552                          * into account the slide, at which point the all_image_infos_address
4553                          * field in the structure will match the runtime load address, and
4554                          * "dyld_slide_amount" will be 0, if we were to consult it again.
4555                          */
4556
4557                         dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
4558
4559 #if 0
4560                         kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
4561                                         (uint64_t)load_result->all_image_info_addr,
4562                                         all_image_infos.infos32.version,
4563                                         (uint64_t)notification_address,
4564                                         (uint64_t)dyld_image_address,
4565                                         (uint64_t)dyld_version_address,
4566                                         (uint64_t)dyld_all_image_infos_address);
4567 #endif
4568
4569                         vm_fault(current_map(),
4570                                  vm_map_trunc_page(notification_address + dyld_slide_amount,
4571                                                    vm_map_page_mask(current_map())),
4572                                  VM_PROT_READ | VM_PROT_EXECUTE,
4573                                  FALSE,
4574                                  THREAD_UNINT, NULL, 0);
4575                         vm_fault(current_map(),
4576                                  vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
4577                                                    vm_map_page_mask(current_map())),
4578                                  VM_PROT_READ | VM_PROT_EXECUTE,
4579                                  FALSE,
4580                                  THREAD_UNINT, NULL, 0);
4581                         vm_fault(current_map(),
4582                                  vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
4583                                                    vm_map_page_mask(current_map())),
4584                                  VM_PROT_READ,
4585                                  FALSE,
4586                                  THREAD_UNINT, NULL, 0);
4587                         vm_fault(current_map(),
4588                                  vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
4589                                                    vm_map_page_mask(current_map())),
4590                                  VM_PROT_READ | VM_PROT_WRITE,
4591                                  FALSE,
4592                                  THREAD_UNINT, NULL, 0);
4593                 }
4594         }
4595 }