+
+ /* Get the file attributes that we will be using here and elsewhere */
+ VATTR_INIT(vap);
+ VATTR_WANTED(vap, va_uid);
+ VATTR_WANTED(vap, va_gid);
+ VATTR_WANTED(vap, va_mode);
+ VATTR_WANTED(vap, va_fsid);
+ VATTR_WANTED(vap, va_fileid);
+ VATTR_WANTED(vap, va_data_size);
+ if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
+ return (error);
+
+ /*
+ * Ensure that at least one execute bit is on - otherwise root
+ * will always succeed, and we don't want that to happen unless the
+ * file really is executable.
+ */
+ if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
+ return (EACCES);
+
+ /* Disallow zero length files */
+ if (vap->va_data_size == 0)
+ return (ENOEXEC);
+
+ imgp->ip_arch_offset = (user_size_t)0;
+ imgp->ip_arch_size = vap->va_data_size;
+
+ /* Disable setuid-ness for traced programs or if MNT_NOSUID */
+ if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) {
+ vap->va_mode &= ~(VSUID | VSGID);
+#if CONFIG_MACF
+ imgp->ip_no_trans = 1;
+#endif
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
+ if (error)
+ return (error);
+#endif
+
+ /* Check for execute permission */
+ action = KAUTH_VNODE_EXECUTE;
+ /* Traced images must also be readable */
+ if (p->p_lflag & P_LTRACED)
+ action |= KAUTH_VNODE_READ_DATA;
+ if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
+ return (error);
+
+#if 0
+ /* Don't let it run if anyone had it open for writing */
+ vnode_lock(vp);
+ if (vp->v_writecount) {
+ panic("going to return ETXTBSY %x", vp);
+ vnode_unlock(vp);
+ return (ETXTBSY);
+ }
+ vnode_unlock(vp);
+#endif
+
+
+#ifdef IMGPF_POWERPC
+ /*
+ * If the file we are about to attempt to load is the exec_handler_ppc,
+ * which is determined by matching the vattr fields against previously
+ * cached values, then we set the PowerPC environment flag.
+ */
+ if (vap->va_fsid == exec_archhandler_ppc.fsid &&
+ vap->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) {
+ imgp->ip_flags |= IMGPF_POWERPC;
+ }
+#endif /* IMGPF_POWERPC */
+
+ /* XXX May want to indicate to underlying FS that vnode is open */
+
+ return (error);
+}
+
+
+/*
+ * exec_handle_sugid
+ *
+ * Initially clear the P_SUGID in the process flags; if an SUGID process is
+ * exec'ing a non-SUGID image, then this is the point of no return.
+ *
+ * If the image being activated is SUGID, then replace the credential with a
+ * copy, disable tracing (unless the tracing process is root), reset the
+ * mach task port to revoke it, set the P_SUGID bit,
+ *
+ * If the saved user and group ID will be changing, then make sure it happens
+ * to a new credential, rather than a shared one.
+ *
+ * Set the security token (this is probably obsolete, given that the token
+ * should not technically be separate from the credential itself).
+ *
+ * Parameters: struct image_params * the image parameter block
+ *
+ * Returns: void No failure indication
+ *
+ * Implicit returns:
+ * <process credential> Potentially modified/replaced
+ * <task port> Potentially revoked
+ * <process flags> P_SUGID bit potentially modified
+ * <security token> Potentially modified
+ */
+static int
+exec_handle_sugid(struct image_params *imgp)
+{
+	kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
+	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
+	int i;
+	int is_member = 0;		/* set by the gid membership probe below */
+	int error = 0;
+	/*
+	 * NOTE(review): dev_null is an automatic variable, so the "cache the
+	 * first time around" logic below can never persist between calls to
+	 * this function — confirm whether this was meant to be static.
+	 */
+	struct vnode *dev_null = NULLVP;
+#if CONFIG_MACF
+	kauth_cred_t my_cred;
+#endif
+
+#if CONFIG_MACF
+	/* Ask the MAC policies up front whether this exec wants a label transition. */
+	int mac_transition;
+	mac_transition = mac_cred_check_label_update_execve(imgp->ip_vfs_context, imgp->ip_vp,
+	imgp->ip_scriptlabelp, imgp->ip_execlabelp, p);
+#endif
+
+	/* Start from a clean slate: clear P_SUGID; it is re-set below if warranted. */
+	OSBitAndAtomic(~((uint32_t)P_SUGID), (UInt32 *)&p->p_flag);
+
+	/*
+	 * Order of the following is important; group checks must go last,
+	 * as we use the success of the 'is_member' check combined with the
+	 * failure of the explicit match to indicate that we will be setting
+	 * the egid of the process even though the new process did not
+	 * require VSUID/VSGID bits in order for it to set the new group as
+	 * its egid.
+	 *
+	 * Note:	Technically, by this we are implying a call to
+	 *		setegid() in the new process, rather than implying
+	 *		it used its VSGID bit to set the effective group,
+	 *		even though there is no code in that process to make
+	 *		such a call.
+	 */
+	if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
+	    kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
+#if CONFIG_MACF
+	    mac_transition ||	/* A policy wants to transition */
+#endif
+	    ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
+	    ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &is_member) || !is_member) ||
+	    (cred->cr_gid != imgp->ip_origvattr->va_gid)))) {
+
+		/*
+		 * Replace the credential with a copy of itself if euid or
+		 * egid change.
+		 *
+		 * Note:	setuid binaries will automatically opt out of
+		 *		group resolver participation as a side effect
+		 *		of this operation. This is an intentional
+		 *		part of the security model, which requires a
+		 *		participating credential be established by
+		 *		escalating privilege, setting up all other
+		 *		aspects of the credential including whether
+		 *		or not to participate in external group
+		 *		membership resolution, then dropping their
+		 *		effective privilege to that of the desired
+		 *		final credential state.
+		 */
+		if (imgp->ip_origvattr->va_mode & VSUID) {
+			p->p_ucred = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
+		}
+		if (imgp->ip_origvattr->va_mode & VSGID) {
+			p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
+		}
+
+#if CONFIG_MACF
+		/*
+		 * XXXMAC: In FreeBSD, we set P_SUGID on a MAC transition
+		 * to protect against debuggers being attached by an
+		 * insufficiently privileged process onto the result of
+		 * a transition to a more privileged credential. This is
+		 * too conservative on FreeBSD, but we need to do
+		 * something similar here, or risk vulnerability.
+		 *
+		 * Before we make the call into the MAC policies, get a new
+		 * duplicate credential, so they can modify it without
+		 * modifying any others sharing it.
+		 */
+		if (mac_transition && !imgp->ip_no_trans) {
+			kauth_proc_label_update_execve(p,
+				imgp->ip_vfs_context,
+				imgp->ip_vp,
+				imgp->ip_scriptlabelp, imgp->ip_execlabelp);
+
+			/* Propagate the (possibly new) cred label to the Mach task. */
+			my_cred = kauth_cred_proc_ref(p);
+			mac_task_label_update_cred(my_cred, p->task);
+			kauth_cred_unref(&my_cred);
+		}
+#endif
+		/*
+		 * Have mach reset the task and thread ports.
+		 * We don't want anyone who had the ports before
+		 * a setuid exec to be able to access/control the
+		 * task/thread after.
+		 */
+		if (current_task() == p->task) {
+			ipc_task_reset(p->task);
+			ipc_thread_reset(current_thread());
+		}
+
+		/*
+		 * If 'is_member' is non-zero, then we passed the VSUID and
+		 * MACF checks, and successfully determined that the previous
+		 * cred was a member of the VSGID group, but that it was not
+		 * the default at the time of the execve. So we don't set the
+		 * P_SUGID on the basis of simply running this code.
+		 */
+		if (!is_member)
+			OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag);
+
+		/* Cache the vnode for /dev/null the first time around */
+		if (dev_null == NULLVP) {
+			struct nameidata nd1;
+
+			/* Kernel-space path lookup for /dev/null. */
+			NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE32,
+				CAST_USER_ADDR_T("/dev/null"),
+				imgp->ip_vfs_context);
+
+			if ((error = vn_open(&nd1, FREAD, 0)) == 0) {
+				dev_null = nd1.ni_vp;
+				/*
+				 * vn_open returns with both a use_count
+				 * and an io_count on the found vnode
+				 * drop the io_count, but keep the use_count
+				 */
+				vnode_put(nd1.ni_vp);
+			}
+		}
+
+		/* Radar 2261856; setuid security hole fix */
+		/* Patch from OpenBSD: A. Ramesh */
+		/*
+		 * XXX For setuid processes, attempt to ensure that
+		 * stdin, stdout, and stderr are already allocated.
+		 * We do not want userland to accidentally allocate
+		 * descriptors in this range which has implied meaning
+		 * to libc.
+		 */
+		if (dev_null != NULLVP) {
+			/* Fill any free slot among fds 0..2 with /dev/null, read-only. */
+			for (i = 0; i < 3; i++) {
+				struct fileproc *fp;
+				int indx;
+
+				/* Skip descriptors the process already has open. */
+				if (p->p_fd->fd_ofiles[i] != NULL)
+					continue;
+
+				if ((error = falloc(p, &fp, &indx, imgp->ip_vfs_context)) != 0)
+					continue;
+
+				if ((error = vnode_ref_ext(dev_null, FREAD)) != 0) {
+					fp_free(p, indx, fp);
+					break;
+				}
+
+				fp->f_fglob->fg_flag = FREAD;
+				fp->f_fglob->fg_type = DTYPE_VNODE;
+				fp->f_fglob->fg_ops = &vnops;
+				fp->f_fglob->fg_data = (caddr_t)dev_null;
+
+				proc_fdlock(p);
+				procfdtbl_releasefd(p, indx, NULL);
+				fp_drop(p, indx, fp, 1);
+				proc_fdunlock(p);
+			}
+			/*
+			 * for now we need to drop the reference immediately
+			 * since we don't have any mechanism in place to
+			 * release it before starting to unmount "/dev"
+			 * during a reboot/shutdown
+			 */
+			vnode_rele(dev_null);
+			dev_null = NULLVP;
+		}
+	}
+
+	/*
+	 * Implement the semantic where the effective user and group become
+	 * the saved user and group in exec'ed programs.
+	 */
+	p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), p->p_ucred->cr_gid);
+
+	/* XXX Obsolete; security token should not be separate from cred */
+	set_security_token(p);
+
+	return(error);
+}
+
+
+/*
+ * create_unix_stack
+ *
+ * Description: Set the user stack address for the process to the provided
+ * address. If a custom stack was not set as a result of the
+ * load process (i.e. as specified by the image file for the
+ * executable), then allocate the stack in the provided map and
+ * set up appropriate guard pages for enforcing administrative
+ * limits on stack growth, if they end up being needed.
+ *
+ * Parameters: p Process to set stack on
+ * user_stack Address to set stack for process to
+ * customstack FALSE if no custom stack in binary
+ * map Address map in which to allocate the
+ * new stack, if 'customstack' is FALSE
+ *
+ * Returns: KERN_SUCCESS Stack successfully created
+ * !KERN_SUCCESS Mach failure code
+ */
+/*
+ * Record the user stack address in the proc, and — when the image did not
+ * supply its own stack — reserve the maximum stack region in 'map' and
+ * protect the portion beyond the current rlimit as a guard area.
+ *
+ * Returns KERN_SUCCESS, or the failing Mach VM call's error code (after
+ * undoing the allocation when the protect step fails).
+ */
+static kern_return_t
+create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack,
+	proc_t p)
+{
+	mach_vm_size_t alloc_size, guard_size;
+	mach_vm_offset_t stack_base, guard_start;
+	kern_return_t result;
+
+	/* Publish the stack address under the proc lock. */
+	proc_lock(p);
+	p->user_stack = user_stack;
+	proc_unlock(p);
+
+	/* An image-supplied stack needs no allocation here. */
+	if (customstack)
+		return KERN_SUCCESS;
+
+	/*
+	 * Reserve room for the largest stack we will ever authorize, plus
+	 * an extra page that acts as a guard against overflow.
+	 */
+	alloc_size = mach_vm_round_page(MAXSSIZ);
+#if STACK_GROWTH_UP
+	stack_base = mach_vm_trunc_page(user_stack);
+#else /* STACK_GROWTH_UP */
+	stack_base = mach_vm_trunc_page(user_stack - alloc_size);
+#endif /* STACK_GROWTH_UP */
+	result = mach_vm_allocate(map, &stack_base, alloc_size,
+				  VM_MAKE_TAG(VM_MEMORY_STACK) |
+				  VM_FLAGS_FIXED);
+	if (result != KERN_SUCCESS)
+		return result;
+
+	/*
+	 * Deny access to everything beyond the current administrative
+	 * stack-size limit for this process.
+	 */
+	guard_start = stack_base;
+#if STACK_GROWTH_UP
+	guard_start += unix_stack_size(p);
+#endif /* STACK_GROWTH_UP */
+	guard_start = mach_vm_round_page(guard_start);
+	guard_size = mach_vm_trunc_page(alloc_size - unix_stack_size(p));
+	result = mach_vm_protect(map,
+				 guard_start,
+				 guard_size,
+				 FALSE,
+				 VM_PROT_NONE);
+	if (result != KERN_SUCCESS) {
+		/* Roll back the reservation so we fail cleanly. */
+		(void) mach_vm_deallocate(map, stack_base, alloc_size);
+		return result;
+	}
+
+	return KERN_SUCCESS;
+}
+
+#include <sys/reboot.h>
+
+/* Path of the bootstrap program exec'd as PID 1 (launchd on this system). */
+static char init_program_name[128] = "/sbin/launchd";
+
+/* Argument block handed to the in-kernel execve() call made for PID 1. */
+struct execve_args init_exec_args;
+
+/*
+ * load_init_program
+ *
+ * Description: Load the "init" program; in most cases, this will be "launchd"
+ *
+ * Parameters: p Process to call execve() to create
+ * the "init" program
+ *
+ * Returns: (void)
+ *
+ * Notes: The process that is passed in is the first manufactured
+ * process on the system, and gets here via bsd_ast() firing
+ * for the first time. This is done to ensure that bsd_init()
+ * has run to completion.
+ */
+void
+load_init_program(proc_t p)
+{
+	vm_offset_t init_addr;
+	int argc = 0;
+	char *argv[3];
+	int error;
+	register_t retval[2];
+
+	/*
+	 * Copy out program name.
+	 *
+	 * The page returned by vm_allocate is zero-filled, which also
+	 * supplies NUL termination for the strings placed in it below.
+	 */
+
+	init_addr = VM_MIN_ADDRESS;
+	(void) vm_allocate(current_map(), &init_addr, PAGE_SIZE,
+				VM_FLAGS_ANYWHERE);
+	if (init_addr == 0)
+		init_addr++;
+
+	/*
+	 * Copy exactly sizeof(init_program_name) bytes; the previous
+	 * "sizeof(...)+1" read one byte past the end of the array (the
+	 * cast binds tighter than the '+'). The string is already NUL
+	 * terminated within the buffer.
+	 */
+	(void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr),
+			(unsigned) sizeof(init_program_name));
+
+	argv[argc++] = (char *) init_addr;
+	init_addr += sizeof(init_program_name);
+	init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
+
+	/*
+	 * Put out first (and only) argument, similarly.
+	 * Assumes everything fits in a page as allocated
+	 * above; the zero-filled page provides the terminating NUL.
+	 */
+	if (boothowto & RB_SINGLE) {
+		const char *init_args = "-s";
+
+		copyout(init_args, CAST_USER_ADDR_T(init_addr),
+			strlen(init_args));
+
+		argv[argc++] = (char *)init_addr;
+		init_addr += strlen(init_args);
+		init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
+
+	}
+
+	/*
+	 * Null-end the argument list
+	 */
+	argv[argc] = NULL;
+
+	/*
+	 * Copy out the argument list.
+	 */
+
+	(void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr),
+			(unsigned) sizeof(argv));
+
+	/*
+	 * Set up argument block for fake call to execve.
+	 */
+
+	init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
+	init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
+	init_exec_args.envp = CAST_USER_ADDR_T(0);
+
+	/*
+	 * So that mach_init task is set with uid,gid 0 token
+	 */
+	set_security_token(p);
+
+	/* PID 1 must come up; any exec failure here is fatal to the system. */
+	error = execve(p, &init_exec_args, retval);
+	if (error)
+		panic("Process 1 exec of %s failed, errno %d\n",
+			init_program_name, error);
+}
+
+/*
+ * load_return_to_errno
+ *
+ * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
+ *
+ * Parameters: lrtn Mach error number
+ *
+ * Returns: (int) BSD error number
+ * 0 Success
+ * EBADARCH Bad architecture
+ * EBADMACHO Bad Mach object file
+ * ESHLIBVERS Bad shared library version
+ * ENOMEM Out of memory/resource shortage
+ * EACCES Access denied
+ *	ENOENT			Entry not found (usually "file
+ *					does not exist")
+ * EIO An I/O error occurred
+ * EBADEXEC The executable is corrupt/unknown
+ */
+/*
+ * Translate a Mach loader status (load_return_t) into the corresponding
+ * BSD errno. Unrecognized codes — including LOAD_FAILURE — fall through
+ * to EBADEXEC.
+ */
+static int
+load_return_to_errno(load_return_t lrtn)
+{
+	if (lrtn == LOAD_SUCCESS)
+		return 0;
+	if (lrtn == LOAD_BADARCH)
+		return EBADARCH;
+	if (lrtn == LOAD_BADMACHO)
+		return EBADMACHO;
+	if (lrtn == LOAD_SHLIB)
+		return ESHLIBVERS;
+	if (lrtn == LOAD_NOSPACE || lrtn == LOAD_RESOURCE)
+		return ENOMEM;
+	if (lrtn == LOAD_PROTECT)
+		return EACCES;
+	if (lrtn == LOAD_ENOENT)
+		return ENOENT;
+	if (lrtn == LOAD_IOERROR)
+		return EIO;
+	/* LOAD_FAILURE and anything else unrecognized */
+	return EBADEXEC;
+}
+
+#include <mach/mach_types.h>
+#include <mach/vm_prot.h>
+#include <mach/semaphore.h>
+#include <mach/sync_policy.h>
+#include <kern/clock.h>
+#include <mach/kern_return.h>
+
+extern semaphore_t execve_semaphore;
+
+/*
+ * execargs_alloc
+ *
+ * Description: Allocate the block of memory used by the execve arguments.
+ * At the same time, we allocate a page so that we can read in
+ * the first page of the image.
+ *
+ * Parameters: struct image_params * the image parameter block
+ *
+ * Returns: 0 Success
+ * EINVAL Invalid argument
+ * EACCES Permission denied
+ * EINTR Interrupted function
+ * ENOMEM Not enough space
+ *
+ * Notes: This is a temporary allocation into the kernel address space
+ * to enable us to copy arguments in from user space. This is
+ * necessitated by not mapping the process calling execve() into
+ * the kernel address space during the execve() system call.
+ *
+ * We assemble the argument and environment, etc., into this
+ * region before copying it as a single block into the child
+ * process address space (at the top or bottom of the stack,
+ * depending on which way the stack grows; see the function
+ * exec_copyout_strings() for details).
+ *
+ * This ends up with a second (possibly unnecessary) copy compared
+ *	with assembling the data directly into the child address space,
+ * instead, but since we cannot be guaranteed that the parent has
+ * not modified its environment, we can't really know that it's
+ * really a block there as well.
+ */
+static int
+execargs_alloc(struct image_params *imgp)
+{
+ kern_return_t kret;
+
+ kret = semaphore_wait(execve_semaphore);
+ if (kret != KERN_SUCCESS)
+ switch (kret) {
+ default:
+ return (EINVAL);
+ case KERN_INVALID_ADDRESS:
+ case KERN_PROTECTION_FAILURE:
+ return (EACCES);
+ case KERN_ABORTED:
+ case KERN_OPERATION_TIMED_OUT:
+ return (EINTR);
+ }
+
+ kret = kmem_alloc_pageable(bsd_pageable_map, (vm_offset_t *)&imgp->ip_strings, NCARGS + PAGE_SIZE);
+ imgp->ip_vdata = imgp->ip_strings + NCARGS;
+ if (kret != KERN_SUCCESS) {
+ semaphore_signal(execve_semaphore);
+ return (ENOMEM);
+ }