]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/kern_exec.c
xnu-517.tar.gz
[apple/xnu.git] / bsd / kern / kern_exec.c
index 1db47973a1063ca3a72e037f642a77b746d9844e..c39e7ecc2f7a1c09d2e028065992ab0ce47ab86e 100644 (file)
@@ -1,21 +1,24 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <sys/acct.h>
+#include <sys/kern_audit.h>
 #include <sys/exec.h>
 #include <sys/kdebug.h>
 #include <sys/signal.h>
+#include <sys/aio_kern.h>
 
 #include <mach/vm_param.h>
 
 #include <vm/vm_map.h>
+
+extern vm_map_t vm_map_switch(vm_map_t    map); /* XXX */
+
 #include <vm/vm_kern.h>
+#include <vm/vm_shared_memory_server.h>
 
 #include <kern/thread.h>
 #include <kern/task.h>
 #include <mach-o/fat.h>
 #include <mach-o/loader.h>
 #include <machine/vmparam.h>
+#if KTRACE   
+#include <sys/ktrace.h>
+#endif
+
+int    app_profile = 0;
 
 extern vm_map_t bsd_pageable_map;
 
@@ -112,6 +126,8 @@ extern vm_map_t bsd_pageable_map;
 
 static int load_return_to_errno(load_return_t lrtn);
 int execve(struct proc *p, struct execve_args *uap, register_t *retval);
+static int execargs_alloc(vm_offset_t *addrp);
+static int execargs_free(vm_offset_t addr);
 
 int
 execv(p, args, retval)
@@ -123,6 +139,45 @@ execv(p, args, retval)
        return (execve(p, args, retval));
 }
 
+extern char classichandler[32];
+extern long classichandler_fsid;
+extern long classichandler_fileid;
+
+/*
+ * Helper routine to get rid of a loop in execve.  Given a pointer to
+ * something for the arg list (which might be in kernel space or in user
+ * space), copy it into the kernel buffer at the currentWritePt.  This code
+ * does the proper thing to get the data transferred.
+ * bytesWritten, currentWritePt, and bytesLeft are kept up-to-date.
+ */
+
+static int copyArgument(char *argument, int pointerInKernel,
+                       int *bytesWritten,char **currentWritePt,
+                       int *bytesLeft){
+        int error = 0;
+        do {
+                size_t len = 0;
+               if (*bytesLeft <= 0) {
+                       error = E2BIG;
+                       break;
+               }
+               if (pointerInKernel == UIO_SYSSPACE) {
+                       error = copystr(argument, *currentWritePt, (unsigned)*bytesLeft, &len);
+               } else  {
+              /*
+               * pointer in kernel == UIO_USERSPACE
+               * Copy in from user space.
+               */ 
+                 error = copyinstr((caddr_t)argument, *currentWritePt, (unsigned)*bytesLeft,
+                           &len);
+               }
+               *currentWritePt += len;
+               *bytesWritten += len;
+               *bytesLeft -= len;
+       } while (error == ENAMETOOLONG);
+       return error;
+}
+
 /* ARGSUSED */
 int
 execve(p, uap, retval)
@@ -132,12 +187,14 @@ execve(p, uap, retval)
 {
        register struct ucred *cred = p->p_ucred;
        register struct filedesc *fdp = p->p_fd;
-       register nc;
-       register char *cp;
+       int nc;
+       char *cp;
        int na, ne, ucp, ap, cc;
        unsigned len;
-       int indir;
-       char *sharg;
+       int executingInterpreter=0;
+
+       int executingClassic=0;
+       char binaryWithClassicName[sizeof(p->p_comm)] = {0};
        char *execnamep;
        struct vnode *vp;
        struct vattr vattr;
@@ -146,6 +203,10 @@ execve(p, uap, retval)
        struct nameidata nd;
        struct ps_strings ps;
 #define        SHSIZE  512
+       /* Argument(s) to an interpreter.  If we're executing a shell
+        * script, the name (#!/bin/csh) is allowed to be followed by
+        * arguments.  cfarg holds these arguments.
+        */
        char cfarg[SHSIZE];
        boolean_t               is_fat;
        kern_return_t           ret;
@@ -155,7 +216,13 @@ execve(p, uap, retval)
        load_return_t           lret;
        load_result_t           load_result;
        struct uthread          *uthread;
+       vm_map_t old_map;
+       vm_map_t map;
        int i;
+       boolean_t                               clean_regions = FALSE;
+       shared_region_mapping_t shared_region = NULL;
+    shared_region_mapping_t initial_region = NULL;
+
        union {
                /* #! and name of interpreter */
                char                    ex_shell[SHSIZE];
@@ -170,28 +237,43 @@ execve(p, uap, retval)
        int savedpathlen = 0;
        vm_offset_t *execargsp;
        char *cpnospace;
-       task_t tsk;
+       task_t  task;
+       task_t new_task;
+       thread_act_t thr_act;
        int numthreads;
-
-       tsk = current_task();
-
-
-       if(tsk != kernel_task) { 
-               numthreads = get_task_numacts(tsk);
-               if (numthreads <= 0 )
-                       return(EINVAL);
-               if (numthreads > 1) {
-                       return(EOPNOTSUPP);
+       int vfexec=0;
+       unsigned long arch_offset =0;
+       unsigned long arch_size = 0;
+        char           *ws_cache_name = NULL;  /* used for pre-heat */
+
+        /*
+         * XXXAUDIT: Currently, we only audit the pathname of the binary.
+         * There may also be poor interaction with dyld.
+         */
+
+       cfarg[0] = '\0'; /* initialize to null value. */
+       task = current_task();
+       thr_act = current_act();
+       uthread = get_bsdthread_info(thr_act);
+
+       if (uthread->uu_flag & P_VFORK) {
+                       vfexec = 1; /* Mark in exec */
+       } else {
+               if (task != kernel_task) { 
+                       numthreads = get_task_numacts(task);
+                       if (numthreads <= 0 )
+                               return(EINVAL);
+                       if (numthreads > 1) {
+                               return(EOPNOTSUPP);
+                       }
                }
        }
 
-       ret = kmem_alloc_pageable(bsd_pageable_map, &execargs, NCARGS);
-       if (ret != KERN_SUCCESS)
-               return(ENOMEM);
-
-       uthread = get_bsdthread_info(current_act());
+       error = execargs_alloc(&execargs);
+       if (error)
+               return(error);
 
-       savedpath = execargs;
+       savedpath = (char *)execargs;
 
        /*
         * To support new app package launching for Mac OS X, the dyld
@@ -202,25 +284,52 @@ execve(p, uap, retval)
         * We have to do this before namei() because in case of
         * symbolic links, namei() would overwrite the original "path".
         * In case the last symbolic link resolved was a relative pathname
-        * we would loose the original "path", which could be an
+        * we would lose the original "path", which could be an
         * absolute pathname. This might be unacceptable for dyld.
         */
        /* XXX We could optimize to avoid copyinstr in the namei() */
+
+       /*
+        * XXXAUDIT: Note: the double copyin introduces an audit
+        * race.  To correct this race, we must use a single
+        * copyin().
+        */
        
-       error = copyinstr(uap->fname, savedpath, MAXPATHLEN, &savedpathlen);
-       if (error)
-               return (error);
+       error = copyinstr(uap->fname, savedpath,
+                               MAXPATHLEN, (size_t *)&savedpathlen);
+       if (error) {
+               execargs_free(execargs);
+               return(error);
+       }
        /*
         * copyinstr will put in savedpathlen, the count of
         * characters (including NULL) in the path.
+        * No app profiles under chroot
         */
-       
+
+       if((fdp->fd_rdir == NULLVP) && (app_profile != 0)) {
+
+               /* grab the name of the file out of its path */
+               /* we will need this for lookup within the   */
+               /* name file */
+               ws_cache_name = savedpath + savedpathlen;
+                       while (ws_cache_name[0] != '/') {
+                               if(ws_cache_name == savedpath) {
+                                       ws_cache_name--;
+                                       break;
+                       }
+                               ws_cache_name--;
+                       }
+                       ws_cache_name++;
+       }
+
        /* Save the name aside for future use */
        execargsp = (vm_offset_t *)((char *)(execargs) + savedpathlen);
        
-       NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME,
+       NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME | AUDITVNPATH1,
                                        UIO_USERSPACE, uap->fname, p);
-       if ((error = namei(&nd)))
+       error = namei(&nd);
+       if (error)
                goto bad1;
        vp = nd.ni_vp;
        VOP_LEASE(vp, p, p->p_ucred, LEASE_READ);
@@ -234,7 +343,6 @@ execve(p, uap, retval)
                goto bad;
        }
 
-       indir = 0;
        if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED))
                origvattr.va_mode &= ~(VSUID | VSGID);
                
@@ -278,27 +386,46 @@ again:
 #endif /* lint */
        mach_header = &exdata.mach_header;
        fat_header = &exdata.fat_header;
-       if (mach_header->magic == MH_MAGIC)
+       if ((mach_header->magic == MH_CIGAM) &&
+           (classichandler[0] == 0)) {
+               error = EBADARCH;
+               goto bad;
+       } else if ((mach_header->magic == MH_MAGIC) || 
+               (mach_header->magic == MH_CIGAM)) {
            is_fat = FALSE;
-       else if (fat_header->magic == FAT_MAGIC ||
-                fat_header->magic == FAT_CIGAM)
+       } else if ((fat_header->magic == FAT_MAGIC) ||
+                      (fat_header->magic == FAT_CIGAM)) {
            is_fat = TRUE;
-       else if (mach_header->magic == MH_CIGAM) {
-           error = EBADARCH;
-           goto bad;
        } else {
+         /* If we've already redirected once from an interpreted file
+          * to an interpreter, don't permit the second time.
+          */
                if (exdata.ex_shell[0] != '#' ||
                    exdata.ex_shell[1] != '!' ||
-                   indir) {
+                   executingInterpreter) {
                        error = ENOEXEC;
                        goto bad;
                }
+               if (executingClassic == 1) {
+                 error = EBADARCH;
+                 goto bad;
+               }
                cp = &exdata.ex_shell[2];               /* skip "#!" */
                while (cp < &exdata.ex_shell[SHSIZE]) {
-                       if (*cp == '\t')
+                       if (*cp == '\t')                /* convert all tabs to spaces */
                                *cp = ' ';
-                       else if (*cp == '\n') {
-                               *cp = '\0';
+                       else if (*cp == '\n' || *cp == '#') {
+                               *cp = '\0';                     /* trunc the line at nl or comment */
+
+                               /* go back and remove the spaces before the /n or # */
+                               /* todo: do we have to do this if we fix the passing of args to shells ? */
+                               if ( cp != &exdata.ex_shell[2] ) {
+                                       do {
+                                               if ( *(cp-1) != ' ')
+                                                       break;
+                                               *(--cp) = '\0';
+                                       } while ( cp != &exdata.ex_shell[2] );
+                               }
                                break;
                        }
                        cp++;
@@ -330,14 +457,15 @@ again:
                 * savedpathlen. +1 for NULL.
                 */
                savedpathlen = (cpnospace - execnamep + 1);
-               error = copystr(execnamep, savedpath, savedpathlen, &savedpathlen);
+               error = copystr(execnamep, savedpath,
+                                       savedpathlen, (size_t *)&savedpathlen);
                if (error)
                        goto bad;
 
                /* Save the name aside for future use */
                execargsp = (vm_offset_t *)((char *)(execargs) + savedpathlen);
 
-               indir = 1;
+               executingInterpreter= 1;
                vput(vp);
                nd.ni_cnd.cn_nameiop = LOOKUP;
                nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) |
@@ -374,56 +502,7 @@ again:
        /*
         * Copy arguments into file in argdev area.
         */
-       if (uap->argp) for (;;) {
-               ap = NULL;
-               sharg = NULL;
-               if (indir && na == 0) {
-                       sharg = nd.ni_cnd.cn_nameptr;
-                       ap = (int)sharg;
-                       uap->argp++;            /* ignore argv[0] */
-               } else if (indir && (na == 1 && cfarg[0])) {
-                       sharg = cfarg;
-                       ap = (int)sharg;
-               } else if (indir && (na == 1 || (na == 2 && cfarg[0])))
-                       ap = (int)uap->fname;
-               else if (uap->argp) {
-                       ap = fuword((caddr_t)uap->argp);
-                       uap->argp++;
-               }
-               if (ap == NULL && uap->envp) {
-                       uap->argp = NULL;
-                       if ((ap = fuword((caddr_t)uap->envp)) != NULL)
-                               uap->envp++, ne++;
-               }
-               if (ap == NULL)
-                       break;
-               na++;
-               if (ap == -1) {
-                       error = EFAULT;
-                       break;
-               }
-               do {
-                       if (nc >= (NCARGS - savedpathlen - 2*NBPW -1)) {
-                               error = E2BIG;
-                               break;
-                       }
-                       if (sharg) {
-                               error = copystr(sharg, cp, (unsigned)cc, &len);
-                               sharg += len;
-                       } else {
-                               error = copyinstr((caddr_t)ap, cp, (unsigned)cc,
-                                   &len);
-                               ap += len;
-                       }
-                       cp += len;
-                       nc += len;
-                       cc -= len;
-               } while (error == ENAMETOOLONG);
-               if (error) {
-                       goto bad;
-               }
-       }
-       nc = (nc + NBPW-1) & ~(NBPW-1);
+
 
        /*
         * If we have a fat file, find "our" executable.
@@ -432,7 +511,8 @@ again:
                /*
                 * Look up our architecture in the fat file.
                 */
-               lret = fatfile_getarch(vp, (vm_offset_t)fat_header, &fat_arch);
+               lret = fatfile_getarch_affinity(vp,(vm_offset_t)fat_header, &fat_arch,
+                                               (p->p_flag & P_AFFINITY));
                if (lret != LOAD_SUCCESS) {
                        error = load_return_to_errno(lret);
                        goto bad;
@@ -454,29 +534,251 @@ again:
                }
 
                /* Is what we found a Mach-O executable */
-               if (mach_header->magic != MH_MAGIC) {
+               if ((mach_header->magic != MH_MAGIC) &&
+                   (mach_header->magic != MH_CIGAM)) {
                        error = ENOEXEC;
                        goto bad;
                }
 
-               /*
-                *      Load the Mach-O file.
-                */
-        VOP_UNLOCK(vp, 0, p);
-               lret = load_machfile(vp, mach_header, fat_arch.offset,
-                                   fat_arch.size, &load_result);
+               arch_offset = fat_arch.offset;
+               arch_size = fat_arch.size;
        } else {
                /*
                 *      Load the Mach-O file.
                 */
-               VOP_UNLOCK(vp, 0, p);
-               lret = load_machfile(vp, mach_header, 0,
-                                   (u_long)vattr.va_size, &load_result);
+               arch_offset = 0;
+               arch_size = (u_long)vattr.va_size;
+       }
+
+       if ( ! check_cpu_subtype(mach_header->cpusubtype) ) {
+               error = EBADARCH;
+               goto bad;
+       }
+
+       if (mach_header->magic == MH_CIGAM) {
+
+               int classicBinaryLen = nd.ni_cnd.cn_namelen;
+               if (classicBinaryLen > MAXCOMLEN)
+               classicBinaryLen = MAXCOMLEN;
+               bcopy((caddr_t)nd.ni_cnd.cn_nameptr,
+                               (caddr_t)binaryWithClassicName, 
+                               (unsigned)classicBinaryLen);
+               binaryWithClassicName[classicBinaryLen] = '\0';
+               executingClassic = 1;
+
+               vput(vp); /* cleanup? */
+               nd.ni_cnd.cn_nameiop = LOOKUP;
+
+               nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) |
+               /*      (FOLLOW | LOCKLEAF | SAVENAME) */
+               (LOCKLEAF | SAVENAME);
+                nd.ni_segflg = UIO_SYSSPACE;
+
+                       nd.ni_dirp = classichandler;
+                       if ((error = namei(&nd)) != 0) {
+                       error = EBADARCH;
+                               goto bad1;
+               }
+               vp = nd.ni_vp;
+
+               VOP_LEASE(vp,p,cred,LEASE_READ);
+               if ((error = VOP_GETATTR(vp,&vattr,p->p_ucred,p))) {
+                       goto bad;
+               }
+               goto again;
+       }
+
+       if (uap->argp != NULL) {
+         /* geez -- why would argp ever be NULL, and why would we proceed? */
+         
+         /* First, handle any argument massaging */
+         if (executingInterpreter && executingClassic) {
+           error = copyArgument(classichandler,UIO_SYSSPACE,&nc,&cp,&cc);
+           na++;
+           if (error) goto bad;
+           
+           /* Now name the interpreter. */
+           error = copyArgument(savedpath,UIO_SYSSPACE,&nc,&cp,&cc);
+           na++;
+           if (error) goto bad;
+
+           /*
+            * if we're running an interpreter, as we'd be passing the
+            * command line executable as an argument to the interpreter already.
+            * Doing "execve("myShellScript","bogusName",arg1,arg2,...)
+            * probably shouldn't ever let bogusName be seen by the shell
+            * script.
+            */
+
+           if (cfarg[0]) {
+             error = copyArgument(cfarg,UIO_SYSSPACE,&nc,&cp,&cc);
+             na++;
+             if (error) goto bad;
+           }
+
+           char* originalExecutable = uap->fname;
+           error = copyArgument(originalExecutable,UIO_USERSPACE,&nc,&cp,&cc);
+           na++;
+           /* remove argv[0] b/c we've already placed it at */
+           /* this point */
+           uap->argp++;
+           if (error) goto bad;
+
+           /* and continue with rest of the arguments. */
+         } else if (executingClassic) {
+           error = copyArgument(classichandler,UIO_SYSSPACE,&nc,&cp,&cc);
+           na++;
+           if (error) goto bad;
+           
+           char* originalExecutable = uap->fname;
+           error = copyArgument(originalExecutable,UIO_USERSPACE,&nc,&cp,&cc);
+           if (error) goto bad;
+           uap->argp++;
+           na++;
+
+           /* and rest of arguments continue as before. */
+         } else if (executingInterpreter) {
+           char *actualExecutable = nd.ni_cnd.cn_nameptr;
+           error = copyArgument(actualExecutable,UIO_SYSSPACE,&nc,&cp,&cc);
+           na++;
+           /* remove argv[0] b/c we just placed it in the arg list. */
+           uap->argp++;
+           if (error) goto bad;
+           /* Copy the argument in the interpreter first line if there
+            * was one. 
+            */
+           if (cfarg[0]) {
+             error = copyArgument(cfarg,UIO_SYSSPACE,&nc,&cp,&cc);
+             na++;
+             if (error) goto bad;
+           }
+           
+           /* copy the name of the file being interpreted, gotten from
+            * the structures passed in to execve.
+            */
+           error = copyArgument(uap->fname,UIO_USERSPACE,&nc,&cp,&cc);
+           na++;
+         }
+         /* Now, get rest of arguments */
+         while (uap->argp != NULL) {
+           char* userArgument = (char*)fuword((caddr_t) uap->argp);
+           uap->argp++;
+           if (userArgument == NULL) {
+             break;
+           } else if ((int)userArgument == -1) {
+             /* Um... why would it be -1? */
+             error = EFAULT;
+             goto bad;
+           }
+           error = copyArgument(userArgument, UIO_USERSPACE,&nc,&cp,&cc);
+           if (error) goto bad;
+           na++;
+         }      
+         /* Now, get the environment */
+         while (uap->envp != NULL) {
+           char *userEnv = (char*) fuword((caddr_t) uap->envp);
+           uap->envp++;
+           if (userEnv == NULL) {
+             break;
+           } else if ((int)userEnv == -1) {
+             error = EFAULT;
+             goto bad;
+           }
+           error = copyArgument(userEnv,UIO_USERSPACE,&nc,&cp,&cc);
+           if (error) goto bad;
+           na++;
+           ne++;
+         }
+       }
+
+       /* make sure there are nulls are the end!! */
+       {
+               int     cnt = 3;
+               char *mp = cp;
+
+               while ( cnt-- )
+                       *mp++ = '\0';   
        }
 
+       /* and round up count of bytes written to next word. */
+       nc = (nc + NBPW-1) & ~(NBPW-1);
+
+       if (vattr.va_fsid == classichandler_fsid &&
+               vattr.va_fileid == classichandler_fileid) {
+               executingClassic = 1;
+       }
+
+       if (vfexec) {
+               kern_return_t   result;
+
+               result = task_create_internal(task, FALSE, &new_task);
+               if (result != KERN_SUCCESS)
+               printf("execve: task_create failed. Code: 0x%x\n", result);
+               p->task = new_task;
+               set_bsdtask_info(new_task, p);
+               if (p->p_nice != 0)
+                       resetpriority(p);
+               task = new_task;
+               map = get_task_map(new_task);
+               result = thread_create(new_task, &thr_act);
+               if (result != KERN_SUCCESS)
+               printf("execve: thread_create failed. Code: 0x%x\n", result);
+               uthread = get_bsdthread_info(thr_act);
+       } else {
+               map = VM_MAP_NULL;
+       }
+
+       /*
+        *      Load the Mach-O file.
+        */
+       VOP_UNLOCK(vp, 0, p);   /* XXX */
+       if(ws_cache_name) {
+               tws_handle_startup_file(task, cred->cr_uid, 
+                       ws_cache_name, vp, &clean_regions);
+       }
+
+       vm_get_shared_region(task, &initial_region);
+    int parentIsClassic = (p->p_flag & P_CLASSIC);
+       struct vnode *rootDir = p->p_fd->fd_rdir;
+
+       if ((parentIsClassic && !executingClassic) ||
+               (!parentIsClassic && executingClassic)) {
+               shared_region = lookup_default_shared_region(
+                               (int)rootDir,
+                               (executingClassic ?
+                               CPU_TYPE_POWERPC :
+                               machine_slot[cpu_number()].cpu_type));
+               if (shared_region == NULL) {
+                       shared_region_mapping_t old_region;
+                       shared_region_mapping_t new_region;
+                       vm_get_shared_region(current_task(), &old_region);
+                       /* grrrr... this sets current_task(), not task
+                       * -- they're different (usually)
+                       */
+                       shared_file_boot_time_init(
+                               (int)rootDir,
+                               (executingClassic ?
+                               CPU_TYPE_POWERPC :
+                               machine_slot[cpu_number()].cpu_type));
+                       if ( current_task() != task ) {
+                               vm_get_shared_region(current_task(),&new_region);
+                               vm_set_shared_region(task,new_region);
+                               vm_set_shared_region(current_task(),old_region);
+                       }
+               } else {
+                       vm_set_shared_region(task, shared_region);
+               }
+               shared_region_mapping_dealloc(initial_region);
+       }
+       
+       lret = load_machfile(vp, mach_header, arch_offset,
+               arch_size, &load_result, thr_act, map, clean_regions);
+
        if (lret != LOAD_SUCCESS) {
                error = load_return_to_errno(lret);
-               goto bad;
+               vrele(vp);
+               vp = NULL;
+               goto badtoolate;
        }
 
        /* load_machfile() maps the vnode */
@@ -497,9 +799,10 @@ again:
                 * root set it.
                 */
                if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) {
-                       vrele(p->p_tracep);
+                       struct vnode *tvp = p->p_tracep;
                        p->p_tracep = NULL;
                        p->p_traceflag = 0;
+                       vrele(tvp);
                }
 #endif
                if (origvattr.va_mode & VSUID)
@@ -507,6 +810,14 @@ again:
                if (origvattr.va_mode & VSGID)
                        p->p_ucred->cr_gid = origvattr.va_gid;
 
+               /*
+                * Have mach reset the task port.  We don't want
+                * anyone who had the task port before a setuid
+                * exec to be able to access/control the task
+                * after.
+                */
+               ipc_task_reset(task);
+
                set_security_token(p);
                p->p_flag |= P_SUGID;
 
@@ -546,34 +857,40 @@ again:
        p->p_cred->p_svuid = p->p_ucred->cr_uid;
        p->p_cred->p_svgid = p->p_ucred->cr_gid;
 
-       if (p->p_flag & P_TRACED) {
+       KNOTE(&p->p_klist, NOTE_EXEC);
+
+       if (!vfexec && (p->p_flag & P_TRACED))
                psignal(p, SIGTRAP);
-#ifdef BSD_USE_APC
-               thread_apc_set(current_act(), bsd_ast);
-#else
-               ast_on(AST_BSD);
-#endif
-       }
 
        if (error) {
-               goto bad;
+               vrele(vp);
+               vp = NULL;
+               goto badtoolate;
        }
-       VOP_LOCK(vp,  LK_EXCLUSIVE | LK_RETRY, p);
+       VOP_LOCK(vp,  LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
        vput(vp);
        vp = NULL;
        
        if (load_result.unixproc &&
-               create_unix_stack(current_map(),
-                                 load_result.user_stack, p)) {
+               create_unix_stack(get_task_map(task),
+                                 load_result.user_stack, load_result.customstack, p)) {
                error = load_return_to_errno(LOAD_NOSPACE);
-               goto bad;
+               goto badtoolate;
+       }
+
+       if (vfexec) {
+               uthread->uu_ar0 = (void *)get_user_regs(thr_act);
        }
 
        /*
         * Copy back arglist if necessary.
         */
 
-       ucp = p->user_stack;
+
+       ucp = (int)p->user_stack;
+       if (vfexec) {
+               old_map = vm_map_switch(get_task_map(task));
+       }
        if (load_result.unixproc) {
                int pathptr;
                
@@ -584,14 +901,26 @@ again:
                 * the "path" at the begining of the execargs buffer.
                 * copy it just before the string area.
                 */
-                savedpathlen = (savedpathlen + NBPW-1) & ~(NBPW-1);
                len = 0;
-               pathptr = ucp - savedpathlen;
+               pathptr = ucp - ((savedpathlen + NBPW-1) & ~(NBPW-1));
                error = copyoutstr(savedpath, (caddr_t)pathptr,
-                                       (unsigned)savedpathlen, &len);
-               if (error)
-                       goto bad;
-               
+                                       (unsigned)savedpathlen, (size_t *)&len);
+               savedpathlen = (savedpathlen + NBPW-1) & ~(NBPW-1);
+
+               if (error) {
+                       if (vfexec)
+                               vm_map_switch(old_map);
+                       goto badtoolate;
+               }
+
+               /*
+                * Record the size of the arguments area so that
+                * sysctl_procargs() can return the argument area without having
+                * to parse the arguments.
+                */
+               p->p_argslen = (int)p->user_stack - pathptr;
+               p->p_argc = na - ne;    /* save argc for sysctl_procargs() */
+
                /* Save a NULL pointer below it */
                (void) suword((caddr_t)(pathptr - NBPW), 0);
 
@@ -606,7 +935,11 @@ again:
                 * and NBPW for the NULL after pointer to path.
                 */
                ap = ucp - na*NBPW - 3*NBPW - savedpathlen - 2*NBPW;
+#if defined(ppc)
+               thread_setuserstack(thr_act, ap);       /* Set the stack */
+#else
                uthread->uu_ar0[SP] = ap;
+#endif
                (void) suword((caddr_t)ap, na-ne); /* argc */
                nc = 0;
                cc = 0;
@@ -628,7 +961,7 @@ again:
                        (void) suword((caddr_t)ap, ucp);
                        do {
                                error = copyoutstr(cp, (caddr_t)ucp,
-                                                  (unsigned)cc, &len);
+                                                  (unsigned)cc, (size_t *)&len);
                                ucp += len;
                                cp += len;
                                nc += len;
@@ -641,11 +974,20 @@ again:
        }
        
        if (load_result.dynlinker) {
+#if defined(ppc)
+               ap = thread_adjuserstack(thr_act, -4);  /* Adjust the stack */
+#else
                ap = uthread->uu_ar0[SP] -= 4;
+#endif
                (void) suword((caddr_t)ap, load_result.mach_header);
        }
 
-#if defined(i386) || defined(ppc)
+       if (vfexec) {
+               vm_map_switch(old_map);
+       }
+#if defined(ppc)
+       thread_setentrypoint(thr_act, load_result.entry_point); /* Set the entry point */
+#elif defined(i386) 
        uthread->uu_ar0[PC] = load_result.entry_point;
 #else
 #error architecture not implemented!
@@ -657,26 +999,44 @@ again:
        /*
         * Reset signal state.
         */
-       execsigs(p);
+       execsigs(p, thr_act);
 
        /*
         * Close file descriptors
         * which specify close-on-exec.
         */
        fdexec(p);
+
+       /*
+        * need to cancel async IO requests that can be cancelled and wait for those
+        * already active.  MAY BLOCK!
+        */
+       _aio_exec( p );
+
        /* FIXME: Till vmspace inherit is fixed: */
-       if (p->vm_shm)
-               shmexit(p);
+       if (!vfexec && p->vm_shm)
+               shmexec(p);
+       /* Clean up the semaphores */
+       semexit(p);
 
        /*
         * Remember file name for accounting.
         */
        p->p_acflag &= ~AFORK;
-       if (nd.ni_cnd.cn_namelen > MAXCOMLEN)
-               nd.ni_cnd.cn_namelen = MAXCOMLEN;
-       bcopy((caddr_t)nd.ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
-           (unsigned)nd.ni_cnd.cn_namelen);
-       p->p_comm[nd.ni_cnd.cn_namelen] = '\0';
+       /* If the translated name isn't NULL, then we want to use
+        * that translated name as the name we show as the "real" name.
+        * Otherwise, use the name passed into exec.
+        */
+       if (0 != binaryWithClassicName[0]) {
+               bcopy((caddr_t)binaryWithClassicName, (caddr_t)p->p_comm,
+                       sizeof(binaryWithClassicName));
+       } else {
+               if (nd.ni_cnd.cn_namelen > MAXCOMLEN)
+                       nd.ni_cnd.cn_namelen = MAXCOMLEN;
+               bcopy((caddr_t)nd.ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
+                       (unsigned)nd.ni_cnd.cn_namelen);
+               p->p_comm[nd.ni_cnd.cn_namelen] = '\0';
+       }
 
        {
          /* This is for kdebug */
@@ -684,10 +1044,30 @@ again:
 
          /* Collect the pathname for tracing */
          kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
-         KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
-                               dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
+
+
+
+         if (vfexec)
+         {
+                 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
+                                       p->p_pid ,0,0,0, (unsigned int)thr_act);
+                 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
+                                       dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thr_act);
+         }
+         else
+         {
+                 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
+                                       p->p_pid ,0,0,0,0);
+                 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
+                                       dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
+         }
        }
 
+       if (executingClassic)
+               p->p_flag |= P_CLASSIC | P_AFFINITY;
+       else
+               p->p_flag &= ~P_CLASSIC;
+
        /*
         * mark as execed, wakeup the process that vforked (if any) and tell
         * it that it now has it's own resources back
@@ -698,18 +1078,29 @@ again:
                wakeup((caddr_t)p->p_pptr);
        }
 
+       if (vfexec && (p->p_flag & P_TRACED)) {
+                       psignal_vfork(p, new_task, thr_act, SIGTRAP);
+       }
+
+badtoolate:
+       if (vfexec) {
+               task_deallocate(new_task);
+               act_deallocate(thr_act);
+               if (error)
+                       error = 0;
+       }
 bad:
        FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI);
        if (vp)
                vput(vp);
 bad1:
-#if FIXME  /* [ */
-       if (execargs)
-               kmem_free_wakeup(bsd_pageable_map, execargs, NCARGS);
-#else  /* FIXME ][ */
        if (execargs)
-               kmem_free(bsd_pageable_map, execargs, NCARGS);
-#endif  /* FIXME ] */
+               execargs_free(execargs);
+       if (!error && vfexec) {
+                       vfork_return(current_act(), p->p_pptr, p, retval);
+                       (void) thread_resume(thr_act);
+                       return(0);
+       }
        return(error);
 }
 
@@ -717,23 +1108,23 @@ bad1:
 #define        unix_stack_size(p)      (p->p_rlimit[RLIMIT_STACK].rlim_cur)
 
 kern_return_t
-create_unix_stack(map, user_stack, p)
+create_unix_stack(map, user_stack, customstack, p)
        vm_map_t        map;
        vm_offset_t     user_stack;
+       int                     customstack;
        struct proc     *p;
 {
        vm_size_t       size;
        vm_offset_t     addr;
 
-       p->user_stack = user_stack;
-       size = round_page(unix_stack_size(p));
-#if    STACK_GROWTH_UP
-       /* stack always points to first address for stacks */
-       addr = user_stack;
-#else  STACK_GROWTH_UP
-       addr = trunc_page(user_stack - size);
-#endif /* STACK_GROWTH_UP */
-       return (vm_allocate(map,&addr, size, FALSE));
+       p->user_stack = (caddr_t)user_stack;
+       if (!customstack) {
+               size = round_page_64(unix_stack_size(p));
+               addr = trunc_page_32(user_stack - size);
+               return (vm_allocate(map, &addr, size,
+                                       VM_MAKE_TAG(VM_MEMORY_STACK) | FALSE));
+       } else
+               return(KERN_SUCCESS);
 }
 
 #include <sys/reboot.h>
@@ -757,8 +1148,6 @@ load_init_program(p)
        register_t retval[2];
        struct uthread * ut;
 
-       unix_master();
-
        error = 0;
 
        /* init_args are copied in string form directly from bootstrap */
@@ -851,8 +1240,6 @@ load_init_program(p)
 
                error = execve(p,&init_exec_args,retval);
        } while (error);
-
-       unix_release();
 }
 
 /*
@@ -863,7 +1250,7 @@ load_return_to_errno(load_return_t lrtn)
 {
        switch (lrtn) {
            case LOAD_SUCCESS:
-               return 0;
+                       return 0;
            case LOAD_BADARCH:
                return EBADARCH;
            case LOAD_BADMACHO:
@@ -871,10 +1258,14 @@ load_return_to_errno(load_return_t lrtn)
            case LOAD_SHLIB:
                return ESHLIBVERS;
            case LOAD_NOSPACE:
+           case LOAD_RESOURCE:
                return ENOMEM;
            case LOAD_PROTECT:
                return EACCES;
-           case LOAD_RESOURCE:
+               case LOAD_ENOENT:
+                       return ENOENT;
+               case LOAD_IOERROR:
+                       return EIO;
            case LOAD_FAILURE:
            default:
                return EBADEXEC;
@@ -906,3 +1297,61 @@ check_exec_access(p, vp, vap)
        return (0);
 }
 
+#include <mach/mach_types.h>
+#include <mach/vm_prot.h>
+#include <mach/semaphore.h>
+#include <mach/sync_policy.h>
+#include <kern/clock.h>
+#include <mach/kern_return.h>
+
+extern semaphore_t execve_semaphore;
+
+static int
+execargs_alloc(addrp)
+       vm_offset_t     *addrp;
+{
+       kern_return_t kret;
+
+       kret = semaphore_wait(execve_semaphore);
+       if (kret != KERN_SUCCESS)
+               switch (kret) {
+               default:
+                       return (EINVAL);
+               case KERN_INVALID_ADDRESS:
+               case KERN_PROTECTION_FAILURE:
+                       return (EACCES);
+               case KERN_ABORTED:
+               case KERN_OPERATION_TIMED_OUT:
+                       return (EINTR);
+               }
+
+       kret = kmem_alloc_pageable(bsd_pageable_map, addrp, NCARGS);
+       if (kret != KERN_SUCCESS) {
+               semaphore_signal(execve_semaphore);
+               return (ENOMEM);
+       }
+       return (0);
+}
+
+static int
+execargs_free(addr)
+       vm_offset_t     addr;
+{
+       kern_return_t kret;
+
+       kmem_free(bsd_pageable_map, addr, NCARGS);
+
+       kret = semaphore_signal(execve_semaphore);
+       switch (kret) { 
+       case KERN_INVALID_ADDRESS:
+       case KERN_PROTECTION_FAILURE:
+               return (EINVAL);
+       case KERN_ABORTED:
+       case KERN_OPERATION_TIMED_OUT:
+               return (EINTR);
+       case KERN_SUCCESS:
+               return(0);
+       default:
+               return (EINVAL);
+       }
+}