2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */ 
  30  * Copyright (c) 1982, 1986, 1989, 1991, 1993 
  31  *      The Regents of the University of California.  All rights reserved. 
  32  * (c) UNIX System Laboratories, Inc. 
  33  * All or some portions of this file are derived from material licensed 
  34  * to the University of California by American Telephone and Telegraph 
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with 
  36  * the permission of UNIX System Laboratories, Inc. 
  38  * Redistribution and use in source and binary forms, with or without 
  39  * modification, are permitted provided that the following conditions 
  41  * 1. Redistributions of source code must retain the above copyright 
  42  *    notice, this list of conditions and the following disclaimer. 
  43  * 2. Redistributions in binary form must reproduce the above copyright 
  44  *    notice, this list of conditions and the following disclaimer in the 
  45  *    documentation and/or other materials provided with the distribution. 
  46  * 3. All advertising materials mentioning features or use of this software 
  47  *    must display the following acknowledgement: 
  48  *      This product includes software developed by the University of 
  49  *      California, Berkeley and its contributors. 
  50  * 4. Neither the name of the University nor the names of its contributors 
  51  *    may be used to endorse or promote products derived from this software 
  52  *    without specific prior written permission. 
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  66  *      @(#)kern_fork.c 8.8 (Berkeley) 2/14/95 
  69  * NOTICE: This file was modified by McAfee Research in 2004 to introduce 
  70  * support for mandatory and extensible security protections.  This notice 
  71  * is included in support of clause 2.2 (b) of the Apple Public License, 
  75  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 
  76  * support for mandatory and extensible security protections.  This notice 
  77  * is included in support of clause 2.2 (b) of the Apple Public License, 
  81 #include <kern/assert.h> 
  82 #include <sys/param.h> 
  83 #include <sys/systm.h> 
  84 #include <sys/filedesc.h> 
  85 #include <sys/kernel.h> 
  86 #include <sys/malloc.h> 
  87 #include <sys/proc_internal.h> 
  88 #include <sys/kauth.h> 
  90 #include <sys/reason.h> 
  91 #include <sys/resourcevar.h> 
  92 #include <sys/vnode_internal.h> 
  93 #include <sys/file_internal.h> 
  95 #include <sys/codesign.h> 
  96 #include <sys/sysproto.h> 
  98 #include <sys/persona.h> 
 100 #include <sys/doc_tombstone.h> 
 102 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */ 
 103 extern void (*dtrace_proc_waitfor_exec_ptr
)(proc_t
); 
 104 extern void dtrace_proc_fork(proc_t
, proc_t
, int); 
 107  * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c, 
 108  * we will store its value before actually calling it. 
 110 static void (*dtrace_proc_waitfor_hook
)(proc_t
) = NULL
; 
 112 #include <sys/dtrace_ptss.h> 
 115 #include <security/audit/audit.h> 
 117 #include <mach/mach_types.h> 
 118 #include <kern/coalition.h> 
 119 #include <kern/kern_types.h> 
 120 #include <kern/kalloc.h> 
 121 #include <kern/mach_param.h> 
 122 #include <kern/task.h> 
 123 #include <kern/thread.h> 
 124 #include <kern/thread_call.h> 
 125 #include <kern/zalloc.h> 
 130 #include <security/mac_framework.h> 
 131 #include <security/mac_mach_internal.h> 
 134 #include <vm/vm_map.h> 
 135 #include <vm/vm_protos.h> 
 136 #include <vm/vm_shared_region.h> 
 138 #include <sys/shm_internal.h>   /* for shmfork() */ 
 139 #include <mach/task.h>          /* for thread_create() */ 
 140 #include <mach/thread_act.h>    /* for thread_resume() */ 
 144 #if CONFIG_MEMORYSTATUS 
 145 #include <sys/kern_memorystatus.h> 
 148 /* XXX routines which should have Mach prototypes, but don't */ 
 149 void thread_set_parent(thread_t parent
, int pid
); 
 150 extern void act_thread_catt(void *ctx
); 
 151 void thread_set_child(thread_t child
, int pid
); 
 152 void *act_thread_csave(void); 
 153 extern boolean_t 
task_is_exec_copy(task_t
); 
 154 int nextpidversion 
= 0; 
 157 thread_t 
cloneproc(task_t
, coalition_t 
*, proc_t
, int, int); 
 158 proc_t 
forkproc(proc_t
); 
 159 void forkproc_free(proc_t
); 
 160 thread_t 
fork_create_child(task_t parent_task
, 
 161     coalition_t 
*parent_coalitions
, 
 167 void proc_vfork_begin(proc_t parent_proc
); 
 168 void proc_vfork_end(proc_t parent_proc
); 
 170 static LCK_GRP_DECLARE(rethrottle_lock_grp
, "rethrottle"); 
 171 static ZONE_DECLARE(uthread_zone
, "uthreads", 
 172     sizeof(struct uthread
), ZC_ZFREE_CLEARMEM
); 
 174 SECURITY_READ_ONLY_LATE(zone_t
) proc_zone
; 
 175 ZONE_INIT(&proc_zone
, "proc", sizeof(struct proc
), ZC_ZFREE_CLEARMEM
, 
 178 ZONE_DECLARE(proc_stats_zone
, "pstats", 
 179     sizeof(struct pstats
), ZC_NOENCRYPT 
| ZC_ZFREE_CLEARMEM
); 
 181 ZONE_DECLARE(proc_sigacts_zone
, "sigacts", 
 182     sizeof(struct sigacts
), ZC_NOENCRYPT
); 
 184 #define DOFORK  0x1     /* fork() system call */ 
 185 #define DOVFORK 0x2     /* vfork() system call */ 
 190  * Description: start a vfork on a process 
 192  * Parameters:  parent_proc             process (re)entering vfork state 
 196  * Notes:       Although this function increments a count, a count in 
 197  *              excess of 1 is not currently supported.  According to the 
 198  *              POSIX standard, calling anything other than execve() or 
 199  *              _exit() following a vfork(), including calling vfork() 
 200  *              itself again, will result in undefined behaviour 
 203 proc_vfork_begin(proc_t parent_proc
) 
 205         proc_lock(parent_proc
); 
 206         parent_proc
->p_lflag  
|= P_LVFORK
; 
 207         parent_proc
->p_vforkcnt
++; 
 208         proc_unlock(parent_proc
); 
 214  * Description: stop a vfork on a process 
 216  * Parameters:  parent_proc             process leaving vfork state 
 220  * Notes:       Decrements the count; currently, reentrancy of vfork() 
 221  *              is unsupported on the current process 
 224 proc_vfork_end(proc_t parent_proc
) 
 226         proc_lock(parent_proc
); 
 227         parent_proc
->p_vforkcnt
--; 
 228         if (parent_proc
->p_vforkcnt 
< 0) { 
 229                 panic("vfork cnt is -ve"); 
 231         if (parent_proc
->p_vforkcnt 
== 0) { 
 232                 parent_proc
->p_lflag  
&= ~P_LVFORK
; 
 234         proc_unlock(parent_proc
); 
 241  * Description: vfork system call 
 243  * Parameters:  void                    [no arguments] 
 245  * Retval:      0                       (to child process) 
 246  *              !0                      pid of child (to parent process) 
 247  *              -1                      error (see "Returns:") 
 249  * Returns:     EAGAIN                  Administrative limit reached 
 250  *              EINVAL                  vfork() called during vfork() 
 251  *              ENOMEM                  Failed to allocate new process 
 253  * Note:        After a successful call to this function, the parent process 
 254  *              has its task, thread, and uthread lent to the child process, 
 255  *              and control is returned to the caller; if this function is 
 256  *              invoked as a system call, the return is to user space, and 
 257  *              is effectively running on the child process. 
 259  *              Subsequent calls that operate on process state are permitted, 
 260  *              though discouraged, and will operate on the child process; any 
 261  *              operations on the task, thread, or uthread will result in 
 262  *              changes in the parent state, and, if inheritable, the child 
 263  *              state, when a task, thread, and uthread are realized for the 
 264  *              child process at execve() time, will also be affected.  Given 
 265  *              this, it's recommended that people use the posix_spawn() call 
 268  * BLOCK DIAGRAM OF VFORK 
 272  *     ,----------------.         ,-------------. 
 274  *     | parent_thread  | ------> | parent_task | 
 276  *     `----------------'         `-------------' 
 277  *    uthread |  ^             bsd_info |  ^ 
 278  *            v  | vc_thread            v  | task 
 279  *     ,----------------.         ,-------------. 
 281  *     | parent_uthread | <.list. | parent_proc | <-- current_proc() 
 283  *     `----------------'         `-------------' 
 290  *                 ,----------------.         ,-------------. 
 292  *          ,----> | parent_thread  | ------> | parent_task | 
 294  *          |      `----------------'         `-------------' 
 295  *          |     uthread |  ^             bsd_info |  ^ 
 296  *          |             v  | vc_thread            v  | task 
 297  *          |      ,----------------.         ,-------------. 
 299  *          |      | parent_uthread | <.list. | parent_proc | 
 301  *          |      `----------------'         `-------------' 
 304  *          |      ,----------------. 
 306  *      p_vforkact | child_proc     | <-- current_proc() 
 311 vfork(proc_t parent_proc
, __unused 
struct vfork_args 
*uap
, int32_t *retval
) 
 313         thread_t child_thread
; 
 316         if ((err 
= fork1(parent_proc
, &child_thread
, PROC_CREATE_VFORK
, NULL
)) != 0) { 
 319                 uthread_t ut 
= get_bsdthread_info(current_thread()); 
 320                 proc_t child_proc 
= ut
->uu_proc
; 
 322                 retval
[0] = child_proc
->p_pid
; 
 323                 retval
[1] = 1;          /* flag child return for user space */ 
 326                  * Drop the signal lock on the child which was taken on our 
 327                  * behalf by forkproc()/cloneproc() to prevent signals being 
 328                  * received by the child in a partially constructed state. 
 330                 proc_signalend(child_proc
, 0); 
 331                 proc_transend(child_proc
, 0); 
 333                 proc_knote(parent_proc
, NOTE_FORK 
| child_proc
->p_pid
); 
 334                 DTRACE_PROC1(create
, proc_t
, child_proc
); 
 335                 ut
->uu_flag 
&= ~UT_VFORKING
; 
 345  * Description: common code used by all new process creation other than the 
 346  *              bootstrap of the initial process on the system 
 348  * Parameters: parent_proc              parent process of the process being 
 349  *              child_threadp           pointer to location to receive the 
 350  *                                      Mach thread_t of the child process 
 352  *              kind                    kind of creation being requested 
 353  *              coalitions              if spawn, the set of coalitions the 
 354  *                                      child process should join, or NULL to 
 355  *                                      inherit the parent's. On non-spawns, 
 356  *                                      this param is ignored and the child 
 357  *                                      always inherits the parent's 
 360  * Notes:       Permissible values for 'kind': 
 362  *              PROC_CREATE_FORK        Create a complete process which will 
 363  *                                      return actively running in both the 
 364  *                                      parent and the child; the child copies 
 365  *                                      the parent address space. 
 366  *              PROC_CREATE_SPAWN       Create a complete process which will 
 367  *                                      return actively running in the parent 
 368  *                                      only after returning actively running 
 369  *                                      in the child; the child address space 
 370  *                                      is newly created by an image activator, 
 371  *                                      after which the child is run. 
 372  *              PROC_CREATE_VFORK       Creates a partial process which will 
 373  *                                      borrow the parent task, thread, and 
 374  *                                      uthread to return running in the child; 
 375  *                                      the child address space and other parts 
 376  *                                      are lazily created at execve() time, or 
 377  *                                      the child is terminated, and the parent 
 378  *                                      does not actively run until that 
 381  *              At first it may seem strange that we return the child thread 
 382  *              address rather than process structure, since the process is 
 383  *              the only part guaranteed to be "new"; however, since we do 
 384  *              not actually adjust other references between Mach and BSD (see 
 385  *              the block diagram above the implementation of vfork()), this 
 386  *              is the only method which guarantees us the ability to get 
 387  *              back to the other information. 
 390 fork1(proc_t parent_proc
, thread_t 
*child_threadp
, int kind
, coalition_t 
*coalitions
) 
 392         thread_t parent_thread 
= (thread_t
)current_thread(); 
 393         uthread_t parent_uthread 
= (uthread_t
)get_bsdthread_info(parent_thread
); 
 394         proc_t child_proc 
= NULL
;       /* set in switch, but compiler... */ 
 395         thread_t child_thread 
= NULL
; 
 400         rlim_t rlimit_nproc_cur
; 
 403          * Although process entries are dynamically created, we still keep 
 404          * a global limit on the maximum number we will create.  Don't allow 
 405          * a nonprivileged user to use the last process; don't let root 
 406          * exceed the limit. The variable nprocs is the current number of 
 407          * processes, maxproc is the limit. 
 409         uid 
= kauth_getruid(); 
 411         if ((nprocs 
>= maxproc 
- 1 && uid 
!= 0) || nprocs 
>= maxproc
) { 
 412 #if (DEVELOPMENT || DEBUG) && !defined(XNU_TARGET_OS_OSX) 
 414                  * On the development kernel, panic so that the fact that we hit 
 415                  * the process limit is obvious, as this may very well wedge the 
 418                 panic("The process table is full; parent pid=%d", parent_proc
->p_pid
); 
 427          * Increment the count of procs running with this uid. Don't allow 
 428          * a nonprivileged user to exceed their current limit, which is 
 429          * always less than what an rlim_t can hold. 
 430          * (locking protection is provided by list lock held in chgproccnt) 
 432         count 
= chgproccnt(uid
, 1); 
 433         rlimit_nproc_cur 
= proc_limitgetcur(parent_proc
, RLIMIT_NPROC
, TRUE
); 
 435             (rlim_t
)count 
> rlimit_nproc_cur
) { 
 436 #if (DEVELOPMENT || DEBUG) && !defined(XNU_TARGET_OS_OSX) 
 438                  * On the development kernel, panic so that the fact that we hit 
 439                  * the per user process limit is obvious.  This may be less dire 
 440                  * than hitting the global process limit, but we cannot rely on 
 443                 panic("The per-user process limit has been hit; parent pid=%d, uid=%d", parent_proc
->p_pid
, uid
); 
 451          * Determine if MAC policies applied to the process will allow 
 452          * it to fork.  This is an advisory-only check. 
 454         err 
= mac_proc_check_fork(parent_proc
); 
 461         case PROC_CREATE_VFORK
: 
 463                  * Prevent a vfork while we are in vfork(); we should 
 464                  * also likely be preventing a fork here as well, and this 
 465                  * check should then be outside the switch statement, 
 466                  * since the proc struct contents will copy from the 
 467                  * child and the task/thread/uthread from the parent in 
 468                  * that case.  We do not support vfork() in vfork() 
 469                  * because we don't have to; the same non-requirement 
 470                  * is true of both fork() and posix_spawn() and any 
 471                  * call other than execve() and _exit(), but we've 
 472                  * been historically lenient, so we continue to be so 
 475                  * <rdar://6640521> Probably a source of random panics 
 477                 if (parent_uthread
->uu_flag 
& UT_VFORK
) { 
 478                         printf("fork1 called within vfork by %s\n", parent_proc
->p_comm
); 
 484                  * Flag us in progress; if we chose to support vfork() in 
 485                  * vfork(), we would chain our parent at this point (in 
 486                  * effect, a stack push).  We don't, since we actually want 
 487                  * to disallow everything not specified in the standard 
 489                 proc_vfork_begin(parent_proc
); 
 491                 /* The newly created process comes with signal lock held */ 
 492                 if ((child_proc 
= forkproc(parent_proc
)) == NULL
) { 
 493                         /* Failed to allocate new process */ 
 494                         proc_vfork_end(parent_proc
); 
 499 // XXX BEGIN: wants to move to be common code (and safe) 
 502                  * allow policies to associate the credential/label that 
 503                  * we referenced from the parent ... with the child 
 504                  * JMM - this really isn't safe, as we can drop that 
 505                  *       association without informing the policy in other 
 506                  *       situations (keep long enough to get policies changed) 
 508                 mac_cred_label_associate_fork(child_proc
->p_ucred
, child_proc
); 
 512                  * Propagate change of PID - may get new cred if auditing. 
 514                  * NOTE: This has no effect in the vfork case, since 
 515                  *      child_proc->task != current_task(), but we duplicate it 
 516                  *      because this is probably, ultimately, wrong, since we 
 517                  *      will be running in the "child" which is the parent task 
 518                  *      with the wrong token until we get to the execve() or 
 519                  *      _exit() call; a lot of "undefined" can happen before 
 522                  * <rdar://6640530> disallow everything but execve()/_exit()? 
 524                 set_security_token(child_proc
); 
 526                 AUDIT_ARG(pid
, child_proc
->p_pid
); 
 528 // XXX END: wants to move to be common code (and safe) 
 531                  * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD 
 533                  * Note: this is where we would "push" state instead of setting 
 534                  * it for nested vfork() support (see proc_vfork_end() for 
 535                  * description of issues here). 
 537                 child_proc
->task 
= parent_proc
->task
; 
 539                 child_proc
->p_lflag  
|= P_LINVFORK
; 
 540                 child_proc
->p_vforkact 
= parent_thread
; 
 541                 child_proc
->p_stat 
= SRUN
; 
 544                  * Until UT_VFORKING is cleared at the end of the vfork 
 545                  * syscall, the process identity of this thread is slightly 
 548                  * As long as UT_VFORK and its associated field (uu_proc) 
 549                  * is set, current_proc() will always return the child process. 
 551                  * However dtrace_proc_selfpid() returns the parent pid to 
 552                  * ensure that e.g. the proc:::create probe actions accrue 
 553                  * to the parent.  (Otherwise the child magically seems to 
 554                  * have created itself!) 
 556                 parent_uthread
->uu_flag 
|= UT_VFORK 
| UT_VFORKING
; 
 557                 parent_uthread
->uu_proc 
= child_proc
; 
 558                 parent_uthread
->uu_userstate 
= (void *)act_thread_csave(); 
 559                 parent_uthread
->uu_vforkmask 
= parent_uthread
->uu_sigmask
; 
 561                 /* temporarily drop thread-set-id state */ 
 562                 if (parent_uthread
->uu_flag 
& UT_SETUID
) { 
 563                         parent_uthread
->uu_flag 
|= UT_WASSETUID
; 
 564                         parent_uthread
->uu_flag 
&= ~UT_SETUID
; 
 567                 /* blow thread state information */ 
 568                 /* XXX is this actually necessary, given syscall return? */ 
 569                 thread_set_child(parent_thread
, child_proc
->p_pid
); 
 571                 child_proc
->p_acflag 
= AFORK
;   /* forked but not exec'ed */ 
 574                  * Preserve synchronization semantics of vfork.  If 
 575                  * waiting for child to exec or exit, set P_PPWAIT 
 576                  * on child, and sleep on our proc (in case of exit). 
 578                 child_proc
->p_lflag 
|= P_LPPWAIT
; 
 579                 pinsertchild(parent_proc
, child_proc
);  /* set visible */ 
 583         case PROC_CREATE_SPAWN
: 
 585                  * A spawned process differs from a forked process in that 
 586                  * the spawned process does not carry around the parents 
 587                  * baggage with regard to address space copying, dtrace, 
 594         case PROC_CREATE_FORK
: 
 596                  * When we clone the parent process, we are going to inherit 
 597                  * its task attributes and memory, since when we fork, we 
 598                  * will, in effect, create a duplicate of it, with only minor 
 599                  * differences.  Contrarily, spawned processes do not inherit. 
 601                 if ((child_thread 
= cloneproc(parent_proc
->task
, 
 602                     spawn 
? coalitions 
: NULL
, 
 604                     spawn 
? FALSE 
: TRUE
, 
 606                         /* Failed to create thread */ 
 611                 /* copy current thread state into the child thread (only for fork) */ 
 613                         thread_dup(child_thread
); 
 616                 /* child_proc = child_thread->task->proc; */ 
 617                 child_proc 
= (proc_t
)(get_bsdtask_info(get_threadtask(child_thread
))); 
 619 // XXX BEGIN: wants to move to be common code (and safe) 
 622                  * allow policies to associate the credential/label that 
 623                  * we referenced from the parent ... with the child 
 624                  * JMM - this really isn't safe, as we can drop that 
 625                  *       association without informing the policy in other 
 626                  *       situations (keep long enough to get policies changed) 
 628                 mac_cred_label_associate_fork(child_proc
->p_ucred
, child_proc
); 
 632                  * Propagate change of PID - may get new cred if auditing. 
 634                  * NOTE: This has no effect in the vfork case, since 
 635                  *      child_proc->task != current_task(), but we duplicate it 
 636                  *      because this is probably, ultimately, wrong, since we 
 637                  *      will be running in the "child" which is the parent task 
 638                  *      with the wrong token until we get to the execve() or 
 639                  *      _exit() call; a lot of "undefined" can happen before 
 642                  * <rdar://6640530> disallow everything but execve()/_exit()? 
 644                 set_security_token(child_proc
); 
 646                 AUDIT_ARG(pid
, child_proc
->p_pid
); 
 648 // XXX END: wants to move to be common code (and safe) 
 651                  * Blow thread state information; this is what gives the child 
 652                  * process its "return" value from a fork() call. 
 654                  * Note: this should probably move to fork() proper, since it 
 655                  * is not relevant to spawn, and the value won't matter 
 656                  * until we resume the child there.  If you are in here 
 657                  * refactoring code, consider doing this at the same time. 
 659                 thread_set_child(child_thread
, child_proc
->p_pid
); 
 661                 child_proc
->p_acflag 
= AFORK
;   /* forked but not exec'ed */ 
 664                 dtrace_proc_fork(parent_proc
, child_proc
, spawn
); 
 665 #endif  /* CONFIG_DTRACE */ 
 668                          * Of note, we need to initialize the bank context behind 
 669                          * the protection of the proc_trans lock to prevent a race with exit. 
 671                         task_bank_init(get_threadtask(child_thread
)); 
 677                 panic("fork1 called with unknown kind %d", kind
); 
 682         /* return the thread pointer to the caller */ 
 683         *child_threadp 
= child_thread
; 
 687          * In the error case, we return a 0 value for the returned pid (but 
 688          * it is ignored in the trampoline due to the error return); this 
 689          * is probably not necessary. 
 692                 (void)chgproccnt(uid
, -1); 
 702  * Description: "Return" to parent vfork thread() following execve/_exit; 
 703  *              this is done by reassociating the parent process structure 
 704  *              with the task, thread, and uthread. 
 706  *              Refer to the ASCII art above vfork() to figure out the 
 707  *              state we're undoing. 
 709  * Parameters:  child_proc              Child process 
 710  *              retval                  System call return value array 
 711  *              rval                    Return value to present to parent 
 715  * Notes:       The caller resumes or exits the parent, as appropriate, after 
 716  *              calling this function. 
 719 vfork_return(proc_t child_proc
, int32_t *retval
, int rval
) 
 721         task_t parent_task 
= get_threadtask(child_proc
->p_vforkact
); 
 722         proc_t parent_proc 
= get_bsdtask_info(parent_task
); 
 723         thread_t th 
= current_thread(); 
 724         uthread_t uth 
= get_bsdthread_info(th
); 
 726         act_thread_catt(uth
->uu_userstate
); 
 728         /* clear vfork state in parent proc structure */ 
 729         proc_vfork_end(parent_proc
); 
 731         /* REPATRIATE PARENT TASK, THREAD, UTHREAD */ 
 732         uth
->uu_userstate 
= 0; 
 733         uth
->uu_flag 
&= ~UT_VFORK
; 
 734         /* restore thread-set-id state */ 
 735         if (uth
->uu_flag 
& UT_WASSETUID
) { 
 736                 uth
->uu_flag 
|= UT_SETUID
; 
 737                 uth
->uu_flag 
&= ~UT_WASSETUID
; 
 740         uth
->uu_sigmask 
= uth
->uu_vforkmask
; 
 742         proc_lock(child_proc
); 
 743         child_proc
->p_lflag 
&= ~P_LINVFORK
; 
 744         child_proc
->p_vforkact 
= 0; 
 745         proc_unlock(child_proc
); 
 747         thread_set_parent(th
, rval
); 
 751                 retval
[1] = 0;                  /* mark parent */ 
 759  * Description: Common operations associated with the creation of a child 
 762  * Parameters:  parent_task             parent task 
 763  *              parent_coalitions       parent's set of coalitions 
 764  *              child_proc                      child process 
 765  *              inherit_memory          TRUE, if the parents address space is 
 766  *                                                      to be inherited by the child 
 767  *              is_64bit_addr           TRUE, if the child being created will 
 768  *                                                      be associated with a 64 bit address space 
 769  *              is_64bit_data           TRUE if the child being created will use a 
 770  *                                                       64-bit register state 
 771  *              in_exec                         TRUE, if called from execve or posix spawn set exec 
 772  *                                                      FALSE, if called from fork or vfexec 
 774  * Note:        This code is called in the fork() case, from the execve() call 
 775  *              graph, if implementing an execve() following a vfork(), from 
 776  *              the posix_spawn() call graph (which implicitly includes a 
 777  *              vfork() equivalent call), and in the system bootstrap case. 
 779  *              It creates a new task and thread (and as a side effect of the 
 780  *              thread creation, a uthread) in the parent coalition set, which is 
 781  *              then associated with the process 'child'.  If the parent 
 782  *              process address space is to be inherited, then a flag 
 783  *              indicates that the newly created task should inherit this from 
 786  *              As a special concession to bootstrapping the initial process 
 787  *              in the system, it's possible for 'parent_task' to be TASK_NULL; 
 788  *              in this case, 'inherit_memory' MUST be FALSE. 
 791 fork_create_child(task_t parent_task
, 
 792     coalition_t 
*parent_coalitions
, 
 799         thread_t        child_thread 
= NULL
; 
 801         kern_return_t   result
; 
 803         /* Create a new task for the child process */ 
 804         result 
= task_create_internal(parent_task
, 
 810             in_exec 
? TPF_EXEC_COPY 
: TPF_NONE
,                        /* Mark the task exec copy if in execve */ 
 811             (TRW_LRETURNWAIT 
| TRW_LRETURNWAITER
),                     /* All created threads will wait in task_wait_to_return */ 
 813         if (result 
!= KERN_SUCCESS
) { 
 814                 printf("%s: task_create_internal failed.  Code: %d\n", 
 821                  * Set the child process task to the new task if not in exec, 
 822                  * will set the task for exec case in proc_exec_switch_task after image activation. 
 824                 child_proc
->task 
= child_task
; 
 827         /* Set child task process to child proc */ 
 828         set_bsdtask_info(child_task
, child_proc
); 
 830         /* Propagate CPU limit timer from parent */ 
 831         if (timerisset(&child_proc
->p_rlim_cpu
)) { 
 832                 task_vtimer_set(child_task
, TASK_VTIMER_RLIM
); 
 836          * Set child process BSD visible scheduler priority if nice value 
 837          * inherited from parent 
 839         if (child_proc
->p_nice 
!= 0) { 
 840                 resetpriority(child_proc
); 
 844          * Create a new thread for the child process 
 845          * The new thread is waiting on the event triggered by 'task_clear_return_wait' 
 847         result 
= thread_create_waiting(child_task
, 
 848             (thread_continue_t
)task_wait_to_return
, 
 849             task_get_return_wait_event(child_task
), 
 852         if (result 
!= KERN_SUCCESS
) { 
 853                 printf("%s: thread_create failed. Code: %d\n", 
 855                 task_deallocate(child_task
); 
 860          * Tag thread as being the first thread in its task. 
 862         thread_set_tag(child_thread
, THREAD_TAG_MAINTHREAD
); 
 865         thread_yield_internal(1); 
 874  * Description: fork system call. 
 876  * Parameters:  parent                  Parent process to fork 
 877  *              uap (void)              [unused] 
 878  *              retval                  Return value 
 881  *              EAGAIN                  Resource unavailable, try again 
 883  * Notes:       Attempts to create a new child process which inherits state 
 884  *              from the parent process.  If successful, the call returns 
 885  *              having created an initially suspended child process with an 
 886  *              extra Mach task and thread reference, for which the thread 
 887  *              is initially suspended.  Until we resume the child process, 
 888  *              it is not yet running. 
 890  *              The return information to the child is contained in the 
 891  *              thread state structure of the new child, and does not 
 892  *              become visible to the child through a normal return process, 
 893  *              since it never made the call into the kernel itself in the 
 896  *              After resuming the thread, this function returns directly to 
 897  *              the parent process which invoked the fork() system call. 
 899  * Important:   The child thread_resume occurs before the parent returns; 
 900  *              depending on scheduling latency, this means that it is not 
 901  *              deterministic as to whether the parent or child is scheduled 
 902  *              to run first.  It is entirely possible that the child could 
 903  *              run to completion prior to the parent running. 
 906 fork(proc_t parent_proc
, __unused 
struct fork_args 
*uap
, int32_t *retval
) 
 908         thread_t child_thread
; 
 911         retval
[1] = 0;          /* flag parent return for user space */ 
 913         if ((err 
= fork1(parent_proc
, &child_thread
, PROC_CREATE_FORK
, NULL
)) == 0) { 
 917                 /* Return to the parent */ 
 918                 child_proc 
= (proc_t
)get_bsdthreadtask_info(child_thread
); 
 919                 retval
[0] = child_proc
->p_pid
; 
 922                  * Drop the signal lock on the child which was taken on our 
 923                  * behalf by forkproc()/cloneproc() to prevent signals being 
 924                  * received by the child in a partially constructed state. 
 926                 proc_signalend(child_proc
, 0); 
 927                 proc_transend(child_proc
, 0); 
 929                 /* flag the fork has occurred */ 
 930                 proc_knote(parent_proc
, NOTE_FORK 
| child_proc
->p_pid
); 
 931                 DTRACE_PROC1(create
, proc_t
, child_proc
); 
 934                 if ((dtrace_proc_waitfor_hook 
= dtrace_proc_waitfor_exec_ptr
) != NULL
) { 
 935                         (*dtrace_proc_waitfor_hook
)(child_proc
); 
 939                 /* "Return" to the child */ 
 940                 task_clear_return_wait(get_threadtask(child_thread
), TCRW_CLEAR_ALL_WAIT
); 
 942                 /* drop the extra references we got during the creation */ 
 943                 if ((child_task 
= (task_t
)get_threadtask(child_thread
)) != NULL
) { 
 944                         task_deallocate(child_task
); 
 946                 thread_deallocate(child_thread
); 
 956  * Description: Create a new process from a specified process. 
 958  * Parameters:  parent_task             The parent task to be cloned, or 
 959  *                                      TASK_NULL if task characteristics 
 960  *                                      are not to be inherited; the VM 
 961  *                                      characteristics of the parent are 
 962  *                                      likewise not inherited by the new 
 963  *                                      task when this is TASK_NULL 
 964  *              parent_proc             The parent process to be cloned 
 965  *              inherit_memory          True if the child is to inherit 
 966  *                                      memory from the parent; if this is 
 967  *                                      non-NULL, then the parent_task must 
 969  *              memstat_internal        Whether to track the process in the 
 970  *                                      jetsam priority list (if configured) 
 972  * Returns:     !NULL                   pointer to new child thread 
 973  *              NULL                    Failure (unspecified) 
 975  * Note:        On return newly created child process has signal lock held 
 976  *              to block delivery of signal to it if called with lock set. 
 977  *              fork() code needs to explicitly remove this lock before 
 978  *              signals can be delivered 
 980  *              In the case of bootstrap, this function can be called from 
 981  *              bsd_utaskbootstrap() in order to bootstrap the first process; 
 982  *              the net effect is to provide a uthread structure for the 
 983  *              kernel process associated with the kernel task. 
 985  * XXX:         Tristating using the value parent_task as the major key 
 986  *              and inherit_memory as the minor key is something we should 
 987  *              refactor later; we owe the current semantics, ultimately, 
 988  *              to the semantics of task_create_internal.  For now, we will 
 989  *              live with this being somewhat awkward. 
 992 cloneproc(task_t parent_task
, coalition_t 
*parent_coalitions
, proc_t parent_proc
, int inherit_memory
, int memstat_internal
) 
 994 #if !CONFIG_MEMORYSTATUS 
 995 #pragma unused(memstat_internal) 
 999         thread_t child_thread 
= NULL
; 
1001         if ((child_proc 
= forkproc(parent_proc
)) == NULL
) { 
1002                 /* Failed to allocate new process */ 
1007          * In the case where the parent_task is TASK_NULL (during the init path) 
1008          * we make the assumption that the register size will be the same as the 
1009          * address space size since there's no way to determine the possible 
1010          * register size until an image is exec'd. 
1012          * The only architecture that has different address space and register sizes 
1013          * (arm64_32) isn't being used within kernel-space, so the above assumption 
1014          * always holds true for the init path. 
1016         const int parent_64bit_addr 
= parent_proc
->p_flag 
& P_LP64
; 
1017         const int parent_64bit_data 
= (parent_task 
== TASK_NULL
) ? parent_64bit_addr 
: task_get_64bit_data(parent_task
); 
1019         child_thread 
= fork_create_child(parent_task
, 
1027         if (child_thread 
== NULL
) { 
1029                  * Failed to create thread; now we must deconstruct the new 
1030                  * process previously obtained from forkproc(). 
1032                 forkproc_free(child_proc
); 
1036         child_task 
= get_threadtask(child_thread
); 
1037         if (parent_64bit_addr
) { 
1038                 OSBitOrAtomic(P_LP64
, (UInt32 
*)&child_proc
->p_flag
); 
1040                 OSBitAndAtomic(~((uint32_t)P_LP64
), (UInt32 
*)&child_proc
->p_flag
); 
1043 #if CONFIG_MEMORYSTATUS 
1044         if (memstat_internal
) { 
1046                 child_proc
->p_memstat_state 
|= P_MEMSTAT_INTERNAL
; 
1051         /* make child visible */ 
1052         pinsertchild(parent_proc
, child_proc
); 
1055          * Make child runnable, set start time. 
1057         child_proc
->p_stat 
= SRUN
; 
1059         return child_thread
; 
1064  * Destroy a process structure that resulted from a call to forkproc(), but 
1065  * which must be returned to the system because of a subsequent failure 
1066  * preventing it from becoming active. 
1068  * Parameters:  p                       The incomplete process from forkproc() 
1072  * Note:        This function should only be used in an error handler following 
1073  *              a call to forkproc(). 
1075  *              Operations occur in reverse order of those in forkproc(). 
1078 forkproc_free(proc_t p
) 
1081         persona_proc_drop(p
); 
1082 #endif /* CONFIG_PERSONAS */ 
1085         pth_proc_hashdelete(p
); 
1088         /* We held signal and a transition locks; drop them */ 
1089         proc_signalend(p
, 0); 
1090         proc_transend(p
, 0); 
1093          * If we have our own copy of the resource limits structure, we 
1094          * need to free it.  If it's a shared copy, we need to drop our 
1100         /* Need to drop references to the shared memory segment(s), if any */ 
1103                  * Use shmexec(): we have no address space, so no mappings 
1105                  * XXX Yes, the routine is badly named. 
1111         /* Need to undo the effects of the fdcopy(), if any */ 
1115          * Drop the reference on a text vnode pointer, if any 
1116          * XXX This code is broken in forkproc(); see <rdar://4256419>; 
1117          * XXX if anyone ever uses this field, we will be extremely unhappy. 
1120                 vnode_rele(p
->p_textvp
); 
1124         /* Update the audit session proc count */ 
1125         AUDIT_SESSION_PROCEXIT(p
); 
1127         lck_mtx_destroy(&p
->p_mlock
, proc_mlock_grp
); 
1128         lck_mtx_destroy(&p
->p_fdmlock
, proc_fdmlock_grp
); 
1129         lck_mtx_destroy(&p
->p_ucred_mlock
, proc_ucred_mlock_grp
); 
1131         lck_mtx_destroy(&p
->p_dtrace_sprlock
, proc_lck_grp
); 
1133         lck_spin_destroy(&p
->p_slock
, proc_slock_grp
); 
1135         /* Release the credential reference */ 
1136         kauth_cred_t tmp_ucred 
= p
->p_ucred
; 
1137         kauth_cred_unref(&tmp_ucred
); 
1138         p
->p_ucred 
= tmp_ucred
; 
1141         /* Decrement the count of processes in the system */ 
1144         /* Take it out of process hash */ 
1145         LIST_REMOVE(p
, p_hash
); 
1149         thread_call_free(p
->p_rcall
); 
1151         /* Free allocated memory */ 
1152         zfree(proc_sigacts_zone
, p
->p_sigacts
); 
1153         p
->p_sigacts 
= NULL
; 
1154         zfree(proc_stats_zone
, p
->p_stats
); 
1156         FREE(p
->p_subsystem_root_path
, M_SBUF
); 
1157         p
->p_subsystem_root_path 
= NULL
; 
1159         proc_checkdeadrefs(p
); 
1160         zfree(proc_zone
, p
); 
1167  * Description: Create a new process structure, given a parent process 
1170  * Parameters:  parent_proc             The parent process 
1172  * Returns:     !NULL                   The new process structure 
1173  *              NULL                    Error (insufficient free memory) 
1175  * Note:        When successful, the newly created process structure is 
1176  *              partially initialized; if a caller needs to deconstruct the 
1177  *              returned structure, they must call forkproc_free() to do so. 
1180 forkproc(proc_t parent_proc
) 
1182         proc_t child_proc
;      /* Our new process */ 
1183         static int nextpid 
= 0, pidwrap 
= 0; 
1184         static uint64_t nextuniqueid 
= 0; 
1186         struct session 
*sessp
; 
1187         uthread_t parent_uthread 
= (uthread_t
)get_bsdthread_info(current_thread()); 
1188         rlim_t rlimit_cpu_cur
; 
1190         child_proc 
= zalloc_flags(proc_zone
, Z_WAITOK 
| Z_ZERO
); 
1191         child_proc
->p_stats 
= zalloc_flags(proc_stats_zone
, Z_WAITOK 
| Z_ZERO
); 
1192         child_proc
->p_sigacts 
= zalloc_flags(proc_sigacts_zone
, Z_WAITOK
); 
1194         /* allocate a callout for use by interval timers */ 
1195         child_proc
->p_rcall 
= thread_call_allocate((thread_call_func_t
)realitexpire
, child_proc
); 
1196         if (child_proc
->p_rcall 
== NULL
) { 
1197                 zfree(proc_sigacts_zone
, child_proc
->p_sigacts
); 
1198                 zfree(proc_stats_zone
, child_proc
->p_stats
); 
1199                 zfree(proc_zone
, child_proc
); 
1206          * Find an unused PID. 
1214          * If the process ID prototype has wrapped around, 
1215          * restart somewhat above 0, as the low-numbered procs 
1216          * tend to include daemons that don't exit. 
1218         if (nextpid 
>= PID_MAX
) { 
1223                 /* the pid stays in the hash for both the zombie and running states */ 
1224                 if (pfind_locked(nextpid
) != PROC_NULL
) { 
1229                 if (pgfind_internal(nextpid
) != PGRP_NULL
) { 
1233                 if (session_find_internal(nextpid
) != SESSION_NULL
) { 
1239         child_proc
->p_pid 
= nextpid
; 
1240         child_proc
->p_idversion 
= OSIncrementAtomic(&nextpidversion
); 
1241         /* kernel process is handcrafted and not from fork, so start from 1 */ 
1242         child_proc
->p_uniqueid 
= ++nextuniqueid
; 
1244         if (child_proc
->p_pid 
!= 0) { 
1245                 if (pfind_locked(child_proc
->p_pid
) != PROC_NULL
) { 
1246                         panic("proc in the list already\n"); 
1250         /* Insert in the hash */ 
1251         child_proc
->p_listflag 
|= (P_LIST_INHASH 
| P_LIST_INCREATE
); 
1252         LIST_INSERT_HEAD(PIDHASH(child_proc
->p_pid
), child_proc
, p_hash
); 
1255         if (child_proc
->p_uniqueid 
== startup_serial_num_procs
) { 
1257                  * Turn off startup serial logging now that we have reached 
1258                  * the defined number of startup processes. 
1260                 startup_serial_logging_active 
= false; 
1264          * We've identified the PID we are going to use; initialize the new 
1265          * process structure. 
1267         child_proc
->p_stat 
= SIDL
; 
1268         child_proc
->p_pgrpid 
= PGRPID_DEAD
; 
1271          * The zero'ing of the proc was at the allocation time due to need 
1272          * for insertion to hash.  Copy the section that is to be copied 
1273          * directly from the parent. 
1275         __nochk_bcopy(&parent_proc
->p_startcopy
, &child_proc
->p_startcopy
, 
1276             (unsigned) ((caddr_t
)&child_proc
->p_endcopy 
- (caddr_t
)&child_proc
->p_startcopy
)); 
1278 #if defined(HAS_APPLE_PAC) 
1280          * The p_textvp and p_pgrp pointers are address-diversified by PAC, so we must 
1281          * resign them here for the new proc 
1283         if (parent_proc
->p_textvp
) { 
1284                 child_proc
->p_textvp 
= parent_proc
->p_textvp
; 
1287         if (parent_proc
->p_pgrp
) { 
1288                 child_proc
->p_pgrp 
= parent_proc
->p_pgrp
; 
1290 #endif /* defined(HAS_APPLE_PAC) */ 
1292         child_proc
->p_sessionid 
= parent_proc
->p_sessionid
; 
1295          * Some flags are inherited from the parent. 
1296          * Duplicate sub-structures as needed. 
1297          * Increase reference counts on shared objects. 
1298          * The p_stats and p_sigacts substructs are set in vm_fork. 
1300 #if CONFIG_DELAY_IDLE_SLEEP 
1301         child_proc
->p_flag 
= (parent_proc
->p_flag 
& (P_LP64 
| P_TRANSLATED 
| P_DISABLE_ASLR 
| P_DELAYIDLESLEEP 
| P_SUGID 
| P_AFFINITY
)); 
1302 #else /* CONFIG_DELAY_IDLE_SLEEP */ 
1303         child_proc
->p_flag 
= (parent_proc
->p_flag 
& (P_LP64 
| P_TRANSLATED 
| P_DISABLE_ASLR 
| P_SUGID
)); 
1304 #endif /* CONFIG_DELAY_IDLE_SLEEP */ 
1306         child_proc
->p_vfs_iopolicy 
= (parent_proc
->p_vfs_iopolicy 
& (P_VFS_IOPOLICY_VALID_MASK
)); 
1308         child_proc
->p_responsible_pid 
= parent_proc
->p_responsible_pid
; 
1311          * Note that if the current thread has an assumed identity, this 
1312          * credential will be granted to the new process. 
1314         child_proc
->p_ucred 
= kauth_cred_get_with_ref(); 
1315         /* update cred on proc */ 
1316         PROC_UPDATE_CREDS_ONPROC(child_proc
); 
1317         /* update audit session proc count */ 
1318         AUDIT_SESSION_PROCNEW(child_proc
); 
1320         lck_mtx_init(&child_proc
->p_mlock
, proc_mlock_grp
, proc_lck_attr
); 
1321         lck_mtx_init(&child_proc
->p_fdmlock
, proc_fdmlock_grp
, proc_lck_attr
); 
1322         lck_mtx_init(&child_proc
->p_ucred_mlock
, proc_ucred_mlock_grp
, proc_lck_attr
); 
1324         lck_mtx_init(&child_proc
->p_dtrace_sprlock
, proc_lck_grp
, proc_lck_attr
); 
1326         lck_spin_init(&child_proc
->p_slock
, proc_slock_grp
, proc_lck_attr
); 
1328         klist_init(&child_proc
->p_klist
); 
1330         if (child_proc
->p_textvp 
!= NULLVP
) { 
1331                 /* bump references to the text vnode */ 
1332                 /* Need to hold iocount across the ref call */ 
1333                 if ((error 
= vnode_getwithref(child_proc
->p_textvp
)) == 0) { 
1334                         error 
= vnode_ref(child_proc
->p_textvp
); 
1335                         vnode_put(child_proc
->p_textvp
); 
1339                         child_proc
->p_textvp 
= NULLVP
; 
1344          * Copy the parents per process open file table to the child; if 
1345          * there is a per-thread current working directory, set the childs 
1346          * per-process current working directory to that instead of the 
1349          * XXX may fail to copy descriptors to child 
1351         lck_rw_init(&child_proc
->p_dirs_lock
, proc_dirslock_grp
, proc_lck_attr
); 
1352         child_proc
->p_fd 
= fdcopy(parent_proc
, parent_uthread
->uu_cdir
); 
1355         if (parent_proc
->vm_shm
) { 
1356                 /* XXX may fail to attach shm to child */ 
1357                 (void)shmfork(parent_proc
, child_proc
); 
1362          * Child inherits the parent's plimit 
1364         proc_limitfork(parent_proc
, child_proc
); 
1366         rlimit_cpu_cur 
= proc_limitgetcur(child_proc
, RLIMIT_CPU
, TRUE
); 
1367         if (rlimit_cpu_cur 
!= RLIM_INFINITY
) { 
1368                 child_proc
->p_rlim_cpu
.tv_sec 
= (rlimit_cpu_cur 
> __INT_MAX__
) ? __INT_MAX__ 
: rlimit_cpu_cur
; 
1371         /* Initialize new process stats, including start time */ 
1372         /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */ 
1373         microtime_with_abstime(&child_proc
->p_start
, &child_proc
->p_stats
->ps_start
); 
1375         if (parent_proc
->p_sigacts 
!= NULL
) { 
1376                 (void)memcpy(child_proc
->p_sigacts
, 
1377                     parent_proc
->p_sigacts
, sizeof *child_proc
->p_sigacts
); 
1379                 (void)memset(child_proc
->p_sigacts
, 0, sizeof *child_proc
->p_sigacts
); 
1382         sessp 
= proc_session(parent_proc
); 
1383         if (sessp
->s_ttyvp 
!= NULL 
&& parent_proc
->p_flag 
& P_CONTROLT
) { 
1384                 OSBitOrAtomic(P_CONTROLT
, &child_proc
->p_flag
); 
1386         session_rele(sessp
); 
1389          * block all signals to reach the process. 
1390          * no transition race should be occurring with the child yet, 
1391          * but indicate that the process is in (the creation) transition. 
1393         proc_signalstart(child_proc
, 0); 
1394         proc_transstart(child_proc
, 0, 0); 
1396         child_proc
->p_pcaction 
= 0; 
1398         TAILQ_INIT(&child_proc
->p_uthlist
); 
1399         TAILQ_INIT(&child_proc
->p_aio_activeq
); 
1400         TAILQ_INIT(&child_proc
->p_aio_doneq
); 
1402         /* Inherit the parent flags for code sign */ 
1403         child_proc
->p_csflags 
= (parent_proc
->p_csflags 
& ~CS_KILLED
); 
1406          * Copy work queue information 
1408          * Note: This should probably only happen in the case where we are 
1409          *      creating a child that is a copy of the parent; since this 
1410          *      routine is called in the non-duplication case of vfork() 
1411          *      or posix_spawn(), then this information should likely not 
1414          * <rdar://6640553> Work queue pointers that no longer point to code 
1416         child_proc
->p_wqthread 
= parent_proc
->p_wqthread
; 
1417         child_proc
->p_threadstart 
= parent_proc
->p_threadstart
; 
1418         child_proc
->p_pthsize 
= parent_proc
->p_pthsize
; 
1419         if ((parent_proc
->p_lflag 
& P_LREGISTER
) != 0) { 
1420                 child_proc
->p_lflag 
|= P_LREGISTER
; 
1422         child_proc
->p_dispatchqueue_offset 
= parent_proc
->p_dispatchqueue_offset
; 
1423         child_proc
->p_dispatchqueue_serialno_offset 
= parent_proc
->p_dispatchqueue_serialno_offset
; 
1424         child_proc
->p_dispatchqueue_label_offset 
= parent_proc
->p_dispatchqueue_label_offset
; 
1425         child_proc
->p_return_to_kernel_offset 
= parent_proc
->p_return_to_kernel_offset
; 
1426         child_proc
->p_mach_thread_self_offset 
= parent_proc
->p_mach_thread_self_offset
; 
1427         child_proc
->p_pth_tsd_offset 
= parent_proc
->p_pth_tsd_offset
; 
1429         pth_proc_hashinit(child_proc
); 
1433         child_proc
->p_persona 
= NULL
; 
1434         error 
= persona_proc_inherit(child_proc
, parent_proc
); 
1436                 printf("forkproc: persona_proc_inherit failed (persona %d being destroyed?)\n", persona_get_uid(parent_proc
->p_persona
)); 
1437                 forkproc_free(child_proc
); 
1443 #if CONFIG_MEMORYSTATUS 
1444         /* Memorystatus init */ 
1445         child_proc
->p_memstat_state 
= 0; 
1446         child_proc
->p_memstat_effectivepriority 
= JETSAM_PRIORITY_DEFAULT
; 
1447         child_proc
->p_memstat_requestedpriority 
= JETSAM_PRIORITY_DEFAULT
; 
1448         child_proc
->p_memstat_assertionpriority 
= 0; 
1449         child_proc
->p_memstat_userdata          
= 0; 
1450         child_proc
->p_memstat_idle_start        
= 0; 
1451         child_proc
->p_memstat_idle_delta        
= 0; 
1452         child_proc
->p_memstat_memlimit          
= 0; 
1453         child_proc
->p_memstat_memlimit_active   
= 0; 
1454         child_proc
->p_memstat_memlimit_inactive 
= 0; 
1455         child_proc
->p_memstat_relaunch_flags    
= P_MEMSTAT_RELAUNCH_UNKNOWN
; 
1457         child_proc
->p_memstat_freeze_sharedanon_pages 
= 0; 
1459         child_proc
->p_memstat_dirty 
= 0; 
1460         child_proc
->p_memstat_idledeadline 
= 0; 
1461 #endif /* CONFIG_MEMORYSTATUS */ 
1463         if (parent_proc
->p_subsystem_root_path
) { 
1464                 size_t parent_length 
= strlen(parent_proc
->p_subsystem_root_path
) + 1; 
1465                 MALLOC(child_proc
->p_subsystem_root_path
, char *, parent_length
, M_SBUF
, M_WAITOK 
| M_ZERO
); 
1466                 memcpy(child_proc
->p_subsystem_root_path
, parent_proc
->p_subsystem_root_path
, parent_length
); 
1476         LCK_MTX_ASSERT(proc_list_mlock
, LCK_MTX_ASSERT_NOTOWNED
); 
1477         lck_mtx_lock(&p
->p_mlock
); 
1481 proc_unlock(proc_t p
) 
1483         lck_mtx_unlock(&p
->p_mlock
); 
1487 proc_spinlock(proc_t p
) 
1489         lck_spin_lock_grp(&p
->p_slock
, proc_slock_grp
); 
1493 proc_spinunlock(proc_t p
) 
1495         lck_spin_unlock(&p
->p_slock
); 
1499 proc_list_lock(void) 
1501         lck_mtx_lock(proc_list_mlock
); 
1505 proc_list_unlock(void) 
1507         lck_mtx_unlock(proc_list_mlock
); 
1511 proc_ucred_lock(proc_t p
) 
1513         lck_mtx_lock(&p
->p_ucred_mlock
); 
1517 proc_ucred_unlock(proc_t p
) 
1519         lck_mtx_unlock(&p
->p_ucred_mlock
); 
1523 uthread_alloc(task_t task
, thread_t thread
, int noinherit
) 
1527         uthread_t uth_parent
; 
1530         ut 
= zalloc_flags(uthread_zone
, Z_WAITOK 
| Z_ZERO
); 
1532         p 
= (proc_t
) get_bsdtask_info(task
); 
1533         uth 
= (uthread_t
)ut
; 
1534         uth
->uu_thread 
= thread
; 
1536         lck_spin_init(&uth
->uu_rethrottle_lock
, &rethrottle_lock_grp
, 
1540          * Thread inherits credential from the creating thread, if both 
1541          * are in the same task. 
1543          * If the creating thread has no credential or is from another 
1544          * task we can leave the new thread credential NULL.  If it needs 
1545          * one later, it will be lazily assigned from the task's process. 
1547         uth_parent 
= (uthread_t
)get_bsdthread_info(current_thread()); 
1548         if ((noinherit 
== 0) && task 
== current_task() && 
1549             uth_parent 
!= NULL 
&& 
1550             IS_VALID_CRED(uth_parent
->uu_ucred
)) { 
1552                  * XXX The new thread is, in theory, being created in context 
1553                  * XXX of parent thread, so a direct reference to the parent 
1556                 kauth_cred_ref(uth_parent
->uu_ucred
); 
1557                 uth
->uu_ucred 
= uth_parent
->uu_ucred
; 
1558                 /* the credential we just inherited is an assumed credential */ 
1559                 if (uth_parent
->uu_flag 
& UT_SETUID
) { 
1560                         uth
->uu_flag 
|= UT_SETUID
; 
1563                 /* sometimes workqueue threads are created out of task context */ 
1564                 if ((task 
!= kernel_task
) && (p 
!= PROC_NULL
)) { 
1565                         uth
->uu_ucred 
= kauth_cred_proc_ref(p
); 
1567                         uth
->uu_ucred 
= NOCRED
; 
1572         if ((task 
!= kernel_task
) && p
) { 
1574                 if (noinherit 
!= 0) { 
1575                         /* workq threads will not inherit masks */ 
1576                         uth
->uu_sigmask 
= ~workq_threadmask
; 
1577                 } else if (uth_parent
) { 
1578                         if (uth_parent
->uu_flag 
& UT_SAS_OLDMASK
) { 
1579                                 uth
->uu_sigmask 
= uth_parent
->uu_oldmask
; 
1581                                 uth
->uu_sigmask 
= uth_parent
->uu_sigmask
; 
1584                 uth
->uu_context
.vc_thread 
= thread
; 
1586                  * Do not add the uthread to proc uthlist for exec copy task, 
1587                  * since they do not hold a ref on proc. 
1589                 if (!task_is_exec_copy(task
)) { 
1590                         TAILQ_INSERT_TAIL(&p
->p_uthlist
, uth
, uu_list
); 
1595                 if (p
->p_dtrace_ptss_pages 
!= NULL 
&& !task_is_exec_copy(task
)) { 
1596                         uth
->t_dtrace_scratch 
= dtrace_ptss_claim_entry(p
); 
1605  * This routine frees the thread name field of the uthread_t structure. Split out of 
1606  * uthread_cleanup() so thread name does not get deallocated while generating a corpse fork. 
1609 uthread_cleanup_name(void *uthread
) 
1611         uthread_t uth 
= (uthread_t
)uthread
; 
1615          * Set pth_name to NULL before calling free(). 
1616          * Previously there was a race condition in the 
1617          * case this code was executing during a stackshot 
1618          * where the stackshot could try and copy pth_name 
1619          * after it had been freed and before if was marked 
1622         if (uth
->pth_name 
!= NULL
) { 
1623                 void *pth_name 
= uth
->pth_name
; 
1624                 uth
->pth_name 
= NULL
; 
1625                 kfree(pth_name
, MAXTHREADNAMESIZE
); 
1631  * This routine frees all the BSD context in uthread except the credential. 
1632  * It does not free the uthread structure as well 
1635 uthread_cleanup(task_t task
, void *uthread
, void * bsd_info
) 
1637         struct _select 
*sel
; 
1638         uthread_t uth 
= (uthread_t
)uthread
; 
1639         proc_t p 
= (proc_t
)bsd_info
; 
1642         if (__improbable(uthread_get_proc_refcount(uthread
) != 0)) { 
1643                 panic("uthread_cleanup called for uthread %p with uu_proc_refcount != 0", uthread
); 
1647         if (uth
->uu_lowpri_window 
|| uth
->uu_throttle_info
) { 
1649                  * task is marked as a low priority I/O type 
1650                  * and we've somehow managed to not dismiss the throttle 
1651                  * through the normal exit paths back to user space... 
1652                  * no need to throttle this thread since it's going away 
1653                  * but we do need to update our bookkeeping with respect to throttled threads 
1655                  * Calling this routine will clean up any throttle info reference 
1656                  * still inuse by the thread. 
1658                 throttle_lowpri_io(0); 
1661          * Per-thread audit state should never last beyond system 
1662          * call return.  Since we don't audit the thread creation/ 
1663          * removal, the thread state pointer should never be 
1664          * non-NULL when we get here. 
1666         assert(uth
->uu_ar 
== NULL
); 
1668         if (uth
->uu_kqr_bound
) { 
1669                 kqueue_threadreq_unbind(p
, uth
->uu_kqr_bound
); 
1672         sel 
= &uth
->uu_select
; 
1673         /* cleanup the select bit space */ 
1675                 FREE(sel
->ibits
, M_TEMP
); 
1676                 FREE(sel
->obits
, M_TEMP
); 
1681                 vnode_rele(uth
->uu_cdir
); 
1682                 uth
->uu_cdir 
= NULLVP
; 
1685         if (uth
->uu_wqset
) { 
1686                 if (waitq_set_is_valid(uth
->uu_wqset
)) { 
1687                         waitq_set_deinit(uth
->uu_wqset
); 
1689                 FREE(uth
->uu_wqset
, M_SELECT
); 
1690                 uth
->uu_wqset 
= NULL
; 
1691                 uth
->uu_wqstate_sz 
= 0; 
1694         os_reason_free(uth
->uu_exit_reason
); 
1696         if ((task 
!= kernel_task
) && p
) { 
1697                 if (((uth
->uu_flag 
& UT_VFORK
) == UT_VFORK
) && (uth
->uu_proc 
!= PROC_NULL
)) { 
1698                         vfork_exit_internal(uth
->uu_proc
, 0, 1); 
1701                  * Remove the thread from the process list and 
1702                  * transfer [appropriate] pending signals to the process. 
1703                  * Do not remove the uthread from proc uthlist for exec 
1704                  * copy task, since they do not have a ref on proc and 
1705                  * would not have been added to the list. 
1707                 if (get_bsdtask_info(task
) == p 
&& !task_is_exec_copy(task
)) { 
1710                         TAILQ_REMOVE(&p
->p_uthlist
, uth
, uu_list
); 
1711                         p
->p_siglist 
|= (uth
->uu_siglist 
& execmask 
& (~p
->p_sigignore 
| sigcantmask
)); 
1715                 struct dtrace_ptss_page_entry 
*tmpptr 
= uth
->t_dtrace_scratch
; 
1716                 uth
->t_dtrace_scratch 
= NULL
; 
1717                 if (tmpptr 
!= NULL 
&& !task_is_exec_copy(task
)) { 
1718                         dtrace_ptss_release_entry(p
, tmpptr
); 
1724 /* This routine releases the credential stored in uthread */ 
1726 uthread_cred_free(void *uthread
) 
1728         uthread_t uth 
= (uthread_t
)uthread
; 
1730         /* and free the uthread itself */ 
1731         if (IS_VALID_CRED(uth
->uu_ucred
)) { 
1732                 kauth_cred_t oldcred 
= uth
->uu_ucred
; 
1733                 uth
->uu_ucred 
= NOCRED
; 
1734                 kauth_cred_unref(&oldcred
); 
1738 /* This routine frees the uthread structure held in thread structure */ 
1740 uthread_zone_free(void *uthread
) 
1742         uthread_t uth 
= (uthread_t
)uthread
; 
1744         if (uth
->t_tombstone
) { 
1745                 kfree(uth
->t_tombstone
, sizeof(struct doc_tombstone
)); 
1746                 uth
->t_tombstone 
= NULL
; 
1749         lck_spin_destroy(&uth
->uu_rethrottle_lock
, &rethrottle_lock_grp
); 
1751         uthread_cleanup_name(uthread
); 
1752         /* and free the uthread itself */ 
1753         zfree(uthread_zone
, uthread
);