]>
git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_fork.c
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
69 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
75 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
76 * support for mandatory and extensible security protections. This notice
77 * is included in support of clause 2.2 (b) of the Apple Public License,
81 #include <kern/assert.h>
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/filedesc.h>
85 #include <sys/kernel.h>
86 #include <sys/malloc.h>
87 #include <sys/proc_internal.h>
88 #include <sys/kauth.h>
90 #include <sys/resourcevar.h>
91 #include <sys/vnode_internal.h>
92 #include <sys/file_internal.h>
94 #include <sys/codesign.h>
95 #include <sys/sysproto.h>
97 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
/*
 * DTrace fork hooks.  They are declared by hand here rather than pulled in
 * through dtrace.h.  dtrace_helpers_fork is a function pointer, so callers
 * must test it before calling through it.
 */
extern void dtrace_fasttrap_fork(proc_t, proc_t);
extern void (*dtrace_helpers_fork)(proc_t, proc_t);
extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t);
102 #include <sys/dtrace_ptss.h>
105 #include <bsm/audit_kernel.h>
107 #include <mach/mach_types.h>
108 #include <kern/kern_types.h>
109 #include <kern/kalloc.h>
110 #include <kern/mach_param.h>
111 #include <kern/task.h>
112 #include <kern/thread_call.h>
113 #include <kern/zalloc.h>
115 #include <machine/spl.h>
118 #include <security/mac.h>
119 #include <security/mac_mach_internal.h>
122 #include <vm/vm_map.h>
123 #include <vm/vm_protos.h>
124 #include <vm/vm_shared_region.h>
126 #include <sys/shm_internal.h> /* for shmfork() */
127 #include <mach/task.h> /* for thread_create() */
128 #include <mach/thread_act.h> /* for thread_resume() */
132 /* XXX routines which should have Mach prototypes, but don't */
133 void thread_set_parent(thread_t parent
, int pid
);
134 extern void act_thread_catt(void *ctx
);
135 void thread_set_child(thread_t child
, int pid
);
136 void *act_thread_csave(void);
139 thread_t
cloneproc(proc_t
, int);
140 proc_t
forkproc(proc_t
, int);
141 void forkproc_free(proc_t
, int);
142 thread_t
procdup(proc_t parent
, proc_t child
);
143 thread_t
fork_create_child(task_t parent_task
, proc_t child
, int inherit_memory
, int is64bit
);
145 #define DOFORK 0x1 /* fork() system call */
146 #define DOVFORK 0x2 /* vfork() system call */
152 * Description: vfork system call
154 * Parameters: void [no arguments]
156 * Retval: 0 (to child process)
157 * !0 pid of child (to parent process)
158 * -1 error (see "Returns:")
160 * Returns: EAGAIN Administrative limit reached
161 * EINVAL vfork() called during vfork()
162 * ENOMEM Failed to allocate new process
164 * Note: After a successful call to this function, the parent process
165 * has its task, thread, and uthread lent to the child process,
166 * and control is returned to the caller; if this function is
167 * invoked as a system call, the return is to user space, and
168 * is effectively running on the child process.
170 * Subsequent calls that operate on process state are permitted,
171 * though discouraged, and will operate on the child process; any
172 * operations on the task, thread, or uthread will result in
173 * changes in the parent state, and, if inheritable, the child
174 * state, when a task, thread, and uthread are realized for the
175 * child process at execve() time, will also be affected. Given
176 * this, it's recommended that people use the posix_spawn() call
/*
 * vfork() system call handler.
 *
 * Visible flow: check the global process limit (nprocs against maxproc),
 * bump the per-uid process count with chgproccnt() and compare it against
 * RLIMIT_NPROC, refuse a vfork issued while the calling uthread already has
 * UT_VFORK set, consult mac_proc_check_fork(), then build the child with
 * forkproc() and lend it the parent's task and the current thread.  Each
 * visible failure path after the per-uid increment undoes it with
 * chgproccnt(uid, -1).  On success retval[0] is the child pid and
 * retval[1] is 1.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left behind by the web extraction, and that numbering skips values
 * inside this function.  Short lines (return type, braces, local
 * declarations, return statements, preprocessor conditionals, parts of some
 * conditions) are therefore presumably missing -- restore them from the
 * upstream file before compiling.
 */
180 vfork(proc_t parent
, __unused
struct vfork_args
*uap
, register_t
*retval
)
184 thread_t cur_act
= (thread_t
)current_thread();
192 * Although process entries are dynamically created, we still keep
193 * a global limit on the maximum number we will create. Don't allow
194 * a nonprivileged user to use the last process; don't let root
195 * exceed the limit. The variable nprocs is the current number of
196 * processes, maxproc is the limit.
198 uid
= kauth_cred_get()->cr_ruid
;
200 if ((nprocs
>= maxproc
- 1 && uid
!= 0) || nprocs
>= maxproc
) {
209 * Increment the count of procs running with this uid. Don't allow
210 * a nonprivileged user to exceed their current limit, which is
211 * always less than what an rlim_t can hold.
212 * (locking protection is provided by list lock held in chgproccnt)
214 count
= chgproccnt(uid
, 1);
216 (rlim_t
)count
> parent
->p_rlimit
[RLIMIT_NPROC
].rlim_cur
) {
217 (void)chgproccnt(uid
, -1);
221 ut
= (uthread_t
)get_bsdthread_info(cur_act
);
222 if (ut
->uu_flag
& UT_VFORK
) {
223 printf("vfork called recursively by %s\n", parent
->p_comm
);
224 (void)chgproccnt(uid
, -1);
230 * Determine if MAC policies applied to the process will allow
233 err
= mac_proc_check_fork(parent
);
235 (void)chgproccnt(uid
, -1);
/* flag the parent as having a vfork in progress and count it */
241 parent
->p_lflag
|= P_LVFORK
;
242 parent
->p_vforkcnt
++;
245 /* The newly created process comes with signal lock held */
246 if ((child
= forkproc(parent
,1)) == NULL
) {
247 /* Failed to allocate new process */
248 (void)chgproccnt(uid
, -1);
250 * XXX kludgy, but necessary without a full flags audit...
251 * XXX these are inherited by the child, which depends on
252 * XXX P_VFORK being set.
255 parent
->p_lflag
&= ~P_LVFORK
;
256 parent
->p_vforkcnt
--;
262 /* allow policies to associate the credential/label */
263 /* that we referenced from the parent ... with the child */
264 /* JMM - this really isn't safe, as we can drop that */
265 /* association without informing the policy in other */
266 /* situations (keep long enough to get policies changed) */
267 mac_cred_label_associate_fork(child
->p_ucred
, child
);
270 AUDIT_ARG(pid
, child
->p_pid
);
/* the child borrows the parent's task instead of getting a new one */
272 child
->task
= parent
->task
;
274 /* make child visible */
275 pinsertchild(parent
, child
);
277 child
->p_lflag
|= P_LINVFORK
;
278 child
->p_vforkact
= cur_act
;
279 child
->p_stat
= SRUN
;
281 ut
->uu_flag
|= UT_VFORK
;
/* save the user state and signal mask on the uthread for later restore */
283 ut
->uu_userstate
= (void *)act_thread_csave();
284 ut
->uu_vforkmask
= ut
->uu_sigmask
;
286 /* temporarily drop thread-set-id state */
287 if (ut
->uu_flag
& UT_SETUID
) {
288 ut
->uu_flag
|= UT_WASSETUID
;
289 ut
->uu_flag
&= ~UT_SETUID
;
292 thread_set_child(cur_act
, child
->p_pid
);
294 microtime(&child
->p_start
);
295 microtime(&child
->p_stats
->p_start
); /* for compat sake */
296 child
->p_acflag
= AFORK
;
299 * Preserve synchronization semantics of vfork. If waiting for
300 * child to exec or exit, set P_PPWAIT on child, and sleep on our
301 * proc (in case of exit).
303 child
->p_lflag
|= P_LPPWAIT
;
305 /* drop the signal lock on the child */
306 proc_signalend(child
, 0);
307 proc_transend(child
, 0);
309 retval
[0] = child
->p_pid
;
310 retval
[1] = 1; /* flag child return for user space */
312 DTRACE_PROC1(create
, proc_t
, child
);
320 * Description: "Return" to parent vfork thread() following execve/_exit;
321 * this is done by reassociating the parent process structure
322 * with the task, thread, and uthread.
324 * Parameters: child Child process
325 * retval System call return value array
326 * rval Return value to present to parent
330 * Note: The caller resumes or exits the parent, as appropriate, after
331 * calling this function.
334 vfork_return(proc_t child
, register_t
*retval
, int rval
)
336 proc_t parent
= child
->p_pptr
;
337 thread_t cur_act
= (thread_t
)current_thread();
340 ut
= (uthread_t
)get_bsdthread_info(cur_act
);
342 act_thread_catt(ut
->uu_userstate
);
344 /* Make sure only one at this time */
346 parent
->p_vforkcnt
--;
347 if (parent
->p_vforkcnt
<0)
348 panic("vfork cnt is -ve");
349 if (parent
->p_vforkcnt
<=0)
350 parent
->p_lflag
&= ~P_LVFORK
;
352 ut
->uu_userstate
= 0;
353 ut
->uu_flag
&= ~UT_VFORK
;
354 /* restore thread-set-id state */
355 if (ut
->uu_flag
& UT_WASSETUID
) {
356 ut
->uu_flag
|= UT_SETUID
;
357 ut
->uu_flag
&= UT_WASSETUID
;
360 ut
->uu_sigmask
= ut
->uu_vforkmask
;
361 child
->p_lflag
&= ~P_LINVFORK
;
362 child
->p_vforkact
= (void *)0;
364 thread_set_parent(cur_act
, rval
);
368 retval
[1] = 0; /* mark parent */
378 * Description: Common operations associated with the creation of a child
381 * Parameters: parent_task parent task
382 * child child process
383 * inherit_memory TRUE, if the parents address space is
384 * to be inherited by the child
385 * is64bit TRUE, if the child being created will
386 * be associated with a 64 bit process
387 * rather than a 32 bit process
389 * Note: This code is called in the fork() case, from the execve() call
390 * graph, if implementing an execve() following a vfork(), from
391 * the posix_spawn() call graph (which implicitly includes a
392 * vfork() equivalent call), and in the system bootstrap case.
394 * It creates a new task and thread (and as a side effect of the
395 * thread creation, a uthread), which is then associated with the
396 * process 'child'. If the parent process address space is to
397 * be inherited, then a flag indicates that the newly created
398 * task should inherit this from the parent task.
400 * As a special concession to bootstrapping the initial process
401 * in the system, it's possible for 'parent_task' to be TASK_NULL;
402 * in this case, 'inherit_memory' MUST be FALSE.
/*
 * Create the Mach task and first thread for the process 'child'.
 *
 * Visible steps: task_create_internal() from parent_task; bind task and
 * proc to each other (child->task and set_bsdtask_info()); arm the
 * TASK_VTIMER_RLIM timer when child->p_rlim_cpu is set; mark the task map
 * 64-bit or 32-bit; update the MAC task label from child->p_ucred; call
 * resetpriority() when p_nice is non-zero; finally thread_create().  If
 * thread_create() fails, the new task is released with task_deallocate().
 * The value returned is child_thread, which starts out NULL.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction, and the numbering skips values inside
 * this function (for example the remaining task_create_internal()
 * arguments and the condition selecting the 64/32-bit map call), so short
 * lines are presumably missing -- restore from upstream before compiling.
 */
405 fork_create_child(task_t parent_task
, proc_t child
, int inherit_memory
, int is64bit
)
407 thread_t child_thread
= NULL
;
409 kern_return_t result
;
411 /* Create a new task for the child process */
412 result
= task_create_internal(parent_task
,
416 if (result
!= KERN_SUCCESS
) {
417 printf("execve: task_create_internal failed. Code: %d\n", result
);
421 /* Set the child task to the new task */
422 child
->task
= child_task
;
424 /* Set child task proc to child proc */
425 set_bsdtask_info(child_task
, child
);
427 /* Propagate CPU limit timer from parent */
428 if (timerisset(&child
->p_rlim_cpu
))
429 task_vtimer_set(child_task
, TASK_VTIMER_RLIM
);
431 /* Set/clear 64 bit vm_map flag */
433 vm_map_set_64bit(get_task_map(child_task
));
435 vm_map_set_32bit(get_task_map(child_task
));
438 /* Update task for MAC framework */
439 /* valid to use p_ucred as child is still not running ... */
440 mac_task_label_update_cred(child
->p_ucred
, child_task
);
443 /* Set child scheduler priority if nice value inherited from parent */
444 if (child
->p_nice
!= 0)
445 resetpriority(child
);
447 /* Create a new thread for the child process */
448 result
= thread_create(child_task
, &child_thread
);
449 if (result
!= KERN_SUCCESS
) {
450 printf("execve: thread_create failed. Code: %d\n", result
);
451 task_deallocate(child_task
);
455 thread_yield_internal(1);
457 return(child_thread
);
464 * Description: Given a parent process, provide a duplicate task and thread
465 * for a child process of that parent.
467 * Parameters: parent Parent process to use as the template
468 * child Child process to duplicate into
470 * Returns: !NULL Child process thread pointer
471 * NULL Failure (unspecified)
473 * Note: Most of the heavy lifting is done by fork_create_child(); this
474 * function exists more or less to deal with the 64 bit commpage,
475 * which requires explicit inheritance, the x86 commpage, which
476 * should not need explicit mapping any more, but apparently does,
477 * and to be variant for the bootstrap process.
479 * There is a special case where the system is being bootstrapped,
480 * where this function will be called from cloneproc(), called in
481 * turn from bsd_utaskbootstrap(). In this case, we are acting
482 * to create a task and thread (and uthread) for the benefit of
483 * the kernel process - the first process in the system (PID 0).
485 * In that specific case, we will *not* pass a parent task, since
486 * there is *no* parent task present to pass.
488 * XXX: This function should go away; the variance can be moved into
489 * XXX: cloneproc(), and the 64bit commpage code can be moved into
490 * XXX: fork_create_child(), after the x86 commpage inheritance is
/*
 * Build the task and thread for 'child' by calling fork_create_child().
 *
 * When parent->task is kernel_task no parent task is passed (TASK_NULL,
 * no memory inheritance); otherwise the parent's task is passed with
 * inheritance enabled and the parent's P_LP64 bit as the 64-bit selector.
 * If a thread came back, the new task and child->p_flag are then marked
 * 64-bit or 32-bit to match the parent's P_LP64 flag.  Returns the thread
 * from fork_create_child(), which may be NULL.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction; the numbering skips values inside
 * this function (return type, braces, the child_task declaration, else
 * branches, preprocessor conditionals), so those lines are presumably
 * missing -- restore from upstream before compiling.
 */
494 procdup(proc_t parent
, proc_t child
)
496 thread_t child_thread
;
499 if (parent
->task
== kernel_task
)
500 child_thread
= fork_create_child(TASK_NULL
, child
, FALSE
, FALSE
);
502 child_thread
= fork_create_child(parent
->task
, child
, TRUE
, (parent
->p_flag
& P_LP64
));
504 if (child_thread
!= NULL
) {
505 child_task
= get_threadtask(child_thread
);
506 if (parent
->p_flag
& P_LP64
) {
507 task_set_64bit(child_task
, TRUE
);
508 OSBitOrAtomic(P_LP64
, (UInt32
*)&child
->p_flag
);
510 /* LP64todo - clean up hacked mapping of commpage */
512 * PPC51: ppc64 is limited to 51-bit addresses.
513 * Memory above that limit is handled specially at
516 pmap_map_sharedpage(child_task
, get_map_pmap(get_task_map(child_task
)));
519 task_set_64bit(child_task
, FALSE
);
520 OSBitAndAtomic(~((uint32_t)P_LP64
), (UInt32
*)&child
->p_flag
);
524 return(child_thread
);
531 * Description: fork system call.
533 * Parameters: parent Parent process to fork
534 * uap (void) [unused]
535 * retval Return value
538 * EAGAIN Resource unavailable, try again
/*
 * fork() system call handler.
 *
 * Visible flow: check the global process limit (nprocs against maxproc),
 * bump the per-uid process count with chgproccnt() and compare it against
 * RLIMIT_NPROC, consult mac_proc_check_fork(), then create the child and
 * its first thread with cloneproc(parent, 1).  The child proc is looked up
 * from the new thread's task.  After the MAC, audit and DTrace hooks have
 * run, the child's signal/transition locks are dropped, the new thread is
 * resumed, the thread reference is released, and a NOTE_FORK knote is
 * posted on the parent.  retval[0] is the child pid and retval[1] is 0.
 * Each visible failure path after the per-uid increment undoes it with
 * chgproccnt(uid, -1).
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction; the numbering skips values inside
 * this function (return type, braces, local declarations, returns,
 * preprocessor conditionals, parts of some conditions), so those lines are
 * presumably missing -- restore from upstream before compiling.
 */
541 fork(proc_t parent
, __unused
struct fork_args
*uap
, register_t
*retval
)
553 * Although process entries are dynamically created, we still keep
554 * a global limit on the maximum number we will create. Don't allow
555 * a nonprivileged user to use the last process; don't let root
556 * exceed the limit. The variable nprocs is the current number of
557 * processes, maxproc is the limit.
559 uid
= kauth_cred_get()->cr_ruid
;
561 if ((nprocs
>= maxproc
- 1 && uid
!= 0) || nprocs
>= maxproc
) {
570 * Increment the count of procs running with this uid. Don't allow
571 * a nonprivileged user to exceed their current limit, which is
572 * always less than what an rlim_t can hold.
573 * (locking protection is provided by list lock held in chgproccnt)
575 count
= chgproccnt(uid
, 1);
577 (rlim_t
)count
> parent
->p_rlimit
[RLIMIT_NPROC
].rlim_cur
) {
578 (void)chgproccnt(uid
, -1);
584 * Determine if MAC policies applied to the process will allow
587 err
= mac_proc_check_fork(parent
);
589 (void)chgproccnt(uid
, -1);
594 /* The newly created process comes with signal lock held */
595 if ((newth
= cloneproc(parent
, 1)) == NULL
) {
596 /* Failed to create thread */
597 (void)chgproccnt(uid
, -1);
602 /* child = newth->task->proc; */
603 child
= (proc_t
)(get_bsdtask_info(get_threadtask(newth
)));
606 /* inform policies of new process sharing this cred/label */
607 /* safe to use p_ucred here since child is not running */
608 /* JMM - unsafe to assume the association will stay - as */
609 /* there are other ways it can be dropped without */
610 /* informing the policies. */
611 mac_cred_label_associate_fork(child
->p_ucred
, child
);
614 /* propagate change of PID - may get new cred if auditing */
615 set_security_token(child
);
617 AUDIT_ARG(pid
, child
->p_pid
);
619 thread_set_child(newth
, child
->p_pid
);
621 microtime(&child
->p_start
);
622 microtime(&child
->p_stats
->p_start
); /* for compat sake */
623 child
->p_acflag
= AFORK
;
627 * APPLE NOTE: Solaris does a sprlock() and drops the proc_lock
628 * here. We're cheating a bit and only taking the p_dtrace_sprlock
629 * lock. A full sprlock would task_suspend the parent.
631 lck_mtx_lock(&parent
->p_dtrace_sprlock
);
634 * Remove all DTrace tracepoints from the child process. We
635 * need to do this _before_ duplicating USDT providers since
636 * any associated probes may be immediately enabled.
638 if (parent
->p_dtrace_count
> 0) {
639 dtrace_fasttrap_fork(parent
, child
);
642 lck_mtx_unlock(&parent
->p_dtrace_sprlock
);
645 * Duplicate any lazy dof(s). This must be done while NOT
646 * holding the parent sprlock! Lock ordering is dtrace_dof_mode_lock,
647 * then sprlock. It is imperative we always call
648 * dtrace_lazy_dofs_duplicate, rather than null check and
649 * call if !NULL. If we NULL test, during lazy dof faulting
650 * we can race with the faulting code and proceed from here to
651 * beyond the helpers copy. The lazy dof faulting will then
652 * fail to copy the helpers to the child process.
654 dtrace_lazy_dofs_duplicate(parent
, child
);
657 * Duplicate any helper actions and providers. The SFORKING
658 * we set above informs the code to enable USDT probes that
659 * sprlock() may fail because the child is being forked.
662 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
663 * never fails to find the child. We do not set SFORKING.
665 if (parent
->p_dtrace_helpers
!= NULL
&& dtrace_helpers_fork
) {
666 (*dtrace_helpers_fork
)(parent
, child
);
671 /* drop the signal lock on the child */
672 proc_signalend(child
, 0);
673 proc_transend(child
, 0);
675 /* "Return" to the child */
676 (void)thread_resume(newth
);
678 /* drop the extra references we got during the creation */
679 if ((t
= (task_t
)get_threadtask(newth
)) != NULL
) {
682 thread_deallocate(newth
);
684 proc_knote(parent
, NOTE_FORK
| child
->p_pid
);
686 retval
[0] = child
->p_pid
;
687 retval
[1] = 0; /* flag parent */
689 DTRACE_PROC1(create
, proc_t
, child
);
697 * Description: Create a new process from a specified process.
699 * Parameters: parent The parent process of the process to
701 * lock Whether or not the signal lock was held
702 * when calling cloneproc().
704 * Returns: !NULL pointer to new child thread
705 * NULL Failure (unspecified)
707 * Note: On return newly created child process has signal lock held
708 * to block delivery of signal to it if called with lock set.
709 * fork() code needs to explicitly remove this lock before
710 * signals can be delivered
712 * In the case of bootstrap, this function can be called from
713 * bsd_utaskbootstrap() in order to bootstrap the first process;
714 * the net effect is to provide a uthread structure for the
715 * kernel process associated with the kernel task. This results
716 * in a side effect in procdup(), which is why the code is more
717 * complicated at the top of that function.
/*
 * Create a child process structure with forkproc(), give it a task and
 * thread with procdup(), insert it under the parent with pinsertchild(),
 * and mark it SRUN.  If procdup() fails, the half-built process is torn
 * down with forkproc_free() using the same 'lock' value.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction; the numbering skips values inside
 * this function (return type, braces, local declarations, every return
 * statement), so those lines are presumably missing -- restore from
 * upstream before compiling.
 */
720 cloneproc(proc_t parent
, int lock
)
725 if ((child
= forkproc(parent
,lock
)) == NULL
) {
726 /* Failed to allocate new process */
730 if ((th
= procdup(parent
, child
)) == NULL
) {
732 * Failed to create thread; now we must deconstruct the new
733 * process previously obtained from forkproc().
735 forkproc_free(child
, lock
);
739 /* make child visible */
740 pinsertchild(parent
, child
);
743 * Make child runnable, set start time.
745 child
->p_stat
= SRUN
;
752 * Destroy a process structure that resulted from a call to forkproc(), but
753 * which must be returned to the system because of a subsequent failure
754 * preventing it from becoming active.
756 * Parameters: p The incomplete process from forkproc()
757 * lock Whether or not the signal lock was held
758 * when calling forkproc().
762 * Note: This function should only be used in an error handler following
763 * a call to forkproc(). The 'lock' parameter should be the same
764 * as the lock parameter passed to forkproc().
766 * Operations occur in reverse order of those in forkproc().
/*
 * Tear down a process structure obtained from forkproc() that will never
 * become active.
 *
 * Visible operations, in order: proc_signalend(), proc_limitdrop(),
 * vnode_rele() of p_textvp, kauth_cred_unref() of p_ucred,
 * thread_call_free() of p_rcall, then release of p_sigacts, p_stats and
 * finally the proc itself with FREE_ZONE (after proc_checkdeadrefs()).
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction; the numbering skips values inside
 * this function.  Several steps announced by the comments below (shared
 * memory, file descriptors, profiling clock, process count) have no
 * visible statement, so those short lines are presumably missing --
 * restore from upstream before compiling.
 */
769 forkproc_free(proc_t p
, int lock
)
772 /* Drop the signal lock, if it was held */
774 proc_signalend(p
, 0);
779 * If we have our own copy of the resource limits structure, we
780 * need to free it. If it's a shared copy, we need to drop our
783 proc_limitdrop(p
, 0);
787 /* Need to drop references to the shared memory segment(s), if any */
790 * Use shmexec(): we have no address space, so no mappings
792 * XXX Yes, the routine is badly named.
798 /* Need to undo the effects of the fdcopy(), if any */
802 * Drop the reference on a text vnode pointer, if any
803 * XXX This code is broken in forkproc(); see <rdar://4256419>;
804 * XXX if anyone ever uses this field, we will be extremely unhappy.
807 vnode_rele(p
->p_textvp
);
811 /* Stop the profiling clock */
814 /* Release the credential reference */
815 kauth_cred_unref(&p
->p_ucred
);
818 /* Decrement the count of processes in the system */
822 thread_call_free(p
->p_rcall
);
824 /* Free allocated memory */
825 FREE_ZONE(p
->p_sigacts
, sizeof *p
->p_sigacts
, M_SIGACTS
);
826 FREE_ZONE(p
->p_stats
, sizeof *p
->p_stats
, M_PSTATS
);
827 proc_checkdeadrefs(p
);
828 FREE_ZONE(p
, sizeof *p
, M_PROC
);
835 * Description: Create a new process structure, given a parent process
838 * Parameters: parent The parent process
839 * lock If the signal lock should be taken on
840 * the newly created process.
842 * Returns: !NULL The new process structure
843 * NULL Error (insufficient free memory)
845 * Note: When successful, the newly created process structure is
846 * partially initialized; if a caller needs to deconstruct the
847 * returned structure, they must call forkproc_free() to do so.
/*
 * Allocate and partially initialize a new process structure modelled on
 * 'parent'.
 *
 * Visible steps: allocate the proc, its pstats and sigacts from their
 * zones plus a thread call for realitexpire (each later failure frees what
 * was allocated before it); choose a PID using the static nextpid/pidwrap
 * counters, rejecting values already known to pfind_locked(),
 * pgfind_internal() or session_find_internal(); insert the proc in the PID
 * hash; bcopy the p_startcopy..p_endcopy region from the parent; inherit
 * selected p_flag bits; take a credential reference; initialize the
 * per-process locks and klist; reference the text vnode; copy descriptors
 * (fdcopy), shared memory (shmfork) and limits (proc_limitfork); zero the
 * statistics; copy or clear sigacts; propagate P_CONTROLT; start the signal
 * and transition locks; initialize the thread/aio queues; copy the
 * code-signing flags and workqueue fields; and enter the parent's login
 * context if it has one.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction; the numbering skips values inside
 * this function (return type, braces, NULL checks, returns, the retry
 * logic of the PID search, list locking, preprocessor conditionals), so
 * those short lines are presumably missing -- restore from upstream before
 * compiling.
 */
850 forkproc(proc_t parent
, int lock
)
852 struct proc
* child
; /* Our new process */
853 static int nextpid
= 0, pidwrap
= 0, nextpidversion
= 0;
855 struct session
*sessp
;
856 uthread_t uth_parent
= (uthread_t
)get_bsdthread_info(current_thread());
858 MALLOC_ZONE(child
, proc_t
, sizeof *child
, M_PROC
, M_WAITOK
);
860 printf("forkproc: M_PROC zone exhausted\n");
863 /* zero it out as we need to insert in hash */
864 bzero(child
, sizeof *child
);
866 MALLOC_ZONE(child
->p_stats
, struct pstats
*,
867 sizeof *child
->p_stats
, M_PSTATS
, M_WAITOK
);
868 if (child
->p_stats
== NULL
) {
869 printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
870 FREE_ZONE(child
, sizeof *child
, M_PROC
);
874 MALLOC_ZONE(child
->p_sigacts
, struct sigacts
*,
875 sizeof *child
->p_sigacts
, M_SIGACTS
, M_WAITOK
);
876 if (child
->p_sigacts
== NULL
) {
877 printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
878 FREE_ZONE(child
->p_stats
, sizeof *child
->p_stats
, M_PSTATS
);
879 FREE_ZONE(child
, sizeof *child
, M_PROC
);
883 child
->p_rcall
= thread_call_allocate((thread_call_func_t
)realitexpire
, child
);
884 if (child
->p_rcall
== NULL
) {
885 FREE_ZONE(child
->p_sigacts
, sizeof *child
->p_sigacts
, M_SIGACTS
);
886 FREE_ZONE(child
->p_stats
, sizeof *child
->p_stats
, M_PSTATS
);
887 FREE_ZONE(child
, sizeof *child
, M_PROC
);
894 * Find an unused PID.
902 * If the process ID prototype has wrapped around,
903 * restart somewhat above 0, as the low-numbered procs
904 * tend to include daemons that don't exit.
906 if (nextpid
>= PID_MAX
) {
912 /* if the pid stays in hash both for zombie and running state */
913 if (pfind_locked(nextpid
) != PROC_NULL
) {
918 if (pgfind_internal(nextpid
) != PGRP_NULL
) {
922 if (session_find_internal(nextpid
) != SESSION_NULL
) {
928 child
->p_pid
= nextpid
;
929 child
->p_idversion
= nextpidversion
++;
931 if (child
->p_pid
!= 0) {
932 if (pfind_locked(child
->p_pid
) != PROC_NULL
)
933 panic("proc in the list already\n");
936 /* Insert in the hash */
937 child
->p_listflag
|= (P_LIST_INHASH
| P_LIST_INCREATE
);
938 LIST_INSERT_HEAD(PIDHASH(child
->p_pid
), child
, p_hash
);
943 * We've identified the PID we are going to use; initialize the new
946 child
->p_stat
= SIDL
;
947 child
->p_pgrpid
= PGRPID_DEAD
;
950 * The zero'ing of the proc was at the allocation time due to need for insertion
951 * to hash. Copy the section that is to be copied directly from the parent.
953 bcopy(&parent
->p_startcopy
, &child
->p_startcopy
,
954 (unsigned) ((caddr_t
)&child
->p_endcopy
- (caddr_t
)&child
->p_startcopy
));
957 * Some flags are inherited from the parent.
958 * Duplicate sub-structures as needed.
959 * Increase reference counts on shared objects.
960 * The p_stats and p_sigacts substructs are set in vm_fork.
962 child
->p_flag
= (parent
->p_flag
& (P_LP64
| P_TRANSLATED
| P_AFFINITY
));
963 if (parent
->p_flag
& P_PROFIL
)
964 startprofclock(child
);
966 * Note that if the current thread has an assumed identity, this
967 * credential will be granted to the new process.
969 child
->p_ucred
= kauth_cred_get_with_ref();
971 lck_mtx_init(&child
->p_mlock
, proc_lck_grp
, proc_lck_attr
);
972 lck_mtx_init(&child
->p_fdmlock
, proc_lck_grp
, proc_lck_attr
);
974 lck_mtx_init(&child
->p_dtrace_sprlock
, proc_lck_grp
, proc_lck_attr
);
976 lck_spin_init(&child
->p_slock
, proc_lck_grp
, proc_lck_attr
);
977 klist_init(&child
->p_klist
);
979 if (child
->p_textvp
!= NULLVP
) {
980 /* bump references to the text vnode */
981 /* Need to hold iocount across the ref call */
982 if (vnode_getwithref(child
->p_textvp
) == 0) {
983 error
= vnode_ref(child
->p_textvp
);
984 vnode_put(child
->p_textvp
);
986 child
->p_textvp
= NULLVP
;
990 /* XXX may fail to copy descriptors to child */
991 child
->p_fd
= fdcopy(parent
, uth_parent
->uu_cdir
);
994 if (parent
->vm_shm
) {
995 /* XXX may fail to attach shm to child */
996 (void)shmfork(parent
,child
);
1000 * inherit the limit structure to child
1002 proc_limitfork(parent
, child
);
1004 if (child
->p_limit
->pl_rlimit
[RLIMIT_CPU
].rlim_cur
!= RLIM_INFINITY
) {
1005 uint64_t rlim_cur
= child
->p_limit
->pl_rlimit
[RLIMIT_CPU
].rlim_cur
;
/* clamp the CPU limit so it fits the tv_sec field */
1006 child
->p_rlim_cpu
.tv_sec
= (rlim_cur
> __INT_MAX__
) ? __INT_MAX__
: rlim_cur
;
1009 bzero(&child
->p_stats
->pstat_startzero
,
1010 (unsigned) ((caddr_t
)&child
->p_stats
->pstat_endzero
-
1011 (caddr_t
)&child
->p_stats
->pstat_startzero
));
1013 bzero(&child
->p_stats
->user_p_prof
, sizeof(struct user_uprof
));
1015 if (parent
->p_sigacts
!= NULL
)
1016 (void)memcpy(child
->p_sigacts
,
1017 parent
->p_sigacts
, sizeof *child
->p_sigacts
);
1019 (void)memset(child
->p_sigacts
, 0, sizeof *child
->p_sigacts
);
1021 sessp
= proc_session(parent
);
1022 if (sessp
->s_ttyvp
!= NULL
&& parent
->p_flag
& P_CONTROLT
)
1023 OSBitOrAtomic(P_CONTROLT
, (UInt32
*)&child
->p_flag
);
1024 session_rele(sessp
);
1026 /* block all signals to reach the process */
1028 proc_signalstart(child
, 0);
1029 proc_transstart(child
, 0);
1032 TAILQ_INIT(&child
->p_uthlist
);
1033 TAILQ_INIT(&child
->aio_activeq
);
1034 TAILQ_INIT(&child
->aio_doneq
);
1035 /* Inherit the parent flags for code sign */
1036 child
->p_csflags
= parent
->p_csflags
;
1037 child
->p_wqthread
= parent
->p_wqthread
;
1038 child
->p_threadstart
= parent
->p_threadstart
;
1039 child
->p_pthsize
= parent
->p_pthsize
;
1040 workqueue_init_lock(child
);
1043 child
->p_lctx
= NULL
;
1044 /* Add new process to login context (if any). */
1045 if (parent
->p_lctx
!= NULL
) {
1046 LCTX_LOCK(parent
->p_lctx
);
1047 enterlctx(child
, parent
->p_lctx
, 0);
/*
 * NOTE(review): this statement is the body of a routine whose header line
 * was lost by the web extraction (the embedded numbering jumps from 1047
 * to 1058).  Given the proc_unlock() that follows, it is presumably
 * proc_lock(proc_t p), which takes the per-process mutex p_mlock -- confirm
 * against the upstream file and restore the return type, signature and
 * braces.
 */
1058 lck_mtx_lock(&p
->p_mlock
);
1062 proc_unlock(proc_t p
)
1064 lck_mtx_unlock(&p
->p_mlock
);
1068 proc_spinlock(proc_t p
)
1070 lck_spin_lock(&p
->p_slock
);
1074 proc_spinunlock(proc_t p
)
1076 lck_spin_unlock(&p
->p_slock
);
1080 proc_list_lock(void)
1082 lck_mtx_lock(proc_list_mlock
);
1086 proc_list_unlock(void)
1088 lck_mtx_unlock(proc_list_mlock
);
1091 #include <kern/zalloc.h>
/* Zone that uthread structures are allocated from and returned to. */
struct zone *uthread_zone;
/* Non-zero once uthread_zone has been created; makes the setup one-shot. */
static int uthread_zone_inited = 0;

/*
 * Create the uthread zone the first time it is needed; later calls do
 * nothing because uthread_zone_inited is already set.  The zone is sized in
 * units of struct uthread: element size, THREAD_MAX elements maximum,
 * THREAD_CHUNK elements per allocation.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction.  The zinit() call ends on a comma and
 * the embedded numbering skips 1103, so its final argument and closing
 * parenthesis are presumably on a dropped line, as are the return type and
 * braces -- restore from upstream before compiling.
 */
1097 uthread_zone_init(void)
1099 if (!uthread_zone_inited
) {
1100 uthread_zone
= zinit(sizeof(struct uthread
),
1101 THREAD_MAX
* sizeof(struct uthread
),
1102 THREAD_CHUNK
* sizeof(struct uthread
),
1104 uthread_zone_inited
= 1;
/*
 * Allocate and initialize the BSD per-thread structure (uthread) for
 * 'thread' in 'task'.
 *
 * Visible steps: make sure the zone exists, zalloc() and zero a uthread,
 * and look up the task's proc with get_bsdtask_info().  When the creating
 * thread belongs to the same task and holds a valid credential, the new
 * uthread takes a reference on that credential and copies UT_SETUID;
 * otherwise its credential is NOCRED.  For a non-kernel task with a proc,
 * the signal mask is taken from the creating thread (uu_oldmask when
 * UT_SAS_OLDMASK is set, else uu_sigmask), uu_context.vc_thread is pointed
 * at 'thread', the uthread is appended to p_uthlist, and a DTrace scratch
 * entry is claimed when the proc has p_dtrace_ptss_pages.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction; the numbering skips values inside
 * this function (return type, braces, some local declarations, else
 * keywords, any locking or NULL checks, and the return statement), so
 * those short lines are presumably missing -- restore from upstream before
 * compiling.  The value returned is not visible here.
 */
1109 uthread_alloc(task_t task
, thread_t thread
)
1113 uthread_t uth_parent
;
1116 if (!uthread_zone_inited
)
1117 uthread_zone_init();
1119 ut
= (void *)zalloc(uthread_zone
);
1120 bzero(ut
, sizeof(struct uthread
));
1122 p
= (proc_t
) get_bsdtask_info(task
);
1123 uth
= (uthread_t
)ut
;
1126 * Thread inherits credential from the creating thread, if both
1127 * are in the same task.
1129 * If the creating thread has no credential or is from another
1130 * task we can leave the new thread credential NULL. If it needs
1131 * one later, it will be lazily assigned from the task's process.
1133 uth_parent
= (uthread_t
)get_bsdthread_info(current_thread());
1134 if (task
== current_task() &&
1135 uth_parent
!= NULL
&&
1136 IS_VALID_CRED(uth_parent
->uu_ucred
)) {
1138 * XXX The new thread is, in theory, being created in context
1139 * XXX of parent thread, so a direct reference to the parent
1142 kauth_cred_ref(uth_parent
->uu_ucred
);
1143 uth
->uu_ucred
= uth_parent
->uu_ucred
;
1144 /* the credential we just inherited is an assumed credential */
1145 if (uth_parent
->uu_flag
& UT_SETUID
)
1146 uth
->uu_flag
|= UT_SETUID
;
1148 uth
->uu_ucred
= NOCRED
;
1152 if ((task
!= kernel_task
) && p
) {
1156 if (uth_parent
->uu_flag
& UT_SAS_OLDMASK
)
1157 uth
->uu_sigmask
= uth_parent
->uu_oldmask
;
1159 uth
->uu_sigmask
= uth_parent
->uu_sigmask
;
1161 uth
->uu_context
.vc_thread
= thread
;
1162 TAILQ_INSERT_TAIL(&p
->p_uthlist
, uth
, uu_list
);
1166 if (p
->p_dtrace_ptss_pages
!= NULL
) {
1167 uth
->t_dtrace_scratch
= dtrace_ptss_claim_entry(p
);
1177 * This routine frees all the BSD context in uthread except the credential.
1178 * It does not free the uthread structure either
/*
 * Release the BSD state hanging off a uthread.  Parameters: 'task' is the
 * owning task, 'uthread' the structure to clean, 'bsd_info' the proc it
 * belongs to (may be NULL).
 *
 * Visible steps: dismiss a pending low-priority I/O throttle window; assert
 * that no audit record is attached; free the select bit vectors; drop the
 * per-thread current directory vnode; free the wait-queue set buffer; and,
 * for a non-kernel task with a proc, finish an outstanding vfork via
 * vfork_exit_internal(), unlink the uthread from p_uthlist when the task
 * still maps to this proc, and release any DTrace scratch entry.
 *
 * NOTE(review): the numeric token in front of each statement is a line
 * number left by the web extraction; the numbering skips values inside
 * this function (return type, braces, several guarding conditions, any
 * locking), so those short lines are presumably missing -- restore from
 * upstream before compiling.
 */
1181 uthread_cleanup(task_t task
, void *uthread
, void * bsd_info
)
1183 struct _select
*sel
;
1184 uthread_t uth
= (uthread_t
)uthread
;
1185 proc_t p
= (proc_t
)bsd_info
;
1188 if (uth
->uu_lowpri_window
) {
1190 * task is marked as a low priority I/O type
1191 * and we've somehow managed to not dismiss the throttle
1192 * through the normal exit paths back to user space...
1193 * no need to throttle this thread since its going away
1194 * but we do need to update our bookeeping w/r to throttled threads
1196 throttle_lowpri_io(FALSE
);
1199 * Per-thread audit state should never last beyond system
1200 * call return. Since we don't audit the thread creation/
1201 * removal, the thread state pointer should never be
1202 * non-NULL when we get here.
1204 assert(uth
->uu_ar
== NULL
);
1206 sel
= &uth
->uu_select
;
1207 /* cleanup the select bit space */
1209 FREE(sel
->ibits
, M_TEMP
);
1210 FREE(sel
->obits
, M_TEMP
);
1215 vnode_rele(uth
->uu_cdir
);
1216 uth
->uu_cdir
= NULLVP
;
1219 if (uth
->uu_allocsize
&& uth
->uu_wqset
){
1220 kfree(uth
->uu_wqset
, uth
->uu_allocsize
);
1222 uth
->uu_allocsize
= 0;
1228 if ((task
!= kernel_task
) && p
) {
1230 if (((uth
->uu_flag
& UT_VFORK
) == UT_VFORK
) && (uth
->uu_proc
!= PROC_NULL
)) {
1231 vfork_exit_internal(uth
->uu_proc
, 0, 1);
1233 if (get_bsdtask_info(task
) == p
) {
1235 TAILQ_REMOVE(&p
->p_uthlist
, uth
, uu_list
);
1239 if (uth
->t_dtrace_scratch
!= NULL
) {
1240 dtrace_ptss_release_entry(p
, uth
->t_dtrace_scratch
);
1246 /* This routine releases the credential stored in uthread */
1248 uthread_cred_free(void *uthread
)
1250 uthread_t uth
= (uthread_t
)uthread
;
1252 /* and free the uthread itself */
1253 if (IS_VALID_CRED(uth
->uu_ucred
)) {
1254 kauth_cred_t oldcred
= uth
->uu_ucred
;
1255 uth
->uu_ucred
= NOCRED
;
1256 kauth_cred_unref(&oldcred
);
1260 /* This routine frees the uthread structure held in thread structure */
1262 uthread_zone_free(void *uthread
)
1264 /* and free the uthread itself */
1265 zfree(uthread_zone
, uthread
);