]>
git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_fork.c
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66 * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
69 #include <kern/assert.h>
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/filedesc.h>
73 #include <sys/kernel.h>
74 #include <sys/malloc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/kauth.h>
78 #include <sys/resourcevar.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/file_internal.h>
83 #include <sys/ktrace.h>
86 #include <bsm/audit_kernel.h>
88 #include <mach/mach_types.h>
89 #include <kern/kern_types.h>
90 #include <kern/kalloc.h>
91 #include <kern/mach_param.h>
92 #include <kern/task.h>
93 #include <kern/zalloc.h>
95 #include <machine/spl.h>
97 #include <vm/vm_protos.h> // for vm_map_commpage64
99 thread_t
cloneproc(struct proc
*, int);
100 struct proc
* forkproc(struct proc
*, int);
101 thread_t
procdup(struct proc
*child
, struct proc
*parent
);
103 #define DOFORK 0x1 /* fork() system call */
104 #define DOVFORK 0x2 /* vfork() system call */
105 static int fork1(struct proc
*, long, register_t
*);
111 fork(struct proc
*p
, __unused
void *uap
, register_t
*retval
)
113 return (fork1(p
, (long)DOFORK
, retval
));
120 vfork(struct proc
*p
, void *uap
, register_t
*retval
)
122 register struct proc
* newproc
;
124 thread_t cur_act
= (thread_t
)current_thread();
130 * Although process entries are dynamically created, we still keep
131 * a global limit on the maximum number we will create. Don't allow
132 * a nonprivileged user to use the last process; don't let root
133 * exceed the limit. The variable nprocs is the current number of
134 * processes, maxproc is the limit.
136 uid
= kauth_cred_get()->cr_ruid
;
137 if ((nprocs
>= maxproc
- 1 && uid
!= 0) || nprocs
>= maxproc
) {
144 * Increment the count of procs running with this uid. Don't allow
145 * a nonprivileged user to exceed their current limit.
147 count
= chgproccnt(uid
, 1);
148 if (uid
!= 0 && count
> p
->p_rlimit
[RLIMIT_NPROC
].rlim_cur
) {
149 (void)chgproccnt(uid
, -1);
153 ut
= (struct uthread
*)get_bsdthread_info(cur_act
);
154 if (ut
->uu_flag
& UT_VFORK
) {
155 printf("vfork called recursively by %s\n", p
->p_comm
);
156 (void)chgproccnt(uid
, -1);
159 p
->p_flag
|= P_VFORK
;
162 /* The newly created process comes with signal lock held */
163 newproc
= (struct proc
*)forkproc(p
,1);
165 AUDIT_ARG(pid
, newproc
->p_pid
);
167 LIST_INSERT_AFTER(p
, newproc
, p_pglist
);
169 newproc
->task
= p
->task
;
170 LIST_INSERT_HEAD(&p
->p_children
, newproc
, p_sibling
);
171 LIST_INIT(&newproc
->p_children
);
172 LIST_INSERT_HEAD(&allproc
, newproc
, p_list
);
173 LIST_INSERT_HEAD(PIDHASH(newproc
->p_pid
), newproc
, p_hash
);
174 TAILQ_INIT(& newproc
->p_evlist
);
175 newproc
->p_stat
= SRUN
;
176 newproc
->p_flag
|= P_INVFORK
;
177 newproc
->p_vforkact
= cur_act
;
179 ut
->uu_flag
|= UT_VFORK
;
180 ut
->uu_proc
= newproc
;
181 ut
->uu_userstate
= (void *)act_thread_csave();
182 ut
->uu_vforkmask
= ut
->uu_sigmask
;
184 /* temporarily drop thread-set-id state */
185 if (ut
->uu_flag
& UT_SETUID
) {
186 ut
->uu_flag
|= UT_WASSETUID
;
187 ut
->uu_flag
&= ~UT_SETUID
;
190 thread_set_child(cur_act
, newproc
->p_pid
);
192 microtime(&newproc
->p_stats
->p_start
);
193 newproc
->p_acflag
= AFORK
;
196 * Preserve synchronization semantics of vfork. If waiting for
197 * child to exec or exit, set P_PPWAIT on child, and sleep on our
198 * proc (in case of exit).
200 newproc
->p_flag
|= P_PPWAIT
;
202 /* drop the signal lock on the child */
203 signal_unlock(newproc
);
205 retval
[0] = newproc
->p_pid
;
206 retval
[1] = 1; /* mark child */
212 * Return to parent vfork thread()
215 vfork_return(__unused thread_t th_act
, struct proc
*p
, struct proc
*p2
,
218 thread_t cur_act
= (thread_t
)current_thread();
221 ut
= (struct uthread
*)get_bsdthread_info(cur_act
);
223 act_thread_catt(ut
->uu_userstate
);
225 /* Make sure only one at this time */
227 if (p
->p_vforkcnt
<0)
228 panic("vfork cnt is -ve");
229 if (p
->p_vforkcnt
<=0)
230 p
->p_flag
&= ~P_VFORK
;
231 ut
->uu_userstate
= 0;
232 ut
->uu_flag
&= ~UT_VFORK
;
233 /* restore thread-set-id state */
234 if (ut
->uu_flag
& UT_WASSETUID
) {
235 ut
->uu_flag
|= UT_SETUID
;
236 ut->uu_flag &= ~UT_WASSETUID; /* clear only the saved-state marker; masking without the complement would wipe every other flag, including the UT_SETUID bit restored just above */
239 ut
->uu_sigmask
= ut
->uu_vforkmask
;
240 p2
->p_flag
&= ~P_INVFORK
;
241 p2
->p_vforkact
= (void *)0;
243 thread_set_parent(cur_act
, p2
->p_pid
);
246 retval
[0] = p2
->p_pid
;
247 retval
[1] = 0; /* mark parent */
254 procdup(struct proc
*child
, struct proc
*parent
)
258 kern_return_t result
;
260 if (parent
->task
== kernel_task
)
261 result
= task_create_internal(TASK_NULL
, FALSE
, &task
);
263 result
= task_create_internal(parent
->task
, TRUE
, &task
);
264 if (result
!= KERN_SUCCESS
)
265 printf("fork/procdup: task_create failed. Code: 0x%x\n", result
);
267 /* task->proc = child; */
268 set_bsdtask_info(task
, child
);
269 if (parent
->p_flag
& P_LP64
) {
270 task_set_64bit(task
, TRUE
);
271 child
->p_flag
|= P_LP64
;
273 /* LP64todo - clean up this hacked mapping of commpage */
274 pmap_map_sharedpage(task
, get_map_pmap(get_task_map(task
)));
275 vm_map_commpage64(get_task_map(task
));
278 task_set_64bit(task
, FALSE
);
279 child
->p_flag
&= ~P_LP64
;
281 if (child
->p_nice
!= 0)
282 resetpriority(child
);
284 result
= thread_create(task
, &thread
);
285 if (result
!= KERN_SUCCESS
)
286 printf("fork/procdup: thread_create failed. Code: 0x%x\n", result
);
293 fork1(p1
, flags
, retval
)
298 register struct proc
*p2
;
305 * Although process entries are dynamically created, we still keep
306 * a global limit on the maximum number we will create. Don't allow
307 * a nonprivileged user to use the last process; don't let root
308 * exceed the limit. The variable nprocs is the current number of
309 * processes, maxproc is the limit.
311 uid
= kauth_cred_get()->cr_ruid
;
312 if ((nprocs
>= maxproc
- 1 && uid
!= 0) || nprocs
>= maxproc
) {
319 * Increment the count of procs running with this uid. Don't allow
320 * a nonprivileged user to exceed their current limit.
322 count
= chgproccnt(uid
, 1);
323 if (uid
!= 0 && count
> p1
->p_rlimit
[RLIMIT_NPROC
].rlim_cur
) {
324 (void)chgproccnt(uid
, -1);
328 /* The newly created process comes with signal lock held */
329 newth
= cloneproc(p1
, 1);
331 /* p2 = newth->task->proc; */
332 p2
= (struct proc
*)(get_bsdtask_info(get_threadtask(newth
)));
333 set_security_token(p2
); /* propagate change of PID */
335 AUDIT_ARG(pid
, p2
->p_pid
);
337 thread_set_child(newth
, p2
->p_pid
);
339 microtime(&p2
->p_stats
->p_start
);
340 p2
->p_acflag
= AFORK
;
343 * Preserve synchronization semantics of vfork. If waiting for
344 * child to exec or exit, set P_PPWAIT on child, and sleep on our
345 * proc (in case of exit).
347 if (flags
== DOVFORK
)
348 p2
->p_flag
|= P_PPWAIT
;
349 /* drop the signal lock on the child */
352 (void) thread_resume(newth
);
354 /* drop the extra references we got during the creation */
355 if ((t
= (task_t
)get_threadtask(newth
)) != NULL
) {
358 thread_deallocate(newth
);
360 KNOTE(&p1
->p_klist
, NOTE_FORK
| p2
->p_pid
);
362 while (p2
->p_flag
& P_PPWAIT
)
363 tsleep(p1
, PWAIT
, "ppwait", 0);
365 retval
[0] = p2
->p_pid
;
366 retval
[1] = 0; /* mark parent */
374 * Create a new process from a specified process.
375 * On return newly created child process has signal
376 * lock held to block delivery of signal to it if called with
377 * lock set. fork() code needs to explicitly remove this lock
378 * before signals can be delivered
382 register struct proc
*p1
;
385 register struct proc
*p2
;
388 p2
= (struct proc
*)forkproc(p1
,lock
);
391 th
= procdup(p2
, p1
); /* child, parent */
393 LIST_INSERT_AFTER(p1
, p2
, p_pglist
);
395 LIST_INSERT_HEAD(&p1
->p_children
, p2
, p_sibling
);
396 LIST_INIT(&p2
->p_children
);
397 LIST_INSERT_HEAD(&allproc
, p2
, p_list
);
398 LIST_INSERT_HEAD(PIDHASH(p2
->p_pid
), p2
, p_hash
);
399 TAILQ_INIT(&p2
->p_evlist
);
401 * Make child runnable, set start time.
410 register struct proc
*p1
;
413 register struct proc
*p2
, *newproc
;
414 static int nextpid
= 0, pidchecked
= 0;
416 /* Allocate new proc. */
417 MALLOC_ZONE(newproc
, struct proc
*,
418 sizeof *newproc
, M_PROC
, M_WAITOK
);
420 panic("forkproc: M_PROC zone exhausted");
421 MALLOC_ZONE(newproc
->p_stats
, struct pstats
*,
422 sizeof *newproc
->p_stats
, M_SUBPROC
, M_WAITOK
);
423 if (newproc
->p_stats
== NULL
)
424 panic("forkproc: M_SUBPROC zone exhausted (p_stats)");
425 MALLOC_ZONE(newproc
->p_sigacts
, struct sigacts
*,
426 sizeof *newproc
->p_sigacts
, M_SUBPROC
, M_WAITOK
);
427 if (newproc
->p_sigacts
== NULL
)
428 panic("forkproc: M_SUBPROC zone exhausted (p_sigacts)");
431 * Find an unused process ID. We remember a range of unused IDs
432 * ready to use (from nextpid+1 through pidchecked-1).
437 * If the process ID prototype has wrapped around,
438 * restart somewhat above 0, as the low-numbered procs
439 * tend to include daemons that don't exit.
441 if (nextpid
>= PID_MAX
) {
445 if (nextpid
>= pidchecked
) {
448 pidchecked
= PID_MAX
;
450 * Scan the active and zombie procs to check whether this pid
451 * is in use. Remember the lowest pid that's greater
452 * than nextpid, so we can avoid checking for a while.
454 p2
= allproc
.lh_first
;
456 for (; p2
!= 0; p2
= p2
->p_list
.le_next
) {
457 while (p2
->p_pid
== nextpid
||
458 p2
->p_pgrp
->pg_id
== nextpid
||
459 p2
->p_session
->s_sid
== nextpid
) {
461 if (nextpid
>= pidchecked
)
464 if (p2
->p_pid
> nextpid
&& pidchecked
> p2
->p_pid
)
465 pidchecked
= p2
->p_pid
;
466 if (p2
->p_pgrp
&& p2
->p_pgrp
->pg_id
> nextpid
&&
467 pidchecked
> p2
->p_pgrp
->pg_id
)
468 pidchecked
= p2
->p_pgrp
->pg_id
;
469 if (p2
->p_session
->s_sid
> nextpid
&&
470 pidchecked
> p2
->p_session
->s_sid
)
471 pidchecked
= p2
->p_session
->s_sid
;
475 p2
= zombproc
.lh_first
;
483 p2
->p_shutdownstate
= 0;
487 * Make a proc table entry for the new process.
488 * Start by zeroing the section of proc that is zero-initialized,
489 * then copy the section that is copied directly from the parent.
491 bzero(&p2
->p_startzero
,
492 (unsigned) ((caddr_t
)&p2
->p_endzero
- (caddr_t
)&p2
->p_startzero
));
493 bcopy(&p1
->p_startcopy
, &p2
->p_startcopy
,
494 (unsigned) ((caddr_t
)&p2
->p_endcopy
- (caddr_t
)&p2
->p_startcopy
));
495 p2
->vm_shm
= (void *)NULL
; /* Make sure it is zero */
498 * Some flags are inherited from the parent.
499 * Duplicate sub-structures as needed.
500 * Increase reference counts on shared objects.
501 * The p_stats and p_sigacts substructs are set in vm_fork.
503 p2
->p_flag
= (p1
->p_flag
& (P_LP64
| P_CLASSIC
| P_AFFINITY
));
504 if (p1
->p_flag
& P_PROFIL
)
507 * Note that if the current thread has an assumed identity, this
508 * credential will be granted to the new process.
510 p2
->p_ucred
= kauth_cred_get_with_ref();
512 lck_mtx_init(&p2
->p_mlock
, proc_lck_grp
, proc_lck_attr
);
513 lck_mtx_init(&p2
->p_fdmlock
, proc_lck_grp
, proc_lck_attr
);
514 klist_init(&p2
->p_klist
);
516 /* bump references to the text vnode */
517 p2
->p_textvp
= p1
->p_textvp
;
519 vnode_rele(p2
->p_textvp
);
521 /* XXX may fail to copy descriptors to child */
522 p2
->p_fd
= fdcopy(p1
);
525 /* XXX may fail to attach shm to child */
526 (void)shmfork(p1
,p2
);
529 * If p_limit is still copy-on-write, bump refcnt,
530 * otherwise get a copy that won't be modified.
531 * (If PL_SHAREMOD is clear, the structure is shared copy-on-write.)
534 if (p1
->p_limit
->p_lflags
& PL_SHAREMOD
)
535 p2
->p_limit
= limcopy(p1
->p_limit
);
537 p2
->p_limit
= p1
->p_limit
;
538 p2
->p_limit
->p_refcnt
++;
541 bzero(&p2
->p_stats
->pstat_startzero
,
542 (unsigned) ((caddr_t
)&p2
->p_stats
->pstat_endzero
-
543 (caddr_t
)&p2
->p_stats
->pstat_startzero
));
544 bcopy(&p1
->p_stats
->pstat_startcopy
, &p2
->p_stats
->pstat_startcopy
,
545 ((caddr_t
)&p2
->p_stats
->pstat_endcopy
-
546 (caddr_t
)&p2
->p_stats
->pstat_startcopy
));
548 bzero(&p2
->p_stats
->user_p_prof
, sizeof(struct user_uprof
));
550 if (p1
->p_sigacts
!= NULL
)
551 (void)memcpy(p2
->p_sigacts
,
552 p1
->p_sigacts
, sizeof *p2
->p_sigacts
);
554 (void)memset(p2
->p_sigacts
, 0, sizeof *p2
->p_sigacts
);
556 if (p1
->p_session
->s_ttyvp
!= NULL
&& p1
->p_flag
& P_CONTROLT
)
557 p2
->p_flag
|= P_CONTROLT
;
559 p2
->p_argslen
= p1
->p_argslen
;
560 p2
->p_argc
= p1
->p_argc
;
564 p2
->p_debugger
= 0; /* don't inherit */
565 lockinit(&p2
->signal_lock
, PVM
, "signal", 0, 0);
566 /* block all signals from reaching the process */
570 p2
->sigwait_thread
= NULL
;
571 p2
->exit_thread
= NULL
;
572 p2
->user_stack
= p1
->user_stack
;
577 p2
->p_internalref
= 0;
578 TAILQ_INIT(&p2
->p_uthlist
);
579 TAILQ_INIT(&p2
->aio_activeq
);
580 TAILQ_INIT(&p2
->aio_doneq
);
581 p2
->aio_active_count
= 0;
582 p2
->aio_done_count
= 0;
586 * Copy traceflag and tracefile if enabled.
587 * If not inherited, these were zeroed above.
589 if (p1
->p_traceflag
&KTRFAC_INHERIT
) {
590 p2
->p_traceflag
= p1
->p_traceflag
;
591 if ((p2
->p_tracep
= p1
->p_tracep
) != NULL
) {
592 vnode_ref(p2
->p_tracep
);
603 lck_mtx_lock(&p
->p_mlock
);
607 proc_unlock(proc_t p
)
609 lck_mtx_unlock(&p
->p_mlock
);
612 #include <kern/zalloc.h>
614 struct zone
*uthread_zone
;
615 int uthread_zone_inited
= 0;
618 uthread_zone_init(void)
620 if (!uthread_zone_inited
) {
621 uthread_zone
= zinit(sizeof(struct uthread
),
622 THREAD_MAX
* sizeof(struct uthread
),
623 THREAD_CHUNK
* sizeof(struct uthread
),
625 uthread_zone_inited
= 1;
630 uthread_alloc(task_t task
, thread_t thr_act
)
633 struct uthread
*uth
, *uth_parent
;
635 boolean_t funnel_state
;
637 if (!uthread_zone_inited
)
640 ut
= (void *)zalloc(uthread_zone
);
641 bzero(ut
, sizeof(struct uthread
));
643 p
= (struct proc
*) get_bsdtask_info(task
);
644 uth
= (struct uthread
*)ut
;
647 * Thread inherits credential from the creating thread, if both
648 * are in the same task.
650 * If the creating thread has no credential or is from another
651 * task we can leave the new thread credential NULL. If it needs
652 * one later, it will be lazily assigned from the task's process.
654 uth_parent
= (struct uthread
*)get_bsdthread_info(current_thread());
655 if ((task
== current_task()) &&
656 (uth_parent
!= NULL
) &&
657 (uth_parent
->uu_ucred
!= NOCRED
)) {
658 uth
->uu_ucred
= uth_parent
->uu_ucred
;
659 kauth_cred_ref(uth
->uu_ucred
);
660 /* the credential we just inherited is an assumed credential */
661 if (uth_parent
->uu_flag
& UT_SETUID
)
662 uth
->uu_flag
|= UT_SETUID
;
664 uth
->uu_ucred
= NOCRED
;
667 if (task
!= kernel_task
) {
669 funnel_state
= thread_funnel_set(kernel_flock
, TRUE
);
671 if (uth_parent
->uu_flag
& UT_SAS_OLDMASK
)
672 uth
->uu_sigmask
= uth_parent
->uu_oldmask
;
674 uth
->uu_sigmask
= uth_parent
->uu_sigmask
;
676 uth
->uu_act
= thr_act
;
679 TAILQ_INSERT_TAIL(&p
->p_uthlist
, uth
, uu_list
);
682 (void)thread_funnel_set(kernel_flock
, funnel_state
);
690 uthread_free(task_t task
, void *uthread
, void * bsd_info
)
693 struct uthread
*uth
= (struct uthread
*)uthread
;
694 struct proc
* p
= (struct proc
*)bsd_info
;
695 boolean_t funnel_state
;
698 * Per-thread audit state should never last beyond system
699 * call return. Since we don't audit the thread creation/
700 * removal, the thread state pointer should never be
701 * non-NULL when we get here.
703 assert(uth
->uu_ar
== NULL
);
705 sel
= &uth
->uu_select
;
706 /* cleanup the select bit space */
708 FREE(sel
->ibits
, M_TEMP
);
709 FREE(sel
->obits
, M_TEMP
);
712 if (sel
->allocsize
&& sel
->wqset
){
713 kfree(sel
->wqset
, sel
->allocsize
);
720 if (uth
->uu_ucred
!= NOCRED
)
721 kauth_cred_rele(uth
->uu_ucred
);
723 if ((task
!= kernel_task
) && p
) {
724 funnel_state
= thread_funnel_set(kernel_flock
, TRUE
);
726 TAILQ_REMOVE(&p
->p_uthlist
, uth
, uu_list
);
728 (void)thread_funnel_set(kernel_flock
, funnel_state
);
730 /* and free the uthread itself */
731 zfree(uthread_zone
, uthread
);