]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_fork.c
xnu-344.49.tar.gz
[apple/xnu.git] / bsd / kern / kern_fork.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
26 /*
27 * Copyright (c) 1982, 1986, 1989, 1991, 1993
28 * The Regents of the University of California. All rights reserved.
29 * (c) UNIX System Laboratories, Inc.
30 * All or some portions of this file are derived from material licensed
31 * to the University of California by American Telephone and Telegraph
32 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
33 * the permission of UNIX System Laboratories, Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
64 */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/filedesc.h>
69 #include <sys/kernel.h>
70 #include <sys/malloc.h>
71 #include <sys/proc.h>
72 #include <sys/user.h>
73 #include <sys/resourcevar.h>
74 #include <sys/vnode.h>
75 #include <sys/file.h>
76 #include <sys/acct.h>
77 #if KTRACE
78 #include <sys/ktrace.h>
79 #endif
80
81 #include <mach/mach_types.h>
82 #include <kern/mach_param.h>
83
84 #include <machine/spl.h>
85
86 thread_act_t cloneproc(struct proc *, int);
87 struct proc * forkproc(struct proc *, int);
88 thread_act_t procdup();
89
90 #define DOFORK 0x1 /* fork() system call */
91 #define DOVFORK 0x2 /* vfork() system call */
92 static int fork1(struct proc *, long, register_t *);
93
94 /*
95 * fork system call.
96 */
97 int
98 fork(p, uap, retval)
99 struct proc *p;
100 void *uap;
101 register_t *retval;
102 {
103 return (fork1(p, (long)DOFORK, retval));
104 }
105
106 /*
107 * vfork system call
108 */
109 int
110 vfork(p, uap, retval)
111 struct proc *p;
112 void *uap;
113 register_t *retval;
114 {
115 register struct proc * newproc;
116 register uid_t uid;
117 thread_act_t cur_act = (thread_act_t)current_act();
118 int count;
119 task_t t;
120 uthread_t ut;
121
122 /*
123 * Although process entries are dynamically created, we still keep
124 * a global limit on the maximum number we will create. Don't allow
125 * a nonprivileged user to use the last process; don't let root
126 * exceed the limit. The variable nprocs is the current number of
127 * processes, maxproc is the limit.
128 */
129 uid = p->p_cred->p_ruid;
130 if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
131 tablefull("proc");
132 retval[1] = 0;
133 return (EAGAIN);
134 }
135
136 /*
137 * Increment the count of procs running with this uid. Don't allow
138 * a nonprivileged user to exceed their current limit.
139 */
140 count = chgproccnt(uid, 1);
141 if (uid != 0 && count > p->p_rlimit[RLIMIT_NPROC].rlim_cur) {
142 (void)chgproccnt(uid, -1);
143 return (EAGAIN);
144 }
145
146 ut = (struct uthread *)get_bsdthread_info(cur_act);
147 if (ut->uu_flag & P_VFORK) {
148 printf("vfork called recursively by %s\n", p->p_comm);
149 return (EINVAL);
150 }
151 p->p_flag |= P_VFORK;
152 p->p_vforkcnt++;
153
154 /* The newly created process comes with signal lock held */
155 newproc = (struct proc *)forkproc(p,1);
156
157 LIST_INSERT_AFTER(p, newproc, p_pglist);
158 newproc->p_pptr = p;
159 newproc->task = p->task;
160 LIST_INSERT_HEAD(&p->p_children, newproc, p_sibling);
161 LIST_INIT(&newproc->p_children);
162 LIST_INSERT_HEAD(&allproc, newproc, p_list);
163 LIST_INSERT_HEAD(PIDHASH(newproc->p_pid), newproc, p_hash);
164 TAILQ_INIT(& newproc->p_evlist);
165 newproc->p_stat = SRUN;
166 newproc->p_flag |= P_INVFORK;
167 newproc->p_vforkact = cur_act;
168
169 ut->uu_flag |= P_VFORK;
170 ut->uu_proc = newproc;
171 ut->uu_userstate = (void *)act_thread_csave();
172 ut->uu_vforkmask = ut->uu_sigmask;
173
174 thread_set_child(cur_act, newproc->p_pid);
175
176 newproc->p_stats->p_start = time;
177 newproc->p_acflag = AFORK;
178
179 /*
180 * Preserve synchronization semantics of vfork. If waiting for
181 * child to exec or exit, set P_PPWAIT on child, and sleep on our
182 * proc (in case of exit).
183 */
184 newproc->p_flag |= P_PPWAIT;
185
186 /* drop the signal lock on the child */
187 signal_unlock(newproc);
188
189 retval[0] = newproc->p_pid;
190 retval[1] = 1; /* mark child */
191
192 return (0);
193 }
194
195 /*
196 * Return to parent vfork ehread()
197 */
198 void
199 vfork_return(th_act, p, p2, retval)
200 thread_act_t th_act;
201 struct proc * p;
202 struct proc *p2;
203 register_t *retval;
204 {
205 long flags;
206 register uid_t uid;
207 thread_t newth, self = current_thread();
208 thread_act_t cur_act = (thread_act_t)current_act();
209 int s, count;
210 task_t t;
211 uthread_t ut;
212
213 ut = (struct uthread *)get_bsdthread_info(cur_act);
214
215 act_thread_catt(ut->uu_userstate);
216
217 /* Make sure only one at this time */
218 p->p_vforkcnt--;
219 if (p->p_vforkcnt <0)
220 panic("vfork cnt is -ve");
221 if (p->p_vforkcnt <=0)
222 p->p_flag &= ~P_VFORK;
223 ut->uu_userstate = 0;
224 ut->uu_flag &= ~P_VFORK;
225 ut->uu_proc = 0;
226 ut->uu_sigmask = ut->uu_vforkmask;
227 p2->p_flag &= ~P_INVFORK;
228 p2->p_vforkact = (void *)0;
229
230 thread_set_parent(cur_act, p2->p_pid);
231
232 if (retval) {
233 retval[0] = p2->p_pid;
234 retval[1] = 0; /* mark parent */
235 }
236
237 return;
238 }
239
240 thread_act_t
241 procdup(
242 struct proc *child,
243 struct proc *parent)
244 {
245 thread_act_t thread;
246 task_t task;
247 kern_return_t result;
248 extern task_t kernel_task;
249
250 if (parent->task == kernel_task)
251 result = task_create_local(TASK_NULL, FALSE, FALSE, &task);
252 else
253 result = task_create_local(parent->task, TRUE, FALSE, &task);
254 if (result != KERN_SUCCESS)
255 printf("fork/procdup: task_create failed. Code: 0x%x\n", result);
256 child->task = task;
257 /* task->proc = child; */
258 set_bsdtask_info(task, child);
259 if (child->p_nice != 0)
260 resetpriority(child);
261 result = thread_create(task, &thread);
262 if (result != KERN_SUCCESS)
263 printf("fork/procdup: thread_create failed. Code: 0x%x\n", result);
264
265 return(thread);
266 }
267
268
269 static int
270 fork1(p1, flags, retval)
271 struct proc *p1;
272 long flags;
273 register_t *retval;
274 {
275 register struct proc *p2;
276 register uid_t uid;
277 thread_act_t newth;
278 int s, count;
279 task_t t;
280
281 /*
282 * Although process entries are dynamically created, we still keep
283 * a global limit on the maximum number we will create. Don't allow
284 * a nonprivileged user to use the last process; don't let root
285 * exceed the limit. The variable nprocs is the current number of
286 * processes, maxproc is the limit.
287 */
288 uid = p1->p_cred->p_ruid;
289 if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
290 tablefull("proc");
291 retval[1] = 0;
292 return (EAGAIN);
293 }
294
295 /*
296 * Increment the count of procs running with this uid. Don't allow
297 * a nonprivileged user to exceed their current limit.
298 */
299 count = chgproccnt(uid, 1);
300 if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
301 (void)chgproccnt(uid, -1);
302 return (EAGAIN);
303 }
304
305 /* The newly created process comes with signal lock held */
306 newth = cloneproc(p1, 1);
307 thread_dup(newth);
308 /* p2 = newth->task->proc; */
309 p2 = (struct proc *)(get_bsdtask_info(get_threadtask(newth)));
310
311 thread_set_child(newth, p2->p_pid);
312
313 s = splhigh();
314 p2->p_stats->p_start = time;
315 splx(s);
316 p2->p_acflag = AFORK;
317
318 /*
319 * Preserve synchronization semantics of vfork. If waiting for
320 * child to exec or exit, set P_PPWAIT on child, and sleep on our
321 * proc (in case of exit).
322 */
323 if (flags == DOVFORK)
324 p2->p_flag |= P_PPWAIT;
325 /* drop the signal lock on the child */
326 signal_unlock(p2);
327
328 (void) thread_resume(newth);
329
330 /* drop the extra references we got during the creation */
331 if (t = (task_t)get_threadtask(newth)) {
332 task_deallocate(t);
333 }
334 act_deallocate(newth);
335
336 while (p2->p_flag & P_PPWAIT)
337 tsleep(p1, PWAIT, "ppwait", 0);
338
339 retval[0] = p2->p_pid;
340 retval[1] = 0; /* mark parent */
341
342 return (0);
343 }
344
345 /*
346 * cloneproc()
347 *
348 * Create a new process from a specified process.
349 * On return newly created child process has signal
350 * lock held to block delivery of signal to it if called with
351 * lock set. fork() code needs to explicity remove this lock
352 * before signals can be delivered
353 */
354 thread_act_t
355 cloneproc(p1, lock)
356 register struct proc *p1;
357 register int lock;
358 {
359 register struct proc *p2;
360 thread_act_t th;
361
362 p2 = (struct proc *)forkproc(p1,lock);
363
364
365 th = procdup(p2, p1); /* child, parent */
366
367 LIST_INSERT_AFTER(p1, p2, p_pglist);
368 p2->p_pptr = p1;
369 LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
370 LIST_INIT(&p2->p_children);
371 LIST_INSERT_HEAD(&allproc, p2, p_list);
372 LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
373 TAILQ_INIT(&p2->p_evlist);
374 /*
375 * Make child runnable, set start time.
376 */
377 p2->p_stat = SRUN;
378
379 return(th);
380 }
381
382 struct proc *
383 forkproc(p1, lock)
384 register struct proc *p1;
385 register int lock;
386 {
387 register struct proc *p2, *newproc;
388 static int nextpid = 0, pidchecked = 0;
389 thread_t th;
390
391 /* Allocate new proc. */
392 MALLOC_ZONE(newproc, struct proc *,
393 sizeof *newproc, M_PROC, M_WAITOK);
394 MALLOC_ZONE(newproc->p_cred, struct pcred *,
395 sizeof *newproc->p_cred, M_SUBPROC, M_WAITOK);
396 MALLOC_ZONE(newproc->p_stats, struct pstats *,
397 sizeof *newproc->p_stats, M_SUBPROC, M_WAITOK);
398 MALLOC_ZONE(newproc->p_sigacts, struct sigacts *,
399 sizeof *newproc->p_sigacts, M_SUBPROC, M_WAITOK);
400
401 /*
402 * Find an unused process ID. We remember a range of unused IDs
403 * ready to use (from nextpid+1 through pidchecked-1).
404 */
405 nextpid++;
406 retry:
407 /*
408 * If the process ID prototype has wrapped around,
409 * restart somewhat above 0, as the low-numbered procs
410 * tend to include daemons that don't exit.
411 */
412 if (nextpid >= PID_MAX) {
413 nextpid = 100;
414 pidchecked = 0;
415 }
416 if (nextpid >= pidchecked) {
417 int doingzomb = 0;
418
419 pidchecked = PID_MAX;
420 /*
421 * Scan the active and zombie procs to check whether this pid
422 * is in use. Remember the lowest pid that's greater
423 * than nextpid, so we can avoid checking for a while.
424 */
425 p2 = allproc.lh_first;
426 again:
427 for (; p2 != 0; p2 = p2->p_list.le_next) {
428 while (p2->p_pid == nextpid ||
429 p2->p_pgrp->pg_id == nextpid ||
430 p2->p_session->s_sid == nextpid) {
431 nextpid++;
432 if (nextpid >= pidchecked)
433 goto retry;
434 }
435 if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
436 pidchecked = p2->p_pid;
437 if (p2->p_pgrp && p2->p_pgrp->pg_id > nextpid &&
438 pidchecked > p2->p_pgrp->pg_id)
439 pidchecked = p2->p_pgrp->pg_id;
440 if (p2->p_session->s_sid > nextpid &&
441 pidchecked > p2->p_session->s_sid)
442 pidchecked = p2->p_session->s_sid;
443 }
444 if (!doingzomb) {
445 doingzomb = 1;
446 p2 = zombproc.lh_first;
447 goto again;
448 }
449 }
450
451 nprocs++;
452 p2 = newproc;
453 p2->p_stat = SIDL;
454 p2->p_pid = nextpid;
455
456 /*
457 * Make a proc table entry for the new process.
458 * Start by zeroing the section of proc that is zero-initialized,
459 * then copy the section that is copied directly from the parent.
460 */
461 bzero(&p2->p_startzero,
462 (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
463 bcopy(&p1->p_startcopy, &p2->p_startcopy,
464 (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
465 p2->vm_shm = (void *)NULL; /* Make sure it is zero */
466
467 /*
468 * Duplicate sub-structures as needed.
469 * Increase reference counts on shared objects.
470 * The p_stats and p_sigacts substructs are set in vm_fork.
471 */
472 p2->p_flag = P_INMEM;
473 if (p1->p_flag & P_PROFIL)
474 startprofclock(p2);
475 bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
476 p2->p_cred->p_refcnt = 1;
477 crhold(p1->p_ucred);
478 lockinit(&p2->p_cred->pc_lock, PLOCK, "proc cred", 0, 0);
479
480 /* bump references to the text vnode */
481 p2->p_textvp = p1->p_textvp;
482 if (p2->p_textvp)
483 VREF(p2->p_textvp);
484
485 p2->p_fd = fdcopy(p1);
486 if (p1->vm_shm) {
487 shmfork(p1,p2);
488 }
489 /*
490 * If p_limit is still copy-on-write, bump refcnt,
491 * otherwise get a copy that won't be modified.
492 * (If PL_SHAREMOD is clear, the structure is shared
493 * copy-on-write.)
494 */
495 if (p1->p_limit->p_lflags & PL_SHAREMOD)
496 p2->p_limit = limcopy(p1->p_limit);
497 else {
498 p2->p_limit = p1->p_limit;
499 p2->p_limit->p_refcnt++;
500 }
501
502 bzero(&p2->p_stats->pstat_startzero,
503 (unsigned) ((caddr_t)&p2->p_stats->pstat_endzero -
504 (caddr_t)&p2->p_stats->pstat_startzero));
505 bcopy(&p1->p_stats->pstat_startcopy, &p2->p_stats->pstat_startcopy,
506 ((caddr_t)&p2->p_stats->pstat_endcopy -
507 (caddr_t)&p2->p_stats->pstat_startcopy));
508
509 if (p1->p_sigacts != NULL)
510 (void)memcpy(p2->p_sigacts,
511 p1->p_sigacts, sizeof *p2->p_sigacts);
512 else
513 (void)memset(p2->p_sigacts, 0, sizeof *p2->p_sigacts);
514
515 if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
516 p2->p_flag |= P_CONTROLT;
517
518 p2->p_xstat = 0;
519 p2->p_ru = NULL;
520
521 p2->p_debugger = 0; /* don't inherit */
522 lockinit(&p2->signal_lock, PVM, "signal", 0, 0);
523 /* block all signals to reach the process */
524 if (lock)
525 signal_lock(p2);
526 p2->sigwait = FALSE;
527 p2->sigwait_thread = NULL;
528 p2->exit_thread = NULL;
529 p2->user_stack = p1->user_stack;
530 p2->p_xxxsigpending = 0;
531 p2->p_vforkcnt = 0;
532 p2->p_vforkact = 0;
533 TAILQ_INIT(&p2->p_uthlist);
534
535 #if KTRACE
536 /*
537 * Copy traceflag and tracefile if enabled.
538 * If not inherited, these were zeroed above.
539 */
540 if (p1->p_traceflag&KTRFAC_INHERIT) {
541 p2->p_traceflag = p1->p_traceflag;
542 if ((p2->p_tracep = p1->p_tracep) != NULL)
543 VREF(p2->p_tracep);
544 }
545 #endif
546 return(p2);
547
548 }
549
550 #include <kern/zalloc.h>
551
552 struct zone *uthread_zone;
553 int uthread_zone_inited = 0;
554
555 void
556 uthread_zone_init()
557 {
558 if (!uthread_zone_inited) {
559 uthread_zone = zinit(sizeof(struct uthread),
560 THREAD_MAX * sizeof(struct uthread),
561 THREAD_CHUNK * sizeof(struct uthread),
562 "uthreads");
563 uthread_zone_inited = 1;
564 }
565 }
566
567 void *
568 uthread_alloc(task_t task, thread_act_t thr_act )
569 {
570 struct proc *p;
571 struct uthread *uth, *uth_parent;
572 void *ut;
573 extern task_t kernel_task;
574 boolean_t funnel_state;
575
576 if (!uthread_zone_inited)
577 uthread_zone_init();
578
579 ut = (void *)zalloc(uthread_zone);
580 bzero(ut, sizeof(struct uthread));
581
582 if (task != kernel_task) {
583 uth = (struct uthread *)ut;
584 p = get_bsdtask_info(task);
585
586 funnel_state = thread_funnel_set(kernel_flock, TRUE);
587 uth_parent = (struct uthread *)get_bsdthread_info(current_act());
588 if (uth_parent) {
589 if (uth_parent->uu_flag & USAS_OLDMASK)
590 uth->uu_sigmask = uth_parent->uu_oldmask;
591 else
592 uth->uu_sigmask = uth_parent->uu_sigmask;
593 }
594 uth->uu_act = thr_act;
595 //signal_lock(p);
596 if (p)
597 TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list);
598 //signal_unlock(p);
599 (void)thread_funnel_set(kernel_flock, funnel_state);
600 }
601
602 return (ut);
603 }
604
605
606 void
607 uthread_free(task_t task, void *uthread, void * bsd_info)
608 {
609 struct _select *sel;
610 struct uthread *uth = (struct uthread *)uthread;
611 struct proc * p = (struct proc *)bsd_info;
612 extern task_t kernel_task;
613 int size;
614 boolean_t funnel_state;
615
616 sel = &uth->uu_state.ss_select;
617 /* cleanup the select bit space */
618 if (sel->nbytes) {
619 FREE(sel->ibits, M_TEMP);
620 FREE(sel->obits, M_TEMP);
621 }
622
623 if (sel->allocsize && uth->uu_wqsub){
624 kfree(uth->uu_wqsub, sel->allocsize);
625 sel->count = sel->nfcount = 0;
626 sel->allocsize = 0;
627 uth->uu_wqsub = 0;
628 sel->wql = 0;
629 }
630
631 if ((task != kernel_task) && p) {
632 funnel_state = thread_funnel_set(kernel_flock, TRUE);
633 //signal_lock(p);
634 TAILQ_REMOVE(&p->p_uthlist, uth, uu_list);
635 //signal_unlock(p);
636 (void)thread_funnel_set(kernel_flock, funnel_state);
637 }
638 /* and free the uthread itself */
639 zfree(uthread_zone, (vm_offset_t)uthread);
640 }