]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_fork.c
da5fec8f85d58155543f5292e72cb94982b80fd8
[apple/xnu.git] / bsd / kern / kern_fork.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
26 /*
27 * Copyright (c) 1982, 1986, 1989, 1991, 1993
28 * The Regents of the University of California. All rights reserved.
29 * (c) UNIX System Laboratories, Inc.
30 * All or some portions of this file are derived from material licensed
31 * to the University of California by American Telephone and Telegraph
32 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
33 * the permission of UNIX System Laboratories, Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
64 */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/filedesc.h>
69 #include <sys/kernel.h>
70 #include <sys/malloc.h>
71 #include <sys/proc.h>
72 #include <sys/user.h>
73 #include <sys/resourcevar.h>
74 #include <sys/vnode.h>
75 #include <sys/file.h>
76 #include <sys/acct.h>
77 #if KTRACE
78 #include <sys/ktrace.h>
79 #endif
80
81 #include <mach/mach_types.h>
82 #include <kern/mach_param.h>
83
84 #include <machine/spl.h>
85
86 thread_act_t cloneproc(struct proc *, int);
87 struct proc * forkproc(struct proc *, int);
88 thread_act_t procdup();
89
90 #define DOFORK 0x1 /* fork() system call */
91 #define DOVFORK 0x2 /* vfork() system call */
92 static int fork1(struct proc *, long, register_t *);
93
94 /*
95 * fork system call.
96 */
97 int
98 fork(p, uap, retval)
99 struct proc *p;
100 void *uap;
101 register_t *retval;
102 {
103 return (fork1(p, (long)DOFORK, retval));
104 }
105
106 /*
107 * vfork system call
108 */
109 int
110 vfork(p, uap, retval)
111 struct proc *p;
112 void *uap;
113 register_t *retval;
114 {
115 register struct proc * newproc;
116 register uid_t uid;
117 thread_act_t cur_act = (thread_act_t)current_act();
118 int count;
119 task_t t;
120 uthread_t ut;
121
122 /*
123 * Although process entries are dynamically created, we still keep
124 * a global limit on the maximum number we will create. Don't allow
125 * a nonprivileged user to use the last process; don't let root
126 * exceed the limit. The variable nprocs is the current number of
127 * processes, maxproc is the limit.
128 */
129 uid = p->p_cred->p_ruid;
130 if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
131 tablefull("proc");
132 retval[1] = 0;
133 return (EAGAIN);
134 }
135
136 /*
137 * Increment the count of procs running with this uid. Don't allow
138 * a nonprivileged user to exceed their current limit.
139 */
140 count = chgproccnt(uid, 1);
141 if (uid != 0 && count > p->p_rlimit[RLIMIT_NPROC].rlim_cur) {
142 (void)chgproccnt(uid, -1);
143 return (EAGAIN);
144 }
145
146 ut = (struct uthread *)get_bsdthread_info(cur_act);
147 if (ut->uu_flag & P_VFORK) {
148 printf("vfork called recursively by %s\n", p->p_comm);
149 return (EINVAL);
150 }
151 p->p_flag |= P_VFORK;
152 p->p_vforkcnt++;
153
154 /* The newly created process comes with signal lock held */
155 newproc = (struct proc *)forkproc(p,1);
156
157 LIST_INSERT_AFTER(p, newproc, p_pglist);
158 newproc->p_pptr = p;
159 newproc->task = p->task;
160 LIST_INSERT_HEAD(&p->p_children, newproc, p_sibling);
161 LIST_INIT(&newproc->p_children);
162 LIST_INSERT_HEAD(&allproc, newproc, p_list);
163 LIST_INSERT_HEAD(PIDHASH(newproc->p_pid), newproc, p_hash);
164 TAILQ_INIT(& newproc->p_evlist);
165 newproc->p_stat = SRUN;
166 newproc->p_flag |= P_INVFORK;
167 newproc->p_vforkact = cur_act;
168
169 ut->uu_flag |= P_VFORK;
170 ut->uu_proc = newproc;
171 ut->uu_userstate = (void *)act_thread_csave();
172 ut->uu_vforkmask = ut->uu_sigmask;
173
174 thread_set_child(cur_act, newproc->p_pid);
175
176 newproc->p_stats->p_start = time;
177 newproc->p_acflag = AFORK;
178
179 /*
180 * Preserve synchronization semantics of vfork. If waiting for
181 * child to exec or exit, set P_PPWAIT on child, and sleep on our
182 * proc (in case of exit).
183 */
184 newproc->p_flag |= P_PPWAIT;
185
186 /* drop the signal lock on the child */
187 signal_unlock(newproc);
188
189 retval[0] = newproc->p_pid;
190 retval[1] = 1; /* mark child */
191
192 return (0);
193 }
194
195 /*
196 * Return to parent vfork ehread()
197 */
198 void
199 vfork_return(th_act, p, p2, retval)
200 thread_act_t th_act;
201 struct proc * p;
202 struct proc *p2;
203 register_t *retval;
204 {
205 long flags;
206 register uid_t uid;
207 thread_t newth, self = current_thread();
208 thread_act_t cur_act = (thread_act_t)current_act();
209 int s, count;
210 task_t t;
211 uthread_t ut;
212
213 ut = (struct uthread *)get_bsdthread_info(cur_act);
214
215 act_thread_catt(ut->uu_userstate);
216
217 /* Make sure only one at this time */
218 p->p_vforkcnt--;
219 if (p->p_vforkcnt <0)
220 panic("vfork cnt is -ve");
221 if (p->p_vforkcnt <=0)
222 p->p_flag &= ~P_VFORK;
223 ut->uu_userstate = 0;
224 ut->uu_flag &= ~P_VFORK;
225 ut->uu_proc = 0;
226 ut->uu_sigmask = ut->uu_vforkmask;
227 p2->p_flag &= ~P_INVFORK;
228 p2->p_vforkact = (void *)0;
229
230 thread_set_parent(cur_act, p2->p_pid);
231
232 if (retval) {
233 retval[0] = p2->p_pid;
234 retval[1] = 0; /* mark parent */
235 }
236
237 return;
238 }
239
240 thread_act_t
241 procdup(
242 struct proc *child,
243 struct proc *parent)
244 {
245 thread_act_t thread;
246 task_t task;
247 kern_return_t result;
248 pmap_t pmap;
249 extern task_t kernel_task;
250
251 if (parent->task == kernel_task)
252 result = task_create_local(TASK_NULL, FALSE, FALSE, &task);
253 else
254 result = task_create_local(parent->task, TRUE, FALSE, &task);
255 if (result != KERN_SUCCESS)
256 printf("fork/procdup: task_create failed. Code: 0x%x\n", result);
257 child->task = task;
258 /* task->proc = child; */
259 set_bsdtask_info(task, child);
260 if (child->p_nice != 0)
261 resetpriority(child);
262
263 result = thread_create(task, &thread);
264 if (result != KERN_SUCCESS)
265 printf("fork/procdup: thread_create failed. Code: 0x%x\n", result);
266
267 return(thread);
268 }
269
270
271 static int
272 fork1(p1, flags, retval)
273 struct proc *p1;
274 long flags;
275 register_t *retval;
276 {
277 register struct proc *p2;
278 register uid_t uid;
279 thread_act_t newth;
280 int s, count;
281 task_t t;
282
283 /*
284 * Although process entries are dynamically created, we still keep
285 * a global limit on the maximum number we will create. Don't allow
286 * a nonprivileged user to use the last process; don't let root
287 * exceed the limit. The variable nprocs is the current number of
288 * processes, maxproc is the limit.
289 */
290 uid = p1->p_cred->p_ruid;
291 if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
292 tablefull("proc");
293 retval[1] = 0;
294 return (EAGAIN);
295 }
296
297 /*
298 * Increment the count of procs running with this uid. Don't allow
299 * a nonprivileged user to exceed their current limit.
300 */
301 count = chgproccnt(uid, 1);
302 if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
303 (void)chgproccnt(uid, -1);
304 return (EAGAIN);
305 }
306
307 /* The newly created process comes with signal lock held */
308 newth = cloneproc(p1, 1);
309 thread_dup(newth);
310 /* p2 = newth->task->proc; */
311 p2 = (struct proc *)(get_bsdtask_info(get_threadtask(newth)));
312
313 thread_set_child(newth, p2->p_pid);
314
315 s = splhigh();
316 p2->p_stats->p_start = time;
317 splx(s);
318 p2->p_acflag = AFORK;
319
320 /*
321 * Preserve synchronization semantics of vfork. If waiting for
322 * child to exec or exit, set P_PPWAIT on child, and sleep on our
323 * proc (in case of exit).
324 */
325 if (flags == DOVFORK)
326 p2->p_flag |= P_PPWAIT;
327 /* drop the signal lock on the child */
328 signal_unlock(p2);
329
330 (void) thread_resume(newth);
331
332 /* drop the extra references we got during the creation */
333 if (t = (task_t)get_threadtask(newth)) {
334 task_deallocate(t);
335 }
336 act_deallocate(newth);
337
338 while (p2->p_flag & P_PPWAIT)
339 tsleep(p1, PWAIT, "ppwait", 0);
340
341 retval[0] = p2->p_pid;
342 retval[1] = 0; /* mark parent */
343
344 return (0);
345 }
346
347 /*
348 * cloneproc()
349 *
350 * Create a new process from a specified process.
351 * On return newly created child process has signal
352 * lock held to block delivery of signal to it if called with
353 * lock set. fork() code needs to explicity remove this lock
354 * before signals can be delivered
355 */
356 thread_act_t
357 cloneproc(p1, lock)
358 register struct proc *p1;
359 register int lock;
360 {
361 register struct proc *p2;
362 thread_act_t th;
363
364 p2 = (struct proc *)forkproc(p1,lock);
365
366
367 th = procdup(p2, p1); /* child, parent */
368
369 LIST_INSERT_AFTER(p1, p2, p_pglist);
370 p2->p_pptr = p1;
371 LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
372 LIST_INIT(&p2->p_children);
373 LIST_INSERT_HEAD(&allproc, p2, p_list);
374 LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
375 TAILQ_INIT(&p2->p_evlist);
376 /*
377 * Make child runnable, set start time.
378 */
379 p2->p_stat = SRUN;
380
381 return(th);
382 }
383
384 struct proc *
385 forkproc(p1, lock)
386 register struct proc *p1;
387 register int lock;
388 {
389 register struct proc *p2, *newproc;
390 static int nextpid = 0, pidchecked = 0;
391 thread_t th;
392
393 /* Allocate new proc. */
394 MALLOC_ZONE(newproc, struct proc *,
395 sizeof *newproc, M_PROC, M_WAITOK);
396 MALLOC_ZONE(newproc->p_cred, struct pcred *,
397 sizeof *newproc->p_cred, M_SUBPROC, M_WAITOK);
398 MALLOC_ZONE(newproc->p_stats, struct pstats *,
399 sizeof *newproc->p_stats, M_SUBPROC, M_WAITOK);
400 MALLOC_ZONE(newproc->p_sigacts, struct sigacts *,
401 sizeof *newproc->p_sigacts, M_SUBPROC, M_WAITOK);
402
403 /*
404 * Find an unused process ID. We remember a range of unused IDs
405 * ready to use (from nextpid+1 through pidchecked-1).
406 */
407 nextpid++;
408 retry:
409 /*
410 * If the process ID prototype has wrapped around,
411 * restart somewhat above 0, as the low-numbered procs
412 * tend to include daemons that don't exit.
413 */
414 if (nextpid >= PID_MAX) {
415 nextpid = 100;
416 pidchecked = 0;
417 }
418 if (nextpid >= pidchecked) {
419 int doingzomb = 0;
420
421 pidchecked = PID_MAX;
422 /*
423 * Scan the active and zombie procs to check whether this pid
424 * is in use. Remember the lowest pid that's greater
425 * than nextpid, so we can avoid checking for a while.
426 */
427 p2 = allproc.lh_first;
428 again:
429 for (; p2 != 0; p2 = p2->p_list.le_next) {
430 while (p2->p_pid == nextpid ||
431 p2->p_pgrp->pg_id == nextpid ||
432 p2->p_session->s_sid == nextpid) {
433 nextpid++;
434 if (nextpid >= pidchecked)
435 goto retry;
436 }
437 if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
438 pidchecked = p2->p_pid;
439 if (p2->p_pgrp && p2->p_pgrp->pg_id > nextpid &&
440 pidchecked > p2->p_pgrp->pg_id)
441 pidchecked = p2->p_pgrp->pg_id;
442 if (p2->p_session->s_sid > nextpid &&
443 pidchecked > p2->p_session->s_sid)
444 pidchecked = p2->p_session->s_sid;
445 }
446 if (!doingzomb) {
447 doingzomb = 1;
448 p2 = zombproc.lh_first;
449 goto again;
450 }
451 }
452
453 nprocs++;
454 p2 = newproc;
455 p2->p_stat = SIDL;
456 p2->p_pid = nextpid;
457
458 /*
459 * Make a proc table entry for the new process.
460 * Start by zeroing the section of proc that is zero-initialized,
461 * then copy the section that is copied directly from the parent.
462 */
463 bzero(&p2->p_startzero,
464 (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
465 bcopy(&p1->p_startcopy, &p2->p_startcopy,
466 (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
467 p2->vm_shm = (void *)NULL; /* Make sure it is zero */
468
469 /*
470 * Duplicate sub-structures as needed.
471 * Increase reference counts on shared objects.
472 * The p_stats and p_sigacts substructs are set in vm_fork.
473 */
474 p2->p_flag = P_INMEM;
475 if (p1->p_flag & P_PROFIL)
476 startprofclock(p2);
477 bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
478 p2->p_cred->p_refcnt = 1;
479 crhold(p1->p_ucred);
480 lockinit(&p2->p_cred->pc_lock, PLOCK, "proc cred", 0, 0);
481
482 /* bump references to the text vnode */
483 p2->p_textvp = p1->p_textvp;
484 if (p2->p_textvp)
485 VREF(p2->p_textvp);
486
487 p2->p_fd = fdcopy(p1);
488 if (p1->vm_shm) {
489 shmfork(p1,p2);
490 }
491 /*
492 * If p_limit is still copy-on-write, bump refcnt,
493 * otherwise get a copy that won't be modified.
494 * (If PL_SHAREMOD is clear, the structure is shared
495 * copy-on-write.)
496 */
497 if (p1->p_limit->p_lflags & PL_SHAREMOD)
498 p2->p_limit = limcopy(p1->p_limit);
499 else {
500 p2->p_limit = p1->p_limit;
501 p2->p_limit->p_refcnt++;
502 }
503
504 bzero(&p2->p_stats->pstat_startzero,
505 (unsigned) ((caddr_t)&p2->p_stats->pstat_endzero -
506 (caddr_t)&p2->p_stats->pstat_startzero));
507 bcopy(&p1->p_stats->pstat_startcopy, &p2->p_stats->pstat_startcopy,
508 ((caddr_t)&p2->p_stats->pstat_endcopy -
509 (caddr_t)&p2->p_stats->pstat_startcopy));
510
511 if (p1->p_sigacts != NULL)
512 (void)memcpy(p2->p_sigacts,
513 p1->p_sigacts, sizeof *p2->p_sigacts);
514 else
515 (void)memset(p2->p_sigacts, 0, sizeof *p2->p_sigacts);
516
517 if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
518 p2->p_flag |= P_CONTROLT;
519
520 p2->p_xstat = 0;
521 p2->p_ru = NULL;
522
523 p2->p_debugger = 0; /* don't inherit */
524 lockinit(&p2->signal_lock, PVM, "signal", 0, 0);
525 /* block all signals to reach the process */
526 if (lock)
527 signal_lock(p2);
528 p2->sigwait = FALSE;
529 p2->sigwait_thread = NULL;
530 p2->exit_thread = NULL;
531 p2->user_stack = p1->user_stack;
532 p2->p_xxxsigpending = 0;
533 p2->p_vforkcnt = 0;
534 p2->p_vforkact = 0;
535 TAILQ_INIT(&p2->p_uthlist);
536
537 #if KTRACE
538 /*
539 * Copy traceflag and tracefile if enabled.
540 * If not inherited, these were zeroed above.
541 */
542 if (p1->p_traceflag&KTRFAC_INHERIT) {
543 p2->p_traceflag = p1->p_traceflag;
544 if ((p2->p_tracep = p1->p_tracep) != NULL)
545 VREF(p2->p_tracep);
546 }
547 #endif
548 return(p2);
549
550 }
551
552 #include <kern/zalloc.h>
553
554 struct zone *uthread_zone;
555 int uthread_zone_inited = 0;
556
557 void
558 uthread_zone_init()
559 {
560 if (!uthread_zone_inited) {
561 uthread_zone = zinit(sizeof(struct uthread),
562 THREAD_MAX * sizeof(struct uthread),
563 THREAD_CHUNK * sizeof(struct uthread),
564 "uthreads");
565 uthread_zone_inited = 1;
566 }
567 }
568
569 void *
570 uthread_alloc(task_t task, thread_act_t thr_act )
571 {
572 struct proc *p;
573 struct uthread *uth, *uth_parent;
574 void *ut;
575 extern task_t kernel_task;
576 boolean_t funnel_state;
577
578 if (!uthread_zone_inited)
579 uthread_zone_init();
580
581 ut = (void *)zalloc(uthread_zone);
582 bzero(ut, sizeof(struct uthread));
583
584 if (task != kernel_task) {
585 uth = (struct uthread *)ut;
586 p = get_bsdtask_info(task);
587
588 funnel_state = thread_funnel_set(kernel_flock, TRUE);
589 uth_parent = (struct uthread *)get_bsdthread_info(current_act());
590 if (uth_parent) {
591 if (uth_parent->uu_flag & USAS_OLDMASK)
592 uth->uu_sigmask = uth_parent->uu_oldmask;
593 else
594 uth->uu_sigmask = uth_parent->uu_sigmask;
595 }
596 uth->uu_act = thr_act;
597 //signal_lock(p);
598 if (p)
599 TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list);
600 //signal_unlock(p);
601 (void)thread_funnel_set(kernel_flock, funnel_state);
602 }
603
604 return (ut);
605 }
606
607
608 void
609 uthread_free(task_t task, void *uthread, void * bsd_info)
610 {
611 struct _select *sel;
612 struct uthread *uth = (struct uthread *)uthread;
613 struct proc * p = (struct proc *)bsd_info;
614 extern task_t kernel_task;
615 int size;
616 boolean_t funnel_state;
617
618 sel = &uth->uu_state.ss_select;
619 /* cleanup the select bit space */
620 if (sel->nbytes) {
621 FREE(sel->ibits, M_TEMP);
622 FREE(sel->obits, M_TEMP);
623 }
624
625 if (sel->allocsize && uth->uu_wqsub){
626 kfree(uth->uu_wqsub, sel->allocsize);
627 sel->count = sel->nfcount = 0;
628 sel->allocsize = 0;
629 uth->uu_wqsub = 0;
630 sel->wql = 0;
631 }
632
633 if ((task != kernel_task) && p) {
634 funnel_state = thread_funnel_set(kernel_flock, TRUE);
635 //signal_lock(p);
636 TAILQ_REMOVE(&p->p_uthlist, uth, uu_list);
637 //signal_unlock(p);
638 (void)thread_funnel_set(kernel_flock, funnel_state);
639 }
640 /* and free the uthread itself */
641 zfree(uthread_zone, (vm_offset_t)uthread);
642 }