]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exit.c
c866e46c232ee573d5a11ad130c7fa0ffa87e01c
[apple/xnu.git] / bsd / kern / kern_exit.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1982, 1986, 1989, 1991, 1993
26 * The Regents of the University of California. All rights reserved.
27 * (c) UNIX System Laboratories, Inc.
28 * All or some portions of this file are derived from material licensed
29 * to the University of California by American Telephone and Telegraph
30 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
31 * the permission of UNIX System Laboratories, Inc.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94
62 */
63
64 #include <machine/reg.h>
65 #include <machine/psl.h>
66
67 #include "compat_43.h"
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/ioctl.h>
72 #include <sys/proc_internal.h>
73 #include <sys/kauth.h>
74 #include <sys/tty.h>
75 #include <sys/time.h>
76 #include <sys/resource.h>
77 #include <sys/kernel.h>
78 #include <sys/wait.h>
79 #include <sys/file_internal.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/syslog.h>
82 #include <sys/malloc.h>
83 #include <sys/resourcevar.h>
84 #include <sys/ptrace.h>
85 #include <sys/user.h>
86 #include <sys/aio_kern.h>
87 #include <sys/sysproto.h>
88 #include <sys/signalvar.h>
89 #include <sys/filedesc.h> /* fdfree */
90 #include <sys/shm_internal.h> /* shmexit */
91 #include <sys/acct.h> /* acct_process */
92 #include <machine/spl.h>
93
94 #include <bsm/audit_kernel.h>
95 #include <bsm/audit_kevents.h>
96
97 #include <mach/mach_types.h>
98
99 #include <kern/kern_types.h>
100 #include <kern/kalloc.h>
101 #include <kern/task.h>
102 #include <kern/thread.h>
103 #include <kern/sched_prim.h>
104 #include <kern/assert.h>
105 #if KTRACE
106 #include <sys/ktrace.h>
107 #endif
108
109 #include <mach/mach_types.h>
110 #include <mach/task.h>
111 #include <mach/thread_act.h>
112 #include <mach/mach_traps.h> /* init_process */
113
114 extern char init_task_failure_data[];
115 int exit1(struct proc *, int, int *);
116 void proc_prepareexit(struct proc *p);
117 void vfork_exit(struct proc *p, int rv);
118 void vproc_exit(struct proc *p);
119 __private_extern__ void munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p);
120
121 /*
122 * Things which should have prototypes in headers, but don't
123 */
124 void unix_syscall_return(int);
125 void *get_bsduthreadarg(thread_t);
126 void proc_exit(struct proc *p);
127 int wait1continue(int result);
128 int waitidcontinue(int result);
129 int *get_bsduthreadrval(thread_t);
130 kern_return_t sys_perf_notify(struct task *task, exception_data_t code,
131 mach_msg_type_number_t codeCnt);
132
/*
 * siginfo_64to32
 *
 * Narrow a kernel-internal 64-bit user_siginfo_t into the 32-bit
 * siginfo_t layout for delivery to a 32-bit user process.  Pointer-sized
 * fields (si_addr, si_value) are truncated with CAST_DOWN; si_band is
 * range-reduced by the narrowing assignment.
 *
 * NOTE: Source and target may *NOT* overlap!
 * XXX Should share code with bsd/dev/ppc/unix_signal.c
 */
static void
siginfo_64to32(user_siginfo_t *in, siginfo_t *out)
{
	out->si_signo	= in->si_signo;
	out->si_errno	= in->si_errno;
	out->si_code	= in->si_code;
	out->si_pid	= in->si_pid;
	out->si_uid	= in->si_uid;
	out->si_status	= in->si_status;
	out->si_addr	= CAST_DOWN(void *,in->si_addr);
	/* following cast works for sival_int because of padding */
	out->si_value.sival_ptr	= CAST_DOWN(void *,in->si_value.sival_ptr);
	out->si_band	= in->si_band;			/* range reduction */
	out->pad[0]	= in->pad[0];			/* mcontext.ss.r1 */
}
152
153 /*
154 * exit --
155 * Death of process.
156 */
157 void
158 exit(struct proc *p, struct exit_args *uap, int *retval)
159 {
160 exit1(p, W_EXITCODE(uap->rval, 0), retval);
161
162 /* drop funnel before we return */
163 thread_funnel_set(kernel_flock, FALSE);
164 thread_exception_return();
165 /* NOTREACHED */
166 while (TRUE)
167 thread_block(THREAD_CONTINUE_NULL);
168 /* NOTREACHED */
169 }
170
/*
 * Exit: deallocate address space and other resources, change proc state
 * to zombie, and unlink proc from allproc and parent's lists.  Save exit
 * status and rusage for wait().  Check for child processes and orphan them.
 *
 * Returns 0; on the normal path the caller is expected to never return
 * to user mode (task_terminate_internal() tears the task down).
 */
int
exit1(struct proc *p, int rv, int *retval)
{
	thread_t self = current_thread();
	struct task *task = p->task;
	register int s;
	struct uthread *ut;

	/*
	 * If a thread in this task has already
	 * called exit(), then halt any others
	 * right here.
	 */

	ut = get_bsdthread_info(self);
	if (ut->uu_flag & UT_VFORK) {
		/*
		 * vfork child: unwind proc-level state and give control
		 * back to the vfork parent instead of terminating the
		 * (shared) task.
		 */
		vfork_exit(p, rv);
		vfork_return(self, p->p_pptr, p , retval);
		unix_syscall_return(0);
		/* NOT REACHED */
	}
	AUDIT_SYSCALL_EXIT(0, p, ut); /* Exit is always successful */
	signal_lock(p);
	while (p->exit_thread != self) {
		if (sig_try_locked(p) <= 0) {
			/*
			 * Another thread already owns the exit; terminate
			 * this one (unless it belongs to a different task,
			 * in which case just back out).
			 */
			if (get_threadtask(self) != task) {
				signal_unlock(p);
				return(0);
			}
			signal_unlock(p);
			thread_terminate(self);
			thread_funnel_set(kernel_flock, FALSE);
			thread_exception_return();
			/* NOTREACHED */
		}
		sig_lock_to_exit(p);
	}
	signal_unlock(p);
	if (p->p_pid == 1) {
		/* Process 1 (init) must never exit; preserve its last
		 * exception state in the panic string for post-mortem. */
		printf("pid 1 exited (signal %d, exit %d)",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("init died\nState at Last Exception:\n\n%s",
		    init_task_failure_data);
	}

	s = splsched();
	p->p_flag |= P_WEXIT;
	splx(s);
	proc_prepareexit(p);
	p->p_xstat = rv;

	/* task terminate will call proc_terminate and that cleans it up */
	task_terminate_internal(task);

	return(0);
}
232
/*
 * proc_prepareexit
 *
 * First phase of process exit: notify the perf server, move the proc
 * from allproc onto zombproc and unhash it (this must happen before
 * anything that can block — see comment below), then quiesce signal and
 * interval-timer state.  Called from exit1(), and from proc_exit() when
 * P_WEXIT was not already set.
 */
void
proc_prepareexit(struct proc *p)
{
	struct uthread *ut;
	exception_data_t code[EXCEPTION_CODE_MAX];
	thread_t self = current_thread();

	code[0] = (exception_data_t)0xFF000001;		/* Set terminate code */
	code[1] = (exception_data_t)p->p_pid;		/* Pass out the pid */
	/* Notify the perf server */
	(void)sys_perf_notify(p->task, (exception_data_t)&code, 2);

	/*
	 * Remove proc from allproc queue and from pidhash chain.
	 * Need to do this before we do anything that can block.
	 * Not doing causes things like mount() find this on allproc
	 * in partially cleaned state.
	 */
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);	/* Place onto zombproc. */
	LIST_REMOVE(p, p_hash);

#ifdef PGINPROF
	vmsizmon();
#endif
	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	p->p_flag &= ~(P_TRACED | P_PPWAIT);
	p->p_sigignore = ~(sigcantmask);
	p->p_siglist = 0;
	ut = get_bsdthread_info(self);
	ut->uu_siglist = 0;
	untimeout(realitexpire, (caddr_t)p->p_pid);
}
269
/*
 * proc_exit
 *
 * Second phase of exit, run once the Mach task is being torn down:
 * drain internal references, cancel async I/O, close files, release
 * SysV IPC and the controlling terminal, account for resource usage,
 * reparent children to init (killing traced ones), free substructures,
 * and leave the proc as a SZOMB zombie for wait4()/waitid() to reap.
 * Finishes by notifying and waking the parent.
 */
void
proc_exit(struct proc *p)
{
	register struct proc *q, *nq, *pp;
	struct task *task = p->task;
	register int s;
	boolean_t funnel_state;

	/* This can happen if thread_terminate of the single thread
	 * process
	 */

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	if( !(p->p_flag & P_WEXIT)) {
		s = splsched();
		p->p_flag |= P_WEXIT;
		splx(s);
		proc_prepareexit(p);
	}

	p->p_lflag |= P_LPEXIT;
	/* XXX Zombie allocation may fail, in which case stats get lost */
	MALLOC_ZONE(p->p_ru, struct rusage *,
			sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK);

	/*
	 * need to cancel async IO requests that can be cancelled and wait for those
	 * already active.  MAY BLOCK!
	 */

	/* Drain outstanding internal references before teardown proceeds. */
	p->p_lflag |= P_LREFDRAIN;
	while (p->p_internalref) {
		p->p_lflag |= P_LREFDRAINWAIT;
		msleep(&p->p_internalref, (lck_mtx_t *)0, 0, "proc_refdrain", 0) ;
	}
	p->p_lflag &= ~P_LREFDRAIN;
	p->p_lflag |= P_LREFDEAD;

	_aio_exit( p );

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(p);

	/* Close ref SYSV Shared memory*/
	if (p->vm_shm)
		shmexit(p);
	/* Release SYSV semaphores */
	semexit(p);

	if (SESS_LEADER(p)) {
		register struct session *sp = p->p_session;

		if (sp->s_ttyvp) {
			struct vnode *ttyvp;
			struct vfs_context context;

			/*
			 * Controlling process.
			 * Signal foreground pgrp,
			 * drain controlling terminal
			 * and revoke access to controlling terminal.
			 */
			if (sp->s_ttyp->t_session == sp) {
				if (sp->s_ttyp->t_pgrp)
					pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
				(void) ttywait(sp->s_ttyp);
				/*
				 * The tty could have been revoked
				 * if we blocked.
				 */
				context.vc_proc = p;
				context.vc_ucred = p->p_ucred;
				if (sp->s_ttyvp)
					VNOP_REVOKE(sp->s_ttyvp, REVOKEALL, &context);
			}
			ttyvp = sp->s_ttyvp;
			sp->s_ttyvp = NULL;
			if (ttyvp) {
				vnode_rele(ttyvp);
			}
			/*
			 * s_ttyp is not zero'd; we use this to indicate
			 * that the session once had a controlling terminal.
			 * (for logging and informational purposes)
			 */
		}
		sp->s_leader = NULL;
	}

	fixjobc(p, p->p_pgrp, 0);
	/* Lift the file-size limit so accounting records can be written. */
	p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	(void)acct_process(p);

#if KTRACE
	/*
	 * release trace file
	 */
	p->p_traceflag = 0;	/* don't trace the vnode_put() */
	if (p->p_tracep) {
		struct vnode *tvp = p->p_tracep;
		p->p_tracep = NULL;
		vnode_rele(tvp);
	}
#endif

	/* Orphan all children onto init; traced children are killed. */
	while (q = p->p_children.lh_first) {
		proc_reparent(q, initproc);
		/*
		 * Traced processes are killed
		 * since their existence means someone is messing up.
		 */
		if (q->p_flag & P_TRACED) {
			q->p_flag &= ~P_TRACED;
			if (q->sigwait_thread) {
				/*
				 * The sigwait_thread could be stopped at a
				 * breakpoint. Wake it up to kill.
				 * Need to do this as it could be a thread which is not
				 * the first thread in the task. So any attempts to kill
				 * the process would result into a deadlock on q->sigwait.
				 */
				thread_resume((thread_t)q->sigwait_thread);
				clear_wait(q->sigwait_thread, THREAD_INTERRUPTED);
				threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0);
			}
			psignal(q, SIGKILL);
		}
	}

	/*
	 * Save exit status and final rusage info, adding in child rusage
	 * info and self times.  If we were unable to allocate a zombie
	 * structure, this information is lost.
	 */
	if (p->p_ru != NULL) {
		*p->p_ru = p->p_stats->p_ru;

		timerclear(&p->p_ru->ru_utime);
		timerclear(&p->p_ru->ru_stime);

		if (task) {
			task_basic_info_data_t tinfo;
			task_thread_times_info_data_t ttimesinfo;
			int task_info_stuff, task_ttimes_stuff;
			struct timeval ut,st;

			/* Fold the Mach task's accumulated times into rusage. */
			task_info_stuff	= TASK_BASIC_INFO_COUNT;
			task_info(task, TASK_BASIC_INFO,
				  (task_info_t)&tinfo, &task_info_stuff);
			p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds;
			p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds;
			p->p_ru->ru_stime.tv_sec = tinfo.system_time.seconds;
			p->p_ru->ru_stime.tv_usec = tinfo.system_time.microseconds;

			task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT;
			task_info(task, TASK_THREAD_TIMES_INFO,
				  (task_info_t)&ttimesinfo, &task_ttimes_stuff);

			ut.tv_sec = ttimesinfo.user_time.seconds;
			ut.tv_usec = ttimesinfo.user_time.microseconds;
			st.tv_sec = ttimesinfo.system_time.seconds;
			st.tv_usec = ttimesinfo.system_time.microseconds;
			timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime);
			timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime);
		}

		ruadd(p->p_ru, &p->p_stats->p_cru);
	}

	/*
	 * Free up profiling buffers.
	 */
	{
		struct uprof *p0 = &p->p_stats->p_prof, *p1, *pn;

		p1 = p0->pr_next;
		p0->pr_next = NULL;
		p0->pr_scale = 0;

		for (; p1 != NULL; p1 = pn) {
			pn = p1->pr_next;
			kfree(p1, sizeof *p1);
		}
	}

	/*
	 * Other substructures are freed from wait().
	 */
	FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_SUBPROC);
	p->p_stats = NULL;

	FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SUBPROC);
	p->p_sigacts = NULL;

	/* p_limit is shared/refcounted; free only on last reference. */
	if (--p->p_limit->p_refcnt == 0)
		FREE_ZONE(p->p_limit, sizeof *p->p_limit, M_SUBPROC);
	p->p_limit = NULL;

	/*
	 * Finish up by terminating the task
	 * and halt this thread (only if a
	 * member of the task exiting).
	 */
	p->task = TASK_NULL;
	//task->proc = NULL;
	set_bsdtask_info(task, NULL);

	/* Let kqueue EVFILT_PROC watchers know this process is exiting. */
	KNOTE(&p->p_klist, NOTE_EXIT);

	/*
	 * Notify parent that we're gone.
	 */
	if (p->p_pptr->p_flag & P_NOCLDWAIT) {
		struct proc *opp = p->p_pptr;

		/*
		 * Add child resource usage to parent before giving
		 * zombie to init.  If we were unable to allocate a
		 * zombie structure, this information is lost.
		 */
		if (p->p_ru != NULL)
			ruadd(&p->p_pptr->p_stats->p_cru, p->p_ru);

		proc_reparent(p, initproc);
		/* If there are no more children wakeup parent */
		if (LIST_EMPTY(&opp->p_children))
			wakeup((caddr_t)opp);
	}
	/* should be fine as parent proc would be initproc */
	pp = p->p_pptr;
	if (pp != initproc) {
		/* Stash SIGCHLD siginfo fields in the parent proc. */
		pp->si_pid = p->p_pid;
		pp->si_status = p->p_xstat;
		pp->si_code = CLD_EXITED;
		pp->si_uid = p->p_ucred->cr_ruid;
	}
	/* mark as a zombie */
	p->p_stat = SZOMB;

	psignal(pp, SIGCHLD);

	/* and now wakeup the parent */
	wakeup((caddr_t)p->p_pptr);

	(void) thread_funnel_set(kernel_flock, funnel_state);
}
519
520
/*
 * reap_child_process
 *
 * Description:	Given a process from which all status information needed
 *		has already been extracted, if the process is a ptrace
 *		attach process, detach it and give it back to its real
 *		parent, else recover all resources remaining associated
 *		with it.
 *
 * Parameters:	struct proc *parent	Parent of process being reaped
 *		struct proc *child	Process to reap
 *
 * Returns:	0			Process was not reaped because it
 *					came from an attach
 *		1			Process was reaped
 *
 * Notes:	When 1 is returned, 'child' has been freed; the caller
 *		must not touch it afterwards.  The caller is expected to
 *		hold the child busy via P_LWAITING.
 */
static int
reap_child_process(struct proc *parent, struct proc *child)
{
	struct proc *trace_parent;	/* Traced parent process, if tracing */
	struct vnode *tvp;		/* Traced vnode pointer, if used */

	/*
	 * If we got the child via a ptrace 'attach',
	 * we need to give it back to the old parent.
	 */
	if (child->p_oppid && (trace_parent = pfind(child->p_oppid))) {
		child->p_oppid = 0;
		proc_reparent(child, trace_parent);
		if (trace_parent != initproc) {
			trace_parent->si_pid = child->p_pid;
			trace_parent->si_status = child->p_xstat;
			/* NOTE(review): CLD_CONTINUED for a detach hand-back
			 * looks odd (CLD_EXITED might be expected) — confirm
			 * against the ptrace detach protocol. */
			trace_parent->si_code = CLD_CONTINUED;
			trace_parent->si_uid = child->p_ucred->cr_ruid;
		}
		psignal(trace_parent, SIGCHLD);
		wakeup((caddr_t)trace_parent);
		return (0);
	}
	child->p_xstat = 0;
	if (child->p_ru) {
		/* Fold the zombie's rusage into the parent's child totals. */
		ruadd(&parent->p_stats->p_cru, child->p_ru);
		FREE_ZONE(child->p_ru, sizeof *child->p_ru, M_ZOMBIE);
		child->p_ru = NULL;
	} else {
		printf("Warning : lost p_ru for %s\n", child->p_comm);
	}

	/*
	 * Decrement the count of procs running with this uid.
	 */
	(void)chgproccnt(child->p_ucred->cr_ruid, -1);

	/*
	 * Free up credentials.
	 */
	if (child->p_ucred != NOCRED) {
		kauth_cred_t ucr = child->p_ucred;
		child->p_ucred = NOCRED;
		kauth_cred_rele(ucr);
	}

	/*
	 * Release reference to text vnode
	 */
	tvp = child->p_textvp;
	child->p_textvp = NULL;
	if (tvp) {
		vnode_rele(tvp);
	}
	/*
	 * Finally finished with old proc entry.
	 * Unlink it from its process group and free it.
	 */
	leavepgrp(child);
	LIST_REMOVE(child, p_list);	/* off zombproc */
	LIST_REMOVE(child, p_sibling);
	/* Release any colliding waiters sleeping on this child. */
	child->p_lflag &= ~P_LWAITING;
	wakeup(&child->p_stat);

	lck_mtx_destroy(&child->p_mlock, proc_lck_grp);
	lck_mtx_destroy(&child->p_fdmlock, proc_lck_grp);
	FREE_ZONE(child, sizeof *child, M_PROC);
	nprocs--;
	return (1);
}
607
608
609 int
610 wait1continue(int result)
611 {
612 void *vt;
613 thread_t thread;
614 int *retval;
615 struct proc *p;
616
617 if (result)
618 return(result);
619
620 p = current_proc();
621 thread = current_thread();
622 vt = get_bsduthreadarg(thread);
623 retval = get_bsduthreadrval(thread);
624 return(wait4((struct proc *)p, (struct wait4_args *)vt, retval));
625 }
626
/*
 * wait4
 *
 * Wait for a child matching uap->pid to change state; on success return
 * the child's pid in retval[0] and optionally copy out its status and
 * rusage.  pid semantics: 0 = any child in caller's process group,
 * WAIT_ANY = any child, -pgid = any child in that group.  Blocks via
 * tsleep0() with wait1continue as the continuation routine.
 */
int
wait4(struct proc *q, struct wait4_args *uap, register_t *retval)
{
	register int nfound;
	register struct proc *p;
	int status, error;

	/* pid 0 means "any child in my own process group". */
	if (uap->pid == 0)
		uap->pid = -q->p_pgid;

loop:
	nfound = 0;
	for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) {
		if (uap->pid != WAIT_ANY &&
		    p->p_pid != uap->pid &&
		    p->p_pgid != -(uap->pid))
			continue;
		nfound++;

		/* XXX This is racy because we don't get the lock!!!! */

		if (p->p_lflag & P_LWAITING) {
			/* Another waiter owns this child; sleep and rescan. */
			(void)tsleep(&p->p_stat, PWAIT, "waitcoll", 0);
			goto loop;
		}
		p->p_lflag |= P_LWAITING;   /* only allow single thread to wait() */

		if (p->p_stat == SZOMB) {
			retval[0] = p->p_pid;
			if (uap->status) {
				status = p->p_xstat;	/* convert to int */
				error = copyout((caddr_t)&status,
						uap->status,
						sizeof(status));
				if (error) {
					p->p_lflag &= ~P_LWAITING;
					wakeup(&p->p_stat);
					return (error);
				}
			}
			if (uap->rusage) {
				if (p->p_ru == NULL) {
					/* Zombie allocation failed at exit. */
					error = ENOMEM;
				} else {
					if (IS_64BIT_PROCESS(q)) {
						struct user_rusage my_rusage;
						munge_rusage(p->p_ru, &my_rusage);
						error = copyout((caddr_t)&my_rusage,
							uap->rusage,
							sizeof (my_rusage));
					}
					else {
						error = copyout((caddr_t)p->p_ru,
							uap->rusage,
							sizeof (struct rusage));
					}
				}
				/* information unavailable? */
				if (error) {
					p->p_lflag &= ~P_LWAITING;
					wakeup(&p->p_stat);
					return (error);
				}
			}

			/* Clean up */
			if (!reap_child_process(q, p)) {
				/* Child handed back to its ptrace parent;
				 * it still exists, so release our claim. */
				p->p_lflag &= ~P_LWAITING;
				wakeup(&p->p_stat);
			}

			return (0);
		}
		if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
		    (p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
			/* Report each stop event exactly once (P_WAITED). */
			p->p_flag |= P_WAITED;
			retval[0] = p->p_pid;
			if (uap->status) {
				status = W_STOPCODE(p->p_xstat);
				error = copyout((caddr_t)&status,
					uap->status,
					sizeof(status));
			} else
				error = 0;
			p->p_lflag &= ~P_LWAITING;
			wakeup(&p->p_stat);
			return (error);
		}
		p->p_lflag &= ~P_LWAITING;
		wakeup(&p->p_stat);
	}
	if (nfound == 0)
		return (ECHILD);

	if (uap->options & WNOHANG) {
		retval[0] = 0;
		return (0);
	}

	if ((error = tsleep0((caddr_t)q, PWAIT | PCATCH, "wait", 0, wait1continue)))
		return (error);

	goto loop;
}
731
732
733 int
734 waitidcontinue(int result)
735 {
736 void *vt;
737 thread_t thread;
738 int *retval;
739 struct proc *p;
740
741 if (result)
742 return(result);
743
744 p = current_proc();
745 thread = current_thread();
746 vt = get_bsduthreadarg(thread);
747 retval = get_bsduthreadrval(thread);
748 return(waitid((struct proc *)p, (struct waitid_args *)vt, retval));
749 }
750
751 /*
752 * Description: Suspend the calling thread until one child of the process
753 * containing the calling thread changes state.
754 *
755 * Parameters: uap->idtype one of P_PID, P_PGID, P_ALL
756 * uap->id pid_t or gid_t or ignored
757 * uap->infop Address of signinfo_t struct in
758 * user space into which to return status
759 * uap->options flag values
760 *
761 * Returns: 0 Success
762 * !0 Error returning status to user space
763 */
764 int
765 waitid(struct proc *q, struct waitid_args *uap, register_t *retval)
766 {
767 user_siginfo_t collect64; /* siginfo data to return to caller */
768
769 register int nfound;
770 register struct proc *p;
771 int error;
772
773 loop:
774 nfound = 0;
775 for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) {
776 switch(uap->idtype) {
777 case P_PID: /* child with process ID equal to... */
778 if (p->p_pid != (pid_t)uap->id)
779 continue;
780 break;
781 case P_PGID: /* child with process group ID equal to... */
782 if (p->p_pgid != (pid_t)uap->id)
783 continue;
784 break;
785 case P_ALL: /* any child */
786 break;
787 }
788
789 /* XXX This is racy because we don't get the lock!!!! */
790
791 /*
792 * Wait collision; go to sleep and restart; used to maintain
793 * the single return for waited process guarantee.
794 */
795 if (p->p_lflag & P_LWAITING) {
796 (void)tsleep(&p->p_stat, PWAIT, "waitidcoll", 0);
797 goto loop;
798 }
799 p->p_lflag |= P_LWAITING; /* mark busy */
800
801 nfound++;
802
803 /*
804 * Types of processes we are interested in
805 *
806 * XXX Don't know what to do for WCONTINUED?!?
807 */
808 switch(p->p_stat) {
809 case SZOMB: /* Exited */
810 if (!(uap->options & WEXITED))
811 break;
812
813 /* Collect "siginfo" information for caller */
814 collect64.si_signo = 0;
815 collect64.si_code = 0;
816 collect64.si_errno = 0;
817 collect64.si_pid = 0;
818 collect64.si_uid = 0;
819 collect64.si_addr = 0;
820 collect64.si_status = p->p_xstat;
821 collect64.si_band = 0;
822
823 if (IS_64BIT_PROCESS(p)) {
824 error = copyout((caddr_t)&collect64,
825 uap->infop,
826 sizeof(collect64));
827 } else {
828 siginfo_t collect;
829 siginfo_64to32(&collect64,&collect);
830 error = copyout((caddr_t)&collect,
831 uap->infop,
832 sizeof(collect));
833 }
834 /* information unavailable? */
835 if (error) {
836 p->p_lflag &= ~P_LWAITING;
837 wakeup(&p->p_stat);
838 return (error);
839 }
840
841 /* Prevent other process for waiting for this event? */
842 if (!(uap->options & WNOWAIT)) {
843 /* Clean up */
844 if (!reap_child_process(q, p)) {
845 p->p_lflag &= ~P_LWAITING;
846 wakeup(&p->p_stat);
847 }
848 }
849
850 return (0);
851
852 case SSTOP: /* Stopped */
853 /*
854 * If we are not interested in stopped processes, then
855 * ignore this one.
856 */
857 if (!(uap->options & WSTOPPED))
858 break;
859
860 /*
861 * If someone has already waited it, we lost a race
862 * to be the one to return status.
863 */
864 if ((p->p_flag & P_WAITED) != 0)
865 break;
866
867 /*
868 * If this is not a traced process, and they haven't
869 * indicated an interest in untraced processes, then
870 * ignore this one.
871 */
872 if (!(p->p_flag & P_TRACED) && !(uap->options & WUNTRACED))
873 break;
874
875 /* Collect "siginfo" information for caller */
876 collect64.si_signo = 0;
877 collect64.si_code = 0;
878 collect64.si_errno = 0;
879 collect64.si_pid = 0;
880 collect64.si_uid = 0;
881 collect64.si_addr = 0;
882 collect64.si_status = p->p_xstat;
883 collect64.si_band = 0;
884
885 if (IS_64BIT_PROCESS(p)) {
886 error = copyout((caddr_t)&collect64,
887 uap->infop,
888 sizeof(collect64));
889 } else {
890 siginfo_t collect;
891 siginfo_64to32(&collect64,&collect);
892 error = copyout((caddr_t)&collect,
893 uap->infop,
894 sizeof(collect));
895 }
896 /* information unavailable? */
897 if (error) {
898 p->p_lflag &= ~P_LWAITING;
899 wakeup(&p->p_stat);
900 return (error);
901 }
902
903 /* Prevent other process for waiting for this event? */
904 if (!(uap->options & WNOWAIT)) {
905 p->p_flag |= P_WAITED;
906 }
907
908 p->p_lflag &= ~P_LWAITING;
909 wakeup(&p->p_stat);
910 return (0);
911
912 default: /* All others */
913 /* ...meaning Continued */
914 if (!(uap->options & WCONTINUED))
915 break;
916
917 /*
918 * If the flag isn't set, then this process has not
919 * been stopped and continued, or the status has
920 * already been reaped by another caller of waitid().
921 */
922 if ((p->p_flag & P_CONTINUED) == 0)
923 break;
924
925 /* Collect "siginfo" information for caller */
926 collect64.si_signo = 0;
927 collect64.si_code = 0;
928 collect64.si_errno = 0;
929 collect64.si_pid = 0;
930 collect64.si_uid = 0;
931 collect64.si_addr = 0;
932 collect64.si_status = p->p_xstat;
933 collect64.si_band = 0;
934
935 if (IS_64BIT_PROCESS(p)) {
936 error = copyout((caddr_t)&collect64,
937 uap->infop,
938 sizeof(collect64));
939 } else {
940 siginfo_t collect;
941 siginfo_64to32(&collect64,&collect);
942 error = copyout((caddr_t)&collect,
943 uap->infop,
944 sizeof(collect));
945 }
946 /* information unavailable? */
947 if (error) {
948 p->p_lflag &= ~P_LWAITING;
949 wakeup(&p->p_stat);
950 return (error);
951 }
952
953 /* Prevent other process for waiting for this event? */
954 if (!(uap->options & WNOWAIT)) {
955 p->p_flag &= ~P_CONTINUED;
956 }
957
958 p->p_lflag &= ~P_LWAITING;
959 wakeup(&p->p_stat);
960 return (0);
961
962 break;
963 }
964
965
966 /* Not a process we are interested in; go on to next child */
967 p->p_lflag &= ~P_LWAITING;
968 wakeup(&p->p_stat);
969 }
970
971 /* No child processes that could possibly satisfy the request? */
972 if (nfound == 0)
973 return (ECHILD);
974
975 if (uap->options & WNOHANG) {
976 retval[0] = 0;
977 return (0);
978 }
979
980 if ((error = tsleep0((caddr_t)q, PWAIT | PCATCH, "waitid", 0, waitidcontinue)))
981 return (error);
982
983 goto loop;
984 }
985
986 /*
987 * make process 'parent' the new parent of process 'child'.
988 */
989 void
990 proc_reparent(struct proc *child, struct proc *parent)
991 {
992
993 if (child->p_pptr == parent)
994 return;
995
996 LIST_REMOVE(child, p_sibling);
997 LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
998 child->p_pptr = parent;
999
1000 if (initproc == parent && child->p_stat == SZOMB)
1001 psignal(initproc, SIGCHLD);
1002 }
1003
/*
 * Make the current process an "init" process, meaning
 * that it doesn't have a parent, and that it won't be
 * gunned down by kill(-1, 0).
 *
 * Mach trap; requires superuser.  Marks the process P_SYSTEM, makes it
 * its own process-group leader, and reparents it to kernproc.
 */
kern_return_t
init_process(__unused struct init_process_args *args)
{
	register struct proc *p = current_proc();

	AUDIT_MACH_SYSCALL_ENTER(AUE_INITPROCESS);
	/* Privileged operation: only the superuser may do this. */
	if (suser(kauth_cred_get(), &p->p_acflag)) {
		AUDIT_MACH_SYSCALL_EXIT(KERN_NO_ACCESS);
		return(KERN_NO_ACCESS);
	}

	/* Become our own process-group leader (unless pid 1 or already). */
	if (p->p_pid != 1 && p->p_pgid != p->p_pid)
		enterpgrp(p, p->p_pid, 0);
	p->p_flag |= P_SYSTEM;

	/*
	 *	Take us out of the sibling chain, and
	 *	out of our parent's child chain.
	 */
	LIST_REMOVE(p, p_sibling);
	p->p_sibling.le_prev = NULL;
	p->p_sibling.le_next = NULL;
	p->p_pptr = kernproc;

	AUDIT_MACH_SYSCALL_EXIT(KERN_SUCCESS);
	return(KERN_SUCCESS);
}
1036
1037
/*
 * vfork_exit
 *
 * Exit path for a vfork() child, which borrows its parent's address
 * space and task: perform only the proc-level teardown (move to
 * zombproc, quiesce signals/timers, record exit status) and hand off
 * to vproc_exit().  The shared Mach task is NOT terminated here.
 */
void
vfork_exit(struct proc *p, int rv)
{
	thread_t self = current_thread();
#ifdef FIXME
	struct task *task = p->task;
#endif
	register int s;
	struct uthread *ut;
	exception_data_t code[EXCEPTION_CODE_MAX];

	/*
	 * If a thread in this task has already
	 * called exit(), then halt any others
	 * right here.
	 */

	ut = get_bsdthread_info(self);
#ifdef FIXME
	signal_lock(p);
	while (p->exit_thread != self) {
		if (sig_try_locked(p) <= 0) {
			if (get_threadtask(self) != task) {
				signal_unlock(p);
				return;
			}
			signal_unlock(p);
			thread_terminate(self);
			thread_funnel_set(kernel_flock, FALSE);
			thread_exception_return();
			/* NOTREACHED */
		}
		sig_lock_to_exit(p);
	}
	signal_unlock(p);
	if (p->p_pid == 1) {
		printf("pid 1 exited (signal %d, exit %d)",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("init died\nState at Last Exception:\n\n%s", init_task_failure_data);
	}
#endif /* FIXME */

	s = splsched();
	p->p_flag |= P_WEXIT;
	p->p_lflag |= P_LPEXIT;
	splx(s);

	code[0] = (exception_data_t)0xFF000001;		/* Set terminate code */
	code[1] = (exception_data_t)p->p_pid;		/* Pass out the pid */
	/* Notify the perf server */
	(void)sys_perf_notify(p->task, (exception_data_t)&code, 2);

	/*
	 * Remove proc from allproc queue and from pidhash chain.
	 * Need to do this before we do anything that can block.
	 * Not doing causes things like mount() find this on allproc
	 * in partially cleaned state.
	 */
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);	/* Place onto zombproc. */
	LIST_REMOVE(p, p_hash);
	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	p->p_flag &= ~(P_TRACED | P_PPWAIT);
	p->p_sigignore = ~0;
	p->p_siglist = 0;

	ut->uu_siglist = 0;
	untimeout(realitexpire, (caddr_t)p->p_pid);

	p->p_xstat = rv;

	vproc_exit(p);
}
1120
/*
 * vproc_exit
 *
 * BSD-side teardown of an exiting process: release the open-file table,
 * give up the controlling terminal if we are a session leader, detach any
 * ktrace vnode, reparent (and, if traced, kill) our children, fold final
 * rusage accounting into the zombie record, free sub-structures not needed
 * by wait(), and finally mark the proc SZOMB and notify the parent.
 *
 * NOTE(review): several steps here can block (fdfree, ttywait, VNOP_REVOKE);
 * the caller (exit1) appears to have already removed p from allproc and set
 * P_WEXIT/P_LPEXIT before calling us — confirm against exit1().
 */
void
vproc_exit(struct proc *p)
{
	/* NOTE(review): nq is declared but not used in this function */
	register struct proc *q, *nq, *pp;
#ifdef FIXME
	struct task *task = p->task;
#endif

	/* XXX Zombie allocation may fail, in which case stats get lost */
	MALLOC_ZONE(p->p_ru, struct rusage *,
		sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(p);

	if (SESS_LEADER(p)) {
		register struct session *sp = p->p_session;

		if (sp->s_ttyvp) {
			struct vnode *ttyvp;
			struct vfs_context context;

			/*
			 * Controlling process.
			 * Signal foreground pgrp,
			 * drain controlling terminal
			 * and revoke access to controlling terminal.
			 */
			if (sp->s_ttyp->t_session == sp) {
				if (sp->s_ttyp->t_pgrp)
					pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
				(void) ttywait(sp->s_ttyp);
				/*
				 * The tty could have been revoked
				 * if we blocked; re-test s_ttyvp
				 * before revoking access through it.
				 */
				context.vc_proc = p;
				context.vc_ucred = p->p_ucred;
				if (sp->s_ttyvp)
					VNOP_REVOKE(sp->s_ttyvp, REVOKEALL, &context);
			}
			/*
			 * Clear the session's tty vnode pointer before
			 * dropping our reference, so no one sees a
			 * half-released vnode through the session.
			 */
			ttyvp = sp->s_ttyvp;
			sp->s_ttyvp = NULL;
			if (ttyvp) {
				vnode_rele(ttyvp);
			}
			/*
			 * s_ttyp is not zero'd; we use this to indicate
			 * that the session once had a controlling terminal.
			 * (for logging and informational purposes)
			 */
		}
		sp->s_leader = NULL;
	}

	/* Fix up job-control state for our (now leaderless) process group. */
	fixjobc(p, p->p_pgrp, 0);
	/*
	 * NOTE(review): presumably lifted so writes performed after this
	 * point (e.g. accounting records) can't be stopped by the
	 * file-size limit — confirm.
	 */
	p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;

#if KTRACE
	/*
	 * release trace file
	 */
	p->p_traceflag = 0;	/* don't trace the vnode_rele() */
	if (p->p_tracep) {
		struct vnode *tvp = p->p_tracep;
		p->p_tracep = NULL;
		vnode_rele(tvp);
	}
#endif

	/*
	 * Hand every child over to init.  The assignment in the loop
	 * condition is intentional: proc_reparent() presumably unlinks q
	 * from our p_children list, so the loop drains it — confirm.
	 */
	while (q = p->p_children.lh_first) {
		proc_reparent(q, initproc);
		/*
		 * Traced processes are killed
		 * since their existence means someone is messing up.
		 */
		if (q->p_flag & P_TRACED) {
			q->p_flag &= ~P_TRACED;
			if (q->sigwait_thread) {
				/*
				 * The sigwait_thread could be stopped at a
				 * breakpoint. Wake it up to kill.
				 * Need to do this as it could be a thread which is not
				 * the first thread in the task. So any attempts to kill
				 * the process would result into a deadlock on q->sigwait.
				 */
				thread_resume((thread_t)q->sigwait_thread);
				clear_wait(q->sigwait_thread, THREAD_INTERRUPTED);
				threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0);
			}
			psignal(q, SIGKILL);
		}
	}

	/*
	 * Save exit status and final rusage info, adding in child rusage
	 * info and self times. If we were unable to allocate a zombie
	 * structure, this information is lost.
	 */
	if (p->p_ru != NULL) {
		*p->p_ru = p->p_stats->p_ru;
		timerclear(&p->p_ru->ru_utime);
		timerclear(&p->p_ru->ru_stime);

#ifdef FIXME
		if (task) {
			task_basic_info_data_t tinfo;
			task_thread_times_info_data_t ttimesinfo;
			int task_info_stuff, task_ttimes_stuff;
			struct timeval ut,st;

			/* Pull task-level user/system times from Mach. */
			task_info_stuff	= TASK_BASIC_INFO_COUNT;
			task_info(task, TASK_BASIC_INFO,
				  &tinfo, &task_info_stuff);
			p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds;
			p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds;
			p->p_ru->ru_stime.tv_sec = tinfo.system_time.seconds;
			p->p_ru->ru_stime.tv_usec = tinfo.system_time.microseconds;

			/* Add in the live-thread times not yet folded into the task. */
			task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT;
			task_info(task, TASK_THREAD_TIMES_INFO,
				  &ttimesinfo, &task_ttimes_stuff);

			ut.tv_sec = ttimesinfo.user_time.seconds;
			ut.tv_usec = ttimesinfo.user_time.microseconds;
			st.tv_sec = ttimesinfo.system_time.seconds;
			st.tv_usec = ttimesinfo.system_time.microseconds;
			timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime);
			timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime);
		}
#endif /* FIXME */

		ruadd(p->p_ru, &p->p_stats->p_cru);
	}

	/*
	 * Free up profiling buffers.
	 */
	{
		struct uprof *p0 = &p->p_stats->p_prof, *p1, *pn;

		/* Detach the chain first, then walk and free each link. */
		p1 = p0->pr_next;
		p0->pr_next = NULL;
		p0->pr_scale = 0;

		for (; p1 != NULL; p1 = pn) {
			pn = p1->pr_next;
			kfree(p1, sizeof *p1);
		}
	}

	/*
	 * Other substructures are freed from wait().
	 */
	FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_SUBPROC);
	p->p_stats = NULL;

	FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SUBPROC);
	p->p_sigacts = NULL;

	/*
	 * Drop our reference on the shared plimit structure; the last
	 * reference frees it.  Note p->p_limit is cleared unconditionally
	 * (the if guards only the FREE_ZONE).
	 */
	if (--p->p_limit->p_refcnt == 0)
		FREE_ZONE(p->p_limit, sizeof *p->p_limit, M_SUBPROC);
	p->p_limit = NULL;

	/*
	 * Finish up by terminating the task
	 * and halt this thread (only if a
	 * member of the task exiting).
	 */
	p->task = TASK_NULL;

	/*
	 * Notify parent that we're gone.  Post siginfo only when the
	 * parent is a real waiter (not init, which just reaps).
	 */
	pp = p->p_pptr;
	if (pp != initproc) {
		pp->si_pid = p->p_pid;
		pp->si_status = p->p_xstat;
		pp->si_code = CLD_EXITED;
		pp->si_uid = p->p_ucred->cr_ruid;
	}
	/* mark as a zombie */
	p->p_stat = SZOMB;

	psignal(p->p_pptr, SIGCHLD);

	/* and now wakeup the parent */
	wakeup((caddr_t)p->p_pptr);
}
1313
1314
1315 /*
1316 * munge_rusage
1317 * LP64 support - long is 64 bits if we are dealing with a 64 bit user
1318 * process. We munge the kernel (32 bit) version of rusage into the
1319 * 64 bit version.
1320 */
1321 __private_extern__ void
1322 munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p)
1323 {
1324 /* timeval changes size, so utime and stime need special handling */
1325 a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec;
1326 a_user_rusage_p->ru_utime.tv_usec = a_rusage_p->ru_utime.tv_usec;
1327 a_user_rusage_p->ru_stime.tv_sec = a_rusage_p->ru_stime.tv_sec;
1328 a_user_rusage_p->ru_stime.tv_usec = a_rusage_p->ru_stime.tv_usec;
1329 /*
1330 * everything else can be a direct assign, since there is no loss
1331 * of precision implied boing 32->64.
1332 */
1333 a_user_rusage_p->ru_maxrss = a_rusage_p->ru_maxrss;
1334 a_user_rusage_p->ru_ixrss = a_rusage_p->ru_ixrss;
1335 a_user_rusage_p->ru_idrss = a_rusage_p->ru_idrss;
1336 a_user_rusage_p->ru_isrss = a_rusage_p->ru_isrss;
1337 a_user_rusage_p->ru_minflt = a_rusage_p->ru_minflt;
1338 a_user_rusage_p->ru_majflt = a_rusage_p->ru_majflt;
1339 a_user_rusage_p->ru_nswap = a_rusage_p->ru_nswap;
1340 a_user_rusage_p->ru_inblock = a_rusage_p->ru_inblock;
1341 a_user_rusage_p->ru_oublock = a_rusage_p->ru_oublock;
1342 a_user_rusage_p->ru_msgsnd = a_rusage_p->ru_msgsnd;
1343 a_user_rusage_p->ru_msgrcv = a_rusage_p->ru_msgrcv;
1344 a_user_rusage_p->ru_nsignals = a_rusage_p->ru_nsignals;
1345 a_user_rusage_p->ru_nvcsw = a_rusage_p->ru_nvcsw;
1346 a_user_rusage_p->ru_nivcsw = a_rusage_p->ru_nivcsw;
1347 }