]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exit.c
xnu-1228.tar.gz
[apple/xnu.git] / bsd / kern / kern_exit.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <machine/reg.h>
76 #include <machine/psl.h>
77
78 #include "compat_43.h"
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/ioctl.h>
83 #include <sys/proc_internal.h>
84 #include <sys/proc.h>
85 #include <sys/kauth.h>
86 #include <sys/tty.h>
87 #include <sys/time.h>
88 #include <sys/resource.h>
89 #include <sys/kernel.h>
90 #include <sys/wait.h>
91 #include <sys/file_internal.h>
92 #include <sys/vnode_internal.h>
93 #include <sys/syslog.h>
94 #include <sys/malloc.h>
95 #include <sys/resourcevar.h>
96 #include <sys/ptrace.h>
97 #include <sys/user.h>
98 #include <sys/aio_kern.h>
99 #include <sys/sysproto.h>
100 #include <sys/signalvar.h>
101 #include <sys/filedesc.h> /* fdfree */
102 #if SYSV_SHM
103 #include <sys/shm_internal.h> /* shmexit */
104 #endif
105 #include <sys/acct.h> /* acct_process */
106
107 #include <bsm/audit_kernel.h>
108 #include <bsm/audit_kevents.h>
109
110 #include <mach/mach_types.h>
111
112 #include <kern/kern_types.h>
113 #include <kern/kalloc.h>
114 #include <kern/task.h>
115 #include <kern/thread.h>
116 #include <kern/thread_call.h>
117 #include <kern/sched_prim.h>
118 #include <kern/assert.h>
119 #if CONFIG_DTRACE
120 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
121 extern void (*dtrace_fasttrap_exit_ptr)(proc_t);
122 extern void (*dtrace_helpers_cleanup)(proc_t);
123 extern void dtrace_lazy_dofs_destroy(proc_t);
124
125 #include <sys/dtrace_ptss.h>
126 #endif
127
128 #if CONFIG_MACF
129 #include <security/mac.h>
130 #include <sys/syscall.h>
131 #endif
132
133 #include <mach/mach_types.h>
134 #include <mach/task.h>
135 #include <mach/thread_act.h>
136 #include <mach/mach_traps.h> /* init_process */
137
138 #include <sys/sdt.h>
139
/* Text appended to the panic message in exit1() when pid 1 exits. */
140 extern char init_task_failure_data[];
/* Exit-path routines declared here ahead of their use in this file. */
141 void proc_prepareexit(proc_t p, int rv);
142 void vfork_exit(proc_t p, int rv);
143 void vproc_exit(proc_t p);
144 __private_extern__ void munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p);
145 static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int droplock);
146
147 /*
148 * Things which should have prototypes in headers, but don't.
 * They are declared locally so the calls in this file have a prototype
 * in scope.
149 */
150 void *get_bsduthreadarg(thread_t);
151 void proc_exit(proc_t p);
152 int wait1continue(int result);
153 int waitidcontinue(int result);
154 int *get_bsduthreadrval(thread_t);
155 kern_return_t sys_perf_notify(thread_t thread, int pid);
156 kern_return_t abnormal_exit_notify(mach_exception_data_type_t code,
157 mach_exception_data_type_t subcode);
158 int in_shutdown(void);
159 void workqueue_exit(struct proc *);
160 void delay(int);
161
162 /*
163 * NOTE: Source and target may *NOT* overlap!
164 * XXX Should share code with bsd/dev/ppc/unix_signal.c
165 */
166 static void
167 siginfo_64to32(user_siginfo_t *in, siginfo_t *out)
168 {
169 out->si_signo = in->si_signo;
170 out->si_errno = in->si_errno;
171 out->si_code = in->si_code;
172 out->si_pid = in->si_pid;
173 out->si_uid = in->si_uid;
174 out->si_status = in->si_status;
175 out->si_addr = CAST_DOWN(void *,in->si_addr);
176 /* following cast works for sival_int because of padding */
177 out->si_value.sival_ptr = CAST_DOWN(void *,in->si_value.sival_ptr);
178 out->si_band = in->si_band; /* range reduction */
179 out->__pad[0] = in->pad[0]; /* mcontext.ss.r1 */
180 }
181
182 /*
183 * exit --
184 * Death of process.
185 */
186 void
187 exit(proc_t p, struct exit_args *uap, int *retval)
188 {
189 exit1(p, W_EXITCODE(uap->rval, 0), retval);
190
191 /* drop funnel before we return */
192 thread_exception_return();
193 /* NOTREACHED */
194 while (TRUE)
195 thread_block(THREAD_CONTINUE_NULL);
196 /* NOTREACHED */
197 }
198
199 /*
200 * Exit: deallocate address space and other resources, change proc state
201 * to zombie, and unlink proc from allproc and parent's lists. Save exit
202 * status and rusage for wait(). Check for child processes and orphan them.
 *
 * Parameters: p Process that is exiting
 * rv Wait status; stored into p->p_xstat
 * retval Only forwarded to vfork_return() when the
 * calling thread has UT_VFORK set
 *
 * Returns: 0 On every path that returns to the caller
 *
 * Notes: The code in this function itself only marks the process
 * P_LEXIT, records rv, calls proc_prepareexit() and then
 * task_terminate_internal(); the rest of the teardown described
 * above is not performed inline here.
203 */
204 int
205 exit1(proc_t p, int rv, int *retval)
206 {
207 thread_t self = current_thread();
208 struct task *task = p->task;
209 struct uthread *ut;
210
211 /*
212 * If a thread in this task has already
213 * called exit(), then halt any others
214 * right here.
 * (This is implemented by the exit_thread loop further down.)
215 */
216
217 ut = get_bsdthread_info(self);
/* Thread is flagged UT_VFORK: take the vfork-specific exit path. */
218 if (ut->uu_flag & UT_VFORK) {
219 vfork_exit(p, rv);
220 vfork_return(p , retval, p->p_pid);
221 unix_syscall_return(0);
222 /* NOT REACHED */
223 }
224
225 /*
226 * The parameter list of audit_syscall_exit() was augmented to
227 * take the Darwin syscall number as the first parameter,
228 * which is currently required by mac_audit_postselect().
229 */
230
231 AUDIT_SYSCALL_EXIT(SYS_exit, p, ut, 0); /* Exit is always successful */
232
233 DTRACE_PROC1(exit, int, CLD_EXITED);
234
235 proc_lock(p);
/*
 * Loop until this thread is the one recorded in p->exit_thread.
 * When sig_try_locked() returns <= 0 we give up: a thread that belongs
 * to a different task just returns 0, otherwise the calling thread
 * terminates itself.
 * NOTE(review): presumably sig_lock_to_exit() is what records the
 * caller in p->exit_thread -- confirm against its definition.
 */
236 while (p->exit_thread != self) {
237 if (sig_try_locked(p) <= 0) {
238 if (get_threadtask(self) != task) {
239 proc_unlock(p);
240 return(0);
241 }
242 proc_unlock(p);
243 thread_terminate(self);
244 thread_exception_return();
245 /* NOTREACHED */
246 }
247 sig_lock_to_exit(p);
248 }
249 #if !CONFIG_EMBEDDED /* BER_XXX */
/* pid 1 exiting is treated as fatal: log the status, then panic. */
250 if (p->p_pid == 1) {
251 proc_unlock(p);
252 printf("pid 1 exited (signal %d, exit %d)",
253 WTERMSIG(rv), WEXITSTATUS(rv));
254 panic("%s died\nState at Last Exception:\n\n%s",
255 (p->p_comm[0] != '\0' ?
256 p->p_comm :
257 "launchd"),
258 init_task_failure_data);
259 }
260 #endif
261
/* Still under the proc lock: flag the process as exiting and record the status. */
262 p->p_lflag |= P_LEXIT;
263 p->p_xstat = rv;
264
265 proc_unlock(p);
266
267 proc_prepareexit(p, rv);
268
269 /* task terminate will call proc_terminate and that cleans it up */
270 task_terminate_internal(task);
271
272 return(0);
273 }
274
/*
 * proc_prepareexit
 *
 * Description: First-stage exit processing. Called from exit1() and, when
 * P_LEXIT was not already set, from proc_exit(). It
 * - notifies the crash reporter (abnormal_exit_notify) when the
 * terminating signal in rv has the SA_CORE property and the
 * system is not shutting down,
 * - notifies the perf server (sys_perf_notify),
 * - moves p from its current p_list list onto zombproc and sets
 * P_LIST_EXITED, and
 * - clears P_LTRACED/P_LPPWAIT, resets p_sigignore and clears
 * the calling thread's pending signal list.
 *
 * Parameters: p Process that is exiting
 * rv Wait status; only WTERMSIG(rv) is examined
 */
275 void
276 proc_prepareexit(proc_t p, int rv)
277 {
278 mach_exception_data_type_t code, subcode;
279 struct uthread *ut;
280 thread_t self = current_thread();
281 ut = get_bsdthread_info(self);
282
283 /* If a core should be generated, notify crash reporter */
284 if (!in_shutdown() && hassigprop(WTERMSIG(rv), SA_CORE)) {
285 /*
286 * Workaround for processes checking up on PT_DENY_ATTACH:
287 * should be backed out post-Leopard (details in 5431025).
 * A SIGSEGV death whose parent has P_LNOATTACH set is not reported.
288 */
289 if ((SIGSEGV == WTERMSIG(rv)) &&
290 (p->p_pptr->p_lflag & P_LNOATTACH)) {
291 goto skipcheck;
292 }
293
294 /*
295 * Crash Reporter looks for the signal value, original exception
296 * type, and low 20 bits of the original code in code[0]
297 * (8, 4, and 20 bits respectively). code[1] is unmodified.
298 */
299 code = ((WTERMSIG(rv) & 0xff) << 24) |
300 ((ut->uu_exception & 0x0f) << 20) |
301 ((int)ut->uu_code & 0xfffff);
302 subcode = ut->uu_subcode;
303 (void) abnormal_exit_notify(code, subcode);
304 }
305
306 skipcheck:
307 /* Notify the perf server */
308 (void)sys_perf_notify(self, p->p_pid);
309
310 /*
311 * Remove proc from allproc queue and from pidhash chain.
312 * Need to do this before we do anything that can block.
313 * Not doing so lets things like mount() find this process on allproc
314 * in a partially cleaned state.
 * NOTE(review): only the p_list linkage is changed here; the p_hash
 * removal is done in reap_child_locked().
315 */
316
317 proc_list_lock();
318
319 LIST_REMOVE(p, p_list);
320 LIST_INSERT_HEAD(&zombproc, p, p_list); /* Place onto zombproc. */
321 /* will not be visible via proc_find */
322 p->p_listflag |= P_LIST_EXITED;
323
324 proc_list_unlock();
325
326
327 #ifdef PGINPROF
328 vmsizmon();
329 #endif
330 /*
331 * If parent is waiting for us to exit or exec,
332 * P_LPPWAIT is set; we will wakeup the parent below.
333 */
334 proc_lock(p);
335 p->p_lflag &= ~(P_LTRACED | P_LPPWAIT);
/* From here on ignore every signal except those in sigcantmask. */
336 p->p_sigignore = ~(sigcantmask);
/* Discard signals pending on the calling thread. */
337 ut->uu_siglist = 0;
338 proc_unlock(p);
339 }
340
/*
 * proc_exit
 *
 * Description: Tear down the BSD side of an exiting process. In order, this
 * releases DTrace state (CONFIG_DTRACE), allocates the zombie
 * rusage buffer, drains proc references, work queues and AIO,
 * closes files, releases SysV shm/sem state, gives up the
 * controlling terminal when p is a session leader, reaps or
 * reparents p's children, records the final rusage, frees
 * per-process substructures, detaches p from its task, and
 * finally either notifies the parent or lets the kernel reap
 * p directly.
 *
 * Parameters: p Process being torn down
 *
 * Notes: If P_LEXIT is not set on entry it is set here and
 * proc_prepareexit(p, 0) is called before continuing.
 */
341 void
342 proc_exit(proc_t p)
343 {
344 proc_t q;
345 proc_t pp;
346 struct task *task = p->task;
347 boolean_t fstate;
348 vnode_t tvp = NULLVP;
349 struct pgrp * pg;
350 struct session *sessp;
351 struct uthread * uth;
352
353 /* This can happen if thread_terminate of the single thread
354 * process
 * (i.e. we may get here without exit1() having set P_LEXIT; that
 * case is handled just below)
355 */
356
357 uth = (struct uthread *)get_bsdthread_info(current_thread());
358
359 proc_lock(p);
360 if( !(p->p_lflag & P_LEXIT)) {
361 p->p_lflag |= P_LEXIT;
362 proc_unlock(p);
363 proc_prepareexit(p, 0);
364 proc_lock(p);
365 }
366
367 p->p_lflag |= P_LPEXIT;
368 proc_unlock(p);
369
370 #if CONFIG_DTRACE
371 /*
372 * Free any outstanding lazy dof entries. It is imperative we
373 * always call dtrace_lazy_dofs_destroy, rather than null check
374 * and call if !NULL. If we NULL test, during lazy dof faulting
375 * we can race with the faulting code and proceed from here to
376 * beyond the helpers cleanup. The lazy dof faulting will then
377 * install new helpers which will never be cleaned up, and leak.
378 */
379 dtrace_lazy_dofs_destroy(p);
380
381 /*
382 * Clean up any DTrace helper actions or probes for the process.
383 */
384 if (p->p_dtrace_helpers != NULL) {
385 (*dtrace_helpers_cleanup)(p);
386 }
387
388 /*
389 * Clean up any DTrace probes associated with this process.
390 */
391 /*
392 * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(),
393 * call this after dtrace_helpers_cleanup()
394 */
395 proc_lock(p);
396 if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) {
397 (*dtrace_fasttrap_exit_ptr)(p);
398 }
399 proc_unlock(p);
400 #endif
401
402 /* XXX Zombie allocation may fail, in which case stats get lost */
403 MALLOC_ZONE(p->p_ru, struct rusage *,
404 sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK);
405
406 /*
407 * need to cancel async IO requests that can be cancelled and wait for those
408 * already active. MAY BLOCK!
409 */
410
411 proc_refdrain(p);
412
413 workqueue_exit(p);
414
415 _aio_exit( p );
416
417 /*
418 * Close open files and release open-file table.
419 * This may block!
420 */
421 fdfree(p);
422
423 #if SYSV_SHM
424 /* Close ref SYSV Shared memory*/
425 if (p->vm_shm)
426 shmexit(p);
427 #endif
428 #if SYSV_SEM
429 /* Release SYSV semaphores */
430 semexit(p);
431 #endif
432
/* Session-leader handling: give up the controlling terminal, if any. */
433 sessp = proc_session(p);
434 if (SESS_LEADER(p, sessp)) {
435
436 /* Protected by funnel for tty accesses */
437 fstate = thread_funnel_set(kernel_flock, TRUE);
438
439 if (sessp->s_ttyvp != NULLVP) {
440 struct vnode *ttyvp;
441 int ttyvid;
442 struct vfs_context context;
443 struct tty * tp;
444
445
446 /*
447 * Controlling process.
448 * Signal foreground pgrp,
449 * drain controlling terminal
450 * and revoke access to controlling terminal.
451 */
452 tp = sessp->s_ttyp;
453
454 if ((tp != TTY_NULL) && (tp->t_session == sessp)) {
455 tty_pgsignal(tp, SIGHUP, 1);
456 (void) ttywait(tp);
457 /*
458 * The tty could have been revoked
459 * if we blocked.
 * So re-read the vnode and its vid under the session lock and
 * validate them with vnode_getwithvid() before revoking.
460 */
461
462 session_lock(sessp);
463 ttyvp = sessp->s_ttyvp;
464 ttyvid = sessp->s_ttyvid;
465 sessp->s_ttyvp = NULL;
466 sessp->s_ttyvid = 0;
467 sessp->s_ttyp = NULL;
468 sessp->s_ttypgrpid = NO_PID;
469 session_unlock(sessp);
470
471 if ((ttyvp != NULLVP) && (vnode_getwithvid(ttyvp, ttyvid) == 0)) {
472 context.vc_thread = proc_thread(p); /* XXX */
473 context.vc_ucred = kauth_cred_proc_ref(p);
474 VNOP_REVOKE(ttyvp, REVOKEALL, &context);
475 vnode_put(ttyvp);
476 kauth_cred_unref(&context.vc_ucred);
477 }
478 } else {
/* tty is not (or no longer) bound to this session: just detach the session fields. */
479 session_lock(sessp);
480 ttyvp = sessp->s_ttyvp;
481 sessp->s_ttyvp = NULL;
482 sessp->s_ttyvid = 0;
483 sessp->s_ttyp = NULL;
484 sessp->s_ttypgrpid = NO_PID;
485 session_unlock(sessp);
486 }
/* Release the vnode that was taken out of sessp->s_ttyvp above. */
487 if (ttyvp)
488 vnode_rele(ttyvp);
489 /*
490 * s_ttyp is not zero'd; we use this to indicate
491 * that the session once had a controlling terminal.
492 * (for logging and informational purposes)
 *
 * NOTE(review): this comment looks stale -- both branches above
 * set sessp->s_ttyp to NULL.
493 */
494 }
495
496 (void) thread_funnel_set(kernel_flock, fstate);
497 session_lock(sessp);
498 sessp->s_leader = NULL;
499 session_unlock(sessp);
500 }
501 session_rele(sessp);
502
503 pg = proc_pgrp(p);
504 fixjobc(p, pg, 0);
505 pg_rele(pg);
506
/* Lift the file size limit before acct_process() runs. */
507 p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
508 (void)acct_process(p);
509
510 proc_list_lock();
511 /* wait till parentrefs are dropped and grant no more */
512 proc_childdrainstart(p);
/*
 * Dispose of every child: zombies are reaped here, all other children
 * are reparented to initproc (and killed if they were being traced).
 */
513 while ((q = p->p_children.lh_first) != NULL) {
514 q->p_listflag |= P_LIST_DEADPARENT;
515 if (q->p_stat == SZOMB) {
516 if (p != q->p_pptr)
517 panic("parent child linkage broken");
518 /* check for sysctl zomb lookup */
519 while ((q->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
520 msleep(&q->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
521 }
522 q->p_listflag |= P_LIST_WAITING;
523 /*
524 * This is a named reference and it is not granted
525 * if the reap is already in progress. So we get
526 * the reference here exclusively and there can be
527 * no waiters. So there is no need for a wakeup
528 * after we are done. Also the reap frees the structure
529 * and the proc struct cannot be used for wakeups as well.
530 * It is safe to use q here as this is system reap
531 */
532 (void)reap_child_locked(p, q, 1, 1, 0);
533 } else {
534 proc_reparentlocked(q, initproc, 0, 1);
535 /*
536 * Traced processes are killed
537 * since their existence means someone is messing up.
538 */
539 if (q->p_lflag & P_LTRACED) {
540 proc_list_unlock();
541 proc_lock(q);
542 q->p_lflag &= ~P_LTRACED;
543 if (q->sigwait_thread) {
544 proc_unlock(q);
545 /*
546 * The sigwait_thread could be stopped at a
547 * breakpoint. Wake it up to kill.
548 * Need to do this as it could be a thread which is not
549 * the first thread in the task. So any attempts to kill
550 * the process would result into a deadlock on q->sigwait.
551 */
552 thread_resume((thread_t)q->sigwait_thread);
553 clear_wait(q->sigwait_thread, THREAD_INTERRUPTED);
554 threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0);
555 } else
556 proc_unlock(q);
/* Not part of the else above: SIGKILL is posted in both cases. */
557 psignal(q, SIGKILL);
558 proc_list_lock();
559 }
560 }
561 }
562
563 proc_childdrainend(p);
564 proc_list_unlock();
565
566 /*
567 * Release reference to text vnode
568 */
569 tvp = p->p_textvp;
570 p->p_textvp = NULL;
571 if (tvp != NULLVP) {
572 vnode_rele(tvp);
573 }
574
575 /*
576 * Save exit status and final rusage info, adding in child rusage
577 * info and self times. If we were unable to allocate a zombie
578 * structure, this information is lost.
579 */
580 /* No need for locking here as no one other than this thread can access this */
581 if (p->p_ru != NULL) {
582 *p->p_ru = p->p_stats->p_ru;
583
584 timerclear(&p->p_ru->ru_utime);
585 timerclear(&p->p_ru->ru_stime);
586
587 if (task) {
588 task_basic_info_32_data_t tinfo;
589 task_thread_times_info_data_t ttimesinfo;
590 task_events_info_data_t teventsinfo;
591 mach_msg_type_number_t task_info_stuff, task_ttimes_stuff;
592 mach_msg_type_number_t task_events_stuff;
593 struct timeval ut,st;
594
/* Times and resident size reported by the task's basic info. */
595 task_info_stuff = TASK_BASIC_INFO_32_COUNT;
596 task_info(task, TASK_BASIC2_INFO_32,
597 (task_info_t)&tinfo, &task_info_stuff);
598 p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds;
599 p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds;
600 p->p_ru->ru_stime.tv_sec = tinfo.system_time.seconds;
601 p->p_ru->ru_stime.tv_usec = tinfo.system_time.microseconds;
602
603 p->p_ru->ru_maxrss = tinfo.resident_size;
604
/* Add the times reported by TASK_THREAD_TIMES_INFO on top. */
605 task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT;
606 task_info(task, TASK_THREAD_TIMES_INFO,
607 (task_info_t)&ttimesinfo, &task_ttimes_stuff);
608
609 ut.tv_sec = ttimesinfo.user_time.seconds;
610 ut.tv_usec = ttimesinfo.user_time.microseconds;
611 st.tv_sec = ttimesinfo.system_time.seconds;
612 st.tv_usec = ttimesinfo.system_time.microseconds;
613 timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime);
614 timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime);
615
/* Fault and context-switch counts come from the task event counters. */
616 task_events_stuff = TASK_EVENTS_INFO_COUNT;
617 task_info(task, TASK_EVENTS_INFO,
618 (task_info_t)&teventsinfo, &task_events_stuff);
619
620 p->p_ru->ru_minflt = (teventsinfo.faults -
621 teventsinfo.pageins);
622 p->p_ru->ru_majflt = teventsinfo.pageins;
623 p->p_ru->ru_nivcsw = (teventsinfo.csw -
624 p->p_ru->ru_nvcsw);
/* Clamp: the subtraction above may come out negative. */
625 if (p->p_ru->ru_nivcsw < 0)
626 p->p_ru->ru_nivcsw = 0;
627 }
628
629 ruadd(p->p_ru, &p->p_stats->p_cru);
630 }
631
632 /*
633 * Free up profiling buffers.
 * The first uprof is embedded in p_stats and is only reset; the
 * entries chained after it are freed with kfree().
634 */
635 {
636 struct uprof *p0 = &p->p_stats->p_prof, *p1, *pn;
637
638 p1 = p0->pr_next;
639 p0->pr_next = NULL;
640 p0->pr_scale = 0;
641
642 for (; p1 != NULL; p1 = pn) {
643 pn = p1->pr_next;
644 kfree(p1, sizeof *p1);
645 }
646 }
647
/*
 * Cancel the p_rcall thread call, then poll (delay(1) with the spin
 * lock dropped) until p_ractive has fallen to zero before freeing it.
 */
648 proc_spinlock(p);
649 if (thread_call_cancel(p->p_rcall))
650 p->p_ractive--;
651
652 while (p->p_ractive > 0) {
653 proc_spinunlock(p);
654
655 delay(1);
656
657 proc_spinlock(p);
658 }
659 proc_spinunlock(p);
660
661 thread_call_free(p->p_rcall);
662 p->p_rcall = NULL;
663
664 /*
665 * Other substructures are freed from wait().
666 */
667 FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS);
668 p->p_stats = NULL;
669
670 FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS);
671 p->p_sigacts = NULL;
672
673 proc_limitdrop(p, 1);
674 p->p_limit = NULL;
675
676
677 /*
678 * Finish up by terminating the task
679 * and halt this thread (only if a
680 * member of the task exiting).
 * Here that amounts to breaking the proc <-> task linkage.
681 */
682 p->task = TASK_NULL;
683 set_bsdtask_info(task, NULL);
684
685 proc_knote(p, NOTE_EXIT);
686
687 /* mark the thread as the one that is doing proc_exit
688 * no need to hold proc lock in uthread_free
689 */
690 uth->uu_flag |= UT_PROCEXIT;
691 /*
692 * Notify parent that we're gone.
693 */
694 pp = proc_parent(p);
695 if (pp->p_flag & P_NOCLDWAIT) {
696
697 #if 3839178
698 /*
699 * If the parent is ignoring SIGCHLD, then POSIX requires
700 * us to not add the resource usage to the parent process -
701 * we are only going to hand it off to init to get reaped.
702 * We should contest the standard in this case on the basis
703 * of RLIMIT_CPU.
704 */
705 #else /* !3839178 */
706 /*
707 * Add child resource usage to parent before giving
708 * zombie to init. If we were unable to allocate a
709 * zombie structure, this information is lost.
710 */
711 if (p->p_ru != NULL) {
712 proc_lock(pp);
713 ruadd(&pp->p_stats->p_cru, p->p_ru);
714 proc_unlock(pp);
715 }
716 #endif /* !3839178 */
717
718 /* kernel can reap this one, no need to move it to launchd */
719 proc_list_lock();
720 p->p_listflag |= P_LIST_DEADPARENT;
721 proc_list_unlock();
722 }
/* Parent will collect us: post the siginfo fields, become a zombie, signal and wake the parent. */
723 if ((p->p_listflag & P_LIST_DEADPARENT) == 0) {
724 if (pp != initproc) {
725 proc_lock(pp);
726 pp->si_pid = p->p_pid;
727 pp->si_status = p->p_xstat;
728 pp->si_code = CLD_EXITED;
729 /*
730 * p_ucred usage is safe as it is an exiting process
731 * and reference is dropped in reap
732 */
733 pp->si_uid = p->p_ucred->cr_ruid;
734 proc_unlock(pp);
735 }
736 /* mark as a zombie */
737 /* No need to take proc lock as all refs are drained and
738 * no one except parent (reaping ) can look at this.
739 * The write is to an int and is coherent. Also parent is
740 * keyed off of list lock for reaping
741 */
742 p->p_stat = SZOMB;
743 /*
744 * The current process can be reaped so, no one
745 * can depend on this
746 */
747
748 psignal(pp, SIGCHLD);
749
750 /* and now wakeup the parent */
751 proc_list_lock();
752 wakeup((caddr_t)pp);
753 proc_list_unlock();
754 } else {
755 /* should be fine as parent proc would be initproc */
756 /* mark as a zombie */
757 /* No need to take proc lock as all refs are drained and
758 * no one except parent (reaping ) can look at this.
759 * The write is to an int and is coherent. Also parent is
760 * keyed off of list lock for reaping
761 */
762 proc_list_lock();
763 p->p_stat = SZOMB;
764 /* check for sysctl zomb lookup */
765 while ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
766 msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
767 }
768 /* safe to use p as this is a system reap */
769 p->p_listflag |= P_LIST_WAITING;
770 /*
771 * This is a named reference and it is not granted
772 * if the reap is already in progress. So we get
773 * the reference here exclusively and there can be
774 * no waiters. So there is no need for a wakeup
775 * after we are done. Also the reap frees the structure
776 * and the proc struct cannot be used for wakeups as well.
777 * It is safe to use p here as this is system reap
778 */
779 (void)reap_child_locked(pp, p, 1, 1, 1);
780 /* list lock dropped by reap_child_locked */
781 }
782
/* NOTE(review): presumably pairs with the proc_parent() call above -- confirm. */
783 proc_rele(pp);
784
785 }
786
787
788 /*
789 * reap_child_locked
790 *
791 * Description: Given a process from which all status information needed
792 * has already been extracted, if the process is a ptrace
793 * attach process, detach it and give it back to its real
794 * parent, else recover all resources remaining associated
795 * with it.
796 *
797 * Parameters: proc_t parent Parent of process being reaped
798 * proc_t child Process to reap
 * int deadparent If non-zero, wake up sleepers on
 * parent once its child list is empty
 * int locked 1 if the proc list lock is held on
 * entry; it is dropped immediately
 * int droplock Only meaningful with locked == 1:
 * 0 re-takes the proc list lock before
 * returning, non-zero returns with it
 * dropped
799 *
800 * Returns: 0 Process was not reaped because it
801 * came from an attach
802 * 1 Process was reaped
 *
 * Notes: When 1 is returned the child proc structure has been freed
 * and must not be referenced again by the caller.
803 */
804 static int
805 reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int droplock)
806 {
807 proc_t trace_parent; /* Traced parent process, if tracing */
808
809 /*
810 * If we got the child via a ptrace 'attach',
811 * we need to give it back to the old parent.
812 */
813 if (locked == 1)
814 proc_list_unlock();
815 if (child->p_oppid && (trace_parent = proc_find(child->p_oppid))) {
816 proc_lock(child);
817 child->p_oppid = 0;
818 proc_unlock(child);
819 if (trace_parent != initproc) {
820 /*
821 * proc internal fields and p_ucred usage safe
822 * here as child is dead and is not reaped or
823 * reparented yet
824 */
825 proc_lock(trace_parent);
826 trace_parent->si_pid = child->p_pid;
827 trace_parent->si_status = child->p_xstat;
828 trace_parent->si_code = CLD_CONTINUED;
829 trace_parent->si_uid = child->p_ucred->cr_ruid;
830 proc_unlock(trace_parent);
831 }
832 proc_reparentlocked(child, trace_parent, 1, 0);
833 psignal(trace_parent, SIGCHLD);
834 proc_list_lock();
835 wakeup((caddr_t)trace_parent);
/* Give up the exclusive P_LIST_WAITING claim and wake anyone queued on it. */
836 child->p_listflag &= ~P_LIST_WAITING;
837 wakeup(&child->p_stat);
838 proc_list_unlock();
839 proc_rele(trace_parent);
840 if ((locked == 1) && (droplock == 0))
841 proc_list_lock();
842 return (0);
843 }
844
845 proc_knote(child, NOTE_REAP);
846
847 child->p_xstat = 0;
848 if (child->p_ru) {
849 proc_lock(parent);
850 #if 3839178
851 /*
852 * If the parent is ignoring SIGCHLD, then POSIX requires
853 * us to not add the resource usage to the parent process -
854 * we are only going to hand it off to init to get reaped.
855 * We should contest the standard in this case on the basis
856 * of RLIMIT_CPU.
 * (The if below guards only the single ruadd() statement that
 * follows the #endif.)
857 */
858 if (!(parent->p_flag & P_NOCLDWAIT))
859 #endif /* 3839178 */
860 ruadd(&parent->p_stats->p_cru, child->p_ru);
861 proc_unlock(parent);
862 FREE_ZONE(child->p_ru, sizeof *child->p_ru, M_ZOMBIE);
863 child->p_ru = NULL;
864 } else {
865 printf("Warning : lost p_ru for %s\n", child->p_comm);
866 }
867
868 /*
869 * Decrement the count of procs running with this uid.
870 * p_ucred usage is safe here as it is an exited process.
871 * and reference is dropped after these calls down below
872 * (locking protection is provided by list lock held in chgproccnt)
873 */
874 (void)chgproccnt(child->p_ucred->cr_ruid, -1);
875
876 #if CONFIG_LCTX
877 ALLLCTX_LOCK;
878 leavelctx(child);
879 ALLLCTX_UNLOCK;
880 #endif
881
882 /*
883 * Free up credentials.
884 */
885 if (IS_VALID_CRED(child->p_ucred)) {
886 kauth_cred_unref(&child->p_ucred);
887 }
888
889 /* XXXX Note NOT SAFE TO USE p_ucred from this point onwards */
890
891 /*
892 * Finally finished with old proc entry.
893 * Unlink it from its process group and free it.
894 */
895 leavepgrp(child);
896
897 proc_list_lock();
898 LIST_REMOVE(child, p_list); /* off zombproc */
899 parent->p_childrencnt--;
900 LIST_REMOVE(child, p_sibling);
901 /* If there are no more children wakeup parent */
902 if ((deadparent != 0) && (LIST_EMPTY(&parent->p_children)))
903 wakeup((caddr_t)parent); /* with list lock held */
904 child->p_listflag &= ~P_LIST_WAITING;
905 wakeup(&child->p_stat);
906
907 /* Take it out of process hash */
908 LIST_REMOVE(child, p_hash);
909 child->p_listflag &= ~P_LIST_INHASH;
910 proc_checkdeadrefs(child);
911 nprocs--;
912
913 proc_list_unlock();
914
/* Destroy the per-process locks before the structure itself is freed. */
915 lck_mtx_destroy(&child->p_mlock, proc_lck_grp);
916 lck_mtx_destroy(&child->p_fdmlock, proc_lck_grp);
917 #if CONFIG_DTRACE
918 lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp);
919 #endif
920 lck_spin_destroy(&child->p_slock, proc_lck_grp);
921 workqueue_destroy_lock(child);
922
923 FREE_ZONE(child, sizeof *child, M_PROC);
924 if ((locked == 1) && (droplock == 0))
925 proc_list_lock();
926
927 return (1);
928 }
929
930
931 int
932 wait1continue(int result)
933 {
934 void *vt;
935 thread_t thread;
936 int *retval;
937 proc_t p;
938
939 if (result)
940 return(result);
941
942 p = current_proc();
943 thread = current_thread();
944 vt = get_bsduthreadarg(thread);
945 retval = get_bsduthreadrval(thread);
946 return(wait4(p, (struct wait4_args *)vt, retval));
947 }
948
949 int
950 wait4(proc_t q, struct wait4_args *uap, register_t *retval)
951 {
952 __pthread_testcancel(1);
953 return(wait4_nocancel(q, (struct wait4_nocancel_args *)uap, retval));
954 }
955
/*
 * wait4_nocancel
 *
 * Description: Scan the children of q for one that matches uap->pid and
 * is a zombie, is stopped and not yet reported, or (with
 * WCONTINUED) has been continued, and report it to the caller.
 * If nothing is reportable the caller sleeps on q unless
 * WNOHANG is set.
 *
 * Parameters: q Calling (parent) process
 * uap->pid Selector: 0 is rewritten to -q->p_pgrpid;
 * WAIT_ANY matches every child; otherwise
 * matched against p_pid or, negated,
 * against p_pgrpid
 * uap->status If non-zero, user address for the status
 * uap->rusage If non-zero, user address for the
 * zombie's rusage
 * uap->options WUNTRACED, WCONTINUED and WNOHANG are
 * examined
 * retval retval[0] receives the reported child's
 * pid, or 0 for WNOHANG with nothing to
 * report
 *
 * Returns: 0 Success
 * ECHILD No child matched uap->pid
 * ENOMEM rusage requested but the zombie has no p_ru
 * !0 Error from copyout(), mac_proc_check_wait()
 * or msleep0()
 *
 * Notes: P_LIST_WAITING on a child acts as a single-waiter claim; it
 * is set under the proc list lock while the child is examined
 * and cleared (with a wakeup on &p->p_stat) on every exit path.
 */
956 int
957 wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, register_t *retval)
958 {
959 int nfound;
960 proc_t p;
961 int status, error;
962
963 if (uap->pid == 0)
964 uap->pid = -q->p_pgrpid;
965
966 loop:
967 proc_list_lock();
968 loop1:
969 nfound = 0;
970 for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) {
971 if (uap->pid != WAIT_ANY &&
972 p->p_pid != uap->pid &&
973 p->p_pgrpid != -(uap->pid))
974 continue;
975
976 nfound++;
977
978 /* XXX This is racy because we don't get the lock!!!! */
979
/* Someone else has claimed this child: sleep, then rescan from the top. */
980 if (p->p_listflag & P_LIST_WAITING) {
981 (void)msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
982 goto loop1;
983 }
984 p->p_listflag |= P_LIST_WAITING; /* only allow single thread to wait() */
985
986
/* Case 1: the child is a zombie -- report status/rusage, then reap it. */
987 if (p->p_stat == SZOMB) {
988 proc_list_unlock();
989 #if CONFIG_MACF
990 if ((error = mac_proc_check_wait(q, p)) != 0)
991 goto out;
992 #endif
993 retval[0] = p->p_pid;
994 if (uap->status) {
995 /* Legacy apps expect only 8 bits of status */
/* NOTE(review): the mask below keeps the low 16 bits of p_xstat. */
996 status = 0xffff & p->p_xstat; /* convert to int */
997 error = copyout((caddr_t)&status,
998 uap->status,
999 sizeof(status));
1000 if (error)
1001 goto out;
1002 }
1003 if (uap->rusage) {
1004 if (p->p_ru == NULL) {
1005 error = ENOMEM;
1006 } else {
/* 64-bit callers get the rusage converted by munge_rusage() first. */
1007 if (IS_64BIT_PROCESS(q)) {
1008 struct user_rusage my_rusage;
1009 munge_rusage(p->p_ru, &my_rusage);
1010 error = copyout((caddr_t)&my_rusage,
1011 uap->rusage,
1012 sizeof (my_rusage));
1013 }
1014 else {
1015 error = copyout((caddr_t)p->p_ru,
1016 uap->rusage,
1017 sizeof (struct rusage));
1018 }
1019 }
1020 /* information unavailable? */
1021 if (error)
1022 goto out;
1023 }
1024
1025 /* Clean up */
/* A 0 return means the child was not reaped, so p is still valid: release our claim. */
1026 if (!reap_child_locked(q, p, 0, 0, 0)) {
1027 proc_list_lock();
1028 p->p_listflag &= ~P_LIST_WAITING;
1029 wakeup(&p->p_stat);
1030 proc_list_unlock();
1031 }
1032
1033 return (0);
1034 }
/* Case 2: the child is stopped, not yet reported, and is traced or WUNTRACED was requested. */
1035 if (p->p_stat == SSTOP && (p->p_lflag & P_LWAITED) == 0 &&
1036 (p->p_lflag & P_LTRACED || uap->options & WUNTRACED)) {
1037 proc_list_unlock();
1038 #if CONFIG_MACF
1039 if ((error = mac_proc_check_wait(q, p)) != 0)
1040 goto out;
1041 #endif
1042 proc_lock(p);
1043 p->p_lflag |= P_LWAITED;
1044 proc_unlock(p);
1045 retval[0] = p->p_pid;
1046 if (uap->status) {
1047 status = W_STOPCODE(p->p_xstat);
1048 error = copyout((caddr_t)&status,
1049 uap->status,
1050 sizeof(status));
1051 } else
1052 error = 0;
1053 goto out;
1054 }
1055 /*
1056 * If we are waiting for continued processes, and this
1057 * process was continued
1058 */
1059 if ((uap->options & WCONTINUED) &&
1060 (p->p_flag & P_CONTINUED)) {
1061 proc_list_unlock();
1062 #if CONFIG_MACF
1063 if ((error = mac_proc_check_wait(q, p)) != 0)
1064 goto out;
1065 #endif
1066
1067 /* Prevent other processes from waiting for this event */
1068 OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&p->p_flag);
1069 retval[0] = p->p_pid;
1070 if (uap->status) {
1071 status = W_STOPCODE(SIGCONT);
1072 error = copyout((caddr_t)&status,
1073 uap->status,
1074 sizeof(status));
1075 } else
1076 error = 0;
1077 goto out;
1078 }
/* Nothing to report for this child: release the claim and move on. */
1079 p->p_listflag &= ~P_LIST_WAITING;
1080 wakeup(&p->p_stat);
1081 }
1082 /* list lock is held when we get here any which way */
1083 if (nfound == 0) {
1084 proc_list_unlock();
1085 return (ECHILD);
1086 }
1087
1088 if (uap->options & WNOHANG) {
1089 retval[0] = 0;
1090 proc_list_unlock();
1091 return (0);
1092 }
1093
/* Sleep on q with wait1continue as the continuation; PDROP is passed, so the list lock is re-taken at loop. */
1094 if ((error = msleep0((caddr_t)q, proc_list_mlock, PWAIT | PCATCH | PDROP, "wait", 0, wait1continue)))
1095 return (error);
1096
1097 goto loop;
/* Common exit for paths that still hold the P_LIST_WAITING claim on p. */
1098 out:
1099 proc_list_lock();
1100 p->p_listflag &= ~P_LIST_WAITING;
1101 wakeup(&p->p_stat);
1102 proc_list_unlock();
1103 return (error);
1104 }
1105
1106
1107 int
1108 waitidcontinue(int result)
1109 {
1110 void *vt;
1111 thread_t thread;
1112 int *retval;
1113
1114 if (result)
1115 return(result);
1116
1117 thread = current_thread();
1118 vt = get_bsduthreadarg(thread);
1119 retval = get_bsduthreadrval(thread);
1120 return(waitid(current_proc(), (struct waitid_args *)vt, retval));
1121 }
1122
1123 /*
1124 * Description: Suspend the calling thread until one child of the process
1125 * containing the calling thread changes state.
1126 *
1127 * Parameters: uap->idtype one of P_PID, P_PGID, P_ALL
1128 * uap->id pid_t or gid_t or ignored
1129 * uap->infop Address of signinfo_t struct in
1130 * user space into which to return status
1131 * uap->options flag values
1132 *
1133 * Returns: 0 Success
1134 * !0 Error returning status to user space
1135 */
1136 int
1137 waitid(proc_t q, struct waitid_args *uap, register_t *retval)
1138 {
1139 __pthread_testcancel(1);
1140 return(waitid_nocancel(q, (struct waitid_nocancel_args *)uap, retval));
1141 }
1142
1143 int
1144 waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused register_t *retval)
1145 {
1146 user_siginfo_t collect64; /* siginfo data to return to caller */
1147
1148 int nfound;
1149 proc_t p;
1150 int error;
1151
1152 /*
1153 * Forced validation of options for T.waitpid 21; should be a TSD!
1154 * This will pass the test, but note that we have more bits than the
1155 * standard specifies that we will allow in, in this case. The test
1156 * passes because they light all the bits, not just the ones we allow,
1157 * and so the following check returns EINVAL like the test wants.
1158 */
1159 if (((uap->options & (WNOHANG|WNOWAIT|WCONTINUED|WUNTRACED|WSTOPPED|WEXITED)) != uap->options) ||
1160 (uap->options == 0))
1161 return (EINVAL); /* bits set that aren't recognized */
1162
1163 /*
1164 * Overly critical options checking, per POSIX
1165 */
1166 switch(uap->idtype) {
1167 case P_PID: /* child with process ID equal to... */
1168 case P_PGID: /* child with process group ID equal to... */
1169 if (((int)uap->id) < 0)
1170 return (EINVAL);
1171 break;
1172 case P_ALL: /* any child */
1173 break;
1174 }
1175
1176 loop:
1177 proc_list_lock();
1178 loop1:
1179 nfound = 0;
1180 for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) {
1181 switch(uap->idtype) {
1182 case P_PID: /* child with process ID equal to... */
1183 if (p->p_pid != (pid_t)uap->id)
1184 continue;
1185 break;
1186 case P_PGID: /* child with process group ID equal to... */
1187 if (p->p_pgrpid != (pid_t)uap->id)
1188 continue;
1189 break;
1190 case P_ALL: /* any child */
1191 break;
1192 }
1193
1194 /* XXX This is racy because we don't get the lock!!!! */
1195
1196 /*
1197 * Wait collision; go to sleep and restart; used to maintain
1198 * the single return for waited process guarantee.
1199 */
1200 if (p->p_listflag & P_LIST_WAITING) {
1201 (void)msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitidcoll", 0);
1202 goto loop1;
1203 }
1204 p->p_listflag |= P_LIST_WAITING; /* mark busy */
1205
1206 nfound++;
1207
1208 /*
1209 * Types of processes we are interested in
1210 *
1211 * XXX Don't know what to do for WCONTINUED?!?
1212 */
1213 switch(p->p_stat) {
1214 case SZOMB: /* Exited */
1215 if (!(uap->options & WEXITED))
1216 break;
1217
1218 /* drop the lock and the thread is going to return */
1219 proc_list_unlock();
1220
1221 /* Collect "siginfo" information for caller */
1222 collect64.si_signo = SIGCHLD;
1223 collect64.si_code = 0;
1224 collect64.si_errno = 0;
1225 collect64.si_pid = 0;
1226 collect64.si_uid = 0;
1227 collect64.si_addr = 0;
1228 collect64.si_status = WEXITSTATUS(p->p_xstat);
1229 collect64.si_band = 0;
1230
1231 if (IS_64BIT_PROCESS(p)) {
1232 error = copyout((caddr_t)&collect64,
1233 uap->infop,
1234 sizeof(collect64));
1235 } else {
1236 siginfo_t collect;
1237 siginfo_64to32(&collect64,&collect);
1238 error = copyout((caddr_t)&collect,
1239 uap->infop,
1240 sizeof(collect));
1241 }
1242 /* information unavailable? */
1243 if (error)
1244 goto out;
1245
1246 /* Prevent other process for waiting for this event? */
1247 if (!(uap->options & WNOWAIT)) {
1248 /* Clean up */
1249 if (!reap_child_locked(q, p, 0, 0, 0)) {
1250 proc_list_lock();
1251 p->p_listflag &= ~P_LIST_WAITING;
1252 wakeup(&p->p_stat);
1253 proc_list_unlock();
1254 }
1255 } else {
1256 proc_list_lock();
1257 p->p_listflag &= ~P_LIST_WAITING;
1258 proc_list_unlock();
1259 }
1260
1261 return (0);
1262
1263 case SSTOP: /* Stopped */
1264 /*
1265 * If we are not interested in stopped processes, then
1266 * ignore this one.
1267 */
1268 if (!(uap->options & WSTOPPED))
1269 break;
1270
1271 /*
1272 * If someone has already waited it, we lost a race
1273 * to be the one to return status.
1274 */
1275 if ((p->p_lflag & P_LWAITED) != 0)
1276 break;
1277
1278 /* drop the lock and the thread is going to return */
1279 proc_list_unlock();
1280
1281 /* Collect "siginfo" information for caller */
1282 collect64.si_signo = SIGCHLD;
1283 collect64.si_code = 0;
1284 collect64.si_errno = 0;
1285 collect64.si_pid = 0;
1286 collect64.si_uid = 0;
1287 collect64.si_addr = 0;
1288 proc_lock(p);
1289 collect64.si_status = p->p_xstat;
1290 proc_unlock(p);
1291 collect64.si_band = 0;
1292
1293 if (IS_64BIT_PROCESS(p)) {
1294 error = copyout((caddr_t)&collect64,
1295 uap->infop,
1296 sizeof(collect64));
1297 } else {
1298 siginfo_t collect;
1299 siginfo_64to32(&collect64,&collect);
1300 error = copyout((caddr_t)&collect,
1301 uap->infop,
1302 sizeof(collect));
1303 }
1304 /* information unavailable? */
1305 if (error)
1306 goto out;
1307
1308 /* Prevent other process for waiting for this event? */
1309 if (!(uap->options & WNOWAIT)) {
1310 proc_lock(p);
1311 p->p_lflag |= P_LWAITED;
1312 proc_unlock(p);
1313 }
1314
1315 error = 0;
1316 goto out;
1317
1318 default: /* All others */
1319 /* ...meaning Continued */
1320 if (!(uap->options & WCONTINUED))
1321 break;
1322
1323 /*
1324 * If the flag isn't set, then this process has not
1325 * been stopped and continued, or the status has
1326 * already been reaped by another caller of waitid().
1327 */
1328 if ((p->p_flag & P_CONTINUED) == 0)
1329 break;
1330
1331 /* drop the lock and the thread is going to return */
1332 proc_list_unlock();
1333
1334 /* Collect "siginfo" information for caller */
1335 proc_lock(p);
1336 collect64.si_signo = SIGCHLD;
1337 collect64.si_code = CLD_CONTINUED;
1338 collect64.si_errno = 0;
1339 collect64.si_pid = p->p_contproc;
1340 collect64.si_uid = 0;
1341 collect64.si_addr = 0;
1342 collect64.si_status = p->p_xstat;
1343 collect64.si_band = 0;
1344 proc_unlock(p);
1345
1346 if (IS_64BIT_PROCESS(p)) {
1347 error = copyout((caddr_t)&collect64,
1348 uap->infop,
1349 sizeof(collect64));
1350 } else {
1351 siginfo_t collect;
1352 siginfo_64to32(&collect64,&collect);
1353 error = copyout((caddr_t)&collect,
1354 uap->infop,
1355 sizeof(collect));
1356 }
1357 /* information unavailable? */
1358 if (error)
1359 goto out;
1360
1361 /* Prevent other process for waiting for this event? */
1362 if (!(uap->options & WNOWAIT)) {
1363 OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&p->p_flag);
1364 }
1365
1366 error = 0;
1367 goto out;
1368 }
1369 /* LIST LOCK IS HELD HERE */
1370 /* Not a process we are interested in; go on to next child */
1371
1372 p->p_listflag &= ~P_LIST_WAITING;
1373 wakeup(&p->p_stat);
1374 }
1375
1376 /* list lock is always held */
1377 /* No child processes that could possibly satisfy the request? */
1378 if (nfound == 0) {
1379 proc_list_unlock();
1380 return (ECHILD);
1381 }
1382
1383 if (uap->options & WNOHANG) {
1384 proc_list_unlock();
1385 return (0);
1386 }
1387
1388 if ((error = msleep0((caddr_t)q, proc_list_mlock, PWAIT | PCATCH | PDROP, "waitid", 0, waitidcontinue)))
1389 return (error);
1390
1391 goto loop;
1392 out:
1393 proc_list_lock();
1394 p->p_listflag &= ~P_LIST_WAITING;
1395 wakeup(&p->p_stat);
1396 proc_list_unlock();
1397 return (error);
1398 }
1399
1400 /*
1401 * make process 'parent' the new parent of process 'child'.
1402 */
1403 void
1404 proc_reparentlocked(proc_t child, proc_t parent, int cansignal, int locked)
1405 {
1406 proc_t oldparent = PROC_NULL;
1407
1408 if (child->p_pptr == parent)
1409 return;
1410
1411 if (locked == 0)
1412 proc_list_lock();
1413
1414 oldparent = child->p_pptr;
1415 #if __PROC_INTERNAL_DEBUG
1416 if (oldparent == PROC_NULL)
1417 panic("proc_reparent: process %x does not have a parent\n", (unsigned int)child);
1418 #endif
1419
1420 LIST_REMOVE(child, p_sibling);
1421 #if __PROC_INTERNAL_DEBUG
1422 if (oldparent->p_childrencnt == 0)
1423 panic("process children count already 0\n");
1424 #endif
1425 oldparent->p_childrencnt--;
1426 #if __PROC_INTERNAL_DEBUG1
1427 if (oldparent->p_childrencnt < 0)
1428 panic("process children count -ve\n");
1429 #endif
1430 LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
1431 parent->p_childrencnt++;
1432 child->p_pptr = parent;
1433 child->p_ppid = parent->p_pid;
1434
1435 proc_list_unlock();
1436
1437 if ((cansignal != 0) && (initproc == parent) && (child->p_stat == SZOMB))
1438 psignal(initproc, SIGCHLD);
1439 if (locked == 1)
1440 proc_list_lock();
1441 }
1442
1443 /*
1444 * Make the current process an "init" process, meaning
1445 * that it doesn't have a parent, and that it won't be
1446 * gunned down by kill(-1, 0).
1447 */
1448 kern_return_t
1449 init_process(__unused struct init_process_args *args)
1450 {
1451 proc_t p = current_proc();
1452
1453 AUDIT_MACH_SYSCALL_ENTER(AUE_INITPROCESS);
1454 if (suser(kauth_cred_get(), &p->p_acflag)) {
1455 AUDIT_MACH_SYSCALL_EXIT(KERN_NO_ACCESS);
1456 return(KERN_NO_ACCESS);
1457 }
1458
1459 if (p->p_pid != 1 && p->p_pgrpid != p->p_pid)
1460 enterpgrp(p, p->p_pid, 0);
1461 OSBitOrAtomic(P_SYSTEM, (UInt32 *)&p->p_flag);
1462
1463 /*
1464 * Take us out of the sibling chain, and
1465 * out of our parent's child chain.
1466 */
1467 proc_list_lock();
1468 LIST_REMOVE(p, p_sibling);
1469 p->p_sibling.le_prev = NULL;
1470 p->p_sibling.le_next = NULL;
1471 p->p_pptr = kernproc;
1472 p->p_ppid = 0;
1473 proc_list_unlock();
1474
1475
1476 AUDIT_MACH_SYSCALL_EXIT(KERN_SUCCESS);
1477 return(KERN_SUCCESS);
1478 }
1479
1480
1481 /*
1482 * Exit: deallocate address space and other resources, change proc state
1483 * to zombie, and unlink proc from allproc and parent's lists. Save exit
1484 * status and rusage for wait(). Check for child processes and orphan them.
1485 */
1486
1487 void
1488 vfork_exit(proc_t p, int rv)
1489 {
1490 vfork_exit_internal(p, rv, 0);
1491 }
1492
1493 void
1494 vfork_exit_internal(proc_t p, int rv, int forceexit)
1495 {
1496 thread_t self = current_thread();
1497 #ifdef FIXME
1498 struct task *task = p->task;
1499 #endif
1500 struct uthread *ut;
1501
1502 /*
1503 * If a thread in this task has already
1504 * called exit(), then halt any others
1505 * right here.
1506 */
1507
1508 ut = get_bsdthread_info(self);
1509
1510
1511 proc_lock(p);
1512 if ((p->p_lflag & P_LPEXIT) == P_LPEXIT) {
1513 /*
1514 * This happens when a parent exits/killed and vfork is in progress
1515 * other threads. But shutdown code for ex has already called exit1()
1516 */
1517 proc_unlock(p);
1518 return;
1519 }
1520 p->p_lflag |= (P_LEXIT | P_LPEXIT);
1521 proc_unlock(p);
1522
1523 if (forceexit == 0) {
1524 /*
1525 * parent of a vfork child has already called exit() and the
1526 * thread that has vfork in proress terminates. So there is no
1527 * separate address space here and it has already been marked for
1528 * termination. This was never covered before and could cause problems
1529 * if we block here for outside code.
1530 */
1531 /* Notify the perf server */
1532 (void)sys_perf_notify(self, p->p_pid);
1533 }
1534
1535 /*
1536 * Remove proc from allproc queue and from pidhash chain.
1537 * Need to do this before we do anything that can block.
1538 * Not doing causes things like mount() find this on allproc
1539 * in partially cleaned state.
1540 */
1541
1542 proc_list_lock();
1543
1544 LIST_REMOVE(p, p_list);
1545 LIST_INSERT_HEAD(&zombproc, p, p_list); /* Place onto zombproc. */
1546 /* will not be visible via proc_find */
1547 p->p_listflag |= P_LIST_EXITED;
1548
1549 proc_list_unlock();
1550
1551 proc_lock(p);
1552 p->p_xstat = rv;
1553 p->p_lflag &= ~(P_LTRACED | P_LPPWAIT);
1554 p->p_sigignore = ~0;
1555 proc_unlock(p);
1556
1557 proc_spinlock(p);
1558 if (thread_call_cancel(p->p_rcall))
1559 p->p_ractive--;
1560
1561 while (p->p_ractive > 0) {
1562 proc_spinunlock(p);
1563
1564 delay(1);
1565
1566 proc_spinlock(p);
1567 }
1568 proc_spinunlock(p);
1569
1570 thread_call_free(p->p_rcall);
1571 p->p_rcall = NULL;
1572
1573 ut->uu_siglist = 0;
1574
1575 vproc_exit(p);
1576 }
1577
1578 void
1579 vproc_exit(proc_t p)
1580 {
1581 proc_t q;
1582 proc_t pp;
1583
1584 vnode_t tvp;
1585 #ifdef FIXME
1586 struct task *task = p->task;
1587 #endif
1588 struct pgrp * pg;
1589 struct session *sessp;
1590 boolean_t fstate;
1591
1592 /* XXX Zombie allocation may fail, in which case stats get lost */
1593 MALLOC_ZONE(p->p_ru, struct rusage *,
1594 sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK);
1595
1596
1597 proc_refdrain(p);
1598
1599 /*
1600 * Close open files and release open-file table.
1601 * This may block!
1602 */
1603 fdfree(p);
1604
1605 sessp = proc_session(p);
1606 if (SESS_LEADER(p, sessp)) {
1607
1608 /* Protected by funnel for tty accesses */
1609 fstate = thread_funnel_set(kernel_flock, TRUE);
1610
1611 if (sessp->s_ttyvp != NULLVP) {
1612 struct vnode *ttyvp;
1613 int ttyvid;
1614 struct vfs_context context;
1615 struct tty * tp;
1616
1617 /*
1618 * Controlling process.
1619 * Signal foreground pgrp,
1620 * drain controlling terminal
1621 * and revoke access to controlling terminal.
1622 */
1623 tp = sessp->s_ttyp;
1624
1625 if ((tp != TTY_NULL) && (tp->t_session == sessp)) {
1626 tty_pgsignal(tp, SIGHUP, 1);
1627 (void) ttywait(tp);
1628 /*
1629 * The tty could have been revoked
1630 * if we blocked.
1631 */
1632
1633 session_lock(sessp);
1634 ttyvp = sessp->s_ttyvp;
1635 ttyvid = sessp->s_ttyvid;
1636 sessp->s_ttyvp = NULL;
1637 sessp->s_ttyvid = 0;
1638 sessp->s_ttyp = NULL;
1639 sessp->s_ttypgrpid = NO_PID;
1640 session_unlock(sessp);
1641
1642 if ((ttyvp != NULLVP) && (vnode_getwithvid(ttyvp, ttyvid) == 0)) {
1643 context.vc_thread = proc_thread(p); /* XXX */
1644 context.vc_ucred = kauth_cred_proc_ref(p);
1645 VNOP_REVOKE(ttyvp, REVOKEALL, &context);
1646 vnode_put(ttyvp);
1647 kauth_cred_unref(&context.vc_ucred);
1648 }
1649 } else {
1650 session_lock(sessp);
1651 ttyvp = sessp->s_ttyvp;
1652 sessp->s_ttyvp = NULL;
1653 sessp->s_ttyvid = 0;
1654 sessp->s_ttyp = NULL;
1655 sessp->s_ttypgrpid = NO_PID;
1656 session_unlock(sessp);
1657 }
1658 if (ttyvp)
1659 vnode_rele(ttyvp);
1660 /*
1661 * s_ttyp is not zero'd; we use this to indicate
1662 * that the session once had a controlling terminal.
1663 * (for logging and informational purposes)
1664 */
1665 }
1666 (void) thread_funnel_set(kernel_flock, fstate);
1667
1668 session_lock(sessp);
1669 sessp->s_leader = NULL;
1670 session_unlock(sessp);
1671 }
1672 session_rele(sessp);
1673
1674 pg = proc_pgrp(p);
1675 fixjobc(p, pg, 0);
1676 pg_rele(pg);
1677
1678 p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
1679
1680 proc_list_lock();
1681 proc_childdrainstart(p);
1682 while ((q = p->p_children.lh_first) != NULL) {
1683 q->p_listflag |= P_LIST_DEADPARENT;
1684 if (q->p_stat == SZOMB) {
1685 if (p != q->p_pptr)
1686 panic("parent child linkage broken");
1687 /* check for lookups by zomb sysctl */
1688 while ((q->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
1689 msleep(&q->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
1690 }
1691 q->p_listflag |= P_LIST_WAITING;
1692 /*
1693 * This is a named reference and it is not granted
1694 * if the reap is already in progress. So we get
1695 * the reference here exclusively and their can be
1696 * no waiters. So there is no need for a wakeup
1697 * after we are done. AlsO the reap frees the structure
1698 * and the proc struct cannot be used for wakeups as well.
1699 * It is safe to use q here as this is system reap
1700 */
1701 (void)reap_child_locked(p, q, 1, 1, 0);
1702 } else {
1703 proc_reparentlocked(q, initproc, 0, 1);
1704 /*
1705 * Traced processes are killed
1706 * since their existence means someone is messing up.
1707 */
1708 if (q->p_lflag & P_LTRACED) {
1709 proc_list_unlock();
1710 proc_lock(q);
1711 q->p_lflag &= ~P_LTRACED;
1712 if (q->sigwait_thread) {
1713 proc_unlock(q);
1714 /*
1715 * The sigwait_thread could be stopped at a
1716 * breakpoint. Wake it up to kill.
1717 * Need to do this as it could be a thread which is not
1718 * the first thread in the task. So any attempts to kill
1719 * the process would result into a deadlock on q->sigwait.
1720 */
1721 thread_resume((thread_t)q->sigwait_thread);
1722 clear_wait(q->sigwait_thread, THREAD_INTERRUPTED);
1723 threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0);
1724 } else
1725 proc_unlock(q);
1726
1727 psignal(q, SIGKILL);
1728 proc_list_lock();
1729 }
1730 }
1731 }
1732
1733 proc_childdrainend(p);
1734 proc_list_unlock();
1735
1736 /*
1737 * Release reference to text vnode
1738 */
1739 tvp = p->p_textvp;
1740 p->p_textvp = NULL;
1741 if (tvp != NULLVP) {
1742 vnode_rele(tvp);
1743 }
1744
1745 /*
1746 * Save exit status and final rusage info, adding in child rusage
1747 * info and self times. If we were unable to allocate a zombie
1748 * structure, this information is lost.
1749 */
1750 /* No need for locking here as no one than this thread can access this */
1751 if (p->p_ru != NULL) {
1752 *p->p_ru = p->p_stats->p_ru;
1753 timerclear(&p->p_ru->ru_utime);
1754 timerclear(&p->p_ru->ru_stime);
1755
1756 #ifdef FIXME
1757 if (task) {
1758 task_basic_info_data_t tinfo;
1759 task_thread_times_info_data_t ttimesinfo;
1760 int task_info_stuff, task_ttimes_stuff;
1761 struct timeval ut,st;
1762
1763 task_info_stuff = TASK_BASIC_INFO_COUNT;
1764 task_info(task, TASK_BASIC_INFO,
1765 &tinfo, &task_info_stuff);
1766 p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds;
1767 p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds;
1768 p->p_ru->ru_stime.tv_sec = tinfo.system_time.seconds;
1769 p->p_ru->ru_stime.tv_usec = tinfo.system_time.microseconds;
1770
1771 task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT;
1772 task_info(task, TASK_THREAD_TIMES_INFO,
1773 &ttimesinfo, &task_ttimes_stuff);
1774
1775 ut.tv_sec = ttimesinfo.user_time.seconds;
1776 ut.tv_usec = ttimesinfo.user_time.microseconds;
1777 st.tv_sec = ttimesinfo.system_time.seconds;
1778 st.tv_usec = ttimesinfo.system_time.microseconds;
1779 timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime);
1780 timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime);
1781 }
1782 #endif /* FIXME */
1783
1784 ruadd(p->p_ru, &p->p_stats->p_cru);
1785 }
1786
1787 /*
1788 * Free up profiling buffers.
1789 */
1790 {
1791 struct uprof *p0 = &p->p_stats->p_prof, *p1, *pn;
1792
1793 p1 = p0->pr_next;
1794 p0->pr_next = NULL;
1795 p0->pr_scale = 0;
1796
1797 for (; p1 != NULL; p1 = pn) {
1798 pn = p1->pr_next;
1799 kfree(p1, sizeof *p1);
1800 }
1801 }
1802
1803 /*
1804 * Other substructures are freed from wait().
1805 */
1806 FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS);
1807 p->p_stats = NULL;
1808
1809 FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS);
1810 p->p_sigacts = NULL;
1811
1812 proc_limitdrop(p, 1);
1813 p->p_limit = NULL;
1814
1815 /*
1816 * Finish up by terminating the task
1817 * and halt this thread (only if a
1818 * member of the task exiting).
1819 */
1820 p->task = TASK_NULL;
1821
1822 /*
1823 * Notify parent that we're gone.
1824 */
1825 pp = proc_parent(p);
1826 if ((p->p_listflag & P_LIST_DEADPARENT) == 0) {
1827 if (pp != initproc) {
1828 proc_lock(pp);
1829 pp->si_pid = p->p_pid;
1830 pp->si_status = p->p_xstat;
1831 pp->si_code = CLD_EXITED;
1832 /*
1833 * p_ucred usage is safe as it is an exiting process
1834 * and reference is dropped in reap
1835 */
1836 pp->si_uid = p->p_ucred->cr_ruid;
1837 proc_unlock(pp);
1838 }
1839 /* mark as a zombie */
1840 /* mark as a zombie */
1841 /* No need to take proc lock as all refs are drained and
1842 * no one except parent (reaping ) can look at this.
1843 * The write is to an int and is coherent. Also parent is
1844 * keyed off of list lock for reaping
1845 */
1846 p->p_stat = SZOMB;
1847
1848 psignal(pp, SIGCHLD);
1849
1850 /* and now wakeup the parent */
1851 proc_list_lock();
1852 wakeup((caddr_t)pp);
1853 proc_list_unlock();
1854 } else {
1855 proc_list_lock();
1856 p->p_stat = SZOMB;
1857 /* check for lookups by zomb sysctl */
1858 while ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
1859 msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
1860 }
1861 p->p_listflag |= P_LIST_WAITING;
1862 /*
1863 * This is a named reference and it is not granted
1864 * if the reap is already in progress. So we get
1865 * the reference here exclusively and their can be
1866 * no waiters. So there is no need for a wakeup
1867 * after we are done. AlsO the reap frees the structure
1868 * and the proc struct cannot be used for wakeups as well.
1869 * It is safe to use p here as this is system reap
1870 */
1871 (void)reap_child_locked(pp, p, 0, 1, 1);
1872 /* list lock dropped by reap_child_locked */
1873 }
1874 proc_rele(pp);
1875 }
1876
1877
1878 /*
1879 * munge_rusage
1880 * LP64 support - long is 64 bits if we are dealing with a 64 bit user
1881 * process. We munge the kernel (32 bit) version of rusage into the
1882 * 64 bit version.
1883 */
1884 __private_extern__ void
1885 munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p)
1886 {
1887 /* timeval changes size, so utime and stime need special handling */
1888 a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec;
1889 a_user_rusage_p->ru_utime.tv_usec = a_rusage_p->ru_utime.tv_usec;
1890 a_user_rusage_p->ru_stime.tv_sec = a_rusage_p->ru_stime.tv_sec;
1891 a_user_rusage_p->ru_stime.tv_usec = a_rusage_p->ru_stime.tv_usec;
1892 /*
1893 * everything else can be a direct assign, since there is no loss
1894 * of precision implied boing 32->64.
1895 */
1896 a_user_rusage_p->ru_maxrss = a_rusage_p->ru_maxrss;
1897 a_user_rusage_p->ru_ixrss = a_rusage_p->ru_ixrss;
1898 a_user_rusage_p->ru_idrss = a_rusage_p->ru_idrss;
1899 a_user_rusage_p->ru_isrss = a_rusage_p->ru_isrss;
1900 a_user_rusage_p->ru_minflt = a_rusage_p->ru_minflt;
1901 a_user_rusage_p->ru_majflt = a_rusage_p->ru_majflt;
1902 a_user_rusage_p->ru_nswap = a_rusage_p->ru_nswap;
1903 a_user_rusage_p->ru_inblock = a_rusage_p->ru_inblock;
1904 a_user_rusage_p->ru_oublock = a_rusage_p->ru_oublock;
1905 a_user_rusage_p->ru_msgsnd = a_rusage_p->ru_msgsnd;
1906 a_user_rusage_p->ru_msgrcv = a_rusage_p->ru_msgrcv;
1907 a_user_rusage_p->ru_nsignals = a_rusage_p->ru_nsignals;
1908 a_user_rusage_p->ru_nvcsw = a_rusage_p->ru_nvcsw;
1909 a_user_rusage_p->ru_nivcsw = a_rusage_p->ru_nivcsw;
1910 }